1 2/* FF is big and ugly so feel free to write lines as long as you like. 3 * Aieeeeeeeee ! 4 * 5 * Let me make that clearer: 6 * Aieeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeeee ! !! !!! 7 */ 8 9#include "device9.h" 10#include "basetexture9.h" 11#include "vertexdeclaration9.h" 12#include "vertexshader9.h" 13#include "pixelshader9.h" 14#include "nine_ff.h" 15#include "nine_defines.h" 16#include "nine_helpers.h" 17#include "nine_pipe.h" 18#include "nine_dump.h" 19 20#include "pipe/p_context.h" 21#include "tgsi/tgsi_ureg.h" 22#include "tgsi/tgsi_dump.h" 23#include "util/u_box.h" 24#include "util/u_hash_table.h" 25#include "util/u_upload_mgr.h" 26 27#define DBG_CHANNEL DBG_FF 28 29#define NINE_FF_NUM_VS_CONST 196 30#define NINE_FF_NUM_PS_CONST 24 31 32struct fvec4 33{ 34 float x, y, z, w; 35}; 36 37struct nine_ff_vs_key 38{ 39 union { 40 struct { 41 uint32_t position_t : 1; 42 uint32_t lighting : 1; 43 uint32_t darkness : 1; /* lighting enabled but no active lights */ 44 uint32_t localviewer : 1; 45 uint32_t vertexpointsize : 1; 46 uint32_t pointscale : 1; 47 uint32_t vertexblend : 3; 48 uint32_t vertexblend_indexed : 1; 49 uint32_t vertextween : 1; 50 uint32_t mtl_diffuse : 2; /* 0 = material, 1 = color1, 2 = color2 */ 51 uint32_t mtl_ambient : 2; 52 uint32_t mtl_specular : 2; 53 uint32_t mtl_emissive : 2; 54 uint32_t fog_mode : 2; 55 uint32_t fog_range : 1; 56 uint32_t color0in_one : 1; 57 uint32_t color1in_zero : 1; 58 uint32_t has_normal : 1; 59 uint32_t fog : 1; 60 uint32_t normalizenormals : 1; 61 uint32_t ucp : 1; 62 uint32_t pad1 : 4; 63 uint32_t tc_dim_input: 16; /* 8 * 2 bits */ 64 uint32_t pad2 : 16; 65 uint32_t tc_dim_output: 24; /* 8 * 3 bits */ 66 uint32_t pad3 : 8; 67 uint32_t tc_gen : 24; /* 8 * 3 bits */ 68 uint32_t pad4 : 8; 69 uint32_t tc_idx : 24; 70 uint32_t pad5 : 8; 71 uint32_t passthrough; 72 }; 73 uint64_t value64[3]; /* don't forget to resize VertexShader9.ff_key */ 74 uint32_t value32[6]; 75 }; 76}; 77 78/* Texture stage state: 79 * 80 * COLOROP D3DTOP 5 bit 81 * ALPHAOP D3DTOP 5 bit 82 * COLORARG0 D3DTA 3 bit 83 * COLORARG1 D3DTA 3 bit 84 * COLORARG2 D3DTA 3 bit 85 * ALPHAARG0 D3DTA 3 bit 86 * ALPHAARG1 D3DTA 3 bit 87 * ALPHAARG2 D3DTA 3 bit 88 * RESULTARG D3DTA 1 bit (CURRENT:0 or TEMP:1) 89 * TEXCOORDINDEX 0 - 7 3 bit 90 * =========================== 91 * 32 bit per stage 92 */ 93struct nine_ff_ps_key 94{ 95 union { 96 struct { 97 struct { 98 uint32_t colorop : 5; 99 uint32_t alphaop : 5; 100 uint32_t colorarg0 : 3; 101 uint32_t colorarg1 : 3; 102 uint32_t colorarg2 : 3; 103 uint32_t alphaarg0 : 3; 104 uint32_t alphaarg1 : 3; 105 uint32_t alphaarg2 : 3; 106 uint32_t resultarg : 1; /* CURRENT:0 or TEMP:1 */ 107 uint32_t textarget : 2; /* 1D/2D/3D/CUBE */ 108 uint32_t pad : 1; 109 /* that's 32 bit exactly */ 110 } ts[8]; 111 uint32_t projected : 16; 112 uint32_t fog : 1; /* for vFog coming from VS */ 113 uint32_t fog_mode : 2; 114 uint32_t fog_source : 1; /* 0: Z, 1: W */ 115 uint32_t specular : 1; 116 uint32_t pad1 : 11; /* 9 32-bit words with this */ 117 uint8_t colorarg_b4[3]; 118 uint8_t colorarg_b5[3]; 119 uint8_t alphaarg_b4[3]; /* 11 32-bit words plus a byte */ 120 uint8_t pad2[3]; 121 }; 122 uint64_t value64[6]; /* don't forget to resize PixelShader9.ff_key */ 123 uint32_t value32[12]; 124 }; 125}; 126 127static uint32_t nine_ff_vs_key_hash(const void *key) 128{ 129 const struct nine_ff_vs_key *vs = key; 130 unsigned i; 131 uint32_t hash = vs->value32[0]; 132 for (i = 1; i < ARRAY_SIZE(vs->value32); ++i) 133 hash ^= vs->value32[i]; 134 return hash; 135} 136static bool nine_ff_vs_key_comp(const void *key1, const void *key2) 137{ 138 struct nine_ff_vs_key *a = (struct nine_ff_vs_key *)key1; 139 struct nine_ff_vs_key *b = (struct nine_ff_vs_key *)key2; 140 141 return memcmp(a->value64, b->value64, sizeof(a->value64)) == 0; 142} 143static uint32_t nine_ff_ps_key_hash(const void *key) 144{ 145 const struct nine_ff_ps_key *ps = key; 146 unsigned i; 147 uint32_t hash = ps->value32[0]; 148 for (i = 1; i < ARRAY_SIZE(ps->value32); ++i) 149 hash ^= ps->value32[i]; 150 return hash; 151} 152static bool nine_ff_ps_key_comp(const void *key1, const void *key2) 153{ 154 struct nine_ff_ps_key *a = (struct nine_ff_ps_key *)key1; 155 struct nine_ff_ps_key *b = (struct nine_ff_ps_key *)key2; 156 157 return memcmp(a->value64, b->value64, sizeof(a->value64)) == 0; 158} 159static uint32_t nine_ff_fvf_key_hash(const void *key) 160{ 161 return *(DWORD *)key; 162} 163static bool nine_ff_fvf_key_comp(const void *key1, const void *key2) 164{ 165 return *(DWORD *)key1 == *(DWORD *)key2; 166} 167 168static void nine_ff_prune_vs(struct NineDevice9 *); 169static void nine_ff_prune_ps(struct NineDevice9 *); 170 171static void nine_ureg_tgsi_dump(struct ureg_program *ureg, boolean override) 172{ 173 if (debug_get_bool_option("NINE_FF_DUMP", FALSE) || override) { 174 const struct tgsi_token *toks = ureg_get_tokens(ureg, NULL); 175 tgsi_dump(toks, 0); 176 ureg_free_tokens(toks); 177 } 178} 179 180#define _X(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_X) 181#define _Y(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_Y) 182#define _Z(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_Z) 183#define _W(r) ureg_scalar(ureg_src(r), TGSI_SWIZZLE_W) 184 185#define _XXXX(r) ureg_scalar(r, TGSI_SWIZZLE_X) 186#define _YYYY(r) ureg_scalar(r, TGSI_SWIZZLE_Y) 187#define _ZZZZ(r) ureg_scalar(r, TGSI_SWIZZLE_Z) 188#define _WWWW(r) ureg_scalar(r, TGSI_SWIZZLE_W) 189 190#define _XYZW(r) (r) 191 192/* AL should contain base address of lights table. */ 193#define LIGHT_CONST(i) \ 194 ureg_src_indirect(ureg_DECL_constant(ureg, i), _X(AL)) 195 196#define MATERIAL_CONST(i) \ 197 ureg_DECL_constant(ureg, 19 + (i)) 198 199#define _CONST(n) ureg_DECL_constant(ureg, n) 200 201/* VS FF constants layout: 202 * 203 * CONST[ 0.. 3] D3DTS_WORLD * D3DTS_VIEW * D3DTS_PROJECTION 204 * CONST[ 4.. 7] D3DTS_WORLD * D3DTS_VIEW 205 * CONST[ 8..11] D3DTS_PROJECTION 206 * CONST[12..15] D3DTS_VIEW^(-1) 207 * CONST[16..18] Normal matrix 208 * 209 * CONST[19].xyz MATERIAL.Emissive + Material.Ambient * RS.Ambient 210 * CONST[20] MATERIAL.Diffuse 211 * CONST[21] MATERIAL.Ambient 212 * CONST[22] MATERIAL.Specular 213 * CONST[23].x___ MATERIAL.Power 214 * CONST[24] MATERIAL.Emissive 215 * CONST[25] RS.Ambient 216 * 217 * CONST[26].x___ RS.PointSizeMin 218 * CONST[26]._y__ RS.PointSizeMax 219 * CONST[26].__z_ RS.PointSize 220 * CONST[26].___w RS.PointScaleA 221 * CONST[27].x___ RS.PointScaleB 222 * CONST[27]._y__ RS.PointScaleC 223 * 224 * CONST[28].x___ RS.FogEnd 225 * CONST[28]._y__ 1.0f / (RS.FogEnd - RS.FogStart) 226 * CONST[28].__z_ RS.FogDensity 227 228 * CONST[30].x___ TWEENFACTOR 229 * 230 * CONST[32].x___ LIGHT[0].Type 231 * CONST[32]._yzw LIGHT[0].Attenuation0,1,2 232 * CONST[33] LIGHT[0].Diffuse 233 * CONST[34] LIGHT[0].Specular 234 * CONST[35] LIGHT[0].Ambient 235 * CONST[36].xyz_ LIGHT[0].Position 236 * CONST[36].___w LIGHT[0].Range 237 * CONST[37].xyz_ LIGHT[0].Direction 238 * CONST[37].___w LIGHT[0].Falloff 239 * CONST[38].x___ cos(LIGHT[0].Theta / 2) 240 * CONST[38]._y__ cos(LIGHT[0].Phi / 2) 241 * CONST[38].__z_ 1.0f / (cos(LIGHT[0].Theta / 2) - cos(Light[0].Phi / 2)) 242 * CONST[39].xyz_ LIGHT[0].HalfVector (for directional lights) 243 * CONST[39].___w 1 if this is the last active light, 0 if not 244 * CONST[40] LIGHT[1] 245 * CONST[48] LIGHT[2] 246 * CONST[56] LIGHT[3] 247 * CONST[64] LIGHT[4] 248 * CONST[72] LIGHT[5] 249 * CONST[80] LIGHT[6] 250 * CONST[88] LIGHT[7] 251 * NOTE: no lighting code is generated if there are no active lights 252 * 253 * CONST[100].x___ Viewport 2/width 254 * CONST[100]._y__ Viewport 2/height 255 * CONST[100].__z_ Viewport 1/(zmax - zmin) 256 * CONST[100].___w Viewport width 257 * CONST[101].x___ Viewport x0 258 * CONST[101]._y__ Viewport y0 259 * CONST[101].__z_ Viewport z0 260 * 261 * CONST[128..131] D3DTS_TEXTURE0 262 * CONST[132..135] D3DTS_TEXTURE1 263 * CONST[136..139] D3DTS_TEXTURE2 264 * CONST[140..143] D3DTS_TEXTURE3 265 * CONST[144..147] D3DTS_TEXTURE4 266 * CONST[148..151] D3DTS_TEXTURE5 267 * CONST[152..155] D3DTS_TEXTURE6 268 * CONST[156..159] D3DTS_TEXTURE7 269 * 270 * CONST[160] D3DTS_WORLDMATRIX[0] * D3DTS_VIEW 271 * CONST[164] D3DTS_WORLDMATRIX[1] * D3DTS_VIEW 272 * ... 273 * CONST[192] D3DTS_WORLDMATRIX[8] * D3DTS_VIEW 274 */ 275struct vs_build_ctx 276{ 277 struct ureg_program *ureg; 278 const struct nine_ff_vs_key *key; 279 280 uint16_t input[PIPE_MAX_ATTRIBS]; 281 unsigned num_inputs; 282 283 struct ureg_src aVtx; 284 struct ureg_src aNrm; 285 struct ureg_src aCol[2]; 286 struct ureg_src aTex[8]; 287 struct ureg_src aPsz; 288 struct ureg_src aInd; 289 struct ureg_src aWgt; 290 291 struct ureg_src aVtx1; /* tweening */ 292 struct ureg_src aNrm1; 293 294 struct ureg_src mtlA; 295 struct ureg_src mtlD; 296 struct ureg_src mtlS; 297 struct ureg_src mtlE; 298}; 299 300static inline unsigned 301get_texcoord_sn(struct pipe_screen *screen) 302{ 303 if (screen->get_param(screen, PIPE_CAP_TGSI_TEXCOORD)) 304 return TGSI_SEMANTIC_TEXCOORD; 305 return TGSI_SEMANTIC_GENERIC; 306} 307 308static inline struct ureg_src 309build_vs_add_input(struct vs_build_ctx *vs, uint16_t ndecl) 310{ 311 const unsigned i = vs->num_inputs++; 312 assert(i < PIPE_MAX_ATTRIBS); 313 vs->input[i] = ndecl; 314 return ureg_DECL_vs_input(vs->ureg, i); 315} 316 317/* NOTE: dst may alias src */ 318static inline void 319ureg_normalize3(struct ureg_program *ureg, 320 struct ureg_dst dst, struct ureg_src src) 321{ 322 struct ureg_dst tmp = ureg_DECL_temporary(ureg); 323 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); 324 325 ureg_DP3(ureg, tmp_x, src, src); 326 ureg_RSQ(ureg, tmp_x, _X(tmp)); 327 ureg_MUL(ureg, dst, src, _X(tmp)); 328 ureg_release_temporary(ureg, tmp); 329} 330 331static void * 332nine_ff_build_vs(struct NineDevice9 *device, struct vs_build_ctx *vs) 333{ 334 const struct nine_ff_vs_key *key = vs->key; 335 struct ureg_program *ureg = ureg_create(PIPE_SHADER_VERTEX); 336 struct ureg_dst oPos, oCol[2], oPsz, oFog; 337 struct ureg_dst AR; 338 unsigned i, c; 339 unsigned label[32], l = 0; 340 boolean need_aNrm = key->lighting || key->passthrough & (1 << NINE_DECLUSAGE_NORMAL); 341 boolean has_aNrm; 342 boolean need_aVtx = key->lighting || key->fog_mode || key->pointscale || key->ucp; 343 const unsigned texcoord_sn = get_texcoord_sn(device->screen); 344 345 vs->ureg = ureg; 346 347 /* Check which inputs we should transform. */ 348 for (i = 0; i < 8 * 3; i += 3) { 349 switch ((key->tc_gen >> i) & 0x7) { 350 case NINED3DTSS_TCI_CAMERASPACENORMAL: 351 need_aNrm = TRUE; 352 break; 353 case NINED3DTSS_TCI_CAMERASPACEPOSITION: 354 need_aVtx = TRUE; 355 break; 356 case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR: 357 need_aVtx = need_aNrm = TRUE; 358 break; 359 case NINED3DTSS_TCI_SPHEREMAP: 360 need_aVtx = need_aNrm = TRUE; 361 break; 362 default: 363 break; 364 } 365 } 366 367 has_aNrm = need_aNrm && key->has_normal; 368 369 /* Declare and record used inputs (needed for linkage with vertex format): 370 * (texture coordinates handled later) 371 */ 372 vs->aVtx = build_vs_add_input(vs, 373 key->position_t ? NINE_DECLUSAGE_POSITIONT : NINE_DECLUSAGE_POSITION); 374 375 vs->aNrm = ureg_imm1f(ureg, 0.0f); 376 if (has_aNrm) 377 vs->aNrm = build_vs_add_input(vs, NINE_DECLUSAGE_NORMAL); 378 379 vs->aCol[0] = ureg_imm1f(ureg, 1.0f); 380 vs->aCol[1] = ureg_imm1f(ureg, 0.0f); 381 382 if (key->lighting || key->darkness) { 383 const unsigned mask = key->mtl_diffuse | key->mtl_specular | 384 key->mtl_ambient | key->mtl_emissive; 385 if ((mask & 0x1) && !key->color0in_one) 386 vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0)); 387 if ((mask & 0x2) && !key->color1in_zero) 388 vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1)); 389 390 vs->mtlD = MATERIAL_CONST(1); 391 vs->mtlA = MATERIAL_CONST(2); 392 vs->mtlS = MATERIAL_CONST(3); 393 vs->mtlE = MATERIAL_CONST(5); 394 if (key->mtl_diffuse == 1) vs->mtlD = vs->aCol[0]; else 395 if (key->mtl_diffuse == 2) vs->mtlD = vs->aCol[1]; 396 if (key->mtl_ambient == 1) vs->mtlA = vs->aCol[0]; else 397 if (key->mtl_ambient == 2) vs->mtlA = vs->aCol[1]; 398 if (key->mtl_specular == 1) vs->mtlS = vs->aCol[0]; else 399 if (key->mtl_specular == 2) vs->mtlS = vs->aCol[1]; 400 if (key->mtl_emissive == 1) vs->mtlE = vs->aCol[0]; else 401 if (key->mtl_emissive == 2) vs->mtlE = vs->aCol[1]; 402 } else { 403 if (!key->color0in_one) vs->aCol[0] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 0)); 404 if (!key->color1in_zero) vs->aCol[1] = build_vs_add_input(vs, NINE_DECLUSAGE_i(COLOR, 1)); 405 } 406 407 if (key->vertexpointsize) 408 vs->aPsz = build_vs_add_input(vs, NINE_DECLUSAGE_PSIZE); 409 410 if (key->vertexblend_indexed || key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES)) 411 vs->aInd = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDINDICES); 412 if (key->vertexblend || key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT)) 413 vs->aWgt = build_vs_add_input(vs, NINE_DECLUSAGE_BLENDWEIGHT); 414 if (key->vertextween) { 415 vs->aVtx1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(POSITION,1)); 416 vs->aNrm1 = build_vs_add_input(vs, NINE_DECLUSAGE_i(NORMAL,1)); 417 } 418 419 /* Declare outputs: 420 */ 421 oPos = ureg_DECL_output(ureg, TGSI_SEMANTIC_POSITION, 0); /* HPOS */ 422 oCol[0] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0)); 423 oCol[1] = ureg_saturate(ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 1)); 424 if (key->fog || key->passthrough & (1 << NINE_DECLUSAGE_FOG)) { 425 oFog = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 16); 426 oFog = ureg_writemask(oFog, TGSI_WRITEMASK_X); 427 } 428 429 if (key->vertexpointsize || key->pointscale) { 430 oPsz = ureg_DECL_output_masked(ureg, TGSI_SEMANTIC_PSIZE, 0, 431 TGSI_WRITEMASK_X, 0, 1); 432 oPsz = ureg_writemask(oPsz, TGSI_WRITEMASK_X); 433 } 434 435 if (key->lighting || key->vertexblend) 436 AR = ureg_DECL_address(ureg); 437 438 /* === Vertex transformation / vertex blending: 439 */ 440 441 if (key->position_t) { 442 if (device->driver_caps.window_space_position_support) { 443 ureg_MOV(ureg, oPos, vs->aVtx); 444 } else { 445 struct ureg_dst tmp = ureg_DECL_temporary(ureg); 446 /* vs->aVtx contains the coordinates buffer wise. 447 * later in the pipeline, clipping, viewport and division 448 * by w (rhw = 1/w) are going to be applied, so do the reverse 449 * of these transformations (except clipping) to have the good 450 * position at the end.*/ 451 ureg_MOV(ureg, tmp, vs->aVtx); 452 /* X from [X_min, X_min + width] to [-1, 1], same for Y. Z to [0, 1] */ 453 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), ureg_negate(_CONST(101))); 454 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _CONST(100)); 455 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, -1.0f)); 456 /* Y needs to be reversed */ 457 ureg_MOV(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_Y), ureg_negate(ureg_src(tmp))); 458 /* inverse rhw */ 459 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_W), _W(tmp)); 460 /* multiply X, Y, Z by w */ 461 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(tmp), _W(tmp)); 462 ureg_MOV(ureg, oPos, ureg_src(tmp)); 463 ureg_release_temporary(ureg, tmp); 464 } 465 } else if (key->vertexblend) { 466 struct ureg_dst tmp = ureg_DECL_temporary(ureg); 467 struct ureg_dst tmp2 = ureg_DECL_temporary(ureg); 468 struct ureg_dst aVtx_dst = ureg_DECL_temporary(ureg); 469 struct ureg_dst aNrm_dst = ureg_DECL_temporary(ureg); 470 struct ureg_dst sum_blendweights = ureg_DECL_temporary(ureg); 471 struct ureg_src cWM[4]; 472 473 for (i = 160; i <= 195; ++i) 474 ureg_DECL_constant(ureg, i); 475 476 /* translate world matrix index to constant file index */ 477 if (key->vertexblend_indexed) { 478 ureg_MAD(ureg, tmp, vs->aInd, ureg_imm1f(ureg, 4.0f), ureg_imm1f(ureg, 160.0f)); 479 ureg_ARL(ureg, AR, ureg_src(tmp)); 480 } 481 482 ureg_MOV(ureg, aVtx_dst, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f)); 483 ureg_MOV(ureg, aNrm_dst, ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 0.0f)); 484 ureg_MOV(ureg, sum_blendweights, ureg_imm4f(ureg, 1.0f, 1.0f, 1.0f, 1.0f)); 485 486 for (i = 0; i < key->vertexblend; ++i) { 487 for (c = 0; c < 4; ++c) { 488 cWM[c] = ureg_src_dimension(ureg_src_register(TGSI_FILE_CONSTANT, (160 + i * 4) * !key->vertexblend_indexed + c), 0); 489 if (key->vertexblend_indexed) 490 cWM[c] = ureg_src_indirect(cWM[c], ureg_scalar(ureg_src(AR), i)); 491 } 492 493 /* multiply by WORLD(index) */ 494 ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), cWM[0]); 495 ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), cWM[1], ureg_src(tmp)); 496 ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), cWM[2], ureg_src(tmp)); 497 ureg_MAD(ureg, tmp, _WWWW(vs->aVtx), cWM[3], ureg_src(tmp)); 498 499 if (has_aNrm) { 500 /* Note: the spec says the transpose of the inverse of the 501 * WorldView matrices should be used, but all tests show 502 * otherwise. 503 * Only case unknown: D3DVBF_0WEIGHTS */ 504 ureg_MUL(ureg, tmp2, _XXXX(vs->aNrm), cWM[0]); 505 ureg_MAD(ureg, tmp2, _YYYY(vs->aNrm), cWM[1], ureg_src(tmp2)); 506 ureg_MAD(ureg, tmp2, _ZZZZ(vs->aNrm), cWM[2], ureg_src(tmp2)); 507 } 508 509 if (i < (key->vertexblend - 1)) { 510 /* accumulate weighted position value */ 511 ureg_MAD(ureg, aVtx_dst, ureg_src(tmp), ureg_scalar(vs->aWgt, i), ureg_src(aVtx_dst)); 512 if (has_aNrm) 513 ureg_MAD(ureg, aNrm_dst, ureg_src(tmp2), ureg_scalar(vs->aWgt, i), ureg_src(aNrm_dst)); 514 /* subtract weighted position value for last value */ 515 ureg_ADD(ureg, sum_blendweights, ureg_src(sum_blendweights), ureg_negate(ureg_scalar(vs->aWgt, i))); 516 } 517 } 518 519 /* the last weighted position is always 1 - sum_of_previous_weights */ 520 ureg_MAD(ureg, aVtx_dst, ureg_src(tmp), ureg_scalar(ureg_src(sum_blendweights), key->vertexblend - 1), ureg_src(aVtx_dst)); 521 if (has_aNrm) 522 ureg_MAD(ureg, aNrm_dst, ureg_src(tmp2), ureg_scalar(ureg_src(sum_blendweights), key->vertexblend - 1), ureg_src(aNrm_dst)); 523 524 /* multiply by VIEW_PROJ */ 525 ureg_MUL(ureg, tmp, _X(aVtx_dst), _CONST(8)); 526 ureg_MAD(ureg, tmp, _Y(aVtx_dst), _CONST(9), ureg_src(tmp)); 527 ureg_MAD(ureg, tmp, _Z(aVtx_dst), _CONST(10), ureg_src(tmp)); 528 ureg_MAD(ureg, oPos, _W(aVtx_dst), _CONST(11), ureg_src(tmp)); 529 530 if (need_aVtx) 531 vs->aVtx = ureg_src(aVtx_dst); 532 533 ureg_release_temporary(ureg, tmp); 534 ureg_release_temporary(ureg, tmp2); 535 ureg_release_temporary(ureg, sum_blendweights); 536 if (!need_aVtx) 537 ureg_release_temporary(ureg, aVtx_dst); 538 539 if (has_aNrm) { 540 if (key->normalizenormals) 541 ureg_normalize3(ureg, aNrm_dst, ureg_src(aNrm_dst)); 542 vs->aNrm = ureg_src(aNrm_dst); 543 } else 544 ureg_release_temporary(ureg, aNrm_dst); 545 } else { 546 struct ureg_dst tmp = ureg_DECL_temporary(ureg); 547 548 if (key->vertextween) { 549 struct ureg_dst aVtx_dst = ureg_DECL_temporary(ureg); 550 ureg_LRP(ureg, aVtx_dst, _XXXX(_CONST(30)), vs->aVtx1, vs->aVtx); 551 vs->aVtx = ureg_src(aVtx_dst); 552 if (has_aNrm) { 553 struct ureg_dst aNrm_dst = ureg_DECL_temporary(ureg); 554 ureg_LRP(ureg, aNrm_dst, _XXXX(_CONST(30)), vs->aNrm1, vs->aNrm); 555 vs->aNrm = ureg_src(aNrm_dst); 556 } 557 } 558 559 /* position = vertex * WORLD_VIEW_PROJ */ 560 ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), _CONST(0)); 561 ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), _CONST(1), ureg_src(tmp)); 562 ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), _CONST(2), ureg_src(tmp)); 563 ureg_MAD(ureg, oPos, _WWWW(vs->aVtx), _CONST(3), ureg_src(tmp)); 564 ureg_release_temporary(ureg, tmp); 565 566 if (need_aVtx) { 567 struct ureg_dst aVtx_dst = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); 568 ureg_MUL(ureg, aVtx_dst, _XXXX(vs->aVtx), _CONST(4)); 569 ureg_MAD(ureg, aVtx_dst, _YYYY(vs->aVtx), _CONST(5), ureg_src(aVtx_dst)); 570 ureg_MAD(ureg, aVtx_dst, _ZZZZ(vs->aVtx), _CONST(6), ureg_src(aVtx_dst)); 571 ureg_MAD(ureg, aVtx_dst, _WWWW(vs->aVtx), _CONST(7), ureg_src(aVtx_dst)); 572 vs->aVtx = ureg_src(aVtx_dst); 573 } 574 if (has_aNrm) { 575 struct ureg_dst aNrm_dst = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); 576 ureg_MUL(ureg, aNrm_dst, _XXXX(vs->aNrm), _CONST(16)); 577 ureg_MAD(ureg, aNrm_dst, _YYYY(vs->aNrm), _CONST(17), ureg_src(aNrm_dst)); 578 ureg_MAD(ureg, aNrm_dst, _ZZZZ(vs->aNrm), _CONST(18), ureg_src(aNrm_dst)); 579 if (key->normalizenormals) 580 ureg_normalize3(ureg, aNrm_dst, ureg_src(aNrm_dst)); 581 vs->aNrm = ureg_src(aNrm_dst); 582 } 583 } 584 585 /* === Process point size: 586 */ 587 if (key->vertexpointsize || key->pointscale) { 588 struct ureg_dst tmp = ureg_DECL_temporary(ureg); 589 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); 590 struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y); 591 struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z); 592 if (key->vertexpointsize) { 593 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26); 594 ureg_MAX(ureg, tmp_z, _XXXX(vs->aPsz), _XXXX(cPsz1)); 595 ureg_MIN(ureg, tmp_z, _Z(tmp), _YYYY(cPsz1)); 596 } else { 597 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26); 598 ureg_MOV(ureg, tmp_z, _ZZZZ(cPsz1)); 599 } 600 601 if (key->pointscale) { 602 struct ureg_src cPsz1 = ureg_DECL_constant(ureg, 26); 603 struct ureg_src cPsz2 = ureg_DECL_constant(ureg, 27); 604 605 ureg_DP3(ureg, tmp_x, vs->aVtx, vs->aVtx); 606 ureg_RSQ(ureg, tmp_y, _X(tmp)); 607 ureg_MUL(ureg, tmp_y, _Y(tmp), _X(tmp)); 608 ureg_CMP(ureg, tmp_y, ureg_negate(_Y(tmp)), _Y(tmp), ureg_imm1f(ureg, 0.0f)); 609 ureg_MAD(ureg, tmp_x, _Y(tmp), _YYYY(cPsz2), _XXXX(cPsz2)); 610 ureg_MAD(ureg, tmp_x, _Y(tmp), _X(tmp), _WWWW(cPsz1)); 611 ureg_RSQ(ureg, tmp_x, _X(tmp)); 612 ureg_MUL(ureg, tmp_x, _X(tmp), _Z(tmp)); 613 ureg_MUL(ureg, tmp_x, _X(tmp), _WWWW(_CONST(100))); 614 ureg_MAX(ureg, tmp_x, _X(tmp), _XXXX(cPsz1)); 615 ureg_MIN(ureg, tmp_z, _X(tmp), _YYYY(cPsz1)); 616 } 617 618 ureg_MOV(ureg, oPsz, _Z(tmp)); 619 ureg_release_temporary(ureg, tmp); 620 } 621 622 for (i = 0; i < 8; ++i) { 623 struct ureg_dst tmp, tmp_x, tmp2; 624 struct ureg_dst oTex, input_coord, transformed, t, aVtx_normed; 625 unsigned c, writemask; 626 const unsigned tci = (key->tc_gen >> (i * 3)) & 0x7; 627 const unsigned idx = (key->tc_idx >> (i * 3)) & 0x7; 628 unsigned dim_input = 1 + ((key->tc_dim_input >> (i * 2)) & 0x3); 629 const unsigned dim_output = (key->tc_dim_output >> (i * 3)) & 0x7; 630 631 /* No texture output of index s */ 632 if (tci == NINED3DTSS_TCI_DISABLE) 633 continue; 634 oTex = ureg_DECL_output(ureg, texcoord_sn, i); 635 tmp = ureg_DECL_temporary(ureg); 636 tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); 637 input_coord = ureg_DECL_temporary(ureg); 638 transformed = ureg_DECL_temporary(ureg); 639 640 /* Get the coordinate */ 641 switch (tci) { 642 case NINED3DTSS_TCI_PASSTHRU: 643 /* NINED3DTSS_TCI_PASSTHRU => Use texcoord coming from index idx * 644 * Else the idx is used only to determine wrapping mode. */ 645 vs->aTex[idx] = build_vs_add_input(vs, NINE_DECLUSAGE_i(TEXCOORD,idx)); 646 ureg_MOV(ureg, input_coord, vs->aTex[idx]); 647 break; 648 case NINED3DTSS_TCI_CAMERASPACENORMAL: 649 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), vs->aNrm); 650 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f)); 651 dim_input = 4; 652 break; 653 case NINED3DTSS_TCI_CAMERASPACEPOSITION: 654 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), vs->aVtx); 655 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f)); 656 dim_input = 4; 657 break; 658 case NINED3DTSS_TCI_CAMERASPACEREFLECTIONVECTOR: 659 tmp.WriteMask = TGSI_WRITEMASK_XYZ; 660 aVtx_normed = ureg_DECL_temporary(ureg); 661 ureg_normalize3(ureg, aVtx_normed, vs->aVtx); 662 ureg_DP3(ureg, tmp_x, ureg_src(aVtx_normed), vs->aNrm); 663 ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp)); 664 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp)); 665 ureg_ADD(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XYZ), ureg_src(aVtx_normed), ureg_negate(ureg_src(tmp))); 666 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_W), ureg_imm1f(ureg, 1.0f)); 667 ureg_release_temporary(ureg, aVtx_normed); 668 dim_input = 4; 669 tmp.WriteMask = TGSI_WRITEMASK_XYZW; 670 break; 671 case NINED3DTSS_TCI_SPHEREMAP: 672 /* Implement the formula of GL_SPHERE_MAP */ 673 tmp.WriteMask = TGSI_WRITEMASK_XYZ; 674 aVtx_normed = ureg_DECL_temporary(ureg); 675 tmp2 = ureg_DECL_temporary(ureg); 676 ureg_normalize3(ureg, aVtx_normed, vs->aVtx); 677 ureg_DP3(ureg, tmp_x, ureg_src(aVtx_normed), vs->aNrm); 678 ureg_MUL(ureg, tmp, vs->aNrm, _X(tmp)); 679 ureg_ADD(ureg, tmp, ureg_src(tmp), ureg_src(tmp)); 680 ureg_ADD(ureg, tmp, ureg_src(aVtx_normed), ureg_negate(ureg_src(tmp))); 681 /* now tmp = normed(Vtx) - 2 dot3(normed(Vtx), Nrm) Nrm */ 682 ureg_MOV(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_XYZ), ureg_src(tmp)); 683 ureg_MUL(ureg, tmp2, ureg_src(tmp2), ureg_src(tmp2)); 684 ureg_DP3(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2), ureg_src(tmp2)); 685 ureg_RSQ(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2)); 686 ureg_MUL(ureg, ureg_writemask(tmp2, TGSI_WRITEMASK_X), ureg_src(tmp2), ureg_imm1f(ureg, 0.5f)); 687 /* tmp2 = 0.5 / sqrt(tmp.x^2 + tmp.y^2 + (tmp.z+1)^2) 688 * TODO: z coordinates are a bit different gl vs d3d, should the formula be adapted ? */ 689 ureg_MUL(ureg, tmp, ureg_src(tmp), _X(tmp2)); 690 ureg_ADD(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_XY), ureg_src(tmp), ureg_imm1f(ureg, 0.5f)); 691 ureg_MOV(ureg, ureg_writemask(input_coord, TGSI_WRITEMASK_ZW), ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f)); 692 ureg_release_temporary(ureg, aVtx_normed); 693 ureg_release_temporary(ureg, tmp2); 694 dim_input = 4; 695 tmp.WriteMask = TGSI_WRITEMASK_XYZW; 696 break; 697 default: 698 assert(0); 699 break; 700 } 701 702 /* Apply the transformation */ 703 /* dim_output == 0 => do not transform the components. 704 * XYZRHW also disables transformation */ 705 if (!dim_output || key->position_t) { 706 ureg_release_temporary(ureg, transformed); 707 transformed = input_coord; 708 writemask = TGSI_WRITEMASK_XYZW; 709 } else { 710 for (c = 0; c < dim_output; c++) { 711 t = ureg_writemask(transformed, 1 << c); 712 switch (dim_input) { 713 /* dim_input = 1 2 3: -> we add trailing 1 to input*/ 714 case 1: ureg_MAD(ureg, t, _X(input_coord), _XXXX(_CONST(128 + i * 4 + c)), _YYYY(_CONST(128 + i * 4 + c))); 715 break; 716 case 2: ureg_DP2(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c)); 717 ureg_ADD(ureg, t, ureg_src(transformed), _ZZZZ(_CONST(128 + i * 4 + c))); 718 break; 719 case 3: ureg_DP3(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c)); 720 ureg_ADD(ureg, t, ureg_src(transformed), _WWWW(_CONST(128 + i * 4 + c))); 721 break; 722 case 4: ureg_DP4(ureg, t, ureg_src(input_coord), _CONST(128 + i * 4 + c)); break; 723 default: 724 assert(0); 725 } 726 } 727 writemask = (1 << dim_output) - 1; 728 ureg_release_temporary(ureg, input_coord); 729 } 730 731 ureg_MOV(ureg, ureg_writemask(oTex, writemask), ureg_src(transformed)); 732 ureg_release_temporary(ureg, transformed); 733 ureg_release_temporary(ureg, tmp); 734 } 735 736 /* === Lighting: 737 * 738 * DIRECTIONAL: Light at infinite distance, parallel rays, no attenuation. 739 * POINT: Finite distance to scene, divergent rays, isotropic, attenuation. 740 * SPOT: Finite distance, divergent rays, angular dependence, attenuation. 741 * 742 * vec3 normal = normalize(in.Normal * NormalMatrix); 743 * vec3 hitDir = light.direction; 744 * float atten = 1.0; 745 * 746 * if (light.type != DIRECTIONAL) 747 * { 748 * vec3 hitVec = light.position - eyeVertex; 749 * float d = length(hitVec); 750 * hitDir = hitVec / d; 751 * atten = 1 / ((light.atten2 * d + light.atten1) * d + light.atten0); 752 * } 753 * 754 * if (light.type == SPOTLIGHT) 755 * { 756 * float rho = dp3(-hitVec, light.direction); 757 * if (rho < cos(light.phi / 2)) 758 * atten = 0; 759 * if (rho < cos(light.theta / 2)) 760 * atten *= pow(some_func(rho), light.falloff); 761 * } 762 * 763 * float nDotHit = dp3_sat(normal, hitVec); 764 * float powFact = 0.0; 765 * 766 * if (nDotHit > 0.0) 767 * { 768 * vec3 midVec = normalize(hitDir + eye); 769 * float nDotMid = dp3_sat(normal, midVec); 770 * pFact = pow(nDotMid, material.power); 771 * } 772 * 773 * ambient += light.ambient * atten; 774 * diffuse += light.diffuse * atten * nDotHit; 775 * specular += light.specular * atten * powFact; 776 */ 777 if (key->lighting) { 778 struct ureg_dst tmp = ureg_DECL_temporary(ureg); 779 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); 780 struct ureg_dst tmp_y = ureg_writemask(tmp, TGSI_WRITEMASK_Y); 781 struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z); 782 struct ureg_dst rAtt = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_W); 783 struct ureg_dst rHit = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); 784 struct ureg_dst rMid = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); 785 786 struct ureg_dst rCtr = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_W); 787 788 struct ureg_dst AL = ureg_writemask(AR, TGSI_WRITEMASK_X); 789 790 /* Light.*.Alpha is not used. */ 791 struct ureg_dst rD = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); 792 struct ureg_dst rA = ureg_writemask(ureg_DECL_temporary(ureg), TGSI_WRITEMASK_XYZ); 793 struct ureg_dst rS = ureg_DECL_temporary(ureg); 794 795 struct ureg_src mtlP = _XXXX(MATERIAL_CONST(4)); 796 797 struct ureg_src cLKind = _XXXX(LIGHT_CONST(0)); 798 struct ureg_src cLAtt0 = _YYYY(LIGHT_CONST(0)); 799 struct ureg_src cLAtt1 = _ZZZZ(LIGHT_CONST(0)); 800 struct ureg_src cLAtt2 = _WWWW(LIGHT_CONST(0)); 801 struct ureg_src cLColD = _XYZW(LIGHT_CONST(1)); 802 struct ureg_src cLColS = _XYZW(LIGHT_CONST(2)); 803 struct ureg_src cLColA = _XYZW(LIGHT_CONST(3)); 804 struct ureg_src cLPos = _XYZW(LIGHT_CONST(4)); 805 struct ureg_src cLRng = _WWWW(LIGHT_CONST(4)); 806 struct ureg_src cLDir = _XYZW(LIGHT_CONST(5)); 807 struct ureg_src cLFOff = _WWWW(LIGHT_CONST(5)); 808 struct ureg_src cLTht = _XXXX(LIGHT_CONST(6)); 809 struct ureg_src cLPhi = _YYYY(LIGHT_CONST(6)); 810 struct ureg_src cLSDiv = _ZZZZ(LIGHT_CONST(6)); 811 struct ureg_src cLLast = _WWWW(LIGHT_CONST(7)); 812 813 const unsigned loop_label = l++; 814 815 /* Declare all light constants to allow indirect adressing */ 816 for (i = 32; i < 96; i++) 817 ureg_DECL_constant(ureg, i); 818 819 ureg_MOV(ureg, rCtr, ureg_imm1f(ureg, 32.0f)); /* &lightconst(0) */ 820 ureg_MOV(ureg, rD, ureg_imm1f(ureg, 0.0f)); 821 ureg_MOV(ureg, rA, ureg_imm1f(ureg, 0.0f)); 822 ureg_MOV(ureg, rS, ureg_imm1f(ureg, 0.0f)); 823 824 /* loop management */ 825 ureg_BGNLOOP(ureg, &label[loop_label]); 826 ureg_ARL(ureg, AL, _W(rCtr)); 827 828 /* if (not DIRECTIONAL light): */ 829 ureg_SNE(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_DIRECTIONAL)); 830 ureg_MOV(ureg, rHit, ureg_negate(cLDir)); 831 ureg_MOV(ureg, rAtt, ureg_imm1f(ureg, 1.0f)); 832 ureg_IF(ureg, _X(tmp), &label[l++]); 833 { 834 /* hitDir = light.position - eyeVtx 835 * d = length(hitDir) 836 */ 837 ureg_ADD(ureg, rHit, cLPos, ureg_negate(vs->aVtx)); 838 ureg_DP3(ureg, tmp_x, ureg_src(rHit), ureg_src(rHit)); 839 ureg_RSQ(ureg, tmp_y, _X(tmp)); 840 ureg_MUL(ureg, tmp_x, _X(tmp), _Y(tmp)); /* length */ 841 842 /* att = 1.0 / (light.att0 + (light.att1 + light.att2 * d) * d) */ 843 ureg_MAD(ureg, rAtt, _X(tmp), cLAtt2, cLAtt1); 844 ureg_MAD(ureg, rAtt, _X(tmp), _W(rAtt), cLAtt0); 845 ureg_RCP(ureg, rAtt, _W(rAtt)); 846 /* cut-off if distance exceeds Light.Range */ 847 ureg_SLT(ureg, tmp_x, _X(tmp), cLRng); 848 ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp)); 849 } 850 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg)); 851 ureg_ENDIF(ureg); 852 853 /* normalize hitDir */ 854 ureg_normalize3(ureg, rHit, ureg_src(rHit)); 855 856 /* if (SPOT light) */ 857 ureg_SEQ(ureg, tmp_x, cLKind, ureg_imm1f(ureg, D3DLIGHT_SPOT)); 858 ureg_IF(ureg, _X(tmp), &label[l++]); 859 { 860 /* rho = dp3(-hitDir, light.spotDir) 861 * 862 * if (rho > light.ctht2) NOTE: 0 <= phi <= pi, 0 <= theta <= phi 863 * spotAtt = 1 864 * else 865 * if (rho <= light.cphi2) 866 * spotAtt = 0 867 * else 868 * spotAtt = (rho - light.cphi2) / (light.ctht2 - light.cphi2) ^ light.falloff 869 */ 870 ureg_DP3(ureg, tmp_y, ureg_negate(ureg_src(rHit)), cLDir); /* rho */ 871 ureg_ADD(ureg, tmp_x, _Y(tmp), ureg_negate(cLPhi)); 872 ureg_MUL(ureg, tmp_x, _X(tmp), cLSDiv); 873 ureg_POW(ureg, tmp_x, _X(tmp), cLFOff); /* spotAtten */ 874 ureg_SGE(ureg, tmp_z, _Y(tmp), cLTht); /* if inside theta && phi */ 875 ureg_SGE(ureg, tmp_y, _Y(tmp), cLPhi); /* if inside phi */ 876 ureg_MAD(ureg, ureg_saturate(tmp_x), _X(tmp), _Y(tmp), _Z(tmp)); 877 ureg_MUL(ureg, rAtt, _W(rAtt), _X(tmp)); 878 } 879 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg)); 880 ureg_ENDIF(ureg); 881 882 /* directional factors, let's not use LIT because of clarity */ 883 884 if (has_aNrm) { 885 if (key->localviewer) { 886 ureg_normalize3(ureg, rMid, vs->aVtx); 887 ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_negate(ureg_src(rMid))); 888 } else { 889 ureg_ADD(ureg, rMid, ureg_src(rHit), ureg_imm3f(ureg, 0.0f, 0.0f, -1.0f)); 890 } 891 ureg_normalize3(ureg, rMid, ureg_src(rMid)); 892 ureg_DP3(ureg, ureg_saturate(tmp_x), vs->aNrm, ureg_src(rHit)); 893 ureg_DP3(ureg, ureg_saturate(tmp_y), vs->aNrm, ureg_src(rMid)); 894 ureg_MUL(ureg, tmp_z, _X(tmp), _Y(tmp)); 895 /* Tests show that specular is computed only if (dp3(normal,hitDir) > 0). 896 * For front facing, it is more restrictive than test (dp3(normal,mid) > 0). 897 * No tests were made for backfacing, so add the two conditions */ 898 ureg_IF(ureg, _Z(tmp), &label[l++]); 899 { 900 ureg_DP3(ureg, ureg_saturate(tmp_y), vs->aNrm, ureg_src(rMid)); 901 ureg_POW(ureg, tmp_y, _Y(tmp), mtlP); 902 ureg_MUL(ureg, tmp_y, _W(rAtt), _Y(tmp)); /* power factor * att */ 903 ureg_MAD(ureg, rS, cLColS, _Y(tmp), ureg_src(rS)); /* accumulate specular */ 904 } 905 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg)); 906 ureg_ENDIF(ureg); 907 908 ureg_MUL(ureg, tmp_x, _W(rAtt), _X(tmp)); /* dp3(normal,hitDir) * att */ 909 ureg_MAD(ureg, rD, cLColD, _X(tmp), ureg_src(rD)); /* accumulate diffuse */ 910 } 911 912 ureg_MAD(ureg, rA, cLColA, _W(rAtt), ureg_src(rA)); /* accumulate ambient */ 913 914 /* break if this was the last light */ 915 ureg_IF(ureg, cLLast, &label[l++]); 916 ureg_BRK(ureg); 917 ureg_ENDIF(ureg); 918 ureg_fixup_label(ureg, label[l-1], ureg_get_instruction_number(ureg)); 919 920 ureg_ADD(ureg, rCtr, _W(rCtr), ureg_imm1f(ureg, 8.0f)); 921 ureg_fixup_label(ureg, label[loop_label], ureg_get_instruction_number(ureg)); 922 ureg_ENDLOOP(ureg, &label[loop_label]); 923 924 /* Apply to material: 925 * 926 * oCol[0] = (material.emissive + material.ambient * rs.ambient) + 927 * material.ambient * ambient + 928 * material.diffuse * diffuse + 929 * oCol[1] = material.specular * specular; 930 */ 931 if (key->mtl_emissive == 0 && key->mtl_ambient == 0) 932 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(rA), vs->mtlA, _CONST(19)); 933 else { 934 ureg_ADD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), ureg_src(rA), _CONST(25)); 935 ureg_MAD(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), vs->mtlA, ureg_src(tmp), vs->mtlE); 936 } 937 938 ureg_MAD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), ureg_src(rD), vs->mtlD, ureg_src(tmp)); 939 ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), vs->mtlD); 940 ureg_MUL(ureg, oCol[1], ureg_src(rS), vs->mtlS); 941 ureg_release_temporary(ureg, rAtt); 942 ureg_release_temporary(ureg, rHit); 943 ureg_release_temporary(ureg, rMid); 944 ureg_release_temporary(ureg, rCtr); 945 ureg_release_temporary(ureg, rD); 946 ureg_release_temporary(ureg, rA); 947 ureg_release_temporary(ureg, rS); 948 ureg_release_temporary(ureg, rAtt); 949 ureg_release_temporary(ureg, tmp); 950 } else 951 /* COLOR */ 952 if (key->darkness) { 953 if (key->mtl_emissive == 0 && key->mtl_ambient == 0) 954 ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), _CONST(19)); 955 else 956 ureg_MAD(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_XYZ), vs->mtlA, _CONST(25), vs->mtlE); 957 ureg_MOV(ureg, ureg_writemask(oCol[0], TGSI_WRITEMASK_W), vs->mtlD); 958 ureg_MOV(ureg, oCol[1], ureg_imm1f(ureg, 0.0f)); 959 } else { 960 ureg_MOV(ureg, oCol[0], vs->aCol[0]); 961 ureg_MOV(ureg, oCol[1], vs->aCol[1]); 962 } 963 964 /* === Process fog. 965 * 966 * exp(x) = ex2(log2(e) * x) 967 */ 968 if (key->fog_mode) { 969 struct ureg_dst tmp = ureg_DECL_temporary(ureg); 970 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); 971 struct ureg_dst tmp_z = ureg_writemask(tmp, TGSI_WRITEMASK_Z); 972 if (key->fog_range) { 973 ureg_DP3(ureg, tmp_x, vs->aVtx, vs->aVtx); 974 ureg_RSQ(ureg, tmp_z, _X(tmp)); 975 ureg_MUL(ureg, tmp_z, _Z(tmp), _X(tmp)); 976 } else { 977 ureg_MOV(ureg, tmp_z, ureg_abs(_ZZZZ(vs->aVtx))); 978 } 979 980 if (key->fog_mode == D3DFOG_EXP) { 981 ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28))); 982 ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f)); 983 ureg_EX2(ureg, tmp_x, _X(tmp)); 984 } else 985 if (key->fog_mode == D3DFOG_EXP2) { 986 ureg_MUL(ureg, tmp_x, _Z(tmp), _ZZZZ(_CONST(28))); 987 ureg_MUL(ureg, tmp_x, _X(tmp), _X(tmp)); 988 ureg_MUL(ureg, tmp_x, _X(tmp), ureg_imm1f(ureg, -1.442695f)); 989 ureg_EX2(ureg, tmp_x, _X(tmp)); 990 } else 991 if (key->fog_mode == D3DFOG_LINEAR) { 992 ureg_ADD(ureg, tmp_x, _XXXX(_CONST(28)), ureg_negate(_Z(tmp))); 993 ureg_MUL(ureg, ureg_saturate(tmp_x), _X(tmp), _YYYY(_CONST(28))); 994 } 995 ureg_MOV(ureg, oFog, _X(tmp)); 996 ureg_release_temporary(ureg, tmp); 997 } else if (key->fog && !(key->passthrough & (1 << NINE_DECLUSAGE_FOG))) { 998 ureg_MOV(ureg, oFog, ureg_scalar(vs->aCol[1], TGSI_SWIZZLE_W)); 999 } 1000 1001 if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDWEIGHT)) { 1002 struct ureg_src input; 1003 struct ureg_dst output; 1004 input = vs->aWgt; 1005 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 19); 1006 ureg_MOV(ureg, output, input); 1007 } 1008 if (key->passthrough & (1 << NINE_DECLUSAGE_BLENDINDICES)) { 1009 struct ureg_src input; 1010 struct ureg_dst output; 1011 input = vs->aInd; 1012 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 20); 1013 ureg_MOV(ureg, output, input); 1014 } 1015 if (key->passthrough & (1 << NINE_DECLUSAGE_NORMAL)) { 1016 struct ureg_src input; 1017 struct ureg_dst output; 1018 input = vs->aNrm; 1019 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 21); 1020 ureg_MOV(ureg, output, input); 1021 } 1022 if (key->passthrough & (1 << NINE_DECLUSAGE_TANGENT)) { 1023 struct ureg_src input; 1024 struct ureg_dst output; 1025 input = build_vs_add_input(vs, NINE_DECLUSAGE_TANGENT); 1026 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 22); 1027 ureg_MOV(ureg, output, input); 1028 } 1029 if (key->passthrough & (1 << NINE_DECLUSAGE_BINORMAL)) { 1030 struct ureg_src input; 1031 struct ureg_dst output; 1032 input = build_vs_add_input(vs, NINE_DECLUSAGE_BINORMAL); 1033 output = ureg_DECL_output(ureg, TGSI_SEMANTIC_GENERIC, 23); 1034 ureg_MOV(ureg, output, input); 1035 } 1036 if (key->passthrough & (1 << NINE_DECLUSAGE_FOG)) { 1037 struct ureg_src input; 1038 struct ureg_dst output; 1039 input = build_vs_add_input(vs, NINE_DECLUSAGE_FOG); 1040 input = ureg_scalar(input, TGSI_SWIZZLE_X); 1041 output = oFog; 1042 ureg_MOV(ureg, output, input); 1043 } 1044 if (key->passthrough & (1 << NINE_DECLUSAGE_DEPTH)) { 1045 (void) 0; /* TODO: replace z of position output ? */ 1046 } 1047 1048 /* ucp for ff applies on world coordinates. 1049 * aVtx is in worldview coordinates. */ 1050 if (key->ucp) { 1051 struct ureg_dst clipVect = ureg_DECL_output(ureg, TGSI_SEMANTIC_CLIPVERTEX, 0); 1052 struct ureg_dst tmp = ureg_DECL_temporary(ureg); 1053 ureg_MUL(ureg, tmp, _XXXX(vs->aVtx), _CONST(12)); 1054 ureg_MAD(ureg, tmp, _YYYY(vs->aVtx), _CONST(13), ureg_src(tmp)); 1055 ureg_MAD(ureg, tmp, _ZZZZ(vs->aVtx), _CONST(14), ureg_src(tmp)); 1056 ureg_ADD(ureg, clipVect, _CONST(15), ureg_src(tmp)); 1057 ureg_release_temporary(ureg, tmp); 1058 } 1059 1060 if (key->position_t && device->driver_caps.window_space_position_support) 1061 ureg_property(ureg, TGSI_PROPERTY_VS_WINDOW_SPACE_POSITION, TRUE); 1062 1063 ureg_END(ureg); 1064 nine_ureg_tgsi_dump(ureg, FALSE); 1065 return nine_create_shader_with_so_and_destroy(ureg, device->context.pipe, NULL); 1066} 1067 1068/* PS FF constants layout: 1069 * 1070 * CONST[ 0.. 7] stage[i].D3DTSS_CONSTANT 1071 * CONST[ 8..15].x___ stage[i].D3DTSS_BUMPENVMAT00 1072 * CONST[ 8..15]._y__ stage[i].D3DTSS_BUMPENVMAT01 1073 * CONST[ 8..15].__z_ stage[i].D3DTSS_BUMPENVMAT10 1074 * CONST[ 8..15].___w stage[i].D3DTSS_BUMPENVMAT11 1075 * CONST[16..19].x_z_ stage[i].D3DTSS_BUMPENVLSCALE 1076 * CONST[17..19]._y_w stage[i].D3DTSS_BUMPENVLOFFSET 1077 * 1078 * CONST[20] D3DRS_TEXTUREFACTOR 1079 * CONST[21] D3DRS_FOGCOLOR 1080 * CONST[22].x___ RS.FogEnd 1081 * CONST[22]._y__ 1.0f / (RS.FogEnd - RS.FogStart) 1082 * CONST[22].__z_ RS.FogDensity 1083 */ 1084struct ps_build_ctx 1085{ 1086 struct ureg_program *ureg; 1087 1088 struct ureg_src vC[2]; /* DIFFUSE, SPECULAR */ 1089 struct ureg_src vT[8]; /* TEXCOORD[i] */ 1090 struct ureg_dst rCur; /* D3DTA_CURRENT */ 1091 struct ureg_dst rMod; 1092 struct ureg_src rCurSrc; 1093 struct ureg_dst rTmp; /* D3DTA_TEMP */ 1094 struct ureg_src rTmpSrc; 1095 struct ureg_dst rTex; 1096 struct ureg_src rTexSrc; 1097 struct ureg_src cBEM[8]; 1098 struct ureg_src s[8]; 1099 1100 struct { 1101 unsigned index; 1102 unsigned index_pre_mod; 1103 } stage; 1104}; 1105 1106static struct ureg_src 1107ps_get_ts_arg(struct ps_build_ctx *ps, unsigned ta) 1108{ 1109 struct ureg_src reg; 1110 1111 switch (ta & D3DTA_SELECTMASK) { 1112 case D3DTA_CONSTANT: 1113 reg = ureg_DECL_constant(ps->ureg, ps->stage.index); 1114 break; 1115 case D3DTA_CURRENT: 1116 reg = (ps->stage.index == ps->stage.index_pre_mod) ? ureg_src(ps->rMod) : ps->rCurSrc; 1117 break; 1118 case D3DTA_DIFFUSE: 1119 reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_COLOR); 1120 break; 1121 case D3DTA_SPECULAR: 1122 reg = ureg_DECL_fs_input(ps->ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR); 1123 break; 1124 case D3DTA_TEMP: 1125 reg = ps->rTmpSrc; 1126 break; 1127 case D3DTA_TEXTURE: 1128 reg = ps->rTexSrc; 1129 break; 1130 case D3DTA_TFACTOR: 1131 reg = ureg_DECL_constant(ps->ureg, 20); 1132 break; 1133 default: 1134 assert(0); 1135 reg = ureg_src_undef(); 1136 break; 1137 } 1138 if (ta & D3DTA_COMPLEMENT) { 1139 struct ureg_dst dst = ureg_DECL_temporary(ps->ureg); 1140 ureg_ADD(ps->ureg, dst, ureg_imm1f(ps->ureg, 1.0f), ureg_negate(reg)); 1141 reg = ureg_src(dst); 1142 } 1143 if (ta & D3DTA_ALPHAREPLICATE) 1144 reg = _WWWW(reg); 1145 return reg; 1146} 1147 1148static struct ureg_dst 1149ps_get_ts_dst(struct ps_build_ctx *ps, unsigned ta) 1150{ 1151 assert(!(ta & (D3DTA_COMPLEMENT | D3DTA_ALPHAREPLICATE))); 1152 1153 switch (ta & D3DTA_SELECTMASK) { 1154 case D3DTA_CURRENT: 1155 return ps->rCur; 1156 case D3DTA_TEMP: 1157 return ps->rTmp; 1158 default: 1159 assert(0); 1160 return ureg_dst_undef(); 1161 } 1162} 1163 1164static uint8_t ps_d3dtop_args_mask(D3DTEXTUREOP top) 1165{ 1166 switch (top) { 1167 case D3DTOP_DISABLE: 1168 return 0x0; 1169 case D3DTOP_SELECTARG1: 1170 case D3DTOP_PREMODULATE: 1171 return 0x2; 1172 case D3DTOP_SELECTARG2: 1173 return 0x4; 1174 case D3DTOP_MULTIPLYADD: 1175 case D3DTOP_LERP: 1176 return 0x7; 1177 default: 1178 return 0x6; 1179 } 1180} 1181 1182static inline boolean 1183is_MOV_no_op(struct ureg_dst dst, struct ureg_src src) 1184{ 1185 return !dst.WriteMask || 1186 (dst.File == src.File && 1187 dst.Index == src.Index && 1188 !dst.Indirect && 1189 !dst.Saturate && 1190 !src.Indirect && 1191 !src.Negate && 1192 !src.Absolute && 1193 (!(dst.WriteMask & TGSI_WRITEMASK_X) || (src.SwizzleX == TGSI_SWIZZLE_X)) && 1194 (!(dst.WriteMask & TGSI_WRITEMASK_Y) || (src.SwizzleY == TGSI_SWIZZLE_Y)) && 1195 (!(dst.WriteMask & TGSI_WRITEMASK_Z) || (src.SwizzleZ == TGSI_SWIZZLE_Z)) && 1196 (!(dst.WriteMask & TGSI_WRITEMASK_W) || (src.SwizzleW == TGSI_SWIZZLE_W))); 1197 1198} 1199 1200static void 1201ps_do_ts_op(struct ps_build_ctx *ps, unsigned top, struct ureg_dst dst, struct ureg_src *arg) 1202{ 1203 struct ureg_program *ureg = ps->ureg; 1204 struct ureg_dst tmp = ureg_DECL_temporary(ureg); 1205 struct ureg_dst tmp2 = ureg_DECL_temporary(ureg); 1206 struct ureg_dst tmp_x = ureg_writemask(tmp, TGSI_WRITEMASK_X); 1207 1208 tmp.WriteMask = dst.WriteMask; 1209 1210 if (top != D3DTOP_SELECTARG1 && top != D3DTOP_SELECTARG2 && 1211 top != D3DTOP_MODULATE && top != D3DTOP_PREMODULATE && 1212 top != D3DTOP_BLENDDIFFUSEALPHA && top != D3DTOP_BLENDTEXTUREALPHA && 1213 top != D3DTOP_BLENDFACTORALPHA && top != D3DTOP_BLENDCURRENTALPHA && 1214 top != D3DTOP_BUMPENVMAP && top != D3DTOP_BUMPENVMAPLUMINANCE && 1215 top != D3DTOP_LERP) 1216 dst = ureg_saturate(dst); 1217 1218 switch (top) { 1219 case D3DTOP_SELECTARG1: 1220 if (!is_MOV_no_op(dst, arg[1])) 1221 ureg_MOV(ureg, dst, arg[1]); 1222 break; 1223 case D3DTOP_SELECTARG2: 1224 if (!is_MOV_no_op(dst, arg[2])) 1225 ureg_MOV(ureg, dst, arg[2]); 1226 break; 1227 case D3DTOP_MODULATE: 1228 ureg_MUL(ureg, dst, arg[1], arg[2]); 1229 break; 1230 case D3DTOP_MODULATE2X: 1231 ureg_MUL(ureg, tmp, arg[1], arg[2]); 1232 ureg_ADD(ureg, dst, ureg_src(tmp), ureg_src(tmp)); 1233 break; 1234 case D3DTOP_MODULATE4X: 1235 ureg_MUL(ureg, tmp, arg[1], arg[2]); 1236 ureg_MUL(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 4.0f)); 1237 break; 1238 case D3DTOP_ADD: 1239 ureg_ADD(ureg, dst, arg[1], arg[2]); 1240 break; 1241 case D3DTOP_ADDSIGNED: 1242 ureg_ADD(ureg, tmp, arg[1], arg[2]); 1243 ureg_ADD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, -0.5f)); 1244 break; 1245 case D3DTOP_ADDSIGNED2X: 1246 ureg_ADD(ureg, tmp, arg[1], arg[2]); 1247 ureg_MAD(ureg, dst, ureg_src(tmp), ureg_imm1f(ureg, 2.0f), ureg_imm1f(ureg, -1.0f)); 1248 break; 1249 case D3DTOP_SUBTRACT: 1250 ureg_ADD(ureg, dst, arg[1], ureg_negate(arg[2])); 1251 break; 1252 case D3DTOP_ADDSMOOTH: 1253 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(arg[1])); 1254 ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], arg[1]); 1255 break; 1256 case D3DTOP_BLENDDIFFUSEALPHA: 1257 ureg_LRP(ureg, dst, _WWWW(ps->vC[0]), arg[1], arg[2]); 1258 break; 1259 case D3DTOP_BLENDTEXTUREALPHA: 1260 /* XXX: alpha taken from previous stage, texture or result ? */ 1261 ureg_LRP(ureg, dst, _W(ps->rTex), arg[1], arg[2]); 1262 break; 1263 case D3DTOP_BLENDFACTORALPHA: 1264 ureg_LRP(ureg, dst, _WWWW(_CONST(20)), arg[1], arg[2]); 1265 break; 1266 case D3DTOP_BLENDTEXTUREALPHAPM: 1267 ureg_ADD(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), ureg_negate(_W(ps->rTex))); 1268 ureg_MAD(ureg, dst, arg[2], _X(tmp), arg[1]); 1269 break; 1270 case D3DTOP_BLENDCURRENTALPHA: 1271 ureg_LRP(ureg, dst, _WWWW(ps->rCurSrc), arg[1], arg[2]); 1272 break; 1273 case D3DTOP_PREMODULATE: 1274 ureg_MOV(ureg, dst, arg[1]); 1275 ps->stage.index_pre_mod = ps->stage.index + 1; 1276 break; 1277 case D3DTOP_MODULATEALPHA_ADDCOLOR: 1278 ureg_MAD(ureg, dst, _WWWW(arg[1]), arg[2], arg[1]); 1279 break; 1280 case D3DTOP_MODULATECOLOR_ADDALPHA: 1281 ureg_MAD(ureg, dst, arg[1], arg[2], _WWWW(arg[1])); 1282 break; 1283 case D3DTOP_MODULATEINVALPHA_ADDCOLOR: 1284 ureg_ADD(ureg, tmp_x, ureg_imm1f(ureg, 1.0f), ureg_negate(_WWWW(arg[1]))); 1285 ureg_MAD(ureg, dst, _X(tmp), arg[2], arg[1]); 1286 break; 1287 case D3DTOP_MODULATEINVCOLOR_ADDALPHA: 1288 ureg_ADD(ureg, tmp, ureg_imm1f(ureg, 1.0f), ureg_negate(arg[1])); 1289 ureg_MAD(ureg, dst, ureg_src(tmp), arg[2], _WWWW(arg[1])); 1290 break; 1291 case D3DTOP_BUMPENVMAP: 1292 break; 1293 case D3DTOP_BUMPENVMAPLUMINANCE: 1294 break; 1295 case D3DTOP_DOTPRODUCT3: 1296 ureg_ADD(ureg, tmp, arg[1], ureg_imm4f(ureg,-0.5,-0.5,-0.5,-0.5)); 1297 ureg_ADD(ureg, tmp2, arg[2] , ureg_imm4f(ureg,-0.5,-0.5,-0.5,-0.5)); 1298 ureg_DP3(ureg, tmp, ureg_src(tmp), ureg_src(tmp2)); 1299 ureg_MUL(ureg, ureg_saturate(dst), ureg_src(tmp), ureg_imm4f(ureg,4.0,4.0,4.0,4.0)); 1300 break; 1301 case D3DTOP_MULTIPLYADD: 1302 ureg_MAD(ureg, dst, arg[1], arg[2], arg[0]); 1303 break; 1304 case D3DTOP_LERP: 1305 ureg_LRP(ureg, dst, arg[0], arg[1], arg[2]); 1306 break; 1307 case D3DTOP_DISABLE: 1308 /* no-op ? */ 1309 break; 1310 default: 1311 assert(!"invalid D3DTOP"); 1312 break; 1313 } 1314 ureg_release_temporary(ureg, tmp); 1315 ureg_release_temporary(ureg, tmp2); 1316} 1317 1318static void * 1319nine_ff_build_ps(struct NineDevice9 *device, struct nine_ff_ps_key *key) 1320{ 1321 struct ps_build_ctx ps; 1322 struct ureg_program *ureg = ureg_create(PIPE_SHADER_FRAGMENT); 1323 struct ureg_dst oCol; 1324 unsigned s; 1325 const unsigned texcoord_sn = get_texcoord_sn(device->screen); 1326 1327 memset(&ps, 0, sizeof(ps)); 1328 ps.ureg = ureg; 1329 ps.stage.index_pre_mod = -1; 1330 1331 ps.vC[0] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 0, TGSI_INTERPOLATE_COLOR); 1332 1333 ps.rCur = ureg_DECL_temporary(ureg); 1334 ps.rTmp = ureg_DECL_temporary(ureg); 1335 ps.rTex = ureg_DECL_temporary(ureg); 1336 ps.rCurSrc = ureg_src(ps.rCur); 1337 ps.rTmpSrc = ureg_src(ps.rTmp); 1338 ps.rTexSrc = ureg_src(ps.rTex); 1339 1340 /* Initial values */ 1341 ureg_MOV(ureg, ps.rCur, ps.vC[0]); 1342 ureg_MOV(ureg, ps.rTmp, ureg_imm1f(ureg, 0.0f)); 1343 ureg_MOV(ureg, ps.rTex, ureg_imm1f(ureg, 0.0f)); 1344 1345 for (s = 0; s < 8; ++s) { 1346 ps.s[s] = ureg_src_undef(); 1347 1348 if (key->ts[s].colorop != D3DTOP_DISABLE) { 1349 if (key->ts[s].colorarg0 == D3DTA_SPECULAR || 1350 key->ts[s].colorarg1 == D3DTA_SPECULAR || 1351 key->ts[s].colorarg2 == D3DTA_SPECULAR) 1352 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR); 1353 1354 if (key->ts[s].colorarg0 == D3DTA_TEXTURE || 1355 key->ts[s].colorarg1 == D3DTA_TEXTURE || 1356 key->ts[s].colorarg2 == D3DTA_TEXTURE || 1357 key->ts[s].colorop == D3DTOP_BLENDTEXTUREALPHA || 1358 key->ts[s].colorop == D3DTOP_BLENDTEXTUREALPHAPM) { 1359 ps.s[s] = ureg_DECL_sampler(ureg, s); 1360 ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE); 1361 } 1362 if (s && (key->ts[s - 1].colorop == D3DTOP_PREMODULATE || 1363 key->ts[s - 1].alphaop == D3DTOP_PREMODULATE)) 1364 ps.s[s] = ureg_DECL_sampler(ureg, s); 1365 } 1366 1367 if (key->ts[s].alphaop != D3DTOP_DISABLE) { 1368 if (key->ts[s].alphaarg0 == D3DTA_SPECULAR || 1369 key->ts[s].alphaarg1 == D3DTA_SPECULAR || 1370 key->ts[s].alphaarg2 == D3DTA_SPECULAR) 1371 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR); 1372 1373 if (key->ts[s].alphaarg0 == D3DTA_TEXTURE || 1374 key->ts[s].alphaarg1 == D3DTA_TEXTURE || 1375 key->ts[s].alphaarg2 == D3DTA_TEXTURE || 1376 key->ts[s].colorop == D3DTOP_BLENDTEXTUREALPHA || 1377 key->ts[s].colorop == D3DTOP_BLENDTEXTUREALPHAPM) { 1378 ps.s[s] = ureg_DECL_sampler(ureg, s); 1379 ps.vT[s] = ureg_DECL_fs_input(ureg, texcoord_sn, s, TGSI_INTERPOLATE_PERSPECTIVE); 1380 } 1381 } 1382 } 1383 if (key->specular) 1384 ps.vC[1] = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_COLOR, 1, TGSI_INTERPOLATE_COLOR); 1385 1386 oCol = ureg_DECL_output(ureg, TGSI_SEMANTIC_COLOR, 0); 1387 1388 /* Run stages. 1389 */ 1390 for (s = 0; s < 8; ++s) { 1391 unsigned colorarg[3]; 1392 unsigned alphaarg[3]; 1393 const uint8_t used_c = ps_d3dtop_args_mask(key->ts[s].colorop); 1394 const uint8_t used_a = ps_d3dtop_args_mask(key->ts[s].alphaop); 1395 struct ureg_dst dst; 1396 struct ureg_src arg[3]; 1397 1398 if (key->ts[s].colorop == D3DTOP_DISABLE) { 1399 assert (key->ts[s].alphaop == D3DTOP_DISABLE); 1400 continue; 1401 } 1402 ps.stage.index = s; 1403 1404 DBG("STAGE[%u]: colorop=%s alphaop=%s\n", s, 1405 nine_D3DTOP_to_str(key->ts[s].colorop), 1406 nine_D3DTOP_to_str(key->ts[s].alphaop)); 1407 1408 if (!ureg_src_is_undef(ps.s[s])) { 1409 unsigned target; 1410 struct ureg_src texture_coord = ps.vT[s]; 1411 struct ureg_dst delta; 1412 switch (key->ts[s].textarget) { 1413 case 0: target = TGSI_TEXTURE_1D; break; 1414 case 1: target = TGSI_TEXTURE_2D; break; 1415 case 2: target = TGSI_TEXTURE_3D; break; 1416 case 3: target = TGSI_TEXTURE_CUBE; break; 1417 /* this is a 2 bit bitfield, do I really need a default case ? */ 1418 } 1419 1420 /* Modify coordinates */ 1421 if (s >= 1 && 1422 (key->ts[s-1].colorop == D3DTOP_BUMPENVMAP || 1423 key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE)) { 1424 delta = ureg_DECL_temporary(ureg); 1425 /* Du' = D3DTSS_BUMPENVMAT00(stage s-1)*t(s-1)R + D3DTSS_BUMPENVMAT10(stage s-1)*t(s-1)G */ 1426 ureg_MUL(ureg, ureg_writemask(delta, TGSI_WRITEMASK_X), _X(ps.rTex), _XXXX(_CONST(8 + s - 1))); 1427 ureg_MAD(ureg, ureg_writemask(delta, TGSI_WRITEMASK_X), _Y(ps.rTex), _ZZZZ(_CONST(8 + s - 1)), ureg_src(delta)); 1428 /* Dv' = D3DTSS_BUMPENVMAT01(stage s-1)*t(s-1)R + D3DTSS_BUMPENVMAT11(stage s-1)*t(s-1)G */ 1429 ureg_MUL(ureg, ureg_writemask(delta, TGSI_WRITEMASK_Y), _X(ps.rTex), _YYYY(_CONST(8 + s - 1))); 1430 ureg_MAD(ureg, ureg_writemask(delta, TGSI_WRITEMASK_Y), _Y(ps.rTex), _WWWW(_CONST(8 + s - 1)), ureg_src(delta)); 1431 texture_coord = ureg_src(ureg_DECL_temporary(ureg)); 1432 ureg_MOV(ureg, ureg_writemask(ureg_dst(texture_coord), ureg_dst(ps.vT[s]).WriteMask), ps.vT[s]); 1433 ureg_ADD(ureg, ureg_writemask(ureg_dst(texture_coord), TGSI_WRITEMASK_XY), texture_coord, ureg_src(delta)); 1434 /* Prepare luminance multiplier 1435 * t(s)RGBA = t(s)RGBA * clamp[(t(s-1)B * D3DTSS_BUMPENVLSCALE(stage s-1)) + D3DTSS_BUMPENVLOFFSET(stage s-1)] */ 1436 if (key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE) { 1437 struct ureg_src bumpenvlscale = ((s-1) & 1) ? _ZZZZ(_CONST(16 + (s-1) / 2)) : _XXXX(_CONST(16 + (s-1) / 2)); 1438 struct ureg_src bumpenvloffset = ((s-1) & 1) ? _WWWW(_CONST(16 + (s-1) / 2)) : _YYYY(_CONST(16 + (s-1) / 2)); 1439 1440 ureg_MAD(ureg, ureg_saturate(ureg_writemask(delta, TGSI_WRITEMASK_X)), _Z(ps.rTex), bumpenvlscale, bumpenvloffset); 1441 } 1442 } 1443 if (key->projected & (3 << (s *2))) { 1444 unsigned dim = 1 + ((key->projected >> (2 * s)) & 3); 1445 if (dim == 4) 1446 ureg_TXP(ureg, ps.rTex, target, texture_coord, ps.s[s]); 1447 else { 1448 struct ureg_dst tmp = ureg_DECL_temporary(ureg); 1449 ureg_RCP(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_X), ureg_scalar(texture_coord, dim-1)); 1450 ureg_MUL(ureg, ps.rTmp, _X(tmp), texture_coord); 1451 ureg_TEX(ureg, ps.rTex, target, ps.rTmpSrc, ps.s[s]); 1452 ureg_release_temporary(ureg, tmp); 1453 } 1454 } else { 1455 ureg_TEX(ureg, ps.rTex, target, texture_coord, ps.s[s]); 1456 } 1457 if (s >= 1 && key->ts[s-1].colorop == D3DTOP_BUMPENVMAPLUMINANCE) 1458 ureg_MUL(ureg, ps.rTex, ureg_src(ps.rTex), _X(delta)); 1459 } 1460 1461 if (key->ts[s].colorop == D3DTOP_BUMPENVMAP || 1462 key->ts[s].colorop == D3DTOP_BUMPENVMAPLUMINANCE) 1463 continue; 1464 1465 dst = ps_get_ts_dst(&ps, key->ts[s].resultarg ? D3DTA_TEMP : D3DTA_CURRENT); 1466 1467 if (ps.stage.index_pre_mod == ps.stage.index) { 1468 ps.rMod = ureg_DECL_temporary(ureg); 1469 ureg_MUL(ureg, ps.rMod, ps.rCurSrc, ps.rTexSrc); 1470 } 1471 1472 colorarg[0] = (key->ts[s].colorarg0 | (((key->colorarg_b4[0] >> s) & 0x1) << 4) | ((key->colorarg_b5[0] >> s) << 5)) & 0x3f; 1473 colorarg[1] = (key->ts[s].colorarg1 | (((key->colorarg_b4[1] >> s) & 0x1) << 4) | ((key->colorarg_b5[1] >> s) << 5)) & 0x3f; 1474 colorarg[2] = (key->ts[s].colorarg2 | (((key->colorarg_b4[2] >> s) & 0x1) << 4) | ((key->colorarg_b5[2] >> s) << 5)) & 0x3f; 1475 alphaarg[0] = (key->ts[s].alphaarg0 | ((key->alphaarg_b4[0] >> s) << 4)) & 0x1f; 1476 alphaarg[1] = (key->ts[s].alphaarg1 | ((key->alphaarg_b4[1] >> s) << 4)) & 0x1f; 1477 alphaarg[2] = (key->ts[s].alphaarg2 | ((key->alphaarg_b4[2] >> s) << 4)) & 0x1f; 1478 1479 if (key->ts[s].colorop != key->ts[s].alphaop || 1480 colorarg[0] != alphaarg[0] || 1481 colorarg[1] != alphaarg[1] || 1482 colorarg[2] != alphaarg[2]) 1483 dst.WriteMask = TGSI_WRITEMASK_XYZ; 1484 1485 /* Special DOTPRODUCT behaviour (see wine tests) */ 1486 if (key->ts[s].colorop == D3DTOP_DOTPRODUCT3) 1487 dst.WriteMask = TGSI_WRITEMASK_XYZW; 1488 1489 if (used_c & 0x1) arg[0] = ps_get_ts_arg(&ps, colorarg[0]); 1490 if (used_c & 0x2) arg[1] = ps_get_ts_arg(&ps, colorarg[1]); 1491 if (used_c & 0x4) arg[2] = ps_get_ts_arg(&ps, colorarg[2]); 1492 ps_do_ts_op(&ps, key->ts[s].colorop, dst, arg); 1493 1494 if (dst.WriteMask != TGSI_WRITEMASK_XYZW) { 1495 dst.WriteMask = TGSI_WRITEMASK_W; 1496 1497 if (used_a & 0x1) arg[0] = ps_get_ts_arg(&ps, alphaarg[0]); 1498 if (used_a & 0x2) arg[1] = ps_get_ts_arg(&ps, alphaarg[1]); 1499 if (used_a & 0x4) arg[2] = ps_get_ts_arg(&ps, alphaarg[2]); 1500 ps_do_ts_op(&ps, key->ts[s].alphaop, dst, arg); 1501 } 1502 } 1503 1504 if (key->specular) 1505 ureg_ADD(ureg, ureg_writemask(ps.rCur, TGSI_WRITEMASK_XYZ), ps.rCurSrc, ps.vC[1]); 1506 1507 /* Fog. 1508 */ 1509 if (key->fog_mode) { 1510 struct ureg_dst rFog = ureg_writemask(ps.rTmp, TGSI_WRITEMASK_X); 1511 struct ureg_src vPos; 1512 if (device->screen->get_param(device->screen, 1513 PIPE_CAP_TGSI_FS_POSITION_IS_SYSVAL)) { 1514 vPos = ureg_DECL_system_value(ureg, TGSI_SEMANTIC_POSITION, 0); 1515 } else { 1516 vPos = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_POSITION, 0, 1517 TGSI_INTERPOLATE_LINEAR); 1518 } 1519 1520 /* Source is either W or Z. 1521 * When we use vs ff, 1522 * Z is when an orthogonal projection matrix is detected, 1523 * W (WFOG) else. 1524 * Z is used for programmable vs. 1525 * Note: Tests indicate that the projection matrix coefficients do 1526 * actually affect pixel fog (and not vertex fog) when vs ff is used, 1527 * which justifies taking the position's w instead of taking the z coordinate 1528 * before the projection in the vs shader. 1529 */ 1530 if (!key->fog_source) 1531 ureg_MOV(ureg, rFog, _ZZZZ(vPos)); 1532 else 1533 /* Position's w is 1/w */ 1534 ureg_RCP(ureg, rFog, _WWWW(vPos)); 1535 1536 if (key->fog_mode == D3DFOG_EXP) { 1537 ureg_MUL(ureg, rFog, _X(rFog), _ZZZZ(_CONST(22))); 1538 ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f)); 1539 ureg_EX2(ureg, rFog, _X(rFog)); 1540 } else 1541 if (key->fog_mode == D3DFOG_EXP2) { 1542 ureg_MUL(ureg, rFog, _X(rFog), _ZZZZ(_CONST(22))); 1543 ureg_MUL(ureg, rFog, _X(rFog), _X(rFog)); 1544 ureg_MUL(ureg, rFog, _X(rFog), ureg_imm1f(ureg, -1.442695f)); 1545 ureg_EX2(ureg, rFog, _X(rFog)); 1546 } else 1547 if (key->fog_mode == D3DFOG_LINEAR) { 1548 ureg_ADD(ureg, rFog, _XXXX(_CONST(22)), ureg_negate(_X(rFog))); 1549 ureg_MUL(ureg, ureg_saturate(rFog), _X(rFog), _YYYY(_CONST(22))); 1550 } 1551 ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _X(rFog), ps.rCurSrc, _CONST(21)); 1552 ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc); 1553 } else 1554 if (key->fog) { 1555 struct ureg_src vFog = ureg_DECL_fs_input(ureg, TGSI_SEMANTIC_GENERIC, 16, TGSI_INTERPOLATE_PERSPECTIVE); 1556 ureg_LRP(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_XYZ), _XXXX(vFog), ps.rCurSrc, _CONST(21)); 1557 ureg_MOV(ureg, ureg_writemask(oCol, TGSI_WRITEMASK_W), ps.rCurSrc); 1558 } else { 1559 ureg_MOV(ureg, oCol, ps.rCurSrc); 1560 } 1561 1562 ureg_END(ureg); 1563 nine_ureg_tgsi_dump(ureg, FALSE); 1564 return nine_create_shader_with_so_and_destroy(ureg, device->context.pipe, NULL); 1565} 1566 1567static struct NineVertexShader9 * 1568nine_ff_get_vs(struct NineDevice9 *device) 1569{ 1570 const struct nine_context *context = &device->context; 1571 struct NineVertexShader9 *vs; 1572 struct vs_build_ctx bld; 1573 struct nine_ff_vs_key key; 1574 unsigned s, i; 1575 boolean has_indexes = false; 1576 boolean has_weights = false; 1577 char input_texture_coord[8]; 1578 1579 assert(sizeof(key) <= sizeof(key.value32)); 1580 1581 memset(&key, 0, sizeof(key)); 1582 memset(&bld, 0, sizeof(bld)); 1583 memset(&input_texture_coord, 0, sizeof(input_texture_coord)); 1584 1585 bld.key = &key; 1586 1587 /* FIXME: this shouldn't be NULL, but it is on init */ 1588 if (context->vdecl) { 1589 key.color0in_one = 1; 1590 key.color1in_zero = 1; 1591 for (i = 0; i < context->vdecl->nelems; i++) { 1592 uint16_t usage = context->vdecl->usage_map[i]; 1593 if (usage == NINE_DECLUSAGE_POSITIONT) 1594 key.position_t = 1; 1595 else if (usage == NINE_DECLUSAGE_i(COLOR, 0)) 1596 key.color0in_one = 0; 1597 else if (usage == NINE_DECLUSAGE_i(COLOR, 1)) 1598 key.color1in_zero = 0; 1599 else if (usage == NINE_DECLUSAGE_i(BLENDINDICES, 0)) { 1600 has_indexes = true; 1601 key.passthrough |= 1 << usage; 1602 } else if (usage == NINE_DECLUSAGE_i(BLENDWEIGHT, 0)) { 1603 has_weights = true; 1604 key.passthrough |= 1 << usage; 1605 } else if (usage == NINE_DECLUSAGE_i(NORMAL, 0)) { 1606 key.has_normal = 1; 1607 key.passthrough |= 1 << usage; 1608 } else if (usage == NINE_DECLUSAGE_PSIZE) 1609 key.vertexpointsize = 1; 1610 else if (usage % NINE_DECLUSAGE_COUNT == NINE_DECLUSAGE_TEXCOORD) { 1611 s = usage / NINE_DECLUSAGE_COUNT; 1612 if (s < 8) 1613 input_texture_coord[s] = nine_decltype_get_dim(context->vdecl->decls[i].Type); 1614 else 1615 DBG("FF given texture coordinate >= 8. Ignoring\n"); 1616 } else if (usage < NINE_DECLUSAGE_NONE) 1617 key.passthrough |= 1 << usage; 1618 } 1619 } 1620 /* ff vs + ps 3.0: some elements are passed to the ps (wine test). 1621 * We do restrict to indices 0 */ 1622 key.passthrough &= ~((1 << NINE_DECLUSAGE_POSITION) | (1 << NINE_DECLUSAGE_PSIZE) | 1623 (1 << NINE_DECLUSAGE_TEXCOORD) | (1 << NINE_DECLUSAGE_POSITIONT) | 1624 (1 << NINE_DECLUSAGE_TESSFACTOR) | (1 << NINE_DECLUSAGE_SAMPLE)); 1625 if (!key.position_t) 1626 key.passthrough = 0; 1627 key.pointscale = !!context->rs[D3DRS_POINTSCALEENABLE]; 1628 1629 key.lighting = !!context->rs[D3DRS_LIGHTING] && context->ff.num_lights_active; 1630 key.darkness = !!context->rs[D3DRS_LIGHTING] && !context->ff.num_lights_active; 1631 if (key.position_t) { 1632 key.darkness = 0; /* |= key.lighting; */ /* XXX ? */ 1633 key.lighting = 0; 1634 } 1635 if ((key.lighting | key.darkness) && context->rs[D3DRS_COLORVERTEX]) { 1636 uint32_t mask = (key.color0in_one ? 0 : 1) | (key.color1in_zero ? 0 : 2); 1637 key.mtl_diffuse = context->rs[D3DRS_DIFFUSEMATERIALSOURCE] & mask; 1638 key.mtl_ambient = context->rs[D3DRS_AMBIENTMATERIALSOURCE] & mask; 1639 key.mtl_specular = context->rs[D3DRS_SPECULARMATERIALSOURCE] & mask; 1640 key.mtl_emissive = context->rs[D3DRS_EMISSIVEMATERIALSOURCE] & mask; 1641 } 1642 key.fog = !!context->rs[D3DRS_FOGENABLE]; 1643 key.fog_mode = (!key.position_t && context->rs[D3DRS_FOGENABLE]) ? context->rs[D3DRS_FOGVERTEXMODE] : 0; 1644 if (key.fog_mode) 1645 key.fog_range = context->rs[D3DRS_RANGEFOGENABLE]; 1646 1647 key.localviewer = !!context->rs[D3DRS_LOCALVIEWER]; 1648 key.normalizenormals = !!context->rs[D3DRS_NORMALIZENORMALS]; 1649 key.ucp = !!context->rs[D3DRS_CLIPPLANEENABLE]; 1650 1651 if (context->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) { 1652 key.vertexblend_indexed = !!context->rs[D3DRS_INDEXEDVERTEXBLENDENABLE] && has_indexes; 1653 1654 switch (context->rs[D3DRS_VERTEXBLEND]) { 1655 case D3DVBF_0WEIGHTS: key.vertexblend = key.vertexblend_indexed; break; 1656 case D3DVBF_1WEIGHTS: key.vertexblend = 2; break; 1657 case D3DVBF_2WEIGHTS: key.vertexblend = 3; break; 1658 case D3DVBF_3WEIGHTS: key.vertexblend = 4; break; 1659 case D3DVBF_TWEENING: key.vertextween = 1; break; 1660 default: 1661 assert(!"invalid D3DVBF"); 1662 break; 1663 } 1664 if (!has_weights && context->rs[D3DRS_VERTEXBLEND] != D3DVBF_0WEIGHTS) 1665 key.vertexblend = 0; /* TODO: if key.vertexblend_indexed, perhaps it should use 1.0 as weight, or revert to D3DVBF_0WEIGHTS */ 1666 } 1667 1668 for (s = 0; s < 8; ++s) { 1669 unsigned gen = (context->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] >> 16) + 1; 1670 unsigned idx = context->ff.tex_stage[s][D3DTSS_TEXCOORDINDEX] & 7; 1671 unsigned dim; 1672 1673 if (key.position_t && gen > NINED3DTSS_TCI_PASSTHRU) 1674 gen = NINED3DTSS_TCI_PASSTHRU; 1675 1676 if (!input_texture_coord[idx] && gen == NINED3DTSS_TCI_PASSTHRU) 1677 gen = NINED3DTSS_TCI_DISABLE; 1678 1679 key.tc_gen |= gen << (s * 3); 1680 key.tc_idx |= idx << (s * 3); 1681 key.tc_dim_input |= ((input_texture_coord[idx]-1) & 0x3) << (s * 2); 1682 1683 dim = context->ff.tex_stage[s][D3DTSS_TEXTURETRANSFORMFLAGS] & 0x7; 1684 if (dim > 4) 1685 dim = input_texture_coord[idx]; 1686 if (dim == 1) /* NV behaviour */ 1687 dim = 0; 1688 key.tc_dim_output |= dim << (s * 3); 1689 } 1690 1691 DBG("VS ff key hash: %x\n", nine_ff_vs_key_hash(&key)); 1692 vs = util_hash_table_get(device->ff.ht_vs, &key); 1693 if (vs) 1694 return vs; 1695 NineVertexShader9_new(device, &vs, NULL, nine_ff_build_vs(device, &bld)); 1696 1697 nine_ff_prune_vs(device); 1698 if (vs) { 1699 unsigned n; 1700 1701 memcpy(&vs->ff_key, &key, sizeof(vs->ff_key)); 1702 1703 _mesa_hash_table_insert(device->ff.ht_vs, &vs->ff_key, vs); 1704 device->ff.num_vs++; 1705 1706 vs->num_inputs = bld.num_inputs; 1707 for (n = 0; n < bld.num_inputs; ++n) 1708 vs->input_map[n].ndecl = bld.input[n]; 1709 1710 vs->position_t = key.position_t; 1711 vs->point_size = key.vertexpointsize | key.pointscale; 1712 } 1713 return vs; 1714} 1715 1716#define GET_D3DTS(n) nine_state_access_transform(&context->ff, D3DTS_##n, FALSE) 1717#define IS_D3DTS_DIRTY(s,n) ((s)->ff.changed.transform[(D3DTS_##n) / 32] & (1 << ((D3DTS_##n) % 32))) 1718 1719static struct NinePixelShader9 * 1720nine_ff_get_ps(struct NineDevice9 *device) 1721{ 1722 struct nine_context *context = &device->context; 1723 D3DMATRIX *projection_matrix = GET_D3DTS(PROJECTION); 1724 struct NinePixelShader9 *ps; 1725 struct nine_ff_ps_key key; 1726 unsigned s; 1727 uint8_t sampler_mask = 0; 1728 1729 assert(sizeof(key) <= sizeof(key.value32)); 1730 1731 memset(&key, 0, sizeof(key)); 1732 for (s = 0; s < 8; ++s) { 1733 key.ts[s].colorop = context->ff.tex_stage[s][D3DTSS_COLOROP]; 1734 key.ts[s].alphaop = context->ff.tex_stage[s][D3DTSS_ALPHAOP]; 1735 const uint8_t used_c = ps_d3dtop_args_mask(key.ts[s].colorop); 1736 const uint8_t used_a = ps_d3dtop_args_mask(key.ts[s].alphaop); 1737 /* MSDN says D3DTOP_DISABLE disables this and all subsequent stages. 1738 * ALPHAOP cannot be enabled if COLOROP is disabled. 1739 * Verified on Windows. */ 1740 if (key.ts[s].colorop == D3DTOP_DISABLE) { 1741 key.ts[s].alphaop = D3DTOP_DISABLE; /* DISABLE == 1, avoid degenerate keys */ 1742 break; 1743 } 1744 1745 if (!context->texture[s].enabled && 1746 ((context->ff.tex_stage[s][D3DTSS_COLORARG0] == D3DTA_TEXTURE && 1747 used_c & 0x1) || 1748 (context->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE && 1749 used_c & 0x2) || 1750 (context->ff.tex_stage[s][D3DTSS_COLORARG2] == D3DTA_TEXTURE && 1751 used_c & 0x4))) { 1752 /* Tested on Windows: Invalid texture read disables the stage 1753 * and the subsequent ones, but only for colorop. For alpha, 1754 * it's as if the texture had alpha of 1.0, which is what 1755 * has our dummy texture in that case. Invalid color also 1756 * disabled the following alpha stages. */ 1757 key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE; 1758 break; 1759 } 1760 1761 if (context->ff.tex_stage[s][D3DTSS_COLORARG0] == D3DTA_TEXTURE || 1762 context->ff.tex_stage[s][D3DTSS_COLORARG1] == D3DTA_TEXTURE || 1763 context->ff.tex_stage[s][D3DTSS_COLORARG2] == D3DTA_TEXTURE || 1764 context->ff.tex_stage[s][D3DTSS_ALPHAARG0] == D3DTA_TEXTURE || 1765 context->ff.tex_stage[s][D3DTSS_ALPHAARG1] == D3DTA_TEXTURE || 1766 context->ff.tex_stage[s][D3DTSS_ALPHAARG2] == D3DTA_TEXTURE) 1767 sampler_mask |= (1 << s); 1768 1769 if (key.ts[s].colorop != D3DTOP_DISABLE) { 1770 if (used_c & 0x1) key.ts[s].colorarg0 = context->ff.tex_stage[s][D3DTSS_COLORARG0] & 0x7; 1771 if (used_c & 0x2) key.ts[s].colorarg1 = context->ff.tex_stage[s][D3DTSS_COLORARG1] & 0x7; 1772 if (used_c & 0x4) key.ts[s].colorarg2 = context->ff.tex_stage[s][D3DTSS_COLORARG2] & 0x7; 1773 if (used_c & 0x1) key.colorarg_b4[0] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG0] >> 4) & 0x1) << s; 1774 if (used_c & 0x1) key.colorarg_b5[0] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG0] >> 5) & 0x1) << s; 1775 if (used_c & 0x2) key.colorarg_b4[1] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG1] >> 4) & 0x1) << s; 1776 if (used_c & 0x2) key.colorarg_b5[1] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG1] >> 5) & 0x1) << s; 1777 if (used_c & 0x4) key.colorarg_b4[2] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG2] >> 4) & 0x1) << s; 1778 if (used_c & 0x4) key.colorarg_b5[2] |= ((context->ff.tex_stage[s][D3DTSS_COLORARG2] >> 5) & 0x1) << s; 1779 } 1780 if (key.ts[s].alphaop != D3DTOP_DISABLE) { 1781 if (used_a & 0x1) key.ts[s].alphaarg0 = context->ff.tex_stage[s][D3DTSS_ALPHAARG0] & 0x7; 1782 if (used_a & 0x2) key.ts[s].alphaarg1 = context->ff.tex_stage[s][D3DTSS_ALPHAARG1] & 0x7; 1783 if (used_a & 0x4) key.ts[s].alphaarg2 = context->ff.tex_stage[s][D3DTSS_ALPHAARG2] & 0x7; 1784 if (used_a & 0x1) key.alphaarg_b4[0] |= ((context->ff.tex_stage[s][D3DTSS_ALPHAARG0] >> 4) & 0x1) << s; 1785 if (used_a & 0x2) key.alphaarg_b4[1] |= ((context->ff.tex_stage[s][D3DTSS_ALPHAARG1] >> 4) & 0x1) << s; 1786 if (used_a & 0x4) key.alphaarg_b4[2] |= ((context->ff.tex_stage[s][D3DTSS_ALPHAARG2] >> 4) & 0x1) << s; 1787 } 1788 key.ts[s].resultarg = context->ff.tex_stage[s][D3DTSS_RESULTARG] == D3DTA_TEMP; 1789 1790 if (context->texture[s].enabled) { 1791 switch (context->texture[s].type) { 1792 case D3DRTYPE_TEXTURE: key.ts[s].textarget = 1; break; 1793 case D3DRTYPE_VOLUMETEXTURE: key.ts[s].textarget = 2; break; 1794 case D3DRTYPE_CUBETEXTURE: key.ts[s].textarget = 3; break; 1795 default: 1796 assert(!"unexpected texture type"); 1797 break; 1798 } 1799 } else { 1800 key.ts[s].textarget = 1; 1801 } 1802 } 1803 1804 /* Note: If colorop is D3DTOP_DISABLE for the first stage 1805 * (which implies alphaop is too), nothing particular happens, 1806 * that is, current is equal to diffuse (which is the case anyway, 1807 * because it is how it is initialized). 1808 * Special case seems if alphaop is D3DTOP_DISABLE and not colorop, 1809 * because then if the resultarg is TEMP, then diffuse alpha is written 1810 * to it. */ 1811 if (key.ts[0].colorop != D3DTOP_DISABLE && 1812 key.ts[0].alphaop == D3DTOP_DISABLE && 1813 key.ts[0].resultarg != 0) { 1814 key.ts[0].alphaop = D3DTOP_SELECTARG1; 1815 key.ts[0].alphaarg1 = D3DTA_DIFFUSE; 1816 } 1817 /* When no alpha stage writes to current, diffuse alpha is taken. 1818 * Since we initialize current to diffuse, we have the behaviour. */ 1819 1820 /* Last stage always writes to Current */ 1821 if (s >= 1) 1822 key.ts[s-1].resultarg = 0; 1823 1824 key.projected = nine_ff_get_projected_key_ff(context); 1825 key.specular = !!context->rs[D3DRS_SPECULARENABLE]; 1826 1827 for (; s < 8; ++s) 1828 key.ts[s].colorop = key.ts[s].alphaop = D3DTOP_DISABLE; 1829 if (context->rs[D3DRS_FOGENABLE]) 1830 key.fog_mode = context->rs[D3DRS_FOGTABLEMODE]; 1831 key.fog = !!context->rs[D3DRS_FOGENABLE]; 1832 /* Pixel fog (with WFOG advertised): source is either Z or W. 1833 * W is the source if vs ff is used, and the 1834 * projection matrix is not orthogonal. 1835 * Tests on Win 10 seem to indicate _34 1836 * and _33 are checked against 0, 1. */ 1837 if (key.fog_mode && key.fog) 1838 key.fog_source = !context->programmable_vs && 1839 !(projection_matrix->_34 == 0.0f && 1840 projection_matrix->_44 == 1.0f); 1841 1842 DBG("PS ff key hash: %x\n", nine_ff_ps_key_hash(&key)); 1843 ps = util_hash_table_get(device->ff.ht_ps, &key); 1844 if (ps) 1845 return ps; 1846 NinePixelShader9_new(device, &ps, NULL, nine_ff_build_ps(device, &key)); 1847 1848 nine_ff_prune_ps(device); 1849 if (ps) { 1850 memcpy(&ps->ff_key, &key, sizeof(ps->ff_key)); 1851 1852 _mesa_hash_table_insert(device->ff.ht_ps, &ps->ff_key, ps); 1853 device->ff.num_ps++; 1854 1855 ps->rt_mask = 0x1; 1856 ps->sampler_mask = sampler_mask; 1857 } 1858 return ps; 1859} 1860 1861static void 1862nine_ff_load_vs_transforms(struct NineDevice9 *device) 1863{ 1864 struct nine_context *context = &device->context; 1865 D3DMATRIX T; 1866 D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const; 1867 unsigned i; 1868 1869 /* TODO: make this nicer, and only upload the ones we need */ 1870 /* TODO: use ff.vs_const as storage of W, V, P matrices */ 1871 1872 if (IS_D3DTS_DIRTY(context, WORLD) || 1873 IS_D3DTS_DIRTY(context, VIEW) || 1874 IS_D3DTS_DIRTY(context, PROJECTION)) { 1875 /* WVP, WV matrices */ 1876 nine_d3d_matrix_matrix_mul(&M[1], GET_D3DTS(WORLD), GET_D3DTS(VIEW)); 1877 nine_d3d_matrix_matrix_mul(&M[0], &M[1], GET_D3DTS(PROJECTION)); 1878 1879 /* normal matrix == transpose(inverse(WV)) */ 1880 nine_d3d_matrix_inverse(&T, &M[1]); 1881 nine_d3d_matrix_transpose(&M[4], &T); 1882 1883 /* P matrix */ 1884 M[2] = *GET_D3DTS(PROJECTION); 1885 1886 /* V and W matrix */ 1887 nine_d3d_matrix_inverse(&M[3], GET_D3DTS(VIEW)); 1888 M[40] = M[1]; 1889 } 1890 1891 if (context->rs[D3DRS_VERTEXBLEND] != D3DVBF_DISABLE) { 1892 /* load other world matrices */ 1893 for (i = 1; i <= 8; ++i) { 1894 nine_d3d_matrix_matrix_mul(&M[40 + i], GET_D3DTS(WORLDMATRIX(i)), GET_D3DTS(VIEW)); 1895 } 1896 } 1897 1898 device->ff.vs_const[30 * 4] = asfloat(context->rs[D3DRS_TWEENFACTOR]); 1899} 1900 1901static void 1902nine_ff_load_lights(struct NineDevice9 *device) 1903{ 1904 struct nine_context *context = &device->context; 1905 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const; 1906 unsigned l; 1907 1908 if (context->changed.group & NINE_STATE_FF_MATERIAL) { 1909 const D3DMATERIAL9 *mtl = &context->ff.material; 1910 1911 memcpy(&dst[20], &mtl->Diffuse, 4 * sizeof(float)); 1912 memcpy(&dst[21], &mtl->Ambient, 4 * sizeof(float)); 1913 memcpy(&dst[22], &mtl->Specular, 4 * sizeof(float)); 1914 dst[23].x = mtl->Power; 1915 memcpy(&dst[24], &mtl->Emissive, 4 * sizeof(float)); 1916 d3dcolor_to_rgba(&dst[25].x, context->rs[D3DRS_AMBIENT]); 1917 dst[19].x = dst[25].x * mtl->Ambient.r + mtl->Emissive.r; 1918 dst[19].y = dst[25].y * mtl->Ambient.g + mtl->Emissive.g; 1919 dst[19].z = dst[25].z * mtl->Ambient.b + mtl->Emissive.b; 1920 } 1921 1922 if (!(context->changed.group & NINE_STATE_FF_LIGHTING)) 1923 return; 1924 1925 for (l = 0; l < context->ff.num_lights_active; ++l) { 1926 const D3DLIGHT9 *light = &context->ff.light[context->ff.active_light[l]]; 1927 1928 dst[32 + l * 8].x = light->Type; 1929 dst[32 + l * 8].y = light->Attenuation0; 1930 dst[32 + l * 8].z = light->Attenuation1; 1931 dst[32 + l * 8].w = light->Attenuation2; 1932 memcpy(&dst[33 + l * 8].x, &light->Diffuse, sizeof(light->Diffuse)); 1933 memcpy(&dst[34 + l * 8].x, &light->Specular, sizeof(light->Specular)); 1934 memcpy(&dst[35 + l * 8].x, &light->Ambient, sizeof(light->Ambient)); 1935 nine_d3d_vector4_matrix_mul((D3DVECTOR *)&dst[36 + l * 8].x, &light->Position, GET_D3DTS(VIEW)); 1936 nine_d3d_vector3_matrix_mul((D3DVECTOR *)&dst[37 + l * 8].x, &light->Direction, GET_D3DTS(VIEW)); 1937 dst[36 + l * 8].w = light->Type == D3DLIGHT_DIRECTIONAL ? 1e9f : light->Range; 1938 dst[37 + l * 8].w = light->Falloff; 1939 dst[38 + l * 8].x = cosf(light->Theta * 0.5f); 1940 dst[38 + l * 8].y = cosf(light->Phi * 0.5f); 1941 dst[38 + l * 8].z = 1.0f / (dst[38 + l * 8].x - dst[38 + l * 8].y); 1942 dst[39 + l * 8].w = (float)((l + 1) == context->ff.num_lights_active); 1943 } 1944} 1945 1946static void 1947nine_ff_load_point_and_fog_params(struct NineDevice9 *device) 1948{ 1949 struct nine_context *context = &device->context; 1950 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const; 1951 1952 if (!(context->changed.group & NINE_STATE_FF_VS_OTHER)) 1953 return; 1954 dst[26].x = asfloat(context->rs[D3DRS_POINTSIZE_MIN]); 1955 dst[26].y = asfloat(context->rs[D3DRS_POINTSIZE_MAX]); 1956 dst[26].z = asfloat(context->rs[D3DRS_POINTSIZE]); 1957 dst[26].w = asfloat(context->rs[D3DRS_POINTSCALE_A]); 1958 dst[27].x = asfloat(context->rs[D3DRS_POINTSCALE_B]); 1959 dst[27].y = asfloat(context->rs[D3DRS_POINTSCALE_C]); 1960 dst[28].x = asfloat(context->rs[D3DRS_FOGEND]); 1961 dst[28].y = 1.0f / (asfloat(context->rs[D3DRS_FOGEND]) - asfloat(context->rs[D3DRS_FOGSTART])); 1962 if (isinf(dst[28].y)) 1963 dst[28].y = 0.0f; 1964 dst[28].z = asfloat(context->rs[D3DRS_FOGDENSITY]); 1965} 1966 1967static void 1968nine_ff_load_tex_matrices(struct NineDevice9 *device) 1969{ 1970 struct nine_context *context = &device->context; 1971 D3DMATRIX *M = (D3DMATRIX *)device->ff.vs_const; 1972 unsigned s; 1973 1974 if (!(context->ff.changed.transform[0] & 0xff0000)) 1975 return; 1976 for (s = 0; s < 8; ++s) { 1977 if (IS_D3DTS_DIRTY(context, TEXTURE0 + s)) 1978 nine_d3d_matrix_transpose(&M[32 + s], nine_state_access_transform(&context->ff, D3DTS_TEXTURE0 + s, FALSE)); 1979 } 1980} 1981 1982static void 1983nine_ff_load_ps_params(struct NineDevice9 *device) 1984{ 1985 struct nine_context *context = &device->context; 1986 struct fvec4 *dst = (struct fvec4 *)device->ff.ps_const; 1987 unsigned s; 1988 1989 if (!(context->changed.group & NINE_STATE_FF_PS_CONSTS)) 1990 return; 1991 1992 for (s = 0; s < 8; ++s) 1993 d3dcolor_to_rgba(&dst[s].x, context->ff.tex_stage[s][D3DTSS_CONSTANT]); 1994 1995 for (s = 0; s < 8; ++s) { 1996 dst[8 + s].x = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT00]); 1997 dst[8 + s].y = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT01]); 1998 dst[8 + s].z = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT10]); 1999 dst[8 + s].w = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVMAT11]); 2000 if (s & 1) { 2001 dst[16 + s / 2].z = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]); 2002 dst[16 + s / 2].w = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]); 2003 } else { 2004 dst[16 + s / 2].x = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLSCALE]); 2005 dst[16 + s / 2].y = asfloat(context->ff.tex_stage[s][D3DTSS_BUMPENVLOFFSET]); 2006 } 2007 } 2008 2009 d3dcolor_to_rgba(&dst[20].x, context->rs[D3DRS_TEXTUREFACTOR]); 2010 d3dcolor_to_rgba(&dst[21].x, context->rs[D3DRS_FOGCOLOR]); 2011 dst[22].x = asfloat(context->rs[D3DRS_FOGEND]); 2012 dst[22].y = 1.0f / (asfloat(context->rs[D3DRS_FOGEND]) - asfloat(context->rs[D3DRS_FOGSTART])); 2013 dst[22].z = asfloat(context->rs[D3DRS_FOGDENSITY]); 2014} 2015 2016static void 2017nine_ff_load_viewport_info(struct NineDevice9 *device) 2018{ 2019 D3DVIEWPORT9 *viewport = &device->context.viewport; 2020 struct fvec4 *dst = (struct fvec4 *)device->ff.vs_const; 2021 float diffZ = viewport->MaxZ - viewport->MinZ; 2022 2023 /* Note: the other functions avoids to fill the const again if nothing changed. 2024 * But we don't have much to fill, and adding code to allow that may be complex 2025 * so just fill it always */ 2026 dst[100].x = 2.0f / (float)(viewport->Width); 2027 dst[100].y = 2.0f / (float)(viewport->Height); 2028 dst[100].z = (diffZ == 0.0f) ? 0.0f : (1.0f / diffZ); 2029 dst[100].w = (float)(viewport->Width); 2030 dst[101].x = (float)(viewport->X); 2031 dst[101].y = (float)(viewport->Y); 2032 dst[101].z = (float)(viewport->MinZ); 2033} 2034 2035void 2036nine_ff_update(struct NineDevice9 *device) 2037{ 2038 struct nine_context *context = &device->context; 2039 struct pipe_constant_buffer cb; 2040 2041 DBG("vs=%p ps=%p\n", context->vs, context->ps); 2042 2043 /* NOTE: the only reference belongs to the hash table */ 2044 if (!context->programmable_vs) { 2045 device->ff.vs = nine_ff_get_vs(device); 2046 context->changed.group |= NINE_STATE_VS; 2047 } 2048 if (!context->ps) { 2049 device->ff.ps = nine_ff_get_ps(device); 2050 context->changed.group |= NINE_STATE_PS; 2051 } 2052 2053 if (!context->programmable_vs) { 2054 nine_ff_load_vs_transforms(device); 2055 nine_ff_load_tex_matrices(device); 2056 nine_ff_load_lights(device); 2057 nine_ff_load_point_and_fog_params(device); 2058 nine_ff_load_viewport_info(device); 2059 2060 memset(context->ff.changed.transform, 0, sizeof(context->ff.changed.transform)); 2061 2062 cb.buffer_offset = 0; 2063 cb.buffer = NULL; 2064 cb.user_buffer = device->ff.vs_const; 2065 cb.buffer_size = NINE_FF_NUM_VS_CONST * 4 * sizeof(float); 2066 2067 context->pipe_data.cb_vs_ff = cb; 2068 context->commit |= NINE_STATE_COMMIT_CONST_VS; 2069 2070 context->changed.group &= ~NINE_STATE_FF_VS; 2071 } 2072 2073 if (!context->ps) { 2074 nine_ff_load_ps_params(device); 2075 2076 cb.buffer_offset = 0; 2077 cb.buffer = NULL; 2078 cb.user_buffer = device->ff.ps_const; 2079 cb.buffer_size = NINE_FF_NUM_PS_CONST * 4 * sizeof(float); 2080 2081 context->pipe_data.cb_ps_ff = cb; 2082 context->commit |= NINE_STATE_COMMIT_CONST_PS; 2083 2084 context->changed.group &= ~NINE_STATE_FF_PS; 2085 } 2086} 2087 2088 2089boolean 2090nine_ff_init(struct NineDevice9 *device) 2091{ 2092 device->ff.ht_vs = _mesa_hash_table_create(NULL, nine_ff_vs_key_hash, 2093 nine_ff_vs_key_comp); 2094 device->ff.ht_ps = _mesa_hash_table_create(NULL, nine_ff_ps_key_hash, 2095 nine_ff_ps_key_comp); 2096 2097 device->ff.ht_fvf = _mesa_hash_table_create(NULL, nine_ff_fvf_key_hash, 2098 nine_ff_fvf_key_comp); 2099 2100 device->ff.vs_const = CALLOC(NINE_FF_NUM_VS_CONST, 4 * sizeof(float)); 2101 device->ff.ps_const = CALLOC(NINE_FF_NUM_PS_CONST, 4 * sizeof(float)); 2102 2103 return device->ff.ht_vs && device->ff.ht_ps && 2104 device->ff.ht_fvf && 2105 device->ff.vs_const && device->ff.ps_const; 2106} 2107 2108static enum pipe_error nine_ff_ht_delete_cb(void *key, void *value, void *data) 2109{ 2110 NineUnknown_Unbind(NineUnknown(value)); 2111 return PIPE_OK; 2112} 2113 2114void 2115nine_ff_fini(struct NineDevice9 *device) 2116{ 2117 if (device->ff.ht_vs) { 2118 util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL); 2119 _mesa_hash_table_destroy(device->ff.ht_vs, NULL); 2120 } 2121 if (device->ff.ht_ps) { 2122 util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL); 2123 _mesa_hash_table_destroy(device->ff.ht_ps, NULL); 2124 } 2125 if (device->ff.ht_fvf) { 2126 util_hash_table_foreach(device->ff.ht_fvf, nine_ff_ht_delete_cb, NULL); 2127 _mesa_hash_table_destroy(device->ff.ht_fvf, NULL); 2128 } 2129 device->ff.vs = NULL; /* destroyed by unbinding from hash table */ 2130 device->ff.ps = NULL; 2131 2132 FREE(device->ff.vs_const); 2133 FREE(device->ff.ps_const); 2134} 2135 2136static void 2137nine_ff_prune_vs(struct NineDevice9 *device) 2138{ 2139 struct nine_context *context = &device->context; 2140 2141 if (device->ff.num_vs > 1024) { 2142 /* could destroy the bound one here, so unbind */ 2143 context->pipe->bind_vs_state(context->pipe, NULL); 2144 util_hash_table_foreach(device->ff.ht_vs, nine_ff_ht_delete_cb, NULL); 2145 _mesa_hash_table_clear(device->ff.ht_vs, NULL); 2146 device->ff.num_vs = 0; 2147 context->changed.group |= NINE_STATE_VS; 2148 } 2149} 2150static void 2151nine_ff_prune_ps(struct NineDevice9 *device) 2152{ 2153 struct nine_context *context = &device->context; 2154 2155 if (device->ff.num_ps > 1024) { 2156 /* could destroy the bound one here, so unbind */ 2157 context->pipe->bind_fs_state(context->pipe, NULL); 2158 util_hash_table_foreach(device->ff.ht_ps, nine_ff_ht_delete_cb, NULL); 2159 _mesa_hash_table_clear(device->ff.ht_ps, NULL); 2160 device->ff.num_ps = 0; 2161 context->changed.group |= NINE_STATE_PS; 2162 } 2163} 2164 2165/* ========================================================================== */ 2166 2167/* Matrix multiplication: 2168 * 2169 * in memory: 0 1 2 3 (row major) 2170 * 4 5 6 7 2171 * 8 9 a b 2172 * c d e f 2173 * 2174 * cA cB cC cD 2175 * r0 = (r0 * cA) (r0 * cB) . . 2176 * r1 = (r1 * cA) (r1 * cB) 2177 * r2 = (r2 * cA) . 2178 * r3 = (r3 * cA) . 2179 * 2180 * r: (11) (12) (13) (14) 2181 * (21) (22) (23) (24) 2182 * (31) (32) (33) (34) 2183 * (41) (42) (43) (44) 2184 * l: (11 12 13 14) 2185 * (21 22 23 24) 2186 * (31 32 33 34) 2187 * (41 42 43 44) 2188 * 2189 * v: (x y z 1 ) 2190 * 2191 * t.xyzw = MUL(v.xxxx, r[0]); 2192 * t.xyzw = MAD(v.yyyy, r[1], t.xyzw); 2193 * t.xyzw = MAD(v.zzzz, r[2], t.xyzw); 2194 * v.xyzw = MAD(v.wwww, r[3], t.xyzw); 2195 * 2196 * v.x = DP4(v, c[0]); 2197 * v.y = DP4(v, c[1]); 2198 * v.z = DP4(v, c[2]); 2199 * v.w = DP4(v, c[3]) = 1 2200 */ 2201 2202/* 2203static void 2204nine_D3DMATRIX_print(const D3DMATRIX *M) 2205{ 2206 DBG("\n(%f %f %f %f)\n" 2207 "(%f %f %f %f)\n" 2208 "(%f %f %f %f)\n" 2209 "(%f %f %f %f)\n", 2210 M->m[0][0], M->m[0][1], M->m[0][2], M->m[0][3], 2211 M->m[1][0], M->m[1][1], M->m[1][2], M->m[1][3], 2212 M->m[2][0], M->m[2][1], M->m[2][2], M->m[2][3], 2213 M->m[3][0], M->m[3][1], M->m[3][2], M->m[3][3]); 2214} 2215*/ 2216 2217static inline float 2218nine_DP4_row_col(const D3DMATRIX *A, int r, const D3DMATRIX *B, int c) 2219{ 2220 return A->m[r][0] * B->m[0][c] + 2221 A->m[r][1] * B->m[1][c] + 2222 A->m[r][2] * B->m[2][c] + 2223 A->m[r][3] * B->m[3][c]; 2224} 2225 2226static inline float 2227nine_DP4_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c) 2228{ 2229 return v->x * M->m[0][c] + 2230 v->y * M->m[1][c] + 2231 v->z * M->m[2][c] + 2232 1.0f * M->m[3][c]; 2233} 2234 2235static inline float 2236nine_DP3_vec_col(const D3DVECTOR *v, const D3DMATRIX *M, int c) 2237{ 2238 return v->x * M->m[0][c] + 2239 v->y * M->m[1][c] + 2240 v->z * M->m[2][c]; 2241} 2242 2243void 2244nine_d3d_matrix_matrix_mul(D3DMATRIX *D, const D3DMATRIX *L, const D3DMATRIX *R) 2245{ 2246 D->_11 = nine_DP4_row_col(L, 0, R, 0); 2247 D->_12 = nine_DP4_row_col(L, 0, R, 1); 2248 D->_13 = nine_DP4_row_col(L, 0, R, 2); 2249 D->_14 = nine_DP4_row_col(L, 0, R, 3); 2250 2251 D->_21 = nine_DP4_row_col(L, 1, R, 0); 2252 D->_22 = nine_DP4_row_col(L, 1, R, 1); 2253 D->_23 = nine_DP4_row_col(L, 1, R, 2); 2254 D->_24 = nine_DP4_row_col(L, 1, R, 3); 2255 2256 D->_31 = nine_DP4_row_col(L, 2, R, 0); 2257 D->_32 = nine_DP4_row_col(L, 2, R, 1); 2258 D->_33 = nine_DP4_row_col(L, 2, R, 2); 2259 D->_34 = nine_DP4_row_col(L, 2, R, 3); 2260 2261 D->_41 = nine_DP4_row_col(L, 3, R, 0); 2262 D->_42 = nine_DP4_row_col(L, 3, R, 1); 2263 D->_43 = nine_DP4_row_col(L, 3, R, 2); 2264 D->_44 = nine_DP4_row_col(L, 3, R, 3); 2265} 2266 2267void 2268nine_d3d_vector4_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M) 2269{ 2270 d->x = nine_DP4_vec_col(v, M, 0); 2271 d->y = nine_DP4_vec_col(v, M, 1); 2272 d->z = nine_DP4_vec_col(v, M, 2); 2273} 2274 2275void 2276nine_d3d_vector3_matrix_mul(D3DVECTOR *d, const D3DVECTOR *v, const D3DMATRIX *M) 2277{ 2278 d->x = nine_DP3_vec_col(v, M, 0); 2279 d->y = nine_DP3_vec_col(v, M, 1); 2280 d->z = nine_DP3_vec_col(v, M, 2); 2281} 2282 2283void 2284nine_d3d_matrix_transpose(D3DMATRIX *D, const D3DMATRIX *M) 2285{ 2286 unsigned i, j; 2287 for (i = 0; i < 4; ++i) 2288 for (j = 0; j < 4; ++j) 2289 D->m[i][j] = M->m[j][i]; 2290} 2291 2292#define _M_ADD_PROD_1i_2j_3k_4l(i,j,k,l) do { \ 2293 float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \ 2294 if (t > 0.0f) pos += t; else neg += t; } while(0) 2295 2296#define _M_SUB_PROD_1i_2j_3k_4l(i,j,k,l) do { \ 2297 float t = M->_1##i * M->_2##j * M->_3##k * M->_4##l; \ 2298 if (t > 0.0f) neg -= t; else pos -= t; } while(0) 2299float 2300nine_d3d_matrix_det(const D3DMATRIX *M) 2301{ 2302 float pos = 0.0f; 2303 float neg = 0.0f; 2304 2305 _M_ADD_PROD_1i_2j_3k_4l(1, 2, 3, 4); 2306 _M_ADD_PROD_1i_2j_3k_4l(1, 3, 4, 2); 2307 _M_ADD_PROD_1i_2j_3k_4l(1, 4, 2, 3); 2308 2309 _M_ADD_PROD_1i_2j_3k_4l(2, 1, 4, 3); 2310 _M_ADD_PROD_1i_2j_3k_4l(2, 3, 1, 4); 2311 _M_ADD_PROD_1i_2j_3k_4l(2, 4, 3, 1); 2312 2313 _M_ADD_PROD_1i_2j_3k_4l(3, 1, 2, 4); 2314 _M_ADD_PROD_1i_2j_3k_4l(3, 2, 4, 1); 2315 _M_ADD_PROD_1i_2j_3k_4l(3, 4, 1, 2); 2316 2317 _M_ADD_PROD_1i_2j_3k_4l(4, 1, 3, 2); 2318 _M_ADD_PROD_1i_2j_3k_4l(4, 2, 1, 3); 2319 _M_ADD_PROD_1i_2j_3k_4l(4, 3, 2, 1); 2320 2321 _M_SUB_PROD_1i_2j_3k_4l(1, 2, 4, 3); 2322 _M_SUB_PROD_1i_2j_3k_4l(1, 3, 2, 4); 2323 _M_SUB_PROD_1i_2j_3k_4l(1, 4, 3, 2); 2324 2325 _M_SUB_PROD_1i_2j_3k_4l(2, 1, 3, 4); 2326 _M_SUB_PROD_1i_2j_3k_4l(2, 3, 4, 1); 2327 _M_SUB_PROD_1i_2j_3k_4l(2, 4, 1, 3); 2328 2329 _M_SUB_PROD_1i_2j_3k_4l(3, 1, 4, 2); 2330 _M_SUB_PROD_1i_2j_3k_4l(3, 2, 1, 4); 2331 _M_SUB_PROD_1i_2j_3k_4l(3, 4, 2, 1); 2332 2333 _M_SUB_PROD_1i_2j_3k_4l(4, 1, 2, 3); 2334 _M_SUB_PROD_1i_2j_3k_4l(4, 2, 3, 1); 2335 _M_SUB_PROD_1i_2j_3k_4l(4, 3, 1, 2); 2336 2337 return pos + neg; 2338} 2339 2340/* XXX: Probably better to just use src/mesa/math/m_matrix.c because 2341 * I have no idea where this code came from. 2342 */ 2343void 2344nine_d3d_matrix_inverse(D3DMATRIX *D, const D3DMATRIX *M) 2345{ 2346 int i, k; 2347 float det; 2348 2349 D->m[0][0] = 2350 M->m[1][1] * M->m[2][2] * M->m[3][3] - 2351 M->m[1][1] * M->m[3][2] * M->m[2][3] - 2352 M->m[1][2] * M->m[2][1] * M->m[3][3] + 2353 M->m[1][2] * M->m[3][1] * M->m[2][3] + 2354 M->m[1][3] * M->m[2][1] * M->m[3][2] - 2355 M->m[1][3] * M->m[3][1] * M->m[2][2]; 2356 2357 D->m[0][1] = 2358 -M->m[0][1] * M->m[2][2] * M->m[3][3] + 2359 M->m[0][1] * M->m[3][2] * M->m[2][3] + 2360 M->m[0][2] * M->m[2][1] * M->m[3][3] - 2361 M->m[0][2] * M->m[3][1] * M->m[2][3] - 2362 M->m[0][3] * M->m[2][1] * M->m[3][2] + 2363 M->m[0][3] * M->m[3][1] * M->m[2][2]; 2364 2365 D->m[0][2] = 2366 M->m[0][1] * M->m[1][2] * M->m[3][3] - 2367 M->m[0][1] * M->m[3][2] * M->m[1][3] - 2368 M->m[0][2] * M->m[1][1] * M->m[3][3] + 2369 M->m[0][2] * M->m[3][1] * M->m[1][3] + 2370 M->m[0][3] * M->m[1][1] * M->m[3][2] - 2371 M->m[0][3] * M->m[3][1] * M->m[1][2]; 2372 2373 D->m[0][3] = 2374 -M->m[0][1] * M->m[1][2] * M->m[2][3] + 2375 M->m[0][1] * M->m[2][2] * M->m[1][3] + 2376 M->m[0][2] * M->m[1][1] * M->m[2][3] - 2377 M->m[0][2] * M->m[2][1] * M->m[1][3] - 2378 M->m[0][3] * M->m[1][1] * M->m[2][2] + 2379 M->m[0][3] * M->m[2][1] * M->m[1][2]; 2380 2381 D->m[1][0] = 2382 -M->m[1][0] * M->m[2][2] * M->m[3][3] + 2383 M->m[1][0] * M->m[3][2] * M->m[2][3] + 2384 M->m[1][2] * M->m[2][0] * M->m[3][3] - 2385 M->m[1][2] * M->m[3][0] * M->m[2][3] - 2386 M->m[1][3] * M->m[2][0] * M->m[3][2] + 2387 M->m[1][3] * M->m[3][0] * M->m[2][2]; 2388 2389 D->m[1][1] = 2390 M->m[0][0] * M->m[2][2] * M->m[3][3] - 2391 M->m[0][0] * M->m[3][2] * M->m[2][3] - 2392 M->m[0][2] * M->m[2][0] * M->m[3][3] + 2393 M->m[0][2] * M->m[3][0] * M->m[2][3] + 2394 M->m[0][3] * M->m[2][0] * M->m[3][2] - 2395 M->m[0][3] * M->m[3][0] * M->m[2][2]; 2396 2397 D->m[1][2] = 2398 -M->m[0][0] * M->m[1][2] * M->m[3][3] + 2399 M->m[0][0] * M->m[3][2] * M->m[1][3] + 2400 M->m[0][2] * M->m[1][0] * M->m[3][3] - 2401 M->m[0][2] * M->m[3][0] * M->m[1][3] - 2402 M->m[0][3] * M->m[1][0] * M->m[3][2] + 2403 M->m[0][3] * M->m[3][0] * M->m[1][2]; 2404 2405 D->m[1][3] = 2406 M->m[0][0] * M->m[1][2] * M->m[2][3] - 2407 M->m[0][0] * M->m[2][2] * M->m[1][3] - 2408 M->m[0][2] * M->m[1][0] * M->m[2][3] + 2409 M->m[0][2] * M->m[2][0] * M->m[1][3] + 2410 M->m[0][3] * M->m[1][0] * M->m[2][2] - 2411 M->m[0][3] * M->m[2][0] * M->m[1][2]; 2412 2413 D->m[2][0] = 2414 M->m[1][0] * M->m[2][1] * M->m[3][3] - 2415 M->m[1][0] * M->m[3][1] * M->m[2][3] - 2416 M->m[1][1] * M->m[2][0] * M->m[3][3] + 2417 M->m[1][1] * M->m[3][0] * M->m[2][3] + 2418 M->m[1][3] * M->m[2][0] * M->m[3][1] - 2419 M->m[1][3] * M->m[3][0] * M->m[2][1]; 2420 2421 D->m[2][1] = 2422 -M->m[0][0] * M->m[2][1] * M->m[3][3] + 2423 M->m[0][0] * M->m[3][1] * M->m[2][3] + 2424 M->m[0][1] * M->m[2][0] * M->m[3][3] - 2425 M->m[0][1] * M->m[3][0] * M->m[2][3] - 2426 M->m[0][3] * M->m[2][0] * M->m[3][1] + 2427 M->m[0][3] * M->m[3][0] * M->m[2][1]; 2428 2429 D->m[2][2] = 2430 M->m[0][0] * M->m[1][1] * M->m[3][3] - 2431 M->m[0][0] * M->m[3][1] * M->m[1][3] - 2432 M->m[0][1] * M->m[1][0] * M->m[3][3] + 2433 M->m[0][1] * M->m[3][0] * M->m[1][3] + 2434 M->m[0][3] * M->m[1][0] * M->m[3][1] - 2435 M->m[0][3] * M->m[3][0] * M->m[1][1]; 2436 2437 D->m[2][3] = 2438 -M->m[0][0] * M->m[1][1] * M->m[2][3] + 2439 M->m[0][0] * M->m[2][1] * M->m[1][3] + 2440 M->m[0][1] * M->m[1][0] * M->m[2][3] - 2441 M->m[0][1] * M->m[2][0] * M->m[1][3] - 2442 M->m[0][3] * M->m[1][0] * M->m[2][1] + 2443 M->m[0][3] * M->m[2][0] * M->m[1][1]; 2444 2445 D->m[3][0] = 2446 -M->m[1][0] * M->m[2][1] * M->m[3][2] + 2447 M->m[1][0] * M->m[3][1] * M->m[2][2] + 2448 M->m[1][1] * M->m[2][0] * M->m[3][2] - 2449 M->m[1][1] * M->m[3][0] * M->m[2][2] - 2450 M->m[1][2] * M->m[2][0] * M->m[3][1] + 2451 M->m[1][2] * M->m[3][0] * M->m[2][1]; 2452 2453 D->m[3][1] = 2454 M->m[0][0] * M->m[2][1] * M->m[3][2] - 2455 M->m[0][0] * M->m[3][1] * M->m[2][2] - 2456 M->m[0][1] * M->m[2][0] * M->m[3][2] + 2457 M->m[0][1] * M->m[3][0] * M->m[2][2] + 2458 M->m[0][2] * M->m[2][0] * M->m[3][1] - 2459 M->m[0][2] * M->m[3][0] * M->m[2][1]; 2460 2461 D->m[3][2] = 2462 -M->m[0][0] * M->m[1][1] * M->m[3][2] + 2463 M->m[0][0] * M->m[3][1] * M->m[1][2] + 2464 M->m[0][1] * M->m[1][0] * M->m[3][2] - 2465 M->m[0][1] * M->m[3][0] * M->m[1][2] - 2466 M->m[0][2] * M->m[1][0] * M->m[3][1] + 2467 M->m[0][2] * M->m[3][0] * M->m[1][1]; 2468 2469 D->m[3][3] = 2470 M->m[0][0] * M->m[1][1] * M->m[2][2] - 2471 M->m[0][0] * M->m[2][1] * M->m[1][2] - 2472 M->m[0][1] * M->m[1][0] * M->m[2][2] + 2473 M->m[0][1] * M->m[2][0] * M->m[1][2] + 2474 M->m[0][2] * M->m[1][0] * M->m[2][1] - 2475 M->m[0][2] * M->m[2][0] * M->m[1][1]; 2476 2477 det = 2478 M->m[0][0] * D->m[0][0] + 2479 M->m[1][0] * D->m[0][1] + 2480 M->m[2][0] * D->m[0][2] + 2481 M->m[3][0] * D->m[0][3]; 2482 2483 if (fabsf(det) < 1e-30) {/* non inversible */ 2484 *D = *M; /* wine tests */ 2485 return; 2486 } 2487 2488 det = 1.0 / det; 2489 2490 for (i = 0; i < 4; i++) 2491 for (k = 0; k < 4; k++) 2492 D->m[i][k] *= det; 2493 2494#if defined(DEBUG) || !defined(NDEBUG) 2495 { 2496 D3DMATRIX I; 2497 2498 nine_d3d_matrix_matrix_mul(&I, D, M); 2499 2500 for (i = 0; i < 4; ++i) 2501 for (k = 0; k < 4; ++k) 2502 if (fabsf(I.m[i][k] - (float)(i == k)) > 1e-3) 2503 DBG("Matrix inversion check FAILED !\n"); 2504 } 2505#endif 2506} 2507