ffvertex_prog.c revision 4a49301e
1/************************************************************************** 2 * 3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * \file ffvertex_prog.c 30 * 31 * Create a vertex program to execute the current fixed function T&L pipeline. 32 * \author Keith Whitwell 33 */ 34 35 36#include "main/glheader.h" 37#include "main/mtypes.h" 38#include "main/macros.h" 39#include "main/enums.h" 40#include "main/ffvertex_prog.h" 41#include "shader/program.h" 42#include "shader/prog_cache.h" 43#include "shader/prog_instruction.h" 44#include "shader/prog_parameter.h" 45#include "shader/prog_print.h" 46#include "shader/prog_statevars.h" 47 48 49/** Max of number of lights and texture coord units */ 50#define NUM_UNITS MAX2(MAX_TEXTURE_COORD_UNITS, MAX_LIGHTS) 51 52struct state_key { 53 unsigned light_color_material_mask:12; 54 unsigned light_global_enabled:1; 55 unsigned light_local_viewer:1; 56 unsigned light_twoside:1; 57 unsigned material_shininess_is_zero:1; 58 unsigned need_eye_coords:1; 59 unsigned normalize:1; 60 unsigned rescale_normals:1; 61 62 unsigned fog_source_is_depth:1; 63 unsigned separate_specular:1; 64 unsigned point_attenuated:1; 65 unsigned point_array:1; 66 unsigned texture_enabled_global:1; 67 unsigned fragprog_inputs_read:12; 68 69 unsigned varying_vp_inputs; 70 71 struct { 72 unsigned light_enabled:1; 73 unsigned light_eyepos3_is_zero:1; 74 unsigned light_spotcutoff_is_180:1; 75 unsigned light_attenuated:1; 76 unsigned texunit_really_enabled:1; 77 unsigned texmat_enabled:1; 78 unsigned texgen_enabled:4; 79 unsigned texgen_mode0:4; 80 unsigned texgen_mode1:4; 81 unsigned texgen_mode2:4; 82 unsigned texgen_mode3:4; 83 } unit[NUM_UNITS]; 84}; 85 86 87#define TXG_NONE 0 88#define TXG_OBJ_LINEAR 1 89#define TXG_EYE_LINEAR 2 90#define TXG_SPHERE_MAP 3 91#define TXG_REFLECTION_MAP 4 92#define TXG_NORMAL_MAP 5 93 94static GLuint translate_texgen( GLboolean enabled, GLenum mode ) 95{ 96 if (!enabled) 97 return TXG_NONE; 98 99 switch (mode) { 100 case GL_OBJECT_LINEAR: return TXG_OBJ_LINEAR; 101 case GL_EYE_LINEAR: return TXG_EYE_LINEAR; 102 case GL_SPHERE_MAP: return TXG_SPHERE_MAP; 103 case GL_REFLECTION_MAP_NV: return TXG_REFLECTION_MAP; 104 case GL_NORMAL_MAP_NV: return TXG_NORMAL_MAP; 105 default: return TXG_NONE; 106 } 107} 108 109 110 111static GLboolean check_active_shininess( GLcontext *ctx, 112 const struct state_key *key, 113 GLuint side ) 114{ 115 GLuint bit = 1 << (MAT_ATTRIB_FRONT_SHININESS + side); 116 117 if ((key->varying_vp_inputs & VERT_BIT_COLOR0) && 118 (key->light_color_material_mask & bit)) 119 return GL_TRUE; 120 121 if (key->varying_vp_inputs & (bit << 16)) 122 return GL_TRUE; 123 124 if (ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_SHININESS + side][0] != 0.0F) 125 return GL_TRUE; 126 127 return GL_FALSE; 128} 129 130 131static void make_state_key( GLcontext *ctx, struct state_key *key ) 132{ 133 const struct gl_fragment_program *fp; 134 GLuint i; 135 136 memset(key, 0, sizeof(struct state_key)); 137 fp = ctx->FragmentProgram._Current; 138 139 /* This now relies on texenvprogram.c being active: 140 */ 141 assert(fp); 142 143 key->need_eye_coords = ctx->_NeedEyeCoords; 144 145 key->fragprog_inputs_read = fp->Base.InputsRead; 146 key->varying_vp_inputs = ctx->varying_vp_inputs; 147 148 if (ctx->RenderMode == GL_FEEDBACK) { 149 /* make sure the vertprog emits color and tex0 */ 150 key->fragprog_inputs_read |= (FRAG_BIT_COL0 | FRAG_BIT_TEX0); 151 } 152 153 key->separate_specular = (ctx->Light.Model.ColorControl == 154 GL_SEPARATE_SPECULAR_COLOR); 155 156 if (ctx->Light.Enabled) { 157 key->light_global_enabled = 1; 158 159 if (ctx->Light.Model.LocalViewer) 160 key->light_local_viewer = 1; 161 162 if (ctx->Light.Model.TwoSide) 163 key->light_twoside = 1; 164 165 if (ctx->Light.ColorMaterialEnabled) { 166 key->light_color_material_mask = ctx->Light.ColorMaterialBitmask; 167 } 168 169 for (i = 0; i < MAX_LIGHTS; i++) { 170 struct gl_light *light = &ctx->Light.Light[i]; 171 172 if (light->Enabled) { 173 key->unit[i].light_enabled = 1; 174 175 if (light->EyePosition[3] == 0.0) 176 key->unit[i].light_eyepos3_is_zero = 1; 177 178 if (light->SpotCutoff == 180.0) 179 key->unit[i].light_spotcutoff_is_180 = 1; 180 181 if (light->ConstantAttenuation != 1.0 || 182 light->LinearAttenuation != 0.0 || 183 light->QuadraticAttenuation != 0.0) 184 key->unit[i].light_attenuated = 1; 185 } 186 } 187 188 if (check_active_shininess(ctx, key, 0)) { 189 key->material_shininess_is_zero = 0; 190 } 191 else if (key->light_twoside && 192 check_active_shininess(ctx, key, 1)) { 193 key->material_shininess_is_zero = 0; 194 } 195 else { 196 key->material_shininess_is_zero = 1; 197 } 198 } 199 200 if (ctx->Transform.Normalize) 201 key->normalize = 1; 202 203 if (ctx->Transform.RescaleNormals) 204 key->rescale_normals = 1; 205 206 if (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT) 207 key->fog_source_is_depth = 1; 208 209 if (ctx->Point._Attenuated) 210 key->point_attenuated = 1; 211 212#if FEATURE_point_size_array 213 if (ctx->Array.ArrayObj->PointSize.Enabled) 214 key->point_array = 1; 215#endif 216 217 if (ctx->Texture._TexGenEnabled || 218 ctx->Texture._TexMatEnabled || 219 ctx->Texture._EnabledUnits) 220 key->texture_enabled_global = 1; 221 222 for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) { 223 struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; 224 225 if (texUnit->_ReallyEnabled) 226 key->unit[i].texunit_really_enabled = 1; 227 228 if (ctx->Texture._TexMatEnabled & ENABLE_TEXMAT(i)) 229 key->unit[i].texmat_enabled = 1; 230 231 if (texUnit->TexGenEnabled) { 232 key->unit[i].texgen_enabled = 1; 233 234 key->unit[i].texgen_mode0 = 235 translate_texgen( texUnit->TexGenEnabled & (1<<0), 236 texUnit->GenS.Mode ); 237 key->unit[i].texgen_mode1 = 238 translate_texgen( texUnit->TexGenEnabled & (1<<1), 239 texUnit->GenT.Mode ); 240 key->unit[i].texgen_mode2 = 241 translate_texgen( texUnit->TexGenEnabled & (1<<2), 242 texUnit->GenR.Mode ); 243 key->unit[i].texgen_mode3 = 244 translate_texgen( texUnit->TexGenEnabled & (1<<3), 245 texUnit->GenQ.Mode ); 246 } 247 } 248} 249 250 251 252/* Very useful debugging tool - produces annotated listing of 253 * generated program with line/function references for each 254 * instruction back into this file: 255 */ 256#define DISASSEM 0 257 258 259/* Use uregs to represent registers internally, translate to Mesa's 260 * expected formats on emit. 261 * 262 * NOTE: These are passed by value extensively in this file rather 263 * than as usual by pointer reference. If this disturbs you, try 264 * remembering they are just 32bits in size. 265 * 266 * GCC is smart enough to deal with these dword-sized structures in 267 * much the same way as if I had defined them as dwords and was using 268 * macros to access and set the fields. This is much nicer and easier 269 * to evolve. 270 */ 271struct ureg { 272 GLuint file:4; 273 GLint idx:9; /* relative addressing may be negative */ 274 /* sizeof(idx) should == sizeof(prog_src_reg::Index) */ 275 GLuint negate:1; 276 GLuint swz:12; 277 GLuint pad:6; 278}; 279 280 281struct tnl_program { 282 const struct state_key *state; 283 struct gl_vertex_program *program; 284 GLint max_inst; /** number of instructions allocated for program */ 285 GLboolean mvp_with_dp4; 286 287 GLuint temp_in_use; 288 GLuint temp_reserved; 289 290 struct ureg eye_position; 291 struct ureg eye_position_z; 292 struct ureg eye_position_normalized; 293 struct ureg transformed_normal; 294 struct ureg identity; 295 296 GLuint materials; 297 GLuint color_materials; 298}; 299 300 301static const struct ureg undef = { 302 PROGRAM_UNDEFINED, 303 0, 304 0, 305 0, 306 0 307}; 308 309/* Local shorthand: 310 */ 311#define X SWIZZLE_X 312#define Y SWIZZLE_Y 313#define Z SWIZZLE_Z 314#define W SWIZZLE_W 315 316 317/* Construct a ureg: 318 */ 319static struct ureg make_ureg(GLuint file, GLint idx) 320{ 321 struct ureg reg; 322 reg.file = file; 323 reg.idx = idx; 324 reg.negate = 0; 325 reg.swz = SWIZZLE_NOOP; 326 reg.pad = 0; 327 return reg; 328} 329 330 331 332static struct ureg negate( struct ureg reg ) 333{ 334 reg.negate ^= 1; 335 return reg; 336} 337 338 339static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w ) 340{ 341 reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x), 342 GET_SWZ(reg.swz, y), 343 GET_SWZ(reg.swz, z), 344 GET_SWZ(reg.swz, w)); 345 return reg; 346} 347 348 349static struct ureg swizzle1( struct ureg reg, int x ) 350{ 351 return swizzle(reg, x, x, x, x); 352} 353 354 355static struct ureg get_temp( struct tnl_program *p ) 356{ 357 int bit = _mesa_ffs( ~p->temp_in_use ); 358 if (!bit) { 359 _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__); 360 _mesa_exit(1); 361 } 362 363 if ((GLuint) bit > p->program->Base.NumTemporaries) 364 p->program->Base.NumTemporaries = bit; 365 366 p->temp_in_use |= 1<<(bit-1); 367 return make_ureg(PROGRAM_TEMPORARY, bit-1); 368} 369 370 371static struct ureg reserve_temp( struct tnl_program *p ) 372{ 373 struct ureg temp = get_temp( p ); 374 p->temp_reserved |= 1<<temp.idx; 375 return temp; 376} 377 378 379static void release_temp( struct tnl_program *p, struct ureg reg ) 380{ 381 if (reg.file == PROGRAM_TEMPORARY) { 382 p->temp_in_use &= ~(1<<reg.idx); 383 p->temp_in_use |= p->temp_reserved; /* can't release reserved temps */ 384 } 385} 386 387static void release_temps( struct tnl_program *p ) 388{ 389 p->temp_in_use = p->temp_reserved; 390} 391 392 393static struct ureg register_param5(struct tnl_program *p, 394 GLint s0, 395 GLint s1, 396 GLint s2, 397 GLint s3, 398 GLint s4) 399{ 400 gl_state_index tokens[STATE_LENGTH]; 401 GLint idx; 402 tokens[0] = s0; 403 tokens[1] = s1; 404 tokens[2] = s2; 405 tokens[3] = s3; 406 tokens[4] = s4; 407 idx = _mesa_add_state_reference( p->program->Base.Parameters, tokens ); 408 return make_ureg(PROGRAM_STATE_VAR, idx); 409} 410 411 412#define register_param1(p,s0) register_param5(p,s0,0,0,0,0) 413#define register_param2(p,s0,s1) register_param5(p,s0,s1,0,0,0) 414#define register_param3(p,s0,s1,s2) register_param5(p,s0,s1,s2,0,0) 415#define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0) 416 417 418 419/** 420 * \param input one of VERT_ATTRIB_x tokens. 421 */ 422static struct ureg register_input( struct tnl_program *p, GLuint input ) 423{ 424 assert(input < 32); 425 426 if (p->state->varying_vp_inputs & (1<<input)) { 427 p->program->Base.InputsRead |= (1<<input); 428 return make_ureg(PROGRAM_INPUT, input); 429 } 430 else { 431 return register_param3( p, STATE_INTERNAL, STATE_CURRENT_ATTRIB, input ); 432 } 433} 434 435 436/** 437 * \param input one of VERT_RESULT_x tokens. 438 */ 439static struct ureg register_output( struct tnl_program *p, GLuint output ) 440{ 441 p->program->Base.OutputsWritten |= BITFIELD64_BIT(output); 442 return make_ureg(PROGRAM_OUTPUT, output); 443} 444 445 446static struct ureg register_const4f( struct tnl_program *p, 447 GLfloat s0, 448 GLfloat s1, 449 GLfloat s2, 450 GLfloat s3) 451{ 452 GLfloat values[4]; 453 GLint idx; 454 GLuint swizzle; 455 values[0] = s0; 456 values[1] = s1; 457 values[2] = s2; 458 values[3] = s3; 459 idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4, 460 &swizzle ); 461 ASSERT(swizzle == SWIZZLE_NOOP); 462 return make_ureg(PROGRAM_CONSTANT, idx); 463} 464 465#define register_const1f(p, s0) register_const4f(p, s0, 0, 0, 1) 466#define register_scalar_const(p, s0) register_const4f(p, s0, s0, s0, s0) 467#define register_const2f(p, s0, s1) register_const4f(p, s0, s1, 0, 1) 468#define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1) 469 470static GLboolean is_undef( struct ureg reg ) 471{ 472 return reg.file == PROGRAM_UNDEFINED; 473} 474 475 476static struct ureg get_identity_param( struct tnl_program *p ) 477{ 478 if (is_undef(p->identity)) 479 p->identity = register_const4f(p, 0,0,0,1); 480 481 return p->identity; 482} 483 484static void register_matrix_param5( struct tnl_program *p, 485 GLint s0, /* modelview, projection, etc */ 486 GLint s1, /* texture matrix number */ 487 GLint s2, /* first row */ 488 GLint s3, /* last row */ 489 GLint s4, /* inverse, transpose, etc */ 490 struct ureg *matrix ) 491{ 492 GLint i; 493 494 /* This is a bit sad as the support is there to pull the whole 495 * matrix out in one go: 496 */ 497 for (i = 0; i <= s3 - s2; i++) 498 matrix[i] = register_param5( p, s0, s1, i, i, s4 ); 499} 500 501 502static void emit_arg( struct prog_src_register *src, 503 struct ureg reg ) 504{ 505 src->File = reg.file; 506 src->Index = reg.idx; 507 src->Swizzle = reg.swz; 508 src->Negate = reg.negate ? NEGATE_XYZW : NEGATE_NONE; 509 src->Abs = 0; 510 src->RelAddr = 0; 511 /* Check that bitfield sizes aren't exceeded */ 512 ASSERT(src->Index == reg.idx); 513} 514 515 516static void emit_dst( struct prog_dst_register *dst, 517 struct ureg reg, GLuint mask ) 518{ 519 dst->File = reg.file; 520 dst->Index = reg.idx; 521 /* allow zero as a shorthand for xyzw */ 522 dst->WriteMask = mask ? mask : WRITEMASK_XYZW; 523 dst->CondMask = COND_TR; /* always pass cond test */ 524 dst->CondSwizzle = SWIZZLE_NOOP; 525 dst->CondSrc = 0; 526 dst->pad = 0; 527 /* Check that bitfield sizes aren't exceeded */ 528 ASSERT(dst->Index == reg.idx); 529} 530 531 532static void debug_insn( struct prog_instruction *inst, const char *fn, 533 GLuint line ) 534{ 535 if (DISASSEM) { 536 static const char *last_fn; 537 538 if (fn != last_fn) { 539 last_fn = fn; 540 _mesa_printf("%s:\n", fn); 541 } 542 543 _mesa_printf("%d:\t", line); 544 _mesa_print_instruction(inst); 545 } 546} 547 548 549static void emit_op3fn(struct tnl_program *p, 550 enum prog_opcode op, 551 struct ureg dest, 552 GLuint mask, 553 struct ureg src0, 554 struct ureg src1, 555 struct ureg src2, 556 const char *fn, 557 GLuint line) 558{ 559 GLuint nr; 560 struct prog_instruction *inst; 561 562 assert((GLint) p->program->Base.NumInstructions <= p->max_inst); 563 564 if (p->program->Base.NumInstructions == p->max_inst) { 565 /* need to extend the program's instruction array */ 566 struct prog_instruction *newInst; 567 568 /* double the size */ 569 p->max_inst *= 2; 570 571 newInst = _mesa_alloc_instructions(p->max_inst); 572 if (!newInst) { 573 _mesa_error(NULL, GL_OUT_OF_MEMORY, "vertex program build"); 574 return; 575 } 576 577 _mesa_copy_instructions(newInst, 578 p->program->Base.Instructions, 579 p->program->Base.NumInstructions); 580 581 _mesa_free_instructions(p->program->Base.Instructions, 582 p->program->Base.NumInstructions); 583 584 p->program->Base.Instructions = newInst; 585 } 586 587 nr = p->program->Base.NumInstructions++; 588 589 inst = &p->program->Base.Instructions[nr]; 590 inst->Opcode = (enum prog_opcode) op; 591 inst->Data = 0; 592 593 emit_arg( &inst->SrcReg[0], src0 ); 594 emit_arg( &inst->SrcReg[1], src1 ); 595 emit_arg( &inst->SrcReg[2], src2 ); 596 597 emit_dst( &inst->DstReg, dest, mask ); 598 599 debug_insn(inst, fn, line); 600} 601 602 603#define emit_op3(p, op, dst, mask, src0, src1, src2) \ 604 emit_op3fn(p, op, dst, mask, src0, src1, src2, __FUNCTION__, __LINE__) 605 606#define emit_op2(p, op, dst, mask, src0, src1) \ 607 emit_op3fn(p, op, dst, mask, src0, src1, undef, __FUNCTION__, __LINE__) 608 609#define emit_op1(p, op, dst, mask, src0) \ 610 emit_op3fn(p, op, dst, mask, src0, undef, undef, __FUNCTION__, __LINE__) 611 612 613static struct ureg make_temp( struct tnl_program *p, struct ureg reg ) 614{ 615 if (reg.file == PROGRAM_TEMPORARY && 616 !(p->temp_reserved & (1<<reg.idx))) 617 return reg; 618 else { 619 struct ureg temp = get_temp(p); 620 emit_op1(p, OPCODE_MOV, temp, 0, reg); 621 return temp; 622 } 623} 624 625 626/* Currently no tracking performed of input/output/register size or 627 * active elements. Could be used to reduce these operations, as 628 * could the matrix type. 629 */ 630static void emit_matrix_transform_vec4( struct tnl_program *p, 631 struct ureg dest, 632 const struct ureg *mat, 633 struct ureg src) 634{ 635 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_X, src, mat[0]); 636 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Y, src, mat[1]); 637 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Z, src, mat[2]); 638 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_W, src, mat[3]); 639} 640 641 642/* This version is much easier to implement if writemasks are not 643 * supported natively on the target or (like SSE), the target doesn't 644 * have a clean/obvious dotproduct implementation. 645 */ 646static void emit_transpose_matrix_transform_vec4( struct tnl_program *p, 647 struct ureg dest, 648 const struct ureg *mat, 649 struct ureg src) 650{ 651 struct ureg tmp; 652 653 if (dest.file != PROGRAM_TEMPORARY) 654 tmp = get_temp(p); 655 else 656 tmp = dest; 657 658 emit_op2(p, OPCODE_MUL, tmp, 0, swizzle1(src,X), mat[0]); 659 emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Y), mat[1], tmp); 660 emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Z), mat[2], tmp); 661 emit_op3(p, OPCODE_MAD, dest, 0, swizzle1(src,W), mat[3], tmp); 662 663 if (dest.file != PROGRAM_TEMPORARY) 664 release_temp(p, tmp); 665} 666 667 668static void emit_matrix_transform_vec3( struct tnl_program *p, 669 struct ureg dest, 670 const struct ureg *mat, 671 struct ureg src) 672{ 673 emit_op2(p, OPCODE_DP3, dest, WRITEMASK_X, src, mat[0]); 674 emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Y, src, mat[1]); 675 emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Z, src, mat[2]); 676} 677 678 679static void emit_normalize_vec3( struct tnl_program *p, 680 struct ureg dest, 681 struct ureg src ) 682{ 683#if 0 684 /* XXX use this when drivers are ready for NRM3 */ 685 emit_op1(p, OPCODE_NRM3, dest, WRITEMASK_XYZ, src); 686#else 687 struct ureg tmp = get_temp(p); 688 emit_op2(p, OPCODE_DP3, tmp, WRITEMASK_X, src, src); 689 emit_op1(p, OPCODE_RSQ, tmp, WRITEMASK_X, tmp); 690 emit_op2(p, OPCODE_MUL, dest, 0, src, swizzle1(tmp, X)); 691 release_temp(p, tmp); 692#endif 693} 694 695 696static void emit_passthrough( struct tnl_program *p, 697 GLuint input, 698 GLuint output ) 699{ 700 struct ureg out = register_output(p, output); 701 emit_op1(p, OPCODE_MOV, out, 0, register_input(p, input)); 702} 703 704 705static struct ureg get_eye_position( struct tnl_program *p ) 706{ 707 if (is_undef(p->eye_position)) { 708 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 709 struct ureg modelview[4]; 710 711 p->eye_position = reserve_temp(p); 712 713 if (p->mvp_with_dp4) { 714 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3, 715 0, modelview ); 716 717 emit_matrix_transform_vec4(p, p->eye_position, modelview, pos); 718 } 719 else { 720 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3, 721 STATE_MATRIX_TRANSPOSE, modelview ); 722 723 emit_transpose_matrix_transform_vec4(p, p->eye_position, modelview, pos); 724 } 725 } 726 727 return p->eye_position; 728} 729 730 731static struct ureg get_eye_position_z( struct tnl_program *p ) 732{ 733 if (!is_undef(p->eye_position)) 734 return swizzle1(p->eye_position, Z); 735 736 if (is_undef(p->eye_position_z)) { 737 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 738 struct ureg modelview[4]; 739 740 p->eye_position_z = reserve_temp(p); 741 742 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3, 743 0, modelview ); 744 745 emit_op2(p, OPCODE_DP4, p->eye_position_z, 0, pos, modelview[2]); 746 } 747 748 return p->eye_position_z; 749} 750 751 752static struct ureg get_eye_position_normalized( struct tnl_program *p ) 753{ 754 if (is_undef(p->eye_position_normalized)) { 755 struct ureg eye = get_eye_position(p); 756 p->eye_position_normalized = reserve_temp(p); 757 emit_normalize_vec3(p, p->eye_position_normalized, eye); 758 } 759 760 return p->eye_position_normalized; 761} 762 763 764static struct ureg get_transformed_normal( struct tnl_program *p ) 765{ 766 if (is_undef(p->transformed_normal) && 767 !p->state->need_eye_coords && 768 !p->state->normalize && 769 !(p->state->need_eye_coords == p->state->rescale_normals)) 770 { 771 p->transformed_normal = register_input(p, VERT_ATTRIB_NORMAL ); 772 } 773 else if (is_undef(p->transformed_normal)) 774 { 775 struct ureg normal = register_input(p, VERT_ATTRIB_NORMAL ); 776 struct ureg mvinv[3]; 777 struct ureg transformed_normal = reserve_temp(p); 778 779 if (p->state->need_eye_coords) { 780 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 2, 781 STATE_MATRIX_INVTRANS, mvinv ); 782 783 /* Transform to eye space: 784 */ 785 emit_matrix_transform_vec3( p, transformed_normal, mvinv, normal ); 786 normal = transformed_normal; 787 } 788 789 /* Normalize/Rescale: 790 */ 791 if (p->state->normalize) { 792 emit_normalize_vec3( p, transformed_normal, normal ); 793 normal = transformed_normal; 794 } 795 else if (p->state->need_eye_coords == p->state->rescale_normals) { 796 /* This is already adjusted for eye/non-eye rendering: 797 */ 798 struct ureg rescale = register_param2(p, STATE_INTERNAL, 799 STATE_NORMAL_SCALE); 800 801 emit_op2( p, OPCODE_MUL, transformed_normal, 0, normal, rescale ); 802 normal = transformed_normal; 803 } 804 805 assert(normal.file == PROGRAM_TEMPORARY); 806 p->transformed_normal = normal; 807 } 808 809 return p->transformed_normal; 810} 811 812 813static void build_hpos( struct tnl_program *p ) 814{ 815 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 816 struct ureg hpos = register_output( p, VERT_RESULT_HPOS ); 817 struct ureg mvp[4]; 818 819 if (p->mvp_with_dp4) { 820 register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3, 821 0, mvp ); 822 emit_matrix_transform_vec4( p, hpos, mvp, pos ); 823 } 824 else { 825 register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3, 826 STATE_MATRIX_TRANSPOSE, mvp ); 827 emit_transpose_matrix_transform_vec4( p, hpos, mvp, pos ); 828 } 829} 830 831 832static GLuint material_attrib( GLuint side, GLuint property ) 833{ 834 return (property - STATE_AMBIENT) * 2 + side; 835} 836 837 838/** 839 * Get a bitmask of which material values vary on a per-vertex basis. 840 */ 841static void set_material_flags( struct tnl_program *p ) 842{ 843 p->color_materials = 0; 844 p->materials = 0; 845 846 if (p->state->varying_vp_inputs & VERT_BIT_COLOR0) { 847 p->materials = 848 p->color_materials = p->state->light_color_material_mask; 849 } 850 851 p->materials |= (p->state->varying_vp_inputs >> 16); 852} 853 854 855static struct ureg get_material( struct tnl_program *p, GLuint side, 856 GLuint property ) 857{ 858 GLuint attrib = material_attrib(side, property); 859 860 if (p->color_materials & (1<<attrib)) 861 return register_input(p, VERT_ATTRIB_COLOR0); 862 else if (p->materials & (1<<attrib)) { 863 /* Put material values in the GENERIC slots -- they are not used 864 * for anything in fixed function mode. 865 */ 866 return register_input( p, attrib + VERT_ATTRIB_GENERIC0 ); 867 } 868 else 869 return register_param3( p, STATE_MATERIAL, side, property ); 870} 871 872#define SCENE_COLOR_BITS(side) (( MAT_BIT_FRONT_EMISSION | \ 873 MAT_BIT_FRONT_AMBIENT | \ 874 MAT_BIT_FRONT_DIFFUSE) << (side)) 875 876 877/** 878 * Either return a precalculated constant value or emit code to 879 * calculate these values dynamically in the case where material calls 880 * are present between begin/end pairs. 881 * 882 * Probably want to shift this to the program compilation phase - if 883 * we always emitted the calculation here, a smart compiler could 884 * detect that it was constant (given a certain set of inputs), and 885 * lift it out of the main loop. That way the programs created here 886 * would be independent of the vertex_buffer details. 887 */ 888static struct ureg get_scenecolor( struct tnl_program *p, GLuint side ) 889{ 890 if (p->materials & SCENE_COLOR_BITS(side)) { 891 struct ureg lm_ambient = register_param1(p, STATE_LIGHTMODEL_AMBIENT); 892 struct ureg material_emission = get_material(p, side, STATE_EMISSION); 893 struct ureg material_ambient = get_material(p, side, STATE_AMBIENT); 894 struct ureg material_diffuse = get_material(p, side, STATE_DIFFUSE); 895 struct ureg tmp = make_temp(p, material_diffuse); 896 emit_op3(p, OPCODE_MAD, tmp, WRITEMASK_XYZ, lm_ambient, 897 material_ambient, material_emission); 898 return tmp; 899 } 900 else 901 return register_param2( p, STATE_LIGHTMODEL_SCENECOLOR, side ); 902} 903 904 905static struct ureg get_lightprod( struct tnl_program *p, GLuint light, 906 GLuint side, GLuint property ) 907{ 908 GLuint attrib = material_attrib(side, property); 909 if (p->materials & (1<<attrib)) { 910 struct ureg light_value = 911 register_param3(p, STATE_LIGHT, light, property); 912 struct ureg material_value = get_material(p, side, property); 913 struct ureg tmp = get_temp(p); 914 emit_op2(p, OPCODE_MUL, tmp, 0, light_value, material_value); 915 return tmp; 916 } 917 else 918 return register_param4(p, STATE_LIGHTPROD, light, side, property); 919} 920 921 922static struct ureg calculate_light_attenuation( struct tnl_program *p, 923 GLuint i, 924 struct ureg VPpli, 925 struct ureg dist ) 926{ 927 struct ureg attenuation = register_param3(p, STATE_LIGHT, i, 928 STATE_ATTENUATION); 929 struct ureg att = get_temp(p); 930 931 /* Calculate spot attenuation: 932 */ 933 if (!p->state->unit[i].light_spotcutoff_is_180) { 934 struct ureg spot_dir_norm = register_param3(p, STATE_INTERNAL, 935 STATE_LIGHT_SPOT_DIR_NORMALIZED, i); 936 struct ureg spot = get_temp(p); 937 struct ureg slt = get_temp(p); 938 939 emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm); 940 emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot); 941 emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W)); 942 emit_op2(p, OPCODE_MUL, att, 0, slt, spot); 943 944 release_temp(p, spot); 945 release_temp(p, slt); 946 } 947 948 /* Calculate distance attenuation: 949 */ 950 if (p->state->unit[i].light_attenuated) { 951 /* 1/d,d,d,1/d */ 952 emit_op1(p, OPCODE_RCP, dist, WRITEMASK_YZ, dist); 953 /* 1,d,d*d,1/d */ 954 emit_op2(p, OPCODE_MUL, dist, WRITEMASK_XZ, dist, swizzle1(dist,Y)); 955 /* 1/dist-atten */ 956 emit_op2(p, OPCODE_DP3, dist, 0, attenuation, dist); 957 958 if (!p->state->unit[i].light_spotcutoff_is_180) { 959 /* dist-atten */ 960 emit_op1(p, OPCODE_RCP, dist, 0, dist); 961 /* spot-atten * dist-atten */ 962 emit_op2(p, OPCODE_MUL, att, 0, dist, att); 963 } 964 else { 965 /* dist-atten */ 966 emit_op1(p, OPCODE_RCP, att, 0, dist); 967 } 968 } 969 970 return att; 971} 972 973 974/** 975 * Compute: 976 * lit.y = MAX(0, dots.x) 977 * lit.z = SLT(0, dots.x) 978 */ 979static void emit_degenerate_lit( struct tnl_program *p, 980 struct ureg lit, 981 struct ureg dots ) 982{ 983 struct ureg id = get_identity_param(p); /* id = {0,0,0,1} */ 984 985 /* Note that lit.x & lit.w will not be examined. Note also that 986 * dots.xyzw == dots.xxxx. 987 */ 988 989 /* MAX lit, id, dots; 990 */ 991 emit_op2(p, OPCODE_MAX, lit, WRITEMASK_XYZW, id, dots); 992 993 /* result[2] = (in > 0 ? 1 : 0) 994 * SLT lit.z, id.z, dots; # lit.z = (0 < dots.z) ? 1 : 0 995 */ 996 emit_op2(p, OPCODE_SLT, lit, WRITEMASK_Z, swizzle1(id,Z), dots); 997} 998 999 1000/* Need to add some addtional parameters to allow lighting in object 1001 * space - STATE_SPOT_DIRECTION and STATE_HALF_VECTOR implicitly assume eye 1002 * space lighting. 1003 */ 1004static void build_lighting( struct tnl_program *p ) 1005{ 1006 const GLboolean twoside = p->state->light_twoside; 1007 const GLboolean separate = p->state->separate_specular; 1008 GLuint nr_lights = 0, count = 0; 1009 struct ureg normal = get_transformed_normal(p); 1010 struct ureg lit = get_temp(p); 1011 struct ureg dots = get_temp(p); 1012 struct ureg _col0 = undef, _col1 = undef; 1013 struct ureg _bfc0 = undef, _bfc1 = undef; 1014 GLuint i; 1015 1016 /* 1017 * NOTE: 1018 * dots.x = dot(normal, VPpli) 1019 * dots.y = dot(normal, halfAngle) 1020 * dots.z = back.shininess 1021 * dots.w = front.shininess 1022 */ 1023 1024 for (i = 0; i < MAX_LIGHTS; i++) 1025 if (p->state->unit[i].light_enabled) 1026 nr_lights++; 1027 1028 set_material_flags(p); 1029 1030 { 1031 if (!p->state->material_shininess_is_zero) { 1032 struct ureg shininess = get_material(p, 0, STATE_SHININESS); 1033 emit_op1(p, OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X)); 1034 release_temp(p, shininess); 1035 } 1036 1037 _col0 = make_temp(p, get_scenecolor(p, 0)); 1038 if (separate) 1039 _col1 = make_temp(p, get_identity_param(p)); 1040 else 1041 _col1 = _col0; 1042 } 1043 1044 if (twoside) { 1045 if (!p->state->material_shininess_is_zero) { 1046 /* Note that we negate the back-face specular exponent here. 1047 * The negation will be un-done later in the back-face code below. 1048 */ 1049 struct ureg shininess = get_material(p, 1, STATE_SHININESS); 1050 emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z, 1051 negate(swizzle1(shininess,X))); 1052 release_temp(p, shininess); 1053 } 1054 1055 _bfc0 = make_temp(p, get_scenecolor(p, 1)); 1056 if (separate) 1057 _bfc1 = make_temp(p, get_identity_param(p)); 1058 else 1059 _bfc1 = _bfc0; 1060 } 1061 1062 /* If no lights, still need to emit the scenecolor. 1063 */ 1064 { 1065 struct ureg res0 = register_output( p, VERT_RESULT_COL0 ); 1066 emit_op1(p, OPCODE_MOV, res0, 0, _col0); 1067 } 1068 1069 if (separate) { 1070 struct ureg res1 = register_output( p, VERT_RESULT_COL1 ); 1071 emit_op1(p, OPCODE_MOV, res1, 0, _col1); 1072 } 1073 1074 if (twoside) { 1075 struct ureg res0 = register_output( p, VERT_RESULT_BFC0 ); 1076 emit_op1(p, OPCODE_MOV, res0, 0, _bfc0); 1077 } 1078 1079 if (twoside && separate) { 1080 struct ureg res1 = register_output( p, VERT_RESULT_BFC1 ); 1081 emit_op1(p, OPCODE_MOV, res1, 0, _bfc1); 1082 } 1083 1084 if (nr_lights == 0) { 1085 release_temps(p); 1086 return; 1087 } 1088 1089 for (i = 0; i < MAX_LIGHTS; i++) { 1090 if (p->state->unit[i].light_enabled) { 1091 struct ureg half = undef; 1092 struct ureg att = undef, VPpli = undef; 1093 1094 count++; 1095 1096 if (p->state->unit[i].light_eyepos3_is_zero) { 1097 /* Can used precomputed constants in this case. 1098 * Attenuation never applies to infinite lights. 1099 */ 1100 VPpli = register_param3(p, STATE_INTERNAL, 1101 STATE_LIGHT_POSITION_NORMALIZED, i); 1102 1103 if (!p->state->material_shininess_is_zero) { 1104 if (p->state->light_local_viewer) { 1105 struct ureg eye_hat = get_eye_position_normalized(p); 1106 half = get_temp(p); 1107 emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat); 1108 emit_normalize_vec3(p, half, half); 1109 } 1110 else { 1111 half = register_param3(p, STATE_INTERNAL, 1112 STATE_LIGHT_HALF_VECTOR, i); 1113 } 1114 } 1115 } 1116 else { 1117 struct ureg Ppli = register_param3(p, STATE_INTERNAL, 1118 STATE_LIGHT_POSITION, i); 1119 struct ureg V = get_eye_position(p); 1120 struct ureg dist = get_temp(p); 1121 1122 VPpli = get_temp(p); 1123 1124 /* Calculate VPpli vector 1125 */ 1126 emit_op2(p, OPCODE_SUB, VPpli, 0, Ppli, V); 1127 1128 /* Normalize VPpli. The dist value also used in 1129 * attenuation below. 1130 */ 1131 emit_op2(p, OPCODE_DP3, dist, 0, VPpli, VPpli); 1132 emit_op1(p, OPCODE_RSQ, dist, 0, dist); 1133 emit_op2(p, OPCODE_MUL, VPpli, 0, VPpli, dist); 1134 1135 /* Calculate attenuation: 1136 */ 1137 if (!p->state->unit[i].light_spotcutoff_is_180 || 1138 p->state->unit[i].light_attenuated) { 1139 att = calculate_light_attenuation(p, i, VPpli, dist); 1140 } 1141 1142 /* Calculate viewer direction, or use infinite viewer: 1143 */ 1144 if (!p->state->material_shininess_is_zero) { 1145 half = get_temp(p); 1146 1147 if (p->state->light_local_viewer) { 1148 struct ureg eye_hat = get_eye_position_normalized(p); 1149 emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat); 1150 } 1151 else { 1152 struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z); 1153 emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir); 1154 } 1155 1156 emit_normalize_vec3(p, half, half); 1157 } 1158 1159 release_temp(p, dist); 1160 } 1161 1162 /* Calculate dot products: 1163 */ 1164 if (p->state->material_shininess_is_zero) { 1165 emit_op2(p, OPCODE_DP3, dots, 0, normal, VPpli); 1166 } 1167 else { 1168 emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli); 1169 emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half); 1170 } 1171 1172 /* Front face lighting: 1173 */ 1174 { 1175 struct ureg ambient = get_lightprod(p, i, 0, STATE_AMBIENT); 1176 struct ureg diffuse = get_lightprod(p, i, 0, STATE_DIFFUSE); 1177 struct ureg specular = get_lightprod(p, i, 0, STATE_SPECULAR); 1178 struct ureg res0, res1; 1179 GLuint mask0, mask1; 1180 1181 if (count == nr_lights) { 1182 if (separate) { 1183 mask0 = WRITEMASK_XYZ; 1184 mask1 = WRITEMASK_XYZ; 1185 res0 = register_output( p, VERT_RESULT_COL0 ); 1186 res1 = register_output( p, VERT_RESULT_COL1 ); 1187 } 1188 else { 1189 mask0 = 0; 1190 mask1 = WRITEMASK_XYZ; 1191 res0 = _col0; 1192 res1 = register_output( p, VERT_RESULT_COL0 ); 1193 } 1194 } 1195 else { 1196 mask0 = 0; 1197 mask1 = 0; 1198 res0 = _col0; 1199 res1 = _col1; 1200 } 1201 1202 if (!is_undef(att)) { 1203 /* light is attenuated by distance */ 1204 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1205 emit_op2(p, OPCODE_MUL, lit, 0, lit, att); 1206 emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0); 1207 } 1208 else if (!p->state->material_shininess_is_zero) { 1209 /* there's a non-zero specular term */ 1210 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1211 emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0); 1212 } 1213 else { 1214 /* no attenutation, no specular */ 1215 emit_degenerate_lit(p, lit, dots); 1216 emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0); 1217 } 1218 1219 emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0); 1220 emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1); 1221 1222 release_temp(p, ambient); 1223 release_temp(p, diffuse); 1224 release_temp(p, specular); 1225 } 1226 1227 /* Back face lighting: 1228 */ 1229 if (twoside) { 1230 struct ureg ambient = get_lightprod(p, i, 1, STATE_AMBIENT); 1231 struct ureg diffuse = get_lightprod(p, i, 1, STATE_DIFFUSE); 1232 struct ureg specular = get_lightprod(p, i, 1, STATE_SPECULAR); 1233 struct ureg res0, res1; 1234 GLuint mask0, mask1; 1235 1236 if (count == nr_lights) { 1237 if (separate) { 1238 mask0 = WRITEMASK_XYZ; 1239 mask1 = WRITEMASK_XYZ; 1240 res0 = register_output( p, VERT_RESULT_BFC0 ); 1241 res1 = register_output( p, VERT_RESULT_BFC1 ); 1242 } 1243 else { 1244 mask0 = 0; 1245 mask1 = WRITEMASK_XYZ; 1246 res0 = _bfc0; 1247 res1 = register_output( p, VERT_RESULT_BFC0 ); 1248 } 1249 } 1250 else { 1251 res0 = _bfc0; 1252 res1 = _bfc1; 1253 mask0 = 0; 1254 mask1 = 0; 1255 } 1256 1257 /* For the back face we need to negate the X and Y component 1258 * dot products. dots.Z has the negated back-face specular 1259 * exponent. We swizzle that into the W position. This 1260 * negation makes the back-face specular term positive again. 1261 */ 1262 dots = negate(swizzle(dots,X,Y,W,Z)); 1263 1264 if (!is_undef(att)) { 1265 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1266 emit_op2(p, OPCODE_MUL, lit, 0, lit, att); 1267 emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0); 1268 } 1269 else if (!p->state->material_shininess_is_zero) { 1270 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1271 emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0); /**/ 1272 } 1273 else { 1274 emit_degenerate_lit(p, lit, dots); 1275 emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0); 1276 } 1277 1278 emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0); 1279 emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1); 1280 /* restore dots to its original state for subsequent lights 1281 * by negating and swizzling again. 1282 */ 1283 dots = negate(swizzle(dots,X,Y,W,Z)); 1284 1285 release_temp(p, ambient); 1286 release_temp(p, diffuse); 1287 release_temp(p, specular); 1288 } 1289 1290 release_temp(p, half); 1291 release_temp(p, VPpli); 1292 release_temp(p, att); 1293 } 1294 } 1295 1296 release_temps( p ); 1297} 1298 1299 1300static void build_fog( struct tnl_program *p ) 1301{ 1302 struct ureg fog = register_output(p, VERT_RESULT_FOGC); 1303 struct ureg input; 1304 1305 if (p->state->fog_source_is_depth) { 1306 input = get_eye_position_z(p); 1307 } 1308 else { 1309 input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X); 1310 } 1311 1312 /* result.fog = {abs(f),0,0,1}; */ 1313 emit_op1(p, OPCODE_ABS, fog, WRITEMASK_X, input); 1314 emit_op1(p, OPCODE_MOV, fog, WRITEMASK_YZW, get_identity_param(p)); 1315} 1316 1317 1318static void build_reflect_texgen( struct tnl_program *p, 1319 struct ureg dest, 1320 GLuint writemask ) 1321{ 1322 struct ureg normal = get_transformed_normal(p); 1323 struct ureg eye_hat = get_eye_position_normalized(p); 1324 struct ureg tmp = get_temp(p); 1325 1326 /* n.u */ 1327 emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat); 1328 /* 2n.u */ 1329 emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp); 1330 /* (-2n.u)n + u */ 1331 emit_op3(p, OPCODE_MAD, dest, writemask, negate(tmp), normal, eye_hat); 1332 1333 release_temp(p, tmp); 1334} 1335 1336 1337static void build_sphere_texgen( struct tnl_program *p, 1338 struct ureg dest, 1339 GLuint writemask ) 1340{ 1341 struct ureg normal = get_transformed_normal(p); 1342 struct ureg eye_hat = get_eye_position_normalized(p); 1343 struct ureg tmp = get_temp(p); 1344 struct ureg half = register_scalar_const(p, .5); 1345 struct ureg r = get_temp(p); 1346 struct ureg inv_m = get_temp(p); 1347 struct ureg id = get_identity_param(p); 1348 1349 /* Could share the above calculations, but it would be 1350 * a fairly odd state for someone to set (both sphere and 1351 * reflection active for different texture coordinate 1352 * components. Of course - if two texture units enable 1353 * reflect and/or sphere, things start to tilt in favour 1354 * of seperating this out: 1355 */ 1356 1357 /* n.u */ 1358 emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat); 1359 /* 2n.u */ 1360 emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp); 1361 /* (-2n.u)n + u */ 1362 emit_op3(p, OPCODE_MAD, r, 0, negate(tmp), normal, eye_hat); 1363 /* r + 0,0,1 */ 1364 emit_op2(p, OPCODE_ADD, tmp, 0, r, swizzle(id,X,Y,W,Z)); 1365 /* rx^2 + ry^2 + (rz+1)^2 */ 1366 emit_op2(p, OPCODE_DP3, tmp, 0, tmp, tmp); 1367 /* 2/m */ 1368 emit_op1(p, OPCODE_RSQ, tmp, 0, tmp); 1369 /* 1/m */ 1370 emit_op2(p, OPCODE_MUL, inv_m, 0, tmp, half); 1371 /* r/m + 1/2 */ 1372 emit_op3(p, OPCODE_MAD, dest, writemask, r, inv_m, half); 1373 1374 release_temp(p, tmp); 1375 release_temp(p, r); 1376 release_temp(p, inv_m); 1377} 1378 1379 1380static void build_texture_transform( struct tnl_program *p ) 1381{ 1382 GLuint i, j; 1383 1384 for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) { 1385 1386 if (!(p->state->fragprog_inputs_read & FRAG_BIT_TEX(i))) 1387 continue; 1388 1389 if (p->state->unit[i].texgen_enabled || 1390 p->state->unit[i].texmat_enabled) { 1391 1392 GLuint texmat_enabled = p->state->unit[i].texmat_enabled; 1393 struct ureg out = register_output(p, VERT_RESULT_TEX0 + i); 1394 struct ureg out_texgen = undef; 1395 1396 if (p->state->unit[i].texgen_enabled) { 1397 GLuint copy_mask = 0; 1398 GLuint sphere_mask = 0; 1399 GLuint reflect_mask = 0; 1400 GLuint normal_mask = 0; 1401 GLuint modes[4]; 1402 1403 if (texmat_enabled) 1404 out_texgen = get_temp(p); 1405 else 1406 out_texgen = out; 1407 1408 modes[0] = p->state->unit[i].texgen_mode0; 1409 modes[1] = p->state->unit[i].texgen_mode1; 1410 modes[2] = p->state->unit[i].texgen_mode2; 1411 modes[3] = p->state->unit[i].texgen_mode3; 1412 1413 for (j = 0; j < 4; j++) { 1414 switch (modes[j]) { 1415 case TXG_OBJ_LINEAR: { 1416 struct ureg obj = register_input(p, VERT_ATTRIB_POS); 1417 struct ureg plane = 1418 register_param3(p, STATE_TEXGEN, i, 1419 STATE_TEXGEN_OBJECT_S + j); 1420 1421 emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j, 1422 obj, plane ); 1423 break; 1424 } 1425 case TXG_EYE_LINEAR: { 1426 struct ureg eye = get_eye_position(p); 1427 struct ureg plane = 1428 register_param3(p, STATE_TEXGEN, i, 1429 STATE_TEXGEN_EYE_S + j); 1430 1431 emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j, 1432 eye, plane ); 1433 break; 1434 } 1435 case TXG_SPHERE_MAP: 1436 sphere_mask |= WRITEMASK_X << j; 1437 break; 1438 case TXG_REFLECTION_MAP: 1439 reflect_mask |= WRITEMASK_X << j; 1440 break; 1441 case TXG_NORMAL_MAP: 1442 normal_mask |= WRITEMASK_X << j; 1443 break; 1444 case TXG_NONE: 1445 copy_mask |= WRITEMASK_X << j; 1446 } 1447 } 1448 1449 if (sphere_mask) { 1450 build_sphere_texgen(p, out_texgen, sphere_mask); 1451 } 1452 1453 if (reflect_mask) { 1454 build_reflect_texgen(p, out_texgen, reflect_mask); 1455 } 1456 1457 if (normal_mask) { 1458 struct ureg normal = get_transformed_normal(p); 1459 emit_op1(p, OPCODE_MOV, out_texgen, normal_mask, normal ); 1460 } 1461 1462 if (copy_mask) { 1463 struct ureg in = register_input(p, VERT_ATTRIB_TEX0+i); 1464 emit_op1(p, OPCODE_MOV, out_texgen, copy_mask, in ); 1465 } 1466 } 1467 1468 if (texmat_enabled) { 1469 struct ureg texmat[4]; 1470 struct ureg in = (!is_undef(out_texgen) ? 1471 out_texgen : 1472 register_input(p, VERT_ATTRIB_TEX0+i)); 1473 if (p->mvp_with_dp4) { 1474 register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3, 1475 0, texmat ); 1476 emit_matrix_transform_vec4( p, out, texmat, in ); 1477 } 1478 else { 1479 register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3, 1480 STATE_MATRIX_TRANSPOSE, texmat ); 1481 emit_transpose_matrix_transform_vec4( p, out, texmat, in ); 1482 } 1483 } 1484 1485 release_temps(p); 1486 } 1487 else { 1488 emit_passthrough(p, VERT_ATTRIB_TEX0+i, VERT_RESULT_TEX0+i); 1489 } 1490 } 1491} 1492 1493 1494/** 1495 * Point size attenuation computation. 1496 */ 1497static void build_atten_pointsize( struct tnl_program *p ) 1498{ 1499 struct ureg eye = get_eye_position_z(p); 1500 struct ureg state_size = register_param1(p, STATE_POINT_SIZE); 1501 struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION); 1502 struct ureg out = register_output(p, VERT_RESULT_PSIZ); 1503 struct ureg ut = get_temp(p); 1504 1505 /* dist = |eyez| */ 1506 emit_op1(p, OPCODE_ABS, ut, WRITEMASK_Y, swizzle1(eye, Z)); 1507 /* p1 + dist * (p2 + dist * p3); */ 1508 emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y), 1509 swizzle1(state_attenuation, Z), swizzle1(state_attenuation, Y)); 1510 emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y), 1511 ut, swizzle1(state_attenuation, X)); 1512 1513 /* 1 / sqrt(factor) */ 1514 emit_op1(p, OPCODE_RSQ, ut, WRITEMASK_X, ut ); 1515 1516#if 0 1517 /* out = pointSize / sqrt(factor) */ 1518 emit_op2(p, OPCODE_MUL, out, WRITEMASK_X, ut, state_size); 1519#else 1520 /* this is a good place to clamp the point size since there's likely 1521 * no hardware registers to clamp point size at rasterization time. 1522 */ 1523 emit_op2(p, OPCODE_MUL, ut, WRITEMASK_X, ut, state_size); 1524 emit_op2(p, OPCODE_MAX, ut, WRITEMASK_X, ut, swizzle1(state_size, Y)); 1525 emit_op2(p, OPCODE_MIN, out, WRITEMASK_X, ut, swizzle1(state_size, Z)); 1526#endif 1527 1528 release_temp(p, ut); 1529} 1530 1531 1532/** 1533 * Pass-though per-vertex point size, from user's point size array. 1534 */ 1535static void build_array_pointsize( struct tnl_program *p ) 1536{ 1537 struct ureg in = register_input(p, VERT_ATTRIB_POINT_SIZE); 1538 struct ureg out = register_output(p, VERT_RESULT_PSIZ); 1539 emit_op1(p, OPCODE_MOV, out, WRITEMASK_X, in); 1540} 1541 1542 1543static void build_tnl_program( struct tnl_program *p ) 1544{ 1545 /* Emit the program, starting with modelviewproject: 1546 */ 1547 build_hpos(p); 1548 1549 /* Lighting calculations: 1550 */ 1551 if (p->state->fragprog_inputs_read & (FRAG_BIT_COL0|FRAG_BIT_COL1)) { 1552 if (p->state->light_global_enabled) 1553 build_lighting(p); 1554 else { 1555 if (p->state->fragprog_inputs_read & FRAG_BIT_COL0) 1556 emit_passthrough(p, VERT_ATTRIB_COLOR0, VERT_RESULT_COL0); 1557 1558 if (p->state->fragprog_inputs_read & FRAG_BIT_COL1) 1559 emit_passthrough(p, VERT_ATTRIB_COLOR1, VERT_RESULT_COL1); 1560 } 1561 } 1562 1563 if (p->state->fragprog_inputs_read & FRAG_BIT_FOGC) 1564 build_fog(p); 1565 1566 if (p->state->fragprog_inputs_read & FRAG_BITS_TEX_ANY) 1567 build_texture_transform(p); 1568 1569 if (p->state->point_attenuated) 1570 build_atten_pointsize(p); 1571 else if (p->state->point_array) 1572 build_array_pointsize(p); 1573 1574 /* Finish up: 1575 */ 1576 emit_op1(p, OPCODE_END, undef, 0, undef); 1577 1578 /* Disassemble: 1579 */ 1580 if (DISASSEM) { 1581 _mesa_printf ("\n"); 1582 } 1583} 1584 1585 1586static void 1587create_new_program( const struct state_key *key, 1588 struct gl_vertex_program *program, 1589 GLboolean mvp_with_dp4, 1590 GLuint max_temps) 1591{ 1592 struct tnl_program p; 1593 1594 _mesa_memset(&p, 0, sizeof(p)); 1595 p.state = key; 1596 p.program = program; 1597 p.eye_position = undef; 1598 p.eye_position_z = undef; 1599 p.eye_position_normalized = undef; 1600 p.transformed_normal = undef; 1601 p.identity = undef; 1602 p.temp_in_use = 0; 1603 p.mvp_with_dp4 = mvp_with_dp4; 1604 1605 if (max_temps >= sizeof(int) * 8) 1606 p.temp_reserved = 0; 1607 else 1608 p.temp_reserved = ~((1<<max_temps)-1); 1609 1610 /* Start by allocating 32 instructions. 1611 * If we need more, we'll grow the instruction array as needed. 1612 */ 1613 p.max_inst = 32; 1614 p.program->Base.Instructions = _mesa_alloc_instructions(p.max_inst); 1615 p.program->Base.String = NULL; 1616 p.program->Base.NumInstructions = 1617 p.program->Base.NumTemporaries = 1618 p.program->Base.NumParameters = 1619 p.program->Base.NumAttributes = p.program->Base.NumAddressRegs = 0; 1620 p.program->Base.Parameters = _mesa_new_parameter_list(); 1621 p.program->Base.InputsRead = 0; 1622 p.program->Base.OutputsWritten = 0; 1623 1624 build_tnl_program( &p ); 1625} 1626 1627 1628/** 1629 * Return a vertex program which implements the current fixed-function 1630 * transform/lighting/texgen operations. 1631 * XXX move this into core mesa (main/) 1632 */ 1633struct gl_vertex_program * 1634_mesa_get_fixed_func_vertex_program(GLcontext *ctx) 1635{ 1636 struct gl_vertex_program *prog; 1637 struct state_key key; 1638 1639 /* Grab all the relevent state and put it in a single structure: 1640 */ 1641 make_state_key(ctx, &key); 1642 1643 /* Look for an already-prepared program for this state: 1644 */ 1645 prog = (struct gl_vertex_program *) 1646 _mesa_search_program_cache(ctx->VertexProgram.Cache, &key, sizeof(key)); 1647 1648 if (!prog) { 1649 /* OK, we'll have to build a new one */ 1650 if (0) 1651 _mesa_printf("Build new TNL program\n"); 1652 1653 prog = (struct gl_vertex_program *) 1654 ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 0); 1655 if (!prog) 1656 return NULL; 1657 1658 create_new_program( &key, prog, 1659 ctx->mvp_with_dp4, 1660 ctx->Const.VertexProgram.MaxTemps ); 1661 1662#if 0 1663 if (ctx->Driver.ProgramStringNotify) 1664 ctx->Driver.ProgramStringNotify( ctx, GL_VERTEX_PROGRAM_ARB, 1665 &prog->Base ); 1666#endif 1667 _mesa_program_cache_insert(ctx, ctx->VertexProgram.Cache, 1668 &key, sizeof(key), &prog->Base); 1669 } 1670 1671 return prog; 1672} 1673