ffvertex_prog.c revision 3464ebd5
1/************************************************************************** 2 * 3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * \file ffvertex_prog.c 30 * 31 * Create a vertex program to execute the current fixed function T&L pipeline. 32 * \author Keith Whitwell 33 */ 34 35 36#include "main/glheader.h" 37#include "main/mtypes.h" 38#include "main/macros.h" 39#include "main/mfeatures.h" 40#include "main/enums.h" 41#include "main/ffvertex_prog.h" 42#include "program/program.h" 43#include "program/prog_cache.h" 44#include "program/prog_instruction.h" 45#include "program/prog_parameter.h" 46#include "program/prog_print.h" 47#include "program/prog_statevars.h" 48 49 50/** Max of number of lights and texture coord units */ 51#define NUM_UNITS MAX2(MAX_TEXTURE_COORD_UNITS, MAX_LIGHTS) 52 53struct state_key { 54 unsigned light_color_material_mask:12; 55 unsigned light_global_enabled:1; 56 unsigned light_local_viewer:1; 57 unsigned light_twoside:1; 58 unsigned material_shininess_is_zero:1; 59 unsigned need_eye_coords:1; 60 unsigned normalize:1; 61 unsigned rescale_normals:1; 62 63 unsigned fog_source_is_depth:1; 64 unsigned separate_specular:1; 65 unsigned point_attenuated:1; 66 unsigned point_array:1; 67 unsigned texture_enabled_global:1; 68 unsigned fragprog_inputs_read:12; 69 70 unsigned varying_vp_inputs; 71 72 struct { 73 unsigned light_enabled:1; 74 unsigned light_eyepos3_is_zero:1; 75 unsigned light_spotcutoff_is_180:1; 76 unsigned light_attenuated:1; 77 unsigned texunit_really_enabled:1; 78 unsigned texmat_enabled:1; 79 unsigned coord_replace:1; 80 unsigned texgen_enabled:4; 81 unsigned texgen_mode0:4; 82 unsigned texgen_mode1:4; 83 unsigned texgen_mode2:4; 84 unsigned texgen_mode3:4; 85 } unit[NUM_UNITS]; 86}; 87 88 89#define TXG_NONE 0 90#define TXG_OBJ_LINEAR 1 91#define TXG_EYE_LINEAR 2 92#define TXG_SPHERE_MAP 3 93#define TXG_REFLECTION_MAP 4 94#define TXG_NORMAL_MAP 5 95 96static GLuint translate_texgen( GLboolean enabled, GLenum mode ) 97{ 98 if (!enabled) 99 return TXG_NONE; 100 101 switch (mode) { 102 case GL_OBJECT_LINEAR: return TXG_OBJ_LINEAR; 103 case GL_EYE_LINEAR: return TXG_EYE_LINEAR; 104 case GL_SPHERE_MAP: return TXG_SPHERE_MAP; 105 case GL_REFLECTION_MAP_NV: return TXG_REFLECTION_MAP; 106 case GL_NORMAL_MAP_NV: return TXG_NORMAL_MAP; 107 default: return TXG_NONE; 108 } 109} 110 111 112 113static GLboolean check_active_shininess( struct gl_context *ctx, 114 const struct state_key *key, 115 GLuint side ) 116{ 117 GLuint bit = 1 << (MAT_ATTRIB_FRONT_SHININESS + side); 118 119 if ((key->varying_vp_inputs & VERT_BIT_COLOR0) && 120 (key->light_color_material_mask & bit)) 121 return GL_TRUE; 122 123 if (key->varying_vp_inputs & (bit << 16)) 124 return GL_TRUE; 125 126 if (ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_SHININESS + side][0] != 0.0F) 127 return GL_TRUE; 128 129 return GL_FALSE; 130} 131 132 133static void make_state_key( struct gl_context *ctx, struct state_key *key ) 134{ 135 const struct gl_fragment_program *fp; 136 GLuint i; 137 138 memset(key, 0, sizeof(struct state_key)); 139 fp = ctx->FragmentProgram._Current; 140 141 /* This now relies on texenvprogram.c being active: 142 */ 143 assert(fp); 144 145 key->need_eye_coords = ctx->_NeedEyeCoords; 146 147 key->fragprog_inputs_read = fp->Base.InputsRead; 148 key->varying_vp_inputs = ctx->varying_vp_inputs; 149 150 if (ctx->RenderMode == GL_FEEDBACK) { 151 /* make sure the vertprog emits color and tex0 */ 152 key->fragprog_inputs_read |= (FRAG_BIT_COL0 | FRAG_BIT_TEX0); 153 } 154 155 key->separate_specular = (ctx->Light.Model.ColorControl == 156 GL_SEPARATE_SPECULAR_COLOR); 157 158 if (ctx->Light.Enabled) { 159 key->light_global_enabled = 1; 160 161 if (ctx->Light.Model.LocalViewer) 162 key->light_local_viewer = 1; 163 164 if (ctx->Light.Model.TwoSide) 165 key->light_twoside = 1; 166 167 if (ctx->Light.ColorMaterialEnabled) { 168 key->light_color_material_mask = ctx->Light.ColorMaterialBitmask; 169 } 170 171 for (i = 0; i < MAX_LIGHTS; i++) { 172 struct gl_light *light = &ctx->Light.Light[i]; 173 174 if (light->Enabled) { 175 key->unit[i].light_enabled = 1; 176 177 if (light->EyePosition[3] == 0.0) 178 key->unit[i].light_eyepos3_is_zero = 1; 179 180 if (light->SpotCutoff == 180.0) 181 key->unit[i].light_spotcutoff_is_180 = 1; 182 183 if (light->ConstantAttenuation != 1.0 || 184 light->LinearAttenuation != 0.0 || 185 light->QuadraticAttenuation != 0.0) 186 key->unit[i].light_attenuated = 1; 187 } 188 } 189 190 if (check_active_shininess(ctx, key, 0)) { 191 key->material_shininess_is_zero = 0; 192 } 193 else if (key->light_twoside && 194 check_active_shininess(ctx, key, 1)) { 195 key->material_shininess_is_zero = 0; 196 } 197 else { 198 key->material_shininess_is_zero = 1; 199 } 200 } 201 202 if (ctx->Transform.Normalize) 203 key->normalize = 1; 204 205 if (ctx->Transform.RescaleNormals) 206 key->rescale_normals = 1; 207 208 if (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT) 209 key->fog_source_is_depth = 1; 210 211 if (ctx->Point._Attenuated) 212 key->point_attenuated = 1; 213 214#if FEATURE_point_size_array 215 if (ctx->Array.ArrayObj->PointSize.Enabled) 216 key->point_array = 1; 217#endif 218 219 if (ctx->Texture._TexGenEnabled || 220 ctx->Texture._TexMatEnabled || 221 ctx->Texture._EnabledUnits) 222 key->texture_enabled_global = 1; 223 224 for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) { 225 struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; 226 227 if (texUnit->_ReallyEnabled) 228 key->unit[i].texunit_really_enabled = 1; 229 230 if (ctx->Point.PointSprite) 231 if (ctx->Point.CoordReplace[i]) 232 key->unit[i].coord_replace = 1; 233 234 if (ctx->Texture._TexMatEnabled & ENABLE_TEXMAT(i)) 235 key->unit[i].texmat_enabled = 1; 236 237 if (texUnit->TexGenEnabled) { 238 key->unit[i].texgen_enabled = 1; 239 240 key->unit[i].texgen_mode0 = 241 translate_texgen( texUnit->TexGenEnabled & (1<<0), 242 texUnit->GenS.Mode ); 243 key->unit[i].texgen_mode1 = 244 translate_texgen( texUnit->TexGenEnabled & (1<<1), 245 texUnit->GenT.Mode ); 246 key->unit[i].texgen_mode2 = 247 translate_texgen( texUnit->TexGenEnabled & (1<<2), 248 texUnit->GenR.Mode ); 249 key->unit[i].texgen_mode3 = 250 translate_texgen( texUnit->TexGenEnabled & (1<<3), 251 texUnit->GenQ.Mode ); 252 } 253 } 254} 255 256 257 258/* Very useful debugging tool - produces annotated listing of 259 * generated program with line/function references for each 260 * instruction back into this file: 261 */ 262#define DISASSEM 0 263 264 265/* Use uregs to represent registers internally, translate to Mesa's 266 * expected formats on emit. 267 * 268 * NOTE: These are passed by value extensively in this file rather 269 * than as usual by pointer reference. If this disturbs you, try 270 * remembering they are just 32bits in size. 271 * 272 * GCC is smart enough to deal with these dword-sized structures in 273 * much the same way as if I had defined them as dwords and was using 274 * macros to access and set the fields. This is much nicer and easier 275 * to evolve. 276 */ 277struct ureg { 278 GLuint file:4; 279 GLint idx:9; /* relative addressing may be negative */ 280 /* sizeof(idx) should == sizeof(prog_src_reg::Index) */ 281 GLuint negate:1; 282 GLuint swz:12; 283 GLuint pad:6; 284}; 285 286 287struct tnl_program { 288 const struct state_key *state; 289 struct gl_vertex_program *program; 290 GLint max_inst; /** number of instructions allocated for program */ 291 GLboolean mvp_with_dp4; 292 293 GLuint temp_in_use; 294 GLuint temp_reserved; 295 296 struct ureg eye_position; 297 struct ureg eye_position_z; 298 struct ureg eye_position_normalized; 299 struct ureg transformed_normal; 300 struct ureg identity; 301 302 GLuint materials; 303 GLuint color_materials; 304}; 305 306 307static const struct ureg undef = { 308 PROGRAM_UNDEFINED, 309 0, 310 0, 311 0, 312 0 313}; 314 315/* Local shorthand: 316 */ 317#define X SWIZZLE_X 318#define Y SWIZZLE_Y 319#define Z SWIZZLE_Z 320#define W SWIZZLE_W 321 322 323/* Construct a ureg: 324 */ 325static struct ureg make_ureg(GLuint file, GLint idx) 326{ 327 struct ureg reg; 328 reg.file = file; 329 reg.idx = idx; 330 reg.negate = 0; 331 reg.swz = SWIZZLE_NOOP; 332 reg.pad = 0; 333 return reg; 334} 335 336 337 338static struct ureg negate( struct ureg reg ) 339{ 340 reg.negate ^= 1; 341 return reg; 342} 343 344 345static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w ) 346{ 347 reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x), 348 GET_SWZ(reg.swz, y), 349 GET_SWZ(reg.swz, z), 350 GET_SWZ(reg.swz, w)); 351 return reg; 352} 353 354 355static struct ureg swizzle1( struct ureg reg, int x ) 356{ 357 return swizzle(reg, x, x, x, x); 358} 359 360 361static struct ureg get_temp( struct tnl_program *p ) 362{ 363 int bit = _mesa_ffs( ~p->temp_in_use ); 364 if (!bit) { 365 _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__); 366 exit(1); 367 } 368 369 if ((GLuint) bit > p->program->Base.NumTemporaries) 370 p->program->Base.NumTemporaries = bit; 371 372 p->temp_in_use |= 1<<(bit-1); 373 return make_ureg(PROGRAM_TEMPORARY, bit-1); 374} 375 376 377static struct ureg reserve_temp( struct tnl_program *p ) 378{ 379 struct ureg temp = get_temp( p ); 380 p->temp_reserved |= 1<<temp.idx; 381 return temp; 382} 383 384 385static void release_temp( struct tnl_program *p, struct ureg reg ) 386{ 387 if (reg.file == PROGRAM_TEMPORARY) { 388 p->temp_in_use &= ~(1<<reg.idx); 389 p->temp_in_use |= p->temp_reserved; /* can't release reserved temps */ 390 } 391} 392 393static void release_temps( struct tnl_program *p ) 394{ 395 p->temp_in_use = p->temp_reserved; 396} 397 398 399static struct ureg register_param5(struct tnl_program *p, 400 GLint s0, 401 GLint s1, 402 GLint s2, 403 GLint s3, 404 GLint s4) 405{ 406 gl_state_index tokens[STATE_LENGTH]; 407 GLint idx; 408 tokens[0] = s0; 409 tokens[1] = s1; 410 tokens[2] = s2; 411 tokens[3] = s3; 412 tokens[4] = s4; 413 idx = _mesa_add_state_reference( p->program->Base.Parameters, tokens ); 414 return make_ureg(PROGRAM_STATE_VAR, idx); 415} 416 417 418#define register_param1(p,s0) register_param5(p,s0,0,0,0,0) 419#define register_param2(p,s0,s1) register_param5(p,s0,s1,0,0,0) 420#define register_param3(p,s0,s1,s2) register_param5(p,s0,s1,s2,0,0) 421#define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0) 422 423 424 425/** 426 * \param input one of VERT_ATTRIB_x tokens. 427 */ 428static struct ureg register_input( struct tnl_program *p, GLuint input ) 429{ 430 assert(input < 32); 431 432 if (p->state->varying_vp_inputs & (1<<input)) { 433 p->program->Base.InputsRead |= (1<<input); 434 return make_ureg(PROGRAM_INPUT, input); 435 } 436 else { 437 return register_param3( p, STATE_INTERNAL, STATE_CURRENT_ATTRIB, input ); 438 } 439} 440 441 442/** 443 * \param input one of VERT_RESULT_x tokens. 444 */ 445static struct ureg register_output( struct tnl_program *p, GLuint output ) 446{ 447 p->program->Base.OutputsWritten |= BITFIELD64_BIT(output); 448 return make_ureg(PROGRAM_OUTPUT, output); 449} 450 451 452static struct ureg register_const4f( struct tnl_program *p, 453 GLfloat s0, 454 GLfloat s1, 455 GLfloat s2, 456 GLfloat s3) 457{ 458 GLfloat values[4]; 459 GLint idx; 460 GLuint swizzle; 461 values[0] = s0; 462 values[1] = s1; 463 values[2] = s2; 464 values[3] = s3; 465 idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4, 466 &swizzle ); 467 ASSERT(swizzle == SWIZZLE_NOOP); 468 return make_ureg(PROGRAM_CONSTANT, idx); 469} 470 471#define register_const1f(p, s0) register_const4f(p, s0, 0, 0, 1) 472#define register_scalar_const(p, s0) register_const4f(p, s0, s0, s0, s0) 473#define register_const2f(p, s0, s1) register_const4f(p, s0, s1, 0, 1) 474#define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1) 475 476static GLboolean is_undef( struct ureg reg ) 477{ 478 return reg.file == PROGRAM_UNDEFINED; 479} 480 481 482static struct ureg get_identity_param( struct tnl_program *p ) 483{ 484 if (is_undef(p->identity)) 485 p->identity = register_const4f(p, 0,0,0,1); 486 487 return p->identity; 488} 489 490static void register_matrix_param5( struct tnl_program *p, 491 GLint s0, /* modelview, projection, etc */ 492 GLint s1, /* texture matrix number */ 493 GLint s2, /* first row */ 494 GLint s3, /* last row */ 495 GLint s4, /* inverse, transpose, etc */ 496 struct ureg *matrix ) 497{ 498 GLint i; 499 500 /* This is a bit sad as the support is there to pull the whole 501 * matrix out in one go: 502 */ 503 for (i = 0; i <= s3 - s2; i++) 504 matrix[i] = register_param5( p, s0, s1, i, i, s4 ); 505} 506 507 508static void emit_arg( struct prog_src_register *src, 509 struct ureg reg ) 510{ 511 src->File = reg.file; 512 src->Index = reg.idx; 513 src->Swizzle = reg.swz; 514 src->Negate = reg.negate ? NEGATE_XYZW : NEGATE_NONE; 515 src->Abs = 0; 516 src->RelAddr = 0; 517 /* Check that bitfield sizes aren't exceeded */ 518 ASSERT(src->Index == reg.idx); 519} 520 521 522static void emit_dst( struct prog_dst_register *dst, 523 struct ureg reg, GLuint mask ) 524{ 525 dst->File = reg.file; 526 dst->Index = reg.idx; 527 /* allow zero as a shorthand for xyzw */ 528 dst->WriteMask = mask ? mask : WRITEMASK_XYZW; 529 dst->CondMask = COND_TR; /* always pass cond test */ 530 dst->CondSwizzle = SWIZZLE_NOOP; 531 dst->CondSrc = 0; 532 /* Check that bitfield sizes aren't exceeded */ 533 ASSERT(dst->Index == reg.idx); 534} 535 536 537static void debug_insn( struct prog_instruction *inst, const char *fn, 538 GLuint line ) 539{ 540 if (DISASSEM) { 541 static const char *last_fn; 542 543 if (fn != last_fn) { 544 last_fn = fn; 545 printf("%s:\n", fn); 546 } 547 548 printf("%d:\t", line); 549 _mesa_print_instruction(inst); 550 } 551} 552 553 554static void emit_op3fn(struct tnl_program *p, 555 enum prog_opcode op, 556 struct ureg dest, 557 GLuint mask, 558 struct ureg src0, 559 struct ureg src1, 560 struct ureg src2, 561 const char *fn, 562 GLuint line) 563{ 564 GLuint nr; 565 struct prog_instruction *inst; 566 567 assert((GLint) p->program->Base.NumInstructions <= p->max_inst); 568 569 if (p->program->Base.NumInstructions == p->max_inst) { 570 /* need to extend the program's instruction array */ 571 struct prog_instruction *newInst; 572 573 /* double the size */ 574 p->max_inst *= 2; 575 576 newInst = _mesa_alloc_instructions(p->max_inst); 577 if (!newInst) { 578 _mesa_error(NULL, GL_OUT_OF_MEMORY, "vertex program build"); 579 return; 580 } 581 582 _mesa_copy_instructions(newInst, 583 p->program->Base.Instructions, 584 p->program->Base.NumInstructions); 585 586 _mesa_free_instructions(p->program->Base.Instructions, 587 p->program->Base.NumInstructions); 588 589 p->program->Base.Instructions = newInst; 590 } 591 592 nr = p->program->Base.NumInstructions++; 593 594 inst = &p->program->Base.Instructions[nr]; 595 inst->Opcode = (enum prog_opcode) op; 596 inst->Data = 0; 597 598 emit_arg( &inst->SrcReg[0], src0 ); 599 emit_arg( &inst->SrcReg[1], src1 ); 600 emit_arg( &inst->SrcReg[2], src2 ); 601 602 emit_dst( &inst->DstReg, dest, mask ); 603 604 debug_insn(inst, fn, line); 605} 606 607 608#define emit_op3(p, op, dst, mask, src0, src1, src2) \ 609 emit_op3fn(p, op, dst, mask, src0, src1, src2, __FUNCTION__, __LINE__) 610 611#define emit_op2(p, op, dst, mask, src0, src1) \ 612 emit_op3fn(p, op, dst, mask, src0, src1, undef, __FUNCTION__, __LINE__) 613 614#define emit_op1(p, op, dst, mask, src0) \ 615 emit_op3fn(p, op, dst, mask, src0, undef, undef, __FUNCTION__, __LINE__) 616 617 618static struct ureg make_temp( struct tnl_program *p, struct ureg reg ) 619{ 620 if (reg.file == PROGRAM_TEMPORARY && 621 !(p->temp_reserved & (1<<reg.idx))) 622 return reg; 623 else { 624 struct ureg temp = get_temp(p); 625 emit_op1(p, OPCODE_MOV, temp, 0, reg); 626 return temp; 627 } 628} 629 630 631/* Currently no tracking performed of input/output/register size or 632 * active elements. Could be used to reduce these operations, as 633 * could the matrix type. 634 */ 635static void emit_matrix_transform_vec4( struct tnl_program *p, 636 struct ureg dest, 637 const struct ureg *mat, 638 struct ureg src) 639{ 640 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_X, src, mat[0]); 641 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Y, src, mat[1]); 642 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Z, src, mat[2]); 643 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_W, src, mat[3]); 644} 645 646 647/* This version is much easier to implement if writemasks are not 648 * supported natively on the target or (like SSE), the target doesn't 649 * have a clean/obvious dotproduct implementation. 650 */ 651static void emit_transpose_matrix_transform_vec4( struct tnl_program *p, 652 struct ureg dest, 653 const struct ureg *mat, 654 struct ureg src) 655{ 656 struct ureg tmp; 657 658 if (dest.file != PROGRAM_TEMPORARY) 659 tmp = get_temp(p); 660 else 661 tmp = dest; 662 663 emit_op2(p, OPCODE_MUL, tmp, 0, swizzle1(src,X), mat[0]); 664 emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Y), mat[1], tmp); 665 emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Z), mat[2], tmp); 666 emit_op3(p, OPCODE_MAD, dest, 0, swizzle1(src,W), mat[3], tmp); 667 668 if (dest.file != PROGRAM_TEMPORARY) 669 release_temp(p, tmp); 670} 671 672 673static void emit_matrix_transform_vec3( struct tnl_program *p, 674 struct ureg dest, 675 const struct ureg *mat, 676 struct ureg src) 677{ 678 emit_op2(p, OPCODE_DP3, dest, WRITEMASK_X, src, mat[0]); 679 emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Y, src, mat[1]); 680 emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Z, src, mat[2]); 681} 682 683 684static void emit_normalize_vec3( struct tnl_program *p, 685 struct ureg dest, 686 struct ureg src ) 687{ 688#if 0 689 /* XXX use this when drivers are ready for NRM3 */ 690 emit_op1(p, OPCODE_NRM3, dest, WRITEMASK_XYZ, src); 691#else 692 struct ureg tmp = get_temp(p); 693 emit_op2(p, OPCODE_DP3, tmp, WRITEMASK_X, src, src); 694 emit_op1(p, OPCODE_RSQ, tmp, WRITEMASK_X, tmp); 695 emit_op2(p, OPCODE_MUL, dest, 0, src, swizzle1(tmp, X)); 696 release_temp(p, tmp); 697#endif 698} 699 700 701static void emit_passthrough( struct tnl_program *p, 702 GLuint input, 703 GLuint output ) 704{ 705 struct ureg out = register_output(p, output); 706 emit_op1(p, OPCODE_MOV, out, 0, register_input(p, input)); 707} 708 709 710static struct ureg get_eye_position( struct tnl_program *p ) 711{ 712 if (is_undef(p->eye_position)) { 713 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 714 struct ureg modelview[4]; 715 716 p->eye_position = reserve_temp(p); 717 718 if (p->mvp_with_dp4) { 719 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3, 720 0, modelview ); 721 722 emit_matrix_transform_vec4(p, p->eye_position, modelview, pos); 723 } 724 else { 725 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3, 726 STATE_MATRIX_TRANSPOSE, modelview ); 727 728 emit_transpose_matrix_transform_vec4(p, p->eye_position, modelview, pos); 729 } 730 } 731 732 return p->eye_position; 733} 734 735 736static struct ureg get_eye_position_z( struct tnl_program *p ) 737{ 738 if (!is_undef(p->eye_position)) 739 return swizzle1(p->eye_position, Z); 740 741 if (is_undef(p->eye_position_z)) { 742 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 743 struct ureg modelview[4]; 744 745 p->eye_position_z = reserve_temp(p); 746 747 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3, 748 0, modelview ); 749 750 emit_op2(p, OPCODE_DP4, p->eye_position_z, 0, pos, modelview[2]); 751 } 752 753 return p->eye_position_z; 754} 755 756 757static struct ureg get_eye_position_normalized( struct tnl_program *p ) 758{ 759 if (is_undef(p->eye_position_normalized)) { 760 struct ureg eye = get_eye_position(p); 761 p->eye_position_normalized = reserve_temp(p); 762 emit_normalize_vec3(p, p->eye_position_normalized, eye); 763 } 764 765 return p->eye_position_normalized; 766} 767 768 769static struct ureg get_transformed_normal( struct tnl_program *p ) 770{ 771 if (is_undef(p->transformed_normal) && 772 !p->state->need_eye_coords && 773 !p->state->normalize && 774 !(p->state->need_eye_coords == p->state->rescale_normals)) 775 { 776 p->transformed_normal = register_input(p, VERT_ATTRIB_NORMAL ); 777 } 778 else if (is_undef(p->transformed_normal)) 779 { 780 struct ureg normal = register_input(p, VERT_ATTRIB_NORMAL ); 781 struct ureg mvinv[3]; 782 struct ureg transformed_normal = reserve_temp(p); 783 784 if (p->state->need_eye_coords) { 785 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 2, 786 STATE_MATRIX_INVTRANS, mvinv ); 787 788 /* Transform to eye space: 789 */ 790 emit_matrix_transform_vec3( p, transformed_normal, mvinv, normal ); 791 normal = transformed_normal; 792 } 793 794 /* Normalize/Rescale: 795 */ 796 if (p->state->normalize) { 797 emit_normalize_vec3( p, transformed_normal, normal ); 798 normal = transformed_normal; 799 } 800 else if (p->state->need_eye_coords == p->state->rescale_normals) { 801 /* This is already adjusted for eye/non-eye rendering: 802 */ 803 struct ureg rescale = register_param2(p, STATE_INTERNAL, 804 STATE_NORMAL_SCALE); 805 806 emit_op2( p, OPCODE_MUL, transformed_normal, 0, normal, rescale ); 807 normal = transformed_normal; 808 } 809 810 assert(normal.file == PROGRAM_TEMPORARY); 811 p->transformed_normal = normal; 812 } 813 814 return p->transformed_normal; 815} 816 817 818static void build_hpos( struct tnl_program *p ) 819{ 820 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 821 struct ureg hpos = register_output( p, VERT_RESULT_HPOS ); 822 struct ureg mvp[4]; 823 824 if (p->mvp_with_dp4) { 825 register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3, 826 0, mvp ); 827 emit_matrix_transform_vec4( p, hpos, mvp, pos ); 828 } 829 else { 830 register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3, 831 STATE_MATRIX_TRANSPOSE, mvp ); 832 emit_transpose_matrix_transform_vec4( p, hpos, mvp, pos ); 833 } 834} 835 836 837static GLuint material_attrib( GLuint side, GLuint property ) 838{ 839 return (property - STATE_AMBIENT) * 2 + side; 840} 841 842 843/** 844 * Get a bitmask of which material values vary on a per-vertex basis. 845 */ 846static void set_material_flags( struct tnl_program *p ) 847{ 848 p->color_materials = 0; 849 p->materials = 0; 850 851 if (p->state->varying_vp_inputs & VERT_BIT_COLOR0) { 852 p->materials = 853 p->color_materials = p->state->light_color_material_mask; 854 } 855 856 p->materials |= (p->state->varying_vp_inputs >> 16); 857} 858 859 860static struct ureg get_material( struct tnl_program *p, GLuint side, 861 GLuint property ) 862{ 863 GLuint attrib = material_attrib(side, property); 864 865 if (p->color_materials & (1<<attrib)) 866 return register_input(p, VERT_ATTRIB_COLOR0); 867 else if (p->materials & (1<<attrib)) { 868 /* Put material values in the GENERIC slots -- they are not used 869 * for anything in fixed function mode. 870 */ 871 return register_input( p, attrib + VERT_ATTRIB_GENERIC0 ); 872 } 873 else 874 return register_param3( p, STATE_MATERIAL, side, property ); 875} 876 877#define SCENE_COLOR_BITS(side) (( MAT_BIT_FRONT_EMISSION | \ 878 MAT_BIT_FRONT_AMBIENT | \ 879 MAT_BIT_FRONT_DIFFUSE) << (side)) 880 881 882/** 883 * Either return a precalculated constant value or emit code to 884 * calculate these values dynamically in the case where material calls 885 * are present between begin/end pairs. 886 * 887 * Probably want to shift this to the program compilation phase - if 888 * we always emitted the calculation here, a smart compiler could 889 * detect that it was constant (given a certain set of inputs), and 890 * lift it out of the main loop. That way the programs created here 891 * would be independent of the vertex_buffer details. 892 */ 893static struct ureg get_scenecolor( struct tnl_program *p, GLuint side ) 894{ 895 if (p->materials & SCENE_COLOR_BITS(side)) { 896 struct ureg lm_ambient = register_param1(p, STATE_LIGHTMODEL_AMBIENT); 897 struct ureg material_emission = get_material(p, side, STATE_EMISSION); 898 struct ureg material_ambient = get_material(p, side, STATE_AMBIENT); 899 struct ureg material_diffuse = get_material(p, side, STATE_DIFFUSE); 900 struct ureg tmp = make_temp(p, material_diffuse); 901 emit_op3(p, OPCODE_MAD, tmp, WRITEMASK_XYZ, lm_ambient, 902 material_ambient, material_emission); 903 return tmp; 904 } 905 else 906 return register_param2( p, STATE_LIGHTMODEL_SCENECOLOR, side ); 907} 908 909 910static struct ureg get_lightprod( struct tnl_program *p, GLuint light, 911 GLuint side, GLuint property ) 912{ 913 GLuint attrib = material_attrib(side, property); 914 if (p->materials & (1<<attrib)) { 915 struct ureg light_value = 916 register_param3(p, STATE_LIGHT, light, property); 917 struct ureg material_value = get_material(p, side, property); 918 struct ureg tmp = get_temp(p); 919 emit_op2(p, OPCODE_MUL, tmp, 0, light_value, material_value); 920 return tmp; 921 } 922 else 923 return register_param4(p, STATE_LIGHTPROD, light, side, property); 924} 925 926 927static struct ureg calculate_light_attenuation( struct tnl_program *p, 928 GLuint i, 929 struct ureg VPpli, 930 struct ureg dist ) 931{ 932 struct ureg attenuation = register_param3(p, STATE_LIGHT, i, 933 STATE_ATTENUATION); 934 struct ureg att = get_temp(p); 935 936 /* Calculate spot attenuation: 937 */ 938 if (!p->state->unit[i].light_spotcutoff_is_180) { 939 struct ureg spot_dir_norm = register_param3(p, STATE_INTERNAL, 940 STATE_LIGHT_SPOT_DIR_NORMALIZED, i); 941 struct ureg spot = get_temp(p); 942 struct ureg slt = get_temp(p); 943 944 emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm); 945 emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot); 946 emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W)); 947 emit_op2(p, OPCODE_MUL, att, 0, slt, spot); 948 949 release_temp(p, spot); 950 release_temp(p, slt); 951 } 952 953 /* Calculate distance attenuation: 954 */ 955 if (p->state->unit[i].light_attenuated) { 956 /* 1/d,d,d,1/d */ 957 emit_op1(p, OPCODE_RCP, dist, WRITEMASK_YZ, dist); 958 /* 1,d,d*d,1/d */ 959 emit_op2(p, OPCODE_MUL, dist, WRITEMASK_XZ, dist, swizzle1(dist,Y)); 960 /* 1/dist-atten */ 961 emit_op2(p, OPCODE_DP3, dist, 0, attenuation, dist); 962 963 if (!p->state->unit[i].light_spotcutoff_is_180) { 964 /* dist-atten */ 965 emit_op1(p, OPCODE_RCP, dist, 0, dist); 966 /* spot-atten * dist-atten */ 967 emit_op2(p, OPCODE_MUL, att, 0, dist, att); 968 } 969 else { 970 /* dist-atten */ 971 emit_op1(p, OPCODE_RCP, att, 0, dist); 972 } 973 } 974 975 return att; 976} 977 978 979/** 980 * Compute: 981 * lit.y = MAX(0, dots.x) 982 * lit.z = SLT(0, dots.x) 983 */ 984static void emit_degenerate_lit( struct tnl_program *p, 985 struct ureg lit, 986 struct ureg dots ) 987{ 988 struct ureg id = get_identity_param(p); /* id = {0,0,0,1} */ 989 990 /* Note that lit.x & lit.w will not be examined. Note also that 991 * dots.xyzw == dots.xxxx. 992 */ 993 994 /* MAX lit, id, dots; 995 */ 996 emit_op2(p, OPCODE_MAX, lit, WRITEMASK_XYZW, id, dots); 997 998 /* result[2] = (in > 0 ? 1 : 0) 999 * SLT lit.z, id.z, dots; # lit.z = (0 < dots.z) ? 1 : 0 1000 */ 1001 emit_op2(p, OPCODE_SLT, lit, WRITEMASK_Z, swizzle1(id,Z), dots); 1002} 1003 1004 1005/* Need to add some addtional parameters to allow lighting in object 1006 * space - STATE_SPOT_DIRECTION and STATE_HALF_VECTOR implicitly assume eye 1007 * space lighting. 1008 */ 1009static void build_lighting( struct tnl_program *p ) 1010{ 1011 const GLboolean twoside = p->state->light_twoside; 1012 const GLboolean separate = p->state->separate_specular; 1013 GLuint nr_lights = 0, count = 0; 1014 struct ureg normal = get_transformed_normal(p); 1015 struct ureg lit = get_temp(p); 1016 struct ureg dots = get_temp(p); 1017 struct ureg _col0 = undef, _col1 = undef; 1018 struct ureg _bfc0 = undef, _bfc1 = undef; 1019 GLuint i; 1020 1021 /* 1022 * NOTE: 1023 * dots.x = dot(normal, VPpli) 1024 * dots.y = dot(normal, halfAngle) 1025 * dots.z = back.shininess 1026 * dots.w = front.shininess 1027 */ 1028 1029 for (i = 0; i < MAX_LIGHTS; i++) 1030 if (p->state->unit[i].light_enabled) 1031 nr_lights++; 1032 1033 set_material_flags(p); 1034 1035 { 1036 if (!p->state->material_shininess_is_zero) { 1037 struct ureg shininess = get_material(p, 0, STATE_SHININESS); 1038 emit_op1(p, OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X)); 1039 release_temp(p, shininess); 1040 } 1041 1042 _col0 = make_temp(p, get_scenecolor(p, 0)); 1043 if (separate) 1044 _col1 = make_temp(p, get_identity_param(p)); 1045 else 1046 _col1 = _col0; 1047 } 1048 1049 if (twoside) { 1050 if (!p->state->material_shininess_is_zero) { 1051 /* Note that we negate the back-face specular exponent here. 1052 * The negation will be un-done later in the back-face code below. 1053 */ 1054 struct ureg shininess = get_material(p, 1, STATE_SHININESS); 1055 emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z, 1056 negate(swizzle1(shininess,X))); 1057 release_temp(p, shininess); 1058 } 1059 1060 _bfc0 = make_temp(p, get_scenecolor(p, 1)); 1061 if (separate) 1062 _bfc1 = make_temp(p, get_identity_param(p)); 1063 else 1064 _bfc1 = _bfc0; 1065 } 1066 1067 /* If no lights, still need to emit the scenecolor. 1068 */ 1069 { 1070 struct ureg res0 = register_output( p, VERT_RESULT_COL0 ); 1071 emit_op1(p, OPCODE_MOV, res0, 0, _col0); 1072 } 1073 1074 if (separate) { 1075 struct ureg res1 = register_output( p, VERT_RESULT_COL1 ); 1076 emit_op1(p, OPCODE_MOV, res1, 0, _col1); 1077 } 1078 1079 if (twoside) { 1080 struct ureg res0 = register_output( p, VERT_RESULT_BFC0 ); 1081 emit_op1(p, OPCODE_MOV, res0, 0, _bfc0); 1082 } 1083 1084 if (twoside && separate) { 1085 struct ureg res1 = register_output( p, VERT_RESULT_BFC1 ); 1086 emit_op1(p, OPCODE_MOV, res1, 0, _bfc1); 1087 } 1088 1089 if (nr_lights == 0) { 1090 release_temps(p); 1091 return; 1092 } 1093 1094 for (i = 0; i < MAX_LIGHTS; i++) { 1095 if (p->state->unit[i].light_enabled) { 1096 struct ureg half = undef; 1097 struct ureg att = undef, VPpli = undef; 1098 1099 count++; 1100 1101 if (p->state->unit[i].light_eyepos3_is_zero) { 1102 /* Can used precomputed constants in this case. 1103 * Attenuation never applies to infinite lights. 1104 */ 1105 VPpli = register_param3(p, STATE_INTERNAL, 1106 STATE_LIGHT_POSITION_NORMALIZED, i); 1107 1108 if (!p->state->material_shininess_is_zero) { 1109 if (p->state->light_local_viewer) { 1110 struct ureg eye_hat = get_eye_position_normalized(p); 1111 half = get_temp(p); 1112 emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat); 1113 emit_normalize_vec3(p, half, half); 1114 } 1115 else { 1116 half = register_param3(p, STATE_INTERNAL, 1117 STATE_LIGHT_HALF_VECTOR, i); 1118 } 1119 } 1120 } 1121 else { 1122 struct ureg Ppli = register_param3(p, STATE_INTERNAL, 1123 STATE_LIGHT_POSITION, i); 1124 struct ureg V = get_eye_position(p); 1125 struct ureg dist = get_temp(p); 1126 1127 VPpli = get_temp(p); 1128 1129 /* Calculate VPpli vector 1130 */ 1131 emit_op2(p, OPCODE_SUB, VPpli, 0, Ppli, V); 1132 1133 /* Normalize VPpli. The dist value also used in 1134 * attenuation below. 1135 */ 1136 emit_op2(p, OPCODE_DP3, dist, 0, VPpli, VPpli); 1137 emit_op1(p, OPCODE_RSQ, dist, 0, dist); 1138 emit_op2(p, OPCODE_MUL, VPpli, 0, VPpli, dist); 1139 1140 /* Calculate attenuation: 1141 */ 1142 if (!p->state->unit[i].light_spotcutoff_is_180 || 1143 p->state->unit[i].light_attenuated) { 1144 att = calculate_light_attenuation(p, i, VPpli, dist); 1145 } 1146 1147 /* Calculate viewer direction, or use infinite viewer: 1148 */ 1149 if (!p->state->material_shininess_is_zero) { 1150 half = get_temp(p); 1151 1152 if (p->state->light_local_viewer) { 1153 struct ureg eye_hat = get_eye_position_normalized(p); 1154 emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat); 1155 } 1156 else { 1157 struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z); 1158 emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir); 1159 } 1160 1161 emit_normalize_vec3(p, half, half); 1162 } 1163 1164 release_temp(p, dist); 1165 } 1166 1167 /* Calculate dot products: 1168 */ 1169 if (p->state->material_shininess_is_zero) { 1170 emit_op2(p, OPCODE_DP3, dots, 0, normal, VPpli); 1171 } 1172 else { 1173 emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli); 1174 emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half); 1175 } 1176 1177 /* Front face lighting: 1178 */ 1179 { 1180 struct ureg ambient = get_lightprod(p, i, 0, STATE_AMBIENT); 1181 struct ureg diffuse = get_lightprod(p, i, 0, STATE_DIFFUSE); 1182 struct ureg specular = get_lightprod(p, i, 0, STATE_SPECULAR); 1183 struct ureg res0, res1; 1184 GLuint mask0, mask1; 1185 1186 if (count == nr_lights) { 1187 if (separate) { 1188 mask0 = WRITEMASK_XYZ; 1189 mask1 = WRITEMASK_XYZ; 1190 res0 = register_output( p, VERT_RESULT_COL0 ); 1191 res1 = register_output( p, VERT_RESULT_COL1 ); 1192 } 1193 else { 1194 mask0 = 0; 1195 mask1 = WRITEMASK_XYZ; 1196 res0 = _col0; 1197 res1 = register_output( p, VERT_RESULT_COL0 ); 1198 } 1199 } 1200 else { 1201 mask0 = 0; 1202 mask1 = 0; 1203 res0 = _col0; 1204 res1 = _col1; 1205 } 1206 1207 if (!is_undef(att)) { 1208 /* light is attenuated by distance */ 1209 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1210 emit_op2(p, OPCODE_MUL, lit, 0, lit, att); 1211 emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0); 1212 } 1213 else if (!p->state->material_shininess_is_zero) { 1214 /* there's a non-zero specular term */ 1215 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1216 emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0); 1217 } 1218 else { 1219 /* no attenutation, no specular */ 1220 emit_degenerate_lit(p, lit, dots); 1221 emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0); 1222 } 1223 1224 emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0); 1225 emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1); 1226 1227 release_temp(p, ambient); 1228 release_temp(p, diffuse); 1229 release_temp(p, specular); 1230 } 1231 1232 /* Back face lighting: 1233 */ 1234 if (twoside) { 1235 struct ureg ambient = get_lightprod(p, i, 1, STATE_AMBIENT); 1236 struct ureg diffuse = get_lightprod(p, i, 1, STATE_DIFFUSE); 1237 struct ureg specular = get_lightprod(p, i, 1, STATE_SPECULAR); 1238 struct ureg res0, res1; 1239 GLuint mask0, mask1; 1240 1241 if (count == nr_lights) { 1242 if (separate) { 1243 mask0 = WRITEMASK_XYZ; 1244 mask1 = WRITEMASK_XYZ; 1245 res0 = register_output( p, VERT_RESULT_BFC0 ); 1246 res1 = register_output( p, VERT_RESULT_BFC1 ); 1247 } 1248 else { 1249 mask0 = 0; 1250 mask1 = WRITEMASK_XYZ; 1251 res0 = _bfc0; 1252 res1 = register_output( p, VERT_RESULT_BFC0 ); 1253 } 1254 } 1255 else { 1256 res0 = _bfc0; 1257 res1 = _bfc1; 1258 mask0 = 0; 1259 mask1 = 0; 1260 } 1261 1262 /* For the back face we need to negate the X and Y component 1263 * dot products. dots.Z has the negated back-face specular 1264 * exponent. We swizzle that into the W position. This 1265 * negation makes the back-face specular term positive again. 1266 */ 1267 dots = negate(swizzle(dots,X,Y,W,Z)); 1268 1269 if (!is_undef(att)) { 1270 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1271 emit_op2(p, OPCODE_MUL, lit, 0, lit, att); 1272 emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0); 1273 } 1274 else if (!p->state->material_shininess_is_zero) { 1275 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1276 emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0); /**/ 1277 } 1278 else { 1279 emit_degenerate_lit(p, lit, dots); 1280 emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0); 1281 } 1282 1283 emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0); 1284 emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1); 1285 /* restore dots to its original state for subsequent lights 1286 * by negating and swizzling again. 1287 */ 1288 dots = negate(swizzle(dots,X,Y,W,Z)); 1289 1290 release_temp(p, ambient); 1291 release_temp(p, diffuse); 1292 release_temp(p, specular); 1293 } 1294 1295 release_temp(p, half); 1296 release_temp(p, VPpli); 1297 release_temp(p, att); 1298 } 1299 } 1300 1301 release_temps( p ); 1302} 1303 1304 1305static void build_fog( struct tnl_program *p ) 1306{ 1307 struct ureg fog = register_output(p, VERT_RESULT_FOGC); 1308 struct ureg input; 1309 1310 if (p->state->fog_source_is_depth) { 1311 input = get_eye_position_z(p); 1312 } 1313 else { 1314 input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X); 1315 } 1316 1317 /* result.fog = {abs(f),0,0,1}; */ 1318 emit_op1(p, OPCODE_ABS, fog, WRITEMASK_X, input); 1319 emit_op1(p, OPCODE_MOV, fog, WRITEMASK_YZW, get_identity_param(p)); 1320} 1321 1322 1323static void build_reflect_texgen( struct tnl_program *p, 1324 struct ureg dest, 1325 GLuint writemask ) 1326{ 1327 struct ureg normal = get_transformed_normal(p); 1328 struct ureg eye_hat = get_eye_position_normalized(p); 1329 struct ureg tmp = get_temp(p); 1330 1331 /* n.u */ 1332 emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat); 1333 /* 2n.u */ 1334 emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp); 1335 /* (-2n.u)n + u */ 1336 emit_op3(p, OPCODE_MAD, dest, writemask, negate(tmp), normal, eye_hat); 1337 1338 release_temp(p, tmp); 1339} 1340 1341 1342static void build_sphere_texgen( struct tnl_program *p, 1343 struct ureg dest, 1344 GLuint writemask ) 1345{ 1346 struct ureg normal = get_transformed_normal(p); 1347 struct ureg eye_hat = get_eye_position_normalized(p); 1348 struct ureg tmp = get_temp(p); 1349 struct ureg half = register_scalar_const(p, .5); 1350 struct ureg r = get_temp(p); 1351 struct ureg inv_m = get_temp(p); 1352 struct ureg id = get_identity_param(p); 1353 1354 /* Could share the above calculations, but it would be 1355 * a fairly odd state for someone to set (both sphere and 1356 * reflection active for different texture coordinate 1357 * components. Of course - if two texture units enable 1358 * reflect and/or sphere, things start to tilt in favour 1359 * of seperating this out: 1360 */ 1361 1362 /* n.u */ 1363 emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat); 1364 /* 2n.u */ 1365 emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp); 1366 /* (-2n.u)n + u */ 1367 emit_op3(p, OPCODE_MAD, r, 0, negate(tmp), normal, eye_hat); 1368 /* r + 0,0,1 */ 1369 emit_op2(p, OPCODE_ADD, tmp, 0, r, swizzle(id,X,Y,W,Z)); 1370 /* rx^2 + ry^2 + (rz+1)^2 */ 1371 emit_op2(p, OPCODE_DP3, tmp, 0, tmp, tmp); 1372 /* 2/m */ 1373 emit_op1(p, OPCODE_RSQ, tmp, 0, tmp); 1374 /* 1/m */ 1375 emit_op2(p, OPCODE_MUL, inv_m, 0, tmp, half); 1376 /* r/m + 1/2 */ 1377 emit_op3(p, OPCODE_MAD, dest, writemask, r, inv_m, half); 1378 1379 release_temp(p, tmp); 1380 release_temp(p, r); 1381 release_temp(p, inv_m); 1382} 1383 1384 1385static void build_texture_transform( struct tnl_program *p ) 1386{ 1387 GLuint i, j; 1388 1389 for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) { 1390 1391 if (!(p->state->fragprog_inputs_read & FRAG_BIT_TEX(i))) 1392 continue; 1393 1394 if (p->state->unit[i].coord_replace) 1395 continue; 1396 1397 if (p->state->unit[i].texgen_enabled || 1398 p->state->unit[i].texmat_enabled) { 1399 1400 GLuint texmat_enabled = p->state->unit[i].texmat_enabled; 1401 struct ureg out = register_output(p, VERT_RESULT_TEX0 + i); 1402 struct ureg out_texgen = undef; 1403 1404 if (p->state->unit[i].texgen_enabled) { 1405 GLuint copy_mask = 0; 1406 GLuint sphere_mask = 0; 1407 GLuint reflect_mask = 0; 1408 GLuint normal_mask = 0; 1409 GLuint modes[4]; 1410 1411 if (texmat_enabled) 1412 out_texgen = get_temp(p); 1413 else 1414 out_texgen = out; 1415 1416 modes[0] = p->state->unit[i].texgen_mode0; 1417 modes[1] = p->state->unit[i].texgen_mode1; 1418 modes[2] = p->state->unit[i].texgen_mode2; 1419 modes[3] = p->state->unit[i].texgen_mode3; 1420 1421 for (j = 0; j < 4; j++) { 1422 switch (modes[j]) { 1423 case TXG_OBJ_LINEAR: { 1424 struct ureg obj = register_input(p, VERT_ATTRIB_POS); 1425 struct ureg plane = 1426 register_param3(p, STATE_TEXGEN, i, 1427 STATE_TEXGEN_OBJECT_S + j); 1428 1429 emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j, 1430 obj, plane ); 1431 break; 1432 } 1433 case TXG_EYE_LINEAR: { 1434 struct ureg eye = get_eye_position(p); 1435 struct ureg plane = 1436 register_param3(p, STATE_TEXGEN, i, 1437 STATE_TEXGEN_EYE_S + j); 1438 1439 emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j, 1440 eye, plane ); 1441 break; 1442 } 1443 case TXG_SPHERE_MAP: 1444 sphere_mask |= WRITEMASK_X << j; 1445 break; 1446 case TXG_REFLECTION_MAP: 1447 reflect_mask |= WRITEMASK_X << j; 1448 break; 1449 case TXG_NORMAL_MAP: 1450 normal_mask |= WRITEMASK_X << j; 1451 break; 1452 case TXG_NONE: 1453 copy_mask |= WRITEMASK_X << j; 1454 } 1455 } 1456 1457 if (sphere_mask) { 1458 build_sphere_texgen(p, out_texgen, sphere_mask); 1459 } 1460 1461 if (reflect_mask) { 1462 build_reflect_texgen(p, out_texgen, reflect_mask); 1463 } 1464 1465 if (normal_mask) { 1466 struct ureg normal = get_transformed_normal(p); 1467 emit_op1(p, OPCODE_MOV, out_texgen, normal_mask, normal ); 1468 } 1469 1470 if (copy_mask) { 1471 struct ureg in = register_input(p, VERT_ATTRIB_TEX0+i); 1472 emit_op1(p, OPCODE_MOV, out_texgen, copy_mask, in ); 1473 } 1474 } 1475 1476 if (texmat_enabled) { 1477 struct ureg texmat[4]; 1478 struct ureg in = (!is_undef(out_texgen) ? 1479 out_texgen : 1480 register_input(p, VERT_ATTRIB_TEX0+i)); 1481 if (p->mvp_with_dp4) { 1482 register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3, 1483 0, texmat ); 1484 emit_matrix_transform_vec4( p, out, texmat, in ); 1485 } 1486 else { 1487 register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3, 1488 STATE_MATRIX_TRANSPOSE, texmat ); 1489 emit_transpose_matrix_transform_vec4( p, out, texmat, in ); 1490 } 1491 } 1492 1493 release_temps(p); 1494 } 1495 else { 1496 emit_passthrough(p, VERT_ATTRIB_TEX0+i, VERT_RESULT_TEX0+i); 1497 } 1498 } 1499} 1500 1501 1502/** 1503 * Point size attenuation computation. 1504 */ 1505static void build_atten_pointsize( struct tnl_program *p ) 1506{ 1507 struct ureg eye = get_eye_position_z(p); 1508 struct ureg state_size = register_param2(p, STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED); 1509 struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION); 1510 struct ureg out = register_output(p, VERT_RESULT_PSIZ); 1511 struct ureg ut = get_temp(p); 1512 1513 /* dist = |eyez| */ 1514 emit_op1(p, OPCODE_ABS, ut, WRITEMASK_Y, swizzle1(eye, Z)); 1515 /* p1 + dist * (p2 + dist * p3); */ 1516 emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y), 1517 swizzle1(state_attenuation, Z), swizzle1(state_attenuation, Y)); 1518 emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y), 1519 ut, swizzle1(state_attenuation, X)); 1520 1521 /* 1 / sqrt(factor) */ 1522 emit_op1(p, OPCODE_RSQ, ut, WRITEMASK_X, ut ); 1523 1524#if 0 1525 /* out = pointSize / sqrt(factor) */ 1526 emit_op2(p, OPCODE_MUL, out, WRITEMASK_X, ut, state_size); 1527#else 1528 /* this is a good place to clamp the point size since there's likely 1529 * no hardware registers to clamp point size at rasterization time. 1530 */ 1531 emit_op2(p, OPCODE_MUL, ut, WRITEMASK_X, ut, state_size); 1532 emit_op2(p, OPCODE_MAX, ut, WRITEMASK_X, ut, swizzle1(state_size, Y)); 1533 emit_op2(p, OPCODE_MIN, out, WRITEMASK_X, ut, swizzle1(state_size, Z)); 1534#endif 1535 1536 release_temp(p, ut); 1537} 1538 1539 1540/** 1541 * Pass-though per-vertex point size, from user's point size array. 1542 */ 1543static void build_array_pointsize( struct tnl_program *p ) 1544{ 1545 struct ureg in = register_input(p, VERT_ATTRIB_POINT_SIZE); 1546 struct ureg out = register_output(p, VERT_RESULT_PSIZ); 1547 emit_op1(p, OPCODE_MOV, out, WRITEMASK_X, in); 1548} 1549 1550 1551static void build_tnl_program( struct tnl_program *p ) 1552{ 1553 /* Emit the program, starting with modelviewproject: 1554 */ 1555 build_hpos(p); 1556 1557 /* Lighting calculations: 1558 */ 1559 if (p->state->fragprog_inputs_read & (FRAG_BIT_COL0|FRAG_BIT_COL1)) { 1560 if (p->state->light_global_enabled) 1561 build_lighting(p); 1562 else { 1563 if (p->state->fragprog_inputs_read & FRAG_BIT_COL0) 1564 emit_passthrough(p, VERT_ATTRIB_COLOR0, VERT_RESULT_COL0); 1565 1566 if (p->state->fragprog_inputs_read & FRAG_BIT_COL1) 1567 emit_passthrough(p, VERT_ATTRIB_COLOR1, VERT_RESULT_COL1); 1568 } 1569 } 1570 1571 if (p->state->fragprog_inputs_read & FRAG_BIT_FOGC) 1572 build_fog(p); 1573 1574 if (p->state->fragprog_inputs_read & FRAG_BITS_TEX_ANY) 1575 build_texture_transform(p); 1576 1577 if (p->state->point_attenuated) 1578 build_atten_pointsize(p); 1579 else if (p->state->point_array) 1580 build_array_pointsize(p); 1581 1582 /* Finish up: 1583 */ 1584 emit_op1(p, OPCODE_END, undef, 0, undef); 1585 1586 /* Disassemble: 1587 */ 1588 if (DISASSEM) { 1589 printf ("\n"); 1590 } 1591} 1592 1593 1594static void 1595create_new_program( const struct state_key *key, 1596 struct gl_vertex_program *program, 1597 GLboolean mvp_with_dp4, 1598 GLuint max_temps) 1599{ 1600 struct tnl_program p; 1601 1602 memset(&p, 0, sizeof(p)); 1603 p.state = key; 1604 p.program = program; 1605 p.eye_position = undef; 1606 p.eye_position_z = undef; 1607 p.eye_position_normalized = undef; 1608 p.transformed_normal = undef; 1609 p.identity = undef; 1610 p.temp_in_use = 0; 1611 p.mvp_with_dp4 = mvp_with_dp4; 1612 1613 if (max_temps >= sizeof(int) * 8) 1614 p.temp_reserved = 0; 1615 else 1616 p.temp_reserved = ~((1<<max_temps)-1); 1617 1618 /* Start by allocating 32 instructions. 1619 * If we need more, we'll grow the instruction array as needed. 1620 */ 1621 p.max_inst = 32; 1622 p.program->Base.Instructions = _mesa_alloc_instructions(p.max_inst); 1623 p.program->Base.String = NULL; 1624 p.program->Base.NumInstructions = 1625 p.program->Base.NumTemporaries = 1626 p.program->Base.NumParameters = 1627 p.program->Base.NumAttributes = p.program->Base.NumAddressRegs = 0; 1628 p.program->Base.Parameters = _mesa_new_parameter_list(); 1629 p.program->Base.InputsRead = 0; 1630 p.program->Base.OutputsWritten = 0; 1631 1632 build_tnl_program( &p ); 1633} 1634 1635 1636/** 1637 * Return a vertex program which implements the current fixed-function 1638 * transform/lighting/texgen operations. 1639 * XXX move this into core mesa (main/) 1640 */ 1641struct gl_vertex_program * 1642_mesa_get_fixed_func_vertex_program(struct gl_context *ctx) 1643{ 1644 struct gl_vertex_program *prog; 1645 struct state_key key; 1646 1647 /* Grab all the relevent state and put it in a single structure: 1648 */ 1649 make_state_key(ctx, &key); 1650 1651 /* Look for an already-prepared program for this state: 1652 */ 1653 prog = (struct gl_vertex_program *) 1654 _mesa_search_program_cache(ctx->VertexProgram.Cache, &key, sizeof(key)); 1655 1656 if (!prog) { 1657 /* OK, we'll have to build a new one */ 1658 if (0) 1659 printf("Build new TNL program\n"); 1660 1661 prog = (struct gl_vertex_program *) 1662 ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 0); 1663 if (!prog) 1664 return NULL; 1665 1666 create_new_program( &key, prog, 1667 ctx->mvp_with_dp4, 1668 ctx->Const.VertexProgram.MaxTemps ); 1669 1670#if 0 1671 if (ctx->Driver.ProgramStringNotify) 1672 ctx->Driver.ProgramStringNotify( ctx, GL_VERTEX_PROGRAM_ARB, 1673 &prog->Base ); 1674#endif 1675 _mesa_program_cache_insert(ctx, ctx->VertexProgram.Cache, 1676 &key, sizeof(key), &prog->Base); 1677 } 1678 1679 return prog; 1680} 1681