ffvertex_prog.c revision cdc920a0
/**************************************************************************
 *
 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * \file ffvertex_prog.c
 *
 * Create a vertex program to execute the current fixed function T&L pipeline.
32 * \author Keith Whitwell 33 */ 34 35 36#include "main/glheader.h" 37#include "main/mtypes.h" 38#include "main/macros.h" 39#include "main/enums.h" 40#include "main/ffvertex_prog.h" 41#include "shader/program.h" 42#include "shader/prog_cache.h" 43#include "shader/prog_instruction.h" 44#include "shader/prog_parameter.h" 45#include "shader/prog_print.h" 46#include "shader/prog_statevars.h" 47 48 49/** Max of number of lights and texture coord units */ 50#define NUM_UNITS MAX2(MAX_TEXTURE_COORD_UNITS, MAX_LIGHTS) 51 52struct state_key { 53 unsigned light_color_material_mask:12; 54 unsigned light_global_enabled:1; 55 unsigned light_local_viewer:1; 56 unsigned light_twoside:1; 57 unsigned material_shininess_is_zero:1; 58 unsigned need_eye_coords:1; 59 unsigned normalize:1; 60 unsigned rescale_normals:1; 61 62 unsigned fog_source_is_depth:1; 63 unsigned separate_specular:1; 64 unsigned point_attenuated:1; 65 unsigned point_array:1; 66 unsigned texture_enabled_global:1; 67 unsigned fragprog_inputs_read:12; 68 69 unsigned varying_vp_inputs; 70 71 struct { 72 unsigned light_enabled:1; 73 unsigned light_eyepos3_is_zero:1; 74 unsigned light_spotcutoff_is_180:1; 75 unsigned light_attenuated:1; 76 unsigned texunit_really_enabled:1; 77 unsigned texmat_enabled:1; 78 unsigned texgen_enabled:4; 79 unsigned texgen_mode0:4; 80 unsigned texgen_mode1:4; 81 unsigned texgen_mode2:4; 82 unsigned texgen_mode3:4; 83 } unit[NUM_UNITS]; 84}; 85 86 87#define TXG_NONE 0 88#define TXG_OBJ_LINEAR 1 89#define TXG_EYE_LINEAR 2 90#define TXG_SPHERE_MAP 3 91#define TXG_REFLECTION_MAP 4 92#define TXG_NORMAL_MAP 5 93 94static GLuint translate_texgen( GLboolean enabled, GLenum mode ) 95{ 96 if (!enabled) 97 return TXG_NONE; 98 99 switch (mode) { 100 case GL_OBJECT_LINEAR: return TXG_OBJ_LINEAR; 101 case GL_EYE_LINEAR: return TXG_EYE_LINEAR; 102 case GL_SPHERE_MAP: return TXG_SPHERE_MAP; 103 case GL_REFLECTION_MAP_NV: return TXG_REFLECTION_MAP; 104 case GL_NORMAL_MAP_NV: return TXG_NORMAL_MAP; 105 
default: return TXG_NONE; 106 } 107} 108 109 110 111static GLboolean check_active_shininess( GLcontext *ctx, 112 const struct state_key *key, 113 GLuint side ) 114{ 115 GLuint bit = 1 << (MAT_ATTRIB_FRONT_SHININESS + side); 116 117 if ((key->varying_vp_inputs & VERT_BIT_COLOR0) && 118 (key->light_color_material_mask & bit)) 119 return GL_TRUE; 120 121 if (key->varying_vp_inputs & (bit << 16)) 122 return GL_TRUE; 123 124 if (ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_SHININESS + side][0] != 0.0F) 125 return GL_TRUE; 126 127 return GL_FALSE; 128} 129 130 131static void make_state_key( GLcontext *ctx, struct state_key *key ) 132{ 133 const struct gl_fragment_program *fp; 134 GLuint i; 135 136 memset(key, 0, sizeof(struct state_key)); 137 fp = ctx->FragmentProgram._Current; 138 139 /* This now relies on texenvprogram.c being active: 140 */ 141 assert(fp); 142 143 key->need_eye_coords = ctx->_NeedEyeCoords; 144 145 key->fragprog_inputs_read = fp->Base.InputsRead; 146 key->varying_vp_inputs = ctx->varying_vp_inputs; 147 148 if (ctx->RenderMode == GL_FEEDBACK) { 149 /* make sure the vertprog emits color and tex0 */ 150 key->fragprog_inputs_read |= (FRAG_BIT_COL0 | FRAG_BIT_TEX0); 151 } 152 153 key->separate_specular = (ctx->Light.Model.ColorControl == 154 GL_SEPARATE_SPECULAR_COLOR); 155 156 if (ctx->Light.Enabled) { 157 key->light_global_enabled = 1; 158 159 if (ctx->Light.Model.LocalViewer) 160 key->light_local_viewer = 1; 161 162 if (ctx->Light.Model.TwoSide) 163 key->light_twoside = 1; 164 165 if (ctx->Light.ColorMaterialEnabled) { 166 key->light_color_material_mask = ctx->Light.ColorMaterialBitmask; 167 } 168 169 for (i = 0; i < MAX_LIGHTS; i++) { 170 struct gl_light *light = &ctx->Light.Light[i]; 171 172 if (light->Enabled) { 173 key->unit[i].light_enabled = 1; 174 175 if (light->EyePosition[3] == 0.0) 176 key->unit[i].light_eyepos3_is_zero = 1; 177 178 if (light->SpotCutoff == 180.0) 179 key->unit[i].light_spotcutoff_is_180 = 1; 180 181 if 
(light->ConstantAttenuation != 1.0 || 182 light->LinearAttenuation != 0.0 || 183 light->QuadraticAttenuation != 0.0) 184 key->unit[i].light_attenuated = 1; 185 } 186 } 187 188 if (check_active_shininess(ctx, key, 0)) { 189 key->material_shininess_is_zero = 0; 190 } 191 else if (key->light_twoside && 192 check_active_shininess(ctx, key, 1)) { 193 key->material_shininess_is_zero = 0; 194 } 195 else { 196 key->material_shininess_is_zero = 1; 197 } 198 } 199 200 if (ctx->Transform.Normalize) 201 key->normalize = 1; 202 203 if (ctx->Transform.RescaleNormals) 204 key->rescale_normals = 1; 205 206 if (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT) 207 key->fog_source_is_depth = 1; 208 209 if (ctx->Point._Attenuated) 210 key->point_attenuated = 1; 211 212#if FEATURE_point_size_array 213 if (ctx->Array.ArrayObj->PointSize.Enabled) 214 key->point_array = 1; 215#endif 216 217 if (ctx->Texture._TexGenEnabled || 218 ctx->Texture._TexMatEnabled || 219 ctx->Texture._EnabledUnits) 220 key->texture_enabled_global = 1; 221 222 for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) { 223 struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; 224 225 if (texUnit->_ReallyEnabled) 226 key->unit[i].texunit_really_enabled = 1; 227 228 if (ctx->Texture._TexMatEnabled & ENABLE_TEXMAT(i)) 229 key->unit[i].texmat_enabled = 1; 230 231 if (texUnit->TexGenEnabled) { 232 key->unit[i].texgen_enabled = 1; 233 234 key->unit[i].texgen_mode0 = 235 translate_texgen( texUnit->TexGenEnabled & (1<<0), 236 texUnit->GenS.Mode ); 237 key->unit[i].texgen_mode1 = 238 translate_texgen( texUnit->TexGenEnabled & (1<<1), 239 texUnit->GenT.Mode ); 240 key->unit[i].texgen_mode2 = 241 translate_texgen( texUnit->TexGenEnabled & (1<<2), 242 texUnit->GenR.Mode ); 243 key->unit[i].texgen_mode3 = 244 translate_texgen( texUnit->TexGenEnabled & (1<<3), 245 texUnit->GenQ.Mode ); 246 } 247 } 248} 249 250 251 252/* Very useful debugging tool - produces annotated listing of 253 * generated program with line/function references 
for each 254 * instruction back into this file: 255 */ 256#define DISASSEM 0 257 258 259/* Use uregs to represent registers internally, translate to Mesa's 260 * expected formats on emit. 261 * 262 * NOTE: These are passed by value extensively in this file rather 263 * than as usual by pointer reference. If this disturbs you, try 264 * remembering they are just 32bits in size. 265 * 266 * GCC is smart enough to deal with these dword-sized structures in 267 * much the same way as if I had defined them as dwords and was using 268 * macros to access and set the fields. This is much nicer and easier 269 * to evolve. 270 */ 271struct ureg { 272 GLuint file:4; 273 GLint idx:9; /* relative addressing may be negative */ 274 /* sizeof(idx) should == sizeof(prog_src_reg::Index) */ 275 GLuint negate:1; 276 GLuint swz:12; 277 GLuint pad:6; 278}; 279 280 281struct tnl_program { 282 const struct state_key *state; 283 struct gl_vertex_program *program; 284 GLint max_inst; /** number of instructions allocated for program */ 285 GLboolean mvp_with_dp4; 286 287 GLuint temp_in_use; 288 GLuint temp_reserved; 289 290 struct ureg eye_position; 291 struct ureg eye_position_z; 292 struct ureg eye_position_normalized; 293 struct ureg transformed_normal; 294 struct ureg identity; 295 296 GLuint materials; 297 GLuint color_materials; 298}; 299 300 301static const struct ureg undef = { 302 PROGRAM_UNDEFINED, 303 0, 304 0, 305 0, 306 0 307}; 308 309/* Local shorthand: 310 */ 311#define X SWIZZLE_X 312#define Y SWIZZLE_Y 313#define Z SWIZZLE_Z 314#define W SWIZZLE_W 315 316 317/* Construct a ureg: 318 */ 319static struct ureg make_ureg(GLuint file, GLint idx) 320{ 321 struct ureg reg; 322 reg.file = file; 323 reg.idx = idx; 324 reg.negate = 0; 325 reg.swz = SWIZZLE_NOOP; 326 reg.pad = 0; 327 return reg; 328} 329 330 331 332static struct ureg negate( struct ureg reg ) 333{ 334 reg.negate ^= 1; 335 return reg; 336} 337 338 339static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w 
) 340{ 341 reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x), 342 GET_SWZ(reg.swz, y), 343 GET_SWZ(reg.swz, z), 344 GET_SWZ(reg.swz, w)); 345 return reg; 346} 347 348 349static struct ureg swizzle1( struct ureg reg, int x ) 350{ 351 return swizzle(reg, x, x, x, x); 352} 353 354 355static struct ureg get_temp( struct tnl_program *p ) 356{ 357 int bit = _mesa_ffs( ~p->temp_in_use ); 358 if (!bit) { 359 _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__); 360 exit(1); 361 } 362 363 if ((GLuint) bit > p->program->Base.NumTemporaries) 364 p->program->Base.NumTemporaries = bit; 365 366 p->temp_in_use |= 1<<(bit-1); 367 return make_ureg(PROGRAM_TEMPORARY, bit-1); 368} 369 370 371static struct ureg reserve_temp( struct tnl_program *p ) 372{ 373 struct ureg temp = get_temp( p ); 374 p->temp_reserved |= 1<<temp.idx; 375 return temp; 376} 377 378 379static void release_temp( struct tnl_program *p, struct ureg reg ) 380{ 381 if (reg.file == PROGRAM_TEMPORARY) { 382 p->temp_in_use &= ~(1<<reg.idx); 383 p->temp_in_use |= p->temp_reserved; /* can't release reserved temps */ 384 } 385} 386 387static void release_temps( struct tnl_program *p ) 388{ 389 p->temp_in_use = p->temp_reserved; 390} 391 392 393static struct ureg register_param5(struct tnl_program *p, 394 GLint s0, 395 GLint s1, 396 GLint s2, 397 GLint s3, 398 GLint s4) 399{ 400 gl_state_index tokens[STATE_LENGTH]; 401 GLint idx; 402 tokens[0] = s0; 403 tokens[1] = s1; 404 tokens[2] = s2; 405 tokens[3] = s3; 406 tokens[4] = s4; 407 idx = _mesa_add_state_reference( p->program->Base.Parameters, tokens ); 408 return make_ureg(PROGRAM_STATE_VAR, idx); 409} 410 411 412#define register_param1(p,s0) register_param5(p,s0,0,0,0,0) 413#define register_param2(p,s0,s1) register_param5(p,s0,s1,0,0,0) 414#define register_param3(p,s0,s1,s2) register_param5(p,s0,s1,s2,0,0) 415#define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0) 416 417 418 419/** 420 * \param input one of VERT_ATTRIB_x tokens. 
421 */ 422static struct ureg register_input( struct tnl_program *p, GLuint input ) 423{ 424 assert(input < 32); 425 426 if (p->state->varying_vp_inputs & (1<<input)) { 427 p->program->Base.InputsRead |= (1<<input); 428 return make_ureg(PROGRAM_INPUT, input); 429 } 430 else { 431 return register_param3( p, STATE_INTERNAL, STATE_CURRENT_ATTRIB, input ); 432 } 433} 434 435 436/** 437 * \param input one of VERT_RESULT_x tokens. 438 */ 439static struct ureg register_output( struct tnl_program *p, GLuint output ) 440{ 441 p->program->Base.OutputsWritten |= BITFIELD64_BIT(output); 442 return make_ureg(PROGRAM_OUTPUT, output); 443} 444 445 446static struct ureg register_const4f( struct tnl_program *p, 447 GLfloat s0, 448 GLfloat s1, 449 GLfloat s2, 450 GLfloat s3) 451{ 452 GLfloat values[4]; 453 GLint idx; 454 GLuint swizzle; 455 values[0] = s0; 456 values[1] = s1; 457 values[2] = s2; 458 values[3] = s3; 459 idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4, 460 &swizzle ); 461 ASSERT(swizzle == SWIZZLE_NOOP); 462 return make_ureg(PROGRAM_CONSTANT, idx); 463} 464 465#define register_const1f(p, s0) register_const4f(p, s0, 0, 0, 1) 466#define register_scalar_const(p, s0) register_const4f(p, s0, s0, s0, s0) 467#define register_const2f(p, s0, s1) register_const4f(p, s0, s1, 0, 1) 468#define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1) 469 470static GLboolean is_undef( struct ureg reg ) 471{ 472 return reg.file == PROGRAM_UNDEFINED; 473} 474 475 476static struct ureg get_identity_param( struct tnl_program *p ) 477{ 478 if (is_undef(p->identity)) 479 p->identity = register_const4f(p, 0,0,0,1); 480 481 return p->identity; 482} 483 484static void register_matrix_param5( struct tnl_program *p, 485 GLint s0, /* modelview, projection, etc */ 486 GLint s1, /* texture matrix number */ 487 GLint s2, /* first row */ 488 GLint s3, /* last row */ 489 GLint s4, /* inverse, transpose, etc */ 490 struct ureg *matrix ) 491{ 492 GLint i; 493 494 /* 
This is a bit sad as the support is there to pull the whole 495 * matrix out in one go: 496 */ 497 for (i = 0; i <= s3 - s2; i++) 498 matrix[i] = register_param5( p, s0, s1, i, i, s4 ); 499} 500 501 502static void emit_arg( struct prog_src_register *src, 503 struct ureg reg ) 504{ 505 src->File = reg.file; 506 src->Index = reg.idx; 507 src->Swizzle = reg.swz; 508 src->Negate = reg.negate ? NEGATE_XYZW : NEGATE_NONE; 509 src->Abs = 0; 510 src->RelAddr = 0; 511 /* Check that bitfield sizes aren't exceeded */ 512 ASSERT(src->Index == reg.idx); 513} 514 515 516static void emit_dst( struct prog_dst_register *dst, 517 struct ureg reg, GLuint mask ) 518{ 519 dst->File = reg.file; 520 dst->Index = reg.idx; 521 /* allow zero as a shorthand for xyzw */ 522 dst->WriteMask = mask ? mask : WRITEMASK_XYZW; 523 dst->CondMask = COND_TR; /* always pass cond test */ 524 dst->CondSwizzle = SWIZZLE_NOOP; 525 dst->CondSrc = 0; 526 /* Check that bitfield sizes aren't exceeded */ 527 ASSERT(dst->Index == reg.idx); 528} 529 530 531static void debug_insn( struct prog_instruction *inst, const char *fn, 532 GLuint line ) 533{ 534 if (DISASSEM) { 535 static const char *last_fn; 536 537 if (fn != last_fn) { 538 last_fn = fn; 539 printf("%s:\n", fn); 540 } 541 542 printf("%d:\t", line); 543 _mesa_print_instruction(inst); 544 } 545} 546 547 548static void emit_op3fn(struct tnl_program *p, 549 enum prog_opcode op, 550 struct ureg dest, 551 GLuint mask, 552 struct ureg src0, 553 struct ureg src1, 554 struct ureg src2, 555 const char *fn, 556 GLuint line) 557{ 558 GLuint nr; 559 struct prog_instruction *inst; 560 561 assert((GLint) p->program->Base.NumInstructions <= p->max_inst); 562 563 if (p->program->Base.NumInstructions == p->max_inst) { 564 /* need to extend the program's instruction array */ 565 struct prog_instruction *newInst; 566 567 /* double the size */ 568 p->max_inst *= 2; 569 570 newInst = _mesa_alloc_instructions(p->max_inst); 571 if (!newInst) { 572 _mesa_error(NULL, 
GL_OUT_OF_MEMORY, "vertex program build"); 573 return; 574 } 575 576 _mesa_copy_instructions(newInst, 577 p->program->Base.Instructions, 578 p->program->Base.NumInstructions); 579 580 _mesa_free_instructions(p->program->Base.Instructions, 581 p->program->Base.NumInstructions); 582 583 p->program->Base.Instructions = newInst; 584 } 585 586 nr = p->program->Base.NumInstructions++; 587 588 inst = &p->program->Base.Instructions[nr]; 589 inst->Opcode = (enum prog_opcode) op; 590 inst->Data = 0; 591 592 emit_arg( &inst->SrcReg[0], src0 ); 593 emit_arg( &inst->SrcReg[1], src1 ); 594 emit_arg( &inst->SrcReg[2], src2 ); 595 596 emit_dst( &inst->DstReg, dest, mask ); 597 598 debug_insn(inst, fn, line); 599} 600 601 602#define emit_op3(p, op, dst, mask, src0, src1, src2) \ 603 emit_op3fn(p, op, dst, mask, src0, src1, src2, __FUNCTION__, __LINE__) 604 605#define emit_op2(p, op, dst, mask, src0, src1) \ 606 emit_op3fn(p, op, dst, mask, src0, src1, undef, __FUNCTION__, __LINE__) 607 608#define emit_op1(p, op, dst, mask, src0) \ 609 emit_op3fn(p, op, dst, mask, src0, undef, undef, __FUNCTION__, __LINE__) 610 611 612static struct ureg make_temp( struct tnl_program *p, struct ureg reg ) 613{ 614 if (reg.file == PROGRAM_TEMPORARY && 615 !(p->temp_reserved & (1<<reg.idx))) 616 return reg; 617 else { 618 struct ureg temp = get_temp(p); 619 emit_op1(p, OPCODE_MOV, temp, 0, reg); 620 return temp; 621 } 622} 623 624 625/* Currently no tracking performed of input/output/register size or 626 * active elements. Could be used to reduce these operations, as 627 * could the matrix type. 
628 */ 629static void emit_matrix_transform_vec4( struct tnl_program *p, 630 struct ureg dest, 631 const struct ureg *mat, 632 struct ureg src) 633{ 634 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_X, src, mat[0]); 635 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Y, src, mat[1]); 636 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Z, src, mat[2]); 637 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_W, src, mat[3]); 638} 639 640 641/* This version is much easier to implement if writemasks are not 642 * supported natively on the target or (like SSE), the target doesn't 643 * have a clean/obvious dotproduct implementation. 644 */ 645static void emit_transpose_matrix_transform_vec4( struct tnl_program *p, 646 struct ureg dest, 647 const struct ureg *mat, 648 struct ureg src) 649{ 650 struct ureg tmp; 651 652 if (dest.file != PROGRAM_TEMPORARY) 653 tmp = get_temp(p); 654 else 655 tmp = dest; 656 657 emit_op2(p, OPCODE_MUL, tmp, 0, swizzle1(src,X), mat[0]); 658 emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Y), mat[1], tmp); 659 emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Z), mat[2], tmp); 660 emit_op3(p, OPCODE_MAD, dest, 0, swizzle1(src,W), mat[3], tmp); 661 662 if (dest.file != PROGRAM_TEMPORARY) 663 release_temp(p, tmp); 664} 665 666 667static void emit_matrix_transform_vec3( struct tnl_program *p, 668 struct ureg dest, 669 const struct ureg *mat, 670 struct ureg src) 671{ 672 emit_op2(p, OPCODE_DP3, dest, WRITEMASK_X, src, mat[0]); 673 emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Y, src, mat[1]); 674 emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Z, src, mat[2]); 675} 676 677 678static void emit_normalize_vec3( struct tnl_program *p, 679 struct ureg dest, 680 struct ureg src ) 681{ 682#if 0 683 /* XXX use this when drivers are ready for NRM3 */ 684 emit_op1(p, OPCODE_NRM3, dest, WRITEMASK_XYZ, src); 685#else 686 struct ureg tmp = get_temp(p); 687 emit_op2(p, OPCODE_DP3, tmp, WRITEMASK_X, src, src); 688 emit_op1(p, OPCODE_RSQ, tmp, WRITEMASK_X, tmp); 689 emit_op2(p, OPCODE_MUL, dest, 0, src, swizzle1(tmp, 
X)); 690 release_temp(p, tmp); 691#endif 692} 693 694 695static void emit_passthrough( struct tnl_program *p, 696 GLuint input, 697 GLuint output ) 698{ 699 struct ureg out = register_output(p, output); 700 emit_op1(p, OPCODE_MOV, out, 0, register_input(p, input)); 701} 702 703 704static struct ureg get_eye_position( struct tnl_program *p ) 705{ 706 if (is_undef(p->eye_position)) { 707 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 708 struct ureg modelview[4]; 709 710 p->eye_position = reserve_temp(p); 711 712 if (p->mvp_with_dp4) { 713 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3, 714 0, modelview ); 715 716 emit_matrix_transform_vec4(p, p->eye_position, modelview, pos); 717 } 718 else { 719 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3, 720 STATE_MATRIX_TRANSPOSE, modelview ); 721 722 emit_transpose_matrix_transform_vec4(p, p->eye_position, modelview, pos); 723 } 724 } 725 726 return p->eye_position; 727} 728 729 730static struct ureg get_eye_position_z( struct tnl_program *p ) 731{ 732 if (!is_undef(p->eye_position)) 733 return swizzle1(p->eye_position, Z); 734 735 if (is_undef(p->eye_position_z)) { 736 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 737 struct ureg modelview[4]; 738 739 p->eye_position_z = reserve_temp(p); 740 741 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3, 742 0, modelview ); 743 744 emit_op2(p, OPCODE_DP4, p->eye_position_z, 0, pos, modelview[2]); 745 } 746 747 return p->eye_position_z; 748} 749 750 751static struct ureg get_eye_position_normalized( struct tnl_program *p ) 752{ 753 if (is_undef(p->eye_position_normalized)) { 754 struct ureg eye = get_eye_position(p); 755 p->eye_position_normalized = reserve_temp(p); 756 emit_normalize_vec3(p, p->eye_position_normalized, eye); 757 } 758 759 return p->eye_position_normalized; 760} 761 762 763static struct ureg get_transformed_normal( struct tnl_program *p ) 764{ 765 if (is_undef(p->transformed_normal) && 766 !p->state->need_eye_coords 
&& 767 !p->state->normalize && 768 !(p->state->need_eye_coords == p->state->rescale_normals)) 769 { 770 p->transformed_normal = register_input(p, VERT_ATTRIB_NORMAL ); 771 } 772 else if (is_undef(p->transformed_normal)) 773 { 774 struct ureg normal = register_input(p, VERT_ATTRIB_NORMAL ); 775 struct ureg mvinv[3]; 776 struct ureg transformed_normal = reserve_temp(p); 777 778 if (p->state->need_eye_coords) { 779 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 2, 780 STATE_MATRIX_INVTRANS, mvinv ); 781 782 /* Transform to eye space: 783 */ 784 emit_matrix_transform_vec3( p, transformed_normal, mvinv, normal ); 785 normal = transformed_normal; 786 } 787 788 /* Normalize/Rescale: 789 */ 790 if (p->state->normalize) { 791 emit_normalize_vec3( p, transformed_normal, normal ); 792 normal = transformed_normal; 793 } 794 else if (p->state->need_eye_coords == p->state->rescale_normals) { 795 /* This is already adjusted for eye/non-eye rendering: 796 */ 797 struct ureg rescale = register_param2(p, STATE_INTERNAL, 798 STATE_NORMAL_SCALE); 799 800 emit_op2( p, OPCODE_MUL, transformed_normal, 0, normal, rescale ); 801 normal = transformed_normal; 802 } 803 804 assert(normal.file == PROGRAM_TEMPORARY); 805 p->transformed_normal = normal; 806 } 807 808 return p->transformed_normal; 809} 810 811 812static void build_hpos( struct tnl_program *p ) 813{ 814 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 815 struct ureg hpos = register_output( p, VERT_RESULT_HPOS ); 816 struct ureg mvp[4]; 817 818 if (p->mvp_with_dp4) { 819 register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3, 820 0, mvp ); 821 emit_matrix_transform_vec4( p, hpos, mvp, pos ); 822 } 823 else { 824 register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3, 825 STATE_MATRIX_TRANSPOSE, mvp ); 826 emit_transpose_matrix_transform_vec4( p, hpos, mvp, pos ); 827 } 828} 829 830 831static GLuint material_attrib( GLuint side, GLuint property ) 832{ 833 return (property - STATE_AMBIENT) * 2 + side; 834} 835 836 
837/** 838 * Get a bitmask of which material values vary on a per-vertex basis. 839 */ 840static void set_material_flags( struct tnl_program *p ) 841{ 842 p->color_materials = 0; 843 p->materials = 0; 844 845 if (p->state->varying_vp_inputs & VERT_BIT_COLOR0) { 846 p->materials = 847 p->color_materials = p->state->light_color_material_mask; 848 } 849 850 p->materials |= (p->state->varying_vp_inputs >> 16); 851} 852 853 854static struct ureg get_material( struct tnl_program *p, GLuint side, 855 GLuint property ) 856{ 857 GLuint attrib = material_attrib(side, property); 858 859 if (p->color_materials & (1<<attrib)) 860 return register_input(p, VERT_ATTRIB_COLOR0); 861 else if (p->materials & (1<<attrib)) { 862 /* Put material values in the GENERIC slots -- they are not used 863 * for anything in fixed function mode. 864 */ 865 return register_input( p, attrib + VERT_ATTRIB_GENERIC0 ); 866 } 867 else 868 return register_param3( p, STATE_MATERIAL, side, property ); 869} 870 871#define SCENE_COLOR_BITS(side) (( MAT_BIT_FRONT_EMISSION | \ 872 MAT_BIT_FRONT_AMBIENT | \ 873 MAT_BIT_FRONT_DIFFUSE) << (side)) 874 875 876/** 877 * Either return a precalculated constant value or emit code to 878 * calculate these values dynamically in the case where material calls 879 * are present between begin/end pairs. 880 * 881 * Probably want to shift this to the program compilation phase - if 882 * we always emitted the calculation here, a smart compiler could 883 * detect that it was constant (given a certain set of inputs), and 884 * lift it out of the main loop. That way the programs created here 885 * would be independent of the vertex_buffer details. 
886 */ 887static struct ureg get_scenecolor( struct tnl_program *p, GLuint side ) 888{ 889 if (p->materials & SCENE_COLOR_BITS(side)) { 890 struct ureg lm_ambient = register_param1(p, STATE_LIGHTMODEL_AMBIENT); 891 struct ureg material_emission = get_material(p, side, STATE_EMISSION); 892 struct ureg material_ambient = get_material(p, side, STATE_AMBIENT); 893 struct ureg material_diffuse = get_material(p, side, STATE_DIFFUSE); 894 struct ureg tmp = make_temp(p, material_diffuse); 895 emit_op3(p, OPCODE_MAD, tmp, WRITEMASK_XYZ, lm_ambient, 896 material_ambient, material_emission); 897 return tmp; 898 } 899 else 900 return register_param2( p, STATE_LIGHTMODEL_SCENECOLOR, side ); 901} 902 903 904static struct ureg get_lightprod( struct tnl_program *p, GLuint light, 905 GLuint side, GLuint property ) 906{ 907 GLuint attrib = material_attrib(side, property); 908 if (p->materials & (1<<attrib)) { 909 struct ureg light_value = 910 register_param3(p, STATE_LIGHT, light, property); 911 struct ureg material_value = get_material(p, side, property); 912 struct ureg tmp = get_temp(p); 913 emit_op2(p, OPCODE_MUL, tmp, 0, light_value, material_value); 914 return tmp; 915 } 916 else 917 return register_param4(p, STATE_LIGHTPROD, light, side, property); 918} 919 920 921static struct ureg calculate_light_attenuation( struct tnl_program *p, 922 GLuint i, 923 struct ureg VPpli, 924 struct ureg dist ) 925{ 926 struct ureg attenuation = register_param3(p, STATE_LIGHT, i, 927 STATE_ATTENUATION); 928 struct ureg att = get_temp(p); 929 930 /* Calculate spot attenuation: 931 */ 932 if (!p->state->unit[i].light_spotcutoff_is_180) { 933 struct ureg spot_dir_norm = register_param3(p, STATE_INTERNAL, 934 STATE_LIGHT_SPOT_DIR_NORMALIZED, i); 935 struct ureg spot = get_temp(p); 936 struct ureg slt = get_temp(p); 937 938 emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm); 939 emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot); 940 emit_op2(p, OPCODE_POW, spot, 0, spot, 
swizzle1(attenuation, W)); 941 emit_op2(p, OPCODE_MUL, att, 0, slt, spot); 942 943 release_temp(p, spot); 944 release_temp(p, slt); 945 } 946 947 /* Calculate distance attenuation: 948 */ 949 if (p->state->unit[i].light_attenuated) { 950 /* 1/d,d,d,1/d */ 951 emit_op1(p, OPCODE_RCP, dist, WRITEMASK_YZ, dist); 952 /* 1,d,d*d,1/d */ 953 emit_op2(p, OPCODE_MUL, dist, WRITEMASK_XZ, dist, swizzle1(dist,Y)); 954 /* 1/dist-atten */ 955 emit_op2(p, OPCODE_DP3, dist, 0, attenuation, dist); 956 957 if (!p->state->unit[i].light_spotcutoff_is_180) { 958 /* dist-atten */ 959 emit_op1(p, OPCODE_RCP, dist, 0, dist); 960 /* spot-atten * dist-atten */ 961 emit_op2(p, OPCODE_MUL, att, 0, dist, att); 962 } 963 else { 964 /* dist-atten */ 965 emit_op1(p, OPCODE_RCP, att, 0, dist); 966 } 967 } 968 969 return att; 970} 971 972 973/** 974 * Compute: 975 * lit.y = MAX(0, dots.x) 976 * lit.z = SLT(0, dots.x) 977 */ 978static void emit_degenerate_lit( struct tnl_program *p, 979 struct ureg lit, 980 struct ureg dots ) 981{ 982 struct ureg id = get_identity_param(p); /* id = {0,0,0,1} */ 983 984 /* Note that lit.x & lit.w will not be examined. Note also that 985 * dots.xyzw == dots.xxxx. 986 */ 987 988 /* MAX lit, id, dots; 989 */ 990 emit_op2(p, OPCODE_MAX, lit, WRITEMASK_XYZW, id, dots); 991 992 /* result[2] = (in > 0 ? 1 : 0) 993 * SLT lit.z, id.z, dots; # lit.z = (0 < dots.z) ? 1 : 0 994 */ 995 emit_op2(p, OPCODE_SLT, lit, WRITEMASK_Z, swizzle1(id,Z), dots); 996} 997 998 999/* Need to add some addtional parameters to allow lighting in object 1000 * space - STATE_SPOT_DIRECTION and STATE_HALF_VECTOR implicitly assume eye 1001 * space lighting. 
1002 */ 1003static void build_lighting( struct tnl_program *p ) 1004{ 1005 const GLboolean twoside = p->state->light_twoside; 1006 const GLboolean separate = p->state->separate_specular; 1007 GLuint nr_lights = 0, count = 0; 1008 struct ureg normal = get_transformed_normal(p); 1009 struct ureg lit = get_temp(p); 1010 struct ureg dots = get_temp(p); 1011 struct ureg _col0 = undef, _col1 = undef; 1012 struct ureg _bfc0 = undef, _bfc1 = undef; 1013 GLuint i; 1014 1015 /* 1016 * NOTE: 1017 * dots.x = dot(normal, VPpli) 1018 * dots.y = dot(normal, halfAngle) 1019 * dots.z = back.shininess 1020 * dots.w = front.shininess 1021 */ 1022 1023 for (i = 0; i < MAX_LIGHTS; i++) 1024 if (p->state->unit[i].light_enabled) 1025 nr_lights++; 1026 1027 set_material_flags(p); 1028 1029 { 1030 if (!p->state->material_shininess_is_zero) { 1031 struct ureg shininess = get_material(p, 0, STATE_SHININESS); 1032 emit_op1(p, OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X)); 1033 release_temp(p, shininess); 1034 } 1035 1036 _col0 = make_temp(p, get_scenecolor(p, 0)); 1037 if (separate) 1038 _col1 = make_temp(p, get_identity_param(p)); 1039 else 1040 _col1 = _col0; 1041 } 1042 1043 if (twoside) { 1044 if (!p->state->material_shininess_is_zero) { 1045 /* Note that we negate the back-face specular exponent here. 1046 * The negation will be un-done later in the back-face code below. 1047 */ 1048 struct ureg shininess = get_material(p, 1, STATE_SHININESS); 1049 emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z, 1050 negate(swizzle1(shininess,X))); 1051 release_temp(p, shininess); 1052 } 1053 1054 _bfc0 = make_temp(p, get_scenecolor(p, 1)); 1055 if (separate) 1056 _bfc1 = make_temp(p, get_identity_param(p)); 1057 else 1058 _bfc1 = _bfc0; 1059 } 1060 1061 /* If no lights, still need to emit the scenecolor. 
1062 */ 1063 { 1064 struct ureg res0 = register_output( p, VERT_RESULT_COL0 ); 1065 emit_op1(p, OPCODE_MOV, res0, 0, _col0); 1066 } 1067 1068 if (separate) { 1069 struct ureg res1 = register_output( p, VERT_RESULT_COL1 ); 1070 emit_op1(p, OPCODE_MOV, res1, 0, _col1); 1071 } 1072 1073 if (twoside) { 1074 struct ureg res0 = register_output( p, VERT_RESULT_BFC0 ); 1075 emit_op1(p, OPCODE_MOV, res0, 0, _bfc0); 1076 } 1077 1078 if (twoside && separate) { 1079 struct ureg res1 = register_output( p, VERT_RESULT_BFC1 ); 1080 emit_op1(p, OPCODE_MOV, res1, 0, _bfc1); 1081 } 1082 1083 if (nr_lights == 0) { 1084 release_temps(p); 1085 return; 1086 } 1087 1088 for (i = 0; i < MAX_LIGHTS; i++) { 1089 if (p->state->unit[i].light_enabled) { 1090 struct ureg half = undef; 1091 struct ureg att = undef, VPpli = undef; 1092 1093 count++; 1094 1095 if (p->state->unit[i].light_eyepos3_is_zero) { 1096 /* Can used precomputed constants in this case. 1097 * Attenuation never applies to infinite lights. 1098 */ 1099 VPpli = register_param3(p, STATE_INTERNAL, 1100 STATE_LIGHT_POSITION_NORMALIZED, i); 1101 1102 if (!p->state->material_shininess_is_zero) { 1103 if (p->state->light_local_viewer) { 1104 struct ureg eye_hat = get_eye_position_normalized(p); 1105 half = get_temp(p); 1106 emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat); 1107 emit_normalize_vec3(p, half, half); 1108 } 1109 else { 1110 half = register_param3(p, STATE_INTERNAL, 1111 STATE_LIGHT_HALF_VECTOR, i); 1112 } 1113 } 1114 } 1115 else { 1116 struct ureg Ppli = register_param3(p, STATE_INTERNAL, 1117 STATE_LIGHT_POSITION, i); 1118 struct ureg V = get_eye_position(p); 1119 struct ureg dist = get_temp(p); 1120 1121 VPpli = get_temp(p); 1122 1123 /* Calculate VPpli vector 1124 */ 1125 emit_op2(p, OPCODE_SUB, VPpli, 0, Ppli, V); 1126 1127 /* Normalize VPpli. The dist value also used in 1128 * attenuation below. 
1129 */ 1130 emit_op2(p, OPCODE_DP3, dist, 0, VPpli, VPpli); 1131 emit_op1(p, OPCODE_RSQ, dist, 0, dist); 1132 emit_op2(p, OPCODE_MUL, VPpli, 0, VPpli, dist); 1133 1134 /* Calculate attenuation: 1135 */ 1136 if (!p->state->unit[i].light_spotcutoff_is_180 || 1137 p->state->unit[i].light_attenuated) { 1138 att = calculate_light_attenuation(p, i, VPpli, dist); 1139 } 1140 1141 /* Calculate viewer direction, or use infinite viewer: 1142 */ 1143 if (!p->state->material_shininess_is_zero) { 1144 half = get_temp(p); 1145 1146 if (p->state->light_local_viewer) { 1147 struct ureg eye_hat = get_eye_position_normalized(p); 1148 emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat); 1149 } 1150 else { 1151 struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z); 1152 emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir); 1153 } 1154 1155 emit_normalize_vec3(p, half, half); 1156 } 1157 1158 release_temp(p, dist); 1159 } 1160 1161 /* Calculate dot products: 1162 */ 1163 if (p->state->material_shininess_is_zero) { 1164 emit_op2(p, OPCODE_DP3, dots, 0, normal, VPpli); 1165 } 1166 else { 1167 emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli); 1168 emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half); 1169 } 1170 1171 /* Front face lighting: 1172 */ 1173 { 1174 struct ureg ambient = get_lightprod(p, i, 0, STATE_AMBIENT); 1175 struct ureg diffuse = get_lightprod(p, i, 0, STATE_DIFFUSE); 1176 struct ureg specular = get_lightprod(p, i, 0, STATE_SPECULAR); 1177 struct ureg res0, res1; 1178 GLuint mask0, mask1; 1179 1180 if (count == nr_lights) { 1181 if (separate) { 1182 mask0 = WRITEMASK_XYZ; 1183 mask1 = WRITEMASK_XYZ; 1184 res0 = register_output( p, VERT_RESULT_COL0 ); 1185 res1 = register_output( p, VERT_RESULT_COL1 ); 1186 } 1187 else { 1188 mask0 = 0; 1189 mask1 = WRITEMASK_XYZ; 1190 res0 = _col0; 1191 res1 = register_output( p, VERT_RESULT_COL0 ); 1192 } 1193 } 1194 else { 1195 mask0 = 0; 1196 mask1 = 0; 1197 res0 = _col0; 1198 res1 = _col1; 1199 } 1200 1201 if 
(!is_undef(att)) { 1202 /* light is attenuated by distance */ 1203 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1204 emit_op2(p, OPCODE_MUL, lit, 0, lit, att); 1205 emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0); 1206 } 1207 else if (!p->state->material_shininess_is_zero) { 1208 /* there's a non-zero specular term */ 1209 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1210 emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0); 1211 } 1212 else { 1213 /* no attenutation, no specular */ 1214 emit_degenerate_lit(p, lit, dots); 1215 emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0); 1216 } 1217 1218 emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0); 1219 emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1); 1220 1221 release_temp(p, ambient); 1222 release_temp(p, diffuse); 1223 release_temp(p, specular); 1224 } 1225 1226 /* Back face lighting: 1227 */ 1228 if (twoside) { 1229 struct ureg ambient = get_lightprod(p, i, 1, STATE_AMBIENT); 1230 struct ureg diffuse = get_lightprod(p, i, 1, STATE_DIFFUSE); 1231 struct ureg specular = get_lightprod(p, i, 1, STATE_SPECULAR); 1232 struct ureg res0, res1; 1233 GLuint mask0, mask1; 1234 1235 if (count == nr_lights) { 1236 if (separate) { 1237 mask0 = WRITEMASK_XYZ; 1238 mask1 = WRITEMASK_XYZ; 1239 res0 = register_output( p, VERT_RESULT_BFC0 ); 1240 res1 = register_output( p, VERT_RESULT_BFC1 ); 1241 } 1242 else { 1243 mask0 = 0; 1244 mask1 = WRITEMASK_XYZ; 1245 res0 = _bfc0; 1246 res1 = register_output( p, VERT_RESULT_BFC0 ); 1247 } 1248 } 1249 else { 1250 res0 = _bfc0; 1251 res1 = _bfc1; 1252 mask0 = 0; 1253 mask1 = 0; 1254 } 1255 1256 /* For the back face we need to negate the X and Y component 1257 * dot products. dots.Z has the negated back-face specular 1258 * exponent. We swizzle that into the W position. This 1259 * negation makes the back-face specular term positive again. 
1260 */ 1261 dots = negate(swizzle(dots,X,Y,W,Z)); 1262 1263 if (!is_undef(att)) { 1264 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1265 emit_op2(p, OPCODE_MUL, lit, 0, lit, att); 1266 emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0); 1267 } 1268 else if (!p->state->material_shininess_is_zero) { 1269 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1270 emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0); /**/ 1271 } 1272 else { 1273 emit_degenerate_lit(p, lit, dots); 1274 emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0); 1275 } 1276 1277 emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0); 1278 emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1); 1279 /* restore dots to its original state for subsequent lights 1280 * by negating and swizzling again. 1281 */ 1282 dots = negate(swizzle(dots,X,Y,W,Z)); 1283 1284 release_temp(p, ambient); 1285 release_temp(p, diffuse); 1286 release_temp(p, specular); 1287 } 1288 1289 release_temp(p, half); 1290 release_temp(p, VPpli); 1291 release_temp(p, att); 1292 } 1293 } 1294 1295 release_temps( p ); 1296} 1297 1298 1299static void build_fog( struct tnl_program *p ) 1300{ 1301 struct ureg fog = register_output(p, VERT_RESULT_FOGC); 1302 struct ureg input; 1303 1304 if (p->state->fog_source_is_depth) { 1305 input = get_eye_position_z(p); 1306 } 1307 else { 1308 input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X); 1309 } 1310 1311 /* result.fog = {abs(f),0,0,1}; */ 1312 emit_op1(p, OPCODE_ABS, fog, WRITEMASK_X, input); 1313 emit_op1(p, OPCODE_MOV, fog, WRITEMASK_YZW, get_identity_param(p)); 1314} 1315 1316 1317static void build_reflect_texgen( struct tnl_program *p, 1318 struct ureg dest, 1319 GLuint writemask ) 1320{ 1321 struct ureg normal = get_transformed_normal(p); 1322 struct ureg eye_hat = get_eye_position_normalized(p); 1323 struct ureg tmp = get_temp(p); 1324 1325 /* n.u */ 1326 emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat); 1327 /* 2n.u */ 1328 emit_op2(p, OPCODE_ADD, tmp, 0, 
tmp, tmp); 1329 /* (-2n.u)n + u */ 1330 emit_op3(p, OPCODE_MAD, dest, writemask, negate(tmp), normal, eye_hat); 1331 1332 release_temp(p, tmp); 1333} 1334 1335 1336static void build_sphere_texgen( struct tnl_program *p, 1337 struct ureg dest, 1338 GLuint writemask ) 1339{ 1340 struct ureg normal = get_transformed_normal(p); 1341 struct ureg eye_hat = get_eye_position_normalized(p); 1342 struct ureg tmp = get_temp(p); 1343 struct ureg half = register_scalar_const(p, .5); 1344 struct ureg r = get_temp(p); 1345 struct ureg inv_m = get_temp(p); 1346 struct ureg id = get_identity_param(p); 1347 1348 /* Could share the above calculations, but it would be 1349 * a fairly odd state for someone to set (both sphere and 1350 * reflection active for different texture coordinate 1351 * components. Of course - if two texture units enable 1352 * reflect and/or sphere, things start to tilt in favour 1353 * of seperating this out: 1354 */ 1355 1356 /* n.u */ 1357 emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat); 1358 /* 2n.u */ 1359 emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp); 1360 /* (-2n.u)n + u */ 1361 emit_op3(p, OPCODE_MAD, r, 0, negate(tmp), normal, eye_hat); 1362 /* r + 0,0,1 */ 1363 emit_op2(p, OPCODE_ADD, tmp, 0, r, swizzle(id,X,Y,W,Z)); 1364 /* rx^2 + ry^2 + (rz+1)^2 */ 1365 emit_op2(p, OPCODE_DP3, tmp, 0, tmp, tmp); 1366 /* 2/m */ 1367 emit_op1(p, OPCODE_RSQ, tmp, 0, tmp); 1368 /* 1/m */ 1369 emit_op2(p, OPCODE_MUL, inv_m, 0, tmp, half); 1370 /* r/m + 1/2 */ 1371 emit_op3(p, OPCODE_MAD, dest, writemask, r, inv_m, half); 1372 1373 release_temp(p, tmp); 1374 release_temp(p, r); 1375 release_temp(p, inv_m); 1376} 1377 1378 1379static void build_texture_transform( struct tnl_program *p ) 1380{ 1381 GLuint i, j; 1382 1383 for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) { 1384 1385 if (!(p->state->fragprog_inputs_read & FRAG_BIT_TEX(i))) 1386 continue; 1387 1388 if (p->state->unit[i].texgen_enabled || 1389 p->state->unit[i].texmat_enabled) { 1390 1391 GLuint texmat_enabled = 
p->state->unit[i].texmat_enabled; 1392 struct ureg out = register_output(p, VERT_RESULT_TEX0 + i); 1393 struct ureg out_texgen = undef; 1394 1395 if (p->state->unit[i].texgen_enabled) { 1396 GLuint copy_mask = 0; 1397 GLuint sphere_mask = 0; 1398 GLuint reflect_mask = 0; 1399 GLuint normal_mask = 0; 1400 GLuint modes[4]; 1401 1402 if (texmat_enabled) 1403 out_texgen = get_temp(p); 1404 else 1405 out_texgen = out; 1406 1407 modes[0] = p->state->unit[i].texgen_mode0; 1408 modes[1] = p->state->unit[i].texgen_mode1; 1409 modes[2] = p->state->unit[i].texgen_mode2; 1410 modes[3] = p->state->unit[i].texgen_mode3; 1411 1412 for (j = 0; j < 4; j++) { 1413 switch (modes[j]) { 1414 case TXG_OBJ_LINEAR: { 1415 struct ureg obj = register_input(p, VERT_ATTRIB_POS); 1416 struct ureg plane = 1417 register_param3(p, STATE_TEXGEN, i, 1418 STATE_TEXGEN_OBJECT_S + j); 1419 1420 emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j, 1421 obj, plane ); 1422 break; 1423 } 1424 case TXG_EYE_LINEAR: { 1425 struct ureg eye = get_eye_position(p); 1426 struct ureg plane = 1427 register_param3(p, STATE_TEXGEN, i, 1428 STATE_TEXGEN_EYE_S + j); 1429 1430 emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j, 1431 eye, plane ); 1432 break; 1433 } 1434 case TXG_SPHERE_MAP: 1435 sphere_mask |= WRITEMASK_X << j; 1436 break; 1437 case TXG_REFLECTION_MAP: 1438 reflect_mask |= WRITEMASK_X << j; 1439 break; 1440 case TXG_NORMAL_MAP: 1441 normal_mask |= WRITEMASK_X << j; 1442 break; 1443 case TXG_NONE: 1444 copy_mask |= WRITEMASK_X << j; 1445 } 1446 } 1447 1448 if (sphere_mask) { 1449 build_sphere_texgen(p, out_texgen, sphere_mask); 1450 } 1451 1452 if (reflect_mask) { 1453 build_reflect_texgen(p, out_texgen, reflect_mask); 1454 } 1455 1456 if (normal_mask) { 1457 struct ureg normal = get_transformed_normal(p); 1458 emit_op1(p, OPCODE_MOV, out_texgen, normal_mask, normal ); 1459 } 1460 1461 if (copy_mask) { 1462 struct ureg in = register_input(p, VERT_ATTRIB_TEX0+i); 1463 emit_op1(p, OPCODE_MOV, 
out_texgen, copy_mask, in ); 1464 } 1465 } 1466 1467 if (texmat_enabled) { 1468 struct ureg texmat[4]; 1469 struct ureg in = (!is_undef(out_texgen) ? 1470 out_texgen : 1471 register_input(p, VERT_ATTRIB_TEX0+i)); 1472 if (p->mvp_with_dp4) { 1473 register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3, 1474 0, texmat ); 1475 emit_matrix_transform_vec4( p, out, texmat, in ); 1476 } 1477 else { 1478 register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3, 1479 STATE_MATRIX_TRANSPOSE, texmat ); 1480 emit_transpose_matrix_transform_vec4( p, out, texmat, in ); 1481 } 1482 } 1483 1484 release_temps(p); 1485 } 1486 else { 1487 emit_passthrough(p, VERT_ATTRIB_TEX0+i, VERT_RESULT_TEX0+i); 1488 } 1489 } 1490} 1491 1492 1493/** 1494 * Point size attenuation computation. 1495 */ 1496static void build_atten_pointsize( struct tnl_program *p ) 1497{ 1498 struct ureg eye = get_eye_position_z(p); 1499 struct ureg state_size = register_param2(p, STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED); 1500 struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION); 1501 struct ureg out = register_output(p, VERT_RESULT_PSIZ); 1502 struct ureg ut = get_temp(p); 1503 1504 /* dist = |eyez| */ 1505 emit_op1(p, OPCODE_ABS, ut, WRITEMASK_Y, swizzle1(eye, Z)); 1506 /* p1 + dist * (p2 + dist * p3); */ 1507 emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y), 1508 swizzle1(state_attenuation, Z), swizzle1(state_attenuation, Y)); 1509 emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y), 1510 ut, swizzle1(state_attenuation, X)); 1511 1512 /* 1 / sqrt(factor) */ 1513 emit_op1(p, OPCODE_RSQ, ut, WRITEMASK_X, ut ); 1514 1515#if 0 1516 /* out = pointSize / sqrt(factor) */ 1517 emit_op2(p, OPCODE_MUL, out, WRITEMASK_X, ut, state_size); 1518#else 1519 /* this is a good place to clamp the point size since there's likely 1520 * no hardware registers to clamp point size at rasterization time. 
1521 */ 1522 emit_op2(p, OPCODE_MUL, ut, WRITEMASK_X, ut, state_size); 1523 emit_op2(p, OPCODE_MAX, ut, WRITEMASK_X, ut, swizzle1(state_size, Y)); 1524 emit_op2(p, OPCODE_MIN, out, WRITEMASK_X, ut, swizzle1(state_size, Z)); 1525#endif 1526 1527 release_temp(p, ut); 1528} 1529 1530 1531/** 1532 * Pass-though per-vertex point size, from user's point size array. 1533 */ 1534static void build_array_pointsize( struct tnl_program *p ) 1535{ 1536 struct ureg in = register_input(p, VERT_ATTRIB_POINT_SIZE); 1537 struct ureg out = register_output(p, VERT_RESULT_PSIZ); 1538 emit_op1(p, OPCODE_MOV, out, WRITEMASK_X, in); 1539} 1540 1541 1542static void build_tnl_program( struct tnl_program *p ) 1543{ 1544 /* Emit the program, starting with modelviewproject: 1545 */ 1546 build_hpos(p); 1547 1548 /* Lighting calculations: 1549 */ 1550 if (p->state->fragprog_inputs_read & (FRAG_BIT_COL0|FRAG_BIT_COL1)) { 1551 if (p->state->light_global_enabled) 1552 build_lighting(p); 1553 else { 1554 if (p->state->fragprog_inputs_read & FRAG_BIT_COL0) 1555 emit_passthrough(p, VERT_ATTRIB_COLOR0, VERT_RESULT_COL0); 1556 1557 if (p->state->fragprog_inputs_read & FRAG_BIT_COL1) 1558 emit_passthrough(p, VERT_ATTRIB_COLOR1, VERT_RESULT_COL1); 1559 } 1560 } 1561 1562 if (p->state->fragprog_inputs_read & FRAG_BIT_FOGC) 1563 build_fog(p); 1564 1565 if (p->state->fragprog_inputs_read & FRAG_BITS_TEX_ANY) 1566 build_texture_transform(p); 1567 1568 if (p->state->point_attenuated) 1569 build_atten_pointsize(p); 1570 else if (p->state->point_array) 1571 build_array_pointsize(p); 1572 1573 /* Finish up: 1574 */ 1575 emit_op1(p, OPCODE_END, undef, 0, undef); 1576 1577 /* Disassemble: 1578 */ 1579 if (DISASSEM) { 1580 printf ("\n"); 1581 } 1582} 1583 1584 1585static void 1586create_new_program( const struct state_key *key, 1587 struct gl_vertex_program *program, 1588 GLboolean mvp_with_dp4, 1589 GLuint max_temps) 1590{ 1591 struct tnl_program p; 1592 1593 memset(&p, 0, sizeof(p)); 1594 p.state = key; 1595 
p.program = program; 1596 p.eye_position = undef; 1597 p.eye_position_z = undef; 1598 p.eye_position_normalized = undef; 1599 p.transformed_normal = undef; 1600 p.identity = undef; 1601 p.temp_in_use = 0; 1602 p.mvp_with_dp4 = mvp_with_dp4; 1603 1604 if (max_temps >= sizeof(int) * 8) 1605 p.temp_reserved = 0; 1606 else 1607 p.temp_reserved = ~((1<<max_temps)-1); 1608 1609 /* Start by allocating 32 instructions. 1610 * If we need more, we'll grow the instruction array as needed. 1611 */ 1612 p.max_inst = 32; 1613 p.program->Base.Instructions = _mesa_alloc_instructions(p.max_inst); 1614 p.program->Base.String = NULL; 1615 p.program->Base.NumInstructions = 1616 p.program->Base.NumTemporaries = 1617 p.program->Base.NumParameters = 1618 p.program->Base.NumAttributes = p.program->Base.NumAddressRegs = 0; 1619 p.program->Base.Parameters = _mesa_new_parameter_list(); 1620 p.program->Base.InputsRead = 0; 1621 p.program->Base.OutputsWritten = 0; 1622 1623 build_tnl_program( &p ); 1624} 1625 1626 1627/** 1628 * Return a vertex program which implements the current fixed-function 1629 * transform/lighting/texgen operations. 
1630 * XXX move this into core mesa (main/) 1631 */ 1632struct gl_vertex_program * 1633_mesa_get_fixed_func_vertex_program(GLcontext *ctx) 1634{ 1635 struct gl_vertex_program *prog; 1636 struct state_key key; 1637 1638 /* Grab all the relevent state and put it in a single structure: 1639 */ 1640 make_state_key(ctx, &key); 1641 1642 /* Look for an already-prepared program for this state: 1643 */ 1644 prog = (struct gl_vertex_program *) 1645 _mesa_search_program_cache(ctx->VertexProgram.Cache, &key, sizeof(key)); 1646 1647 if (!prog) { 1648 /* OK, we'll have to build a new one */ 1649 if (0) 1650 printf("Build new TNL program\n"); 1651 1652 prog = (struct gl_vertex_program *) 1653 ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 0); 1654 if (!prog) 1655 return NULL; 1656 1657 create_new_program( &key, prog, 1658 ctx->mvp_with_dp4, 1659 ctx->Const.VertexProgram.MaxTemps ); 1660 1661#if 0 1662 if (ctx->Driver.ProgramStringNotify) 1663 ctx->Driver.ProgramStringNotify( ctx, GL_VERTEX_PROGRAM_ARB, 1664 &prog->Base ); 1665#endif 1666 _mesa_program_cache_insert(ctx, ctx->VertexProgram.Cache, 1667 &key, sizeof(key), &prog->Base); 1668 } 1669 1670 return prog; 1671} 1672