ffvertex_prog.c revision c1f859d4
1/************************************************************************** 2 * 3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * \file ffvertex_prog.c 30 * 31 * Create a vertex program to execute the current fixed function T&L pipeline. 32 * \author Keith Whitwell 33 */ 34 35 36#include "main/glheader.h" 37#include "main/mtypes.h" 38#include "main/macros.h" 39#include "main/enums.h" 40#include "main/ffvertex_prog.h" 41#include "shader/program.h" 42#include "shader/prog_cache.h" 43#include "shader/prog_instruction.h" 44#include "shader/prog_parameter.h" 45#include "shader/prog_print.h" 46#include "shader/prog_statevars.h" 47 48 49struct state_key { 50 unsigned light_global_enabled:1; 51 unsigned light_local_viewer:1; 52 unsigned light_twoside:1; 53 unsigned light_color_material:1; 54 unsigned light_color_material_mask:12; 55 unsigned light_material_mask:12; 56 unsigned material_shininess_is_zero:1; 57 58 unsigned need_eye_coords:1; 59 unsigned normalize:1; 60 unsigned rescale_normals:1; 61 unsigned fog_source_is_depth:1; 62 unsigned tnl_do_vertex_fog:1; 63 unsigned separate_specular:1; 64 unsigned fog_mode:2; 65 unsigned point_attenuated:1; 66 unsigned point_array:1; 67 unsigned texture_enabled_global:1; 68 unsigned fragprog_inputs_read:12; 69 70 struct { 71 unsigned light_enabled:1; 72 unsigned light_eyepos3_is_zero:1; 73 unsigned light_spotcutoff_is_180:1; 74 unsigned light_attenuated:1; 75 unsigned texunit_really_enabled:1; 76 unsigned texmat_enabled:1; 77 unsigned texgen_enabled:4; 78 unsigned texgen_mode0:4; 79 unsigned texgen_mode1:4; 80 unsigned texgen_mode2:4; 81 unsigned texgen_mode3:4; 82 } unit[8]; 83}; 84 85 86 87#define FOG_NONE 0 88#define FOG_LINEAR 1 89#define FOG_EXP 2 90#define FOG_EXP2 3 91 92static GLuint translate_fog_mode( GLenum mode ) 93{ 94 switch (mode) { 95 case GL_LINEAR: return FOG_LINEAR; 96 case GL_EXP: return FOG_EXP; 97 case GL_EXP2: return FOG_EXP2; 98 default: return FOG_NONE; 99 } 100} 101 102 103#define TXG_NONE 0 104#define TXG_OBJ_LINEAR 1 105#define TXG_EYE_LINEAR 2 106#define TXG_SPHERE_MAP 3 107#define TXG_REFLECTION_MAP 4 108#define TXG_NORMAL_MAP 5 109 110static GLuint translate_texgen( GLboolean enabled, GLenum mode ) 111{ 112 if (!enabled) 113 return TXG_NONE; 114 115 switch (mode) { 116 case GL_OBJECT_LINEAR: return TXG_OBJ_LINEAR; 117 case GL_EYE_LINEAR: return TXG_EYE_LINEAR; 118 case GL_SPHERE_MAP: return TXG_SPHERE_MAP; 119 case GL_REFLECTION_MAP_NV: return TXG_REFLECTION_MAP; 120 case GL_NORMAL_MAP_NV: return TXG_NORMAL_MAP; 121 default: return TXG_NONE; 122 } 123} 124 125 126/** 127 * Returns bitmask of flags indicating which materials are set per-vertex 128 * in the current VB. 129 * XXX get these from the VBO... 130 */ 131static GLbitfield 132tnl_get_per_vertex_materials(GLcontext *ctx) 133{ 134 GLbitfield mask = 0x0; 135#if 0 136 TNLcontext *tnl = TNL_CONTEXT(ctx); 137 struct vertex_buffer *VB = &tnl->vb; 138 GLuint i; 139 140 for (i = _TNL_FIRST_MAT; i <= _TNL_LAST_MAT; i++) 141 if (VB->AttribPtr[i] && VB->AttribPtr[i]->stride) 142 mask |= 1 << (i - _TNL_FIRST_MAT); 143#endif 144 return mask; 145} 146 147 148/** 149 * Should fog be computed per-vertex? 150 */ 151static GLboolean 152tnl_get_per_vertex_fog(GLcontext *ctx) 153{ 154#if 0 155 TNLcontext *tnl = TNL_CONTEXT(ctx); 156 return tnl->_DoVertexFog; 157#else 158 return GL_FALSE; 159#endif 160} 161 162 163static GLboolean check_active_shininess( GLcontext *ctx, 164 const struct state_key *key, 165 GLuint side ) 166{ 167 GLuint bit = 1 << (MAT_ATTRIB_FRONT_SHININESS + side); 168 169 if (key->light_color_material_mask & bit) 170 return GL_TRUE; 171 172 if (key->light_material_mask & bit) 173 return GL_TRUE; 174 175 if (ctx->Light.Material.Attrib[MAT_ATTRIB_FRONT_SHININESS + side][0] != 0.0F) 176 return GL_TRUE; 177 178 return GL_FALSE; 179} 180 181 182static void make_state_key( GLcontext *ctx, struct state_key *key ) 183{ 184 const struct gl_fragment_program *fp; 185 GLuint i; 186 187 memset(key, 0, sizeof(struct state_key)); 188 fp = ctx->FragmentProgram._Current; 189 190 /* This now relies on texenvprogram.c being active: 191 */ 192 assert(fp); 193 194 key->need_eye_coords = ctx->_NeedEyeCoords; 195 196 key->fragprog_inputs_read = fp->Base.InputsRead; 197 198 if (ctx->RenderMode == GL_FEEDBACK) { 199 /* make sure the vertprog emits color and tex0 */ 200 key->fragprog_inputs_read |= (FRAG_BIT_COL0 | FRAG_BIT_TEX0); 201 } 202 203 key->separate_specular = (ctx->Light.Model.ColorControl == 204 GL_SEPARATE_SPECULAR_COLOR); 205 206 if (ctx->Light.Enabled) { 207 key->light_global_enabled = 1; 208 209 if (ctx->Light.Model.LocalViewer) 210 key->light_local_viewer = 1; 211 212 if (ctx->Light.Model.TwoSide) 213 key->light_twoside = 1; 214 215 if (ctx->Light.ColorMaterialEnabled) { 216 key->light_color_material = 1; 217 key->light_color_material_mask = ctx->Light.ColorMaterialBitmask; 218 } 219 220 key->light_material_mask = tnl_get_per_vertex_materials(ctx); 221 222 for (i = 0; i < MAX_LIGHTS; i++) { 223 struct gl_light *light = &ctx->Light.Light[i]; 224 225 if (light->Enabled) { 226 key->unit[i].light_enabled = 1; 227 228 if (light->EyePosition[3] == 0.0) 229 key->unit[i].light_eyepos3_is_zero = 1; 230 231 if (light->SpotCutoff == 180.0) 232 key->unit[i].light_spotcutoff_is_180 = 1; 233 234 if (light->ConstantAttenuation != 1.0 || 235 light->LinearAttenuation != 0.0 || 236 light->QuadraticAttenuation != 0.0) 237 key->unit[i].light_attenuated = 1; 238 } 239 } 240 241 if (check_active_shininess(ctx, key, 0)) { 242 key->material_shininess_is_zero = 0; 243 } 244 else if (key->light_twoside && 245 check_active_shininess(ctx, key, 1)) { 246 key->material_shininess_is_zero = 0; 247 } 248 else { 249 key->material_shininess_is_zero = 1; 250 } 251 } 252 253 if (ctx->Transform.Normalize) 254 key->normalize = 1; 255 256 if (ctx->Transform.RescaleNormals) 257 key->rescale_normals = 1; 258 259 key->fog_mode = translate_fog_mode(fp->FogOption); 260 261 if (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT) 262 key->fog_source_is_depth = 1; 263 264 key->tnl_do_vertex_fog = tnl_get_per_vertex_fog(ctx); 265 266 if (ctx->Point._Attenuated) 267 key->point_attenuated = 1; 268 269#if FEATURE_point_size_array 270 if (ctx->Array.ArrayObj->PointSize.Enabled) 271 key->point_array = 1; 272#endif 273 274 if (ctx->Texture._TexGenEnabled || 275 ctx->Texture._TexMatEnabled || 276 ctx->Texture._EnabledUnits) 277 key->texture_enabled_global = 1; 278 279 for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) { 280 struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; 281 282 if (texUnit->_ReallyEnabled) 283 key->unit[i].texunit_really_enabled = 1; 284 285 if (ctx->Texture._TexMatEnabled & ENABLE_TEXMAT(i)) 286 key->unit[i].texmat_enabled = 1; 287 288 if (texUnit->TexGenEnabled) { 289 key->unit[i].texgen_enabled = 1; 290 291 key->unit[i].texgen_mode0 = 292 translate_texgen( texUnit->TexGenEnabled & (1<<0), 293 texUnit->GenModeS ); 294 key->unit[i].texgen_mode1 = 295 translate_texgen( texUnit->TexGenEnabled & (1<<1), 296 texUnit->GenModeT ); 297 key->unit[i].texgen_mode2 = 298 translate_texgen( texUnit->TexGenEnabled & (1<<2), 299 texUnit->GenModeR ); 300 key->unit[i].texgen_mode3 = 301 translate_texgen( texUnit->TexGenEnabled & (1<<3), 302 texUnit->GenModeQ ); 303 } 304 } 305} 306 307 308 309/* Very useful debugging tool - produces annotated listing of 310 * generated program with line/function references for each 311 * instruction back into this file: 312 */ 313#define DISASSEM 0 314 315/* Should be tunable by the driver - do we want to do matrix 316 * multiplications with DP4's or with MUL/MAD's? SSE works better 317 * with the latter, drivers may differ. 318 */ 319#define PREFER_DP4 0 320 321 322/* Use uregs to represent registers internally, translate to Mesa's 323 * expected formats on emit. 324 * 325 * NOTE: These are passed by value extensively in this file rather 326 * than as usual by pointer reference. If this disturbs you, try 327 * remembering they are just 32bits in size. 328 * 329 * GCC is smart enough to deal with these dword-sized structures in 330 * much the same way as if I had defined them as dwords and was using 331 * macros to access and set the fields. This is much nicer and easier 332 * to evolve. 333 */ 334struct ureg { 335 GLuint file:4; 336 GLint idx:9; /* relative addressing may be negative */ 337 /* sizeof(idx) should == sizeof(prog_src_reg::Index) */ 338 GLuint negate:1; 339 GLuint swz:12; 340 GLuint pad:6; 341}; 342 343 344struct tnl_program { 345 const struct state_key *state; 346 struct gl_vertex_program *program; 347 GLint max_inst; /** number of instructions allocated for program */ 348 349 GLuint temp_in_use; 350 GLuint temp_reserved; 351 352 struct ureg eye_position; 353 struct ureg eye_position_z; 354 struct ureg eye_position_normalized; 355 struct ureg transformed_normal; 356 struct ureg identity; 357 358 GLuint materials; 359 GLuint color_materials; 360}; 361 362 363static const struct ureg undef = { 364 PROGRAM_UNDEFINED, 365 0, 366 0, 367 0, 368 0 369}; 370 371/* Local shorthand: 372 */ 373#define X SWIZZLE_X 374#define Y SWIZZLE_Y 375#define Z SWIZZLE_Z 376#define W SWIZZLE_W 377 378 379/* Construct a ureg: 380 */ 381static struct ureg make_ureg(GLuint file, GLint idx) 382{ 383 struct ureg reg; 384 reg.file = file; 385 reg.idx = idx; 386 reg.negate = 0; 387 reg.swz = SWIZZLE_NOOP; 388 reg.pad = 0; 389 return reg; 390} 391 392 393 394static struct ureg negate( struct ureg reg ) 395{ 396 reg.negate ^= 1; 397 return reg; 398} 399 400 401static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w ) 402{ 403 reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x), 404 GET_SWZ(reg.swz, y), 405 GET_SWZ(reg.swz, z), 406 GET_SWZ(reg.swz, w)); 407 408 return reg; 409} 410 411 412static struct ureg swizzle1( struct ureg reg, int x ) 413{ 414 return swizzle(reg, x, x, x, x); 415} 416 417 418static struct ureg get_temp( struct tnl_program *p ) 419{ 420 int bit = _mesa_ffs( ~p->temp_in_use ); 421 if (!bit) { 422 _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__); 423 _mesa_exit(1); 424 } 425 426 if ((GLuint) bit > p->program->Base.NumTemporaries) 427 p->program->Base.NumTemporaries = bit; 428 429 p->temp_in_use |= 1<<(bit-1); 430 return make_ureg(PROGRAM_TEMPORARY, bit-1); 431} 432 433 434static struct ureg reserve_temp( struct tnl_program *p ) 435{ 436 struct ureg temp = get_temp( p ); 437 p->temp_reserved |= 1<<temp.idx; 438 return temp; 439} 440 441 442static void release_temp( struct tnl_program *p, struct ureg reg ) 443{ 444 if (reg.file == PROGRAM_TEMPORARY) { 445 p->temp_in_use &= ~(1<<reg.idx); 446 p->temp_in_use |= p->temp_reserved; /* can't release reserved temps */ 447 } 448} 449 450 451static void release_temps( struct tnl_program *p ) 452{ 453 p->temp_in_use = p->temp_reserved; 454} 455 456 457/** 458 * \param input one of VERT_ATTRIB_x tokens. 459 */ 460static struct ureg register_input( struct tnl_program *p, GLuint input ) 461{ 462 p->program->Base.InputsRead |= (1<<input); 463 return make_ureg(PROGRAM_INPUT, input); 464} 465 466 467/** 468 * \param input one of VERT_RESULT_x tokens. 469 */ 470static struct ureg register_output( struct tnl_program *p, GLuint output ) 471{ 472 p->program->Base.OutputsWritten |= (1<<output); 473 return make_ureg(PROGRAM_OUTPUT, output); 474} 475 476 477static struct ureg register_const4f( struct tnl_program *p, 478 GLfloat s0, 479 GLfloat s1, 480 GLfloat s2, 481 GLfloat s3) 482{ 483 GLfloat values[4]; 484 GLint idx; 485 GLuint swizzle; 486 values[0] = s0; 487 values[1] = s1; 488 values[2] = s2; 489 values[3] = s3; 490 idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4, 491 &swizzle ); 492 ASSERT(swizzle == SWIZZLE_NOOP); 493 return make_ureg(PROGRAM_CONSTANT, idx); 494} 495 496 497#define register_const1f(p, s0) register_const4f(p, s0, 0, 0, 1) 498#define register_scalar_const(p, s0) register_const4f(p, s0, s0, s0, s0) 499#define register_const2f(p, s0, s1) register_const4f(p, s0, s1, 0, 1) 500#define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1) 501 502static GLboolean is_undef( struct ureg reg ) 503{ 504 return reg.file == PROGRAM_UNDEFINED; 505} 506 507 508static struct ureg get_identity_param( struct tnl_program *p ) 509{ 510 if (is_undef(p->identity)) 511 p->identity = register_const4f(p, 0,0,0,1); 512 513 return p->identity; 514} 515 516 517static struct ureg register_param5(struct tnl_program *p, 518 GLint s0, 519 GLint s1, 520 GLint s2, 521 GLint s3, 522 GLint s4) 523{ 524 gl_state_index tokens[STATE_LENGTH]; 525 GLint idx; 526 tokens[0] = s0; 527 tokens[1] = s1; 528 tokens[2] = s2; 529 tokens[3] = s3; 530 tokens[4] = s4; 531 idx = _mesa_add_state_reference( p->program->Base.Parameters, tokens ); 532 return make_ureg(PROGRAM_STATE_VAR, idx); 533} 534 535 536#define register_param1(p,s0) register_param5(p,s0,0,0,0,0) 537#define register_param2(p,s0,s1) register_param5(p,s0,s1,0,0,0) 538#define register_param3(p,s0,s1,s2) register_param5(p,s0,s1,s2,0,0) 539#define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0) 540 541 542static void register_matrix_param5( struct tnl_program *p, 543 GLint s0, /* modelview, projection, etc */ 544 GLint s1, /* texture matrix number */ 545 GLint s2, /* first row */ 546 GLint s3, /* last row */ 547 GLint s4, /* inverse, transpose, etc */ 548 struct ureg *matrix ) 549{ 550 GLint i; 551 552 /* This is a bit sad as the support is there to pull the whole 553 * matrix out in one go: 554 */ 555 for (i = 0; i <= s3 - s2; i++) 556 matrix[i] = register_param5( p, s0, s1, i, i, s4 ); 557} 558 559 560static void emit_arg( struct prog_src_register *src, 561 struct ureg reg ) 562{ 563 src->File = reg.file; 564 src->Index = reg.idx; 565 src->Swizzle = reg.swz; 566 src->NegateBase = reg.negate ? NEGATE_XYZW : 0; 567 src->Abs = 0; 568 src->NegateAbs = 0; 569 src->RelAddr = 0; 570 /* Check that bitfield sizes aren't exceeded */ 571 ASSERT(src->Index == reg.idx); 572} 573 574 575static void emit_dst( struct prog_dst_register *dst, 576 struct ureg reg, GLuint mask ) 577{ 578 dst->File = reg.file; 579 dst->Index = reg.idx; 580 /* allow zero as a shorthand for xyzw */ 581 dst->WriteMask = mask ? mask : WRITEMASK_XYZW; 582 dst->CondMask = COND_TR; /* always pass cond test */ 583 dst->CondSwizzle = SWIZZLE_NOOP; 584 dst->CondSrc = 0; 585 dst->pad = 0; 586 /* Check that bitfield sizes aren't exceeded */ 587 ASSERT(dst->Index == reg.idx); 588} 589 590 591static void debug_insn( struct prog_instruction *inst, const char *fn, 592 GLuint line ) 593{ 594 if (DISASSEM) { 595 static const char *last_fn; 596 597 if (fn != last_fn) { 598 last_fn = fn; 599 _mesa_printf("%s:\n", fn); 600 } 601 602 _mesa_printf("%d:\t", line); 603 _mesa_print_instruction(inst); 604 } 605} 606 607 608static void emit_op3fn(struct tnl_program *p, 609 enum prog_opcode op, 610 struct ureg dest, 611 GLuint mask, 612 struct ureg src0, 613 struct ureg src1, 614 struct ureg src2, 615 const char *fn, 616 GLuint line) 617{ 618 GLuint nr; 619 struct prog_instruction *inst; 620 621 assert((GLint) p->program->Base.NumInstructions <= p->max_inst); 622 623 if (p->program->Base.NumInstructions == p->max_inst) { 624 /* need to extend the program's instruction array */ 625 struct prog_instruction *newInst; 626 627 /* double the size */ 628 p->max_inst *= 2; 629 630 newInst = _mesa_alloc_instructions(p->max_inst); 631 if (!newInst) { 632 _mesa_error(NULL, GL_OUT_OF_MEMORY, "vertex program build"); 633 return; 634 } 635 636 _mesa_copy_instructions(newInst, 637 p->program->Base.Instructions, 638 p->program->Base.NumInstructions); 639 640 _mesa_free_instructions(p->program->Base.Instructions, 641 p->program->Base.NumInstructions); 642 643 p->program->Base.Instructions = newInst; 644 } 645 646 nr = p->program->Base.NumInstructions++; 647 648 inst = &p->program->Base.Instructions[nr]; 649 inst->Opcode = (enum prog_opcode) op; 650 inst->StringPos = 0; 651 inst->Data = 0; 652 653 emit_arg( &inst->SrcReg[0], src0 ); 654 emit_arg( &inst->SrcReg[1], src1 ); 655 emit_arg( &inst->SrcReg[2], src2 ); 656 657 emit_dst( &inst->DstReg, dest, mask ); 658 659 debug_insn(inst, fn, line); 660} 661 662 663#define emit_op3(p, op, dst, mask, src0, src1, src2) \ 664 emit_op3fn(p, op, dst, mask, src0, src1, src2, __FUNCTION__, __LINE__) 665 666#define emit_op2(p, op, dst, mask, src0, src1) \ 667 emit_op3fn(p, op, dst, mask, src0, src1, undef, __FUNCTION__, __LINE__) 668 669#define emit_op1(p, op, dst, mask, src0) \ 670 emit_op3fn(p, op, dst, mask, src0, undef, undef, __FUNCTION__, __LINE__) 671 672 673static struct ureg make_temp( struct tnl_program *p, struct ureg reg ) 674{ 675 if (reg.file == PROGRAM_TEMPORARY && 676 !(p->temp_reserved & (1<<reg.idx))) 677 return reg; 678 else { 679 struct ureg temp = get_temp(p); 680 emit_op1(p, OPCODE_MOV, temp, 0, reg); 681 return temp; 682 } 683} 684 685 686/* Currently no tracking performed of input/output/register size or 687 * active elements. Could be used to reduce these operations, as 688 * could the matrix type. 689 */ 690static void emit_matrix_transform_vec4( struct tnl_program *p, 691 struct ureg dest, 692 const struct ureg *mat, 693 struct ureg src) 694{ 695 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_X, src, mat[0]); 696 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Y, src, mat[1]); 697 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Z, src, mat[2]); 698 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_W, src, mat[3]); 699} 700 701 702/* This version is much easier to implement if writemasks are not 703 * supported natively on the target or (like SSE), the target doesn't 704 * have a clean/obvious dotproduct implementation. 705 */ 706static void emit_transpose_matrix_transform_vec4( struct tnl_program *p, 707 struct ureg dest, 708 const struct ureg *mat, 709 struct ureg src) 710{ 711 struct ureg tmp; 712 713 if (dest.file != PROGRAM_TEMPORARY) 714 tmp = get_temp(p); 715 else 716 tmp = dest; 717 718 emit_op2(p, OPCODE_MUL, tmp, 0, swizzle1(src,X), mat[0]); 719 emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Y), mat[1], tmp); 720 emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Z), mat[2], tmp); 721 emit_op3(p, OPCODE_MAD, dest, 0, swizzle1(src,W), mat[3], tmp); 722 723 if (dest.file != PROGRAM_TEMPORARY) 724 release_temp(p, tmp); 725} 726 727 728static void emit_matrix_transform_vec3( struct tnl_program *p, 729 struct ureg dest, 730 const struct ureg *mat, 731 struct ureg src) 732{ 733 emit_op2(p, OPCODE_DP3, dest, WRITEMASK_X, src, mat[0]); 734 emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Y, src, mat[1]); 735 emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Z, src, mat[2]); 736} 737 738 739static void emit_normalize_vec3( struct tnl_program *p, 740 struct ureg dest, 741 struct ureg src ) 742{ 743#if 0 744 /* XXX use this when drivers are ready for NRM3 */ 745 emit_op1(p, OPCODE_NRM3, dest, WRITEMASK_XYZ, src); 746#else 747 struct ureg tmp = get_temp(p); 748 emit_op2(p, OPCODE_DP3, tmp, WRITEMASK_X, src, src); 749 emit_op1(p, OPCODE_RSQ, tmp, WRITEMASK_X, tmp); 750 emit_op2(p, OPCODE_MUL, dest, 0, src, swizzle1(tmp, X)); 751 release_temp(p, tmp); 752#endif 753} 754 755 756static void emit_passthrough( struct tnl_program *p, 757 GLuint input, 758 GLuint output ) 759{ 760 struct ureg out = register_output(p, output); 761 emit_op1(p, OPCODE_MOV, out, 0, register_input(p, input)); 762} 763 764 765static struct ureg get_eye_position( struct tnl_program *p ) 766{ 767 if (is_undef(p->eye_position)) { 768 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 769 struct ureg modelview[4]; 770 771 p->eye_position = reserve_temp(p); 772 773 if (PREFER_DP4) { 774 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3, 775 0, modelview ); 776 777 emit_matrix_transform_vec4(p, p->eye_position, modelview, pos); 778 } 779 else { 780 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3, 781 STATE_MATRIX_TRANSPOSE, modelview ); 782 783 emit_transpose_matrix_transform_vec4(p, p->eye_position, modelview, pos); 784 } 785 } 786 787 return p->eye_position; 788} 789 790 791static struct ureg get_eye_position_z( struct tnl_program *p ) 792{ 793 if (!is_undef(p->eye_position)) 794 return swizzle1(p->eye_position, Z); 795 796 if (is_undef(p->eye_position_z)) { 797 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 798 struct ureg modelview[4]; 799 800 p->eye_position_z = reserve_temp(p); 801 802 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3, 803 0, modelview ); 804 805 emit_op2(p, OPCODE_DP4, p->eye_position_z, 0, pos, modelview[2]); 806 } 807 808 return p->eye_position_z; 809} 810 811 812static struct ureg get_eye_position_normalized( struct tnl_program *p ) 813{ 814 if (is_undef(p->eye_position_normalized)) { 815 struct ureg eye = get_eye_position(p); 816 p->eye_position_normalized = reserve_temp(p); 817 emit_normalize_vec3(p, p->eye_position_normalized, eye); 818 } 819 820 return p->eye_position_normalized; 821} 822 823 824static struct ureg get_transformed_normal( struct tnl_program *p ) 825{ 826 if (is_undef(p->transformed_normal) && 827 !p->state->need_eye_coords && 828 !p->state->normalize && 829 !(p->state->need_eye_coords == p->state->rescale_normals)) 830 { 831 p->transformed_normal = register_input(p, VERT_ATTRIB_NORMAL ); 832 } 833 else if (is_undef(p->transformed_normal)) 834 { 835 struct ureg normal = register_input(p, VERT_ATTRIB_NORMAL ); 836 struct ureg mvinv[3]; 837 struct ureg transformed_normal = reserve_temp(p); 838 839 if (p->state->need_eye_coords) { 840 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 2, 841 STATE_MATRIX_INVTRANS, mvinv ); 842 843 /* Transform to eye space: 844 */ 845 emit_matrix_transform_vec3( p, transformed_normal, mvinv, normal ); 846 normal = transformed_normal; 847 } 848 849 /* Normalize/Rescale: 850 */ 851 if (p->state->normalize) { 852 emit_normalize_vec3( p, transformed_normal, normal ); 853 normal = transformed_normal; 854 } 855 else if (p->state->need_eye_coords == p->state->rescale_normals) { 856 /* This is already adjusted for eye/non-eye rendering: 857 */ 858 struct ureg rescale = register_param2(p, STATE_INTERNAL, 859 STATE_NORMAL_SCALE); 860 861 emit_op2( p, OPCODE_MUL, transformed_normal, 0, normal, rescale ); 862 normal = transformed_normal; 863 } 864 865 assert(normal.file == PROGRAM_TEMPORARY); 866 p->transformed_normal = normal; 867 } 868 869 return p->transformed_normal; 870} 871 872 873static void build_hpos( struct tnl_program *p ) 874{ 875 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 876 struct ureg hpos = register_output( p, VERT_RESULT_HPOS ); 877 struct ureg mvp[4]; 878 879 if (PREFER_DP4) { 880 register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3, 881 0, mvp ); 882 emit_matrix_transform_vec4( p, hpos, mvp, pos ); 883 } 884 else { 885 register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3, 886 STATE_MATRIX_TRANSPOSE, mvp ); 887 emit_transpose_matrix_transform_vec4( p, hpos, mvp, pos ); 888 } 889} 890 891 892static GLuint material_attrib( GLuint side, GLuint property ) 893{ 894 return ((property - STATE_AMBIENT) * 2 + 895 side); 896} 897 898 899/** 900 * Get a bitmask of which material values vary on a per-vertex basis. 901 */ 902static void set_material_flags( struct tnl_program *p ) 903{ 904 p->color_materials = 0; 905 p->materials = 0; 906 907 if (p->state->light_color_material) { 908 p->materials = 909 p->color_materials = p->state->light_color_material_mask; 910 } 911 912 p->materials |= p->state->light_material_mask; 913} 914 915 916/* XXX temporary!!! */ 917#define _TNL_ATTRIB_MAT_FRONT_AMBIENT 32 918 919static struct ureg get_material( struct tnl_program *p, GLuint side, 920 GLuint property ) 921{ 922 GLuint attrib = material_attrib(side, property); 923 924 if (p->color_materials & (1<<attrib)) 925 return register_input(p, VERT_ATTRIB_COLOR0); 926 else if (p->materials & (1<<attrib)) 927 return register_input( p, attrib + _TNL_ATTRIB_MAT_FRONT_AMBIENT ); 928 else 929 return register_param3( p, STATE_MATERIAL, side, property ); 930} 931 932#define SCENE_COLOR_BITS(side) (( MAT_BIT_FRONT_EMISSION | \ 933 MAT_BIT_FRONT_AMBIENT | \ 934 MAT_BIT_FRONT_DIFFUSE) << (side)) 935 936 937/** 938 * Either return a precalculated constant value or emit code to 939 * calculate these values dynamically in the case where material calls 940 * are present between begin/end pairs. 941 * 942 * Probably want to shift this to the program compilation phase - if 943 * we always emitted the calculation here, a smart compiler could 944 * detect that it was constant (given a certain set of inputs), and 945 * lift it out of the main loop. That way the programs created here 946 * would be independent of the vertex_buffer details. 947 */ 948static struct ureg get_scenecolor( struct tnl_program *p, GLuint side ) 949{ 950 if (p->materials & SCENE_COLOR_BITS(side)) { 951 struct ureg lm_ambient = register_param1(p, STATE_LIGHTMODEL_AMBIENT); 952 struct ureg material_emission = get_material(p, side, STATE_EMISSION); 953 struct ureg material_ambient = get_material(p, side, STATE_AMBIENT); 954 struct ureg material_diffuse = get_material(p, side, STATE_DIFFUSE); 955 struct ureg tmp = make_temp(p, material_diffuse); 956 emit_op3(p, OPCODE_MAD, tmp, WRITEMASK_XYZ, lm_ambient, 957 material_ambient, material_emission); 958 return tmp; 959 } 960 else 961 return register_param2( p, STATE_LIGHTMODEL_SCENECOLOR, side ); 962} 963 964 965static struct ureg get_lightprod( struct tnl_program *p, GLuint light, 966 GLuint side, GLuint property ) 967{ 968 GLuint attrib = material_attrib(side, property); 969 if (p->materials & (1<<attrib)) { 970 struct ureg light_value = 971 register_param3(p, STATE_LIGHT, light, property); 972 struct ureg material_value = get_material(p, side, property); 973 struct ureg tmp = get_temp(p); 974 emit_op2(p, OPCODE_MUL, tmp, 0, light_value, material_value); 975 return tmp; 976 } 977 else 978 return register_param4(p, STATE_LIGHTPROD, light, side, property); 979} 980 981 982static struct ureg calculate_light_attenuation( struct tnl_program *p, 983 GLuint i, 984 struct ureg VPpli, 985 struct ureg dist ) 986{ 987 struct ureg attenuation = register_param3(p, STATE_LIGHT, i, 988 STATE_ATTENUATION); 989 struct ureg att = get_temp(p); 990 991 /* Calculate spot attenuation: 992 */ 993 if (!p->state->unit[i].light_spotcutoff_is_180) { 994 struct ureg spot_dir_norm = register_param3(p, STATE_INTERNAL, 995 STATE_LIGHT_SPOT_DIR_NORMALIZED, i); 996 struct ureg spot = get_temp(p); 997 struct ureg slt = get_temp(p); 998 999 emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm); 1000 emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot); 1001 emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W)); 1002 emit_op2(p, OPCODE_MUL, att, 0, slt, spot); 1003 1004 release_temp(p, spot); 1005 release_temp(p, slt); 1006 } 1007 1008 /* Calculate distance attenuation: 1009 */ 1010 if (p->state->unit[i].light_attenuated) { 1011 1012 /* 1/d,d,d,1/d */ 1013 emit_op1(p, OPCODE_RCP, dist, WRITEMASK_YZ, dist); 1014 /* 1,d,d*d,1/d */ 1015 emit_op2(p, OPCODE_MUL, dist, WRITEMASK_XZ, dist, swizzle1(dist,Y)); 1016 /* 1/dist-atten */ 1017 emit_op2(p, OPCODE_DP3, dist, 0, attenuation, dist); 1018 1019 if (!p->state->unit[i].light_spotcutoff_is_180) { 1020 /* dist-atten */ 1021 emit_op1(p, OPCODE_RCP, dist, 0, dist); 1022 /* spot-atten * dist-atten */ 1023 emit_op2(p, OPCODE_MUL, att, 0, dist, att); 1024 } else { 1025 /* dist-atten */ 1026 emit_op1(p, OPCODE_RCP, att, 0, dist); 1027 } 1028 } 1029 1030 return att; 1031} 1032 1033 1034/** 1035 * Compute: 1036 * lit.y = MAX(0, dots.x) 1037 * lit.z = SLT(0, dots.x) 1038 */ 1039static void emit_degenerate_lit( struct tnl_program *p, 1040 struct ureg lit, 1041 struct ureg dots ) 1042{ 1043 struct ureg id = get_identity_param(p); /* id = {0,0,0,1} */ 1044 1045 /* Note that lit.x & lit.w will not be examined. Note also that 1046 * dots.xyzw == dots.xxxx. 1047 */ 1048 1049 /* MAX lit, id, dots; 1050 */ 1051 emit_op2(p, OPCODE_MAX, lit, WRITEMASK_XYZW, id, dots); 1052 1053 /* result[2] = (in > 0 ? 1 : 0) 1054 * SLT lit.z, id.z, dots; # lit.z = (0 < dots.z) ? 1 : 0 1055 */ 1056 emit_op2(p, OPCODE_SLT, lit, WRITEMASK_Z, swizzle1(id,Z), dots); 1057} 1058 1059 1060/* Need to add some addtional parameters to allow lighting in object 1061 * space - STATE_SPOT_DIRECTION and STATE_HALF_VECTOR implicitly assume eye 1062 * space lighting. 1063 */ 1064static void build_lighting( struct tnl_program *p ) 1065{ 1066 const GLboolean twoside = p->state->light_twoside; 1067 const GLboolean separate = p->state->separate_specular; 1068 GLuint nr_lights = 0, count = 0; 1069 struct ureg normal = get_transformed_normal(p); 1070 struct ureg lit = get_temp(p); 1071 struct ureg dots = get_temp(p); 1072 struct ureg _col0 = undef, _col1 = undef; 1073 struct ureg _bfc0 = undef, _bfc1 = undef; 1074 GLuint i; 1075 1076 /* 1077 * NOTE: 1078 * dot.x = dot(normal, VPpli) 1079 * dot.y = dot(normal, halfAngle) 1080 * dot.z = back.shininess 1081 * dot.w = front.shininess 1082 */ 1083 1084 for (i = 0; i < MAX_LIGHTS; i++) 1085 if (p->state->unit[i].light_enabled) 1086 nr_lights++; 1087 1088 set_material_flags(p); 1089 1090 { 1091 if (!p->state->material_shininess_is_zero) { 1092 struct ureg shininess = get_material(p, 0, STATE_SHININESS); 1093 emit_op1(p, OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X)); 1094 release_temp(p, shininess); 1095 } 1096 1097 _col0 = make_temp(p, get_scenecolor(p, 0)); 1098 if (separate) 1099 _col1 = make_temp(p, get_identity_param(p)); 1100 else 1101 _col1 = _col0; 1102 1103 } 1104 1105 if (twoside) { 1106 if (!p->state->material_shininess_is_zero) { 1107 /* Note that we negate the back-face specular exponent here. 1108 * The negation will be un-done later in the back-face code below. 1109 */ 1110 struct ureg shininess = get_material(p, 1, STATE_SHININESS); 1111 emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z, 1112 negate(swizzle1(shininess,X))); 1113 release_temp(p, shininess); 1114 } 1115 1116 _bfc0 = make_temp(p, get_scenecolor(p, 1)); 1117 if (separate) 1118 _bfc1 = make_temp(p, get_identity_param(p)); 1119 else 1120 _bfc1 = _bfc0; 1121 } 1122 1123 /* If no lights, still need to emit the scenecolor. 1124 */ 1125 { 1126 struct ureg res0 = register_output( p, VERT_RESULT_COL0 ); 1127 emit_op1(p, OPCODE_MOV, res0, 0, _col0); 1128 } 1129 1130 if (separate) { 1131 struct ureg res1 = register_output( p, VERT_RESULT_COL1 ); 1132 emit_op1(p, OPCODE_MOV, res1, 0, _col1); 1133 } 1134 1135 if (twoside) { 1136 struct ureg res0 = register_output( p, VERT_RESULT_BFC0 ); 1137 emit_op1(p, OPCODE_MOV, res0, 0, _bfc0); 1138 } 1139 1140 if (twoside && separate) { 1141 struct ureg res1 = register_output( p, VERT_RESULT_BFC1 ); 1142 emit_op1(p, OPCODE_MOV, res1, 0, _bfc1); 1143 } 1144 1145 if (nr_lights == 0) { 1146 release_temps(p); 1147 return; 1148 } 1149 1150 for (i = 0; i < MAX_LIGHTS; i++) { 1151 if (p->state->unit[i].light_enabled) { 1152 struct ureg half = undef; 1153 struct ureg att = undef, VPpli = undef; 1154 1155 count++; 1156 1157 if (p->state->unit[i].light_eyepos3_is_zero) { 1158 /* Can used precomputed constants in this case. 1159 * Attenuation never applies to infinite lights. 1160 */ 1161 VPpli = register_param3(p, STATE_INTERNAL, 1162 STATE_LIGHT_POSITION_NORMALIZED, i); 1163 1164 if (!p->state->material_shininess_is_zero) { 1165 if (p->state->light_local_viewer) { 1166 struct ureg eye_hat = get_eye_position_normalized(p); 1167 half = get_temp(p); 1168 emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat); 1169 emit_normalize_vec3(p, half, half); 1170 } else { 1171 half = register_param3(p, STATE_INTERNAL, 1172 STATE_LIGHT_HALF_VECTOR, i); 1173 } 1174 } 1175 } 1176 else { 1177 struct ureg Ppli = register_param3(p, STATE_INTERNAL, 1178 STATE_LIGHT_POSITION, i); 1179 struct ureg V = get_eye_position(p); 1180 struct ureg dist = get_temp(p); 1181 1182 VPpli = get_temp(p); 1183 1184 /* Calculate VPpli vector 1185 */ 1186 emit_op2(p, OPCODE_SUB, VPpli, 0, Ppli, V); 1187 1188 /* Normalize VPpli. The dist value also used in 1189 * attenuation below. 1190 */ 1191 emit_op2(p, OPCODE_DP3, dist, 0, VPpli, VPpli); 1192 emit_op1(p, OPCODE_RSQ, dist, 0, dist); 1193 emit_op2(p, OPCODE_MUL, VPpli, 0, VPpli, dist); 1194 1195 /* Calculate attenuation: 1196 */ 1197 if (!p->state->unit[i].light_spotcutoff_is_180 || 1198 p->state->unit[i].light_attenuated) { 1199 att = calculate_light_attenuation(p, i, VPpli, dist); 1200 } 1201 1202 /* Calculate viewer direction, or use infinite viewer: 1203 */ 1204 if (!p->state->material_shininess_is_zero) { 1205 half = get_temp(p); 1206 1207 if (p->state->light_local_viewer) { 1208 struct ureg eye_hat = get_eye_position_normalized(p); 1209 emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat); 1210 } 1211 else { 1212 struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z); 1213 emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir); 1214 } 1215 1216 emit_normalize_vec3(p, half, half); 1217 } 1218 1219 release_temp(p, dist); 1220 } 1221 1222 /* Calculate dot products: 1223 */ 1224 if (p->state->material_shininess_is_zero) { 1225 emit_op2(p, OPCODE_DP3, dots, 0, normal, VPpli); 1226 } 1227 else { 1228 emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli); 1229 emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half); 1230 } 1231 1232 /* Front face lighting: 1233 */ 1234 { 1235 struct ureg ambient = get_lightprod(p, i, 0, STATE_AMBIENT); 1236 struct ureg diffuse = get_lightprod(p, i, 0, STATE_DIFFUSE); 1237 struct ureg specular = get_lightprod(p, i, 0, STATE_SPECULAR); 1238 struct ureg res0, res1; 1239 GLuint mask0, mask1; 1240 1241 if (count == nr_lights) { 1242 if (separate) { 1243 mask0 = WRITEMASK_XYZ; 1244 mask1 = WRITEMASK_XYZ; 1245 res0 = register_output( p, VERT_RESULT_COL0 ); 1246 res1 = register_output( p, VERT_RESULT_COL1 ); 1247 } 1248 else { 1249 mask0 = 0; 1250 mask1 = WRITEMASK_XYZ; 1251 res0 = _col0; 1252 res1 = register_output( p, VERT_RESULT_COL0 ); 1253 } 1254 } else { 1255 mask0 = 0; 1256 mask1 = 0; 1257 res0 = _col0; 1258 res1 = _col1; 1259 } 1260 1261 if (!is_undef(att)) { 1262 /* light is attenuated by distance */ 1263 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1264 emit_op2(p, OPCODE_MUL, lit, 0, lit, att); 1265 emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0); 1266 } 1267 else if (!p->state->material_shininess_is_zero) { 1268 /* there's a non-zero specular term */ 1269 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1270 emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0); 1271 } 1272 else { 1273 /* no attenutation, no specular */ 1274 emit_degenerate_lit(p, lit, dots); 1275 emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0); 1276 } 1277 1278 emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0); 1279 emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1); 1280 1281 release_temp(p, ambient); 1282 release_temp(p, diffuse); 1283 release_temp(p, specular); 1284 } 1285 1286 /* Back face lighting: 1287 */ 1288 if (twoside) { 1289 struct ureg ambient = get_lightprod(p, i, 1, STATE_AMBIENT); 1290 struct ureg diffuse = get_lightprod(p, i, 1, STATE_DIFFUSE); 1291 struct ureg specular = get_lightprod(p, i, 1, STATE_SPECULAR); 1292 struct ureg res0, res1; 1293 GLuint mask0, mask1; 1294 1295 if (count == nr_lights) { 1296 if (separate) { 1297 mask0 = WRITEMASK_XYZ; 1298 mask1 = WRITEMASK_XYZ; 1299 res0 = register_output( p, VERT_RESULT_BFC0 ); 1300 res1 = register_output( p, VERT_RESULT_BFC1 ); 1301 } 1302 else { 1303 mask0 = 0; 1304 mask1 = WRITEMASK_XYZ; 1305 res0 = _bfc0; 1306 res1 = register_output( p, VERT_RESULT_BFC0 ); 1307 } 1308 } else { 1309 res0 = _bfc0; 1310 res1 = _bfc1; 1311 mask0 = 0; 1312 mask1 = 0; 1313 } 1314 1315 /* For the back face we need to negate the X and Y component 1316 * dot products. dots.Z has the negated back-face specular 1317 * exponent. We swizzle that into the W position. This 1318 * negation makes the back-face specular term positive again. 1319 */ 1320 dots = negate(swizzle(dots,X,Y,W,Z)); 1321 1322 if (!is_undef(att)) { 1323 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1324 emit_op2(p, OPCODE_MUL, lit, 0, lit, att); 1325 emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0); 1326 } 1327 else if (!p->state->material_shininess_is_zero) { 1328 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1329 emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0); 1330 } 1331 else { 1332 emit_degenerate_lit(p, lit, dots); 1333 emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0); 1334 } 1335 1336 emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0); 1337 emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1); 1338 /* restore dots to its original state for subsequent lights 1339 * by negating and swizzling again. 1340 */ 1341 dots = negate(swizzle(dots,X,Y,W,Z)); 1342 1343 release_temp(p, ambient); 1344 release_temp(p, diffuse); 1345 release_temp(p, specular); 1346 } 1347 1348 release_temp(p, half); 1349 release_temp(p, VPpli); 1350 release_temp(p, att); 1351 } 1352 } 1353 1354 release_temps( p ); 1355} 1356 1357 1358static void build_fog( struct tnl_program *p ) 1359{ 1360 struct ureg fog = register_output(p, VERT_RESULT_FOGC); 1361 struct ureg input; 1362 1363 if (p->state->fog_source_is_depth) { 1364 input = get_eye_position_z(p); 1365 } 1366 else { 1367 input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X); 1368 } 1369 1370 if (p->state->fog_mode && p->state->tnl_do_vertex_fog) { 1371 struct ureg params = register_param2(p, STATE_INTERNAL, 1372 STATE_FOG_PARAMS_OPTIMIZED); 1373 struct ureg tmp = get_temp(p); 1374 GLboolean useabs = (p->state->fog_mode != FOG_EXP2); 1375 1376 if (useabs) { 1377 emit_op1(p, OPCODE_ABS, tmp, 0, input); 1378 } 1379 1380 switch (p->state->fog_mode) { 1381 case FOG_LINEAR: { 1382 struct ureg id = get_identity_param(p); 1383 emit_op3(p, OPCODE_MAD, tmp, 0, useabs ? tmp : input, 1384 swizzle1(params,X), swizzle1(params,Y)); 1385 emit_op2(p, OPCODE_MAX, tmp, 0, tmp, swizzle1(id,X)); /* saturate */ 1386 emit_op2(p, OPCODE_MIN, fog, WRITEMASK_X, tmp, swizzle1(id,W)); 1387 break; 1388 } 1389 case FOG_EXP: 1390 emit_op2(p, OPCODE_MUL, tmp, 0, useabs ? tmp : input, 1391 swizzle1(params,Z)); 1392 emit_op1(p, OPCODE_EX2, fog, WRITEMASK_X, negate(tmp)); 1393 break; 1394 case FOG_EXP2: 1395 emit_op2(p, OPCODE_MUL, tmp, 0, input, swizzle1(params,W)); 1396 emit_op2(p, OPCODE_MUL, tmp, 0, tmp, tmp); 1397 emit_op1(p, OPCODE_EX2, fog, WRITEMASK_X, negate(tmp)); 1398 break; 1399 } 1400 1401 release_temp(p, tmp); 1402 } 1403 else { 1404 /* results = incoming fog coords (compute fog per-fragment later) 1405 * 1406 * KW: Is it really necessary to do anything in this case? 1407 * BP: Yes, we always need to compute the absolute value, unless 1408 * we want to push that down into the fragment program... 1409 */ 1410 GLboolean useabs = GL_TRUE; 1411 emit_op1(p, useabs ? OPCODE_ABS : OPCODE_MOV, fog, WRITEMASK_X, input); 1412 } 1413} 1414 1415 1416static void build_reflect_texgen( struct tnl_program *p, 1417 struct ureg dest, 1418 GLuint writemask ) 1419{ 1420 struct ureg normal = get_transformed_normal(p); 1421 struct ureg eye_hat = get_eye_position_normalized(p); 1422 struct ureg tmp = get_temp(p); 1423 1424 /* n.u */ 1425 emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat); 1426 /* 2n.u */ 1427 emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp); 1428 /* (-2n.u)n + u */ 1429 emit_op3(p, OPCODE_MAD, dest, writemask, negate(tmp), normal, eye_hat); 1430 1431 release_temp(p, tmp); 1432} 1433 1434 1435static void build_sphere_texgen( struct tnl_program *p, 1436 struct ureg dest, 1437 GLuint writemask ) 1438{ 1439 struct ureg normal = get_transformed_normal(p); 1440 struct ureg eye_hat = get_eye_position_normalized(p); 1441 struct ureg tmp = get_temp(p); 1442 struct ureg half = register_scalar_const(p, .5); 1443 struct ureg r = get_temp(p); 1444 struct ureg inv_m = get_temp(p); 1445 struct ureg id = get_identity_param(p); 1446 1447 /* Could share the above calculations, but it would be 1448 * a fairly odd state for someone to set (both sphere and 1449 * reflection active for different texture coordinate 1450 * components. Of course - if two texture units enable 1451 * reflect and/or sphere, things start to tilt in favour 1452 * of seperating this out: 1453 */ 1454 1455 /* n.u */ 1456 emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat); 1457 /* 2n.u */ 1458 emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp); 1459 /* (-2n.u)n + u */ 1460 emit_op3(p, OPCODE_MAD, r, 0, negate(tmp), normal, eye_hat); 1461 /* r + 0,0,1 */ 1462 emit_op2(p, OPCODE_ADD, tmp, 0, r, swizzle(id,X,Y,W,Z)); 1463 /* rx^2 + ry^2 + (rz+1)^2 */ 1464 emit_op2(p, OPCODE_DP3, tmp, 0, tmp, tmp); 1465 /* 2/m */ 1466 emit_op1(p, OPCODE_RSQ, tmp, 0, tmp); 1467 /* 1/m */ 1468 emit_op2(p, OPCODE_MUL, inv_m, 0, tmp, half); 1469 /* r/m + 1/2 */ 1470 emit_op3(p, OPCODE_MAD, dest, writemask, r, inv_m, half); 1471 1472 release_temp(p, tmp); 1473 release_temp(p, r); 1474 release_temp(p, inv_m); 1475} 1476 1477 1478static void build_texture_transform( struct tnl_program *p ) 1479{ 1480 GLuint i, j; 1481 1482 for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) { 1483 1484 if (!(p->state->fragprog_inputs_read & FRAG_BIT_TEX(i))) 1485 continue; 1486 1487 if (p->state->unit[i].texgen_enabled || 1488 p->state->unit[i].texmat_enabled) { 1489 1490 GLuint texmat_enabled = p->state->unit[i].texmat_enabled; 1491 struct ureg out = register_output(p, VERT_RESULT_TEX0 + i); 1492 struct ureg out_texgen = undef; 1493 1494 if (p->state->unit[i].texgen_enabled) { 1495 GLuint copy_mask = 0; 1496 GLuint sphere_mask = 0; 1497 GLuint reflect_mask = 0; 1498 GLuint normal_mask = 0; 1499 GLuint modes[4]; 1500 1501 if (texmat_enabled) 1502 out_texgen = get_temp(p); 1503 else 1504 out_texgen = out; 1505 1506 modes[0] = p->state->unit[i].texgen_mode0; 1507 modes[1] = p->state->unit[i].texgen_mode1; 1508 modes[2] = p->state->unit[i].texgen_mode2; 1509 modes[3] = p->state->unit[i].texgen_mode3; 1510 1511 for (j = 0; j < 4; j++) { 1512 switch (modes[j]) { 1513 case TXG_OBJ_LINEAR: { 1514 struct ureg obj = register_input(p, VERT_ATTRIB_POS); 1515 struct ureg plane = 1516 register_param3(p, STATE_TEXGEN, i, 1517 STATE_TEXGEN_OBJECT_S + j); 1518 1519 emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j, 1520 obj, plane ); 1521 break; 1522 } 1523 case TXG_EYE_LINEAR: { 1524 struct ureg eye = get_eye_position(p); 1525 struct ureg plane = 1526 register_param3(p, STATE_TEXGEN, i, 1527 STATE_TEXGEN_EYE_S + j); 1528 1529 emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j, 1530 eye, plane ); 1531 break; 1532 } 1533 case TXG_SPHERE_MAP: 1534 sphere_mask |= WRITEMASK_X << j; 1535 break; 1536 case TXG_REFLECTION_MAP: 1537 reflect_mask |= WRITEMASK_X << j; 1538 break; 1539 case TXG_NORMAL_MAP: 1540 normal_mask |= WRITEMASK_X << j; 1541 break; 1542 case TXG_NONE: 1543 copy_mask |= WRITEMASK_X << j; 1544 } 1545 } 1546 1547 if (sphere_mask) { 1548 build_sphere_texgen(p, out_texgen, sphere_mask); 1549 } 1550 1551 if (reflect_mask) { 1552 build_reflect_texgen(p, out_texgen, reflect_mask); 1553 } 1554 1555 if (normal_mask) { 1556 struct ureg normal = get_transformed_normal(p); 1557 emit_op1(p, OPCODE_MOV, out_texgen, normal_mask, normal ); 1558 } 1559 1560 if (copy_mask) { 1561 struct ureg in = register_input(p, VERT_ATTRIB_TEX0+i); 1562 emit_op1(p, OPCODE_MOV, out_texgen, copy_mask, in ); 1563 } 1564 } 1565 1566 if (texmat_enabled) { 1567 struct ureg texmat[4]; 1568 struct ureg in = (!is_undef(out_texgen) ? 1569 out_texgen : 1570 register_input(p, VERT_ATTRIB_TEX0+i)); 1571 if (PREFER_DP4) { 1572 register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3, 1573 0, texmat ); 1574 emit_matrix_transform_vec4( p, out, texmat, in ); 1575 } 1576 else { 1577 register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3, 1578 STATE_MATRIX_TRANSPOSE, texmat ); 1579 emit_transpose_matrix_transform_vec4( p, out, texmat, in ); 1580 } 1581 } 1582 1583 release_temps(p); 1584 } 1585 else { 1586 emit_passthrough(p, VERT_ATTRIB_TEX0+i, VERT_RESULT_TEX0+i); 1587 } 1588 } 1589} 1590 1591 1592/** 1593 * Point size attenuation computation. 1594 */ 1595static void build_atten_pointsize( struct tnl_program *p ) 1596{ 1597 struct ureg eye = get_eye_position_z(p); 1598 struct ureg state_size = register_param1(p, STATE_POINT_SIZE); 1599 struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION); 1600 struct ureg out = register_output(p, VERT_RESULT_PSIZ); 1601 struct ureg ut = get_temp(p); 1602 1603 /* dist = |eyez| */ 1604 emit_op1(p, OPCODE_ABS, ut, WRITEMASK_Y, swizzle1(eye, Z)); 1605 /* p1 + dist * (p2 + dist * p3); */ 1606 emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y), 1607 swizzle1(state_attenuation, Z), swizzle1(state_attenuation, Y)); 1608 emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y), 1609 ut, swizzle1(state_attenuation, X)); 1610 1611 /* 1 / sqrt(factor) */ 1612 emit_op1(p, OPCODE_RSQ, ut, WRITEMASK_X, ut ); 1613 1614#if 0 1615 /* out = pointSize / sqrt(factor) */ 1616 emit_op2(p, OPCODE_MUL, out, WRITEMASK_X, ut, state_size); 1617#else 1618 /* this is a good place to clamp the point size since there's likely 1619 * no hardware registers to clamp point size at rasterization time. 1620 */ 1621 emit_op2(p, OPCODE_MUL, ut, WRITEMASK_X, ut, state_size); 1622 emit_op2(p, OPCODE_MAX, ut, WRITEMASK_X, ut, swizzle1(state_size, Y)); 1623 emit_op2(p, OPCODE_MIN, out, WRITEMASK_X, ut, swizzle1(state_size, Z)); 1624#endif 1625 1626 release_temp(p, ut); 1627} 1628 1629 1630/** 1631 * Emit constant point size. 1632 */ 1633static void build_constant_pointsize( struct tnl_program *p ) 1634{ 1635 struct ureg state_size = register_param1(p, STATE_POINT_SIZE); 1636 struct ureg out = register_output(p, VERT_RESULT_PSIZ); 1637 emit_op1(p, OPCODE_MOV, out, WRITEMASK_X, state_size); 1638} 1639 1640 1641/** 1642 * Pass-though per-vertex point size, from user's point size array. 1643 */ 1644static void build_array_pointsize( struct tnl_program *p ) 1645{ 1646 struct ureg in = register_input(p, VERT_ATTRIB_POINT_SIZE); 1647 struct ureg out = register_output(p, VERT_RESULT_PSIZ); 1648 emit_op1(p, OPCODE_MOV, out, WRITEMASK_X, in); 1649} 1650 1651 1652static void build_tnl_program( struct tnl_program *p ) 1653{ /* Emit the program, starting with modelviewproject: 1654 */ 1655 build_hpos(p); 1656 1657 /* Lighting calculations: 1658 */ 1659 if (p->state->fragprog_inputs_read & (FRAG_BIT_COL0|FRAG_BIT_COL1)) { 1660 if (p->state->light_global_enabled) 1661 build_lighting(p); 1662 else { 1663 if (p->state->fragprog_inputs_read & FRAG_BIT_COL0) 1664 emit_passthrough(p, VERT_ATTRIB_COLOR0, VERT_RESULT_COL0); 1665 1666 if (p->state->fragprog_inputs_read & FRAG_BIT_COL1) 1667 emit_passthrough(p, VERT_ATTRIB_COLOR1, VERT_RESULT_COL1); 1668 } 1669 } 1670 1671 if ((p->state->fragprog_inputs_read & FRAG_BIT_FOGC) || 1672 p->state->fog_mode != FOG_NONE) 1673 build_fog(p); 1674 1675 if (p->state->fragprog_inputs_read & FRAG_BITS_TEX_ANY) 1676 build_texture_transform(p); 1677 1678 if (p->state->point_attenuated) 1679 build_atten_pointsize(p); 1680 else if (p->state->point_array) 1681 build_array_pointsize(p); 1682#if 0 1683 else 1684 build_constant_pointsize(p); 1685#else 1686 (void) build_constant_pointsize; 1687#endif 1688 1689 /* Finish up: 1690 */ 1691 emit_op1(p, OPCODE_END, undef, 0, undef); 1692 1693 /* Disassemble: 1694 */ 1695 if (DISASSEM) { 1696 _mesa_printf ("\n"); 1697 } 1698} 1699 1700 1701static void 1702create_new_program( const struct state_key *key, 1703 struct gl_vertex_program *program, 1704 GLuint max_temps) 1705{ 1706 struct tnl_program p; 1707 1708 _mesa_memset(&p, 0, sizeof(p)); 1709 p.state = key; 1710 p.program = program; 1711 p.eye_position = undef; 1712 p.eye_position_z = undef; 1713 p.eye_position_normalized = undef; 1714 p.transformed_normal = undef; 1715 p.identity = undef; 1716 p.temp_in_use = 0; 1717 1718 if (max_temps >= sizeof(int) * 8) 1719 p.temp_reserved = 0; 1720 else 1721 p.temp_reserved = ~((1<<max_temps)-1); 1722 1723 /* Start by allocating 32 instructions. 1724 * If we need more, we'll grow the instruction array as needed. 1725 */ 1726 p.max_inst = 32; 1727 p.program->Base.Instructions = _mesa_alloc_instructions(p.max_inst); 1728 p.program->Base.String = NULL; 1729 p.program->Base.NumInstructions = 1730 p.program->Base.NumTemporaries = 1731 p.program->Base.NumParameters = 1732 p.program->Base.NumAttributes = p.program->Base.NumAddressRegs = 0; 1733 p.program->Base.Parameters = _mesa_new_parameter_list(); 1734 p.program->Base.InputsRead = 0; 1735 p.program->Base.OutputsWritten = 0; 1736 1737 build_tnl_program( &p ); 1738} 1739 1740 1741/** 1742 * Return a vertex program which implements the current fixed-function 1743 * transform/lighting/texgen operations. 1744 * XXX move this into core mesa (main/) 1745 */ 1746struct gl_vertex_program * 1747_mesa_get_fixed_func_vertex_program(GLcontext *ctx) 1748{ 1749 struct gl_vertex_program *prog; 1750 struct state_key key; 1751 1752 /* Grab all the relevent state and put it in a single structure: 1753 */ 1754 make_state_key(ctx, &key); 1755 1756 /* Look for an already-prepared program for this state: 1757 */ 1758 prog = (struct gl_vertex_program *) 1759 _mesa_search_program_cache(ctx->VertexProgram.Cache, &key, sizeof(key)); 1760 1761 if (!prog) { 1762 /* OK, we'll have to build a new one */ 1763 if (0) 1764 _mesa_printf("Build new TNL program\n"); 1765 1766 prog = (struct gl_vertex_program *) 1767 ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 0); 1768 if (!prog) 1769 return NULL; 1770 1771 create_new_program( &key, prog, 1772 ctx->Const.VertexProgram.MaxTemps ); 1773 1774#if 0 1775 if (ctx->Driver.ProgramStringNotify) 1776 ctx->Driver.ProgramStringNotify( ctx, GL_VERTEX_PROGRAM_ARB, 1777 &prog->Base ); 1778#endif 1779 _mesa_program_cache_insert(ctx, ctx->VertexProgram.Cache, 1780 &key, sizeof(key), &prog->Base); 1781 } 1782 1783 return prog; 1784} 1785