/* ffvertex_prog.c revision af69d88d */
/**************************************************************************
 *
 * Copyright 2007 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * \file ffvertex_prog.c
 *
 * Create a vertex program to execute the current fixed function T&L pipeline.
32 * \author Keith Whitwell 33 */ 34 35 36#include "main/glheader.h" 37#include "main/mtypes.h" 38#include "main/macros.h" 39#include "main/enums.h" 40#include "main/ffvertex_prog.h" 41#include "program/program.h" 42#include "program/prog_cache.h" 43#include "program/prog_instruction.h" 44#include "program/prog_parameter.h" 45#include "program/prog_print.h" 46#include "program/prog_statevars.h" 47 48 49/** Max of number of lights and texture coord units */ 50#define NUM_UNITS MAX2(MAX_TEXTURE_COORD_UNITS, MAX_LIGHTS) 51 52struct state_key { 53 unsigned light_color_material_mask:12; 54 unsigned light_global_enabled:1; 55 unsigned light_local_viewer:1; 56 unsigned light_twoside:1; 57 unsigned material_shininess_is_zero:1; 58 unsigned need_eye_coords:1; 59 unsigned normalize:1; 60 unsigned rescale_normals:1; 61 62 unsigned fog_source_is_depth:1; 63 unsigned fog_distance_mode:2; 64 unsigned separate_specular:1; 65 unsigned point_attenuated:1; 66 unsigned point_array:1; 67 unsigned texture_enabled_global:1; 68 unsigned fragprog_inputs_read:12; 69 70 GLbitfield64 varying_vp_inputs; 71 72 struct { 73 unsigned light_enabled:1; 74 unsigned light_eyepos3_is_zero:1; 75 unsigned light_spotcutoff_is_180:1; 76 unsigned light_attenuated:1; 77 unsigned texunit_really_enabled:1; 78 unsigned texmat_enabled:1; 79 unsigned coord_replace:1; 80 unsigned texgen_enabled:4; 81 unsigned texgen_mode0:4; 82 unsigned texgen_mode1:4; 83 unsigned texgen_mode2:4; 84 unsigned texgen_mode3:4; 85 } unit[NUM_UNITS]; 86}; 87 88 89#define TXG_NONE 0 90#define TXG_OBJ_LINEAR 1 91#define TXG_EYE_LINEAR 2 92#define TXG_SPHERE_MAP 3 93#define TXG_REFLECTION_MAP 4 94#define TXG_NORMAL_MAP 5 95 96static GLuint translate_texgen( GLboolean enabled, GLenum mode ) 97{ 98 if (!enabled) 99 return TXG_NONE; 100 101 switch (mode) { 102 case GL_OBJECT_LINEAR: return TXG_OBJ_LINEAR; 103 case GL_EYE_LINEAR: return TXG_EYE_LINEAR; 104 case GL_SPHERE_MAP: return TXG_SPHERE_MAP; 105 case GL_REFLECTION_MAP_NV: return 
TXG_REFLECTION_MAP; 106 case GL_NORMAL_MAP_NV: return TXG_NORMAL_MAP; 107 default: return TXG_NONE; 108 } 109} 110 111#define FDM_EYE_RADIAL 0 112#define FDM_EYE_PLANE 1 113#define FDM_EYE_PLANE_ABS 2 114 115static GLuint translate_fog_distance_mode( GLenum mode ) 116{ 117 switch (mode) { 118 case GL_EYE_RADIAL_NV: 119 return FDM_EYE_RADIAL; 120 case GL_EYE_PLANE: 121 return FDM_EYE_PLANE; 122 default: /* shouldn't happen; fall through to a sensible default */ 123 case GL_EYE_PLANE_ABSOLUTE_NV: 124 return FDM_EYE_PLANE_ABS; 125 } 126} 127 128static GLboolean check_active_shininess( struct gl_context *ctx, 129 const struct state_key *key, 130 GLuint side ) 131{ 132 GLuint attr = MAT_ATTRIB_FRONT_SHININESS + side; 133 134 if ((key->varying_vp_inputs & VERT_BIT_COLOR0) && 135 (key->light_color_material_mask & (1 << attr))) 136 return GL_TRUE; 137 138 if (key->varying_vp_inputs & VERT_ATTRIB_GENERIC(attr)) 139 return GL_TRUE; 140 141 if (ctx->Light.Material.Attrib[attr][0] != 0.0F) 142 return GL_TRUE; 143 144 return GL_FALSE; 145} 146 147 148static void make_state_key( struct gl_context *ctx, struct state_key *key ) 149{ 150 const struct gl_fragment_program *fp; 151 GLuint i; 152 153 memset(key, 0, sizeof(struct state_key)); 154 fp = ctx->FragmentProgram._Current; 155 156 /* This now relies on texenvprogram.c being active: 157 */ 158 assert(fp); 159 160 key->need_eye_coords = ctx->_NeedEyeCoords; 161 162 key->fragprog_inputs_read = fp->Base.InputsRead; 163 key->varying_vp_inputs = ctx->varying_vp_inputs; 164 165 if (ctx->RenderMode == GL_FEEDBACK) { 166 /* make sure the vertprog emits color and tex0 */ 167 key->fragprog_inputs_read |= (VARYING_BIT_COL0 | VARYING_BIT_TEX0); 168 } 169 170 key->separate_specular = (ctx->Light.Model.ColorControl == 171 GL_SEPARATE_SPECULAR_COLOR); 172 173 if (ctx->Light.Enabled) { 174 key->light_global_enabled = 1; 175 176 if (ctx->Light.Model.LocalViewer) 177 key->light_local_viewer = 1; 178 179 if (ctx->Light.Model.TwoSide) 180 
key->light_twoside = 1; 181 182 if (ctx->Light.ColorMaterialEnabled) { 183 key->light_color_material_mask = ctx->Light._ColorMaterialBitmask; 184 } 185 186 for (i = 0; i < MAX_LIGHTS; i++) { 187 struct gl_light *light = &ctx->Light.Light[i]; 188 189 if (light->Enabled) { 190 key->unit[i].light_enabled = 1; 191 192 if (light->EyePosition[3] == 0.0) 193 key->unit[i].light_eyepos3_is_zero = 1; 194 195 if (light->SpotCutoff == 180.0) 196 key->unit[i].light_spotcutoff_is_180 = 1; 197 198 if (light->ConstantAttenuation != 1.0 || 199 light->LinearAttenuation != 0.0 || 200 light->QuadraticAttenuation != 0.0) 201 key->unit[i].light_attenuated = 1; 202 } 203 } 204 205 if (check_active_shininess(ctx, key, 0)) { 206 key->material_shininess_is_zero = 0; 207 } 208 else if (key->light_twoside && 209 check_active_shininess(ctx, key, 1)) { 210 key->material_shininess_is_zero = 0; 211 } 212 else { 213 key->material_shininess_is_zero = 1; 214 } 215 } 216 217 if (ctx->Transform.Normalize) 218 key->normalize = 1; 219 220 if (ctx->Transform.RescaleNormals) 221 key->rescale_normals = 1; 222 223 if (ctx->Fog.FogCoordinateSource == GL_FRAGMENT_DEPTH_EXT) { 224 key->fog_source_is_depth = 1; 225 key->fog_distance_mode = translate_fog_distance_mode(ctx->Fog.FogDistanceMode); 226 } 227 228 if (ctx->Point._Attenuated) 229 key->point_attenuated = 1; 230 231 if (ctx->Array.VAO->VertexAttrib[VERT_ATTRIB_POINT_SIZE].Enabled) 232 key->point_array = 1; 233 234 if (ctx->Texture._TexGenEnabled || 235 ctx->Texture._TexMatEnabled || 236 ctx->Texture._MaxEnabledTexImageUnit != -1) 237 key->texture_enabled_global = 1; 238 239 for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) { 240 struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; 241 242 if (texUnit->_Current) 243 key->unit[i].texunit_really_enabled = 1; 244 245 if (ctx->Point.PointSprite) 246 if (ctx->Point.CoordReplace[i]) 247 key->unit[i].coord_replace = 1; 248 249 if (ctx->Texture._TexMatEnabled & ENABLE_TEXMAT(i)) 250 key->unit[i].texmat_enabled = 
1; 251 252 if (texUnit->TexGenEnabled) { 253 key->unit[i].texgen_enabled = 1; 254 255 key->unit[i].texgen_mode0 = 256 translate_texgen( texUnit->TexGenEnabled & (1<<0), 257 texUnit->GenS.Mode ); 258 key->unit[i].texgen_mode1 = 259 translate_texgen( texUnit->TexGenEnabled & (1<<1), 260 texUnit->GenT.Mode ); 261 key->unit[i].texgen_mode2 = 262 translate_texgen( texUnit->TexGenEnabled & (1<<2), 263 texUnit->GenR.Mode ); 264 key->unit[i].texgen_mode3 = 265 translate_texgen( texUnit->TexGenEnabled & (1<<3), 266 texUnit->GenQ.Mode ); 267 } 268 } 269} 270 271 272 273/* Very useful debugging tool - produces annotated listing of 274 * generated program with line/function references for each 275 * instruction back into this file: 276 */ 277#define DISASSEM 0 278 279 280/* Use uregs to represent registers internally, translate to Mesa's 281 * expected formats on emit. 282 * 283 * NOTE: These are passed by value extensively in this file rather 284 * than as usual by pointer reference. If this disturbs you, try 285 * remembering they are just 32bits in size. 286 * 287 * GCC is smart enough to deal with these dword-sized structures in 288 * much the same way as if I had defined them as dwords and was using 289 * macros to access and set the fields. This is much nicer and easier 290 * to evolve. 
291 */ 292struct ureg { 293 GLuint file:4; 294 GLint idx:9; /* relative addressing may be negative */ 295 /* sizeof(idx) should == sizeof(prog_src_reg::Index) */ 296 GLuint negate:1; 297 GLuint swz:12; 298 GLuint pad:6; 299}; 300 301 302struct tnl_program { 303 const struct state_key *state; 304 struct gl_vertex_program *program; 305 GLint max_inst; /** number of instructions allocated for program */ 306 GLboolean mvp_with_dp4; 307 308 GLuint temp_in_use; 309 GLuint temp_reserved; 310 311 struct ureg eye_position; 312 struct ureg eye_position_z; 313 struct ureg eye_position_normalized; 314 struct ureg transformed_normal; 315 struct ureg identity; 316 317 GLuint materials; 318 GLuint color_materials; 319}; 320 321 322static const struct ureg undef = { 323 PROGRAM_UNDEFINED, 324 0, 325 0, 326 0, 327 0 328}; 329 330/* Local shorthand: 331 */ 332#define X SWIZZLE_X 333#define Y SWIZZLE_Y 334#define Z SWIZZLE_Z 335#define W SWIZZLE_W 336 337 338/* Construct a ureg: 339 */ 340static struct ureg make_ureg(GLuint file, GLint idx) 341{ 342 struct ureg reg; 343 reg.file = file; 344 reg.idx = idx; 345 reg.negate = 0; 346 reg.swz = SWIZZLE_NOOP; 347 reg.pad = 0; 348 return reg; 349} 350 351 352 353static struct ureg negate( struct ureg reg ) 354{ 355 reg.negate ^= 1; 356 return reg; 357} 358 359 360static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w ) 361{ 362 reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x), 363 GET_SWZ(reg.swz, y), 364 GET_SWZ(reg.swz, z), 365 GET_SWZ(reg.swz, w)); 366 return reg; 367} 368 369 370static struct ureg swizzle1( struct ureg reg, int x ) 371{ 372 return swizzle(reg, x, x, x, x); 373} 374 375 376static struct ureg get_temp( struct tnl_program *p ) 377{ 378 int bit = ffs( ~p->temp_in_use ); 379 if (!bit) { 380 _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__); 381 exit(1); 382 } 383 384 if ((GLuint) bit > p->program->Base.NumTemporaries) 385 p->program->Base.NumTemporaries = bit; 386 387 p->temp_in_use |= 1<<(bit-1); 388 
return make_ureg(PROGRAM_TEMPORARY, bit-1); 389} 390 391 392static struct ureg reserve_temp( struct tnl_program *p ) 393{ 394 struct ureg temp = get_temp( p ); 395 p->temp_reserved |= 1<<temp.idx; 396 return temp; 397} 398 399 400static void release_temp( struct tnl_program *p, struct ureg reg ) 401{ 402 if (reg.file == PROGRAM_TEMPORARY) { 403 p->temp_in_use &= ~(1<<reg.idx); 404 p->temp_in_use |= p->temp_reserved; /* can't release reserved temps */ 405 } 406} 407 408static void release_temps( struct tnl_program *p ) 409{ 410 p->temp_in_use = p->temp_reserved; 411} 412 413 414static struct ureg register_param5(struct tnl_program *p, 415 GLint s0, 416 GLint s1, 417 GLint s2, 418 GLint s3, 419 GLint s4) 420{ 421 gl_state_index tokens[STATE_LENGTH]; 422 GLint idx; 423 tokens[0] = s0; 424 tokens[1] = s1; 425 tokens[2] = s2; 426 tokens[3] = s3; 427 tokens[4] = s4; 428 idx = _mesa_add_state_reference( p->program->Base.Parameters, tokens ); 429 return make_ureg(PROGRAM_STATE_VAR, idx); 430} 431 432 433#define register_param1(p,s0) register_param5(p,s0,0,0,0,0) 434#define register_param2(p,s0,s1) register_param5(p,s0,s1,0,0,0) 435#define register_param3(p,s0,s1,s2) register_param5(p,s0,s1,s2,0,0) 436#define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0) 437 438 439 440/** 441 * \param input one of VERT_ATTRIB_x tokens. 442 */ 443static struct ureg register_input( struct tnl_program *p, GLuint input ) 444{ 445 assert(input < VERT_ATTRIB_MAX); 446 447 if (p->state->varying_vp_inputs & VERT_BIT(input)) { 448 p->program->Base.InputsRead |= VERT_BIT(input); 449 return make_ureg(PROGRAM_INPUT, input); 450 } 451 else { 452 return register_param3( p, STATE_INTERNAL, STATE_CURRENT_ATTRIB, input ); 453 } 454} 455 456 457/** 458 * \param input one of VARYING_SLOT_x tokens. 
459 */ 460static struct ureg register_output( struct tnl_program *p, GLuint output ) 461{ 462 p->program->Base.OutputsWritten |= BITFIELD64_BIT(output); 463 return make_ureg(PROGRAM_OUTPUT, output); 464} 465 466 467static struct ureg register_const4f( struct tnl_program *p, 468 GLfloat s0, 469 GLfloat s1, 470 GLfloat s2, 471 GLfloat s3) 472{ 473 gl_constant_value values[4]; 474 GLint idx; 475 GLuint swizzle; 476 values[0].f = s0; 477 values[1].f = s1; 478 values[2].f = s2; 479 values[3].f = s3; 480 idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4, 481 &swizzle ); 482 ASSERT(swizzle == SWIZZLE_NOOP); 483 return make_ureg(PROGRAM_CONSTANT, idx); 484} 485 486#define register_const1f(p, s0) register_const4f(p, s0, 0, 0, 1) 487#define register_scalar_const(p, s0) register_const4f(p, s0, s0, s0, s0) 488#define register_const2f(p, s0, s1) register_const4f(p, s0, s1, 0, 1) 489#define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1) 490 491static GLboolean is_undef( struct ureg reg ) 492{ 493 return reg.file == PROGRAM_UNDEFINED; 494} 495 496 497static struct ureg get_identity_param( struct tnl_program *p ) 498{ 499 if (is_undef(p->identity)) 500 p->identity = register_const4f(p, 0,0,0,1); 501 502 return p->identity; 503} 504 505static void register_matrix_param5( struct tnl_program *p, 506 GLint s0, /* modelview, projection, etc */ 507 GLint s1, /* texture matrix number */ 508 GLint s2, /* first row */ 509 GLint s3, /* last row */ 510 GLint s4, /* inverse, transpose, etc */ 511 struct ureg *matrix ) 512{ 513 GLint i; 514 515 /* This is a bit sad as the support is there to pull the whole 516 * matrix out in one go: 517 */ 518 for (i = 0; i <= s3 - s2; i++) 519 matrix[i] = register_param5( p, s0, s1, i, i, s4 ); 520} 521 522 523static void emit_arg( struct prog_src_register *src, 524 struct ureg reg ) 525{ 526 src->File = reg.file; 527 src->Index = reg.idx; 528 src->Swizzle = reg.swz; 529 src->Negate = reg.negate ? 
NEGATE_XYZW : NEGATE_NONE; 530 src->Abs = 0; 531 src->RelAddr = 0; 532 /* Check that bitfield sizes aren't exceeded */ 533 ASSERT(src->Index == reg.idx); 534} 535 536 537static void emit_dst( struct prog_dst_register *dst, 538 struct ureg reg, GLuint mask ) 539{ 540 dst->File = reg.file; 541 dst->Index = reg.idx; 542 /* allow zero as a shorthand for xyzw */ 543 dst->WriteMask = mask ? mask : WRITEMASK_XYZW; 544 dst->CondMask = COND_TR; /* always pass cond test */ 545 dst->CondSwizzle = SWIZZLE_NOOP; 546 /* Check that bitfield sizes aren't exceeded */ 547 ASSERT(dst->Index == reg.idx); 548} 549 550 551static void debug_insn( struct prog_instruction *inst, const char *fn, 552 GLuint line ) 553{ 554 if (DISASSEM) { 555 static const char *last_fn; 556 557 if (fn != last_fn) { 558 last_fn = fn; 559 printf("%s:\n", fn); 560 } 561 562 printf("%d:\t", line); 563 _mesa_print_instruction(inst); 564 } 565} 566 567 568static void emit_op3fn(struct tnl_program *p, 569 enum prog_opcode op, 570 struct ureg dest, 571 GLuint mask, 572 struct ureg src0, 573 struct ureg src1, 574 struct ureg src2, 575 const char *fn, 576 GLuint line) 577{ 578 GLuint nr; 579 struct prog_instruction *inst; 580 581 assert((GLint) p->program->Base.NumInstructions <= p->max_inst); 582 583 if (p->program->Base.NumInstructions == p->max_inst) { 584 /* need to extend the program's instruction array */ 585 struct prog_instruction *newInst; 586 587 /* double the size */ 588 p->max_inst *= 2; 589 590 newInst = _mesa_alloc_instructions(p->max_inst); 591 if (!newInst) { 592 _mesa_error(NULL, GL_OUT_OF_MEMORY, "vertex program build"); 593 return; 594 } 595 596 _mesa_copy_instructions(newInst, 597 p->program->Base.Instructions, 598 p->program->Base.NumInstructions); 599 600 _mesa_free_instructions(p->program->Base.Instructions, 601 p->program->Base.NumInstructions); 602 603 p->program->Base.Instructions = newInst; 604 } 605 606 nr = p->program->Base.NumInstructions++; 607 608 inst = 
&p->program->Base.Instructions[nr]; 609 inst->Opcode = (enum prog_opcode) op; 610 611 emit_arg( &inst->SrcReg[0], src0 ); 612 emit_arg( &inst->SrcReg[1], src1 ); 613 emit_arg( &inst->SrcReg[2], src2 ); 614 615 emit_dst( &inst->DstReg, dest, mask ); 616 617 debug_insn(inst, fn, line); 618} 619 620 621#define emit_op3(p, op, dst, mask, src0, src1, src2) \ 622 emit_op3fn(p, op, dst, mask, src0, src1, src2, __FUNCTION__, __LINE__) 623 624#define emit_op2(p, op, dst, mask, src0, src1) \ 625 emit_op3fn(p, op, dst, mask, src0, src1, undef, __FUNCTION__, __LINE__) 626 627#define emit_op1(p, op, dst, mask, src0) \ 628 emit_op3fn(p, op, dst, mask, src0, undef, undef, __FUNCTION__, __LINE__) 629 630 631static struct ureg make_temp( struct tnl_program *p, struct ureg reg ) 632{ 633 if (reg.file == PROGRAM_TEMPORARY && 634 !(p->temp_reserved & (1<<reg.idx))) 635 return reg; 636 else { 637 struct ureg temp = get_temp(p); 638 emit_op1(p, OPCODE_MOV, temp, 0, reg); 639 return temp; 640 } 641} 642 643 644/* Currently no tracking performed of input/output/register size or 645 * active elements. Could be used to reduce these operations, as 646 * could the matrix type. 647 */ 648static void emit_matrix_transform_vec4( struct tnl_program *p, 649 struct ureg dest, 650 const struct ureg *mat, 651 struct ureg src) 652{ 653 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_X, src, mat[0]); 654 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Y, src, mat[1]); 655 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Z, src, mat[2]); 656 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_W, src, mat[3]); 657} 658 659 660/* This version is much easier to implement if writemasks are not 661 * supported natively on the target or (like SSE), the target doesn't 662 * have a clean/obvious dotproduct implementation. 
663 */ 664static void emit_transpose_matrix_transform_vec4( struct tnl_program *p, 665 struct ureg dest, 666 const struct ureg *mat, 667 struct ureg src) 668{ 669 struct ureg tmp; 670 671 if (dest.file != PROGRAM_TEMPORARY) 672 tmp = get_temp(p); 673 else 674 tmp = dest; 675 676 emit_op2(p, OPCODE_MUL, tmp, 0, swizzle1(src,X), mat[0]); 677 emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Y), mat[1], tmp); 678 emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Z), mat[2], tmp); 679 emit_op3(p, OPCODE_MAD, dest, 0, swizzle1(src,W), mat[3], tmp); 680 681 if (dest.file != PROGRAM_TEMPORARY) 682 release_temp(p, tmp); 683} 684 685 686static void emit_matrix_transform_vec3( struct tnl_program *p, 687 struct ureg dest, 688 const struct ureg *mat, 689 struct ureg src) 690{ 691 emit_op2(p, OPCODE_DP3, dest, WRITEMASK_X, src, mat[0]); 692 emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Y, src, mat[1]); 693 emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Z, src, mat[2]); 694} 695 696 697static void emit_normalize_vec3( struct tnl_program *p, 698 struct ureg dest, 699 struct ureg src ) 700{ 701 struct ureg tmp = get_temp(p); 702 emit_op2(p, OPCODE_DP3, tmp, WRITEMASK_X, src, src); 703 emit_op1(p, OPCODE_RSQ, tmp, WRITEMASK_X, tmp); 704 emit_op2(p, OPCODE_MUL, dest, 0, src, swizzle1(tmp, X)); 705 release_temp(p, tmp); 706} 707 708 709static void emit_passthrough( struct tnl_program *p, 710 GLuint input, 711 GLuint output ) 712{ 713 struct ureg out = register_output(p, output); 714 emit_op1(p, OPCODE_MOV, out, 0, register_input(p, input)); 715} 716 717 718static struct ureg get_eye_position( struct tnl_program *p ) 719{ 720 if (is_undef(p->eye_position)) { 721 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 722 struct ureg modelview[4]; 723 724 p->eye_position = reserve_temp(p); 725 726 if (p->mvp_with_dp4) { 727 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3, 728 0, modelview ); 729 730 emit_matrix_transform_vec4(p, p->eye_position, modelview, pos); 731 } 732 else { 733 
register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3, 734 STATE_MATRIX_TRANSPOSE, modelview ); 735 736 emit_transpose_matrix_transform_vec4(p, p->eye_position, modelview, pos); 737 } 738 } 739 740 return p->eye_position; 741} 742 743 744static struct ureg get_eye_position_z( struct tnl_program *p ) 745{ 746 if (!is_undef(p->eye_position)) 747 return swizzle1(p->eye_position, Z); 748 749 if (is_undef(p->eye_position_z)) { 750 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 751 struct ureg modelview[4]; 752 753 p->eye_position_z = reserve_temp(p); 754 755 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3, 756 0, modelview ); 757 758 emit_op2(p, OPCODE_DP4, p->eye_position_z, 0, pos, modelview[2]); 759 } 760 761 return p->eye_position_z; 762} 763 764 765static struct ureg get_eye_position_normalized( struct tnl_program *p ) 766{ 767 if (is_undef(p->eye_position_normalized)) { 768 struct ureg eye = get_eye_position(p); 769 p->eye_position_normalized = reserve_temp(p); 770 emit_normalize_vec3(p, p->eye_position_normalized, eye); 771 } 772 773 return p->eye_position_normalized; 774} 775 776 777static struct ureg get_transformed_normal( struct tnl_program *p ) 778{ 779 if (is_undef(p->transformed_normal) && 780 !p->state->need_eye_coords && 781 !p->state->normalize && 782 !(p->state->need_eye_coords == p->state->rescale_normals)) 783 { 784 p->transformed_normal = register_input(p, VERT_ATTRIB_NORMAL ); 785 } 786 else if (is_undef(p->transformed_normal)) 787 { 788 struct ureg normal = register_input(p, VERT_ATTRIB_NORMAL ); 789 struct ureg mvinv[3]; 790 struct ureg transformed_normal = reserve_temp(p); 791 792 if (p->state->need_eye_coords) { 793 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 2, 794 STATE_MATRIX_INVTRANS, mvinv ); 795 796 /* Transform to eye space: 797 */ 798 emit_matrix_transform_vec3( p, transformed_normal, mvinv, normal ); 799 normal = transformed_normal; 800 } 801 802 /* Normalize/Rescale: 803 */ 804 if 
(p->state->normalize) { 805 emit_normalize_vec3( p, transformed_normal, normal ); 806 normal = transformed_normal; 807 } 808 else if (p->state->need_eye_coords == p->state->rescale_normals) { 809 /* This is already adjusted for eye/non-eye rendering: 810 */ 811 struct ureg rescale = register_param2(p, STATE_INTERNAL, 812 STATE_NORMAL_SCALE); 813 814 emit_op2( p, OPCODE_MUL, transformed_normal, 0, normal, rescale ); 815 normal = transformed_normal; 816 } 817 818 assert(normal.file == PROGRAM_TEMPORARY); 819 p->transformed_normal = normal; 820 } 821 822 return p->transformed_normal; 823} 824 825 826static void build_hpos( struct tnl_program *p ) 827{ 828 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 829 struct ureg hpos = register_output( p, VARYING_SLOT_POS ); 830 struct ureg mvp[4]; 831 832 if (p->mvp_with_dp4) { 833 register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3, 834 0, mvp ); 835 emit_matrix_transform_vec4( p, hpos, mvp, pos ); 836 } 837 else { 838 register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3, 839 STATE_MATRIX_TRANSPOSE, mvp ); 840 emit_transpose_matrix_transform_vec4( p, hpos, mvp, pos ); 841 } 842} 843 844 845static GLuint material_attrib( GLuint side, GLuint property ) 846{ 847 return (property - STATE_AMBIENT) * 2 + side; 848} 849 850 851/** 852 * Get a bitmask of which material values vary on a per-vertex basis. 
853 */ 854static void set_material_flags( struct tnl_program *p ) 855{ 856 p->color_materials = 0; 857 p->materials = 0; 858 859 if (p->state->varying_vp_inputs & VERT_BIT_COLOR0) { 860 p->materials = 861 p->color_materials = p->state->light_color_material_mask; 862 } 863 864 p->materials |= (p->state->varying_vp_inputs >> VERT_ATTRIB_GENERIC0); 865} 866 867 868static struct ureg get_material( struct tnl_program *p, GLuint side, 869 GLuint property ) 870{ 871 GLuint attrib = material_attrib(side, property); 872 873 if (p->color_materials & (1<<attrib)) 874 return register_input(p, VERT_ATTRIB_COLOR0); 875 else if (p->materials & (1<<attrib)) { 876 /* Put material values in the GENERIC slots -- they are not used 877 * for anything in fixed function mode. 878 */ 879 return register_input( p, attrib + VERT_ATTRIB_GENERIC0 ); 880 } 881 else 882 return register_param3( p, STATE_MATERIAL, side, property ); 883} 884 885#define SCENE_COLOR_BITS(side) (( MAT_BIT_FRONT_EMISSION | \ 886 MAT_BIT_FRONT_AMBIENT | \ 887 MAT_BIT_FRONT_DIFFUSE) << (side)) 888 889 890/** 891 * Either return a precalculated constant value or emit code to 892 * calculate these values dynamically in the case where material calls 893 * are present between begin/end pairs. 894 * 895 * Probably want to shift this to the program compilation phase - if 896 * we always emitted the calculation here, a smart compiler could 897 * detect that it was constant (given a certain set of inputs), and 898 * lift it out of the main loop. That way the programs created here 899 * would be independent of the vertex_buffer details. 
900 */ 901static struct ureg get_scenecolor( struct tnl_program *p, GLuint side ) 902{ 903 if (p->materials & SCENE_COLOR_BITS(side)) { 904 struct ureg lm_ambient = register_param1(p, STATE_LIGHTMODEL_AMBIENT); 905 struct ureg material_emission = get_material(p, side, STATE_EMISSION); 906 struct ureg material_ambient = get_material(p, side, STATE_AMBIENT); 907 struct ureg material_diffuse = get_material(p, side, STATE_DIFFUSE); 908 struct ureg tmp = make_temp(p, material_diffuse); 909 emit_op3(p, OPCODE_MAD, tmp, WRITEMASK_XYZ, lm_ambient, 910 material_ambient, material_emission); 911 return tmp; 912 } 913 else 914 return register_param2( p, STATE_LIGHTMODEL_SCENECOLOR, side ); 915} 916 917 918static struct ureg get_lightprod( struct tnl_program *p, GLuint light, 919 GLuint side, GLuint property ) 920{ 921 GLuint attrib = material_attrib(side, property); 922 if (p->materials & (1<<attrib)) { 923 struct ureg light_value = 924 register_param3(p, STATE_LIGHT, light, property); 925 struct ureg material_value = get_material(p, side, property); 926 struct ureg tmp = get_temp(p); 927 emit_op2(p, OPCODE_MUL, tmp, 0, light_value, material_value); 928 return tmp; 929 } 930 else 931 return register_param4(p, STATE_LIGHTPROD, light, side, property); 932} 933 934 935static struct ureg calculate_light_attenuation( struct tnl_program *p, 936 GLuint i, 937 struct ureg VPpli, 938 struct ureg dist ) 939{ 940 struct ureg attenuation = register_param3(p, STATE_LIGHT, i, 941 STATE_ATTENUATION); 942 struct ureg att = undef; 943 944 /* Calculate spot attenuation: 945 */ 946 if (!p->state->unit[i].light_spotcutoff_is_180) { 947 struct ureg spot_dir_norm = register_param3(p, STATE_INTERNAL, 948 STATE_LIGHT_SPOT_DIR_NORMALIZED, i); 949 struct ureg spot = get_temp(p); 950 struct ureg slt = get_temp(p); 951 952 att = get_temp(p); 953 954 emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm); 955 emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot); 956 emit_op2(p, 
OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W)); 957 emit_op2(p, OPCODE_MUL, att, 0, slt, spot); 958 959 release_temp(p, spot); 960 release_temp(p, slt); 961 } 962 963 /* Calculate distance attenuation(See formula (2.4) at glspec 2.1 page 62): 964 * 965 * Skip the calucation when _dist_ is undefined(light_eyepos3_is_zero) 966 */ 967 if (p->state->unit[i].light_attenuated && !is_undef(dist)) { 968 if (is_undef(att)) 969 att = get_temp(p); 970 /* 1/d,d,d,1/d */ 971 emit_op1(p, OPCODE_RCP, dist, WRITEMASK_YZ, dist); 972 /* 1,d,d*d,1/d */ 973 emit_op2(p, OPCODE_MUL, dist, WRITEMASK_XZ, dist, swizzle1(dist,Y)); 974 /* 1/dist-atten */ 975 emit_op2(p, OPCODE_DP3, dist, 0, attenuation, dist); 976 977 if (!p->state->unit[i].light_spotcutoff_is_180) { 978 /* dist-atten */ 979 emit_op1(p, OPCODE_RCP, dist, 0, dist); 980 /* spot-atten * dist-atten */ 981 emit_op2(p, OPCODE_MUL, att, 0, dist, att); 982 } 983 else { 984 /* dist-atten */ 985 emit_op1(p, OPCODE_RCP, att, 0, dist); 986 } 987 } 988 989 return att; 990} 991 992 993/** 994 * Compute: 995 * lit.y = MAX(0, dots.x) 996 * lit.z = SLT(0, dots.x) 997 */ 998static void emit_degenerate_lit( struct tnl_program *p, 999 struct ureg lit, 1000 struct ureg dots ) 1001{ 1002 struct ureg id = get_identity_param(p); /* id = {0,0,0,1} */ 1003 1004 /* Note that lit.x & lit.w will not be examined. Note also that 1005 * dots.xyzw == dots.xxxx. 1006 */ 1007 1008 /* MAX lit, id, dots; 1009 */ 1010 emit_op2(p, OPCODE_MAX, lit, WRITEMASK_XYZW, id, dots); 1011 1012 /* result[2] = (in > 0 ? 1 : 0) 1013 * SLT lit.z, id.z, dots; # lit.z = (0 < dots.z) ? 1 : 0 1014 */ 1015 emit_op2(p, OPCODE_SLT, lit, WRITEMASK_Z, swizzle1(id,Z), dots); 1016} 1017 1018 1019/* Need to add some addtional parameters to allow lighting in object 1020 * space - STATE_SPOT_DIRECTION and STATE_HALF_VECTOR implicitly assume eye 1021 * space lighting. 
1022 */ 1023static void build_lighting( struct tnl_program *p ) 1024{ 1025 const GLboolean twoside = p->state->light_twoside; 1026 const GLboolean separate = p->state->separate_specular; 1027 GLuint nr_lights = 0, count = 0; 1028 struct ureg normal = get_transformed_normal(p); 1029 struct ureg lit = get_temp(p); 1030 struct ureg dots = get_temp(p); 1031 struct ureg _col0 = undef, _col1 = undef; 1032 struct ureg _bfc0 = undef, _bfc1 = undef; 1033 GLuint i; 1034 1035 /* 1036 * NOTE: 1037 * dots.x = dot(normal, VPpli) 1038 * dots.y = dot(normal, halfAngle) 1039 * dots.z = back.shininess 1040 * dots.w = front.shininess 1041 */ 1042 1043 for (i = 0; i < MAX_LIGHTS; i++) 1044 if (p->state->unit[i].light_enabled) 1045 nr_lights++; 1046 1047 set_material_flags(p); 1048 1049 { 1050 if (!p->state->material_shininess_is_zero) { 1051 struct ureg shininess = get_material(p, 0, STATE_SHININESS); 1052 emit_op1(p, OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X)); 1053 release_temp(p, shininess); 1054 } 1055 1056 _col0 = make_temp(p, get_scenecolor(p, 0)); 1057 if (separate) 1058 _col1 = make_temp(p, get_identity_param(p)); 1059 else 1060 _col1 = _col0; 1061 } 1062 1063 if (twoside) { 1064 if (!p->state->material_shininess_is_zero) { 1065 /* Note that we negate the back-face specular exponent here. 1066 * The negation will be un-done later in the back-face code below. 1067 */ 1068 struct ureg shininess = get_material(p, 1, STATE_SHININESS); 1069 emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z, 1070 negate(swizzle1(shininess,X))); 1071 release_temp(p, shininess); 1072 } 1073 1074 _bfc0 = make_temp(p, get_scenecolor(p, 1)); 1075 if (separate) 1076 _bfc1 = make_temp(p, get_identity_param(p)); 1077 else 1078 _bfc1 = _bfc0; 1079 } 1080 1081 /* If no lights, still need to emit the scenecolor. 
1082 */ 1083 { 1084 struct ureg res0 = register_output( p, VARYING_SLOT_COL0 ); 1085 emit_op1(p, OPCODE_MOV, res0, 0, _col0); 1086 } 1087 1088 if (separate) { 1089 struct ureg res1 = register_output( p, VARYING_SLOT_COL1 ); 1090 emit_op1(p, OPCODE_MOV, res1, 0, _col1); 1091 } 1092 1093 if (twoside) { 1094 struct ureg res0 = register_output( p, VARYING_SLOT_BFC0 ); 1095 emit_op1(p, OPCODE_MOV, res0, 0, _bfc0); 1096 } 1097 1098 if (twoside && separate) { 1099 struct ureg res1 = register_output( p, VARYING_SLOT_BFC1 ); 1100 emit_op1(p, OPCODE_MOV, res1, 0, _bfc1); 1101 } 1102 1103 if (nr_lights == 0) { 1104 release_temps(p); 1105 return; 1106 } 1107 1108 for (i = 0; i < MAX_LIGHTS; i++) { 1109 if (p->state->unit[i].light_enabled) { 1110 struct ureg half = undef; 1111 struct ureg att = undef, VPpli = undef; 1112 struct ureg dist = undef; 1113 1114 count++; 1115 if (p->state->unit[i].light_eyepos3_is_zero) { 1116 VPpli = register_param3(p, STATE_INTERNAL, 1117 STATE_LIGHT_POSITION_NORMALIZED, i); 1118 } else { 1119 struct ureg Ppli = register_param3(p, STATE_INTERNAL, 1120 STATE_LIGHT_POSITION, i); 1121 struct ureg V = get_eye_position(p); 1122 1123 VPpli = get_temp(p); 1124 dist = get_temp(p); 1125 1126 /* Calculate VPpli vector 1127 */ 1128 emit_op2(p, OPCODE_SUB, VPpli, 0, Ppli, V); 1129 1130 /* Normalize VPpli. The dist value also used in 1131 * attenuation below. 
1132 */ 1133 emit_op2(p, OPCODE_DP3, dist, 0, VPpli, VPpli); 1134 emit_op1(p, OPCODE_RSQ, dist, 0, dist); 1135 emit_op2(p, OPCODE_MUL, VPpli, 0, VPpli, dist); 1136 } 1137 1138 /* Calculate attenuation: 1139 */ 1140 att = calculate_light_attenuation(p, i, VPpli, dist); 1141 release_temp(p, dist); 1142 1143 /* Calculate viewer direction, or use infinite viewer: 1144 */ 1145 if (!p->state->material_shininess_is_zero) { 1146 if (p->state->light_local_viewer) { 1147 struct ureg eye_hat = get_eye_position_normalized(p); 1148 half = get_temp(p); 1149 emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat); 1150 emit_normalize_vec3(p, half, half); 1151 } else if (p->state->unit[i].light_eyepos3_is_zero) { 1152 half = register_param3(p, STATE_INTERNAL, 1153 STATE_LIGHT_HALF_VECTOR, i); 1154 } else { 1155 struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z); 1156 half = get_temp(p); 1157 emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir); 1158 emit_normalize_vec3(p, half, half); 1159 } 1160 } 1161 1162 /* Calculate dot products: 1163 */ 1164 if (p->state->material_shininess_is_zero) { 1165 emit_op2(p, OPCODE_DP3, dots, 0, normal, VPpli); 1166 } 1167 else { 1168 emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli); 1169 emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half); 1170 } 1171 1172 /* Front face lighting: 1173 */ 1174 { 1175 struct ureg ambient = get_lightprod(p, i, 0, STATE_AMBIENT); 1176 struct ureg diffuse = get_lightprod(p, i, 0, STATE_DIFFUSE); 1177 struct ureg specular = get_lightprod(p, i, 0, STATE_SPECULAR); 1178 struct ureg res0, res1; 1179 GLuint mask0, mask1; 1180 1181 if (count == nr_lights) { 1182 if (separate) { 1183 mask0 = WRITEMASK_XYZ; 1184 mask1 = WRITEMASK_XYZ; 1185 res0 = register_output( p, VARYING_SLOT_COL0 ); 1186 res1 = register_output( p, VARYING_SLOT_COL1 ); 1187 } 1188 else { 1189 mask0 = 0; 1190 mask1 = WRITEMASK_XYZ; 1191 res0 = _col0; 1192 res1 = register_output( p, VARYING_SLOT_COL0 ); 1193 } 1194 } 1195 else { 1196 mask0 = 0; 1197 
mask1 = 0; 1198 res0 = _col0; 1199 res1 = _col1; 1200 } 1201 1202 if (!is_undef(att)) { 1203 /* light is attenuated by distance */ 1204 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1205 emit_op2(p, OPCODE_MUL, lit, 0, lit, att); 1206 emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0); 1207 } 1208 else if (!p->state->material_shininess_is_zero) { 1209 /* there's a non-zero specular term */ 1210 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1211 emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0); 1212 } 1213 else { 1214 /* no attenutation, no specular */ 1215 emit_degenerate_lit(p, lit, dots); 1216 emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0); 1217 } 1218 1219 emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0); 1220 emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1); 1221 1222 release_temp(p, ambient); 1223 release_temp(p, diffuse); 1224 release_temp(p, specular); 1225 } 1226 1227 /* Back face lighting: 1228 */ 1229 if (twoside) { 1230 struct ureg ambient = get_lightprod(p, i, 1, STATE_AMBIENT); 1231 struct ureg diffuse = get_lightprod(p, i, 1, STATE_DIFFUSE); 1232 struct ureg specular = get_lightprod(p, i, 1, STATE_SPECULAR); 1233 struct ureg res0, res1; 1234 GLuint mask0, mask1; 1235 1236 if (count == nr_lights) { 1237 if (separate) { 1238 mask0 = WRITEMASK_XYZ; 1239 mask1 = WRITEMASK_XYZ; 1240 res0 = register_output( p, VARYING_SLOT_BFC0 ); 1241 res1 = register_output( p, VARYING_SLOT_BFC1 ); 1242 } 1243 else { 1244 mask0 = 0; 1245 mask1 = WRITEMASK_XYZ; 1246 res0 = _bfc0; 1247 res1 = register_output( p, VARYING_SLOT_BFC0 ); 1248 } 1249 } 1250 else { 1251 res0 = _bfc0; 1252 res1 = _bfc1; 1253 mask0 = 0; 1254 mask1 = 0; 1255 } 1256 1257 /* For the back face we need to negate the X and Y component 1258 * dot products. dots.Z has the negated back-face specular 1259 * exponent. We swizzle that into the W position. This 1260 * negation makes the back-face specular term positive again. 
1261 */ 1262 dots = negate(swizzle(dots,X,Y,W,Z)); 1263 1264 if (!is_undef(att)) { 1265 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1266 emit_op2(p, OPCODE_MUL, lit, 0, lit, att); 1267 emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0); 1268 } 1269 else if (!p->state->material_shininess_is_zero) { 1270 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1271 emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0); /**/ 1272 } 1273 else { 1274 emit_degenerate_lit(p, lit, dots); 1275 emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0); 1276 } 1277 1278 emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0); 1279 emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1); 1280 /* restore dots to its original state for subsequent lights 1281 * by negating and swizzling again. 1282 */ 1283 dots = negate(swizzle(dots,X,Y,W,Z)); 1284 1285 release_temp(p, ambient); 1286 release_temp(p, diffuse); 1287 release_temp(p, specular); 1288 } 1289 1290 release_temp(p, half); 1291 release_temp(p, VPpli); 1292 release_temp(p, att); 1293 } 1294 } 1295 1296 release_temps( p ); 1297} 1298 1299 1300static void build_fog( struct tnl_program *p ) 1301{ 1302 struct ureg fog = register_output(p, VARYING_SLOT_FOGC); 1303 struct ureg input; 1304 1305 if (p->state->fog_source_is_depth) { 1306 1307 switch (p->state->fog_distance_mode) { 1308 case FDM_EYE_RADIAL: /* Z = sqrt(Xe*Xe + Ye*Ye + Ze*Ze) */ 1309 input = get_eye_position(p); 1310 emit_op2(p, OPCODE_DP3, fog, WRITEMASK_X, input, input); 1311 emit_op1(p, OPCODE_RSQ, fog, WRITEMASK_X, fog); 1312 emit_op1(p, OPCODE_RCP, fog, WRITEMASK_X, fog); 1313 break; 1314 case FDM_EYE_PLANE: /* Z = Ze */ 1315 input = get_eye_position_z(p); 1316 emit_op1(p, OPCODE_MOV, fog, WRITEMASK_X, input); 1317 break; 1318 case FDM_EYE_PLANE_ABS: /* Z = abs(Ze) */ 1319 input = get_eye_position_z(p); 1320 emit_op1(p, OPCODE_ABS, fog, WRITEMASK_X, input); 1321 break; 1322 default: 1323 assert(!"Bad fog mode in build_fog()"); 1324 break; 1325 } 1326 
1327 } 1328 else { 1329 input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X); 1330 emit_op1(p, OPCODE_ABS, fog, WRITEMASK_X, input); 1331 } 1332 1333 emit_op1(p, OPCODE_MOV, fog, WRITEMASK_YZW, get_identity_param(p)); 1334} 1335 1336 1337static void build_reflect_texgen( struct tnl_program *p, 1338 struct ureg dest, 1339 GLuint writemask ) 1340{ 1341 struct ureg normal = get_transformed_normal(p); 1342 struct ureg eye_hat = get_eye_position_normalized(p); 1343 struct ureg tmp = get_temp(p); 1344 1345 /* n.u */ 1346 emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat); 1347 /* 2n.u */ 1348 emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp); 1349 /* (-2n.u)n + u */ 1350 emit_op3(p, OPCODE_MAD, dest, writemask, negate(tmp), normal, eye_hat); 1351 1352 release_temp(p, tmp); 1353} 1354 1355 1356static void build_sphere_texgen( struct tnl_program *p, 1357 struct ureg dest, 1358 GLuint writemask ) 1359{ 1360 struct ureg normal = get_transformed_normal(p); 1361 struct ureg eye_hat = get_eye_position_normalized(p); 1362 struct ureg tmp = get_temp(p); 1363 struct ureg half = register_scalar_const(p, .5); 1364 struct ureg r = get_temp(p); 1365 struct ureg inv_m = get_temp(p); 1366 struct ureg id = get_identity_param(p); 1367 1368 /* Could share the above calculations, but it would be 1369 * a fairly odd state for someone to set (both sphere and 1370 * reflection active for different texture coordinate 1371 * components. 
Of course - if two texture units enable 1372 * reflect and/or sphere, things start to tilt in favour 1373 * of seperating this out: 1374 */ 1375 1376 /* n.u */ 1377 emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat); 1378 /* 2n.u */ 1379 emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp); 1380 /* (-2n.u)n + u */ 1381 emit_op3(p, OPCODE_MAD, r, 0, negate(tmp), normal, eye_hat); 1382 /* r + 0,0,1 */ 1383 emit_op2(p, OPCODE_ADD, tmp, 0, r, swizzle(id,X,Y,W,Z)); 1384 /* rx^2 + ry^2 + (rz+1)^2 */ 1385 emit_op2(p, OPCODE_DP3, tmp, 0, tmp, tmp); 1386 /* 2/m */ 1387 emit_op1(p, OPCODE_RSQ, tmp, 0, tmp); 1388 /* 1/m */ 1389 emit_op2(p, OPCODE_MUL, inv_m, 0, tmp, half); 1390 /* r/m + 1/2 */ 1391 emit_op3(p, OPCODE_MAD, dest, writemask, r, inv_m, half); 1392 1393 release_temp(p, tmp); 1394 release_temp(p, r); 1395 release_temp(p, inv_m); 1396} 1397 1398 1399static void build_texture_transform( struct tnl_program *p ) 1400{ 1401 GLuint i, j; 1402 1403 for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) { 1404 1405 if (!(p->state->fragprog_inputs_read & VARYING_BIT_TEX(i))) 1406 continue; 1407 1408 if (p->state->unit[i].coord_replace) 1409 continue; 1410 1411 if (p->state->unit[i].texgen_enabled || 1412 p->state->unit[i].texmat_enabled) { 1413 1414 GLuint texmat_enabled = p->state->unit[i].texmat_enabled; 1415 struct ureg out = register_output(p, VARYING_SLOT_TEX0 + i); 1416 struct ureg out_texgen = undef; 1417 1418 if (p->state->unit[i].texgen_enabled) { 1419 GLuint copy_mask = 0; 1420 GLuint sphere_mask = 0; 1421 GLuint reflect_mask = 0; 1422 GLuint normal_mask = 0; 1423 GLuint modes[4]; 1424 1425 if (texmat_enabled) 1426 out_texgen = get_temp(p); 1427 else 1428 out_texgen = out; 1429 1430 modes[0] = p->state->unit[i].texgen_mode0; 1431 modes[1] = p->state->unit[i].texgen_mode1; 1432 modes[2] = p->state->unit[i].texgen_mode2; 1433 modes[3] = p->state->unit[i].texgen_mode3; 1434 1435 for (j = 0; j < 4; j++) { 1436 switch (modes[j]) { 1437 case TXG_OBJ_LINEAR: { 1438 struct ureg obj = 
register_input(p, VERT_ATTRIB_POS); 1439 struct ureg plane = 1440 register_param3(p, STATE_TEXGEN, i, 1441 STATE_TEXGEN_OBJECT_S + j); 1442 1443 emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j, 1444 obj, plane ); 1445 break; 1446 } 1447 case TXG_EYE_LINEAR: { 1448 struct ureg eye = get_eye_position(p); 1449 struct ureg plane = 1450 register_param3(p, STATE_TEXGEN, i, 1451 STATE_TEXGEN_EYE_S + j); 1452 1453 emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j, 1454 eye, plane ); 1455 break; 1456 } 1457 case TXG_SPHERE_MAP: 1458 sphere_mask |= WRITEMASK_X << j; 1459 break; 1460 case TXG_REFLECTION_MAP: 1461 reflect_mask |= WRITEMASK_X << j; 1462 break; 1463 case TXG_NORMAL_MAP: 1464 normal_mask |= WRITEMASK_X << j; 1465 break; 1466 case TXG_NONE: 1467 copy_mask |= WRITEMASK_X << j; 1468 } 1469 } 1470 1471 if (sphere_mask) { 1472 build_sphere_texgen(p, out_texgen, sphere_mask); 1473 } 1474 1475 if (reflect_mask) { 1476 build_reflect_texgen(p, out_texgen, reflect_mask); 1477 } 1478 1479 if (normal_mask) { 1480 struct ureg normal = get_transformed_normal(p); 1481 emit_op1(p, OPCODE_MOV, out_texgen, normal_mask, normal ); 1482 } 1483 1484 if (copy_mask) { 1485 struct ureg in = register_input(p, VERT_ATTRIB_TEX0+i); 1486 emit_op1(p, OPCODE_MOV, out_texgen, copy_mask, in ); 1487 } 1488 } 1489 1490 if (texmat_enabled) { 1491 struct ureg texmat[4]; 1492 struct ureg in = (!is_undef(out_texgen) ? 
1493 out_texgen : 1494 register_input(p, VERT_ATTRIB_TEX0+i)); 1495 if (p->mvp_with_dp4) { 1496 register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3, 1497 0, texmat ); 1498 emit_matrix_transform_vec4( p, out, texmat, in ); 1499 } 1500 else { 1501 register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3, 1502 STATE_MATRIX_TRANSPOSE, texmat ); 1503 emit_transpose_matrix_transform_vec4( p, out, texmat, in ); 1504 } 1505 } 1506 1507 release_temps(p); 1508 } 1509 else { 1510 emit_passthrough(p, VERT_ATTRIB_TEX0+i, VARYING_SLOT_TEX0+i); 1511 } 1512 } 1513} 1514 1515 1516/** 1517 * Point size attenuation computation. 1518 */ 1519static void build_atten_pointsize( struct tnl_program *p ) 1520{ 1521 struct ureg eye = get_eye_position_z(p); 1522 struct ureg state_size = register_param2(p, STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED); 1523 struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION); 1524 struct ureg out = register_output(p, VARYING_SLOT_PSIZ); 1525 struct ureg ut = get_temp(p); 1526 1527 /* dist = |eyez| */ 1528 emit_op1(p, OPCODE_ABS, ut, WRITEMASK_Y, swizzle1(eye, Z)); 1529 /* p1 + dist * (p2 + dist * p3); */ 1530 emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y), 1531 swizzle1(state_attenuation, Z), swizzle1(state_attenuation, Y)); 1532 emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y), 1533 ut, swizzle1(state_attenuation, X)); 1534 1535 /* 1 / sqrt(factor) */ 1536 emit_op1(p, OPCODE_RSQ, ut, WRITEMASK_X, ut ); 1537 1538#if 0 1539 /* out = pointSize / sqrt(factor) */ 1540 emit_op2(p, OPCODE_MUL, out, WRITEMASK_X, ut, state_size); 1541#else 1542 /* this is a good place to clamp the point size since there's likely 1543 * no hardware registers to clamp point size at rasterization time. 
1544 */ 1545 emit_op2(p, OPCODE_MUL, ut, WRITEMASK_X, ut, state_size); 1546 emit_op2(p, OPCODE_MAX, ut, WRITEMASK_X, ut, swizzle1(state_size, Y)); 1547 emit_op2(p, OPCODE_MIN, out, WRITEMASK_X, ut, swizzle1(state_size, Z)); 1548#endif 1549 1550 release_temp(p, ut); 1551} 1552 1553 1554/** 1555 * Pass-though per-vertex point size, from user's point size array. 1556 */ 1557static void build_array_pointsize( struct tnl_program *p ) 1558{ 1559 struct ureg in = register_input(p, VERT_ATTRIB_POINT_SIZE); 1560 struct ureg out = register_output(p, VARYING_SLOT_PSIZ); 1561 emit_op1(p, OPCODE_MOV, out, WRITEMASK_X, in); 1562} 1563 1564 1565static void build_tnl_program( struct tnl_program *p ) 1566{ 1567 /* Emit the program, starting with the modelview, projection transforms: 1568 */ 1569 build_hpos(p); 1570 1571 /* Lighting calculations: 1572 */ 1573 if (p->state->fragprog_inputs_read & (VARYING_BIT_COL0|VARYING_BIT_COL1)) { 1574 if (p->state->light_global_enabled) 1575 build_lighting(p); 1576 else { 1577 if (p->state->fragprog_inputs_read & VARYING_BIT_COL0) 1578 emit_passthrough(p, VERT_ATTRIB_COLOR0, VARYING_SLOT_COL0); 1579 1580 if (p->state->fragprog_inputs_read & VARYING_BIT_COL1) 1581 emit_passthrough(p, VERT_ATTRIB_COLOR1, VARYING_SLOT_COL1); 1582 } 1583 } 1584 1585 if (p->state->fragprog_inputs_read & VARYING_BIT_FOGC) 1586 build_fog(p); 1587 1588 if (p->state->fragprog_inputs_read & VARYING_BITS_TEX_ANY) 1589 build_texture_transform(p); 1590 1591 if (p->state->point_attenuated) 1592 build_atten_pointsize(p); 1593 else if (p->state->point_array) 1594 build_array_pointsize(p); 1595 1596 /* Finish up: 1597 */ 1598 emit_op1(p, OPCODE_END, undef, 0, undef); 1599 1600 /* Disassemble: 1601 */ 1602 if (DISASSEM) { 1603 printf ("\n"); 1604 } 1605} 1606 1607 1608static void 1609create_new_program( const struct state_key *key, 1610 struct gl_vertex_program *program, 1611 GLboolean mvp_with_dp4, 1612 GLuint max_temps) 1613{ 1614 struct tnl_program p; 1615 1616 memset(&p, 0, 
sizeof(p)); 1617 p.state = key; 1618 p.program = program; 1619 p.eye_position = undef; 1620 p.eye_position_z = undef; 1621 p.eye_position_normalized = undef; 1622 p.transformed_normal = undef; 1623 p.identity = undef; 1624 p.temp_in_use = 0; 1625 p.mvp_with_dp4 = mvp_with_dp4; 1626 1627 if (max_temps >= sizeof(int) * 8) 1628 p.temp_reserved = 0; 1629 else 1630 p.temp_reserved = ~((1<<max_temps)-1); 1631 1632 /* Start by allocating 32 instructions. 1633 * If we need more, we'll grow the instruction array as needed. 1634 */ 1635 p.max_inst = 32; 1636 p.program->Base.Instructions = _mesa_alloc_instructions(p.max_inst); 1637 p.program->Base.String = NULL; 1638 p.program->Base.NumInstructions = 1639 p.program->Base.NumTemporaries = 1640 p.program->Base.NumParameters = 1641 p.program->Base.NumAttributes = p.program->Base.NumAddressRegs = 0; 1642 p.program->Base.Parameters = _mesa_new_parameter_list(); 1643 p.program->Base.InputsRead = 0; 1644 p.program->Base.OutputsWritten = 0; 1645 1646 build_tnl_program( &p ); 1647} 1648 1649 1650/** 1651 * Return a vertex program which implements the current fixed-function 1652 * transform/lighting/texgen operations. 
1653 */ 1654struct gl_vertex_program * 1655_mesa_get_fixed_func_vertex_program(struct gl_context *ctx) 1656{ 1657 struct gl_vertex_program *prog; 1658 struct state_key key; 1659 1660 /* Grab all the relevent state and put it in a single structure: 1661 */ 1662 make_state_key(ctx, &key); 1663 1664 /* Look for an already-prepared program for this state: 1665 */ 1666 prog = gl_vertex_program( 1667 _mesa_search_program_cache(ctx->VertexProgram.Cache, &key, sizeof(key))); 1668 1669 if (!prog) { 1670 /* OK, we'll have to build a new one */ 1671 if (0) 1672 printf("Build new TNL program\n"); 1673 1674 prog = gl_vertex_program(ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 0)); 1675 if (!prog) 1676 return NULL; 1677 1678 create_new_program( &key, prog, 1679 ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS, 1680 ctx->Const.Program[MESA_SHADER_VERTEX].MaxTemps ); 1681 1682#if 0 1683 if (ctx->Driver.ProgramStringNotify) 1684 ctx->Driver.ProgramStringNotify( ctx, GL_VERTEX_PROGRAM_ARB, 1685 &prog->Base ); 1686#endif 1687 _mesa_program_cache_insert(ctx, ctx->VertexProgram.Cache, 1688 &key, sizeof(key), &prog->Base); 1689 } 1690 1691 return prog; 1692} 1693