ffvertex_prog.c revision 01e04c3f
/**************************************************************************
 *
 * Copyright 2007 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * \file ffvertex_prog.c
 *
 * Create a vertex program to execute the current fixed function T&L pipeline.
32 * \author Keith Whitwell 33 */ 34 35 36#include "main/errors.h" 37#include "main/glheader.h" 38#include "main/mtypes.h" 39#include "main/macros.h" 40#include "main/enums.h" 41#include "main/ffvertex_prog.h" 42#include "program/program.h" 43#include "program/prog_cache.h" 44#include "program/prog_instruction.h" 45#include "program/prog_parameter.h" 46#include "program/prog_print.h" 47#include "program/prog_statevars.h" 48#include "util/bitscan.h" 49 50 51/** Max of number of lights and texture coord units */ 52#define NUM_UNITS MAX2(MAX_TEXTURE_COORD_UNITS, MAX_LIGHTS) 53 54struct state_key { 55 GLbitfield varying_vp_inputs; 56 57 unsigned fragprog_inputs_read:12; 58 59 unsigned light_color_material_mask:12; 60 unsigned light_global_enabled:1; 61 unsigned light_local_viewer:1; 62 unsigned light_twoside:1; 63 unsigned material_shininess_is_zero:1; 64 unsigned need_eye_coords:1; 65 unsigned normalize:1; 66 unsigned rescale_normals:1; 67 68 unsigned fog_distance_mode:2; 69 unsigned separate_specular:1; 70 unsigned point_attenuated:1; 71 72 struct { 73 unsigned char light_enabled:1; 74 unsigned char light_eyepos3_is_zero:1; 75 unsigned char light_spotcutoff_is_180:1; 76 unsigned char light_attenuated:1; 77 unsigned char texmat_enabled:1; 78 unsigned char coord_replace:1; 79 unsigned char texgen_enabled:1; 80 unsigned char texgen_mode0:4; 81 unsigned char texgen_mode1:4; 82 unsigned char texgen_mode2:4; 83 unsigned char texgen_mode3:4; 84 } unit[NUM_UNITS]; 85}; 86 87 88#define TXG_NONE 0 89#define TXG_OBJ_LINEAR 1 90#define TXG_EYE_LINEAR 2 91#define TXG_SPHERE_MAP 3 92#define TXG_REFLECTION_MAP 4 93#define TXG_NORMAL_MAP 5 94 95static GLuint translate_texgen( GLboolean enabled, GLenum mode ) 96{ 97 if (!enabled) 98 return TXG_NONE; 99 100 switch (mode) { 101 case GL_OBJECT_LINEAR: return TXG_OBJ_LINEAR; 102 case GL_EYE_LINEAR: return TXG_EYE_LINEAR; 103 case GL_SPHERE_MAP: return TXG_SPHERE_MAP; 104 case GL_REFLECTION_MAP_NV: return TXG_REFLECTION_MAP; 105 case 
GL_NORMAL_MAP_NV: return TXG_NORMAL_MAP; 106 default: return TXG_NONE; 107 } 108} 109 110#define FDM_EYE_RADIAL 0 111#define FDM_EYE_PLANE 1 112#define FDM_EYE_PLANE_ABS 2 113#define FDM_FROM_ARRAY 3 114 115static GLuint translate_fog_distance_mode(GLenum source, GLenum mode) 116{ 117 if (source == GL_FRAGMENT_DEPTH_EXT) { 118 switch (mode) { 119 case GL_EYE_RADIAL_NV: 120 return FDM_EYE_RADIAL; 121 case GL_EYE_PLANE: 122 return FDM_EYE_PLANE; 123 default: /* shouldn't happen; fall through to a sensible default */ 124 case GL_EYE_PLANE_ABSOLUTE_NV: 125 return FDM_EYE_PLANE_ABS; 126 } 127 } else { 128 return FDM_FROM_ARRAY; 129 } 130} 131 132static GLboolean check_active_shininess( struct gl_context *ctx, 133 const struct state_key *key, 134 GLuint side ) 135{ 136 GLuint attr = MAT_ATTRIB_FRONT_SHININESS + side; 137 138 if ((key->varying_vp_inputs & VERT_BIT_COLOR0) && 139 (key->light_color_material_mask & (1 << attr))) 140 return GL_TRUE; 141 142 if (key->varying_vp_inputs & VERT_BIT_MAT(attr)) 143 return GL_TRUE; 144 145 if (ctx->Light.Material.Attrib[attr][0] != 0.0F) 146 return GL_TRUE; 147 148 return GL_FALSE; 149} 150 151 152static void make_state_key( struct gl_context *ctx, struct state_key *key ) 153{ 154 const struct gl_program *fp = ctx->FragmentProgram._Current; 155 GLbitfield mask; 156 157 memset(key, 0, sizeof(struct state_key)); 158 159 /* This now relies on texenvprogram.c being active: 160 */ 161 assert(fp); 162 163 key->need_eye_coords = ctx->_NeedEyeCoords; 164 165 key->fragprog_inputs_read = fp->info.inputs_read; 166 key->varying_vp_inputs = ctx->varying_vp_inputs; 167 168 if (ctx->RenderMode == GL_FEEDBACK) { 169 /* make sure the vertprog emits color and tex0 */ 170 key->fragprog_inputs_read |= (VARYING_BIT_COL0 | VARYING_BIT_TEX0); 171 } 172 173 if (ctx->Light.Enabled) { 174 key->light_global_enabled = 1; 175 176 if (ctx->Light.Model.LocalViewer) 177 key->light_local_viewer = 1; 178 179 if (ctx->Light.Model.TwoSide) 180 key->light_twoside = 1; 
181 182 if (ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR) 183 key->separate_specular = 1; 184 185 if (ctx->Light.ColorMaterialEnabled) { 186 key->light_color_material_mask = ctx->Light._ColorMaterialBitmask; 187 } 188 189 mask = ctx->Light._EnabledLights; 190 while (mask) { 191 const int i = u_bit_scan(&mask); 192 struct gl_light *light = &ctx->Light.Light[i]; 193 194 key->unit[i].light_enabled = 1; 195 196 if (light->EyePosition[3] == 0.0F) 197 key->unit[i].light_eyepos3_is_zero = 1; 198 199 if (light->SpotCutoff == 180.0F) 200 key->unit[i].light_spotcutoff_is_180 = 1; 201 202 if (light->ConstantAttenuation != 1.0F || 203 light->LinearAttenuation != 0.0F || 204 light->QuadraticAttenuation != 0.0F) 205 key->unit[i].light_attenuated = 1; 206 } 207 208 if (check_active_shininess(ctx, key, 0)) { 209 key->material_shininess_is_zero = 0; 210 } 211 else if (key->light_twoside && 212 check_active_shininess(ctx, key, 1)) { 213 key->material_shininess_is_zero = 0; 214 } 215 else { 216 key->material_shininess_is_zero = 1; 217 } 218 } 219 220 if (ctx->Transform.Normalize) 221 key->normalize = 1; 222 223 if (ctx->Transform.RescaleNormals) 224 key->rescale_normals = 1; 225 226 /* Only distinguish fog parameters if we actually need */ 227 if (key->fragprog_inputs_read & VARYING_BIT_FOGC) 228 key->fog_distance_mode = 229 translate_fog_distance_mode(ctx->Fog.FogCoordinateSource, 230 ctx->Fog.FogDistanceMode); 231 232 if (ctx->Point._Attenuated) 233 key->point_attenuated = 1; 234 235 mask = ctx->Texture._EnabledCoordUnits | ctx->Texture._TexGenEnabled 236 | ctx->Texture._TexMatEnabled | ctx->Point.CoordReplace; 237 while (mask) { 238 const int i = u_bit_scan(&mask); 239 struct gl_fixedfunc_texture_unit *texUnit = 240 &ctx->Texture.FixedFuncUnit[i]; 241 242 if (ctx->Point.PointSprite) 243 if (ctx->Point.CoordReplace & (1u << i)) 244 key->unit[i].coord_replace = 1; 245 246 if (ctx->Texture._TexMatEnabled & ENABLE_TEXMAT(i)) 247 key->unit[i].texmat_enabled = 1; 248 249 
if (texUnit->TexGenEnabled) { 250 key->unit[i].texgen_enabled = 1; 251 252 key->unit[i].texgen_mode0 = 253 translate_texgen( texUnit->TexGenEnabled & (1<<0), 254 texUnit->GenS.Mode ); 255 key->unit[i].texgen_mode1 = 256 translate_texgen( texUnit->TexGenEnabled & (1<<1), 257 texUnit->GenT.Mode ); 258 key->unit[i].texgen_mode2 = 259 translate_texgen( texUnit->TexGenEnabled & (1<<2), 260 texUnit->GenR.Mode ); 261 key->unit[i].texgen_mode3 = 262 translate_texgen( texUnit->TexGenEnabled & (1<<3), 263 texUnit->GenQ.Mode ); 264 } 265 } 266} 267 268 269 270/* Very useful debugging tool - produces annotated listing of 271 * generated program with line/function references for each 272 * instruction back into this file: 273 */ 274#define DISASSEM 0 275 276 277/* Use uregs to represent registers internally, translate to Mesa's 278 * expected formats on emit. 279 * 280 * NOTE: These are passed by value extensively in this file rather 281 * than as usual by pointer reference. If this disturbs you, try 282 * remembering they are just 32bits in size. 283 * 284 * GCC is smart enough to deal with these dword-sized structures in 285 * much the same way as if I had defined them as dwords and was using 286 * macros to access and set the fields. This is much nicer and easier 287 * to evolve. 
288 */ 289struct ureg { 290 GLuint file:4; 291 GLint idx:9; /* relative addressing may be negative */ 292 /* sizeof(idx) should == sizeof(prog_src_reg::Index) */ 293 GLuint negate:1; 294 GLuint swz:12; 295 GLuint pad:6; 296}; 297 298 299struct tnl_program { 300 const struct state_key *state; 301 struct gl_program *program; 302 GLuint max_inst; /** number of instructions allocated for program */ 303 GLboolean mvp_with_dp4; 304 305 GLuint temp_in_use; 306 GLuint temp_reserved; 307 308 struct ureg eye_position; 309 struct ureg eye_position_z; 310 struct ureg eye_position_normalized; 311 struct ureg transformed_normal; 312 struct ureg identity; 313 314 GLuint materials; 315 GLuint color_materials; 316}; 317 318 319static const struct ureg undef = { 320 PROGRAM_UNDEFINED, 321 0, 322 0, 323 0, 324 0 325}; 326 327/* Local shorthand: 328 */ 329#define X SWIZZLE_X 330#define Y SWIZZLE_Y 331#define Z SWIZZLE_Z 332#define W SWIZZLE_W 333 334 335/* Construct a ureg: 336 */ 337static struct ureg make_ureg(GLuint file, GLint idx) 338{ 339 struct ureg reg; 340 reg.file = file; 341 reg.idx = idx; 342 reg.negate = 0; 343 reg.swz = SWIZZLE_NOOP; 344 reg.pad = 0; 345 return reg; 346} 347 348 349static struct ureg negate( struct ureg reg ) 350{ 351 reg.negate ^= 1; 352 return reg; 353} 354 355 356static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w ) 357{ 358 reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x), 359 GET_SWZ(reg.swz, y), 360 GET_SWZ(reg.swz, z), 361 GET_SWZ(reg.swz, w)); 362 return reg; 363} 364 365 366static struct ureg swizzle1( struct ureg reg, int x ) 367{ 368 return swizzle(reg, x, x, x, x); 369} 370 371 372static struct ureg get_temp( struct tnl_program *p ) 373{ 374 int bit = ffs( ~p->temp_in_use ); 375 if (!bit) { 376 _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__); 377 exit(1); 378 } 379 380 if ((GLuint) bit > p->program->arb.NumTemporaries) 381 p->program->arb.NumTemporaries = bit; 382 383 p->temp_in_use |= 1<<(bit-1); 384 return 
make_ureg(PROGRAM_TEMPORARY, bit-1); 385} 386 387 388static struct ureg reserve_temp( struct tnl_program *p ) 389{ 390 struct ureg temp = get_temp( p ); 391 p->temp_reserved |= 1<<temp.idx; 392 return temp; 393} 394 395 396static void release_temp( struct tnl_program *p, struct ureg reg ) 397{ 398 if (reg.file == PROGRAM_TEMPORARY) { 399 p->temp_in_use &= ~(1<<reg.idx); 400 p->temp_in_use |= p->temp_reserved; /* can't release reserved temps */ 401 } 402} 403 404static void release_temps( struct tnl_program *p ) 405{ 406 p->temp_in_use = p->temp_reserved; 407} 408 409 410static struct ureg register_param5(struct tnl_program *p, 411 GLint s0, 412 GLint s1, 413 GLint s2, 414 GLint s3, 415 GLint s4) 416{ 417 gl_state_index16 tokens[STATE_LENGTH]; 418 GLint idx; 419 tokens[0] = s0; 420 tokens[1] = s1; 421 tokens[2] = s2; 422 tokens[3] = s3; 423 tokens[4] = s4; 424 idx = _mesa_add_state_reference(p->program->Parameters, tokens ); 425 return make_ureg(PROGRAM_STATE_VAR, idx); 426} 427 428 429#define register_param1(p,s0) register_param5(p,s0,0,0,0,0) 430#define register_param2(p,s0,s1) register_param5(p,s0,s1,0,0,0) 431#define register_param3(p,s0,s1,s2) register_param5(p,s0,s1,s2,0,0) 432#define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0) 433 434 435 436/** 437 * \param input one of VERT_ATTRIB_x tokens. 438 */ 439static struct ureg register_input( struct tnl_program *p, GLuint input ) 440{ 441 assert(input < VERT_ATTRIB_MAX); 442 443 if (p->state->varying_vp_inputs & VERT_BIT(input)) { 444 p->program->info.inputs_read |= VERT_BIT(input); 445 return make_ureg(PROGRAM_INPUT, input); 446 } 447 else { 448 return register_param3( p, STATE_INTERNAL, STATE_CURRENT_ATTRIB, input ); 449 } 450} 451 452 453/** 454 * \param input one of VARYING_SLOT_x tokens. 
455 */ 456static struct ureg register_output( struct tnl_program *p, GLuint output ) 457{ 458 p->program->info.outputs_written |= BITFIELD64_BIT(output); 459 return make_ureg(PROGRAM_OUTPUT, output); 460} 461 462 463static struct ureg register_const4f( struct tnl_program *p, 464 GLfloat s0, 465 GLfloat s1, 466 GLfloat s2, 467 GLfloat s3) 468{ 469 gl_constant_value values[4]; 470 GLint idx; 471 GLuint swizzle; 472 values[0].f = s0; 473 values[1].f = s1; 474 values[2].f = s2; 475 values[3].f = s3; 476 idx = _mesa_add_unnamed_constant(p->program->Parameters, values, 4, 477 &swizzle ); 478 assert(swizzle == SWIZZLE_NOOP); 479 return make_ureg(PROGRAM_CONSTANT, idx); 480} 481 482#define register_const1f(p, s0) register_const4f(p, s0, 0, 0, 1) 483#define register_scalar_const(p, s0) register_const4f(p, s0, s0, s0, s0) 484#define register_const2f(p, s0, s1) register_const4f(p, s0, s1, 0, 1) 485#define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1) 486 487static GLboolean is_undef( struct ureg reg ) 488{ 489 return reg.file == PROGRAM_UNDEFINED; 490} 491 492 493static struct ureg get_identity_param( struct tnl_program *p ) 494{ 495 if (is_undef(p->identity)) 496 p->identity = register_const4f(p, 0,0,0,1); 497 498 return p->identity; 499} 500 501static void register_matrix_param5( struct tnl_program *p, 502 GLint s0, /* modelview, projection, etc */ 503 GLint s1, /* texture matrix number */ 504 GLint s2, /* first row */ 505 GLint s3, /* last row */ 506 GLint s4, /* inverse, transpose, etc */ 507 struct ureg *matrix ) 508{ 509 GLint i; 510 511 /* This is a bit sad as the support is there to pull the whole 512 * matrix out in one go: 513 */ 514 for (i = 0; i <= s3 - s2; i++) 515 matrix[i] = register_param5( p, s0, s1, i, i, s4 ); 516} 517 518 519static void emit_arg( struct prog_src_register *src, 520 struct ureg reg ) 521{ 522 src->File = reg.file; 523 src->Index = reg.idx; 524 src->Swizzle = reg.swz; 525 src->Negate = reg.negate ? 
NEGATE_XYZW : NEGATE_NONE; 526 src->RelAddr = 0; 527 /* Check that bitfield sizes aren't exceeded */ 528 assert(src->Index == reg.idx); 529} 530 531 532static void emit_dst( struct prog_dst_register *dst, 533 struct ureg reg, GLuint mask ) 534{ 535 dst->File = reg.file; 536 dst->Index = reg.idx; 537 /* allow zero as a shorthand for xyzw */ 538 dst->WriteMask = mask ? mask : WRITEMASK_XYZW; 539 /* Check that bitfield sizes aren't exceeded */ 540 assert(dst->Index == reg.idx); 541} 542 543 544static void debug_insn( struct prog_instruction *inst, const char *fn, 545 GLuint line ) 546{ 547 if (DISASSEM) { 548 static const char *last_fn; 549 550 if (fn != last_fn) { 551 last_fn = fn; 552 printf("%s:\n", fn); 553 } 554 555 printf("%d:\t", line); 556 _mesa_print_instruction(inst); 557 } 558} 559 560 561static void emit_op3fn(struct tnl_program *p, 562 enum prog_opcode op, 563 struct ureg dest, 564 GLuint mask, 565 struct ureg src0, 566 struct ureg src1, 567 struct ureg src2, 568 const char *fn, 569 GLuint line) 570{ 571 GLuint nr; 572 struct prog_instruction *inst; 573 574 assert(p->program->arb.NumInstructions <= p->max_inst); 575 576 if (p->program->arb.NumInstructions == p->max_inst) { 577 /* need to extend the program's instruction array */ 578 struct prog_instruction *newInst; 579 580 /* double the size */ 581 p->max_inst *= 2; 582 583 newInst = 584 rzalloc_array(p->program, struct prog_instruction, p->max_inst); 585 if (!newInst) { 586 _mesa_error(NULL, GL_OUT_OF_MEMORY, "vertex program build"); 587 return; 588 } 589 590 _mesa_copy_instructions(newInst, p->program->arb.Instructions, 591 p->program->arb.NumInstructions); 592 593 ralloc_free(p->program->arb.Instructions); 594 595 p->program->arb.Instructions = newInst; 596 } 597 598 nr = p->program->arb.NumInstructions++; 599 600 inst = &p->program->arb.Instructions[nr]; 601 inst->Opcode = (enum prog_opcode) op; 602 603 emit_arg( &inst->SrcReg[0], src0 ); 604 emit_arg( &inst->SrcReg[1], src1 ); 605 emit_arg( 
&inst->SrcReg[2], src2 ); 606 607 emit_dst( &inst->DstReg, dest, mask ); 608 609 debug_insn(inst, fn, line); 610} 611 612 613#define emit_op3(p, op, dst, mask, src0, src1, src2) \ 614 emit_op3fn(p, op, dst, mask, src0, src1, src2, __func__, __LINE__) 615 616#define emit_op2(p, op, dst, mask, src0, src1) \ 617 emit_op3fn(p, op, dst, mask, src0, src1, undef, __func__, __LINE__) 618 619#define emit_op1(p, op, dst, mask, src0) \ 620 emit_op3fn(p, op, dst, mask, src0, undef, undef, __func__, __LINE__) 621 622 623static struct ureg make_temp( struct tnl_program *p, struct ureg reg ) 624{ 625 if (reg.file == PROGRAM_TEMPORARY && 626 !(p->temp_reserved & (1<<reg.idx))) 627 return reg; 628 else { 629 struct ureg temp = get_temp(p); 630 emit_op1(p, OPCODE_MOV, temp, 0, reg); 631 return temp; 632 } 633} 634 635 636/* Currently no tracking performed of input/output/register size or 637 * active elements. Could be used to reduce these operations, as 638 * could the matrix type. 639 */ 640static void emit_matrix_transform_vec4( struct tnl_program *p, 641 struct ureg dest, 642 const struct ureg *mat, 643 struct ureg src) 644{ 645 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_X, src, mat[0]); 646 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Y, src, mat[1]); 647 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Z, src, mat[2]); 648 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_W, src, mat[3]); 649} 650 651 652/* This version is much easier to implement if writemasks are not 653 * supported natively on the target or (like SSE), the target doesn't 654 * have a clean/obvious dotproduct implementation. 
655 */ 656static void emit_transpose_matrix_transform_vec4( struct tnl_program *p, 657 struct ureg dest, 658 const struct ureg *mat, 659 struct ureg src) 660{ 661 struct ureg tmp; 662 663 if (dest.file != PROGRAM_TEMPORARY) 664 tmp = get_temp(p); 665 else 666 tmp = dest; 667 668 emit_op2(p, OPCODE_MUL, tmp, 0, swizzle1(src,X), mat[0]); 669 emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Y), mat[1], tmp); 670 emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Z), mat[2], tmp); 671 emit_op3(p, OPCODE_MAD, dest, 0, swizzle1(src,W), mat[3], tmp); 672 673 if (dest.file != PROGRAM_TEMPORARY) 674 release_temp(p, tmp); 675} 676 677 678static void emit_matrix_transform_vec3( struct tnl_program *p, 679 struct ureg dest, 680 const struct ureg *mat, 681 struct ureg src) 682{ 683 emit_op2(p, OPCODE_DP3, dest, WRITEMASK_X, src, mat[0]); 684 emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Y, src, mat[1]); 685 emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Z, src, mat[2]); 686} 687 688 689static void emit_normalize_vec3( struct tnl_program *p, 690 struct ureg dest, 691 struct ureg src ) 692{ 693 struct ureg tmp = get_temp(p); 694 emit_op2(p, OPCODE_DP3, tmp, WRITEMASK_X, src, src); 695 emit_op1(p, OPCODE_RSQ, tmp, WRITEMASK_X, tmp); 696 emit_op2(p, OPCODE_MUL, dest, 0, src, swizzle1(tmp, X)); 697 release_temp(p, tmp); 698} 699 700 701static void emit_passthrough( struct tnl_program *p, 702 GLuint input, 703 GLuint output ) 704{ 705 struct ureg out = register_output(p, output); 706 emit_op1(p, OPCODE_MOV, out, 0, register_input(p, input)); 707} 708 709 710static struct ureg get_eye_position( struct tnl_program *p ) 711{ 712 if (is_undef(p->eye_position)) { 713 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 714 struct ureg modelview[4]; 715 716 p->eye_position = reserve_temp(p); 717 718 if (p->mvp_with_dp4) { 719 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3, 720 0, modelview ); 721 722 emit_matrix_transform_vec4(p, p->eye_position, modelview, pos); 723 } 724 else { 725 
register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3, 726 STATE_MATRIX_TRANSPOSE, modelview ); 727 728 emit_transpose_matrix_transform_vec4(p, p->eye_position, modelview, pos); 729 } 730 } 731 732 return p->eye_position; 733} 734 735 736static struct ureg get_eye_position_z( struct tnl_program *p ) 737{ 738 if (!is_undef(p->eye_position)) 739 return swizzle1(p->eye_position, Z); 740 741 if (is_undef(p->eye_position_z)) { 742 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 743 struct ureg modelview[4]; 744 745 p->eye_position_z = reserve_temp(p); 746 747 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3, 748 0, modelview ); 749 750 emit_op2(p, OPCODE_DP4, p->eye_position_z, 0, pos, modelview[2]); 751 } 752 753 return p->eye_position_z; 754} 755 756 757static struct ureg get_eye_position_normalized( struct tnl_program *p ) 758{ 759 if (is_undef(p->eye_position_normalized)) { 760 struct ureg eye = get_eye_position(p); 761 p->eye_position_normalized = reserve_temp(p); 762 emit_normalize_vec3(p, p->eye_position_normalized, eye); 763 } 764 765 return p->eye_position_normalized; 766} 767 768 769static struct ureg get_transformed_normal( struct tnl_program *p ) 770{ 771 if (is_undef(p->transformed_normal) && 772 !p->state->need_eye_coords && 773 !p->state->normalize && 774 !(p->state->need_eye_coords == p->state->rescale_normals)) 775 { 776 p->transformed_normal = register_input(p, VERT_ATTRIB_NORMAL ); 777 } 778 else if (is_undef(p->transformed_normal)) 779 { 780 struct ureg normal = register_input(p, VERT_ATTRIB_NORMAL ); 781 struct ureg mvinv[3]; 782 struct ureg transformed_normal = reserve_temp(p); 783 784 if (p->state->need_eye_coords) { 785 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 2, 786 STATE_MATRIX_INVTRANS, mvinv ); 787 788 /* Transform to eye space: 789 */ 790 emit_matrix_transform_vec3( p, transformed_normal, mvinv, normal ); 791 normal = transformed_normal; 792 } 793 794 /* Normalize/Rescale: 795 */ 796 if 
(p->state->normalize) { 797 emit_normalize_vec3( p, transformed_normal, normal ); 798 normal = transformed_normal; 799 } 800 else if (p->state->need_eye_coords == p->state->rescale_normals) { 801 /* This is already adjusted for eye/non-eye rendering: 802 */ 803 struct ureg rescale = register_param2(p, STATE_INTERNAL, 804 STATE_NORMAL_SCALE); 805 806 emit_op2( p, OPCODE_MUL, transformed_normal, 0, normal, rescale ); 807 normal = transformed_normal; 808 } 809 810 assert(normal.file == PROGRAM_TEMPORARY); 811 p->transformed_normal = normal; 812 } 813 814 return p->transformed_normal; 815} 816 817 818static void build_hpos( struct tnl_program *p ) 819{ 820 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 821 struct ureg hpos = register_output( p, VARYING_SLOT_POS ); 822 struct ureg mvp[4]; 823 824 if (p->mvp_with_dp4) { 825 register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3, 826 0, mvp ); 827 emit_matrix_transform_vec4( p, hpos, mvp, pos ); 828 } 829 else { 830 register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3, 831 STATE_MATRIX_TRANSPOSE, mvp ); 832 emit_transpose_matrix_transform_vec4( p, hpos, mvp, pos ); 833 } 834} 835 836 837static GLuint material_attrib( GLuint side, GLuint property ) 838{ 839 return (property - STATE_AMBIENT) * 2 + side; 840} 841 842 843/** 844 * Get a bitmask of which material values vary on a per-vertex basis. 
845 */ 846static void set_material_flags( struct tnl_program *p ) 847{ 848 p->color_materials = 0; 849 p->materials = 0; 850 851 if (p->state->varying_vp_inputs & VERT_BIT_COLOR0) { 852 p->materials = 853 p->color_materials = p->state->light_color_material_mask; 854 } 855 856 p->materials |= ((p->state->varying_vp_inputs & VERT_BIT_MAT_ALL) 857 >> VERT_ATTRIB_MAT(0)); 858} 859 860 861static struct ureg get_material( struct tnl_program *p, GLuint side, 862 GLuint property ) 863{ 864 GLuint attrib = material_attrib(side, property); 865 866 if (p->color_materials & (1<<attrib)) 867 return register_input(p, VERT_ATTRIB_COLOR0); 868 else if (p->materials & (1<<attrib)) { 869 /* Put material values in the GENERIC slots -- they are not used 870 * for anything in fixed function mode. 871 */ 872 return register_input( p, VERT_ATTRIB_MAT(attrib) ); 873 } 874 else 875 return register_param3( p, STATE_MATERIAL, side, property ); 876} 877 878#define SCENE_COLOR_BITS(side) (( MAT_BIT_FRONT_EMISSION | \ 879 MAT_BIT_FRONT_AMBIENT | \ 880 MAT_BIT_FRONT_DIFFUSE) << (side)) 881 882 883/** 884 * Either return a precalculated constant value or emit code to 885 * calculate these values dynamically in the case where material calls 886 * are present between begin/end pairs. 887 * 888 * Probably want to shift this to the program compilation phase - if 889 * we always emitted the calculation here, a smart compiler could 890 * detect that it was constant (given a certain set of inputs), and 891 * lift it out of the main loop. That way the programs created here 892 * would be independent of the vertex_buffer details. 
893 */ 894static struct ureg get_scenecolor( struct tnl_program *p, GLuint side ) 895{ 896 if (p->materials & SCENE_COLOR_BITS(side)) { 897 struct ureg lm_ambient = register_param1(p, STATE_LIGHTMODEL_AMBIENT); 898 struct ureg material_emission = get_material(p, side, STATE_EMISSION); 899 struct ureg material_ambient = get_material(p, side, STATE_AMBIENT); 900 struct ureg material_diffuse = get_material(p, side, STATE_DIFFUSE); 901 struct ureg tmp = make_temp(p, material_diffuse); 902 emit_op3(p, OPCODE_MAD, tmp, WRITEMASK_XYZ, lm_ambient, 903 material_ambient, material_emission); 904 return tmp; 905 } 906 else 907 return register_param2( p, STATE_LIGHTMODEL_SCENECOLOR, side ); 908} 909 910 911static struct ureg get_lightprod( struct tnl_program *p, GLuint light, 912 GLuint side, GLuint property ) 913{ 914 GLuint attrib = material_attrib(side, property); 915 if (p->materials & (1<<attrib)) { 916 struct ureg light_value = 917 register_param3(p, STATE_LIGHT, light, property); 918 struct ureg material_value = get_material(p, side, property); 919 struct ureg tmp = get_temp(p); 920 emit_op2(p, OPCODE_MUL, tmp, 0, light_value, material_value); 921 return tmp; 922 } 923 else 924 return register_param4(p, STATE_LIGHTPROD, light, side, property); 925} 926 927 928static struct ureg calculate_light_attenuation( struct tnl_program *p, 929 GLuint i, 930 struct ureg VPpli, 931 struct ureg dist ) 932{ 933 struct ureg attenuation = register_param3(p, STATE_LIGHT, i, 934 STATE_ATTENUATION); 935 struct ureg att = undef; 936 937 /* Calculate spot attenuation: 938 */ 939 if (!p->state->unit[i].light_spotcutoff_is_180) { 940 struct ureg spot_dir_norm = register_param3(p, STATE_INTERNAL, 941 STATE_LIGHT_SPOT_DIR_NORMALIZED, i); 942 struct ureg spot = get_temp(p); 943 struct ureg slt = get_temp(p); 944 945 att = get_temp(p); 946 947 emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm); 948 emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot); 949 emit_op1(p, 
OPCODE_ABS, spot, 0, spot); 950 emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W)); 951 emit_op2(p, OPCODE_MUL, att, 0, slt, spot); 952 953 release_temp(p, spot); 954 release_temp(p, slt); 955 } 956 957 /* Calculate distance attenuation(See formula (2.4) at glspec 2.1 page 62): 958 * 959 * Skip the calucation when _dist_ is undefined(light_eyepos3_is_zero) 960 */ 961 if (p->state->unit[i].light_attenuated && !is_undef(dist)) { 962 if (is_undef(att)) 963 att = get_temp(p); 964 /* 1/d,d,d,1/d */ 965 emit_op1(p, OPCODE_RCP, dist, WRITEMASK_YZ, dist); 966 /* 1,d,d*d,1/d */ 967 emit_op2(p, OPCODE_MUL, dist, WRITEMASK_XZ, dist, swizzle1(dist,Y)); 968 /* 1/dist-atten */ 969 emit_op2(p, OPCODE_DP3, dist, 0, attenuation, dist); 970 971 if (!p->state->unit[i].light_spotcutoff_is_180) { 972 /* dist-atten */ 973 emit_op1(p, OPCODE_RCP, dist, 0, dist); 974 /* spot-atten * dist-atten */ 975 emit_op2(p, OPCODE_MUL, att, 0, dist, att); 976 } 977 else { 978 /* dist-atten */ 979 emit_op1(p, OPCODE_RCP, att, 0, dist); 980 } 981 } 982 983 return att; 984} 985 986 987/** 988 * Compute: 989 * lit.y = MAX(0, dots.x) 990 * lit.z = SLT(0, dots.x) 991 */ 992static void emit_degenerate_lit( struct tnl_program *p, 993 struct ureg lit, 994 struct ureg dots ) 995{ 996 struct ureg id = get_identity_param(p); /* id = {0,0,0,1} */ 997 998 /* Note that lit.x & lit.w will not be examined. Note also that 999 * dots.xyzw == dots.xxxx. 1000 */ 1001 1002 /* MAX lit, id, dots; 1003 */ 1004 emit_op2(p, OPCODE_MAX, lit, WRITEMASK_XYZW, id, dots); 1005 1006 /* result[2] = (in > 0 ? 1 : 0) 1007 * SLT lit.z, id.z, dots; # lit.z = (0 < dots.z) ? 1 : 0 1008 */ 1009 emit_op2(p, OPCODE_SLT, lit, WRITEMASK_Z, swizzle1(id,Z), dots); 1010} 1011 1012 1013/* Need to add some addtional parameters to allow lighting in object 1014 * space - STATE_SPOT_DIRECTION and STATE_HALF_VECTOR implicitly assume eye 1015 * space lighting. 
1016 */ 1017static void build_lighting( struct tnl_program *p ) 1018{ 1019 const GLboolean twoside = p->state->light_twoside; 1020 const GLboolean separate = p->state->separate_specular; 1021 GLuint nr_lights = 0, count = 0; 1022 struct ureg normal = get_transformed_normal(p); 1023 struct ureg lit = get_temp(p); 1024 struct ureg dots = get_temp(p); 1025 struct ureg _col0 = undef, _col1 = undef; 1026 struct ureg _bfc0 = undef, _bfc1 = undef; 1027 GLuint i; 1028 1029 /* 1030 * NOTE: 1031 * dots.x = dot(normal, VPpli) 1032 * dots.y = dot(normal, halfAngle) 1033 * dots.z = back.shininess 1034 * dots.w = front.shininess 1035 */ 1036 1037 for (i = 0; i < MAX_LIGHTS; i++) 1038 if (p->state->unit[i].light_enabled) 1039 nr_lights++; 1040 1041 set_material_flags(p); 1042 1043 { 1044 if (!p->state->material_shininess_is_zero) { 1045 struct ureg shininess = get_material(p, 0, STATE_SHININESS); 1046 emit_op1(p, OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X)); 1047 release_temp(p, shininess); 1048 } 1049 1050 _col0 = make_temp(p, get_scenecolor(p, 0)); 1051 if (separate) 1052 _col1 = make_temp(p, get_identity_param(p)); 1053 else 1054 _col1 = _col0; 1055 } 1056 1057 if (twoside) { 1058 if (!p->state->material_shininess_is_zero) { 1059 /* Note that we negate the back-face specular exponent here. 1060 * The negation will be un-done later in the back-face code below. 1061 */ 1062 struct ureg shininess = get_material(p, 1, STATE_SHININESS); 1063 emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z, 1064 negate(swizzle1(shininess,X))); 1065 release_temp(p, shininess); 1066 } 1067 1068 _bfc0 = make_temp(p, get_scenecolor(p, 1)); 1069 if (separate) 1070 _bfc1 = make_temp(p, get_identity_param(p)); 1071 else 1072 _bfc1 = _bfc0; 1073 } 1074 1075 /* If no lights, still need to emit the scenecolor. 
1076 */ 1077 { 1078 struct ureg res0 = register_output( p, VARYING_SLOT_COL0 ); 1079 emit_op1(p, OPCODE_MOV, res0, 0, _col0); 1080 } 1081 1082 if (separate) { 1083 struct ureg res1 = register_output( p, VARYING_SLOT_COL1 ); 1084 emit_op1(p, OPCODE_MOV, res1, 0, _col1); 1085 } 1086 1087 if (twoside) { 1088 struct ureg res0 = register_output( p, VARYING_SLOT_BFC0 ); 1089 emit_op1(p, OPCODE_MOV, res0, 0, _bfc0); 1090 } 1091 1092 if (twoside && separate) { 1093 struct ureg res1 = register_output( p, VARYING_SLOT_BFC1 ); 1094 emit_op1(p, OPCODE_MOV, res1, 0, _bfc1); 1095 } 1096 1097 if (nr_lights == 0) { 1098 release_temps(p); 1099 return; 1100 } 1101 1102 for (i = 0; i < MAX_LIGHTS; i++) { 1103 if (p->state->unit[i].light_enabled) { 1104 struct ureg half = undef; 1105 struct ureg att = undef, VPpli = undef; 1106 struct ureg dist = undef; 1107 1108 count++; 1109 if (p->state->unit[i].light_eyepos3_is_zero) { 1110 VPpli = register_param3(p, STATE_INTERNAL, 1111 STATE_LIGHT_POSITION_NORMALIZED, i); 1112 } else { 1113 struct ureg Ppli = register_param3(p, STATE_INTERNAL, 1114 STATE_LIGHT_POSITION, i); 1115 struct ureg V = get_eye_position(p); 1116 1117 VPpli = get_temp(p); 1118 dist = get_temp(p); 1119 1120 /* Calculate VPpli vector 1121 */ 1122 emit_op2(p, OPCODE_SUB, VPpli, 0, Ppli, V); 1123 1124 /* Normalize VPpli. The dist value also used in 1125 * attenuation below. 
1126 */ 1127 emit_op2(p, OPCODE_DP3, dist, 0, VPpli, VPpli); 1128 emit_op1(p, OPCODE_RSQ, dist, 0, dist); 1129 emit_op2(p, OPCODE_MUL, VPpli, 0, VPpli, dist); 1130 } 1131 1132 /* Calculate attenuation: 1133 */ 1134 att = calculate_light_attenuation(p, i, VPpli, dist); 1135 release_temp(p, dist); 1136 1137 /* Calculate viewer direction, or use infinite viewer: 1138 */ 1139 if (!p->state->material_shininess_is_zero) { 1140 if (p->state->light_local_viewer) { 1141 struct ureg eye_hat = get_eye_position_normalized(p); 1142 half = get_temp(p); 1143 emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat); 1144 emit_normalize_vec3(p, half, half); 1145 } else if (p->state->unit[i].light_eyepos3_is_zero) { 1146 half = register_param3(p, STATE_INTERNAL, 1147 STATE_LIGHT_HALF_VECTOR, i); 1148 } else { 1149 struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z); 1150 half = get_temp(p); 1151 emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir); 1152 emit_normalize_vec3(p, half, half); 1153 } 1154 } 1155 1156 /* Calculate dot products: 1157 */ 1158 if (p->state->material_shininess_is_zero) { 1159 emit_op2(p, OPCODE_DP3, dots, 0, normal, VPpli); 1160 } 1161 else { 1162 emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli); 1163 emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half); 1164 } 1165 1166 /* Front face lighting: 1167 */ 1168 { 1169 struct ureg ambient = get_lightprod(p, i, 0, STATE_AMBIENT); 1170 struct ureg diffuse = get_lightprod(p, i, 0, STATE_DIFFUSE); 1171 struct ureg specular = get_lightprod(p, i, 0, STATE_SPECULAR); 1172 struct ureg res0, res1; 1173 GLuint mask0, mask1; 1174 1175 if (count == nr_lights) { 1176 if (separate) { 1177 mask0 = WRITEMASK_XYZ; 1178 mask1 = WRITEMASK_XYZ; 1179 res0 = register_output( p, VARYING_SLOT_COL0 ); 1180 res1 = register_output( p, VARYING_SLOT_COL1 ); 1181 } 1182 else { 1183 mask0 = 0; 1184 mask1 = WRITEMASK_XYZ; 1185 res0 = _col0; 1186 res1 = register_output( p, VARYING_SLOT_COL0 ); 1187 } 1188 } 1189 else { 1190 mask0 = 0; 1191 
mask1 = 0; 1192 res0 = _col0; 1193 res1 = _col1; 1194 } 1195 1196 if (!is_undef(att)) { 1197 /* light is attenuated by distance */ 1198 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1199 emit_op2(p, OPCODE_MUL, lit, 0, lit, att); 1200 emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0); 1201 } 1202 else if (!p->state->material_shininess_is_zero) { 1203 /* there's a non-zero specular term */ 1204 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1205 emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0); 1206 } 1207 else { 1208 /* no attenutation, no specular */ 1209 emit_degenerate_lit(p, lit, dots); 1210 emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0); 1211 } 1212 1213 emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0); 1214 emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1); 1215 1216 release_temp(p, ambient); 1217 release_temp(p, diffuse); 1218 release_temp(p, specular); 1219 } 1220 1221 /* Back face lighting: 1222 */ 1223 if (twoside) { 1224 struct ureg ambient = get_lightprod(p, i, 1, STATE_AMBIENT); 1225 struct ureg diffuse = get_lightprod(p, i, 1, STATE_DIFFUSE); 1226 struct ureg specular = get_lightprod(p, i, 1, STATE_SPECULAR); 1227 struct ureg res0, res1; 1228 GLuint mask0, mask1; 1229 1230 if (count == nr_lights) { 1231 if (separate) { 1232 mask0 = WRITEMASK_XYZ; 1233 mask1 = WRITEMASK_XYZ; 1234 res0 = register_output( p, VARYING_SLOT_BFC0 ); 1235 res1 = register_output( p, VARYING_SLOT_BFC1 ); 1236 } 1237 else { 1238 mask0 = 0; 1239 mask1 = WRITEMASK_XYZ; 1240 res0 = _bfc0; 1241 res1 = register_output( p, VARYING_SLOT_BFC0 ); 1242 } 1243 } 1244 else { 1245 res0 = _bfc0; 1246 res1 = _bfc1; 1247 mask0 = 0; 1248 mask1 = 0; 1249 } 1250 1251 /* For the back face we need to negate the X and Y component 1252 * dot products. dots.Z has the negated back-face specular 1253 * exponent. We swizzle that into the W position. This 1254 * negation makes the back-face specular term positive again. 
1255 */ 1256 dots = negate(swizzle(dots,X,Y,W,Z)); 1257 1258 if (!is_undef(att)) { 1259 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1260 emit_op2(p, OPCODE_MUL, lit, 0, lit, att); 1261 emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0); 1262 } 1263 else if (!p->state->material_shininess_is_zero) { 1264 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1265 emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0); /**/ 1266 } 1267 else { 1268 emit_degenerate_lit(p, lit, dots); 1269 emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0); 1270 } 1271 1272 emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0); 1273 emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1); 1274 /* restore dots to its original state for subsequent lights 1275 * by negating and swizzling again. 1276 */ 1277 dots = negate(swizzle(dots,X,Y,W,Z)); 1278 1279 release_temp(p, ambient); 1280 release_temp(p, diffuse); 1281 release_temp(p, specular); 1282 } 1283 1284 release_temp(p, half); 1285 release_temp(p, VPpli); 1286 release_temp(p, att); 1287 } 1288 } 1289 1290 release_temps( p ); 1291} 1292 1293 1294static void build_fog( struct tnl_program *p ) 1295{ 1296 struct ureg fog = register_output(p, VARYING_SLOT_FOGC); 1297 struct ureg input; 1298 1299 switch (p->state->fog_distance_mode) { 1300 case FDM_EYE_RADIAL: { /* Z = sqrt(Xe*Xe + Ye*Ye + Ze*Ze) */ 1301 struct ureg tmp = get_temp(p); 1302 input = get_eye_position(p); 1303 emit_op2(p, OPCODE_DP3, tmp, WRITEMASK_X, input, input); 1304 emit_op1(p, OPCODE_RSQ, tmp, WRITEMASK_X, tmp); 1305 emit_op1(p, OPCODE_RCP, fog, WRITEMASK_X, tmp); 1306 break; 1307 } 1308 case FDM_EYE_PLANE: /* Z = Ze */ 1309 input = get_eye_position_z(p); 1310 emit_op1(p, OPCODE_MOV, fog, WRITEMASK_X, input); 1311 break; 1312 case FDM_EYE_PLANE_ABS: /* Z = abs(Ze) */ 1313 input = get_eye_position_z(p); 1314 emit_op1(p, OPCODE_ABS, fog, WRITEMASK_X, input); 1315 break; 1316 case FDM_FROM_ARRAY: 1317 input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X); 
1318 emit_op1(p, OPCODE_ABS, fog, WRITEMASK_X, input); 1319 break; 1320 default: 1321 assert(!"Bad fog mode in build_fog()"); 1322 break; 1323 } 1324 1325 emit_op1(p, OPCODE_MOV, fog, WRITEMASK_YZW, get_identity_param(p)); 1326} 1327 1328 1329static void build_reflect_texgen( struct tnl_program *p, 1330 struct ureg dest, 1331 GLuint writemask ) 1332{ 1333 struct ureg normal = get_transformed_normal(p); 1334 struct ureg eye_hat = get_eye_position_normalized(p); 1335 struct ureg tmp = get_temp(p); 1336 1337 /* n.u */ 1338 emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat); 1339 /* 2n.u */ 1340 emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp); 1341 /* (-2n.u)n + u */ 1342 emit_op3(p, OPCODE_MAD, dest, writemask, negate(tmp), normal, eye_hat); 1343 1344 release_temp(p, tmp); 1345} 1346 1347 1348static void build_sphere_texgen( struct tnl_program *p, 1349 struct ureg dest, 1350 GLuint writemask ) 1351{ 1352 struct ureg normal = get_transformed_normal(p); 1353 struct ureg eye_hat = get_eye_position_normalized(p); 1354 struct ureg tmp = get_temp(p); 1355 struct ureg half = register_scalar_const(p, .5); 1356 struct ureg r = get_temp(p); 1357 struct ureg inv_m = get_temp(p); 1358 struct ureg id = get_identity_param(p); 1359 1360 /* Could share the above calculations, but it would be 1361 * a fairly odd state for someone to set (both sphere and 1362 * reflection active for different texture coordinate 1363 * components. 
Of course - if two texture units enable 1364 * reflect and/or sphere, things start to tilt in favour 1365 * of seperating this out: 1366 */ 1367 1368 /* n.u */ 1369 emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat); 1370 /* 2n.u */ 1371 emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp); 1372 /* (-2n.u)n + u */ 1373 emit_op3(p, OPCODE_MAD, r, 0, negate(tmp), normal, eye_hat); 1374 /* r + 0,0,1 */ 1375 emit_op2(p, OPCODE_ADD, tmp, 0, r, swizzle(id,X,Y,W,Z)); 1376 /* rx^2 + ry^2 + (rz+1)^2 */ 1377 emit_op2(p, OPCODE_DP3, tmp, 0, tmp, tmp); 1378 /* 2/m */ 1379 emit_op1(p, OPCODE_RSQ, tmp, 0, tmp); 1380 /* 1/m */ 1381 emit_op2(p, OPCODE_MUL, inv_m, 0, tmp, half); 1382 /* r/m + 1/2 */ 1383 emit_op3(p, OPCODE_MAD, dest, writemask, r, inv_m, half); 1384 1385 release_temp(p, tmp); 1386 release_temp(p, r); 1387 release_temp(p, inv_m); 1388} 1389 1390 1391static void build_texture_transform( struct tnl_program *p ) 1392{ 1393 GLuint i, j; 1394 1395 for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) { 1396 1397 if (!(p->state->fragprog_inputs_read & VARYING_BIT_TEX(i))) 1398 continue; 1399 1400 if (p->state->unit[i].coord_replace) 1401 continue; 1402 1403 if (p->state->unit[i].texgen_enabled || 1404 p->state->unit[i].texmat_enabled) { 1405 1406 GLuint texmat_enabled = p->state->unit[i].texmat_enabled; 1407 struct ureg out = register_output(p, VARYING_SLOT_TEX0 + i); 1408 struct ureg out_texgen = undef; 1409 1410 if (p->state->unit[i].texgen_enabled) { 1411 GLuint copy_mask = 0; 1412 GLuint sphere_mask = 0; 1413 GLuint reflect_mask = 0; 1414 GLuint normal_mask = 0; 1415 GLuint modes[4]; 1416 1417 if (texmat_enabled) 1418 out_texgen = get_temp(p); 1419 else 1420 out_texgen = out; 1421 1422 modes[0] = p->state->unit[i].texgen_mode0; 1423 modes[1] = p->state->unit[i].texgen_mode1; 1424 modes[2] = p->state->unit[i].texgen_mode2; 1425 modes[3] = p->state->unit[i].texgen_mode3; 1426 1427 for (j = 0; j < 4; j++) { 1428 switch (modes[j]) { 1429 case TXG_OBJ_LINEAR: { 1430 struct ureg obj = 
register_input(p, VERT_ATTRIB_POS); 1431 struct ureg plane = 1432 register_param3(p, STATE_TEXGEN, i, 1433 STATE_TEXGEN_OBJECT_S + j); 1434 1435 emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j, 1436 obj, plane ); 1437 break; 1438 } 1439 case TXG_EYE_LINEAR: { 1440 struct ureg eye = get_eye_position(p); 1441 struct ureg plane = 1442 register_param3(p, STATE_TEXGEN, i, 1443 STATE_TEXGEN_EYE_S + j); 1444 1445 emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j, 1446 eye, plane ); 1447 break; 1448 } 1449 case TXG_SPHERE_MAP: 1450 sphere_mask |= WRITEMASK_X << j; 1451 break; 1452 case TXG_REFLECTION_MAP: 1453 reflect_mask |= WRITEMASK_X << j; 1454 break; 1455 case TXG_NORMAL_MAP: 1456 normal_mask |= WRITEMASK_X << j; 1457 break; 1458 case TXG_NONE: 1459 copy_mask |= WRITEMASK_X << j; 1460 } 1461 } 1462 1463 if (sphere_mask) { 1464 build_sphere_texgen(p, out_texgen, sphere_mask); 1465 } 1466 1467 if (reflect_mask) { 1468 build_reflect_texgen(p, out_texgen, reflect_mask); 1469 } 1470 1471 if (normal_mask) { 1472 struct ureg normal = get_transformed_normal(p); 1473 emit_op1(p, OPCODE_MOV, out_texgen, normal_mask, normal ); 1474 } 1475 1476 if (copy_mask) { 1477 struct ureg in = register_input(p, VERT_ATTRIB_TEX0+i); 1478 emit_op1(p, OPCODE_MOV, out_texgen, copy_mask, in ); 1479 } 1480 } 1481 1482 if (texmat_enabled) { 1483 struct ureg texmat[4]; 1484 struct ureg in = (!is_undef(out_texgen) ? 
1485 out_texgen : 1486 register_input(p, VERT_ATTRIB_TEX0+i)); 1487 if (p->mvp_with_dp4) { 1488 register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3, 1489 0, texmat ); 1490 emit_matrix_transform_vec4( p, out, texmat, in ); 1491 } 1492 else { 1493 register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3, 1494 STATE_MATRIX_TRANSPOSE, texmat ); 1495 emit_transpose_matrix_transform_vec4( p, out, texmat, in ); 1496 } 1497 } 1498 1499 release_temps(p); 1500 } 1501 else { 1502 emit_passthrough(p, VERT_ATTRIB_TEX0+i, VARYING_SLOT_TEX0+i); 1503 } 1504 } 1505} 1506 1507 1508/** 1509 * Point size attenuation computation. 1510 */ 1511static void build_atten_pointsize( struct tnl_program *p ) 1512{ 1513 struct ureg eye = get_eye_position_z(p); 1514 struct ureg state_size = register_param2(p, STATE_INTERNAL, STATE_POINT_SIZE_CLAMPED); 1515 struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION); 1516 struct ureg out = register_output(p, VARYING_SLOT_PSIZ); 1517 struct ureg ut = get_temp(p); 1518 1519 /* dist = |eyez| */ 1520 emit_op1(p, OPCODE_ABS, ut, WRITEMASK_Y, swizzle1(eye, Z)); 1521 /* p1 + dist * (p2 + dist * p3); */ 1522 emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y), 1523 swizzle1(state_attenuation, Z), swizzle1(state_attenuation, Y)); 1524 emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y), 1525 ut, swizzle1(state_attenuation, X)); 1526 1527 /* 1 / sqrt(factor) */ 1528 emit_op1(p, OPCODE_RSQ, ut, WRITEMASK_X, ut ); 1529 1530#if 0 1531 /* out = pointSize / sqrt(factor) */ 1532 emit_op2(p, OPCODE_MUL, out, WRITEMASK_X, ut, state_size); 1533#else 1534 /* this is a good place to clamp the point size since there's likely 1535 * no hardware registers to clamp point size at rasterization time. 
1536 */ 1537 emit_op2(p, OPCODE_MUL, ut, WRITEMASK_X, ut, state_size); 1538 emit_op2(p, OPCODE_MAX, ut, WRITEMASK_X, ut, swizzle1(state_size, Y)); 1539 emit_op2(p, OPCODE_MIN, out, WRITEMASK_X, ut, swizzle1(state_size, Z)); 1540#endif 1541 1542 release_temp(p, ut); 1543} 1544 1545 1546/** 1547 * Pass-though per-vertex point size, from user's point size array. 1548 */ 1549static void build_array_pointsize( struct tnl_program *p ) 1550{ 1551 struct ureg in = register_input(p, VERT_ATTRIB_POINT_SIZE); 1552 struct ureg out = register_output(p, VARYING_SLOT_PSIZ); 1553 emit_op1(p, OPCODE_MOV, out, WRITEMASK_X, in); 1554} 1555 1556 1557static void build_tnl_program( struct tnl_program *p ) 1558{ 1559 /* Emit the program, starting with the modelview, projection transforms: 1560 */ 1561 build_hpos(p); 1562 1563 /* Lighting calculations: 1564 */ 1565 if (p->state->fragprog_inputs_read & (VARYING_BIT_COL0|VARYING_BIT_COL1)) { 1566 if (p->state->light_global_enabled) 1567 build_lighting(p); 1568 else { 1569 if (p->state->fragprog_inputs_read & VARYING_BIT_COL0) 1570 emit_passthrough(p, VERT_ATTRIB_COLOR0, VARYING_SLOT_COL0); 1571 1572 if (p->state->fragprog_inputs_read & VARYING_BIT_COL1) 1573 emit_passthrough(p, VERT_ATTRIB_COLOR1, VARYING_SLOT_COL1); 1574 } 1575 } 1576 1577 if (p->state->fragprog_inputs_read & VARYING_BIT_FOGC) 1578 build_fog(p); 1579 1580 if (p->state->fragprog_inputs_read & VARYING_BITS_TEX_ANY) 1581 build_texture_transform(p); 1582 1583 if (p->state->point_attenuated) 1584 build_atten_pointsize(p); 1585 else if (p->state->varying_vp_inputs & VERT_BIT_POINT_SIZE) 1586 build_array_pointsize(p); 1587 1588 /* Finish up: 1589 */ 1590 emit_op1(p, OPCODE_END, undef, 0, undef); 1591 1592 /* Disassemble: 1593 */ 1594 if (DISASSEM) { 1595 printf ("\n"); 1596 } 1597} 1598 1599 1600static void 1601create_new_program( const struct state_key *key, 1602 struct gl_program *program, 1603 GLboolean mvp_with_dp4, 1604 GLuint max_temps) 1605{ 1606 struct tnl_program p; 1607 
1608 memset(&p, 0, sizeof(p)); 1609 p.state = key; 1610 p.program = program; 1611 p.eye_position = undef; 1612 p.eye_position_z = undef; 1613 p.eye_position_normalized = undef; 1614 p.transformed_normal = undef; 1615 p.identity = undef; 1616 p.temp_in_use = 0; 1617 p.mvp_with_dp4 = mvp_with_dp4; 1618 1619 if (max_temps >= sizeof(int) * 8) 1620 p.temp_reserved = 0; 1621 else 1622 p.temp_reserved = ~((1<<max_temps)-1); 1623 1624 /* Start by allocating 32 instructions. 1625 * If we need more, we'll grow the instruction array as needed. 1626 */ 1627 p.max_inst = 32; 1628 p.program->arb.Instructions = 1629 rzalloc_array(program, struct prog_instruction, p.max_inst); 1630 p.program->String = NULL; 1631 p.program->arb.NumInstructions = 1632 p.program->arb.NumTemporaries = 1633 p.program->arb.NumParameters = 1634 p.program->arb.NumAttributes = p.program->arb.NumAddressRegs = 0; 1635 p.program->Parameters = _mesa_new_parameter_list(); 1636 p.program->info.inputs_read = 0; 1637 p.program->info.outputs_written = 0; 1638 1639 build_tnl_program( &p ); 1640} 1641 1642 1643/** 1644 * Return a vertex program which implements the current fixed-function 1645 * transform/lighting/texgen operations. 
1646 */ 1647struct gl_program * 1648_mesa_get_fixed_func_vertex_program(struct gl_context *ctx) 1649{ 1650 struct gl_program *prog; 1651 struct state_key key; 1652 1653 /* Grab all the relevant state and put it in a single structure: 1654 */ 1655 make_state_key(ctx, &key); 1656 1657 /* Look for an already-prepared program for this state: 1658 */ 1659 prog = _mesa_search_program_cache(ctx->VertexProgram.Cache, &key, 1660 sizeof(key)); 1661 1662 if (!prog) { 1663 /* OK, we'll have to build a new one */ 1664 if (0) 1665 printf("Build new TNL program\n"); 1666 1667 prog = ctx->Driver.NewProgram(ctx, GL_VERTEX_PROGRAM_ARB, 0, true); 1668 if (!prog) 1669 return NULL; 1670 1671 create_new_program( &key, prog, 1672 ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS, 1673 ctx->Const.Program[MESA_SHADER_VERTEX].MaxTemps ); 1674 1675 if (ctx->Driver.ProgramStringNotify) 1676 ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB, prog); 1677 1678 _mesa_program_cache_insert(ctx, ctx->VertexProgram.Cache, &key, 1679 sizeof(key), prog); 1680 } 1681 1682 return prog; 1683} 1684