1/************************************************************************** 2 * 3 * Copyright 2007 VMware, Inc. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * \file ffvertex_prog.c 30 * 31 * Create a vertex program to execute the current fixed function T&L pipeline. 32 * \author Keith Whitwell 33 */ 34 35 36#include "main/errors.h" 37#include "main/glheader.h" 38#include "main/mtypes.h" 39#include "main/macros.h" 40#include "main/enums.h" 41#include "main/ffvertex_prog.h" 42#include "program/program.h" 43#include "program/prog_cache.h" 44#include "program/prog_instruction.h" 45#include "program/prog_parameter.h" 46#include "program/prog_print.h" 47#include "program/prog_statevars.h" 48#include "util/bitscan.h" 49 50 51/** Max of number of lights and texture coord units */ 52#define NUM_UNITS MAX2(MAX_TEXTURE_COORD_UNITS, MAX_LIGHTS) 53 54struct state_key { 55 GLbitfield varying_vp_inputs; 56 57 unsigned fragprog_inputs_read:12; 58 59 unsigned light_color_material_mask:12; 60 unsigned light_global_enabled:1; 61 unsigned light_local_viewer:1; 62 unsigned light_twoside:1; 63 unsigned material_shininess_is_zero:1; 64 unsigned need_eye_coords:1; 65 unsigned normalize:1; 66 unsigned rescale_normals:1; 67 68 unsigned fog_distance_mode:2; 69 unsigned separate_specular:1; 70 unsigned point_attenuated:1; 71 72 struct { 73 unsigned char light_enabled:1; 74 unsigned char light_eyepos3_is_zero:1; 75 unsigned char light_spotcutoff_is_180:1; 76 unsigned char light_attenuated:1; 77 unsigned char texmat_enabled:1; 78 unsigned char coord_replace:1; 79 unsigned char texgen_enabled:1; 80 unsigned char texgen_mode0:4; 81 unsigned char texgen_mode1:4; 82 unsigned char texgen_mode2:4; 83 unsigned char texgen_mode3:4; 84 } unit[NUM_UNITS]; 85}; 86 87 88#define TXG_NONE 0 89#define TXG_OBJ_LINEAR 1 90#define TXG_EYE_LINEAR 2 91#define TXG_SPHERE_MAP 3 92#define TXG_REFLECTION_MAP 4 93#define TXG_NORMAL_MAP 5 94 95static GLuint translate_texgen( GLboolean enabled, GLenum mode ) 96{ 97 if (!enabled) 98 return TXG_NONE; 99 100 switch (mode) { 101 case GL_OBJECT_LINEAR: return TXG_OBJ_LINEAR; 102 case GL_EYE_LINEAR: return TXG_EYE_LINEAR; 103 case GL_SPHERE_MAP: return TXG_SPHERE_MAP; 104 case GL_REFLECTION_MAP_NV: return TXG_REFLECTION_MAP; 105 case GL_NORMAL_MAP_NV: return TXG_NORMAL_MAP; 106 default: return TXG_NONE; 107 } 108} 109 110#define FDM_EYE_RADIAL 0 111#define FDM_EYE_PLANE 1 112#define FDM_EYE_PLANE_ABS 2 113#define FDM_FROM_ARRAY 3 114 115static GLuint translate_fog_distance_mode(GLenum source, GLenum mode) 116{ 117 if (source == GL_FRAGMENT_DEPTH_EXT) { 118 switch (mode) { 119 case GL_EYE_RADIAL_NV: 120 return FDM_EYE_RADIAL; 121 case GL_EYE_PLANE: 122 return FDM_EYE_PLANE; 123 default: /* shouldn't happen; fall through to a sensible default */ 124 case GL_EYE_PLANE_ABSOLUTE_NV: 125 return FDM_EYE_PLANE_ABS; 126 } 127 } else { 128 return FDM_FROM_ARRAY; 129 } 130} 131 132static GLboolean check_active_shininess( struct gl_context *ctx, 133 const struct state_key *key, 134 GLuint side ) 135{ 136 GLuint attr = MAT_ATTRIB_FRONT_SHININESS + side; 137 138 if ((key->varying_vp_inputs & VERT_BIT_COLOR0) && 139 (key->light_color_material_mask & (1 << attr))) 140 return GL_TRUE; 141 142 if (key->varying_vp_inputs & VERT_BIT_MAT(attr)) 143 return GL_TRUE; 144 145 if (ctx->Light.Material.Attrib[attr][0] != 0.0F) 146 return GL_TRUE; 147 148 return GL_FALSE; 149} 150 151 152static void make_state_key( struct gl_context *ctx, struct state_key *key ) 153{ 154 const struct gl_program *fp = ctx->FragmentProgram._Current; 155 GLbitfield mask; 156 157 memset(key, 0, sizeof(struct state_key)); 158 159 /* This now relies on texenvprogram.c being active: 160 */ 161 assert(fp); 162 163 key->need_eye_coords = ctx->_NeedEyeCoords; 164 165 key->fragprog_inputs_read = fp->info.inputs_read; 166 key->varying_vp_inputs = ctx->VertexProgram._VaryingInputs; 167 168 if (ctx->RenderMode == GL_FEEDBACK) { 169 /* make sure the vertprog emits color and tex0 */ 170 key->fragprog_inputs_read |= (VARYING_BIT_COL0 | VARYING_BIT_TEX0); 171 } 172 173 if (ctx->Light.Enabled) { 174 key->light_global_enabled = 1; 175 176 if (ctx->Light.Model.LocalViewer) 177 key->light_local_viewer = 1; 178 179 if (ctx->Light.Model.TwoSide) 180 key->light_twoside = 1; 181 182 if (ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR) 183 key->separate_specular = 1; 184 185 if (ctx->Light.ColorMaterialEnabled) { 186 key->light_color_material_mask = ctx->Light._ColorMaterialBitmask; 187 } 188 189 mask = ctx->Light._EnabledLights; 190 while (mask) { 191 const int i = u_bit_scan(&mask); 192 struct gl_light_uniforms *lu = &ctx->Light.LightSource[i]; 193 194 key->unit[i].light_enabled = 1; 195 196 if (lu->EyePosition[3] == 0.0F) 197 key->unit[i].light_eyepos3_is_zero = 1; 198 199 if (lu->SpotCutoff == 180.0F) 200 key->unit[i].light_spotcutoff_is_180 = 1; 201 202 if (lu->ConstantAttenuation != 1.0F || 203 lu->LinearAttenuation != 0.0F || 204 lu->QuadraticAttenuation != 0.0F) 205 key->unit[i].light_attenuated = 1; 206 } 207 208 if (check_active_shininess(ctx, key, 0)) { 209 key->material_shininess_is_zero = 0; 210 } 211 else if (key->light_twoside && 212 check_active_shininess(ctx, key, 1)) { 213 key->material_shininess_is_zero = 0; 214 } 215 else { 216 key->material_shininess_is_zero = 1; 217 } 218 } 219 220 if (ctx->Transform.Normalize) 221 key->normalize = 1; 222 223 if (ctx->Transform.RescaleNormals) 224 key->rescale_normals = 1; 225 226 /* Only distinguish fog parameters if we actually need */ 227 if (key->fragprog_inputs_read & VARYING_BIT_FOGC) 228 key->fog_distance_mode = 229 translate_fog_distance_mode(ctx->Fog.FogCoordinateSource, 230 ctx->Fog.FogDistanceMode); 231 232 if (ctx->Point._Attenuated) 233 key->point_attenuated = 1; 234 235 mask = ctx->Texture._EnabledCoordUnits | ctx->Texture._TexGenEnabled 236 | ctx->Texture._TexMatEnabled | ctx->Point.CoordReplace; 237 while (mask) { 238 const int i = u_bit_scan(&mask); 239 struct gl_fixedfunc_texture_unit *texUnit = 240 &ctx->Texture.FixedFuncUnit[i]; 241 242 if (ctx->Point.PointSprite) 243 if (ctx->Point.CoordReplace & (1u << i)) 244 key->unit[i].coord_replace = 1; 245 246 if (ctx->Texture._TexMatEnabled & ENABLE_TEXMAT(i)) 247 key->unit[i].texmat_enabled = 1; 248 249 if (texUnit->TexGenEnabled) { 250 key->unit[i].texgen_enabled = 1; 251 252 key->unit[i].texgen_mode0 = 253 translate_texgen( texUnit->TexGenEnabled & (1<<0), 254 texUnit->GenS.Mode ); 255 key->unit[i].texgen_mode1 = 256 translate_texgen( texUnit->TexGenEnabled & (1<<1), 257 texUnit->GenT.Mode ); 258 key->unit[i].texgen_mode2 = 259 translate_texgen( texUnit->TexGenEnabled & (1<<2), 260 texUnit->GenR.Mode ); 261 key->unit[i].texgen_mode3 = 262 translate_texgen( texUnit->TexGenEnabled & (1<<3), 263 texUnit->GenQ.Mode ); 264 } 265 } 266} 267 268 269 270/* Very useful debugging tool - produces annotated listing of 271 * generated program with line/function references for each 272 * instruction back into this file: 273 */ 274#define DISASSEM 0 275 276 277/* Use uregs to represent registers internally, translate to Mesa's 278 * expected formats on emit. 279 * 280 * NOTE: These are passed by value extensively in this file rather 281 * than as usual by pointer reference. If this disturbs you, try 282 * remembering they are just 32bits in size. 283 * 284 * GCC is smart enough to deal with these dword-sized structures in 285 * much the same way as if I had defined them as dwords and was using 286 * macros to access and set the fields. This is much nicer and easier 287 * to evolve. 288 */ 289struct ureg { 290 GLuint file:4; 291 GLint idx:9; /* relative addressing may be negative */ 292 /* sizeof(idx) should == sizeof(prog_src_reg::Index) */ 293 GLuint negate:1; 294 GLuint swz:12; 295 GLuint pad:6; 296}; 297 298 299struct tnl_program { 300 const struct state_key *state; 301 struct gl_program *program; 302 struct gl_program_parameter_list *state_params; 303 GLuint max_inst; /** number of instructions allocated for program */ 304 GLboolean mvp_with_dp4; 305 306 GLuint temp_in_use; 307 GLuint temp_reserved; 308 309 struct ureg eye_position; 310 struct ureg eye_position_z; 311 struct ureg eye_position_normalized; 312 struct ureg transformed_normal; 313 struct ureg identity; 314 315 GLuint materials; 316 GLuint color_materials; 317}; 318 319 320static const struct ureg undef = { 321 PROGRAM_UNDEFINED, 322 0, 323 0, 324 0, 325 0 326}; 327 328/* Local shorthand: 329 */ 330#define X SWIZZLE_X 331#define Y SWIZZLE_Y 332#define Z SWIZZLE_Z 333#define W SWIZZLE_W 334 335 336/* Construct a ureg: 337 */ 338static struct ureg make_ureg(GLuint file, GLint idx) 339{ 340 struct ureg reg; 341 reg.file = file; 342 reg.idx = idx; 343 reg.negate = 0; 344 reg.swz = SWIZZLE_NOOP; 345 reg.pad = 0; 346 return reg; 347} 348 349 350static struct ureg negate( struct ureg reg ) 351{ 352 reg.negate ^= 1; 353 return reg; 354} 355 356 357static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w ) 358{ 359 reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x), 360 GET_SWZ(reg.swz, y), 361 GET_SWZ(reg.swz, z), 362 GET_SWZ(reg.swz, w)); 363 return reg; 364} 365 366 367static struct ureg swizzle1( struct ureg reg, int x ) 368{ 369 return swizzle(reg, x, x, x, x); 370} 371 372 373static struct ureg get_temp( struct tnl_program *p ) 374{ 375 int bit = ffs( ~p->temp_in_use ); 376 if (!bit) { 377 _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__); 378 exit(1); 379 } 380 381 if ((GLuint) bit > p->program->arb.NumTemporaries) 382 p->program->arb.NumTemporaries = bit; 383 384 p->temp_in_use |= 1<<(bit-1); 385 return make_ureg(PROGRAM_TEMPORARY, bit-1); 386} 387 388 389static struct ureg reserve_temp( struct tnl_program *p ) 390{ 391 struct ureg temp = get_temp( p ); 392 p->temp_reserved |= 1<<temp.idx; 393 return temp; 394} 395 396 397static void release_temp( struct tnl_program *p, struct ureg reg ) 398{ 399 if (reg.file == PROGRAM_TEMPORARY) { 400 p->temp_in_use &= ~(1<<reg.idx); 401 p->temp_in_use |= p->temp_reserved; /* can't release reserved temps */ 402 } 403} 404 405static void release_temps( struct tnl_program *p ) 406{ 407 p->temp_in_use = p->temp_reserved; 408} 409 410 411static struct ureg register_param4(struct tnl_program *p, 412 GLint s0, 413 GLint s1, 414 GLint s2, 415 GLint s3) 416{ 417 gl_state_index16 tokens[STATE_LENGTH]; 418 GLint idx; 419 tokens[0] = s0; 420 tokens[1] = s1; 421 tokens[2] = s2; 422 tokens[3] = s3; 423 idx = _mesa_add_state_reference(p->state_params, tokens); 424 return make_ureg(PROGRAM_STATE_VAR, idx); 425} 426 427 428#define register_param1(p,s0) register_param4(p,s0,0,0,0) 429#define register_param2(p,s0,s1) register_param4(p,s0,s1,0,0) 430#define register_param3(p,s0,s1,s2) register_param4(p,s0,s1,s2,0) 431 432 433 434/** 435 * \param input one of VERT_ATTRIB_x tokens. 436 */ 437static struct ureg register_input( struct tnl_program *p, GLuint input ) 438{ 439 assert(input < VERT_ATTRIB_MAX); 440 441 if (p->state->varying_vp_inputs & VERT_BIT(input)) { 442 p->program->info.inputs_read |= (uint64_t)VERT_BIT(input); 443 return make_ureg(PROGRAM_INPUT, input); 444 } 445 else { 446 return register_param2(p, STATE_CURRENT_ATTRIB, input); 447 } 448} 449 450 451/** 452 * \param input one of VARYING_SLOT_x tokens. 453 */ 454static struct ureg register_output( struct tnl_program *p, GLuint output ) 455{ 456 p->program->info.outputs_written |= BITFIELD64_BIT(output); 457 return make_ureg(PROGRAM_OUTPUT, output); 458} 459 460 461static struct ureg register_const4f( struct tnl_program *p, 462 GLfloat s0, 463 GLfloat s1, 464 GLfloat s2, 465 GLfloat s3) 466{ 467 gl_constant_value values[4]; 468 GLint idx; 469 GLuint swizzle; 470 values[0].f = s0; 471 values[1].f = s1; 472 values[2].f = s2; 473 values[3].f = s3; 474 idx = _mesa_add_unnamed_constant(p->program->Parameters, values, 4, 475 &swizzle ); 476 assert(swizzle == SWIZZLE_NOOP); 477 return make_ureg(PROGRAM_CONSTANT, idx); 478} 479 480#define register_const1f(p, s0) register_const4f(p, s0, 0, 0, 1) 481#define register_scalar_const(p, s0) register_const4f(p, s0, s0, s0, s0) 482#define register_const2f(p, s0, s1) register_const4f(p, s0, s1, 0, 1) 483#define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1) 484 485static GLboolean is_undef( struct ureg reg ) 486{ 487 return reg.file == PROGRAM_UNDEFINED; 488} 489 490 491static struct ureg get_identity_param( struct tnl_program *p ) 492{ 493 if (is_undef(p->identity)) 494 p->identity = register_const4f(p, 0,0,0,1); 495 496 return p->identity; 497} 498 499static void register_matrix_param5( struct tnl_program *p, 500 GLint s0, /* modelview, projection, etc */ 501 GLint s1, /* texture matrix number */ 502 GLint s2, /* first row */ 503 GLint s3, /* last row */ 504 struct ureg *matrix ) 505{ 506 GLint i; 507 508 /* This is a bit sad as the support is there to pull the whole 509 * matrix out in one go: 510 */ 511 for (i = 0; i <= s3 - s2; i++) 512 matrix[i] = register_param4(p, s0, s1, i, i); 513} 514 515 516static void emit_arg( struct prog_src_register *src, 517 struct ureg reg ) 518{ 519 src->File = reg.file; 520 src->Index = reg.idx; 521 src->Swizzle = reg.swz; 522 src->Negate = reg.negate ? NEGATE_XYZW : NEGATE_NONE; 523 src->RelAddr = 0; 524 /* Check that bitfield sizes aren't exceeded */ 525 assert(src->Index == reg.idx); 526} 527 528 529static void emit_dst( struct prog_dst_register *dst, 530 struct ureg reg, GLuint mask ) 531{ 532 dst->File = reg.file; 533 dst->Index = reg.idx; 534 /* allow zero as a shorthand for xyzw */ 535 dst->WriteMask = mask ? mask : WRITEMASK_XYZW; 536 /* Check that bitfield sizes aren't exceeded */ 537 assert(dst->Index == reg.idx); 538} 539 540 541static void debug_insn( struct prog_instruction *inst, const char *fn, 542 GLuint line ) 543{ 544 if (DISASSEM) { 545 static const char *last_fn; 546 547 if (fn != last_fn) { 548 last_fn = fn; 549 printf("%s:\n", fn); 550 } 551 552 printf("%d:\t", line); 553 _mesa_print_instruction(inst); 554 } 555} 556 557 558static void emit_op3fn(struct tnl_program *p, 559 enum prog_opcode op, 560 struct ureg dest, 561 GLuint mask, 562 struct ureg src0, 563 struct ureg src1, 564 struct ureg src2, 565 const char *fn, 566 GLuint line) 567{ 568 GLuint nr; 569 struct prog_instruction *inst; 570 571 assert(p->program->arb.NumInstructions <= p->max_inst); 572 573 if (p->program->arb.NumInstructions == p->max_inst) { 574 /* need to extend the program's instruction array */ 575 struct prog_instruction *newInst; 576 577 /* double the size */ 578 p->max_inst *= 2; 579 580 newInst = 581 rzalloc_array(p->program, struct prog_instruction, p->max_inst); 582 if (!newInst) { 583 _mesa_error(NULL, GL_OUT_OF_MEMORY, "vertex program build"); 584 return; 585 } 586 587 _mesa_copy_instructions(newInst, p->program->arb.Instructions, 588 p->program->arb.NumInstructions); 589 590 ralloc_free(p->program->arb.Instructions); 591 592 p->program->arb.Instructions = newInst; 593 } 594 595 nr = p->program->arb.NumInstructions++; 596 597 inst = &p->program->arb.Instructions[nr]; 598 inst->Opcode = (enum prog_opcode) op; 599 600 emit_arg( &inst->SrcReg[0], src0 ); 601 emit_arg( &inst->SrcReg[1], src1 ); 602 emit_arg( &inst->SrcReg[2], src2 ); 603 604 emit_dst( &inst->DstReg, dest, mask ); 605 606 debug_insn(inst, fn, line); 607} 608 609 610#define emit_op3(p, op, dst, mask, src0, src1, src2) \ 611 emit_op3fn(p, op, dst, mask, src0, src1, src2, __func__, __LINE__) 612 613#define emit_op2(p, op, dst, mask, src0, src1) \ 614 emit_op3fn(p, op, dst, mask, src0, src1, undef, __func__, __LINE__) 615 616#define emit_op1(p, op, dst, mask, src0) \ 617 emit_op3fn(p, op, dst, mask, src0, undef, undef, __func__, __LINE__) 618 619 620static struct ureg make_temp( struct tnl_program *p, struct ureg reg ) 621{ 622 if (reg.file == PROGRAM_TEMPORARY && 623 !(p->temp_reserved & (1<<reg.idx))) 624 return reg; 625 else { 626 struct ureg temp = get_temp(p); 627 emit_op1(p, OPCODE_MOV, temp, 0, reg); 628 return temp; 629 } 630} 631 632 633/* Currently no tracking performed of input/output/register size or 634 * active elements. Could be used to reduce these operations, as 635 * could the matrix type. 636 */ 637static void emit_matrix_transform_vec4( struct tnl_program *p, 638 struct ureg dest, 639 const struct ureg *mat, 640 struct ureg src) 641{ 642 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_X, src, mat[0]); 643 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Y, src, mat[1]); 644 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_Z, src, mat[2]); 645 emit_op2(p, OPCODE_DP4, dest, WRITEMASK_W, src, mat[3]); 646} 647 648 649/* This version is much easier to implement if writemasks are not 650 * supported natively on the target or (like SSE), the target doesn't 651 * have a clean/obvious dotproduct implementation. 652 */ 653static void emit_transpose_matrix_transform_vec4( struct tnl_program *p, 654 struct ureg dest, 655 const struct ureg *mat, 656 struct ureg src) 657{ 658 struct ureg tmp; 659 660 if (dest.file != PROGRAM_TEMPORARY) 661 tmp = get_temp(p); 662 else 663 tmp = dest; 664 665 emit_op2(p, OPCODE_MUL, tmp, 0, swizzle1(src,X), mat[0]); 666 emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Y), mat[1], tmp); 667 emit_op3(p, OPCODE_MAD, tmp, 0, swizzle1(src,Z), mat[2], tmp); 668 emit_op3(p, OPCODE_MAD, dest, 0, swizzle1(src,W), mat[3], tmp); 669 670 if (dest.file != PROGRAM_TEMPORARY) 671 release_temp(p, tmp); 672} 673 674 675static void emit_matrix_transform_vec3( struct tnl_program *p, 676 struct ureg dest, 677 const struct ureg *mat, 678 struct ureg src) 679{ 680 emit_op2(p, OPCODE_DP3, dest, WRITEMASK_X, src, mat[0]); 681 emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Y, src, mat[1]); 682 emit_op2(p, OPCODE_DP3, dest, WRITEMASK_Z, src, mat[2]); 683} 684 685 686static void emit_normalize_vec3( struct tnl_program *p, 687 struct ureg dest, 688 struct ureg src ) 689{ 690 struct ureg tmp = get_temp(p); 691 emit_op2(p, OPCODE_DP3, tmp, WRITEMASK_X, src, src); 692 emit_op1(p, OPCODE_RSQ, tmp, WRITEMASK_X, tmp); 693 emit_op2(p, OPCODE_MUL, dest, 0, src, swizzle1(tmp, X)); 694 release_temp(p, tmp); 695} 696 697 698static void emit_passthrough( struct tnl_program *p, 699 GLuint input, 700 GLuint output ) 701{ 702 struct ureg out = register_output(p, output); 703 emit_op1(p, OPCODE_MOV, out, 0, register_input(p, input)); 704} 705 706 707static struct ureg get_eye_position( struct tnl_program *p ) 708{ 709 if (is_undef(p->eye_position)) { 710 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 711 struct ureg modelview[4]; 712 713 p->eye_position = reserve_temp(p); 714 715 if (p->mvp_with_dp4) { 716 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3, 717 modelview ); 718 719 emit_matrix_transform_vec4(p, p->eye_position, modelview, pos); 720 } 721 else { 722 register_matrix_param5( p, STATE_MODELVIEW_MATRIX_TRANSPOSE, 0, 0, 3, 723 modelview ); 724 725 emit_transpose_matrix_transform_vec4(p, p->eye_position, modelview, pos); 726 } 727 } 728 729 return p->eye_position; 730} 731 732 733static struct ureg get_eye_position_z( struct tnl_program *p ) 734{ 735 if (!is_undef(p->eye_position)) 736 return swizzle1(p->eye_position, Z); 737 738 if (is_undef(p->eye_position_z)) { 739 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 740 struct ureg modelview[4]; 741 742 p->eye_position_z = reserve_temp(p); 743 744 register_matrix_param5( p, STATE_MODELVIEW_MATRIX, 0, 0, 3, 745 modelview ); 746 747 emit_op2(p, OPCODE_DP4, p->eye_position_z, 0, pos, modelview[2]); 748 } 749 750 return p->eye_position_z; 751} 752 753 754static struct ureg get_eye_position_normalized( struct tnl_program *p ) 755{ 756 if (is_undef(p->eye_position_normalized)) { 757 struct ureg eye = get_eye_position(p); 758 p->eye_position_normalized = reserve_temp(p); 759 emit_normalize_vec3(p, p->eye_position_normalized, eye); 760 } 761 762 return p->eye_position_normalized; 763} 764 765 766static struct ureg get_transformed_normal( struct tnl_program *p ) 767{ 768 if (is_undef(p->transformed_normal) && 769 !p->state->need_eye_coords && 770 !p->state->normalize && 771 !(p->state->need_eye_coords == p->state->rescale_normals)) 772 { 773 p->transformed_normal = register_input(p, VERT_ATTRIB_NORMAL ); 774 } 775 else if (is_undef(p->transformed_normal)) 776 { 777 struct ureg normal = register_input(p, VERT_ATTRIB_NORMAL ); 778 struct ureg mvinv[3]; 779 struct ureg transformed_normal = reserve_temp(p); 780 781 if (p->state->need_eye_coords) { 782 register_matrix_param5( p, STATE_MODELVIEW_MATRIX_INVTRANS, 0, 0, 2, 783 mvinv ); 784 785 /* Transform to eye space: 786 */ 787 emit_matrix_transform_vec3( p, transformed_normal, mvinv, normal ); 788 normal = transformed_normal; 789 } 790 791 /* Normalize/Rescale: 792 */ 793 if (p->state->normalize) { 794 emit_normalize_vec3( p, transformed_normal, normal ); 795 normal = transformed_normal; 796 } 797 else if (p->state->need_eye_coords == p->state->rescale_normals) { 798 /* This is already adjusted for eye/non-eye rendering: 799 */ 800 struct ureg rescale = register_param1(p, STATE_NORMAL_SCALE); 801 802 emit_op2( p, OPCODE_MUL, transformed_normal, 0, normal, rescale ); 803 normal = transformed_normal; 804 } 805 806 assert(normal.file == PROGRAM_TEMPORARY); 807 p->transformed_normal = normal; 808 } 809 810 return p->transformed_normal; 811} 812 813 814static void build_hpos( struct tnl_program *p ) 815{ 816 struct ureg pos = register_input( p, VERT_ATTRIB_POS ); 817 struct ureg hpos = register_output( p, VARYING_SLOT_POS ); 818 struct ureg mvp[4]; 819 820 if (p->mvp_with_dp4) { 821 register_matrix_param5( p, STATE_MVP_MATRIX, 0, 0, 3, 822 mvp ); 823 emit_matrix_transform_vec4( p, hpos, mvp, pos ); 824 } 825 else { 826 register_matrix_param5( p, STATE_MVP_MATRIX_TRANSPOSE, 0, 0, 3, 827 mvp ); 828 emit_transpose_matrix_transform_vec4( p, hpos, mvp, pos ); 829 } 830} 831 832 833static GLuint material_attrib( GLuint side, GLuint property ) 834{ 835 switch (property) { 836 case STATE_AMBIENT: 837 return MAT_ATTRIB_FRONT_AMBIENT + side; 838 case STATE_DIFFUSE: 839 return MAT_ATTRIB_FRONT_DIFFUSE + side; 840 case STATE_SPECULAR: 841 return MAT_ATTRIB_FRONT_SPECULAR + side; 842 case STATE_EMISSION: 843 return MAT_ATTRIB_FRONT_EMISSION + side; 844 case STATE_SHININESS: 845 return MAT_ATTRIB_FRONT_SHININESS + side; 846 default: 847 unreachable("invalid value"); 848 } 849} 850 851 852/** 853 * Get a bitmask of which material values vary on a per-vertex basis. 854 */ 855static void set_material_flags( struct tnl_program *p ) 856{ 857 p->color_materials = 0; 858 p->materials = 0; 859 860 if (p->state->varying_vp_inputs & VERT_BIT_COLOR0) { 861 p->materials = 862 p->color_materials = p->state->light_color_material_mask; 863 } 864 865 p->materials |= ((p->state->varying_vp_inputs & VERT_BIT_MAT_ALL) 866 >> VERT_ATTRIB_MAT(0)); 867} 868 869 870static struct ureg get_material( struct tnl_program *p, GLuint side, 871 GLuint property ) 872{ 873 GLuint attrib = material_attrib(side, property); 874 875 if (p->color_materials & (1<<attrib)) 876 return register_input(p, VERT_ATTRIB_COLOR0); 877 else if (p->materials & (1<<attrib)) { 878 /* Put material values in the GENERIC slots -- they are not used 879 * for anything in fixed function mode. 880 */ 881 return register_input( p, VERT_ATTRIB_MAT(attrib) ); 882 } 883 else 884 return register_param2(p, STATE_MATERIAL, attrib); 885} 886 887#define SCENE_COLOR_BITS(side) (( MAT_BIT_FRONT_EMISSION | \ 888 MAT_BIT_FRONT_AMBIENT | \ 889 MAT_BIT_FRONT_DIFFUSE) << (side)) 890 891 892/** 893 * Either return a precalculated constant value or emit code to 894 * calculate these values dynamically in the case where material calls 895 * are present between begin/end pairs. 896 * 897 * Probably want to shift this to the program compilation phase - if 898 * we always emitted the calculation here, a smart compiler could 899 * detect that it was constant (given a certain set of inputs), and 900 * lift it out of the main loop. That way the programs created here 901 * would be independent of the vertex_buffer details. 902 */ 903static struct ureg get_scenecolor( struct tnl_program *p, GLuint side ) 904{ 905 if (p->materials & SCENE_COLOR_BITS(side)) { 906 struct ureg lm_ambient = register_param1(p, STATE_LIGHTMODEL_AMBIENT); 907 struct ureg material_emission = get_material(p, side, STATE_EMISSION); 908 struct ureg material_ambient = get_material(p, side, STATE_AMBIENT); 909 struct ureg material_diffuse = get_material(p, side, STATE_DIFFUSE); 910 struct ureg tmp = make_temp(p, material_diffuse); 911 emit_op3(p, OPCODE_MAD, tmp, WRITEMASK_XYZ, lm_ambient, 912 material_ambient, material_emission); 913 return tmp; 914 } 915 else 916 return register_param2( p, STATE_LIGHTMODEL_SCENECOLOR, side ); 917} 918 919 920static struct ureg get_lightprod( struct tnl_program *p, GLuint light, 921 GLuint side, GLuint property, bool *is_state_light ) 922{ 923 GLuint attrib = material_attrib(side, property); 924 if (p->materials & (1<<attrib)) { 925 struct ureg light_value = 926 register_param3(p, STATE_LIGHT, light, property); 927 *is_state_light = true; 928 return light_value; 929 } 930 else { 931 *is_state_light = false; 932 return register_param3(p, STATE_LIGHTPROD, light, attrib); 933 } 934} 935 936 937static struct ureg calculate_light_attenuation( struct tnl_program *p, 938 GLuint i, 939 struct ureg VPpli, 940 struct ureg dist ) 941{ 942 struct ureg attenuation = register_param3(p, STATE_LIGHT, i, 943 STATE_ATTENUATION); 944 struct ureg att = undef; 945 946 /* Calculate spot attenuation: 947 */ 948 if (!p->state->unit[i].light_spotcutoff_is_180) { 949 struct ureg spot_dir_norm = register_param2(p, STATE_LIGHT_SPOT_DIR_NORMALIZED, i); 950 struct ureg spot = get_temp(p); 951 struct ureg slt = get_temp(p); 952 953 att = get_temp(p); 954 955 emit_op2(p, OPCODE_DP3, spot, 0, negate(VPpli), spot_dir_norm); 956 emit_op2(p, OPCODE_SLT, slt, 0, swizzle1(spot_dir_norm,W), spot); 957 emit_op1(p, OPCODE_ABS, spot, 0, spot); 958 emit_op2(p, OPCODE_POW, spot, 0, spot, swizzle1(attenuation, W)); 959 emit_op2(p, OPCODE_MUL, att, 0, slt, spot); 960 961 release_temp(p, spot); 962 release_temp(p, slt); 963 } 964 965 /* Calculate distance attenuation(See formula (2.4) at glspec 2.1 page 62): 966 * 967 * Skip the calucation when _dist_ is undefined(light_eyepos3_is_zero) 968 */ 969 if (p->state->unit[i].light_attenuated && !is_undef(dist)) { 970 if (is_undef(att)) 971 att = get_temp(p); 972 /* 1/d,d,d,1/d */ 973 emit_op1(p, OPCODE_RCP, dist, WRITEMASK_YZ, dist); 974 /* 1,d,d*d,1/d */ 975 emit_op2(p, OPCODE_MUL, dist, WRITEMASK_XZ, dist, swizzle1(dist,Y)); 976 /* 1/dist-atten */ 977 emit_op2(p, OPCODE_DP3, dist, 0, attenuation, dist); 978 979 if (!p->state->unit[i].light_spotcutoff_is_180) { 980 /* dist-atten */ 981 emit_op1(p, OPCODE_RCP, dist, 0, dist); 982 /* spot-atten * dist-atten */ 983 emit_op2(p, OPCODE_MUL, att, 0, dist, att); 984 } 985 else { 986 /* dist-atten */ 987 emit_op1(p, OPCODE_RCP, att, 0, dist); 988 } 989 } 990 991 return att; 992} 993 994 995/** 996 * Compute: 997 * lit.y = MAX(0, dots.x) 998 * lit.z = SLT(0, dots.x) 999 */ 1000static void emit_degenerate_lit( struct tnl_program *p, 1001 struct ureg lit, 1002 struct ureg dots ) 1003{ 1004 struct ureg id = get_identity_param(p); /* id = {0,0,0,1} */ 1005 1006 /* Note that lit.x & lit.w will not be examined. Note also that 1007 * dots.xyzw == dots.xxxx. 1008 */ 1009 1010 /* MAX lit, id, dots; 1011 */ 1012 emit_op2(p, OPCODE_MAX, lit, WRITEMASK_XYZW, id, dots); 1013 1014 /* result[2] = (in > 0 ? 1 : 0) 1015 * SLT lit.z, id.z, dots; # lit.z = (0 < dots.z) ? 1 : 0 1016 */ 1017 emit_op2(p, OPCODE_SLT, lit, WRITEMASK_Z, swizzle1(id,Z), dots); 1018} 1019 1020 1021/* Need to add some addtional parameters to allow lighting in object 1022 * space - STATE_SPOT_DIRECTION and STATE_HALF_VECTOR implicitly assume eye 1023 * space lighting. 1024 */ 1025static void build_lighting( struct tnl_program *p ) 1026{ 1027 const GLboolean twoside = p->state->light_twoside; 1028 const GLboolean separate = p->state->separate_specular; 1029 GLuint nr_lights = 0, count = 0; 1030 struct ureg normal = get_transformed_normal(p); 1031 struct ureg lit = get_temp(p); 1032 struct ureg dots = get_temp(p); 1033 struct ureg _col0 = undef, _col1 = undef; 1034 struct ureg _bfc0 = undef, _bfc1 = undef; 1035 GLuint i; 1036 1037 /* 1038 * NOTE: 1039 * dots.x = dot(normal, VPpli) 1040 * dots.y = dot(normal, halfAngle) 1041 * dots.z = back.shininess 1042 * dots.w = front.shininess 1043 */ 1044 1045 for (i = 0; i < MAX_LIGHTS; i++) 1046 if (p->state->unit[i].light_enabled) 1047 nr_lights++; 1048 1049 set_material_flags(p); 1050 1051 { 1052 if (!p->state->material_shininess_is_zero) { 1053 struct ureg shininess = get_material(p, 0, STATE_SHININESS); 1054 emit_op1(p, OPCODE_MOV, dots, WRITEMASK_W, swizzle1(shininess,X)); 1055 release_temp(p, shininess); 1056 } 1057 1058 _col0 = make_temp(p, get_scenecolor(p, 0)); 1059 if (separate) 1060 _col1 = make_temp(p, get_identity_param(p)); 1061 else 1062 _col1 = _col0; 1063 } 1064 1065 if (twoside) { 1066 if (!p->state->material_shininess_is_zero) { 1067 /* Note that we negate the back-face specular exponent here. 1068 * The negation will be un-done later in the back-face code below. 1069 */ 1070 struct ureg shininess = get_material(p, 1, STATE_SHININESS); 1071 emit_op1(p, OPCODE_MOV, dots, WRITEMASK_Z, 1072 negate(swizzle1(shininess,X))); 1073 release_temp(p, shininess); 1074 } 1075 1076 _bfc0 = make_temp(p, get_scenecolor(p, 1)); 1077 if (separate) 1078 _bfc1 = make_temp(p, get_identity_param(p)); 1079 else 1080 _bfc1 = _bfc0; 1081 } 1082 1083 /* If no lights, still need to emit the scenecolor. 1084 */ 1085 { 1086 struct ureg res0 = register_output( p, VARYING_SLOT_COL0 ); 1087 emit_op1(p, OPCODE_MOV, res0, 0, _col0); 1088 } 1089 1090 if (separate) { 1091 struct ureg res1 = register_output( p, VARYING_SLOT_COL1 ); 1092 emit_op1(p, OPCODE_MOV, res1, 0, _col1); 1093 } 1094 1095 if (twoside) { 1096 struct ureg res0 = register_output( p, VARYING_SLOT_BFC0 ); 1097 emit_op1(p, OPCODE_MOV, res0, 0, _bfc0); 1098 } 1099 1100 if (twoside && separate) { 1101 struct ureg res1 = register_output( p, VARYING_SLOT_BFC1 ); 1102 emit_op1(p, OPCODE_MOV, res1, 0, _bfc1); 1103 } 1104 1105 if (nr_lights == 0) { 1106 release_temps(p); 1107 return; 1108 } 1109 1110 /* Declare light products first to place them sequentially next to each 1111 * other for optimal constant uploads. 1112 */ 1113 struct ureg lightprod_front[MAX_LIGHTS][3]; 1114 struct ureg lightprod_back[MAX_LIGHTS][3]; 1115 bool lightprod_front_is_state_light[MAX_LIGHTS][3]; 1116 bool lightprod_back_is_state_light[MAX_LIGHTS][3]; 1117 1118 for (i = 0; i < MAX_LIGHTS; i++) { 1119 if (p->state->unit[i].light_enabled) { 1120 lightprod_front[i][0] = get_lightprod(p, i, 0, STATE_AMBIENT, 1121 &lightprod_front_is_state_light[i][0]); 1122 if (twoside) 1123 lightprod_back[i][0] = get_lightprod(p, i, 1, STATE_AMBIENT, 1124 &lightprod_back_is_state_light[i][0]); 1125 1126 lightprod_front[i][1] = get_lightprod(p, i, 0, STATE_DIFFUSE, 1127 &lightprod_front_is_state_light[i][1]); 1128 if (twoside) 1129 lightprod_back[i][1] = get_lightprod(p, i, 1, STATE_DIFFUSE, 1130 &lightprod_back_is_state_light[i][1]); 1131 1132 lightprod_front[i][2] = get_lightprod(p, i, 0, STATE_SPECULAR, 1133 &lightprod_front_is_state_light[i][2]); 1134 if (twoside) 1135 lightprod_back[i][2] = get_lightprod(p, i, 1, STATE_SPECULAR, 1136 &lightprod_back_is_state_light[i][2]); 1137 } 1138 } 1139 1140 /* Add more variables now that we'll use later, so that they are nicely 1141 * sorted in the parameter list. 1142 */ 1143 for (i = 0; i < MAX_LIGHTS; i++) { 1144 if (p->state->unit[i].light_enabled) { 1145 if (p->state->unit[i].light_eyepos3_is_zero) 1146 register_param2(p, STATE_LIGHT_POSITION_NORMALIZED, i); 1147 else 1148 register_param2(p, STATE_LIGHT_POSITION, i); 1149 } 1150 } 1151 for (i = 0; i < MAX_LIGHTS; i++) { 1152 if (p->state->unit[i].light_enabled) 1153 register_param3(p, STATE_LIGHT, i, STATE_ATTENUATION); 1154 } 1155 1156 for (i = 0; i < MAX_LIGHTS; i++) { 1157 if (p->state->unit[i].light_enabled) { 1158 struct ureg half = undef; 1159 struct ureg att = undef, VPpli = undef; 1160 struct ureg dist = undef; 1161 1162 count++; 1163 if (p->state->unit[i].light_eyepos3_is_zero) { 1164 VPpli = register_param2(p, STATE_LIGHT_POSITION_NORMALIZED, i); 1165 } else { 1166 struct ureg Ppli = register_param2(p, STATE_LIGHT_POSITION, i); 1167 struct ureg V = get_eye_position(p); 1168 1169 VPpli = get_temp(p); 1170 dist = get_temp(p); 1171 1172 /* Calculate VPpli vector 1173 */ 1174 emit_op2(p, OPCODE_SUB, VPpli, 0, Ppli, V); 1175 1176 /* Normalize VPpli. The dist value also used in 1177 * attenuation below. 1178 */ 1179 emit_op2(p, OPCODE_DP3, dist, 0, VPpli, VPpli); 1180 emit_op1(p, OPCODE_RSQ, dist, 0, dist); 1181 emit_op2(p, OPCODE_MUL, VPpli, 0, VPpli, dist); 1182 } 1183 1184 /* Calculate attenuation: 1185 */ 1186 att = calculate_light_attenuation(p, i, VPpli, dist); 1187 release_temp(p, dist); 1188 1189 /* Calculate viewer direction, or use infinite viewer: 1190 */ 1191 if (!p->state->material_shininess_is_zero) { 1192 if (p->state->light_local_viewer) { 1193 struct ureg eye_hat = get_eye_position_normalized(p); 1194 half = get_temp(p); 1195 emit_op2(p, OPCODE_SUB, half, 0, VPpli, eye_hat); 1196 emit_normalize_vec3(p, half, half); 1197 } else if (p->state->unit[i].light_eyepos3_is_zero) { 1198 half = register_param2(p, STATE_LIGHT_HALF_VECTOR, i); 1199 } else { 1200 struct ureg z_dir = swizzle(get_identity_param(p),X,Y,W,Z); 1201 half = get_temp(p); 1202 emit_op2(p, OPCODE_ADD, half, 0, VPpli, z_dir); 1203 emit_normalize_vec3(p, half, half); 1204 } 1205 } 1206 1207 /* Calculate dot products: 1208 */ 1209 if (p->state->material_shininess_is_zero) { 1210 emit_op2(p, OPCODE_DP3, dots, 0, normal, VPpli); 1211 } 1212 else { 1213 emit_op2(p, OPCODE_DP3, dots, WRITEMASK_X, normal, VPpli); 1214 emit_op2(p, OPCODE_DP3, dots, WRITEMASK_Y, normal, half); 1215 } 1216 1217 /* Front face lighting: 1218 */ 1219 { 1220 /* Transform STATE_LIGHT into STATE_LIGHTPROD if needed. This isn't done in 1221 * get_lightprod to avoid using too many temps. 1222 */ 1223 for (int j = 0; j < 3; j++) { 1224 if (lightprod_front_is_state_light[i][j]) { 1225 struct ureg material_value = get_material(p, 0, STATE_AMBIENT + j); 1226 struct ureg tmp = get_temp(p); 1227 emit_op2(p, OPCODE_MUL, tmp, 0, lightprod_front[i][j], material_value); 1228 lightprod_front[i][j] = tmp; 1229 } 1230 } 1231 1232 struct ureg ambient = lightprod_front[i][0]; 1233 struct ureg diffuse = lightprod_front[i][1]; 1234 struct ureg specular = lightprod_front[i][2]; 1235 struct ureg res0, res1; 1236 GLuint mask0, mask1; 1237 1238 if (count == nr_lights) { 1239 if (separate) { 1240 mask0 = WRITEMASK_XYZ; 1241 mask1 = WRITEMASK_XYZ; 1242 res0 = register_output( p, VARYING_SLOT_COL0 ); 1243 res1 = register_output( p, VARYING_SLOT_COL1 ); 1244 } 1245 else { 1246 mask0 = 0; 1247 mask1 = WRITEMASK_XYZ; 1248 res0 = _col0; 1249 res1 = register_output( p, VARYING_SLOT_COL0 ); 1250 } 1251 } 1252 else { 1253 mask0 = 0; 1254 mask1 = 0; 1255 res0 = _col0; 1256 res1 = _col1; 1257 } 1258 1259 if (!is_undef(att)) { 1260 /* light is attenuated by distance */ 1261 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1262 emit_op2(p, OPCODE_MUL, lit, 0, lit, att); 1263 emit_op3(p, OPCODE_MAD, _col0, 0, swizzle1(lit,X), ambient, _col0); 1264 } 1265 else if (!p->state->material_shininess_is_zero) { 1266 /* there's a non-zero specular term */ 1267 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1268 emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0); 1269 } 1270 else { 1271 /* no attenutation, no specular */ 1272 emit_degenerate_lit(p, lit, dots); 1273 emit_op2(p, OPCODE_ADD, _col0, 0, ambient, _col0); 1274 } 1275 1276 emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _col0); 1277 emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _col1); 1278 1279 release_temp(p, ambient); 1280 release_temp(p, diffuse); 1281 release_temp(p, specular); 1282 } 1283 1284 /* Back face lighting: 1285 */ 1286 if (twoside) { 1287 /* Transform STATE_LIGHT into STATE_LIGHTPROD if needed. This isn't done in 1288 * get_lightprod to avoid using too many temps. 1289 */ 1290 for (int j = 0; j < 3; j++) { 1291 if (lightprod_back_is_state_light[i][j]) { 1292 struct ureg material_value = get_material(p, 1, STATE_AMBIENT + j); 1293 struct ureg tmp = get_temp(p); 1294 emit_op2(p, OPCODE_MUL, tmp, 1, lightprod_back[i][j], material_value); 1295 lightprod_back[i][j] = tmp; 1296 } 1297 } 1298 1299 struct ureg ambient = lightprod_back[i][0]; 1300 struct ureg diffuse = lightprod_back[i][1]; 1301 struct ureg specular = lightprod_back[i][2]; 1302 struct ureg res0, res1; 1303 GLuint mask0, mask1; 1304 1305 if (count == nr_lights) { 1306 if (separate) { 1307 mask0 = WRITEMASK_XYZ; 1308 mask1 = WRITEMASK_XYZ; 1309 res0 = register_output( p, VARYING_SLOT_BFC0 ); 1310 res1 = register_output( p, VARYING_SLOT_BFC1 ); 1311 } 1312 else { 1313 mask0 = 0; 1314 mask1 = WRITEMASK_XYZ; 1315 res0 = _bfc0; 1316 res1 = register_output( p, VARYING_SLOT_BFC0 ); 1317 } 1318 } 1319 else { 1320 res0 = _bfc0; 1321 res1 = _bfc1; 1322 mask0 = 0; 1323 mask1 = 0; 1324 } 1325 1326 /* For the back face we need to negate the X and Y component 1327 * dot products. dots.Z has the negated back-face specular 1328 * exponent. We swizzle that into the W position. This 1329 * negation makes the back-face specular term positive again. 1330 */ 1331 dots = negate(swizzle(dots,X,Y,W,Z)); 1332 1333 if (!is_undef(att)) { 1334 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1335 emit_op2(p, OPCODE_MUL, lit, 0, lit, att); 1336 emit_op3(p, OPCODE_MAD, _bfc0, 0, swizzle1(lit,X), ambient, _bfc0); 1337 } 1338 else if (!p->state->material_shininess_is_zero) { 1339 emit_op1(p, OPCODE_LIT, lit, 0, dots); 1340 emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0); /**/ 1341 } 1342 else { 1343 emit_degenerate_lit(p, lit, dots); 1344 emit_op2(p, OPCODE_ADD, _bfc0, 0, ambient, _bfc0); 1345 } 1346 1347 emit_op3(p, OPCODE_MAD, res0, mask0, swizzle1(lit,Y), diffuse, _bfc0); 1348 emit_op3(p, OPCODE_MAD, res1, mask1, swizzle1(lit,Z), specular, _bfc1); 1349 /* restore dots to its original state for subsequent lights 1350 * by negating and swizzling again. 1351 */ 1352 dots = negate(swizzle(dots,X,Y,W,Z)); 1353 1354 release_temp(p, ambient); 1355 release_temp(p, diffuse); 1356 release_temp(p, specular); 1357 } 1358 1359 release_temp(p, half); 1360 release_temp(p, VPpli); 1361 release_temp(p, att); 1362 } 1363 } 1364 1365 release_temps( p ); 1366} 1367 1368 1369static void build_fog( struct tnl_program *p ) 1370{ 1371 struct ureg fog = register_output(p, VARYING_SLOT_FOGC); 1372 struct ureg input; 1373 1374 switch (p->state->fog_distance_mode) { 1375 case FDM_EYE_RADIAL: { /* Z = sqrt(Xe*Xe + Ye*Ye + Ze*Ze) */ 1376 struct ureg tmp = get_temp(p); 1377 input = get_eye_position(p); 1378 emit_op2(p, OPCODE_DP3, tmp, WRITEMASK_X, input, input); 1379 emit_op1(p, OPCODE_RSQ, tmp, WRITEMASK_X, tmp); 1380 emit_op1(p, OPCODE_RCP, fog, WRITEMASK_X, tmp); 1381 break; 1382 } 1383 case FDM_EYE_PLANE: /* Z = Ze */ 1384 input = get_eye_position_z(p); 1385 emit_op1(p, OPCODE_MOV, fog, WRITEMASK_X, input); 1386 break; 1387 case FDM_EYE_PLANE_ABS: /* Z = abs(Ze) */ 1388 input = get_eye_position_z(p); 1389 emit_op1(p, OPCODE_ABS, fog, WRITEMASK_X, input); 1390 break; 1391 case FDM_FROM_ARRAY: 1392 input = swizzle1(register_input(p, VERT_ATTRIB_FOG), X); 1393 emit_op1(p, OPCODE_ABS, fog, WRITEMASK_X, input); 1394 break; 1395 default: 1396 assert(!"Bad fog mode in build_fog()"); 1397 break; 1398 } 1399 1400 emit_op1(p, OPCODE_MOV, fog, WRITEMASK_YZW, get_identity_param(p)); 1401} 1402 1403 1404static void build_reflect_texgen( struct tnl_program *p, 1405 struct ureg dest, 1406 GLuint writemask ) 1407{ 1408 struct ureg normal = get_transformed_normal(p); 1409 struct ureg eye_hat = get_eye_position_normalized(p); 1410 struct ureg tmp = get_temp(p); 1411 1412 /* n.u */ 1413 emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat); 1414 /* 2n.u */ 1415 emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp); 1416 /* (-2n.u)n + u */ 1417 emit_op3(p, OPCODE_MAD, dest, writemask, negate(tmp), normal, eye_hat); 1418 1419 release_temp(p, tmp); 1420} 1421 1422 1423static void build_sphere_texgen( struct tnl_program *p, 1424 struct ureg dest, 1425 GLuint writemask ) 1426{ 1427 struct ureg normal = get_transformed_normal(p); 1428 struct ureg eye_hat = get_eye_position_normalized(p); 1429 struct ureg tmp = get_temp(p); 1430 struct ureg half = register_scalar_const(p, .5); 1431 struct ureg r = get_temp(p); 1432 struct ureg inv_m = get_temp(p); 1433 struct ureg id = get_identity_param(p); 1434 1435 /* Could share the above calculations, but it would be 1436 * a fairly odd state for someone to set (both sphere and 1437 * reflection active for different texture coordinate 1438 * components. Of course - if two texture units enable 1439 * reflect and/or sphere, things start to tilt in favour 1440 * of seperating this out: 1441 */ 1442 1443 /* n.u */ 1444 emit_op2(p, OPCODE_DP3, tmp, 0, normal, eye_hat); 1445 /* 2n.u */ 1446 emit_op2(p, OPCODE_ADD, tmp, 0, tmp, tmp); 1447 /* (-2n.u)n + u */ 1448 emit_op3(p, OPCODE_MAD, r, 0, negate(tmp), normal, eye_hat); 1449 /* r + 0,0,1 */ 1450 emit_op2(p, OPCODE_ADD, tmp, 0, r, swizzle(id,X,Y,W,Z)); 1451 /* rx^2 + ry^2 + (rz+1)^2 */ 1452 emit_op2(p, OPCODE_DP3, tmp, 0, tmp, tmp); 1453 /* 2/m */ 1454 emit_op1(p, OPCODE_RSQ, tmp, 0, tmp); 1455 /* 1/m */ 1456 emit_op2(p, OPCODE_MUL, inv_m, 0, tmp, half); 1457 /* r/m + 1/2 */ 1458 emit_op3(p, OPCODE_MAD, dest, writemask, r, inv_m, half); 1459 1460 release_temp(p, tmp); 1461 release_temp(p, r); 1462 release_temp(p, inv_m); 1463} 1464 1465 1466static void build_texture_transform( struct tnl_program *p ) 1467{ 1468 GLuint i, j; 1469 1470 for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) { 1471 1472 if (!(p->state->fragprog_inputs_read & VARYING_BIT_TEX(i))) 1473 continue; 1474 1475 if (p->state->unit[i].coord_replace) 1476 continue; 1477 1478 if (p->state->unit[i].texgen_enabled || 1479 p->state->unit[i].texmat_enabled) { 1480 1481 GLuint texmat_enabled = p->state->unit[i].texmat_enabled; 1482 struct ureg out = register_output(p, VARYING_SLOT_TEX0 + i); 1483 struct ureg out_texgen = undef; 1484 1485 if (p->state->unit[i].texgen_enabled) { 1486 GLuint copy_mask = 0; 1487 GLuint sphere_mask = 0; 1488 GLuint reflect_mask = 0; 1489 GLuint normal_mask = 0; 1490 GLuint modes[4]; 1491 1492 if (texmat_enabled) 1493 out_texgen = get_temp(p); 1494 else 1495 out_texgen = out; 1496 1497 modes[0] = p->state->unit[i].texgen_mode0; 1498 modes[1] = p->state->unit[i].texgen_mode1; 1499 modes[2] = p->state->unit[i].texgen_mode2; 1500 modes[3] = p->state->unit[i].texgen_mode3; 1501 1502 for (j = 0; j < 4; j++) { 1503 switch (modes[j]) { 1504 case TXG_OBJ_LINEAR: { 1505 struct ureg obj = register_input(p, VERT_ATTRIB_POS); 1506 struct ureg plane = 1507 register_param3(p, STATE_TEXGEN, i, 1508 STATE_TEXGEN_OBJECT_S + j); 1509 1510 emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j, 1511 obj, plane ); 1512 break; 1513 } 1514 case TXG_EYE_LINEAR: { 1515 struct ureg eye = get_eye_position(p); 1516 struct ureg plane = 1517 register_param3(p, STATE_TEXGEN, i, 1518 STATE_TEXGEN_EYE_S + j); 1519 1520 emit_op2(p, OPCODE_DP4, out_texgen, WRITEMASK_X << j, 1521 eye, plane ); 1522 break; 1523 } 1524 case TXG_SPHERE_MAP: 1525 sphere_mask |= WRITEMASK_X << j; 1526 break; 1527 case TXG_REFLECTION_MAP: 1528 reflect_mask |= WRITEMASK_X << j; 1529 break; 1530 case TXG_NORMAL_MAP: 1531 normal_mask |= WRITEMASK_X << j; 1532 break; 1533 case TXG_NONE: 1534 copy_mask |= WRITEMASK_X << j; 1535 } 1536 } 1537 1538 if (sphere_mask) { 1539 build_sphere_texgen(p, out_texgen, sphere_mask); 1540 } 1541 1542 if (reflect_mask) { 1543 build_reflect_texgen(p, out_texgen, reflect_mask); 1544 } 1545 1546 if (normal_mask) { 1547 struct ureg normal = get_transformed_normal(p); 1548 emit_op1(p, OPCODE_MOV, out_texgen, normal_mask, normal ); 1549 } 1550 1551 if (copy_mask) { 1552 struct ureg in = register_input(p, VERT_ATTRIB_TEX0+i); 1553 emit_op1(p, OPCODE_MOV, out_texgen, copy_mask, in ); 1554 } 1555 } 1556 1557 if (texmat_enabled) { 1558 struct ureg texmat[4]; 1559 struct ureg in = (!is_undef(out_texgen) ? 1560 out_texgen : 1561 register_input(p, VERT_ATTRIB_TEX0+i)); 1562 if (p->mvp_with_dp4) { 1563 register_matrix_param5( p, STATE_TEXTURE_MATRIX, i, 0, 3, 1564 texmat ); 1565 emit_matrix_transform_vec4( p, out, texmat, in ); 1566 } 1567 else { 1568 register_matrix_param5( p, STATE_TEXTURE_MATRIX_TRANSPOSE, i, 0, 3, 1569 texmat ); 1570 emit_transpose_matrix_transform_vec4( p, out, texmat, in ); 1571 } 1572 } 1573 1574 release_temps(p); 1575 } 1576 else { 1577 emit_passthrough(p, VERT_ATTRIB_TEX0+i, VARYING_SLOT_TEX0+i); 1578 } 1579 } 1580} 1581 1582 1583/** 1584 * Point size attenuation computation. 1585 */ 1586static void build_atten_pointsize( struct tnl_program *p ) 1587{ 1588 struct ureg eye = get_eye_position_z(p); 1589 struct ureg state_size = register_param1(p, STATE_POINT_SIZE_CLAMPED); 1590 struct ureg state_attenuation = register_param1(p, STATE_POINT_ATTENUATION); 1591 struct ureg out = register_output(p, VARYING_SLOT_PSIZ); 1592 struct ureg ut = get_temp(p); 1593 1594 /* dist = |eyez| */ 1595 emit_op1(p, OPCODE_ABS, ut, WRITEMASK_Y, swizzle1(eye, Z)); 1596 /* p1 + dist * (p2 + dist * p3); */ 1597 emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y), 1598 swizzle1(state_attenuation, Z), swizzle1(state_attenuation, Y)); 1599 emit_op3(p, OPCODE_MAD, ut, WRITEMASK_X, swizzle1(ut, Y), 1600 ut, swizzle1(state_attenuation, X)); 1601 1602 /* 1 / sqrt(factor) */ 1603 emit_op1(p, OPCODE_RSQ, ut, WRITEMASK_X, ut ); 1604 1605#if 0 1606 /* out = pointSize / sqrt(factor) */ 1607 emit_op2(p, OPCODE_MUL, out, WRITEMASK_X, ut, state_size); 1608#else 1609 /* this is a good place to clamp the point size since there's likely 1610 * no hardware registers to clamp point size at rasterization time. 1611 */ 1612 emit_op2(p, OPCODE_MUL, ut, WRITEMASK_X, ut, state_size); 1613 emit_op2(p, OPCODE_MAX, ut, WRITEMASK_X, ut, swizzle1(state_size, Y)); 1614 emit_op2(p, OPCODE_MIN, out, WRITEMASK_X, ut, swizzle1(state_size, Z)); 1615#endif 1616 1617 release_temp(p, ut); 1618} 1619 1620 1621/** 1622 * Pass-though per-vertex point size, from user's point size array. 1623 */ 1624static void build_array_pointsize( struct tnl_program *p ) 1625{ 1626 struct ureg in = register_input(p, VERT_ATTRIB_POINT_SIZE); 1627 struct ureg out = register_output(p, VARYING_SLOT_PSIZ); 1628 emit_op1(p, OPCODE_MOV, out, WRITEMASK_X, in); 1629} 1630 1631 1632static void build_tnl_program( struct tnl_program *p ) 1633{ 1634 /* Emit the program, starting with the modelview, projection transforms: 1635 */ 1636 build_hpos(p); 1637 1638 /* Lighting calculations: 1639 */ 1640 if (p->state->fragprog_inputs_read & (VARYING_BIT_COL0|VARYING_BIT_COL1)) { 1641 if (p->state->light_global_enabled) 1642 build_lighting(p); 1643 else { 1644 if (p->state->fragprog_inputs_read & VARYING_BIT_COL0) 1645 emit_passthrough(p, VERT_ATTRIB_COLOR0, VARYING_SLOT_COL0); 1646 1647 if (p->state->fragprog_inputs_read & VARYING_BIT_COL1) 1648 emit_passthrough(p, VERT_ATTRIB_COLOR1, VARYING_SLOT_COL1); 1649 } 1650 } 1651 1652 if (p->state->fragprog_inputs_read & VARYING_BIT_FOGC) 1653 build_fog(p); 1654 1655 if (p->state->fragprog_inputs_read & VARYING_BITS_TEX_ANY) 1656 build_texture_transform(p); 1657 1658 if (p->state->point_attenuated) 1659 build_atten_pointsize(p); 1660 else if (p->state->varying_vp_inputs & VERT_BIT_POINT_SIZE) 1661 build_array_pointsize(p); 1662 1663 /* Finish up: 1664 */ 1665 emit_op1(p, OPCODE_END, undef, 0, undef); 1666 1667 /* Disassemble: 1668 */ 1669 if (DISASSEM) { 1670 printf ("\n"); 1671 } 1672} 1673 1674 1675static void 1676create_new_program( const struct state_key *key, 1677 struct gl_program *program, 1678 GLboolean mvp_with_dp4, 1679 GLuint max_temps) 1680{ 1681 struct tnl_program p; 1682 1683 memset(&p, 0, sizeof(p)); 1684 p.state = key; 1685 p.program = program; 1686 p.eye_position = undef; 1687 p.eye_position_z = undef; 1688 p.eye_position_normalized = undef; 1689 p.transformed_normal = undef; 1690 p.identity = undef; 1691 p.temp_in_use = 0; 1692 p.mvp_with_dp4 = mvp_with_dp4; 1693 1694 if (max_temps >= sizeof(int) * 8) 1695 p.temp_reserved = 0; 1696 else 1697 p.temp_reserved = ~((1<<max_temps)-1); 1698 1699 /* Start by allocating 32 instructions. 1700 * If we need more, we'll grow the instruction array as needed. 1701 */ 1702 p.max_inst = 32; 1703 p.program->arb.Instructions = 1704 rzalloc_array(program, struct prog_instruction, p.max_inst); 1705 p.program->String = NULL; 1706 p.program->arb.NumInstructions = 1707 p.program->arb.NumTemporaries = 1708 p.program->arb.NumParameters = 1709 p.program->arb.NumAttributes = p.program->arb.NumAddressRegs = 0; 1710 p.program->Parameters = _mesa_new_parameter_list(); 1711 p.program->info.inputs_read = 0; 1712 p.program->info.outputs_written = 0; 1713 p.state_params = _mesa_new_parameter_list(); 1714 1715 build_tnl_program( &p ); 1716 1717 _mesa_add_separate_state_parameters(p.program, p.state_params); 1718 _mesa_free_parameter_list(p.state_params); 1719} 1720 1721 1722/** 1723 * Return a vertex program which implements the current fixed-function 1724 * transform/lighting/texgen operations. 1725 */ 1726struct gl_program * 1727_mesa_get_fixed_func_vertex_program(struct gl_context *ctx) 1728{ 1729 struct gl_program *prog; 1730 struct state_key key; 1731 1732 /* We only update ctx->VertexProgram._VaryingInputs when in VP_MODE_FF _VPMode */ 1733 assert(VP_MODE_FF == ctx->VertexProgram._VPMode); 1734 1735 /* Grab all the relevant state and put it in a single structure: 1736 */ 1737 make_state_key(ctx, &key); 1738 1739 /* Look for an already-prepared program for this state: 1740 */ 1741 prog = _mesa_search_program_cache(ctx->VertexProgram.Cache, &key, 1742 sizeof(key)); 1743 1744 if (!prog) { 1745 /* OK, we'll have to build a new one */ 1746 if (0) 1747 printf("Build new TNL program\n"); 1748 1749 prog = ctx->Driver.NewProgram(ctx, MESA_SHADER_VERTEX, 0, true); 1750 if (!prog) 1751 return NULL; 1752 1753 create_new_program( &key, prog, 1754 ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS, 1755 ctx->Const.Program[MESA_SHADER_VERTEX].MaxTemps ); 1756 1757 if (ctx->Driver.ProgramStringNotify) 1758 ctx->Driver.ProgramStringNotify(ctx, GL_VERTEX_PROGRAM_ARB, prog); 1759 1760 _mesa_program_cache_insert(ctx, ctx->VertexProgram.Cache, &key, 1761 sizeof(key), prog); 1762 } 1763 1764 return prog; 1765} 1766