ff_fragment_shader.cpp revision 3464ebd5
1/************************************************************************** 2 * 3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * Copyright 2009 VMware, Inc. All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial portions 17 * of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 **************************************************************************/ 28 29extern "C" { 30#include "glheader.h" 31#include "imports.h" 32#include "mtypes.h" 33#include "program/program.h" 34#include "program/prog_parameter.h" 35#include "program/prog_cache.h" 36#include "program/prog_instruction.h" 37#include "program/prog_print.h" 38#include "program/prog_statevars.h" 39#include "program/programopt.h" 40#include "texenvprogram.h" 41} 42 43/* 44 * Note on texture units: 45 * 46 * The number of texture units supported by fixed-function fragment 47 * processing is MAX_TEXTURE_COORD_UNITS, not MAX_TEXTURE_IMAGE_UNITS. 48 * That's because there's a one-to-one correspondence between texture 49 * coordinates and samplers in fixed-function processing. 50 * 51 * Since fixed-function vertex processing is limited to MAX_TEXTURE_COORD_UNITS 52 * sets of texcoords, so is fixed-function fragment processing. 53 * 54 * We can safely use ctx->Const.MaxTextureUnits for loop bounds. 55 */ 56 57 58struct texenvprog_cache_item 59{ 60 GLuint hash; 61 void *key; 62 struct gl_fragment_program *data; 63 struct texenvprog_cache_item *next; 64}; 65 66static GLboolean 67texenv_doing_secondary_color(struct gl_context *ctx) 68{ 69 if (ctx->Light.Enabled && 70 (ctx->Light.Model.ColorControl == GL_SEPARATE_SPECULAR_COLOR)) 71 return GL_TRUE; 72 73 if (ctx->Fog.ColorSumEnabled) 74 return GL_TRUE; 75 76 return GL_FALSE; 77} 78 79/** 80 * Up to nine instructions per tex unit, plus fog, specular color. 81 */ 82#define MAX_INSTRUCTIONS ((MAX_TEXTURE_COORD_UNITS * 9) + 12) 83 84#define DISASSEM (MESA_VERBOSE & VERBOSE_DISASSEM) 85 86struct mode_opt { 87#ifdef __GNUC__ 88 __extension__ GLubyte Source:4; /**< SRC_x */ 89 __extension__ GLubyte Operand:3; /**< OPR_x */ 90#else 91 GLubyte Source; /**< SRC_x */ 92 GLubyte Operand; /**< OPR_x */ 93#endif 94}; 95 96struct state_key { 97 GLuint nr_enabled_units:8; 98 GLuint enabled_units:8; 99 GLuint separate_specular:1; 100 GLuint fog_enabled:1; 101 GLuint fog_mode:2; /**< FOG_x */ 102 GLuint inputs_available:12; 103 GLuint num_draw_buffers:4; 104 105 /* NOTE: This array of structs must be last! (see "keySize" below) */ 106 struct { 107 GLuint enabled:1; 108 GLuint source_index:3; /**< TEXTURE_x_INDEX */ 109 GLuint shadow:1; 110 GLuint ScaleShiftRGB:2; 111 GLuint ScaleShiftA:2; 112 113 GLuint NumArgsRGB:3; /**< up to MAX_COMBINER_TERMS */ 114 GLuint ModeRGB:5; /**< MODE_x */ 115 116 GLuint NumArgsA:3; /**< up to MAX_COMBINER_TERMS */ 117 GLuint ModeA:5; /**< MODE_x */ 118 119 GLuint texture_cyl_wrap:1; /**< For gallium test/debug only */ 120 121 struct mode_opt OptRGB[MAX_COMBINER_TERMS]; 122 struct mode_opt OptA[MAX_COMBINER_TERMS]; 123 } unit[MAX_TEXTURE_UNITS]; 124}; 125 126#define FOG_LINEAR 0 127#define FOG_EXP 1 128#define FOG_EXP2 2 129#define FOG_UNKNOWN 3 130 131static GLuint translate_fog_mode( GLenum mode ) 132{ 133 switch (mode) { 134 case GL_LINEAR: return FOG_LINEAR; 135 case GL_EXP: return FOG_EXP; 136 case GL_EXP2: return FOG_EXP2; 137 default: return FOG_UNKNOWN; 138 } 139} 140 141#define OPR_SRC_COLOR 0 142#define OPR_ONE_MINUS_SRC_COLOR 1 143#define OPR_SRC_ALPHA 2 144#define OPR_ONE_MINUS_SRC_ALPHA 3 145#define OPR_ZERO 4 146#define OPR_ONE 5 147#define OPR_UNKNOWN 7 148 149static GLuint translate_operand( GLenum operand ) 150{ 151 switch (operand) { 152 case GL_SRC_COLOR: return OPR_SRC_COLOR; 153 case GL_ONE_MINUS_SRC_COLOR: return OPR_ONE_MINUS_SRC_COLOR; 154 case GL_SRC_ALPHA: return OPR_SRC_ALPHA; 155 case GL_ONE_MINUS_SRC_ALPHA: return OPR_ONE_MINUS_SRC_ALPHA; 156 case GL_ZERO: return OPR_ZERO; 157 case GL_ONE: return OPR_ONE; 158 default: 159 assert(0); 160 return OPR_UNKNOWN; 161 } 162} 163 164#define SRC_TEXTURE 0 165#define SRC_TEXTURE0 1 166#define SRC_TEXTURE1 2 167#define SRC_TEXTURE2 3 168#define SRC_TEXTURE3 4 169#define SRC_TEXTURE4 5 170#define SRC_TEXTURE5 6 171#define SRC_TEXTURE6 7 172#define SRC_TEXTURE7 8 173#define SRC_CONSTANT 9 174#define SRC_PRIMARY_COLOR 10 175#define SRC_PREVIOUS 11 176#define SRC_ZERO 12 177#define SRC_UNKNOWN 15 178 179static GLuint translate_source( GLenum src ) 180{ 181 switch (src) { 182 case GL_TEXTURE: return SRC_TEXTURE; 183 case GL_TEXTURE0: 184 case GL_TEXTURE1: 185 case GL_TEXTURE2: 186 case GL_TEXTURE3: 187 case GL_TEXTURE4: 188 case GL_TEXTURE5: 189 case GL_TEXTURE6: 190 case GL_TEXTURE7: return SRC_TEXTURE0 + (src - GL_TEXTURE0); 191 case GL_CONSTANT: return SRC_CONSTANT; 192 case GL_PRIMARY_COLOR: return SRC_PRIMARY_COLOR; 193 case GL_PREVIOUS: return SRC_PREVIOUS; 194 case GL_ZERO: 195 return SRC_ZERO; 196 default: 197 assert(0); 198 return SRC_UNKNOWN; 199 } 200} 201 202#define MODE_REPLACE 0 /* r = a0 */ 203#define MODE_MODULATE 1 /* r = a0 * a1 */ 204#define MODE_ADD 2 /* r = a0 + a1 */ 205#define MODE_ADD_SIGNED 3 /* r = a0 + a1 - 0.5 */ 206#define MODE_INTERPOLATE 4 /* r = a0 * a2 + a1 * (1 - a2) */ 207#define MODE_SUBTRACT 5 /* r = a0 - a1 */ 208#define MODE_DOT3_RGB 6 /* r = a0 . a1 */ 209#define MODE_DOT3_RGB_EXT 7 /* r = a0 . a1 */ 210#define MODE_DOT3_RGBA 8 /* r = a0 . a1 */ 211#define MODE_DOT3_RGBA_EXT 9 /* r = a0 . a1 */ 212#define MODE_MODULATE_ADD_ATI 10 /* r = a0 * a2 + a1 */ 213#define MODE_MODULATE_SIGNED_ADD_ATI 11 /* r = a0 * a2 + a1 - 0.5 */ 214#define MODE_MODULATE_SUBTRACT_ATI 12 /* r = a0 * a2 - a1 */ 215#define MODE_ADD_PRODUCTS 13 /* r = a0 * a1 + a2 * a3 */ 216#define MODE_ADD_PRODUCTS_SIGNED 14 /* r = a0 * a1 + a2 * a3 - 0.5 */ 217#define MODE_BUMP_ENVMAP_ATI 15 /* special */ 218#define MODE_UNKNOWN 16 219 220/** 221 * Translate GL combiner state into a MODE_x value 222 */ 223static GLuint translate_mode( GLenum envMode, GLenum mode ) 224{ 225 switch (mode) { 226 case GL_REPLACE: return MODE_REPLACE; 227 case GL_MODULATE: return MODE_MODULATE; 228 case GL_ADD: 229 if (envMode == GL_COMBINE4_NV) 230 return MODE_ADD_PRODUCTS; 231 else 232 return MODE_ADD; 233 case GL_ADD_SIGNED: 234 if (envMode == GL_COMBINE4_NV) 235 return MODE_ADD_PRODUCTS_SIGNED; 236 else 237 return MODE_ADD_SIGNED; 238 case GL_INTERPOLATE: return MODE_INTERPOLATE; 239 case GL_SUBTRACT: return MODE_SUBTRACT; 240 case GL_DOT3_RGB: return MODE_DOT3_RGB; 241 case GL_DOT3_RGB_EXT: return MODE_DOT3_RGB_EXT; 242 case GL_DOT3_RGBA: return MODE_DOT3_RGBA; 243 case GL_DOT3_RGBA_EXT: return MODE_DOT3_RGBA_EXT; 244 case GL_MODULATE_ADD_ATI: return MODE_MODULATE_ADD_ATI; 245 case GL_MODULATE_SIGNED_ADD_ATI: return MODE_MODULATE_SIGNED_ADD_ATI; 246 case GL_MODULATE_SUBTRACT_ATI: return MODE_MODULATE_SUBTRACT_ATI; 247 case GL_BUMP_ENVMAP_ATI: return MODE_BUMP_ENVMAP_ATI; 248 default: 249 assert(0); 250 return MODE_UNKNOWN; 251 } 252} 253 254 255/** 256 * Do we need to clamp the results of the given texture env/combine mode? 257 * If the inputs to the mode are in [0,1] we don't always have to clamp 258 * the results. 259 */ 260static GLboolean 261need_saturate( GLuint mode ) 262{ 263 switch (mode) { 264 case MODE_REPLACE: 265 case MODE_MODULATE: 266 case MODE_INTERPOLATE: 267 return GL_FALSE; 268 case MODE_ADD: 269 case MODE_ADD_SIGNED: 270 case MODE_SUBTRACT: 271 case MODE_DOT3_RGB: 272 case MODE_DOT3_RGB_EXT: 273 case MODE_DOT3_RGBA: 274 case MODE_DOT3_RGBA_EXT: 275 case MODE_MODULATE_ADD_ATI: 276 case MODE_MODULATE_SIGNED_ADD_ATI: 277 case MODE_MODULATE_SUBTRACT_ATI: 278 case MODE_ADD_PRODUCTS: 279 case MODE_ADD_PRODUCTS_SIGNED: 280 case MODE_BUMP_ENVMAP_ATI: 281 return GL_TRUE; 282 default: 283 assert(0); 284 return GL_FALSE; 285 } 286} 287 288 289 290/** 291 * Translate TEXTURE_x_BIT to TEXTURE_x_INDEX. 292 */ 293static GLuint translate_tex_src_bit( GLbitfield bit ) 294{ 295 ASSERT(bit); 296 return _mesa_ffs(bit) - 1; 297} 298 299 300#define VERT_BIT_TEX_ANY (0xff << VERT_ATTRIB_TEX0) 301#define VERT_RESULT_TEX_ANY (0xff << VERT_RESULT_TEX0) 302 303/** 304 * Identify all possible varying inputs. The fragment program will 305 * never reference non-varying inputs, but will track them via state 306 * constants instead. 307 * 308 * This function figures out all the inputs that the fragment program 309 * has access to. The bitmask is later reduced to just those which 310 * are actually referenced. 311 */ 312static GLbitfield get_fp_input_mask( struct gl_context *ctx ) 313{ 314 /* _NEW_PROGRAM */ 315 const GLboolean vertexShader = 316 (ctx->Shader.CurrentVertexProgram && 317 ctx->Shader.CurrentVertexProgram->LinkStatus && 318 ctx->Shader.CurrentVertexProgram->VertexProgram); 319 const GLboolean vertexProgram = ctx->VertexProgram._Enabled; 320 GLbitfield fp_inputs = 0x0; 321 322 if (ctx->VertexProgram._Overriden) { 323 /* Somebody's messing with the vertex program and we don't have 324 * a clue what's happening. Assume that it could be producing 325 * all possible outputs. 326 */ 327 fp_inputs = ~0; 328 } 329 else if (ctx->RenderMode == GL_FEEDBACK) { 330 /* _NEW_RENDERMODE */ 331 fp_inputs = (FRAG_BIT_COL0 | FRAG_BIT_TEX0); 332 } 333 else if (!(vertexProgram || vertexShader) || 334 !ctx->VertexProgram._Current) { 335 /* Fixed function vertex logic */ 336 /* _NEW_ARRAY */ 337 GLbitfield varying_inputs = ctx->varying_vp_inputs; 338 339 /* These get generated in the setup routine regardless of the 340 * vertex program: 341 */ 342 /* _NEW_POINT */ 343 if (ctx->Point.PointSprite) 344 varying_inputs |= FRAG_BITS_TEX_ANY; 345 346 /* First look at what values may be computed by the generated 347 * vertex program: 348 */ 349 /* _NEW_LIGHT */ 350 if (ctx->Light.Enabled) { 351 fp_inputs |= FRAG_BIT_COL0; 352 353 if (texenv_doing_secondary_color(ctx)) 354 fp_inputs |= FRAG_BIT_COL1; 355 } 356 357 /* _NEW_TEXTURE */ 358 fp_inputs |= (ctx->Texture._TexGenEnabled | 359 ctx->Texture._TexMatEnabled) << FRAG_ATTRIB_TEX0; 360 361 /* Then look at what might be varying as a result of enabled 362 * arrays, etc: 363 */ 364 if (varying_inputs & VERT_BIT_COLOR0) 365 fp_inputs |= FRAG_BIT_COL0; 366 if (varying_inputs & VERT_BIT_COLOR1) 367 fp_inputs |= FRAG_BIT_COL1; 368 369 fp_inputs |= (((varying_inputs & VERT_BIT_TEX_ANY) >> VERT_ATTRIB_TEX0) 370 << FRAG_ATTRIB_TEX0); 371 372 } 373 else { 374 /* calculate from vp->outputs */ 375 struct gl_vertex_program *vprog; 376 GLbitfield64 vp_outputs; 377 378 /* Choose GLSL vertex shader over ARB vertex program. Need this 379 * since vertex shader state validation comes after fragment state 380 * validation (see additional comments in state.c). 381 */ 382 if (vertexShader) 383 vprog = ctx->Shader.CurrentVertexProgram->VertexProgram; 384 else 385 vprog = ctx->VertexProgram.Current; 386 387 vp_outputs = vprog->Base.OutputsWritten; 388 389 /* These get generated in the setup routine regardless of the 390 * vertex program: 391 */ 392 /* _NEW_POINT */ 393 if (ctx->Point.PointSprite) 394 vp_outputs |= FRAG_BITS_TEX_ANY; 395 396 if (vp_outputs & (1 << VERT_RESULT_COL0)) 397 fp_inputs |= FRAG_BIT_COL0; 398 if (vp_outputs & (1 << VERT_RESULT_COL1)) 399 fp_inputs |= FRAG_BIT_COL1; 400 401 fp_inputs |= (((vp_outputs & VERT_RESULT_TEX_ANY) >> VERT_RESULT_TEX0) 402 << FRAG_ATTRIB_TEX0); 403 } 404 405 return fp_inputs; 406} 407 408 409/** 410 * Examine current texture environment state and generate a unique 411 * key to identify it. 412 */ 413static GLuint make_state_key( struct gl_context *ctx, struct state_key *key ) 414{ 415 GLuint i, j; 416 GLbitfield inputs_referenced = FRAG_BIT_COL0; 417 const GLbitfield inputs_available = get_fp_input_mask( ctx ); 418 GLuint keySize; 419 420 memset(key, 0, sizeof(*key)); 421 422 /* _NEW_TEXTURE */ 423 for (i = 0; i < ctx->Const.MaxTextureUnits; i++) { 424 const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[i]; 425 const struct gl_texture_object *texObj = texUnit->_Current; 426 const struct gl_tex_env_combine_state *comb = texUnit->_CurrentCombine; 427 GLenum format; 428 429 if (!texUnit->_ReallyEnabled || !texUnit->Enabled) 430 continue; 431 432 format = texObj->Image[0][texObj->BaseLevel]->_BaseFormat; 433 434 key->unit[i].enabled = 1; 435 key->enabled_units |= (1<<i); 436 key->nr_enabled_units = i + 1; 437 inputs_referenced |= FRAG_BIT_TEX(i); 438 439 key->unit[i].source_index = 440 translate_tex_src_bit(texUnit->_ReallyEnabled); 441 442 key->unit[i].shadow = 443 ((texObj->Sampler.CompareMode == GL_COMPARE_R_TO_TEXTURE) && 444 ((format == GL_DEPTH_COMPONENT) || 445 (format == GL_DEPTH_STENCIL_EXT))); 446 447 key->unit[i].NumArgsRGB = comb->_NumArgsRGB; 448 key->unit[i].NumArgsA = comb->_NumArgsA; 449 450 key->unit[i].ModeRGB = 451 translate_mode(texUnit->EnvMode, comb->ModeRGB); 452 key->unit[i].ModeA = 453 translate_mode(texUnit->EnvMode, comb->ModeA); 454 455 key->unit[i].ScaleShiftRGB = comb->ScaleShiftRGB; 456 key->unit[i].ScaleShiftA = comb->ScaleShiftA; 457 458 for (j = 0; j < MAX_COMBINER_TERMS; j++) { 459 key->unit[i].OptRGB[j].Operand = translate_operand(comb->OperandRGB[j]); 460 key->unit[i].OptA[j].Operand = translate_operand(comb->OperandA[j]); 461 key->unit[i].OptRGB[j].Source = translate_source(comb->SourceRGB[j]); 462 key->unit[i].OptA[j].Source = translate_source(comb->SourceA[j]); 463 } 464 465 if (key->unit[i].ModeRGB == MODE_BUMP_ENVMAP_ATI) { 466 /* requires some special translation */ 467 key->unit[i].NumArgsRGB = 2; 468 key->unit[i].ScaleShiftRGB = 0; 469 key->unit[i].OptRGB[0].Operand = OPR_SRC_COLOR; 470 key->unit[i].OptRGB[0].Source = SRC_TEXTURE; 471 key->unit[i].OptRGB[1].Operand = OPR_SRC_COLOR; 472 key->unit[i].OptRGB[1].Source = texUnit->BumpTarget - GL_TEXTURE0 + SRC_TEXTURE0; 473 } 474 475 /* this is a back-door for enabling cylindrical texture wrap mode */ 476 if (texObj->Priority == 0.125) 477 key->unit[i].texture_cyl_wrap = 1; 478 } 479 480 /* _NEW_LIGHT | _NEW_FOG */ 481 if (texenv_doing_secondary_color(ctx)) { 482 key->separate_specular = 1; 483 inputs_referenced |= FRAG_BIT_COL1; 484 } 485 486 /* _NEW_FOG */ 487 if (ctx->Fog.Enabled) { 488 key->fog_enabled = 1; 489 key->fog_mode = translate_fog_mode(ctx->Fog.Mode); 490 inputs_referenced |= FRAG_BIT_FOGC; /* maybe */ 491 } 492 493 /* _NEW_BUFFERS */ 494 key->num_draw_buffers = ctx->DrawBuffer->_NumColorDrawBuffers; 495 496 key->inputs_available = (inputs_available & inputs_referenced); 497 498 /* compute size of state key, ignoring unused texture units */ 499 keySize = sizeof(*key) - sizeof(key->unit) 500 + key->nr_enabled_units * sizeof(key->unit[0]); 501 502 return keySize; 503} 504 505 506/** 507 * Use uregs to represent registers internally, translate to Mesa's 508 * expected formats on emit. 509 * 510 * NOTE: These are passed by value extensively in this file rather 511 * than as usual by pointer reference. If this disturbs you, try 512 * remembering they are just 32bits in size. 513 * 514 * GCC is smart enough to deal with these dword-sized structures in 515 * much the same way as if I had defined them as dwords and was using 516 * macros to access and set the fields. This is much nicer and easier 517 * to evolve. 518 */ 519struct ureg { 520 GLuint file:4; 521 GLuint idx:8; 522 GLuint negatebase:1; 523 GLuint swz:12; 524 GLuint pad:7; 525}; 526 527static const struct ureg undef = { 528 PROGRAM_UNDEFINED, 529 255, 530 0, 531 0, 532 0 533}; 534 535 536/** State used to build the fragment program: 537 */ 538struct texenv_fragment_program { 539 struct gl_fragment_program *program; 540 struct state_key *state; 541 542 GLbitfield alu_temps; /**< Track texture indirections, see spec. */ 543 GLbitfield temps_output; /**< Track texture indirections, see spec. */ 544 GLbitfield temp_in_use; /**< Tracks temporary regs which are in use. */ 545 GLboolean error; 546 547 struct ureg src_texture[MAX_TEXTURE_COORD_UNITS]; 548 /* Reg containing each texture unit's sampled texture color, 549 * else undef. 550 */ 551 552 struct ureg texcoord_tex[MAX_TEXTURE_COORD_UNITS]; 553 /* Reg containing texcoord for a texture unit, 554 * needed for bump mapping, else undef. 555 */ 556 557 struct ureg src_previous; /**< Reg containing color from previous 558 * stage. May need to be decl'd. 559 */ 560 561 GLuint last_tex_stage; /**< Number of last enabled texture unit */ 562 563 struct ureg half; 564 struct ureg one; 565 struct ureg zero; 566}; 567 568 569 570static struct ureg make_ureg(GLuint file, GLuint idx) 571{ 572 struct ureg reg; 573 reg.file = file; 574 reg.idx = idx; 575 reg.negatebase = 0; 576 reg.swz = SWIZZLE_NOOP; 577 reg.pad = 0; 578 return reg; 579} 580 581static struct ureg swizzle( struct ureg reg, int x, int y, int z, int w ) 582{ 583 reg.swz = MAKE_SWIZZLE4(GET_SWZ(reg.swz, x), 584 GET_SWZ(reg.swz, y), 585 GET_SWZ(reg.swz, z), 586 GET_SWZ(reg.swz, w)); 587 588 return reg; 589} 590 591static struct ureg swizzle1( struct ureg reg, int x ) 592{ 593 return swizzle(reg, x, x, x, x); 594} 595 596static struct ureg negate( struct ureg reg ) 597{ 598 reg.negatebase ^= 1; 599 return reg; 600} 601 602static GLboolean is_undef( struct ureg reg ) 603{ 604 return reg.file == PROGRAM_UNDEFINED; 605} 606 607 608static struct ureg get_temp( struct texenv_fragment_program *p ) 609{ 610 GLint bit; 611 612 /* First try and reuse temps which have been used already: 613 */ 614 bit = _mesa_ffs( ~p->temp_in_use & p->alu_temps ); 615 616 /* Then any unused temporary: 617 */ 618 if (!bit) 619 bit = _mesa_ffs( ~p->temp_in_use ); 620 621 if (!bit) { 622 _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__); 623 exit(1); 624 } 625 626 if ((GLuint) bit > p->program->Base.NumTemporaries) 627 p->program->Base.NumTemporaries = bit; 628 629 p->temp_in_use |= 1<<(bit-1); 630 return make_ureg(PROGRAM_TEMPORARY, (bit-1)); 631} 632 633static struct ureg get_tex_temp( struct texenv_fragment_program *p ) 634{ 635 int bit; 636 637 /* First try to find available temp not previously used (to avoid 638 * starting a new texture indirection). According to the spec, the 639 * ~p->temps_output isn't necessary, but will keep it there for 640 * now: 641 */ 642 bit = _mesa_ffs( ~p->temp_in_use & ~p->alu_temps & ~p->temps_output ); 643 644 /* Then any unused temporary: 645 */ 646 if (!bit) 647 bit = _mesa_ffs( ~p->temp_in_use ); 648 649 if (!bit) { 650 _mesa_problem(NULL, "%s: out of temporaries\n", __FILE__); 651 exit(1); 652 } 653 654 if ((GLuint) bit > p->program->Base.NumTemporaries) 655 p->program->Base.NumTemporaries = bit; 656 657 p->temp_in_use |= 1<<(bit-1); 658 return make_ureg(PROGRAM_TEMPORARY, (bit-1)); 659} 660 661 662/** Mark a temp reg as being no longer allocatable. */ 663static void reserve_temp( struct texenv_fragment_program *p, struct ureg r ) 664{ 665 if (r.file == PROGRAM_TEMPORARY) 666 p->temps_output |= (1 << r.idx); 667} 668 669 670static void release_temps(struct gl_context *ctx, struct texenv_fragment_program *p ) 671{ 672 GLuint max_temp = ctx->Const.FragmentProgram.MaxTemps; 673 674 /* KW: To support tex_env_crossbar, don't release the registers in 675 * temps_output. 676 */ 677 if (max_temp >= sizeof(int) * 8) 678 p->temp_in_use = p->temps_output; 679 else 680 p->temp_in_use = ~((1<<max_temp)-1) | p->temps_output; 681} 682 683 684static struct ureg register_param5( struct texenv_fragment_program *p, 685 GLint s0, 686 GLint s1, 687 GLint s2, 688 GLint s3, 689 GLint s4) 690{ 691 int tokens[STATE_LENGTH]; 692 GLuint idx; 693 tokens[0] = s0; 694 tokens[1] = s1; 695 tokens[2] = s2; 696 tokens[3] = s3; 697 tokens[4] = s4; 698 idx = _mesa_add_state_reference(p->program->Base.Parameters, 699 (gl_state_index *)tokens); 700 return make_ureg(PROGRAM_STATE_VAR, idx); 701} 702 703 704#define register_param1(p,s0) register_param5(p,s0,0,0,0,0) 705#define register_param2(p,s0,s1) register_param5(p,s0,s1,0,0,0) 706#define register_param3(p,s0,s1,s2) register_param5(p,s0,s1,s2,0,0) 707#define register_param4(p,s0,s1,s2,s3) register_param5(p,s0,s1,s2,s3,0) 708 709static GLuint frag_to_vert_attrib( GLuint attrib ) 710{ 711 switch (attrib) { 712 case FRAG_ATTRIB_COL0: return VERT_ATTRIB_COLOR0; 713 case FRAG_ATTRIB_COL1: return VERT_ATTRIB_COLOR1; 714 default: 715 assert(attrib >= FRAG_ATTRIB_TEX0); 716 assert(attrib <= FRAG_ATTRIB_TEX7); 717 return attrib - FRAG_ATTRIB_TEX0 + VERT_ATTRIB_TEX0; 718 } 719} 720 721 722static struct ureg register_input( struct texenv_fragment_program *p, GLuint input ) 723{ 724 if (p->state->inputs_available & (1<<input)) { 725 p->program->Base.InputsRead |= (1 << input); 726 return make_ureg(PROGRAM_INPUT, input); 727 } 728 else { 729 GLuint idx = frag_to_vert_attrib( input ); 730 return register_param3( p, STATE_INTERNAL, STATE_CURRENT_ATTRIB_MAYBE_VP_CLAMPED, idx ); 731 } 732} 733 734 735static void emit_arg( struct prog_src_register *reg, 736 struct ureg ureg ) 737{ 738 reg->File = ureg.file; 739 reg->Index = ureg.idx; 740 reg->Swizzle = ureg.swz; 741 reg->Negate = ureg.negatebase ? NEGATE_XYZW : NEGATE_NONE; 742 reg->Abs = GL_FALSE; 743} 744 745static void emit_dst( struct prog_dst_register *dst, 746 struct ureg ureg, GLuint mask ) 747{ 748 dst->File = ureg.file; 749 dst->Index = ureg.idx; 750 dst->WriteMask = mask; 751 dst->CondMask = COND_TR; /* always pass cond test */ 752 dst->CondSwizzle = SWIZZLE_NOOP; 753} 754 755static struct prog_instruction * 756emit_op(struct texenv_fragment_program *p, 757 enum prog_opcode op, 758 struct ureg dest, 759 GLuint mask, 760 GLboolean saturate, 761 struct ureg src0, 762 struct ureg src1, 763 struct ureg src2 ) 764{ 765 const GLuint nr = p->program->Base.NumInstructions++; 766 struct prog_instruction *inst = &p->program->Base.Instructions[nr]; 767 768 assert(nr < MAX_INSTRUCTIONS); 769 770 _mesa_init_instructions(inst, 1); 771 inst->Opcode = op; 772 773 emit_arg( &inst->SrcReg[0], src0 ); 774 emit_arg( &inst->SrcReg[1], src1 ); 775 emit_arg( &inst->SrcReg[2], src2 ); 776 777 inst->SaturateMode = saturate ? SATURATE_ZERO_ONE : SATURATE_OFF; 778 779 emit_dst( &inst->DstReg, dest, mask ); 780 781#if 0 782 /* Accounting for indirection tracking: 783 */ 784 if (dest.file == PROGRAM_TEMPORARY) 785 p->temps_output |= 1 << dest.idx; 786#endif 787 788 return inst; 789} 790 791 792static struct ureg emit_arith( struct texenv_fragment_program *p, 793 enum prog_opcode op, 794 struct ureg dest, 795 GLuint mask, 796 GLboolean saturate, 797 struct ureg src0, 798 struct ureg src1, 799 struct ureg src2 ) 800{ 801 emit_op(p, op, dest, mask, saturate, src0, src1, src2); 802 803 /* Accounting for indirection tracking: 804 */ 805 if (src0.file == PROGRAM_TEMPORARY) 806 p->alu_temps |= 1 << src0.idx; 807 808 if (!is_undef(src1) && src1.file == PROGRAM_TEMPORARY) 809 p->alu_temps |= 1 << src1.idx; 810 811 if (!is_undef(src2) && src2.file == PROGRAM_TEMPORARY) 812 p->alu_temps |= 1 << src2.idx; 813 814 if (dest.file == PROGRAM_TEMPORARY) 815 p->alu_temps |= 1 << dest.idx; 816 817 p->program->Base.NumAluInstructions++; 818 return dest; 819} 820 821static struct ureg emit_texld( struct texenv_fragment_program *p, 822 enum prog_opcode op, 823 struct ureg dest, 824 GLuint destmask, 825 GLuint tex_unit, 826 GLuint tex_idx, 827 GLuint tex_shadow, 828 struct ureg coord ) 829{ 830 struct prog_instruction *inst = emit_op( p, op, 831 dest, destmask, 832 GL_FALSE, /* don't saturate? */ 833 coord, /* arg 0? */ 834 undef, 835 undef); 836 837 inst->TexSrcTarget = tex_idx; 838 inst->TexSrcUnit = tex_unit; 839 inst->TexShadow = tex_shadow; 840 841 p->program->Base.NumTexInstructions++; 842 843 /* Accounting for indirection tracking: 844 */ 845 reserve_temp(p, dest); 846 847#if 0 848 /* Is this a texture indirection? 849 */ 850 if ((coord.file == PROGRAM_TEMPORARY && 851 (p->temps_output & (1<<coord.idx))) || 852 (dest.file == PROGRAM_TEMPORARY && 853 (p->alu_temps & (1<<dest.idx)))) { 854 p->program->Base.NumTexIndirections++; 855 p->temps_output = 1<<coord.idx; 856 p->alu_temps = 0; 857 assert(0); /* KW: texture env crossbar */ 858 } 859#endif 860 861 return dest; 862} 863 864 865static struct ureg register_const4f( struct texenv_fragment_program *p, 866 GLfloat s0, 867 GLfloat s1, 868 GLfloat s2, 869 GLfloat s3) 870{ 871 GLfloat values[4]; 872 GLuint idx, swizzle; 873 struct ureg r; 874 values[0] = s0; 875 values[1] = s1; 876 values[2] = s2; 877 values[3] = s3; 878 idx = _mesa_add_unnamed_constant( p->program->Base.Parameters, values, 4, 879 &swizzle ); 880 r = make_ureg(PROGRAM_CONSTANT, idx); 881 r.swz = swizzle; 882 return r; 883} 884 885#define register_scalar_const(p, s0) register_const4f(p, s0, s0, s0, s0) 886#define register_const1f(p, s0) register_const4f(p, s0, 0, 0, 1) 887#define register_const2f(p, s0, s1) register_const4f(p, s0, s1, 0, 1) 888#define register_const3f(p, s0, s1, s2) register_const4f(p, s0, s1, s2, 1) 889 890 891static struct ureg get_one( struct texenv_fragment_program *p ) 892{ 893 if (is_undef(p->one)) 894 p->one = register_scalar_const(p, 1.0); 895 return p->one; 896} 897 898static struct ureg get_half( struct texenv_fragment_program *p ) 899{ 900 if (is_undef(p->half)) 901 p->half = register_scalar_const(p, 0.5); 902 return p->half; 903} 904 905static struct ureg get_zero( struct texenv_fragment_program *p ) 906{ 907 if (is_undef(p->zero)) 908 p->zero = register_scalar_const(p, 0.0); 909 return p->zero; 910} 911 912 913static void program_error( struct texenv_fragment_program *p, const char *msg ) 914{ 915 _mesa_problem(NULL, "%s", msg); 916 p->error = 1; 917} 918 919static struct ureg get_source( struct texenv_fragment_program *p, 920 GLuint src, GLuint unit ) 921{ 922 switch (src) { 923 case SRC_TEXTURE: 924 assert(!is_undef(p->src_texture[unit])); 925 return p->src_texture[unit]; 926 927 case SRC_TEXTURE0: 928 case SRC_TEXTURE1: 929 case SRC_TEXTURE2: 930 case SRC_TEXTURE3: 931 case SRC_TEXTURE4: 932 case SRC_TEXTURE5: 933 case SRC_TEXTURE6: 934 case SRC_TEXTURE7: 935 assert(!is_undef(p->src_texture[src - SRC_TEXTURE0])); 936 return p->src_texture[src - SRC_TEXTURE0]; 937 938 case SRC_CONSTANT: 939 return register_param2(p, STATE_TEXENV_COLOR, unit); 940 941 case SRC_PRIMARY_COLOR: 942 return register_input(p, FRAG_ATTRIB_COL0); 943 944 case SRC_ZERO: 945 return get_zero(p); 946 947 case SRC_PREVIOUS: 948 if (is_undef(p->src_previous)) 949 return register_input(p, FRAG_ATTRIB_COL0); 950 else 951 return p->src_previous; 952 953 default: 954 assert(0); 955 return undef; 956 } 957} 958 959static struct ureg emit_combine_source( struct texenv_fragment_program *p, 960 GLuint mask, 961 GLuint unit, 962 GLuint source, 963 GLuint operand ) 964{ 965 struct ureg arg, src, one; 966 967 src = get_source(p, source, unit); 968 969 switch (operand) { 970 case OPR_ONE_MINUS_SRC_COLOR: 971 /* Get unused tmp, 972 * Emit tmp = 1.0 - arg.xyzw 973 */ 974 arg = get_temp( p ); 975 one = get_one( p ); 976 return emit_arith( p, OPCODE_SUB, arg, mask, 0, one, src, undef); 977 978 case OPR_SRC_ALPHA: 979 if (mask == WRITEMASK_W) 980 return src; 981 else 982 return swizzle1( src, SWIZZLE_W ); 983 case OPR_ONE_MINUS_SRC_ALPHA: 984 /* Get unused tmp, 985 * Emit tmp = 1.0 - arg.wwww 986 */ 987 arg = get_temp(p); 988 one = get_one(p); 989 return emit_arith(p, OPCODE_SUB, arg, mask, 0, 990 one, swizzle1(src, SWIZZLE_W), undef); 991 case OPR_ZERO: 992 return get_zero(p); 993 case OPR_ONE: 994 return get_one(p); 995 case OPR_SRC_COLOR: 996 return src; 997 default: 998 assert(0); 999 return src; 1000 } 1001} 1002 1003/** 1004 * Check if the RGB and Alpha sources and operands match for the given 1005 * texture unit's combinder state. When the RGB and A sources and 1006 * operands match, we can emit fewer instructions. 1007 */ 1008static GLboolean args_match( const struct state_key *key, GLuint unit ) 1009{ 1010 GLuint i, numArgs = key->unit[unit].NumArgsRGB; 1011 1012 for (i = 0; i < numArgs; i++) { 1013 if (key->unit[unit].OptA[i].Source != key->unit[unit].OptRGB[i].Source) 1014 return GL_FALSE; 1015 1016 switch (key->unit[unit].OptA[i].Operand) { 1017 case OPR_SRC_ALPHA: 1018 switch (key->unit[unit].OptRGB[i].Operand) { 1019 case OPR_SRC_COLOR: 1020 case OPR_SRC_ALPHA: 1021 break; 1022 default: 1023 return GL_FALSE; 1024 } 1025 break; 1026 case OPR_ONE_MINUS_SRC_ALPHA: 1027 switch (key->unit[unit].OptRGB[i].Operand) { 1028 case OPR_ONE_MINUS_SRC_COLOR: 1029 case OPR_ONE_MINUS_SRC_ALPHA: 1030 break; 1031 default: 1032 return GL_FALSE; 1033 } 1034 break; 1035 default: 1036 return GL_FALSE; /* impossible */ 1037 } 1038 } 1039 1040 return GL_TRUE; 1041} 1042 1043static struct ureg emit_combine( struct texenv_fragment_program *p, 1044 struct ureg dest, 1045 GLuint mask, 1046 GLboolean saturate, 1047 GLuint unit, 1048 GLuint nr, 1049 GLuint mode, 1050 const struct mode_opt *opt) 1051{ 1052 struct ureg src[MAX_COMBINER_TERMS]; 1053 struct ureg tmp, half; 1054 GLuint i; 1055 1056 assert(nr <= MAX_COMBINER_TERMS); 1057 1058 for (i = 0; i < nr; i++) 1059 src[i] = emit_combine_source( p, mask, unit, opt[i].Source, opt[i].Operand ); 1060 1061 switch (mode) { 1062 case MODE_REPLACE: 1063 if (mask == WRITEMASK_XYZW && !saturate) 1064 return src[0]; 1065 else 1066 return emit_arith( p, OPCODE_MOV, dest, mask, saturate, src[0], undef, undef ); 1067 case MODE_MODULATE: 1068 return emit_arith( p, OPCODE_MUL, dest, mask, saturate, 1069 src[0], src[1], undef ); 1070 case MODE_ADD: 1071 return emit_arith( p, OPCODE_ADD, dest, mask, saturate, 1072 src[0], src[1], undef ); 1073 case MODE_ADD_SIGNED: 1074 /* tmp = arg0 + arg1 1075 * result = tmp - .5 1076 */ 1077 half = get_half(p); 1078 tmp = get_temp( p ); 1079 emit_arith( p, OPCODE_ADD, tmp, mask, 0, src[0], src[1], undef ); 1080 emit_arith( p, OPCODE_SUB, dest, mask, saturate, tmp, half, undef ); 1081 return dest; 1082 case MODE_INTERPOLATE: 1083 /* Arg0 * (Arg2) + Arg1 * (1-Arg2) -- note arguments are reordered: 1084 */ 1085 return emit_arith( p, OPCODE_LRP, dest, mask, saturate, src[2], src[0], src[1] ); 1086 1087 case MODE_SUBTRACT: 1088 return emit_arith( p, OPCODE_SUB, dest, mask, saturate, src[0], src[1], undef ); 1089 1090 case MODE_DOT3_RGBA: 1091 case MODE_DOT3_RGBA_EXT: 1092 case MODE_DOT3_RGB_EXT: 1093 case MODE_DOT3_RGB: { 1094 struct ureg tmp0 = get_temp( p ); 1095 struct ureg tmp1 = get_temp( p ); 1096 struct ureg neg1 = register_scalar_const(p, -1); 1097 struct ureg two = register_scalar_const(p, 2); 1098 1099 /* tmp0 = 2*src0 - 1 1100 * tmp1 = 2*src1 - 1 1101 * 1102 * dst = tmp0 dot3 tmp1 1103 */ 1104 emit_arith( p, OPCODE_MAD, tmp0, WRITEMASK_XYZW, 0, 1105 two, src[0], neg1); 1106 1107 if (memcmp(&src[0], &src[1], sizeof(struct ureg)) == 0) 1108 tmp1 = tmp0; 1109 else 1110 emit_arith( p, OPCODE_MAD, tmp1, WRITEMASK_XYZW, 0, 1111 two, src[1], neg1); 1112 emit_arith( p, OPCODE_DP3, dest, mask, saturate, tmp0, tmp1, undef); 1113 return dest; 1114 } 1115 case MODE_MODULATE_ADD_ATI: 1116 /* Arg0 * Arg2 + Arg1 */ 1117 return emit_arith( p, OPCODE_MAD, dest, mask, saturate, 1118 src[0], src[2], src[1] ); 1119 case MODE_MODULATE_SIGNED_ADD_ATI: { 1120 /* Arg0 * Arg2 + Arg1 - 0.5 */ 1121 struct ureg tmp0 = get_temp(p); 1122 half = get_half(p); 1123 emit_arith( p, OPCODE_MAD, tmp0, mask, 0, src[0], src[2], src[1] ); 1124 emit_arith( p, OPCODE_SUB, dest, mask, saturate, tmp0, half, undef ); 1125 return dest; 1126 } 1127 case MODE_MODULATE_SUBTRACT_ATI: 1128 /* Arg0 * Arg2 - Arg1 */ 1129 emit_arith( p, OPCODE_MAD, dest, mask, 0, src[0], src[2], negate(src[1]) ); 1130 return dest; 1131 case MODE_ADD_PRODUCTS: 1132 /* Arg0 * Arg1 + Arg2 * Arg3 */ 1133 { 1134 struct ureg tmp0 = get_temp(p); 1135 emit_arith( p, OPCODE_MUL, tmp0, mask, 0, src[0], src[1], undef ); 1136 emit_arith( p, OPCODE_MAD, dest, mask, saturate, src[2], src[3], tmp0 ); 1137 } 1138 return dest; 1139 case MODE_ADD_PRODUCTS_SIGNED: 1140 /* Arg0 * Arg1 + Arg2 * Arg3 - 0.5 */ 1141 { 1142 struct ureg tmp0 = get_temp(p); 1143 half = get_half(p); 1144 emit_arith( p, OPCODE_MUL, tmp0, mask, 0, src[0], src[1], undef ); 1145 emit_arith( p, OPCODE_MAD, tmp0, mask, 0, src[2], src[3], tmp0 ); 1146 emit_arith( p, OPCODE_SUB, dest, mask, saturate, tmp0, half, undef ); 1147 } 1148 return dest; 1149 case MODE_BUMP_ENVMAP_ATI: 1150 /* special - not handled here */ 1151 assert(0); 1152 return src[0]; 1153 default: 1154 assert(0); 1155 return src[0]; 1156 } 1157} 1158 1159 1160/** 1161 * Generate instructions for one texture unit's env/combiner mode. 1162 */ 1163static struct ureg 1164emit_texenv(struct texenv_fragment_program *p, GLuint unit) 1165{ 1166 const struct state_key *key = p->state; 1167 GLboolean rgb_saturate, alpha_saturate; 1168 GLuint rgb_shift, alpha_shift; 1169 struct ureg out, dest; 1170 1171 if (!key->unit[unit].enabled) { 1172 return get_source(p, SRC_PREVIOUS, 0); 1173 } 1174 if (key->unit[unit].ModeRGB == MODE_BUMP_ENVMAP_ATI) { 1175 /* this isn't really a env stage delivering a color and handled elsewhere */ 1176 return get_source(p, SRC_PREVIOUS, 0); 1177 } 1178 1179 switch (key->unit[unit].ModeRGB) { 1180 case MODE_DOT3_RGB_EXT: 1181 alpha_shift = key->unit[unit].ScaleShiftA; 1182 rgb_shift = 0; 1183 break; 1184 case MODE_DOT3_RGBA_EXT: 1185 alpha_shift = 0; 1186 rgb_shift = 0; 1187 break; 1188 default: 1189 rgb_shift = key->unit[unit].ScaleShiftRGB; 1190 alpha_shift = key->unit[unit].ScaleShiftA; 1191 break; 1192 } 1193 1194 /* If we'll do rgb/alpha shifting don't saturate in emit_combine(). 1195 * We don't want to clamp twice. 1196 */ 1197 if (rgb_shift) 1198 rgb_saturate = GL_FALSE; /* saturate after rgb shift */ 1199 else if (need_saturate(key->unit[unit].ModeRGB)) 1200 rgb_saturate = GL_TRUE; 1201 else 1202 rgb_saturate = GL_FALSE; 1203 1204 if (alpha_shift) 1205 alpha_saturate = GL_FALSE; /* saturate after alpha shift */ 1206 else if (need_saturate(key->unit[unit].ModeA)) 1207 alpha_saturate = GL_TRUE; 1208 else 1209 alpha_saturate = GL_FALSE; 1210 1211 /* If this is the very last calculation (and various other conditions 1212 * are met), emit directly to the color output register. Otherwise, 1213 * emit to a temporary register. 1214 */ 1215 if (key->separate_specular || 1216 unit != p->last_tex_stage || 1217 alpha_shift || 1218 key->num_draw_buffers != 1 || 1219 rgb_shift) 1220 dest = get_temp( p ); 1221 else 1222 dest = make_ureg(PROGRAM_OUTPUT, FRAG_RESULT_COLOR); 1223 1224 /* Emit the RGB and A combine ops 1225 */ 1226 if (key->unit[unit].ModeRGB == key->unit[unit].ModeA && 1227 args_match(key, unit)) { 1228 out = emit_combine( p, dest, WRITEMASK_XYZW, rgb_saturate, 1229 unit, 1230 key->unit[unit].NumArgsRGB, 1231 key->unit[unit].ModeRGB, 1232 key->unit[unit].OptRGB); 1233 } 1234 else if (key->unit[unit].ModeRGB == MODE_DOT3_RGBA_EXT || 1235 key->unit[unit].ModeRGB == MODE_DOT3_RGBA) { 1236 out = emit_combine( p, dest, WRITEMASK_XYZW, rgb_saturate, 1237 unit, 1238 key->unit[unit].NumArgsRGB, 1239 key->unit[unit].ModeRGB, 1240 key->unit[unit].OptRGB); 1241 } 1242 else { 1243 /* Need to do something to stop from re-emitting identical 1244 * argument calculations here: 1245 */ 1246 out = emit_combine( p, dest, WRITEMASK_XYZ, rgb_saturate, 1247 unit, 1248 key->unit[unit].NumArgsRGB, 1249 key->unit[unit].ModeRGB, 1250 key->unit[unit].OptRGB); 1251 out = emit_combine( p, dest, WRITEMASK_W, alpha_saturate, 1252 unit, 1253 key->unit[unit].NumArgsA, 1254 key->unit[unit].ModeA, 1255 key->unit[unit].OptA); 1256 } 1257 1258 /* Deal with the final shift: 1259 */ 1260 if (alpha_shift || rgb_shift) { 1261 struct ureg shift; 1262 GLboolean saturate = GL_TRUE; /* always saturate at this point */ 1263 1264 if (rgb_shift == alpha_shift) { 1265 shift = register_scalar_const(p, (GLfloat)(1<<rgb_shift)); 1266 } 1267 else { 1268 shift = register_const4f(p, 1269 (GLfloat)(1<<rgb_shift), 1270 (GLfloat)(1<<rgb_shift), 1271 (GLfloat)(1<<rgb_shift), 1272 (GLfloat)(1<<alpha_shift)); 1273 } 1274 return emit_arith( p, OPCODE_MUL, dest, WRITEMASK_XYZW, 1275 saturate, out, shift, undef ); 1276 } 1277 else 1278 return out; 1279} 1280 1281 1282/** 1283 * Generate instruction for getting a texture source term. 1284 */ 1285static void load_texture( struct texenv_fragment_program *p, GLuint unit ) 1286{ 1287 if (is_undef(p->src_texture[unit])) { 1288 const GLuint texTarget = p->state->unit[unit].source_index; 1289 struct ureg texcoord; 1290 struct ureg tmp = get_tex_temp( p ); 1291 1292 if (is_undef(p->texcoord_tex[unit])) { 1293 texcoord = register_input(p, FRAG_ATTRIB_TEX0+unit); 1294 } 1295 else { 1296 /* might want to reuse this reg for tex output actually */ 1297 texcoord = p->texcoord_tex[unit]; 1298 } 1299 1300 /* TODO: Use D0_MASK_XY where possible. 1301 */ 1302 if (p->state->unit[unit].enabled) { 1303 GLboolean shadow = GL_FALSE; 1304 1305 if (p->state->unit[unit].shadow) { 1306 p->program->Base.ShadowSamplers |= 1 << unit; 1307 shadow = GL_TRUE; 1308 } 1309 1310 p->src_texture[unit] = emit_texld( p, OPCODE_TXP, 1311 tmp, WRITEMASK_XYZW, 1312 unit, texTarget, shadow, 1313 texcoord ); 1314 1315 p->program->Base.SamplersUsed |= (1 << unit); 1316 /* This identity mapping should already be in place 1317 * (see _mesa_init_program_struct()) but let's be safe. 1318 */ 1319 p->program->Base.SamplerUnits[unit] = unit; 1320 } 1321 else 1322 p->src_texture[unit] = get_zero(p); 1323 1324 if (p->state->unit[unit].texture_cyl_wrap) { 1325 /* set flag which is checked by Mesa->Gallium program translation */ 1326 p->program->Base.InputFlags[0] |= PROG_PARAM_BIT_CYL_WRAP; 1327 } 1328 1329 } 1330} 1331 1332static GLboolean load_texenv_source( struct texenv_fragment_program *p, 1333 GLuint src, GLuint unit ) 1334{ 1335 switch (src) { 1336 case SRC_TEXTURE: 1337 load_texture(p, unit); 1338 break; 1339 1340 case SRC_TEXTURE0: 1341 case SRC_TEXTURE1: 1342 case SRC_TEXTURE2: 1343 case SRC_TEXTURE3: 1344 case SRC_TEXTURE4: 1345 case SRC_TEXTURE5: 1346 case SRC_TEXTURE6: 1347 case SRC_TEXTURE7: 1348 load_texture(p, src - SRC_TEXTURE0); 1349 break; 1350 1351 default: 1352 /* not a texture src - do nothing */ 1353 break; 1354 } 1355 1356 return GL_TRUE; 1357} 1358 1359 1360/** 1361 * Generate instructions for loading all texture source terms. 1362 */ 1363static GLboolean 1364load_texunit_sources( struct texenv_fragment_program *p, GLuint unit ) 1365{ 1366 const struct state_key *key = p->state; 1367 GLuint i; 1368 1369 for (i = 0; i < key->unit[unit].NumArgsRGB; i++) { 1370 load_texenv_source( p, key->unit[unit].OptRGB[i].Source, unit ); 1371 } 1372 1373 for (i = 0; i < key->unit[unit].NumArgsA; i++) { 1374 load_texenv_source( p, key->unit[unit].OptA[i].Source, unit ); 1375 } 1376 1377 return GL_TRUE; 1378} 1379 1380/** 1381 * Generate instructions for loading bump map textures. 1382 */ 1383static GLboolean 1384load_texunit_bumpmap( struct texenv_fragment_program *p, GLuint unit ) 1385{ 1386 const struct state_key *key = p->state; 1387 GLuint bumpedUnitNr = key->unit[unit].OptRGB[1].Source - SRC_TEXTURE0; 1388 struct ureg texcDst, bumpMapRes; 1389 struct ureg constdudvcolor = register_const4f(p, 0.0, 0.0, 0.0, 1.0); 1390 struct ureg texcSrc = register_input(p, FRAG_ATTRIB_TEX0 + bumpedUnitNr); 1391 struct ureg rotMat0 = register_param3( p, STATE_INTERNAL, STATE_ROT_MATRIX_0, unit ); 1392 struct ureg rotMat1 = register_param3( p, STATE_INTERNAL, STATE_ROT_MATRIX_1, unit ); 1393 1394 load_texenv_source( p, unit + SRC_TEXTURE0, unit ); 1395 1396 bumpMapRes = get_source(p, key->unit[unit].OptRGB[0].Source, unit); 1397 texcDst = get_tex_temp( p ); 1398 p->texcoord_tex[bumpedUnitNr] = texcDst; 1399 1400 /* Apply rot matrix and add coords to be available in next phase. 1401 * dest = (Arg0.xxxx * rotMat0 + Arg1) + (Arg0.yyyy * rotMat1) 1402 * note only 2 coords are affected the rest are left unchanged (mul by 0) 1403 */ 1404 emit_arith( p, OPCODE_MAD, texcDst, WRITEMASK_XYZW, 0, 1405 swizzle1(bumpMapRes, SWIZZLE_X), rotMat0, texcSrc ); 1406 emit_arith( p, OPCODE_MAD, texcDst, WRITEMASK_XYZW, 0, 1407 swizzle1(bumpMapRes, SWIZZLE_Y), rotMat1, texcDst ); 1408 1409 /* Move 0,0,0,1 into bumpmap src if someone (crossbar) is foolish 1410 * enough to access this later, should optimize away. 1411 */ 1412 emit_arith( p, OPCODE_MOV, bumpMapRes, WRITEMASK_XYZW, 0, 1413 constdudvcolor, undef, undef ); 1414 1415 return GL_TRUE; 1416} 1417 1418/** 1419 * Generate a new fragment program which implements the context's 1420 * current texture env/combine mode. 1421 */ 1422static void 1423create_new_program(struct gl_context *ctx, struct state_key *key, 1424 struct gl_fragment_program *program) 1425{ 1426 struct prog_instruction instBuffer[MAX_INSTRUCTIONS]; 1427 struct texenv_fragment_program p; 1428 GLuint unit; 1429 struct ureg cf, out; 1430 int i; 1431 1432 memset(&p, 0, sizeof(p)); 1433 p.state = key; 1434 p.program = program; 1435 1436 /* During code generation, use locally-allocated instruction buffer, 1437 * then alloc dynamic storage below. 1438 */ 1439 p.program->Base.Instructions = instBuffer; 1440 p.program->Base.Target = GL_FRAGMENT_PROGRAM_ARB; 1441 p.program->Base.String = NULL; 1442 p.program->Base.NumTexIndirections = 1; /* is this right? */ 1443 p.program->Base.NumTexInstructions = 0; 1444 p.program->Base.NumAluInstructions = 0; 1445 p.program->Base.NumInstructions = 0; 1446 p.program->Base.NumTemporaries = 0; 1447 p.program->Base.NumParameters = 0; 1448 p.program->Base.NumAttributes = 0; 1449 p.program->Base.NumAddressRegs = 0; 1450 p.program->Base.Parameters = _mesa_new_parameter_list(); 1451 p.program->Base.InputsRead = 0x0; 1452 1453 if (key->num_draw_buffers == 1) 1454 p.program->Base.OutputsWritten = 1 << FRAG_RESULT_COLOR; 1455 else { 1456 for (i = 0; i < key->num_draw_buffers; i++) 1457 p.program->Base.OutputsWritten |= (1 << (FRAG_RESULT_DATA0 + i)); 1458 } 1459 1460 for (unit = 0; unit < ctx->Const.MaxTextureUnits; unit++) { 1461 p.src_texture[unit] = undef; 1462 p.texcoord_tex[unit] = undef; 1463 } 1464 1465 p.src_previous = undef; 1466 p.half = undef; 1467 p.zero = undef; 1468 p.one = undef; 1469 1470 p.last_tex_stage = 0; 1471 release_temps(ctx, &p); 1472 1473 if (key->enabled_units && key->num_draw_buffers) { 1474 GLboolean needbumpstage = GL_FALSE; 1475 1476 /* Zeroth pass - bump map textures first */ 1477 for (unit = 0; unit < key->nr_enabled_units; unit++) 1478 if (key->unit[unit].enabled && 1479 key->unit[unit].ModeRGB == MODE_BUMP_ENVMAP_ATI) { 1480 needbumpstage = GL_TRUE; 1481 load_texunit_bumpmap( &p, unit ); 1482 } 1483 if (needbumpstage) 1484 p.program->Base.NumTexIndirections++; 1485 1486 /* First pass - to support texture_env_crossbar, first identify 1487 * all referenced texture sources and emit texld instructions 1488 * for each: 1489 */ 1490 for (unit = 0; unit < key->nr_enabled_units; unit++) 1491 if (key->unit[unit].enabled) { 1492 load_texunit_sources( &p, unit ); 1493 p.last_tex_stage = unit; 1494 } 1495 1496 /* Second pass - emit combine instructions to build final color: 1497 */ 1498 for (unit = 0; unit < key->nr_enabled_units; unit++) 1499 if (key->unit[unit].enabled) { 1500 p.src_previous = emit_texenv( &p, unit ); 1501 reserve_temp(&p, p.src_previous); /* don't re-use this temp reg */ 1502 release_temps(ctx, &p); /* release all temps */ 1503 } 1504 } 1505 1506 cf = get_source( &p, SRC_PREVIOUS, 0 ); 1507 1508 for (i = 0; i < key->num_draw_buffers; i++) { 1509 if (key->num_draw_buffers == 1) 1510 out = make_ureg( PROGRAM_OUTPUT, FRAG_RESULT_COLOR ); 1511 else { 1512 out = make_ureg( PROGRAM_OUTPUT, FRAG_RESULT_DATA0 + i ); 1513 } 1514 1515 if (key->separate_specular) { 1516 /* Emit specular add. 1517 */ 1518 struct ureg s = register_input(&p, FRAG_ATTRIB_COL1); 1519 emit_arith( &p, OPCODE_ADD, out, WRITEMASK_XYZ, 0, cf, s, undef ); 1520 emit_arith( &p, OPCODE_MOV, out, WRITEMASK_W, 0, cf, undef, undef ); 1521 } 1522 else if (memcmp(&cf, &out, sizeof(cf)) != 0) { 1523 /* Will wind up in here if no texture enabled or a couple of 1524 * other scenarios (GL_REPLACE for instance). 1525 */ 1526 emit_arith( &p, OPCODE_MOV, out, WRITEMASK_XYZW, 0, cf, undef, undef ); 1527 } 1528 } 1529 /* Finish up: 1530 */ 1531 emit_arith( &p, OPCODE_END, undef, WRITEMASK_XYZW, 0, undef, undef, undef); 1532 1533 /* Allocate final instruction array. This has to be done before calling 1534 * _mesa_append_fog_code because that function frees the Base.Instructions. 1535 * At this point, Base.Instructions points to stack data, so it's a really 1536 * bad idea to free it. 1537 */ 1538 p.program->Base.Instructions 1539 = _mesa_alloc_instructions(p.program->Base.NumInstructions); 1540 if (!p.program->Base.Instructions) { 1541 _mesa_error(ctx, GL_OUT_OF_MEMORY, 1542 "generating tex env program"); 1543 return; 1544 } 1545 _mesa_copy_instructions(p.program->Base.Instructions, instBuffer, 1546 p.program->Base.NumInstructions); 1547 1548 /* Append fog code. This must be done before checking the program against 1549 * the limits becuase it will potentially add some instructions. 1550 */ 1551 if (key->fog_enabled) { 1552 _mesa_append_fog_code(ctx, p.program, ctx->Fog.Mode, GL_FALSE); 1553 } 1554 1555 if (p.program->Base.NumTexIndirections > ctx->Const.FragmentProgram.MaxTexIndirections) 1556 program_error(&p, "Exceeded max nr indirect texture lookups"); 1557 1558 if (p.program->Base.NumTexInstructions > ctx->Const.FragmentProgram.MaxTexInstructions) 1559 program_error(&p, "Exceeded max TEX instructions"); 1560 1561 if (p.program->Base.NumAluInstructions > ctx->Const.FragmentProgram.MaxAluInstructions) 1562 program_error(&p, "Exceeded max ALU instructions"); 1563 1564 ASSERT(p.program->Base.NumInstructions <= MAX_INSTRUCTIONS); 1565 1566 /* Notify driver the fragment program has (actually) changed. 1567 */ 1568 if (ctx->Driver.ProgramStringNotify) { 1569 GLboolean ok = ctx->Driver.ProgramStringNotify(ctx, 1570 GL_FRAGMENT_PROGRAM_ARB, 1571 &p.program->Base); 1572 /* Driver should be able to handle any texenv programs as long as 1573 * the driver correctly reported max number of texture units correctly, 1574 * etc. 1575 */ 1576 ASSERT(ok); 1577 (void) ok; /* silence unused var warning */ 1578 } 1579 1580 if (DISASSEM) { 1581 _mesa_print_program(&p.program->Base); 1582 printf("\n"); 1583 } 1584} 1585 1586extern "C" { 1587 1588/** 1589 * Return a fragment program which implements the current 1590 * fixed-function texture, fog and color-sum operations. 1591 */ 1592struct gl_fragment_program * 1593_mesa_get_fixed_func_fragment_program(struct gl_context *ctx) 1594{ 1595 struct gl_fragment_program *prog; 1596 struct state_key key; 1597 GLuint keySize; 1598 1599 keySize = make_state_key(ctx, &key); 1600 1601 prog = (struct gl_fragment_program *) 1602 _mesa_search_program_cache(ctx->FragmentProgram.Cache, 1603 &key, keySize); 1604 1605 if (!prog) { 1606 prog = (struct gl_fragment_program *) 1607 ctx->Driver.NewProgram(ctx, GL_FRAGMENT_PROGRAM_ARB, 0); 1608 1609 create_new_program(ctx, &key, prog); 1610 1611 _mesa_program_cache_insert(ctx, ctx->FragmentProgram.Cache, 1612 &key, keySize, &prog->Base); 1613 } 1614 1615 return prog; 1616} 1617 1618} 1619