1/* 2 * Mesa 3-D graphics library 3 * 4 * Copyright (C) 1999-2008 Brian Paul All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice shall be included 14 * in all copies or substantial portions of the Software. 15 * 16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 * OTHER DEALINGS IN THE SOFTWARE. 23 */ 24 25/** 26 * \file prog_execute.c 27 * Software interpreter for vertex/fragment programs. 28 * \author Brian Paul 29 */ 30 31/* 32 * NOTE: we do everything in single-precision floating point; we don't 33 * currently observe the single/half/fixed-precision qualifiers. 34 * 35 */ 36 37 38#include "c99_math.h" 39#include "main/errors.h" 40#include "main/glheader.h" 41#include "main/macros.h" 42#include "main/mtypes.h" 43#include "prog_execute.h" 44#include "prog_instruction.h" 45#include "prog_parameter.h" 46#include "prog_print.h" 47#include "prog_noise.h" 48 49 50/* debug predicate */ 51#define DEBUG_PROG 0 52 53 54/** 55 * Set x to positive or negative infinity. 56 */ 57#define SET_POS_INFINITY(x) \ 58 do { \ 59 fi_type fi; \ 60 fi.i = 0x7F800000; \ 61 x = fi.f; \ 62 } while (0) 63#define SET_NEG_INFINITY(x) \ 64 do { \ 65 fi_type fi; \ 66 fi.i = 0xFF800000; \ 67 x = fi.f; \ 68 } while (0) 69 70#define SET_FLOAT_BITS(x, bits) ((fi_type *) (void *) &(x))->i = bits 71 72 73static const GLfloat ZeroVec[4] = { 0.0F, 0.0F, 0.0F, 0.0F }; 74 75 76/** 77 * Return a pointer to the 4-element float vector specified by the given 78 * source register. 79 */ 80static inline const GLfloat * 81get_src_register_pointer(const struct prog_src_register *source, 82 const struct gl_program_machine *machine) 83{ 84 const struct gl_program *prog = machine->CurProgram; 85 GLint reg = source->Index; 86 87 if (source->RelAddr) { 88 /* add address register value to src index/offset */ 89 reg += machine->AddressReg[0][0]; 90 if (reg < 0) { 91 return ZeroVec; 92 } 93 } 94 95 switch (source->File) { 96 case PROGRAM_TEMPORARY: 97 if (reg >= MAX_PROGRAM_TEMPS) 98 return ZeroVec; 99 return machine->Temporaries[reg]; 100 101 case PROGRAM_INPUT: 102 if (prog->Target == GL_VERTEX_PROGRAM_ARB) { 103 if (reg >= VERT_ATTRIB_MAX) 104 return ZeroVec; 105 return machine->VertAttribs[reg]; 106 } 107 else { 108 if (reg >= VARYING_SLOT_MAX) 109 return ZeroVec; 110 return machine->Attribs[reg][machine->CurElement]; 111 } 112 113 case PROGRAM_OUTPUT: 114 if (reg >= MAX_PROGRAM_OUTPUTS) 115 return ZeroVec; 116 return machine->Outputs[reg]; 117 118 case PROGRAM_STATE_VAR: 119 /* Fallthrough */ 120 case PROGRAM_CONSTANT: 121 /* Fallthrough */ 122 case PROGRAM_UNIFORM: { 123 if (reg >= (GLint) prog->Parameters->NumParameters) 124 return ZeroVec; 125 126 unsigned pvo = prog->Parameters->ParameterValueOffset[reg]; 127 return (GLfloat *) prog->Parameters->ParameterValues + pvo; 128 } 129 case PROGRAM_SYSTEM_VALUE: 130 assert(reg < (GLint) ARRAY_SIZE(machine->SystemValues)); 131 return machine->SystemValues[reg]; 132 133 default: 134 _mesa_problem(NULL, 135 "Invalid src register file %d in get_src_register_pointer()", 136 source->File); 137 return ZeroVec; 138 } 139} 140 141 142/** 143 * Return a pointer to the 4-element float vector specified by the given 144 * destination register. 145 */ 146static inline GLfloat * 147get_dst_register_pointer(const struct prog_dst_register *dest, 148 struct gl_program_machine *machine) 149{ 150 static GLfloat dummyReg[4]; 151 GLint reg = dest->Index; 152 153 if (dest->RelAddr) { 154 /* add address register value to src index/offset */ 155 reg += machine->AddressReg[0][0]; 156 if (reg < 0) { 157 return dummyReg; 158 } 159 } 160 161 switch (dest->File) { 162 case PROGRAM_TEMPORARY: 163 if (reg >= MAX_PROGRAM_TEMPS) 164 return dummyReg; 165 return machine->Temporaries[reg]; 166 167 case PROGRAM_OUTPUT: 168 if (reg >= MAX_PROGRAM_OUTPUTS) 169 return dummyReg; 170 return machine->Outputs[reg]; 171 172 default: 173 _mesa_problem(NULL, 174 "Invalid dest register file %d in get_dst_register_pointer()", 175 dest->File); 176 return dummyReg; 177 } 178} 179 180 181 182/** 183 * Fetch a 4-element float vector from the given source register. 184 * Apply swizzling and negating as needed. 185 */ 186static void 187fetch_vector4(const struct prog_src_register *source, 188 const struct gl_program_machine *machine, GLfloat result[4]) 189{ 190 const GLfloat *src = get_src_register_pointer(source, machine); 191 192 if (source->Swizzle == SWIZZLE_NOOP) { 193 /* no swizzling */ 194 COPY_4V(result, src); 195 } 196 else { 197 assert(GET_SWZ(source->Swizzle, 0) <= 3); 198 assert(GET_SWZ(source->Swizzle, 1) <= 3); 199 assert(GET_SWZ(source->Swizzle, 2) <= 3); 200 assert(GET_SWZ(source->Swizzle, 3) <= 3); 201 result[0] = src[GET_SWZ(source->Swizzle, 0)]; 202 result[1] = src[GET_SWZ(source->Swizzle, 1)]; 203 result[2] = src[GET_SWZ(source->Swizzle, 2)]; 204 result[3] = src[GET_SWZ(source->Swizzle, 3)]; 205 } 206 207 if (source->Negate) { 208 assert(source->Negate == NEGATE_XYZW); 209 result[0] = -result[0]; 210 result[1] = -result[1]; 211 result[2] = -result[2]; 212 result[3] = -result[3]; 213 } 214 215#ifdef NAN_CHECK 216 assert(!IS_INF_OR_NAN(result[0])); 217 assert(!IS_INF_OR_NAN(result[0])); 218 assert(!IS_INF_OR_NAN(result[0])); 219 assert(!IS_INF_OR_NAN(result[0])); 220#endif 221} 222 223 224/** 225 * Fetch the derivative with respect to X or Y for the given register. 226 * XXX this currently only works for fragment program input attribs. 227 */ 228static void 229fetch_vector4_deriv(const struct prog_src_register *source, 230 const struct gl_program_machine *machine, 231 char xOrY, GLfloat result[4]) 232{ 233 if (source->File == PROGRAM_INPUT && 234 source->Index < (GLint) machine->NumDeriv) { 235 const GLint col = machine->CurElement; 236 const GLfloat w = machine->Attribs[VARYING_SLOT_POS][col][3]; 237 const GLfloat invQ = 1.0f / w; 238 GLfloat deriv[4]; 239 240 if (xOrY == 'X') { 241 deriv[0] = machine->DerivX[source->Index][0] * invQ; 242 deriv[1] = machine->DerivX[source->Index][1] * invQ; 243 deriv[2] = machine->DerivX[source->Index][2] * invQ; 244 deriv[3] = machine->DerivX[source->Index][3] * invQ; 245 } 246 else { 247 deriv[0] = machine->DerivY[source->Index][0] * invQ; 248 deriv[1] = machine->DerivY[source->Index][1] * invQ; 249 deriv[2] = machine->DerivY[source->Index][2] * invQ; 250 deriv[3] = machine->DerivY[source->Index][3] * invQ; 251 } 252 253 result[0] = deriv[GET_SWZ(source->Swizzle, 0)]; 254 result[1] = deriv[GET_SWZ(source->Swizzle, 1)]; 255 result[2] = deriv[GET_SWZ(source->Swizzle, 2)]; 256 result[3] = deriv[GET_SWZ(source->Swizzle, 3)]; 257 258 if (source->Negate) { 259 assert(source->Negate == NEGATE_XYZW); 260 result[0] = -result[0]; 261 result[1] = -result[1]; 262 result[2] = -result[2]; 263 result[3] = -result[3]; 264 } 265 } 266 else { 267 ASSIGN_4V(result, 0.0, 0.0, 0.0, 0.0); 268 } 269} 270 271 272/** 273 * As above, but only return result[0] element. 274 */ 275static void 276fetch_vector1(const struct prog_src_register *source, 277 const struct gl_program_machine *machine, GLfloat result[4]) 278{ 279 const GLfloat *src = get_src_register_pointer(source, machine); 280 281 result[0] = src[GET_SWZ(source->Swizzle, 0)]; 282 283 if (source->Negate) { 284 result[0] = -result[0]; 285 } 286} 287 288 289/** 290 * Fetch texel from texture. Use partial derivatives when possible. 291 */ 292static inline void 293fetch_texel(struct gl_context *ctx, 294 const struct gl_program_machine *machine, 295 const struct prog_instruction *inst, 296 const GLfloat texcoord[4], GLfloat lodBias, 297 GLfloat color[4]) 298{ 299 const GLuint unit = machine->Samplers[inst->TexSrcUnit]; 300 301 /* Note: we only have the right derivatives for fragment input attribs. 302 */ 303 if (machine->NumDeriv > 0 && 304 inst->SrcReg[0].File == PROGRAM_INPUT && 305 inst->SrcReg[0].Index == VARYING_SLOT_TEX0 + inst->TexSrcUnit) { 306 /* simple texture fetch for which we should have derivatives */ 307 GLuint attr = inst->SrcReg[0].Index; 308 machine->FetchTexelDeriv(ctx, texcoord, 309 machine->DerivX[attr], 310 machine->DerivY[attr], 311 lodBias, unit, color); 312 } 313 else { 314 machine->FetchTexelLod(ctx, texcoord, lodBias, unit, color); 315 } 316} 317 318 319/** 320 * Store 4 floats into a register. Observe the instructions saturate and 321 * set-condition-code flags. 322 */ 323static void 324store_vector4(const struct prog_instruction *inst, 325 struct gl_program_machine *machine, const GLfloat value[4]) 326{ 327 const struct prog_dst_register *dstReg = &(inst->DstReg); 328 const GLboolean clamp = inst->Saturate; 329 GLuint writeMask = dstReg->WriteMask; 330 GLfloat clampedValue[4]; 331 GLfloat *dst = get_dst_register_pointer(dstReg, machine); 332 333#if 0 334 if (value[0] > 1.0e10 || 335 IS_INF_OR_NAN(value[0]) || 336 IS_INF_OR_NAN(value[1]) || 337 IS_INF_OR_NAN(value[2]) || IS_INF_OR_NAN(value[3])) 338 printf("store %g %g %g %g\n", value[0], value[1], value[2], value[3]); 339#endif 340 341 if (clamp) { 342 clampedValue[0] = CLAMP(value[0], 0.0F, 1.0F); 343 clampedValue[1] = CLAMP(value[1], 0.0F, 1.0F); 344 clampedValue[2] = CLAMP(value[2], 0.0F, 1.0F); 345 clampedValue[3] = CLAMP(value[3], 0.0F, 1.0F); 346 value = clampedValue; 347 } 348 349#ifdef NAN_CHECK 350 assert(!IS_INF_OR_NAN(value[0])); 351 assert(!IS_INF_OR_NAN(value[0])); 352 assert(!IS_INF_OR_NAN(value[0])); 353 assert(!IS_INF_OR_NAN(value[0])); 354#endif 355 356 if (writeMask & WRITEMASK_X) 357 dst[0] = value[0]; 358 if (writeMask & WRITEMASK_Y) 359 dst[1] = value[1]; 360 if (writeMask & WRITEMASK_Z) 361 dst[2] = value[2]; 362 if (writeMask & WRITEMASK_W) 363 dst[3] = value[3]; 364} 365 366 367/** 368 * Execute the given vertex/fragment program. 369 * 370 * \param ctx rendering context 371 * \param program the program to execute 372 * \param machine machine state (must be initialized) 373 * \return GL_TRUE if program completed or GL_FALSE if program executed KIL. 374 */ 375GLboolean 376_mesa_execute_program(struct gl_context * ctx, 377 const struct gl_program *program, 378 struct gl_program_machine *machine) 379{ 380 const GLuint numInst = program->arb.NumInstructions; 381 const GLuint maxExec = 65536; 382 GLuint pc, numExec = 0; 383 384 machine->CurProgram = program; 385 386 if (DEBUG_PROG) { 387 printf("execute program %u --------------------\n", program->Id); 388 } 389 390 if (program->Target == GL_VERTEX_PROGRAM_ARB) { 391 machine->EnvParams = ctx->VertexProgram.Parameters; 392 } 393 else { 394 machine->EnvParams = ctx->FragmentProgram.Parameters; 395 } 396 397 for (pc = 0; pc < numInst; pc++) { 398 const struct prog_instruction *inst = program->arb.Instructions + pc; 399 400 if (DEBUG_PROG) { 401 _mesa_print_instruction(inst); 402 } 403 404 switch (inst->Opcode) { 405 case OPCODE_ABS: 406 { 407 GLfloat a[4], result[4]; 408 fetch_vector4(&inst->SrcReg[0], machine, a); 409 result[0] = fabsf(a[0]); 410 result[1] = fabsf(a[1]); 411 result[2] = fabsf(a[2]); 412 result[3] = fabsf(a[3]); 413 store_vector4(inst, machine, result); 414 } 415 break; 416 case OPCODE_ADD: 417 { 418 GLfloat a[4], b[4], result[4]; 419 fetch_vector4(&inst->SrcReg[0], machine, a); 420 fetch_vector4(&inst->SrcReg[1], machine, b); 421 result[0] = a[0] + b[0]; 422 result[1] = a[1] + b[1]; 423 result[2] = a[2] + b[2]; 424 result[3] = a[3] + b[3]; 425 store_vector4(inst, machine, result); 426 if (DEBUG_PROG) { 427 printf("ADD (%g %g %g %g) = (%g %g %g %g) + (%g %g %g %g)\n", 428 result[0], result[1], result[2], result[3], 429 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]); 430 } 431 } 432 break; 433 case OPCODE_ARL: 434 { 435 GLfloat t[4]; 436 fetch_vector4(&inst->SrcReg[0], machine, t); 437 machine->AddressReg[0][0] = IFLOOR(t[0]); 438 if (DEBUG_PROG) { 439 printf("ARL %d\n", machine->AddressReg[0][0]); 440 } 441 } 442 break; 443 case OPCODE_BGNLOOP: 444 /* no-op */ 445 assert(program->arb.Instructions[inst->BranchTarget].Opcode 446 == OPCODE_ENDLOOP); 447 break; 448 case OPCODE_ENDLOOP: 449 /* subtract 1 here since pc is incremented by for(pc) loop */ 450 assert(program->arb.Instructions[inst->BranchTarget].Opcode 451 == OPCODE_BGNLOOP); 452 pc = inst->BranchTarget - 1; /* go to matching BNGLOOP */ 453 break; 454 case OPCODE_BGNSUB: /* begin subroutine */ 455 break; 456 case OPCODE_ENDSUB: /* end subroutine */ 457 break; 458 case OPCODE_BRK: /* break out of loop (conditional) */ 459 assert(program->arb.Instructions[inst->BranchTarget].Opcode 460 == OPCODE_ENDLOOP); 461 /* break out of loop */ 462 /* pc++ at end of for-loop will put us after the ENDLOOP inst */ 463 pc = inst->BranchTarget; 464 break; 465 case OPCODE_CONT: /* continue loop (conditional) */ 466 assert(program->arb.Instructions[inst->BranchTarget].Opcode 467 == OPCODE_ENDLOOP); 468 /* continue at ENDLOOP */ 469 /* Subtract 1 here since we'll do pc++ at end of for-loop */ 470 pc = inst->BranchTarget - 1; 471 break; 472 case OPCODE_CAL: /* Call subroutine (conditional) */ 473 /* call the subroutine */ 474 if (machine->StackDepth >= MAX_PROGRAM_CALL_DEPTH) { 475 return GL_TRUE; /* Per GL_NV_vertex_program2 spec */ 476 } 477 machine->CallStack[machine->StackDepth++] = pc + 1; /* next inst */ 478 /* Subtract 1 here since we'll do pc++ at end of for-loop */ 479 pc = inst->BranchTarget - 1; 480 break; 481 case OPCODE_CMP: 482 { 483 GLfloat a[4], b[4], c[4], result[4]; 484 fetch_vector4(&inst->SrcReg[0], machine, a); 485 fetch_vector4(&inst->SrcReg[1], machine, b); 486 fetch_vector4(&inst->SrcReg[2], machine, c); 487 result[0] = a[0] < 0.0F ? b[0] : c[0]; 488 result[1] = a[1] < 0.0F ? b[1] : c[1]; 489 result[2] = a[2] < 0.0F ? b[2] : c[2]; 490 result[3] = a[3] < 0.0F ? b[3] : c[3]; 491 store_vector4(inst, machine, result); 492 if (DEBUG_PROG) { 493 printf("CMP (%g %g %g %g) = (%g %g %g %g) < 0 ? (%g %g %g %g) : (%g %g %g %g)\n", 494 result[0], result[1], result[2], result[3], 495 a[0], a[1], a[2], a[3], 496 b[0], b[1], b[2], b[3], 497 c[0], c[1], c[2], c[3]); 498 } 499 } 500 break; 501 case OPCODE_COS: 502 { 503 GLfloat a[4], result[4]; 504 fetch_vector1(&inst->SrcReg[0], machine, a); 505 result[0] = result[1] = result[2] = result[3] 506 = cosf(a[0]); 507 store_vector4(inst, machine, result); 508 } 509 break; 510 case OPCODE_DDX: /* Partial derivative with respect to X */ 511 { 512 GLfloat result[4]; 513 fetch_vector4_deriv(&inst->SrcReg[0], machine, 'X', result); 514 store_vector4(inst, machine, result); 515 } 516 break; 517 case OPCODE_DDY: /* Partial derivative with respect to Y */ 518 { 519 GLfloat result[4]; 520 fetch_vector4_deriv(&inst->SrcReg[0], machine, 'Y', result); 521 store_vector4(inst, machine, result); 522 } 523 break; 524 case OPCODE_DP2: 525 { 526 GLfloat a[4], b[4], result[4]; 527 fetch_vector4(&inst->SrcReg[0], machine, a); 528 fetch_vector4(&inst->SrcReg[1], machine, b); 529 result[0] = result[1] = result[2] = result[3] = DOT2(a, b); 530 store_vector4(inst, machine, result); 531 if (DEBUG_PROG) { 532 printf("DP2 %g = (%g %g) . (%g %g)\n", 533 result[0], a[0], a[1], b[0], b[1]); 534 } 535 } 536 break; 537 case OPCODE_DP3: 538 { 539 GLfloat a[4], b[4], result[4]; 540 fetch_vector4(&inst->SrcReg[0], machine, a); 541 fetch_vector4(&inst->SrcReg[1], machine, b); 542 result[0] = result[1] = result[2] = result[3] = DOT3(a, b); 543 store_vector4(inst, machine, result); 544 if (DEBUG_PROG) { 545 printf("DP3 %g = (%g %g %g) . (%g %g %g)\n", 546 result[0], a[0], a[1], a[2], b[0], b[1], b[2]); 547 } 548 } 549 break; 550 case OPCODE_DP4: 551 { 552 GLfloat a[4], b[4], result[4]; 553 fetch_vector4(&inst->SrcReg[0], machine, a); 554 fetch_vector4(&inst->SrcReg[1], machine, b); 555 result[0] = result[1] = result[2] = result[3] = DOT4(a, b); 556 store_vector4(inst, machine, result); 557 if (DEBUG_PROG) { 558 printf("DP4 %g = (%g, %g %g %g) . (%g, %g %g %g)\n", 559 result[0], a[0], a[1], a[2], a[3], 560 b[0], b[1], b[2], b[3]); 561 } 562 } 563 break; 564 case OPCODE_DPH: 565 { 566 GLfloat a[4], b[4], result[4]; 567 fetch_vector4(&inst->SrcReg[0], machine, a); 568 fetch_vector4(&inst->SrcReg[1], machine, b); 569 result[0] = result[1] = result[2] = result[3] = DOT3(a, b) + b[3]; 570 store_vector4(inst, machine, result); 571 } 572 break; 573 case OPCODE_DST: /* Distance vector */ 574 { 575 GLfloat a[4], b[4], result[4]; 576 fetch_vector4(&inst->SrcReg[0], machine, a); 577 fetch_vector4(&inst->SrcReg[1], machine, b); 578 result[0] = 1.0F; 579 result[1] = a[1] * b[1]; 580 result[2] = a[2]; 581 result[3] = b[3]; 582 store_vector4(inst, machine, result); 583 } 584 break; 585 case OPCODE_EXP: 586 { 587 GLfloat t[4], q[4], floor_t0; 588 fetch_vector1(&inst->SrcReg[0], machine, t); 589 floor_t0 = floorf(t[0]); 590 if (floor_t0 > FLT_MAX_EXP) { 591 SET_POS_INFINITY(q[0]); 592 SET_POS_INFINITY(q[2]); 593 } 594 else if (floor_t0 < FLT_MIN_EXP) { 595 q[0] = 0.0F; 596 q[2] = 0.0F; 597 } 598 else { 599 q[0] = ldexpf(1.0, (int) floor_t0); 600 /* Note: GL_NV_vertex_program expects 601 * result.z = result.x * APPX(result.y) 602 * We do what the ARB extension says. 603 */ 604 q[2] = exp2f(t[0]); 605 } 606 q[1] = t[0] - floor_t0; 607 q[3] = 1.0F; 608 store_vector4( inst, machine, q ); 609 } 610 break; 611 case OPCODE_EX2: /* Exponential base 2 */ 612 { 613 GLfloat a[4], result[4], val; 614 fetch_vector1(&inst->SrcReg[0], machine, a); 615 val = exp2f(a[0]); 616 /* 617 if (IS_INF_OR_NAN(val)) 618 val = 1.0e10; 619 */ 620 result[0] = result[1] = result[2] = result[3] = val; 621 store_vector4(inst, machine, result); 622 } 623 break; 624 case OPCODE_FLR: 625 { 626 GLfloat a[4], result[4]; 627 fetch_vector4(&inst->SrcReg[0], machine, a); 628 result[0] = floorf(a[0]); 629 result[1] = floorf(a[1]); 630 result[2] = floorf(a[2]); 631 result[3] = floorf(a[3]); 632 store_vector4(inst, machine, result); 633 } 634 break; 635 case OPCODE_FRC: 636 { 637 GLfloat a[4], result[4]; 638 fetch_vector4(&inst->SrcReg[0], machine, a); 639 result[0] = a[0] - floorf(a[0]); 640 result[1] = a[1] - floorf(a[1]); 641 result[2] = a[2] - floorf(a[2]); 642 result[3] = a[3] - floorf(a[3]); 643 store_vector4(inst, machine, result); 644 } 645 break; 646 case OPCODE_IF: 647 { 648 GLboolean cond; 649 assert(program->arb.Instructions[inst->BranchTarget].Opcode 650 == OPCODE_ELSE || 651 program->arb.Instructions[inst->BranchTarget].Opcode 652 == OPCODE_ENDIF); 653 /* eval condition */ 654 GLfloat a[4]; 655 fetch_vector1(&inst->SrcReg[0], machine, a); 656 cond = (a[0] != 0.0F); 657 if (DEBUG_PROG) { 658 printf("IF: %d\n", cond); 659 } 660 /* do if/else */ 661 if (cond) { 662 /* do if-clause (just continue execution) */ 663 } 664 else { 665 /* go to the instruction after ELSE or ENDIF */ 666 assert(inst->BranchTarget >= 0); 667 pc = inst->BranchTarget; 668 } 669 } 670 break; 671 case OPCODE_ELSE: 672 /* goto ENDIF */ 673 assert(program->arb.Instructions[inst->BranchTarget].Opcode 674 == OPCODE_ENDIF); 675 assert(inst->BranchTarget >= 0); 676 pc = inst->BranchTarget; 677 break; 678 case OPCODE_ENDIF: 679 /* nothing */ 680 break; 681 case OPCODE_KIL: /* ARB_f_p only */ 682 { 683 GLfloat a[4]; 684 fetch_vector4(&inst->SrcReg[0], machine, a); 685 if (DEBUG_PROG) { 686 printf("KIL if (%g %g %g %g) <= 0.0\n", 687 a[0], a[1], a[2], a[3]); 688 } 689 690 if (a[0] < 0.0F || a[1] < 0.0F || a[2] < 0.0F || a[3] < 0.0F) { 691 return GL_FALSE; 692 } 693 } 694 break; 695 case OPCODE_LG2: /* log base 2 */ 696 { 697 GLfloat a[4], result[4], val; 698 fetch_vector1(&inst->SrcReg[0], machine, a); 699 /* The fast LOG2 macro doesn't meet the precision requirements. 700 */ 701 if (a[0] == 0.0F) { 702 val = -FLT_MAX; 703 } 704 else { 705 val = logf(a[0]) * 1.442695F; 706 } 707 result[0] = result[1] = result[2] = result[3] = val; 708 store_vector4(inst, machine, result); 709 } 710 break; 711 case OPCODE_LIT: 712 { 713 const GLfloat epsilon = 1.0F / 256.0F; /* from NV VP spec */ 714 GLfloat a[4], result[4]; 715 fetch_vector4(&inst->SrcReg[0], machine, a); 716 a[0] = MAX2(a[0], 0.0F); 717 a[1] = MAX2(a[1], 0.0F); 718 /* XXX ARB version clamps a[3], NV version doesn't */ 719 a[3] = CLAMP(a[3], -(128.0F - epsilon), (128.0F - epsilon)); 720 result[0] = 1.0F; 721 result[1] = a[0]; 722 /* XXX we could probably just use pow() here */ 723 if (a[0] > 0.0F) { 724 if (a[1] == 0.0F && a[3] == 0.0F) 725 result[2] = 1.0F; 726 else 727 result[2] = powf(a[1], a[3]); 728 } 729 else { 730 result[2] = 0.0F; 731 } 732 result[3] = 1.0F; 733 store_vector4(inst, machine, result); 734 if (DEBUG_PROG) { 735 printf("LIT (%g %g %g %g) : (%g %g %g %g)\n", 736 result[0], result[1], result[2], result[3], 737 a[0], a[1], a[2], a[3]); 738 } 739 } 740 break; 741 case OPCODE_LOG: 742 { 743 GLfloat t[4], q[4], abs_t0; 744 fetch_vector1(&inst->SrcReg[0], machine, t); 745 abs_t0 = fabsf(t[0]); 746 if (abs_t0 != 0.0F) { 747 if (IS_INF_OR_NAN(abs_t0)) 748 { 749 SET_POS_INFINITY(q[0]); 750 q[1] = 1.0F; 751 SET_POS_INFINITY(q[2]); 752 } 753 else { 754 int exponent; 755 GLfloat mantissa = frexpf(t[0], &exponent); 756 q[0] = (GLfloat) (exponent - 1); 757 q[1] = 2.0F * mantissa; /* map [.5, 1) -> [1, 2) */ 758 759 /* The fast LOG2 macro doesn't meet the precision 760 * requirements. 761 */ 762 q[2] = logf(t[0]) * 1.442695F; 763 } 764 } 765 else { 766 SET_NEG_INFINITY(q[0]); 767 q[1] = 1.0F; 768 SET_NEG_INFINITY(q[2]); 769 } 770 q[3] = 1.0; 771 store_vector4(inst, machine, q); 772 } 773 break; 774 case OPCODE_LRP: 775 { 776 GLfloat a[4], b[4], c[4], result[4]; 777 fetch_vector4(&inst->SrcReg[0], machine, a); 778 fetch_vector4(&inst->SrcReg[1], machine, b); 779 fetch_vector4(&inst->SrcReg[2], machine, c); 780 result[0] = a[0] * b[0] + (1.0F - a[0]) * c[0]; 781 result[1] = a[1] * b[1] + (1.0F - a[1]) * c[1]; 782 result[2] = a[2] * b[2] + (1.0F - a[2]) * c[2]; 783 result[3] = a[3] * b[3] + (1.0F - a[3]) * c[3]; 784 store_vector4(inst, machine, result); 785 if (DEBUG_PROG) { 786 printf("LRP (%g %g %g %g) = (%g %g %g %g), " 787 "(%g %g %g %g), (%g %g %g %g)\n", 788 result[0], result[1], result[2], result[3], 789 a[0], a[1], a[2], a[3], 790 b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]); 791 } 792 } 793 break; 794 case OPCODE_MAD: 795 { 796 GLfloat a[4], b[4], c[4], result[4]; 797 fetch_vector4(&inst->SrcReg[0], machine, a); 798 fetch_vector4(&inst->SrcReg[1], machine, b); 799 fetch_vector4(&inst->SrcReg[2], machine, c); 800 result[0] = a[0] * b[0] + c[0]; 801 result[1] = a[1] * b[1] + c[1]; 802 result[2] = a[2] * b[2] + c[2]; 803 result[3] = a[3] * b[3] + c[3]; 804 store_vector4(inst, machine, result); 805 if (DEBUG_PROG) { 806 printf("MAD (%g %g %g %g) = (%g %g %g %g) * " 807 "(%g %g %g %g) + (%g %g %g %g)\n", 808 result[0], result[1], result[2], result[3], 809 a[0], a[1], a[2], a[3], 810 b[0], b[1], b[2], b[3], c[0], c[1], c[2], c[3]); 811 } 812 } 813 break; 814 case OPCODE_MAX: 815 { 816 GLfloat a[4], b[4], result[4]; 817 fetch_vector4(&inst->SrcReg[0], machine, a); 818 fetch_vector4(&inst->SrcReg[1], machine, b); 819 result[0] = MAX2(a[0], b[0]); 820 result[1] = MAX2(a[1], b[1]); 821 result[2] = MAX2(a[2], b[2]); 822 result[3] = MAX2(a[3], b[3]); 823 store_vector4(inst, machine, result); 824 if (DEBUG_PROG) { 825 printf("MAX (%g %g %g %g) = (%g %g %g %g), (%g %g %g %g)\n", 826 result[0], result[1], result[2], result[3], 827 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]); 828 } 829 } 830 break; 831 case OPCODE_MIN: 832 { 833 GLfloat a[4], b[4], result[4]; 834 fetch_vector4(&inst->SrcReg[0], machine, a); 835 fetch_vector4(&inst->SrcReg[1], machine, b); 836 result[0] = MIN2(a[0], b[0]); 837 result[1] = MIN2(a[1], b[1]); 838 result[2] = MIN2(a[2], b[2]); 839 result[3] = MIN2(a[3], b[3]); 840 store_vector4(inst, machine, result); 841 } 842 break; 843 case OPCODE_MOV: 844 { 845 GLfloat result[4]; 846 fetch_vector4(&inst->SrcReg[0], machine, result); 847 store_vector4(inst, machine, result); 848 if (DEBUG_PROG) { 849 printf("MOV (%g %g %g %g)\n", 850 result[0], result[1], result[2], result[3]); 851 } 852 } 853 break; 854 case OPCODE_MUL: 855 { 856 GLfloat a[4], b[4], result[4]; 857 fetch_vector4(&inst->SrcReg[0], machine, a); 858 fetch_vector4(&inst->SrcReg[1], machine, b); 859 result[0] = a[0] * b[0]; 860 result[1] = a[1] * b[1]; 861 result[2] = a[2] * b[2]; 862 result[3] = a[3] * b[3]; 863 store_vector4(inst, machine, result); 864 if (DEBUG_PROG) { 865 printf("MUL (%g %g %g %g) = (%g %g %g %g) * (%g %g %g %g)\n", 866 result[0], result[1], result[2], result[3], 867 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]); 868 } 869 } 870 break; 871 case OPCODE_NOISE1: 872 { 873 GLfloat a[4], result[4]; 874 fetch_vector1(&inst->SrcReg[0], machine, a); 875 result[0] = 876 result[1] = 877 result[2] = 878 result[3] = _mesa_noise1(a[0]); 879 store_vector4(inst, machine, result); 880 } 881 break; 882 case OPCODE_NOISE2: 883 { 884 GLfloat a[4], result[4]; 885 fetch_vector4(&inst->SrcReg[0], machine, a); 886 result[0] = 887 result[1] = 888 result[2] = result[3] = _mesa_noise2(a[0], a[1]); 889 store_vector4(inst, machine, result); 890 } 891 break; 892 case OPCODE_NOISE3: 893 { 894 GLfloat a[4], result[4]; 895 fetch_vector4(&inst->SrcReg[0], machine, a); 896 result[0] = 897 result[1] = 898 result[2] = 899 result[3] = _mesa_noise3(a[0], a[1], a[2]); 900 store_vector4(inst, machine, result); 901 } 902 break; 903 case OPCODE_NOISE4: 904 { 905 GLfloat a[4], result[4]; 906 fetch_vector4(&inst->SrcReg[0], machine, a); 907 result[0] = 908 result[1] = 909 result[2] = 910 result[3] = _mesa_noise4(a[0], a[1], a[2], a[3]); 911 store_vector4(inst, machine, result); 912 } 913 break; 914 case OPCODE_NOP: 915 break; 916 case OPCODE_POW: 917 { 918 GLfloat a[4], b[4], result[4]; 919 fetch_vector1(&inst->SrcReg[0], machine, a); 920 fetch_vector1(&inst->SrcReg[1], machine, b); 921 result[0] = result[1] = result[2] = result[3] 922 = powf(a[0], b[0]); 923 store_vector4(inst, machine, result); 924 } 925 break; 926 927 case OPCODE_RCP: 928 { 929 GLfloat a[4], result[4]; 930 fetch_vector1(&inst->SrcReg[0], machine, a); 931 if (DEBUG_PROG) { 932 if (a[0] == 0) 933 printf("RCP(0)\n"); 934 else if (IS_INF_OR_NAN(a[0])) 935 printf("RCP(inf)\n"); 936 } 937 result[0] = result[1] = result[2] = result[3] = 1.0F / a[0]; 938 store_vector4(inst, machine, result); 939 } 940 break; 941 case OPCODE_RET: /* return from subroutine (conditional) */ 942 if (machine->StackDepth == 0) { 943 return GL_TRUE; /* Per GL_NV_vertex_program2 spec */ 944 } 945 /* subtract one because of pc++ in the for loop */ 946 pc = machine->CallStack[--machine->StackDepth] - 1; 947 break; 948 case OPCODE_RSQ: /* 1 / sqrt() */ 949 { 950 GLfloat a[4], result[4]; 951 fetch_vector1(&inst->SrcReg[0], machine, a); 952 a[0] = fabsf(a[0]); 953 result[0] = result[1] = result[2] = result[3] = 1.0f / sqrtf(a[0]); 954 store_vector4(inst, machine, result); 955 if (DEBUG_PROG) { 956 printf("RSQ %g = 1/sqrt(|%g|)\n", result[0], a[0]); 957 } 958 } 959 break; 960 case OPCODE_SCS: /* sine and cos */ 961 { 962 GLfloat a[4], result[4]; 963 fetch_vector1(&inst->SrcReg[0], machine, a); 964 result[0] = cosf(a[0]); 965 result[1] = sinf(a[0]); 966 result[2] = 0.0F; /* undefined! */ 967 result[3] = 0.0F; /* undefined! */ 968 store_vector4(inst, machine, result); 969 } 970 break; 971 case OPCODE_SGE: /* set on greater or equal */ 972 { 973 GLfloat a[4], b[4], result[4]; 974 fetch_vector4(&inst->SrcReg[0], machine, a); 975 fetch_vector4(&inst->SrcReg[1], machine, b); 976 result[0] = (a[0] >= b[0]) ? 1.0F : 0.0F; 977 result[1] = (a[1] >= b[1]) ? 1.0F : 0.0F; 978 result[2] = (a[2] >= b[2]) ? 1.0F : 0.0F; 979 result[3] = (a[3] >= b[3]) ? 1.0F : 0.0F; 980 store_vector4(inst, machine, result); 981 if (DEBUG_PROG) { 982 printf("SGE (%g %g %g %g) = (%g %g %g %g) >= (%g %g %g %g)\n", 983 result[0], result[1], result[2], result[3], 984 a[0], a[1], a[2], a[3], 985 b[0], b[1], b[2], b[3]); 986 } 987 } 988 break; 989 case OPCODE_SIN: 990 { 991 GLfloat a[4], result[4]; 992 fetch_vector1(&inst->SrcReg[0], machine, a); 993 result[0] = result[1] = result[2] = result[3] 994 = sinf(a[0]); 995 store_vector4(inst, machine, result); 996 } 997 break; 998 case OPCODE_SLT: /* set on less */ 999 { 1000 GLfloat a[4], b[4], result[4]; 1001 fetch_vector4(&inst->SrcReg[0], machine, a); 1002 fetch_vector4(&inst->SrcReg[1], machine, b); 1003 result[0] = (a[0] < b[0]) ? 1.0F : 0.0F; 1004 result[1] = (a[1] < b[1]) ? 1.0F : 0.0F; 1005 result[2] = (a[2] < b[2]) ? 1.0F : 0.0F; 1006 result[3] = (a[3] < b[3]) ? 1.0F : 0.0F; 1007 store_vector4(inst, machine, result); 1008 if (DEBUG_PROG) { 1009 printf("SLT (%g %g %g %g) = (%g %g %g %g) < (%g %g %g %g)\n", 1010 result[0], result[1], result[2], result[3], 1011 a[0], a[1], a[2], a[3], 1012 b[0], b[1], b[2], b[3]); 1013 } 1014 } 1015 break; 1016 case OPCODE_SSG: /* set sign (-1, 0 or +1) */ 1017 { 1018 GLfloat a[4], result[4]; 1019 fetch_vector4(&inst->SrcReg[0], machine, a); 1020 result[0] = (GLfloat) ((a[0] > 0.0F) - (a[0] < 0.0F)); 1021 result[1] = (GLfloat) ((a[1] > 0.0F) - (a[1] < 0.0F)); 1022 result[2] = (GLfloat) ((a[2] > 0.0F) - (a[2] < 0.0F)); 1023 result[3] = (GLfloat) ((a[3] > 0.0F) - (a[3] < 0.0F)); 1024 store_vector4(inst, machine, result); 1025 } 1026 break; 1027 case OPCODE_SUB: 1028 { 1029 GLfloat a[4], b[4], result[4]; 1030 fetch_vector4(&inst->SrcReg[0], machine, a); 1031 fetch_vector4(&inst->SrcReg[1], machine, b); 1032 result[0] = a[0] - b[0]; 1033 result[1] = a[1] - b[1]; 1034 result[2] = a[2] - b[2]; 1035 result[3] = a[3] - b[3]; 1036 store_vector4(inst, machine, result); 1037 if (DEBUG_PROG) { 1038 printf("SUB (%g %g %g %g) = (%g %g %g %g) - (%g %g %g %g)\n", 1039 result[0], result[1], result[2], result[3], 1040 a[0], a[1], a[2], a[3], b[0], b[1], b[2], b[3]); 1041 } 1042 } 1043 break; 1044 case OPCODE_SWZ: /* extended swizzle */ 1045 { 1046 const struct prog_src_register *source = &inst->SrcReg[0]; 1047 const GLfloat *src = get_src_register_pointer(source, machine); 1048 GLfloat result[4]; 1049 GLuint i; 1050 for (i = 0; i < 4; i++) { 1051 const GLuint swz = GET_SWZ(source->Swizzle, i); 1052 if (swz == SWIZZLE_ZERO) 1053 result[i] = 0.0; 1054 else if (swz == SWIZZLE_ONE) 1055 result[i] = 1.0; 1056 else { 1057 assert(swz <= 3); 1058 result[i] = src[swz]; 1059 } 1060 if (source->Negate & (1 << i)) 1061 result[i] = -result[i]; 1062 } 1063 store_vector4(inst, machine, result); 1064 } 1065 break; 1066 case OPCODE_TEX: /* Both ARB and NV frag prog */ 1067 /* Simple texel lookup */ 1068 { 1069 GLfloat texcoord[4], color[4]; 1070 fetch_vector4(&inst->SrcReg[0], machine, texcoord); 1071 1072 /* For TEX, texcoord.Q should not be used and its value should not 1073 * matter (at most, we pass coord.xyz to texture3D() in GLSL). 1074 * Set Q=1 so that FetchTexelDeriv() doesn't get a garbage value 1075 * which is effectively what happens when the texcoord swizzle 1076 * is .xyzz 1077 */ 1078 texcoord[3] = 1.0f; 1079 1080 fetch_texel(ctx, machine, inst, texcoord, 0.0, color); 1081 1082 if (DEBUG_PROG) { 1083 printf("TEX (%g, %g, %g, %g) = texture[%d][%g, %g, %g, %g]\n", 1084 color[0], color[1], color[2], color[3], 1085 inst->TexSrcUnit, 1086 texcoord[0], texcoord[1], texcoord[2], texcoord[3]); 1087 } 1088 store_vector4(inst, machine, color); 1089 } 1090 break; 1091 case OPCODE_TXB: /* GL_ARB_fragment_program only */ 1092 /* Texel lookup with LOD bias */ 1093 { 1094 GLfloat texcoord[4], color[4], lodBias; 1095 1096 fetch_vector4(&inst->SrcReg[0], machine, texcoord); 1097 1098 /* texcoord[3] is the bias to add to lambda */ 1099 lodBias = texcoord[3]; 1100 1101 fetch_texel(ctx, machine, inst, texcoord, lodBias, color); 1102 1103 if (DEBUG_PROG) { 1104 printf("TXB (%g, %g, %g, %g) = texture[%d][%g %g %g %g]" 1105 " bias %g\n", 1106 color[0], color[1], color[2], color[3], 1107 inst->TexSrcUnit, 1108 texcoord[0], 1109 texcoord[1], 1110 texcoord[2], 1111 texcoord[3], 1112 lodBias); 1113 } 1114 1115 store_vector4(inst, machine, color); 1116 } 1117 break; 1118 case OPCODE_TXD: 1119 /* Texture lookup w/ partial derivatives for LOD */ 1120 { 1121 GLfloat texcoord[4], dtdx[4], dtdy[4], color[4]; 1122 fetch_vector4(&inst->SrcReg[0], machine, texcoord); 1123 fetch_vector4(&inst->SrcReg[1], machine, dtdx); 1124 fetch_vector4(&inst->SrcReg[2], machine, dtdy); 1125 machine->FetchTexelDeriv(ctx, texcoord, dtdx, dtdy, 1126 0.0, /* lodBias */ 1127 inst->TexSrcUnit, color); 1128 store_vector4(inst, machine, color); 1129 } 1130 break; 1131 case OPCODE_TXL: 1132 /* Texel lookup with explicit LOD */ 1133 { 1134 GLfloat texcoord[4], color[4], lod; 1135 1136 fetch_vector4(&inst->SrcReg[0], machine, texcoord); 1137 1138 /* texcoord[3] is the LOD */ 1139 lod = texcoord[3]; 1140 1141 machine->FetchTexelLod(ctx, texcoord, lod, 1142 machine->Samplers[inst->TexSrcUnit], color); 1143 1144 store_vector4(inst, machine, color); 1145 } 1146 break; 1147 case OPCODE_TXP: /* GL_ARB_fragment_program only */ 1148 /* Texture lookup w/ projective divide */ 1149 { 1150 GLfloat texcoord[4], color[4]; 1151 1152 fetch_vector4(&inst->SrcReg[0], machine, texcoord); 1153 /* Not so sure about this test - if texcoord[3] is 1154 * zero, we'd probably be fine except for an assert in 1155 * IROUND_POS() which gets triggered by the inf values created. 1156 */ 1157 if (texcoord[3] != 0.0F) { 1158 texcoord[0] /= texcoord[3]; 1159 texcoord[1] /= texcoord[3]; 1160 texcoord[2] /= texcoord[3]; 1161 } 1162 1163 fetch_texel(ctx, machine, inst, texcoord, 0.0, color); 1164 1165 store_vector4(inst, machine, color); 1166 } 1167 break; 1168 case OPCODE_TRUNC: /* truncate toward zero */ 1169 { 1170 GLfloat a[4], result[4]; 1171 fetch_vector4(&inst->SrcReg[0], machine, a); 1172 result[0] = (GLfloat) (GLint) a[0]; 1173 result[1] = (GLfloat) (GLint) a[1]; 1174 result[2] = (GLfloat) (GLint) a[2]; 1175 result[3] = (GLfloat) (GLint) a[3]; 1176 store_vector4(inst, machine, result); 1177 } 1178 break; 1179 case OPCODE_XPD: /* cross product */ 1180 { 1181 GLfloat a[4], b[4], result[4]; 1182 fetch_vector4(&inst->SrcReg[0], machine, a); 1183 fetch_vector4(&inst->SrcReg[1], machine, b); 1184 result[0] = a[1] * b[2] - a[2] * b[1]; 1185 result[1] = a[2] * b[0] - a[0] * b[2]; 1186 result[2] = a[0] * b[1] - a[1] * b[0]; 1187 result[3] = 1.0; 1188 store_vector4(inst, machine, result); 1189 if (DEBUG_PROG) { 1190 printf("XPD (%g %g %g %g) = (%g %g %g) X (%g %g %g)\n", 1191 result[0], result[1], result[2], result[3], 1192 a[0], a[1], a[2], b[0], b[1], b[2]); 1193 } 1194 } 1195 break; 1196 case OPCODE_END: 1197 return GL_TRUE; 1198 default: 1199 _mesa_problem(ctx, "Bad opcode %d in _mesa_execute_program", 1200 inst->Opcode); 1201 return GL_TRUE; /* return value doesn't matter */ 1202 } 1203 1204 numExec++; 1205 if (numExec > maxExec) { 1206 static GLboolean reported = GL_FALSE; 1207 if (!reported) { 1208 _mesa_problem(ctx, "Infinite loop detected in fragment program"); 1209 reported = GL_TRUE; 1210 } 1211 return GL_TRUE; 1212 } 1213 1214 } /* for pc */ 1215 1216 return GL_TRUE; 1217} 1218