tgsi_exec.c revision 4a49301e
1/************************************************************************** 2 * 3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sub license, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial portions 16 * of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 **************************************************************************/ 27 28/** 29 * TGSI interpreter/executor. 30 * 31 * Flow control information: 32 * 33 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) 34 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special 35 * care since a condition may be true for some quad components but false 36 * for other components. 37 * 38 * We basically execute all statements (even if they're in the part of 39 * an IF/ELSE clause that's "not taken") and use a special mask to 40 * control writing to destination registers. This is the ExecMask. 41 * See store_dest(). 42 * 43 * The ExecMask is computed from three other masks (CondMask, LoopMask and 44 * ContMask) which are controlled by the flow control instructions (namely: 45 * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). 46 * 47 * 48 * Authors: 49 * Michal Krol 50 * Brian Paul 51 */ 52 53#include "pipe/p_compiler.h" 54#include "pipe/p_state.h" 55#include "pipe/p_shader_tokens.h" 56#include "tgsi/tgsi_dump.h" 57#include "tgsi/tgsi_parse.h" 58#include "tgsi/tgsi_util.h" 59#include "tgsi_exec.h" 60#include "util/u_memory.h" 61#include "util/u_math.h" 62 63#define FAST_MATH 1 64 65/** for tgsi_full_instruction::Flags */ 66#define SOA_DEPENDENCY_FLAG 0x1 67 68#define TILE_TOP_LEFT 0 69#define TILE_TOP_RIGHT 1 70#define TILE_BOTTOM_LEFT 2 71#define TILE_BOTTOM_RIGHT 3 72 73#define CHAN_X 0 74#define CHAN_Y 1 75#define CHAN_Z 2 76#define CHAN_W 3 77 78/* 79 * Shorthand locations of various utility registers (_I = Index, _C = Channel) 80 */ 81#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I 82#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C 83#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I 84#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C 85#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I 86#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C 87#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I 88#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C 89#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I 90#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C 91#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I 92#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C 93#define TEMP_128_I TGSI_EXEC_TEMP_128_I 94#define TEMP_128_C TGSI_EXEC_TEMP_128_C 95#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I 96#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C 97#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I 98#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C 99#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I 100#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C 101#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I 102#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C 103#define TEMP_CC_I TGSI_EXEC_TEMP_CC_I 104#define TEMP_CC_C TGSI_EXEC_TEMP_CC_C 105#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I 106#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C 107#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I 108#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C 109#define TEMP_R0 TGSI_EXEC_TEMP_R0 110#define TEMP_P0 TGSI_EXEC_TEMP_P0 111 112#define IS_CHANNEL_ENABLED(INST, CHAN)\ 113 ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) 114 115#define IS_CHANNEL_ENABLED2(INST, CHAN)\ 116 ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) 117 118#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ 119 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 120 if (IS_CHANNEL_ENABLED( INST, CHAN )) 121 122#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ 123 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 124 if (IS_CHANNEL_ENABLED2( INST, CHAN )) 125 126 127/** The execution mask depends on the conditional mask and the loop mask */ 128#define UPDATE_EXEC_MASK(MACH) \ 129 MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask 130 131 132static const union tgsi_exec_channel ZeroVec = 133 { { 0.0, 0.0, 0.0, 0.0 } }; 134 135 136static INLINE void 137check_inf_or_nan(const union tgsi_exec_channel *chan) 138{ 139 assert(!util_is_inf_or_nan(chan->f[0])); 140 assert(!util_is_inf_or_nan(chan->f[1])); 141 assert(!util_is_inf_or_nan(chan->f[2])); 142 assert(!util_is_inf_or_nan(chan->f[3])); 143} 144 145 146#ifdef DEBUG 147static void 148print_chan(const char *msg, const union tgsi_exec_channel *chan) 149{ 150 debug_printf("%s = {%f, %f, %f, %f}\n", 151 msg, chan->f[0], chan->f[1], chan->f[2], chan->f[3]); 152} 153#endif 154 155 156#ifdef DEBUG 157static void 158print_temp(const struct tgsi_exec_machine *mach, uint index) 159{ 160 const struct tgsi_exec_vector *tmp = &mach->Temps[index]; 161 int i; 162 debug_printf("Temp[%u] =\n", index); 163 for (i = 0; i < 4; i++) { 164 debug_printf(" %c: { %f, %f, %f, %f }\n", 165 "XYZW"[i], 166 tmp->xyzw[i].f[0], 167 tmp->xyzw[i].f[1], 168 tmp->xyzw[i].f[2], 169 tmp->xyzw[i].f[3]); 170 } 171} 172#endif 173 174 175/** 176 * Check if there's a potential src/dst register data dependency when 177 * using SOA execution. 178 * Example: 179 * MOV T, T.yxwz; 180 * This would expand into: 181 * MOV t0, t1; 182 * MOV t1, t0; 183 * MOV t2, t3; 184 * MOV t3, t2; 185 * The second instruction will have the wrong value for t0 if executed as-is. 186 */ 187boolean 188tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) 189{ 190 uint i, chan; 191 192 uint writemask = inst->FullDstRegisters[0].DstRegister.WriteMask; 193 if (writemask == TGSI_WRITEMASK_X || 194 writemask == TGSI_WRITEMASK_Y || 195 writemask == TGSI_WRITEMASK_Z || 196 writemask == TGSI_WRITEMASK_W || 197 writemask == TGSI_WRITEMASK_NONE) { 198 /* no chance of data dependency */ 199 return FALSE; 200 } 201 202 /* loop over src regs */ 203 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 204 if ((inst->FullSrcRegisters[i].SrcRegister.File == 205 inst->FullDstRegisters[0].DstRegister.File) && 206 (inst->FullSrcRegisters[i].SrcRegister.Index == 207 inst->FullDstRegisters[0].DstRegister.Index)) { 208 /* loop over dest channels */ 209 uint channelsWritten = 0x0; 210 FOR_EACH_ENABLED_CHANNEL(*inst, chan) { 211 /* check if we're reading a channel that's been written */ 212 uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->FullSrcRegisters[i], chan); 213 if (channelsWritten & (1 << swizzle)) { 214 return TRUE; 215 } 216 217 channelsWritten |= (1 << chan); 218 } 219 } 220 } 221 return FALSE; 222} 223 224 225/** 226 * Initialize machine state by expanding tokens to full instructions, 227 * allocating temporary storage, setting up constants, etc. 228 * After this, we can call tgsi_exec_machine_run() many times. 229 */ 230void 231tgsi_exec_machine_bind_shader( 232 struct tgsi_exec_machine *mach, 233 const struct tgsi_token *tokens, 234 uint numSamplers, 235 struct tgsi_sampler **samplers) 236{ 237 uint k; 238 struct tgsi_parse_context parse; 239 struct tgsi_exec_labels *labels = &mach->Labels; 240 struct tgsi_full_instruction *instructions; 241 struct tgsi_full_declaration *declarations; 242 uint maxInstructions = 10, numInstructions = 0; 243 uint maxDeclarations = 10, numDeclarations = 0; 244 uint instno = 0; 245 246#if 0 247 tgsi_dump(tokens, 0); 248#endif 249 250 util_init_math(); 251 252 mach->Tokens = tokens; 253 mach->Samplers = samplers; 254 255 k = tgsi_parse_init (&parse, mach->Tokens); 256 if (k != TGSI_PARSE_OK) { 257 debug_printf( "Problem parsing!\n" ); 258 return; 259 } 260 261 mach->Processor = parse.FullHeader.Processor.Processor; 262 mach->ImmLimit = 0; 263 labels->count = 0; 264 265 declarations = (struct tgsi_full_declaration *) 266 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); 267 268 if (!declarations) { 269 return; 270 } 271 272 instructions = (struct tgsi_full_instruction *) 273 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); 274 275 if (!instructions) { 276 FREE( declarations ); 277 return; 278 } 279 280 while( !tgsi_parse_end_of_tokens( &parse ) ) { 281 uint pointer = parse.Position; 282 uint i; 283 284 tgsi_parse_token( &parse ); 285 switch( parse.FullToken.Token.Type ) { 286 case TGSI_TOKEN_TYPE_DECLARATION: 287 /* save expanded declaration */ 288 if (numDeclarations == maxDeclarations) { 289 declarations = REALLOC(declarations, 290 maxDeclarations 291 * sizeof(struct tgsi_full_declaration), 292 (maxDeclarations + 10) 293 * sizeof(struct tgsi_full_declaration)); 294 maxDeclarations += 10; 295 } 296 memcpy(declarations + numDeclarations, 297 &parse.FullToken.FullDeclaration, 298 sizeof(declarations[0])); 299 numDeclarations++; 300 break; 301 302 case TGSI_TOKEN_TYPE_IMMEDIATE: 303 { 304 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 305 assert( size <= 4 ); 306 assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES ); 307 308 for( i = 0; i < size; i++ ) { 309 mach->Imms[mach->ImmLimit][i] = 310 parse.FullToken.FullImmediate.u[i].Float; 311 } 312 mach->ImmLimit += 1; 313 } 314 break; 315 316 case TGSI_TOKEN_TYPE_INSTRUCTION: 317 assert( labels->count < MAX_LABELS ); 318 319 labels->labels[labels->count][0] = instno; 320 labels->labels[labels->count][1] = pointer; 321 labels->count++; 322 323 /* save expanded instruction */ 324 if (numInstructions == maxInstructions) { 325 instructions = REALLOC(instructions, 326 maxInstructions 327 * sizeof(struct tgsi_full_instruction), 328 (maxInstructions + 10) 329 * sizeof(struct tgsi_full_instruction)); 330 maxInstructions += 10; 331 } 332 333 if (tgsi_check_soa_dependencies(&parse.FullToken.FullInstruction)) { 334 uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode; 335 parse.FullToken.FullInstruction.Flags = SOA_DEPENDENCY_FLAG; 336 /* XXX we only handle SOA dependencies properly for MOV/SWZ 337 * at this time! 338 */ 339 if (opcode != TGSI_OPCODE_MOV && 340 opcode != TGSI_OPCODE_MUL && 341 opcode != TGSI_OPCODE_CMP) { 342 debug_printf("Warning: SOA dependency in instruction" 343 " is not handled:\n"); 344 tgsi_dump_instruction(&parse.FullToken.FullInstruction, 345 numInstructions); 346 } 347 } 348 349 memcpy(instructions + numInstructions, 350 &parse.FullToken.FullInstruction, 351 sizeof(instructions[0])); 352 353 numInstructions++; 354 break; 355 356 default: 357 assert( 0 ); 358 } 359 } 360 tgsi_parse_free (&parse); 361 362 if (mach->Declarations) { 363 FREE( mach->Declarations ); 364 } 365 mach->Declarations = declarations; 366 mach->NumDeclarations = numDeclarations; 367 368 if (mach->Instructions) { 369 FREE( mach->Instructions ); 370 } 371 mach->Instructions = instructions; 372 mach->NumInstructions = numInstructions; 373} 374 375 376struct tgsi_exec_machine * 377tgsi_exec_machine_create( void ) 378{ 379 struct tgsi_exec_machine *mach; 380 uint i; 381 382 mach = align_malloc( sizeof *mach, 16 ); 383 if (!mach) 384 goto fail; 385 386 memset(mach, 0, sizeof(*mach)); 387 388 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; 389 390 /* Setup constants. */ 391 for( i = 0; i < 4; i++ ) { 392 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000; 393 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF; 394 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000; 395 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF; 396 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f; 397 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f; 398 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f; 399 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f; 400 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f; 401 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f; 402 } 403 404#ifdef DEBUG 405 /* silence warnings */ 406 (void) print_chan; 407 (void) print_temp; 408#endif 409 410 return mach; 411 412fail: 413 align_free(mach); 414 return NULL; 415} 416 417 418void 419tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach) 420{ 421 if (mach) { 422 FREE(mach->Instructions); 423 FREE(mach->Declarations); 424 } 425 426 align_free(mach); 427} 428 429 430static void 431micro_abs( 432 union tgsi_exec_channel *dst, 433 const union tgsi_exec_channel *src ) 434{ 435 dst->f[0] = fabsf( src->f[0] ); 436 dst->f[1] = fabsf( src->f[1] ); 437 dst->f[2] = fabsf( src->f[2] ); 438 dst->f[3] = fabsf( src->f[3] ); 439} 440 441static void 442micro_add( 443 union tgsi_exec_channel *dst, 444 const union tgsi_exec_channel *src0, 445 const union tgsi_exec_channel *src1 ) 446{ 447 dst->f[0] = src0->f[0] + src1->f[0]; 448 dst->f[1] = src0->f[1] + src1->f[1]; 449 dst->f[2] = src0->f[2] + src1->f[2]; 450 dst->f[3] = src0->f[3] + src1->f[3]; 451} 452 453#if 0 454static void 455micro_iadd( 456 union tgsi_exec_channel *dst, 457 const union tgsi_exec_channel *src0, 458 const union tgsi_exec_channel *src1 ) 459{ 460 dst->i[0] = src0->i[0] + src1->i[0]; 461 dst->i[1] = src0->i[1] + src1->i[1]; 462 dst->i[2] = src0->i[2] + src1->i[2]; 463 dst->i[3] = src0->i[3] + src1->i[3]; 464} 465#endif 466 467static void 468micro_and( 469 union tgsi_exec_channel *dst, 470 const union tgsi_exec_channel *src0, 471 const union tgsi_exec_channel *src1 ) 472{ 473 dst->u[0] = src0->u[0] & src1->u[0]; 474 dst->u[1] = src0->u[1] & src1->u[1]; 475 dst->u[2] = src0->u[2] & src1->u[2]; 476 dst->u[3] = src0->u[3] & src1->u[3]; 477} 478 479static void 480micro_ceil( 481 union tgsi_exec_channel *dst, 482 const union tgsi_exec_channel *src ) 483{ 484 dst->f[0] = ceilf( src->f[0] ); 485 dst->f[1] = ceilf( src->f[1] ); 486 dst->f[2] = ceilf( src->f[2] ); 487 dst->f[3] = ceilf( src->f[3] ); 488} 489 490static void 491micro_cos( 492 union tgsi_exec_channel *dst, 493 const union tgsi_exec_channel *src ) 494{ 495 dst->f[0] = cosf( src->f[0] ); 496 dst->f[1] = cosf( src->f[1] ); 497 dst->f[2] = cosf( src->f[2] ); 498 dst->f[3] = cosf( src->f[3] ); 499} 500 501static void 502micro_ddx( 503 union tgsi_exec_channel *dst, 504 const union tgsi_exec_channel *src ) 505{ 506 dst->f[0] = 507 dst->f[1] = 508 dst->f[2] = 509 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; 510} 511 512static void 513micro_ddy( 514 union tgsi_exec_channel *dst, 515 const union tgsi_exec_channel *src ) 516{ 517 dst->f[0] = 518 dst->f[1] = 519 dst->f[2] = 520 dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT]; 521} 522 523static void 524micro_div( 525 union tgsi_exec_channel *dst, 526 const union tgsi_exec_channel *src0, 527 const union tgsi_exec_channel *src1 ) 528{ 529 if (src1->f[0] != 0) { 530 dst->f[0] = src0->f[0] / src1->f[0]; 531 } 532 if (src1->f[1] != 0) { 533 dst->f[1] = src0->f[1] / src1->f[1]; 534 } 535 if (src1->f[2] != 0) { 536 dst->f[2] = src0->f[2] / src1->f[2]; 537 } 538 if (src1->f[3] != 0) { 539 dst->f[3] = src0->f[3] / src1->f[3]; 540 } 541} 542 543#if 0 544static void 545micro_udiv( 546 union tgsi_exec_channel *dst, 547 const union tgsi_exec_channel *src0, 548 const union tgsi_exec_channel *src1 ) 549{ 550 dst->u[0] = src0->u[0] / src1->u[0]; 551 dst->u[1] = src0->u[1] / src1->u[1]; 552 dst->u[2] = src0->u[2] / src1->u[2]; 553 dst->u[3] = src0->u[3] / src1->u[3]; 554} 555#endif 556 557static void 558micro_eq( 559 union tgsi_exec_channel *dst, 560 const union tgsi_exec_channel *src0, 561 const union tgsi_exec_channel *src1, 562 const union tgsi_exec_channel *src2, 563 const union tgsi_exec_channel *src3 ) 564{ 565 dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0]; 566 dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1]; 567 dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2]; 568 dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3]; 569} 570 571#if 0 572static void 573micro_ieq( 574 union tgsi_exec_channel *dst, 575 const union tgsi_exec_channel *src0, 576 const union tgsi_exec_channel *src1, 577 const union tgsi_exec_channel *src2, 578 const union tgsi_exec_channel *src3 ) 579{ 580 dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0]; 581 dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1]; 582 dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2]; 583 dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3]; 584} 585#endif 586 587static void 588micro_exp2( 589 union tgsi_exec_channel *dst, 590 const union tgsi_exec_channel *src) 591{ 592#if FAST_MATH 593 dst->f[0] = util_fast_exp2( src->f[0] ); 594 dst->f[1] = util_fast_exp2( src->f[1] ); 595 dst->f[2] = util_fast_exp2( src->f[2] ); 596 dst->f[3] = util_fast_exp2( src->f[3] ); 597#else 598 dst->f[0] = powf( 2.0f, src->f[0] ); 599 dst->f[1] = powf( 2.0f, src->f[1] ); 600 dst->f[2] = powf( 2.0f, src->f[2] ); 601 dst->f[3] = powf( 2.0f, src->f[3] ); 602#endif 603} 604 605#if 0 606static void 607micro_f2ut( 608 union tgsi_exec_channel *dst, 609 const union tgsi_exec_channel *src ) 610{ 611 dst->u[0] = (uint) src->f[0]; 612 dst->u[1] = (uint) src->f[1]; 613 dst->u[2] = (uint) src->f[2]; 614 dst->u[3] = (uint) src->f[3]; 615} 616#endif 617 618static void 619micro_float_clamp(union tgsi_exec_channel *dst, 620 const union tgsi_exec_channel *src) 621{ 622 uint i; 623 624 for (i = 0; i < 4; i++) { 625 if (src->f[i] > 0.0f) { 626 if (src->f[i] > 1.884467e+019f) 627 dst->f[i] = 1.884467e+019f; 628 else if (src->f[i] < 5.42101e-020f) 629 dst->f[i] = 5.42101e-020f; 630 else 631 dst->f[i] = src->f[i]; 632 } 633 else { 634 if (src->f[i] < -1.884467e+019f) 635 dst->f[i] = -1.884467e+019f; 636 else if (src->f[i] > -5.42101e-020f) 637 dst->f[i] = -5.42101e-020f; 638 else 639 dst->f[i] = src->f[i]; 640 } 641 } 642} 643 644static void 645micro_flr( 646 union tgsi_exec_channel *dst, 647 const union tgsi_exec_channel *src ) 648{ 649 dst->f[0] = floorf( src->f[0] ); 650 dst->f[1] = floorf( src->f[1] ); 651 dst->f[2] = floorf( src->f[2] ); 652 dst->f[3] = floorf( src->f[3] ); 653} 654 655static void 656micro_frc( 657 union tgsi_exec_channel *dst, 658 const union tgsi_exec_channel *src ) 659{ 660 dst->f[0] = src->f[0] - floorf( src->f[0] ); 661 dst->f[1] = src->f[1] - floorf( src->f[1] ); 662 dst->f[2] = src->f[2] - floorf( src->f[2] ); 663 dst->f[3] = src->f[3] - floorf( src->f[3] ); 664} 665 666static void 667micro_i2f( 668 union tgsi_exec_channel *dst, 669 const union tgsi_exec_channel *src ) 670{ 671 dst->f[0] = (float) src->i[0]; 672 dst->f[1] = (float) src->i[1]; 673 dst->f[2] = (float) src->i[2]; 674 dst->f[3] = (float) src->i[3]; 675} 676 677static void 678micro_lg2( 679 union tgsi_exec_channel *dst, 680 const union tgsi_exec_channel *src ) 681{ 682#if FAST_MATH 683 dst->f[0] = util_fast_log2( src->f[0] ); 684 dst->f[1] = util_fast_log2( src->f[1] ); 685 dst->f[2] = util_fast_log2( src->f[2] ); 686 dst->f[3] = util_fast_log2( src->f[3] ); 687#else 688 dst->f[0] = logf( src->f[0] ) * 1.442695f; 689 dst->f[1] = logf( src->f[1] ) * 1.442695f; 690 dst->f[2] = logf( src->f[2] ) * 1.442695f; 691 dst->f[3] = logf( src->f[3] ) * 1.442695f; 692#endif 693} 694 695static void 696micro_le( 697 union tgsi_exec_channel *dst, 698 const union tgsi_exec_channel *src0, 699 const union tgsi_exec_channel *src1, 700 const union tgsi_exec_channel *src2, 701 const union tgsi_exec_channel *src3 ) 702{ 703 dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0]; 704 dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1]; 705 dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2]; 706 dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3]; 707} 708 709static void 710micro_lt( 711 union tgsi_exec_channel *dst, 712 const union tgsi_exec_channel *src0, 713 const union tgsi_exec_channel *src1, 714 const union tgsi_exec_channel *src2, 715 const union tgsi_exec_channel *src3 ) 716{ 717 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; 718 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; 719 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; 720 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; 721} 722 723#if 0 724static void 725micro_ilt( 726 union tgsi_exec_channel *dst, 727 const union tgsi_exec_channel *src0, 728 const union tgsi_exec_channel *src1, 729 const union tgsi_exec_channel *src2, 730 const union tgsi_exec_channel *src3 ) 731{ 732 dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0]; 733 dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1]; 734 dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2]; 735 dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3]; 736} 737#endif 738 739#if 0 740static void 741micro_ult( 742 union tgsi_exec_channel *dst, 743 const union tgsi_exec_channel *src0, 744 const union tgsi_exec_channel *src1, 745 const union tgsi_exec_channel *src2, 746 const union tgsi_exec_channel *src3 ) 747{ 748 dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0]; 749 dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1]; 750 dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2]; 751 dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3]; 752} 753#endif 754 755static void 756micro_max( 757 union tgsi_exec_channel *dst, 758 const union tgsi_exec_channel *src0, 759 const union tgsi_exec_channel *src1 ) 760{ 761 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; 762 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; 763 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; 764 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; 765} 766 767#if 0 768static void 769micro_imax( 770 union tgsi_exec_channel *dst, 771 const union tgsi_exec_channel *src0, 772 const union tgsi_exec_channel *src1 ) 773{ 774 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; 775 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; 776 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; 777 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; 778} 779#endif 780 781#if 0 782static void 783micro_umax( 784 union tgsi_exec_channel *dst, 785 const union tgsi_exec_channel *src0, 786 const union tgsi_exec_channel *src1 ) 787{ 788 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; 789 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; 790 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; 791 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; 792} 793#endif 794 795static void 796micro_min( 797 union tgsi_exec_channel *dst, 798 const union tgsi_exec_channel *src0, 799 const union tgsi_exec_channel *src1 ) 800{ 801 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; 802 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; 803 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; 804 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; 805} 806 807#if 0 808static void 809micro_imin( 810 union tgsi_exec_channel *dst, 811 const union tgsi_exec_channel *src0, 812 const union tgsi_exec_channel *src1 ) 813{ 814 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; 815 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; 816 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; 817 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; 818} 819#endif 820 821#if 0 822static void 823micro_umin( 824 union tgsi_exec_channel *dst, 825 const union tgsi_exec_channel *src0, 826 const union tgsi_exec_channel *src1 ) 827{ 828 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; 829 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; 830 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; 831 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; 832} 833#endif 834 835#if 0 836static void 837micro_umod( 838 union tgsi_exec_channel *dst, 839 const union tgsi_exec_channel *src0, 840 const union tgsi_exec_channel *src1 ) 841{ 842 dst->u[0] = src0->u[0] % src1->u[0]; 843 dst->u[1] = src0->u[1] % src1->u[1]; 844 dst->u[2] = src0->u[2] % src1->u[2]; 845 dst->u[3] = src0->u[3] % src1->u[3]; 846} 847#endif 848 849static void 850micro_mul( 851 union tgsi_exec_channel *dst, 852 const union tgsi_exec_channel *src0, 853 const union tgsi_exec_channel *src1 ) 854{ 855 dst->f[0] = src0->f[0] * src1->f[0]; 856 dst->f[1] = src0->f[1] * src1->f[1]; 857 dst->f[2] = src0->f[2] * src1->f[2]; 858 dst->f[3] = src0->f[3] * src1->f[3]; 859} 860 861#if 0 862static void 863micro_imul( 864 union tgsi_exec_channel *dst, 865 const union tgsi_exec_channel *src0, 866 const union tgsi_exec_channel *src1 ) 867{ 868 dst->i[0] = src0->i[0] * src1->i[0]; 869 dst->i[1] = src0->i[1] * src1->i[1]; 870 dst->i[2] = src0->i[2] * src1->i[2]; 871 dst->i[3] = src0->i[3] * src1->i[3]; 872} 873#endif 874 875#if 0 876static void 877micro_imul64( 878 union tgsi_exec_channel *dst0, 879 union tgsi_exec_channel *dst1, 880 const union tgsi_exec_channel *src0, 881 const union tgsi_exec_channel *src1 ) 882{ 883 dst1->i[0] = src0->i[0] * src1->i[0]; 884 dst1->i[1] = src0->i[1] * src1->i[1]; 885 dst1->i[2] = src0->i[2] * src1->i[2]; 886 dst1->i[3] = src0->i[3] * src1->i[3]; 887 dst0->i[0] = 0; 888 dst0->i[1] = 0; 889 dst0->i[2] = 0; 890 dst0->i[3] = 0; 891} 892#endif 893 894#if 0 895static void 896micro_umul64( 897 union tgsi_exec_channel *dst0, 898 union tgsi_exec_channel *dst1, 899 const union tgsi_exec_channel *src0, 900 const union tgsi_exec_channel *src1 ) 901{ 902 dst1->u[0] = src0->u[0] * src1->u[0]; 903 dst1->u[1] = src0->u[1] * src1->u[1]; 904 dst1->u[2] = src0->u[2] * src1->u[2]; 905 dst1->u[3] = src0->u[3] * src1->u[3]; 906 dst0->u[0] = 0; 907 dst0->u[1] = 0; 908 dst0->u[2] = 0; 909 dst0->u[3] = 0; 910} 911#endif 912 913 914#if 0 915static void 916micro_movc( 917 union tgsi_exec_channel *dst, 918 const union tgsi_exec_channel *src0, 919 const union tgsi_exec_channel *src1, 920 const union tgsi_exec_channel *src2 ) 921{ 922 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; 923 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; 924 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; 925 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; 926} 927#endif 928 929static void 930micro_neg( 931 union tgsi_exec_channel *dst, 932 const union tgsi_exec_channel *src ) 933{ 934 dst->f[0] = -src->f[0]; 935 dst->f[1] = -src->f[1]; 936 dst->f[2] = -src->f[2]; 937 dst->f[3] = -src->f[3]; 938} 939 940#if 0 941static void 942micro_ineg( 943 union tgsi_exec_channel *dst, 944 const union tgsi_exec_channel *src ) 945{ 946 dst->i[0] = -src->i[0]; 947 dst->i[1] = -src->i[1]; 948 dst->i[2] = -src->i[2]; 949 dst->i[3] = -src->i[3]; 950} 951#endif 952 953static void 954micro_not( 955 union tgsi_exec_channel *dst, 956 const union tgsi_exec_channel *src ) 957{ 958 dst->u[0] = ~src->u[0]; 959 dst->u[1] = ~src->u[1]; 960 dst->u[2] = ~src->u[2]; 961 dst->u[3] = ~src->u[3]; 962} 963 964static void 965micro_or( 966 union tgsi_exec_channel *dst, 967 const union tgsi_exec_channel *src0, 968 const union tgsi_exec_channel *src1 ) 969{ 970 dst->u[0] = src0->u[0] | src1->u[0]; 971 dst->u[1] = src0->u[1] | src1->u[1]; 972 dst->u[2] = src0->u[2] | src1->u[2]; 973 dst->u[3] = src0->u[3] | src1->u[3]; 974} 975 976static void 977micro_pow( 978 union tgsi_exec_channel *dst, 979 const union tgsi_exec_channel *src0, 980 const union tgsi_exec_channel *src1 ) 981{ 982#if FAST_MATH 983 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] ); 984 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] ); 985 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] ); 986 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] ); 987#else 988 dst->f[0] = powf( src0->f[0], src1->f[0] ); 989 dst->f[1] = powf( src0->f[1], src1->f[1] ); 990 dst->f[2] = powf( src0->f[2], src1->f[2] ); 991 dst->f[3] = powf( src0->f[3], src1->f[3] ); 992#endif 993} 994 995static void 996micro_rnd( 997 union tgsi_exec_channel *dst, 998 const union tgsi_exec_channel *src ) 999{ 1000 dst->f[0] = floorf( src->f[0] + 0.5f ); 1001 dst->f[1] = floorf( src->f[1] + 0.5f ); 1002 dst->f[2] = floorf( src->f[2] + 0.5f ); 1003 dst->f[3] = floorf( src->f[3] + 0.5f ); 1004} 1005 1006static void 1007micro_sgn( 1008 union tgsi_exec_channel *dst, 1009 const union tgsi_exec_channel *src ) 1010{ 1011 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; 1012 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; 1013 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; 1014 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; 1015} 1016 1017static void 1018micro_shl( 1019 union tgsi_exec_channel *dst, 1020 const union tgsi_exec_channel *src0, 1021 const union tgsi_exec_channel *src1 ) 1022{ 1023 dst->i[0] = src0->i[0] << src1->i[0]; 1024 dst->i[1] = src0->i[1] << src1->i[1]; 1025 dst->i[2] = src0->i[2] << src1->i[2]; 1026 dst->i[3] = src0->i[3] << src1->i[3]; 1027} 1028 1029static void 1030micro_ishr( 1031 union tgsi_exec_channel *dst, 1032 const union tgsi_exec_channel *src0, 1033 const union tgsi_exec_channel *src1 ) 1034{ 1035 dst->i[0] = src0->i[0] >> src1->i[0]; 1036 dst->i[1] = src0->i[1] >> src1->i[1]; 1037 dst->i[2] = src0->i[2] >> src1->i[2]; 1038 dst->i[3] = src0->i[3] >> src1->i[3]; 1039} 1040 1041static void 1042micro_trunc( 1043 union tgsi_exec_channel *dst, 1044 const union tgsi_exec_channel *src0 ) 1045{ 1046 dst->f[0] = (float) (int) src0->f[0]; 1047 dst->f[1] = (float) (int) src0->f[1]; 1048 dst->f[2] = (float) (int) src0->f[2]; 1049 dst->f[3] = (float) (int) src0->f[3]; 1050} 1051 1052#if 0 1053static void 1054micro_ushr( 1055 union tgsi_exec_channel *dst, 1056 const union tgsi_exec_channel *src0, 1057 const union tgsi_exec_channel *src1 ) 1058{ 1059 dst->u[0] = src0->u[0] >> src1->u[0]; 1060 dst->u[1] = src0->u[1] >> src1->u[1]; 1061 dst->u[2] = src0->u[2] >> src1->u[2]; 1062 dst->u[3] = src0->u[3] >> src1->u[3]; 1063} 1064#endif 1065 1066static void 1067micro_sin( 1068 union tgsi_exec_channel *dst, 1069 const union tgsi_exec_channel *src ) 1070{ 1071 dst->f[0] = sinf( src->f[0] ); 1072 dst->f[1] = sinf( src->f[1] ); 1073 dst->f[2] = sinf( src->f[2] ); 1074 dst->f[3] = sinf( src->f[3] ); 1075} 1076 1077static void 1078micro_sqrt( union tgsi_exec_channel *dst, 1079 const union tgsi_exec_channel *src ) 1080{ 1081 dst->f[0] = sqrtf( src->f[0] ); 1082 dst->f[1] = sqrtf( src->f[1] ); 1083 dst->f[2] = sqrtf( src->f[2] ); 1084 dst->f[3] = sqrtf( src->f[3] ); 1085} 1086 1087static void 1088micro_sub( 1089 union tgsi_exec_channel *dst, 1090 const union tgsi_exec_channel *src0, 1091 const union tgsi_exec_channel *src1 ) 1092{ 1093 dst->f[0] = src0->f[0] - src1->f[0]; 1094 dst->f[1] = src0->f[1] - src1->f[1]; 1095 dst->f[2] = src0->f[2] - src1->f[2]; 1096 dst->f[3] = src0->f[3] - src1->f[3]; 1097} 1098 1099#if 0 1100static void 1101micro_u2f( 1102 union tgsi_exec_channel *dst, 1103 const union tgsi_exec_channel *src ) 1104{ 1105 dst->f[0] = (float) src->u[0]; 1106 dst->f[1] = (float) src->u[1]; 1107 dst->f[2] = (float) src->u[2]; 1108 dst->f[3] = (float) src->u[3]; 1109} 1110#endif 1111 1112static void 1113micro_xor( 1114 union tgsi_exec_channel *dst, 1115 const union tgsi_exec_channel *src0, 1116 const union tgsi_exec_channel *src1 ) 1117{ 1118 dst->u[0] = src0->u[0] ^ src1->u[0]; 1119 dst->u[1] = src0->u[1] ^ src1->u[1]; 1120 dst->u[2] = src0->u[2] ^ src1->u[2]; 1121 dst->u[3] = src0->u[3] ^ src1->u[3]; 1122} 1123 1124static void 1125fetch_src_file_channel( 1126 const struct tgsi_exec_machine *mach, 1127 const uint file, 1128 const uint swizzle, 1129 const union tgsi_exec_channel *index, 1130 union tgsi_exec_channel *chan ) 1131{ 1132 switch( swizzle ) { 1133 case TGSI_SWIZZLE_X: 1134 case TGSI_SWIZZLE_Y: 1135 case TGSI_SWIZZLE_Z: 1136 case TGSI_SWIZZLE_W: 1137 switch( file ) { 1138 case TGSI_FILE_CONSTANT: 1139 assert(mach->Consts); 1140 if (index->i[0] < 0) 1141 chan->f[0] = 0.0f; 1142 else 1143 chan->f[0] = mach->Consts[index->i[0]][swizzle]; 1144 if (index->i[1] < 0) 1145 chan->f[1] = 0.0f; 1146 else 1147 chan->f[1] = mach->Consts[index->i[1]][swizzle]; 1148 if (index->i[2] < 0) 1149 chan->f[2] = 0.0f; 1150 else 1151 chan->f[2] = mach->Consts[index->i[2]][swizzle]; 1152 if (index->i[3] < 0) 1153 chan->f[3] = 0.0f; 1154 else 1155 chan->f[3] = mach->Consts[index->i[3]][swizzle]; 1156 break; 1157 1158 case TGSI_FILE_INPUT: 1159 chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; 1160 chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; 1161 chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; 1162 chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; 1163 break; 1164 1165 case TGSI_FILE_TEMPORARY: 1166 assert(index->i[0] < TGSI_EXEC_NUM_TEMPS); 1167 chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; 1168 chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; 1169 chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; 1170 chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; 1171 break; 1172 1173 case TGSI_FILE_IMMEDIATE: 1174 assert( index->i[0] < (int) mach->ImmLimit ); 1175 chan->f[0] = mach->Imms[index->i[0]][swizzle]; 1176 assert( index->i[1] < (int) mach->ImmLimit ); 1177 chan->f[1] = mach->Imms[index->i[1]][swizzle]; 1178 assert( index->i[2] < (int) mach->ImmLimit ); 1179 chan->f[2] = mach->Imms[index->i[2]][swizzle]; 1180 assert( index->i[3] < (int) mach->ImmLimit ); 1181 chan->f[3] = mach->Imms[index->i[3]][swizzle]; 1182 break; 1183 1184 case TGSI_FILE_ADDRESS: 1185 chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; 1186 chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; 1187 chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; 1188 chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; 1189 break; 1190 1191 case TGSI_FILE_PREDICATE: 1192 assert(index->i[0] < TGSI_EXEC_NUM_PREDS); 1193 assert(index->i[1] < TGSI_EXEC_NUM_PREDS); 1194 assert(index->i[2] < TGSI_EXEC_NUM_PREDS); 1195 assert(index->i[3] < TGSI_EXEC_NUM_PREDS); 1196 chan->u[0] = mach->Addrs[0].xyzw[swizzle].u[0]; 1197 chan->u[1] = mach->Addrs[0].xyzw[swizzle].u[1]; 1198 chan->u[2] = mach->Addrs[0].xyzw[swizzle].u[2]; 1199 chan->u[3] = mach->Addrs[0].xyzw[swizzle].u[3]; 1200 break; 1201 1202 case TGSI_FILE_OUTPUT: 1203 /* vertex/fragment output vars can be read too */ 1204 chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; 1205 chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; 1206 chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; 1207 chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; 1208 break; 1209 1210 default: 1211 assert( 0 ); 1212 chan->u[0] = 0; 1213 chan->u[1] = 0; 1214 chan->u[2] = 0; 1215 chan->u[3] = 0; 1216 } 1217 break; 1218 1219 default: 1220 assert( 0 ); 1221 chan->u[0] = 0; 1222 chan->u[1] = 0; 1223 chan->u[2] = 0; 1224 chan->u[3] = 0; 1225 } 1226} 1227 1228static void 1229fetch_source( 1230 const struct tgsi_exec_machine *mach, 1231 union tgsi_exec_channel *chan, 1232 const struct tgsi_full_src_register *reg, 1233 const uint chan_index ) 1234{ 1235 union tgsi_exec_channel index; 1236 uint swizzle; 1237 1238 /* We start with a direct index into a register file. 1239 * 1240 * file[1], 1241 * where: 1242 * file = SrcRegister.File 1243 * [1] = SrcRegister.Index 1244 */ 1245 index.i[0] = 1246 index.i[1] = 1247 index.i[2] = 1248 index.i[3] = reg->SrcRegister.Index; 1249 1250 /* There is an extra source register that indirectly subscripts 1251 * a register file. The direct index now becomes an offset 1252 * that is being added to the indirect register. 1253 * 1254 * file[ind[2].x+1], 1255 * where: 1256 * ind = SrcRegisterInd.File 1257 * [2] = SrcRegisterInd.Index 1258 * .x = SrcRegisterInd.SwizzleX 1259 */ 1260 if (reg->SrcRegister.Indirect) { 1261 union tgsi_exec_channel index2; 1262 union tgsi_exec_channel indir_index; 1263 const uint execmask = mach->ExecMask; 1264 uint i; 1265 1266 /* which address register (always zero now) */ 1267 index2.i[0] = 1268 index2.i[1] = 1269 index2.i[2] = 1270 index2.i[3] = reg->SrcRegisterInd.Index; 1271 1272 /* get current value of address register[swizzle] */ 1273 swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterInd, CHAN_X ); 1274 fetch_src_file_channel( 1275 mach, 1276 reg->SrcRegisterInd.File, 1277 swizzle, 1278 &index2, 1279 &indir_index ); 1280 1281 /* add value of address register to the offset */ 1282 index.i[0] += (int) indir_index.f[0]; 1283 index.i[1] += (int) indir_index.f[1]; 1284 index.i[2] += (int) indir_index.f[2]; 1285 index.i[3] += (int) indir_index.f[3]; 1286 1287 /* for disabled execution channels, zero-out the index to 1288 * avoid using a potential garbage value. 1289 */ 1290 for (i = 0; i < QUAD_SIZE; i++) { 1291 if ((execmask & (1 << i)) == 0) 1292 index.i[i] = 0; 1293 } 1294 } 1295 1296 /* There is an extra source register that is a second 1297 * subscript to a register file. Effectively it means that 1298 * the register file is actually a 2D array of registers. 1299 * 1300 * file[1][3] == file[1*sizeof(file[1])+3], 1301 * where: 1302 * [3] = SrcRegisterDim.Index 1303 */ 1304 if (reg->SrcRegister.Dimension) { 1305 /* The size of the first-order array depends on the register file type. 1306 * We need to multiply the index to the first array to get an effective, 1307 * "flat" index that points to the beginning of the second-order array. 1308 */ 1309 switch (reg->SrcRegister.File) { 1310 case TGSI_FILE_INPUT: 1311 index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1312 index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1313 index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1314 index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; 1315 break; 1316 case TGSI_FILE_CONSTANT: 1317 index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER; 1318 index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER; 1319 index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER; 1320 index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER; 1321 break; 1322 default: 1323 assert( 0 ); 1324 } 1325 1326 index.i[0] += reg->SrcRegisterDim.Index; 1327 index.i[1] += reg->SrcRegisterDim.Index; 1328 index.i[2] += reg->SrcRegisterDim.Index; 1329 index.i[3] += reg->SrcRegisterDim.Index; 1330 1331 /* Again, the second subscript index can be addressed indirectly 1332 * identically to the first one. 1333 * Nothing stops us from indirectly addressing the indirect register, 1334 * but there is no need for that, so we won't exercise it. 1335 * 1336 * file[1][ind[4].y+3], 1337 * where: 1338 * ind = SrcRegisterDimInd.File 1339 * [4] = SrcRegisterDimInd.Index 1340 * .y = SrcRegisterDimInd.SwizzleX 1341 */ 1342 if (reg->SrcRegisterDim.Indirect) { 1343 union tgsi_exec_channel index2; 1344 union tgsi_exec_channel indir_index; 1345 const uint execmask = mach->ExecMask; 1346 uint i; 1347 1348 index2.i[0] = 1349 index2.i[1] = 1350 index2.i[2] = 1351 index2.i[3] = reg->SrcRegisterDimInd.Index; 1352 1353 swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); 1354 fetch_src_file_channel( 1355 mach, 1356 reg->SrcRegisterDimInd.File, 1357 swizzle, 1358 &index2, 1359 &indir_index ); 1360 1361 index.i[0] += (int) indir_index.f[0]; 1362 index.i[1] += (int) indir_index.f[1]; 1363 index.i[2] += (int) indir_index.f[2]; 1364 index.i[3] += (int) indir_index.f[3]; 1365 1366 /* for disabled execution channels, zero-out the index to 1367 * avoid using a potential garbage value. 1368 */ 1369 for (i = 0; i < QUAD_SIZE; i++) { 1370 if ((execmask & (1 << i)) == 0) 1371 index.i[i] = 0; 1372 } 1373 } 1374 1375 /* If by any chance there was a need for a 3D array of register 1376 * files, we would have to check whether SrcRegisterDim is followed 1377 * by a dimension register and continue the saga. 1378 */ 1379 } 1380 1381 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 1382 fetch_src_file_channel( 1383 mach, 1384 reg->SrcRegister.File, 1385 swizzle, 1386 &index, 1387 chan ); 1388 1389 switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { 1390 case TGSI_UTIL_SIGN_CLEAR: 1391 micro_abs( chan, chan ); 1392 break; 1393 1394 case TGSI_UTIL_SIGN_SET: 1395 micro_abs( chan, chan ); 1396 micro_neg( chan, chan ); 1397 break; 1398 1399 case TGSI_UTIL_SIGN_TOGGLE: 1400 micro_neg( chan, chan ); 1401 break; 1402 1403 case TGSI_UTIL_SIGN_KEEP: 1404 break; 1405 } 1406 1407 if (reg->SrcRegisterExtMod.Complement) { 1408 micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan ); 1409 } 1410} 1411 1412static void 1413store_dest( 1414 struct tgsi_exec_machine *mach, 1415 const union tgsi_exec_channel *chan, 1416 const struct tgsi_full_dst_register *reg, 1417 const struct tgsi_full_instruction *inst, 1418 uint chan_index ) 1419{ 1420 uint i; 1421 union tgsi_exec_channel null; 1422 union tgsi_exec_channel *dst; 1423 uint execmask = mach->ExecMask; 1424 int offset = 0; /* indirection offset */ 1425 int index; 1426 1427 if (0) { 1428 check_inf_or_nan(chan); 1429 } 1430 1431 /* There is an extra source register that indirectly subscripts 1432 * a register file. The direct index now becomes an offset 1433 * that is being added to the indirect register. 1434 * 1435 * file[ind[2].x+1], 1436 * where: 1437 * ind = DstRegisterInd.File 1438 * [2] = DstRegisterInd.Index 1439 * .x = DstRegisterInd.SwizzleX 1440 */ 1441 if (reg->DstRegister.Indirect) { 1442 union tgsi_exec_channel index; 1443 union tgsi_exec_channel indir_index; 1444 uint swizzle; 1445 1446 /* which address register (always zero for now) */ 1447 index.i[0] = 1448 index.i[1] = 1449 index.i[2] = 1450 index.i[3] = reg->DstRegisterInd.Index; 1451 1452 /* get current value of address register[swizzle] */ 1453 swizzle = tgsi_util_get_src_register_swizzle( ®->DstRegisterInd, CHAN_X ); 1454 1455 /* fetch values from the address/indirection register */ 1456 fetch_src_file_channel( 1457 mach, 1458 reg->DstRegisterInd.File, 1459 swizzle, 1460 &index, 1461 &indir_index ); 1462 1463 /* save indirection offset */ 1464 offset = (int) indir_index.f[0]; 1465 } 1466 1467 switch (reg->DstRegister.File) { 1468 case TGSI_FILE_NULL: 1469 dst = &null; 1470 break; 1471 1472 case TGSI_FILE_OUTPUT: 1473 index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] 1474 + reg->DstRegister.Index; 1475 dst = &mach->Outputs[offset + index].xyzw[chan_index]; 1476 break; 1477 1478 case TGSI_FILE_TEMPORARY: 1479 index = reg->DstRegister.Index; 1480 assert( index < TGSI_EXEC_NUM_TEMPS ); 1481 dst = &mach->Temps[offset + index].xyzw[chan_index]; 1482 break; 1483 1484 case TGSI_FILE_ADDRESS: 1485 index = reg->DstRegister.Index; 1486 dst = &mach->Addrs[index].xyzw[chan_index]; 1487 break; 1488 1489 case TGSI_FILE_PREDICATE: 1490 index = reg->DstRegister.Index; 1491 assert(index < TGSI_EXEC_NUM_PREDS); 1492 dst = &mach->Addrs[index].xyzw[chan_index]; 1493 break; 1494 1495 default: 1496 assert( 0 ); 1497 return; 1498 } 1499 1500 switch (inst->Instruction.Saturate) { 1501 case TGSI_SAT_NONE: 1502 for (i = 0; i < QUAD_SIZE; i++) 1503 if (execmask & (1 << i)) 1504 dst->i[i] = chan->i[i]; 1505 break; 1506 1507 case TGSI_SAT_ZERO_ONE: 1508 for (i = 0; i < QUAD_SIZE; i++) 1509 if (execmask & (1 << i)) { 1510 if (chan->f[i] < 0.0f) 1511 dst->f[i] = 0.0f; 1512 else if (chan->f[i] > 1.0f) 1513 dst->f[i] = 1.0f; 1514 else 1515 dst->i[i] = chan->i[i]; 1516 } 1517 break; 1518 1519 case TGSI_SAT_MINUS_PLUS_ONE: 1520 for (i = 0; i < QUAD_SIZE; i++) 1521 if (execmask & (1 << i)) { 1522 if (chan->f[i] < -1.0f) 1523 dst->f[i] = -1.0f; 1524 else if (chan->f[i] > 1.0f) 1525 dst->f[i] = 1.0f; 1526 else 1527 dst->i[i] = chan->i[i]; 1528 } 1529 break; 1530 1531 default: 1532 assert( 0 ); 1533 } 1534} 1535 1536#define FETCH(VAL,INDEX,CHAN)\ 1537 fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) 1538 1539#define STORE(VAL,INDEX,CHAN)\ 1540 store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) 1541 1542 1543/** 1544 * Execute ARB-style KIL which is predicated by a src register. 1545 * Kill fragment if any of the four values is less than zero. 1546 */ 1547static void 1548exec_kil(struct tgsi_exec_machine *mach, 1549 const struct tgsi_full_instruction *inst) 1550{ 1551 uint uniquemask; 1552 uint chan_index; 1553 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1554 union tgsi_exec_channel r[1]; 1555 1556 /* This mask stores component bits that were already tested. */ 1557 uniquemask = 0; 1558 1559 for (chan_index = 0; chan_index < 4; chan_index++) 1560 { 1561 uint swizzle; 1562 uint i; 1563 1564 /* unswizzle channel */ 1565 swizzle = tgsi_util_get_full_src_register_swizzle ( 1566 &inst->FullSrcRegisters[0], 1567 chan_index); 1568 1569 /* check if the component has not been already tested */ 1570 if (uniquemask & (1 << swizzle)) 1571 continue; 1572 uniquemask |= 1 << swizzle; 1573 1574 FETCH(&r[0], 0, chan_index); 1575 for (i = 0; i < 4; i++) 1576 if (r[0].f[i] < 0.0f) 1577 kilmask |= 1 << i; 1578 } 1579 1580 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1581} 1582 1583/** 1584 * Execute NVIDIA-style KIL which is predicated by a condition code. 1585 * Kill fragment if the condition code is TRUE. 1586 */ 1587static void 1588exec_kilp(struct tgsi_exec_machine *mach, 1589 const struct tgsi_full_instruction *inst) 1590{ 1591 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1592 1593 /* "unconditional" kil */ 1594 kilmask = mach->ExecMask; 1595 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1596} 1597 1598 1599/* 1600 * Fetch a four texture samples using STR texture coordinates. 1601 */ 1602static void 1603fetch_texel( struct tgsi_sampler *sampler, 1604 const union tgsi_exec_channel *s, 1605 const union tgsi_exec_channel *t, 1606 const union tgsi_exec_channel *p, 1607 float lodbias, /* XXX should be float[4] */ 1608 union tgsi_exec_channel *r, 1609 union tgsi_exec_channel *g, 1610 union tgsi_exec_channel *b, 1611 union tgsi_exec_channel *a ) 1612{ 1613 uint j; 1614 float rgba[NUM_CHANNELS][QUAD_SIZE]; 1615 1616 sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba); 1617 1618 for (j = 0; j < 4; j++) { 1619 r->f[j] = rgba[0][j]; 1620 g->f[j] = rgba[1][j]; 1621 b->f[j] = rgba[2][j]; 1622 a->f[j] = rgba[3][j]; 1623 } 1624} 1625 1626 1627static void 1628exec_tex(struct tgsi_exec_machine *mach, 1629 const struct tgsi_full_instruction *inst, 1630 boolean biasLod, 1631 boolean projected) 1632{ 1633 const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; 1634 union tgsi_exec_channel r[4]; 1635 uint chan_index; 1636 float lodBias; 1637 1638 /* debug_printf("Sampler %u unit %u\n", sampler, unit); */ 1639 1640 switch (inst->InstructionExtTexture.Texture) { 1641 case TGSI_TEXTURE_1D: 1642 case TGSI_TEXTURE_SHADOW1D: 1643 1644 FETCH(&r[0], 0, CHAN_X); 1645 1646 if (projected) { 1647 FETCH(&r[1], 0, CHAN_W); 1648 micro_div( &r[0], &r[0], &r[1] ); 1649 } 1650 1651 if (biasLod) { 1652 FETCH(&r[1], 0, CHAN_W); 1653 lodBias = r[2].f[0]; 1654 } 1655 else 1656 lodBias = 0.0; 1657 1658 fetch_texel(mach->Samplers[unit], 1659 &r[0], &ZeroVec, &ZeroVec, lodBias, /* S, T, P, BIAS */ 1660 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 1661 break; 1662 1663 case TGSI_TEXTURE_2D: 1664 case TGSI_TEXTURE_RECT: 1665 case TGSI_TEXTURE_SHADOW2D: 1666 case TGSI_TEXTURE_SHADOWRECT: 1667 1668 FETCH(&r[0], 0, CHAN_X); 1669 FETCH(&r[1], 0, CHAN_Y); 1670 FETCH(&r[2], 0, CHAN_Z); 1671 1672 if (projected) { 1673 FETCH(&r[3], 0, CHAN_W); 1674 micro_div( &r[0], &r[0], &r[3] ); 1675 micro_div( &r[1], &r[1], &r[3] ); 1676 micro_div( &r[2], &r[2], &r[3] ); 1677 } 1678 1679 if (biasLod) { 1680 FETCH(&r[3], 0, CHAN_W); 1681 lodBias = r[3].f[0]; 1682 } 1683 else 1684 lodBias = 0.0; 1685 1686 fetch_texel(mach->Samplers[unit], 1687 &r[0], &r[1], &r[2], lodBias, /* inputs */ 1688 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 1689 break; 1690 1691 case TGSI_TEXTURE_3D: 1692 case TGSI_TEXTURE_CUBE: 1693 1694 FETCH(&r[0], 0, CHAN_X); 1695 FETCH(&r[1], 0, CHAN_Y); 1696 FETCH(&r[2], 0, CHAN_Z); 1697 1698 if (projected) { 1699 FETCH(&r[3], 0, CHAN_W); 1700 micro_div( &r[0], &r[0], &r[3] ); 1701 micro_div( &r[1], &r[1], &r[3] ); 1702 micro_div( &r[2], &r[2], &r[3] ); 1703 } 1704 1705 if (biasLod) { 1706 FETCH(&r[3], 0, CHAN_W); 1707 lodBias = r[3].f[0]; 1708 } 1709 else 1710 lodBias = 0.0; 1711 1712 fetch_texel(mach->Samplers[unit], 1713 &r[0], &r[1], &r[2], lodBias, 1714 &r[0], &r[1], &r[2], &r[3]); 1715 break; 1716 1717 default: 1718 assert (0); 1719 } 1720 1721 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1722 STORE( &r[chan_index], 0, chan_index ); 1723 } 1724} 1725 1726 1727/** 1728 * Evaluate a constant-valued coefficient at the position of the 1729 * current quad. 1730 */ 1731static void 1732eval_constant_coef( 1733 struct tgsi_exec_machine *mach, 1734 unsigned attrib, 1735 unsigned chan ) 1736{ 1737 unsigned i; 1738 1739 for( i = 0; i < QUAD_SIZE; i++ ) { 1740 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; 1741 } 1742} 1743 1744/** 1745 * Evaluate a linear-valued coefficient at the position of the 1746 * current quad. 1747 */ 1748static void 1749eval_linear_coef( 1750 struct tgsi_exec_machine *mach, 1751 unsigned attrib, 1752 unsigned chan ) 1753{ 1754 const float x = mach->QuadPos.xyzw[0].f[0]; 1755 const float y = mach->QuadPos.xyzw[1].f[0]; 1756 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1757 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1758 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1759 mach->Inputs[attrib].xyzw[chan].f[0] = a0; 1760 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; 1761 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; 1762 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; 1763} 1764 1765/** 1766 * Evaluate a perspective-valued coefficient at the position of the 1767 * current quad. 1768 */ 1769static void 1770eval_perspective_coef( 1771 struct tgsi_exec_machine *mach, 1772 unsigned attrib, 1773 unsigned chan ) 1774{ 1775 const float x = mach->QuadPos.xyzw[0].f[0]; 1776 const float y = mach->QuadPos.xyzw[1].f[0]; 1777 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1778 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1779 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1780 const float *w = mach->QuadPos.xyzw[3].f; 1781 /* divide by W here */ 1782 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; 1783 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; 1784 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; 1785 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; 1786} 1787 1788 1789typedef void (* eval_coef_func)( 1790 struct tgsi_exec_machine *mach, 1791 unsigned attrib, 1792 unsigned chan ); 1793 1794static void 1795exec_declaration( 1796 struct tgsi_exec_machine *mach, 1797 const struct tgsi_full_declaration *decl ) 1798{ 1799 if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { 1800 if( decl->Declaration.File == TGSI_FILE_INPUT ) { 1801 unsigned first, last, mask; 1802 eval_coef_func eval; 1803 1804 first = decl->DeclarationRange.First; 1805 last = decl->DeclarationRange.Last; 1806 mask = decl->Declaration.UsageMask; 1807 1808 switch( decl->Declaration.Interpolate ) { 1809 case TGSI_INTERPOLATE_CONSTANT: 1810 eval = eval_constant_coef; 1811 break; 1812 1813 case TGSI_INTERPOLATE_LINEAR: 1814 eval = eval_linear_coef; 1815 break; 1816 1817 case TGSI_INTERPOLATE_PERSPECTIVE: 1818 eval = eval_perspective_coef; 1819 break; 1820 1821 default: 1822 assert( 0 ); 1823 return; 1824 } 1825 1826 if( mask == TGSI_WRITEMASK_XYZW ) { 1827 unsigned i, j; 1828 1829 for( i = first; i <= last; i++ ) { 1830 for( j = 0; j < NUM_CHANNELS; j++ ) { 1831 eval( mach, i, j ); 1832 } 1833 } 1834 } 1835 else { 1836 unsigned i, j; 1837 1838 for( j = 0; j < NUM_CHANNELS; j++ ) { 1839 if( mask & (1 << j) ) { 1840 for( i = first; i <= last; i++ ) { 1841 eval( mach, i, j ); 1842 } 1843 } 1844 } 1845 } 1846 } 1847 } 1848} 1849 1850static void 1851exec_instruction( 1852 struct tgsi_exec_machine *mach, 1853 const struct tgsi_full_instruction *inst, 1854 int *pc ) 1855{ 1856 uint chan_index; 1857 union tgsi_exec_channel r[3 * NUM_CHANNELS]; 1858 union tgsi_exec_channel d[8]; 1859 1860 (*pc)++; 1861 1862 switch (inst->Instruction.Opcode) { 1863 case TGSI_OPCODE_ARL: 1864 case TGSI_OPCODE_FLR: 1865 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1866 FETCH( &r[0], 0, chan_index ); 1867 micro_flr( &r[0], &r[0] ); 1868 STORE( &r[0], 0, chan_index ); 1869 } 1870 break; 1871 1872 case TGSI_OPCODE_MOV: 1873 if (inst->Flags & SOA_DEPENDENCY_FLAG) { 1874 /* Do all fetches into temp regs, then do all stores to avoid 1875 * intermediate/accidental clobbering. This could be done all the 1876 * time for MOV but for other instructions we'll need more temps... 1877 */ 1878 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1879 FETCH( &r[chan_index], 0, chan_index ); 1880 } 1881 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1882 STORE( &r[chan_index], 0, chan_index ); 1883 } 1884 } 1885 else { 1886 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1887 FETCH( &r[0], 0, chan_index ); 1888 STORE( &r[0], 0, chan_index ); 1889 } 1890 } 1891 break; 1892 1893 case TGSI_OPCODE_LIT: 1894 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 1895 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); 1896 } 1897 1898 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1899 FETCH( &r[0], 0, CHAN_X ); 1900 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 1901 micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 1902 STORE( &r[0], 0, CHAN_Y ); 1903 } 1904 1905 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1906 FETCH( &r[1], 0, CHAN_Y ); 1907 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 1908 1909 FETCH( &r[2], 0, CHAN_W ); 1910 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); 1911 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); 1912 micro_pow( &r[1], &r[1], &r[2] ); 1913 micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 1914 STORE( &r[0], 0, CHAN_Z ); 1915 } 1916 } 1917 1918 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 1919 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 1920 } 1921 break; 1922 1923 case TGSI_OPCODE_RCP: 1924 /* TGSI_OPCODE_RECIP */ 1925 FETCH( &r[0], 0, CHAN_X ); 1926 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); 1927 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1928 STORE( &r[0], 0, chan_index ); 1929 } 1930 break; 1931 1932 case TGSI_OPCODE_RSQ: 1933 /* TGSI_OPCODE_RECIPSQRT */ 1934 FETCH( &r[0], 0, CHAN_X ); 1935 micro_abs( &r[0], &r[0] ); 1936 micro_sqrt( &r[0], &r[0] ); 1937 micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); 1938 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 1939 STORE( &r[0], 0, chan_index ); 1940 } 1941 break; 1942 1943 case TGSI_OPCODE_EXP: 1944 FETCH( &r[0], 0, CHAN_X ); 1945 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */ 1946 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 1947 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */ 1948 STORE( &r[2], 0, CHAN_X ); /* store r2 */ 1949 } 1950 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 1951 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */ 1952 STORE( &r[2], 0, CHAN_Y ); /* store r2 */ 1953 } 1954 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1955 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */ 1956 STORE( &r[2], 0, CHAN_Z ); /* store r2 */ 1957 } 1958 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 1959 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 1960 } 1961 break; 1962 1963 case TGSI_OPCODE_LOG: 1964 FETCH( &r[0], 0, CHAN_X ); 1965 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */ 1966 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */ 1967 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */ 1968 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 1969 STORE( &r[0], 0, CHAN_X ); 1970 } 1971 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 1972 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */ 1973 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */ 1974 STORE( &r[0], 0, CHAN_Y ); 1975 } 1976 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 1977 STORE( &r[1], 0, CHAN_Z ); 1978 } 1979 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 1980 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 1981 } 1982 break; 1983 1984 case TGSI_OPCODE_MUL: 1985 if (inst->Flags & SOA_DEPENDENCY_FLAG) { 1986 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) 1987 { 1988 FETCH(&r[chan_index], 0, chan_index); 1989 FETCH(&r[chan_index + NUM_CHANNELS], 1, chan_index); 1990 } 1991 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) 1992 { 1993 micro_mul( &r[chan_index], &r[chan_index], &r[chan_index + NUM_CHANNELS] ); 1994 STORE(&r[chan_index], 0, chan_index); 1995 } 1996 } else { 1997 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) 1998 { 1999 FETCH(&r[0], 0, chan_index); 2000 FETCH(&r[1], 1, chan_index); 2001 2002 micro_mul( &r[0], &r[0], &r[1] ); 2003 2004 STORE(&r[0], 0, chan_index); 2005 } 2006 } 2007 break; 2008 2009 case TGSI_OPCODE_ADD: 2010 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2011 FETCH( &r[0], 0, chan_index ); 2012 FETCH( &r[1], 1, chan_index ); 2013 micro_add( &r[0], &r[0], &r[1] ); 2014 STORE( &r[0], 0, chan_index ); 2015 } 2016 break; 2017 2018 case TGSI_OPCODE_DP3: 2019 /* TGSI_OPCODE_DOT3 */ 2020 FETCH( &r[0], 0, CHAN_X ); 2021 FETCH( &r[1], 1, CHAN_X ); 2022 micro_mul( &r[0], &r[0], &r[1] ); 2023 2024 FETCH( &r[1], 0, CHAN_Y ); 2025 FETCH( &r[2], 1, CHAN_Y ); 2026 micro_mul( &r[1], &r[1], &r[2] ); 2027 micro_add( &r[0], &r[0], &r[1] ); 2028 2029 FETCH( &r[1], 0, CHAN_Z ); 2030 FETCH( &r[2], 1, CHAN_Z ); 2031 micro_mul( &r[1], &r[1], &r[2] ); 2032 micro_add( &r[0], &r[0], &r[1] ); 2033 2034 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2035 STORE( &r[0], 0, chan_index ); 2036 } 2037 break; 2038 2039 case TGSI_OPCODE_DP4: 2040 /* TGSI_OPCODE_DOT4 */ 2041 FETCH(&r[0], 0, CHAN_X); 2042 FETCH(&r[1], 1, CHAN_X); 2043 2044 micro_mul( &r[0], &r[0], &r[1] ); 2045 2046 FETCH(&r[1], 0, CHAN_Y); 2047 FETCH(&r[2], 1, CHAN_Y); 2048 2049 micro_mul( &r[1], &r[1], &r[2] ); 2050 micro_add( &r[0], &r[0], &r[1] ); 2051 2052 FETCH(&r[1], 0, CHAN_Z); 2053 FETCH(&r[2], 1, CHAN_Z); 2054 2055 micro_mul( &r[1], &r[1], &r[2] ); 2056 micro_add( &r[0], &r[0], &r[1] ); 2057 2058 FETCH(&r[1], 0, CHAN_W); 2059 FETCH(&r[2], 1, CHAN_W); 2060 2061 micro_mul( &r[1], &r[1], &r[2] ); 2062 micro_add( &r[0], &r[0], &r[1] ); 2063 2064 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2065 STORE( &r[0], 0, chan_index ); 2066 } 2067 break; 2068 2069 case TGSI_OPCODE_DST: 2070 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2071 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); 2072 } 2073 2074 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2075 FETCH( &r[0], 0, CHAN_Y ); 2076 FETCH( &r[1], 1, CHAN_Y); 2077 micro_mul( &r[0], &r[0], &r[1] ); 2078 STORE( &r[0], 0, CHAN_Y ); 2079 } 2080 2081 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2082 FETCH( &r[0], 0, CHAN_Z ); 2083 STORE( &r[0], 0, CHAN_Z ); 2084 } 2085 2086 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2087 FETCH( &r[0], 1, CHAN_W ); 2088 STORE( &r[0], 0, CHAN_W ); 2089 } 2090 break; 2091 2092 case TGSI_OPCODE_MIN: 2093 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2094 FETCH(&r[0], 0, chan_index); 2095 FETCH(&r[1], 1, chan_index); 2096 2097 /* XXX use micro_min()?? */ 2098 micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] ); 2099 2100 STORE(&r[0], 0, chan_index); 2101 } 2102 break; 2103 2104 case TGSI_OPCODE_MAX: 2105 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2106 FETCH(&r[0], 0, chan_index); 2107 FETCH(&r[1], 1, chan_index); 2108 2109 /* XXX use micro_max()?? */ 2110 micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] ); 2111 2112 STORE(&r[0], 0, chan_index ); 2113 } 2114 break; 2115 2116 case TGSI_OPCODE_SLT: 2117 /* TGSI_OPCODE_SETLT */ 2118 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2119 FETCH( &r[0], 0, chan_index ); 2120 FETCH( &r[1], 1, chan_index ); 2121 micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2122 STORE( &r[0], 0, chan_index ); 2123 } 2124 break; 2125 2126 case TGSI_OPCODE_SGE: 2127 /* TGSI_OPCODE_SETGE */ 2128 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2129 FETCH( &r[0], 0, chan_index ); 2130 FETCH( &r[1], 1, chan_index ); 2131 micro_le( &r[0], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2132 STORE( &r[0], 0, chan_index ); 2133 } 2134 break; 2135 2136 case TGSI_OPCODE_MAD: 2137 /* TGSI_OPCODE_MADD */ 2138 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2139 FETCH( &r[0], 0, chan_index ); 2140 FETCH( &r[1], 1, chan_index ); 2141 micro_mul( &r[0], &r[0], &r[1] ); 2142 FETCH( &r[1], 2, chan_index ); 2143 micro_add( &r[0], &r[0], &r[1] ); 2144 STORE( &r[0], 0, chan_index ); 2145 } 2146 break; 2147 2148 case TGSI_OPCODE_SUB: 2149 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2150 FETCH(&r[0], 0, chan_index); 2151 FETCH(&r[1], 1, chan_index); 2152 2153 micro_sub( &r[0], &r[0], &r[1] ); 2154 2155 STORE(&r[0], 0, chan_index); 2156 } 2157 break; 2158 2159 case TGSI_OPCODE_LRP: 2160 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2161 FETCH(&r[0], 0, chan_index); 2162 FETCH(&r[1], 1, chan_index); 2163 FETCH(&r[2], 2, chan_index); 2164 2165 micro_sub( &r[1], &r[1], &r[2] ); 2166 micro_mul( &r[0], &r[0], &r[1] ); 2167 micro_add( &r[0], &r[0], &r[2] ); 2168 2169 STORE(&r[0], 0, chan_index); 2170 } 2171 break; 2172 2173 case TGSI_OPCODE_CND: 2174 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2175 FETCH(&r[0], 0, chan_index); 2176 FETCH(&r[1], 1, chan_index); 2177 FETCH(&r[2], 2, chan_index); 2178 micro_lt(&r[0], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]); 2179 STORE(&r[0], 0, chan_index); 2180 } 2181 break; 2182 2183 case TGSI_OPCODE_DP2A: 2184 FETCH( &r[0], 0, CHAN_X ); 2185 FETCH( &r[1], 1, CHAN_X ); 2186 micro_mul( &r[0], &r[0], &r[1] ); 2187 2188 FETCH( &r[1], 0, CHAN_Y ); 2189 FETCH( &r[2], 1, CHAN_Y ); 2190 micro_mul( &r[1], &r[1], &r[2] ); 2191 micro_add( &r[0], &r[0], &r[1] ); 2192 2193 FETCH( &r[2], 2, CHAN_X ); 2194 micro_add( &r[0], &r[0], &r[2] ); 2195 2196 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2197 STORE( &r[0], 0, chan_index ); 2198 } 2199 break; 2200 2201 case TGSI_OPCODE_FRC: 2202 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2203 FETCH( &r[0], 0, chan_index ); 2204 micro_frc( &r[0], &r[0] ); 2205 STORE( &r[0], 0, chan_index ); 2206 } 2207 break; 2208 2209 case TGSI_OPCODE_CLAMP: 2210 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2211 FETCH(&r[0], 0, chan_index); 2212 FETCH(&r[1], 1, chan_index); 2213 micro_max(&r[0], &r[0], &r[1]); 2214 FETCH(&r[1], 2, chan_index); 2215 micro_min(&r[0], &r[0], &r[1]); 2216 STORE(&r[0], 0, chan_index); 2217 } 2218 break; 2219 2220 case TGSI_OPCODE_ROUND: 2221 case TGSI_OPCODE_ARR: 2222 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2223 FETCH( &r[0], 0, chan_index ); 2224 micro_rnd( &r[0], &r[0] ); 2225 STORE( &r[0], 0, chan_index ); 2226 } 2227 break; 2228 2229 case TGSI_OPCODE_EX2: 2230 FETCH(&r[0], 0, CHAN_X); 2231 2232#if FAST_MATH 2233 micro_exp2( &r[0], &r[0] ); 2234#else 2235 micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] ); 2236#endif 2237 2238 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2239 STORE( &r[0], 0, chan_index ); 2240 } 2241 break; 2242 2243 case TGSI_OPCODE_LG2: 2244 FETCH( &r[0], 0, CHAN_X ); 2245 micro_lg2( &r[0], &r[0] ); 2246 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2247 STORE( &r[0], 0, chan_index ); 2248 } 2249 break; 2250 2251 case TGSI_OPCODE_POW: 2252 FETCH(&r[0], 0, CHAN_X); 2253 FETCH(&r[1], 1, CHAN_X); 2254 2255 micro_pow( &r[0], &r[0], &r[1] ); 2256 2257 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2258 STORE( &r[0], 0, chan_index ); 2259 } 2260 break; 2261 2262 case TGSI_OPCODE_XPD: 2263 FETCH(&r[0], 0, CHAN_Y); 2264 FETCH(&r[1], 1, CHAN_Z); 2265 2266 micro_mul( &r[2], &r[0], &r[1] ); 2267 2268 FETCH(&r[3], 0, CHAN_Z); 2269 FETCH(&r[4], 1, CHAN_Y); 2270 2271 micro_mul( &r[5], &r[3], &r[4] ); 2272 micro_sub(&d[CHAN_X], &r[2], &r[5]); 2273 2274 FETCH(&r[2], 1, CHAN_X); 2275 2276 micro_mul( &r[3], &r[3], &r[2] ); 2277 2278 FETCH(&r[5], 0, CHAN_X); 2279 2280 micro_mul( &r[1], &r[1], &r[5] ); 2281 micro_sub(&d[CHAN_Y], &r[3], &r[1]); 2282 2283 micro_mul( &r[5], &r[5], &r[4] ); 2284 micro_mul( &r[0], &r[0], &r[2] ); 2285 micro_sub(&d[CHAN_Z], &r[5], &r[0]); 2286 2287 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2288 STORE(&d[CHAN_X], 0, CHAN_X); 2289 } 2290 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2291 STORE(&d[CHAN_Y], 0, CHAN_Y); 2292 } 2293 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2294 STORE(&d[CHAN_Z], 0, CHAN_Z); 2295 } 2296 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2297 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2298 } 2299 break; 2300 2301 case TGSI_OPCODE_ABS: 2302 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2303 FETCH(&r[0], 0, chan_index); 2304 2305 micro_abs( &r[0], &r[0] ); 2306 2307 STORE(&r[0], 0, chan_index); 2308 } 2309 break; 2310 2311 case TGSI_OPCODE_RCC: 2312 FETCH(&r[0], 0, CHAN_X); 2313 micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]); 2314 micro_float_clamp(&r[0], &r[0]); 2315 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2316 STORE(&r[0], 0, chan_index); 2317 } 2318 break; 2319 2320 case TGSI_OPCODE_DPH: 2321 FETCH(&r[0], 0, CHAN_X); 2322 FETCH(&r[1], 1, CHAN_X); 2323 2324 micro_mul( &r[0], &r[0], &r[1] ); 2325 2326 FETCH(&r[1], 0, CHAN_Y); 2327 FETCH(&r[2], 1, CHAN_Y); 2328 2329 micro_mul( &r[1], &r[1], &r[2] ); 2330 micro_add( &r[0], &r[0], &r[1] ); 2331 2332 FETCH(&r[1], 0, CHAN_Z); 2333 FETCH(&r[2], 1, CHAN_Z); 2334 2335 micro_mul( &r[1], &r[1], &r[2] ); 2336 micro_add( &r[0], &r[0], &r[1] ); 2337 2338 FETCH(&r[1], 1, CHAN_W); 2339 2340 micro_add( &r[0], &r[0], &r[1] ); 2341 2342 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2343 STORE( &r[0], 0, chan_index ); 2344 } 2345 break; 2346 2347 case TGSI_OPCODE_COS: 2348 FETCH(&r[0], 0, CHAN_X); 2349 2350 micro_cos( &r[0], &r[0] ); 2351 2352 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2353 STORE( &r[0], 0, chan_index ); 2354 } 2355 break; 2356 2357 case TGSI_OPCODE_DDX: 2358 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2359 FETCH( &r[0], 0, chan_index ); 2360 micro_ddx( &r[0], &r[0] ); 2361 STORE( &r[0], 0, chan_index ); 2362 } 2363 break; 2364 2365 case TGSI_OPCODE_DDY: 2366 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2367 FETCH( &r[0], 0, chan_index ); 2368 micro_ddy( &r[0], &r[0] ); 2369 STORE( &r[0], 0, chan_index ); 2370 } 2371 break; 2372 2373 case TGSI_OPCODE_KILP: 2374 exec_kilp (mach, inst); 2375 break; 2376 2377 case TGSI_OPCODE_KIL: 2378 exec_kil (mach, inst); 2379 break; 2380 2381 case TGSI_OPCODE_PK2H: 2382 assert (0); 2383 break; 2384 2385 case TGSI_OPCODE_PK2US: 2386 assert (0); 2387 break; 2388 2389 case TGSI_OPCODE_PK4B: 2390 assert (0); 2391 break; 2392 2393 case TGSI_OPCODE_PK4UB: 2394 assert (0); 2395 break; 2396 2397 case TGSI_OPCODE_RFL: 2398 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2399 IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2400 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2401 /* r0 = dp3(src0, src0) */ 2402 FETCH(&r[2], 0, CHAN_X); 2403 micro_mul(&r[0], &r[2], &r[2]); 2404 FETCH(&r[4], 0, CHAN_Y); 2405 micro_mul(&r[8], &r[4], &r[4]); 2406 micro_add(&r[0], &r[0], &r[8]); 2407 FETCH(&r[6], 0, CHAN_Z); 2408 micro_mul(&r[8], &r[6], &r[6]); 2409 micro_add(&r[0], &r[0], &r[8]); 2410 2411 /* r1 = dp3(src0, src1) */ 2412 FETCH(&r[3], 1, CHAN_X); 2413 micro_mul(&r[1], &r[2], &r[3]); 2414 FETCH(&r[5], 1, CHAN_Y); 2415 micro_mul(&r[8], &r[4], &r[5]); 2416 micro_add(&r[1], &r[1], &r[8]); 2417 FETCH(&r[7], 1, CHAN_Z); 2418 micro_mul(&r[8], &r[6], &r[7]); 2419 micro_add(&r[1], &r[1], &r[8]); 2420 2421 /* r1 = 2 * r1 / r0 */ 2422 micro_add(&r[1], &r[1], &r[1]); 2423 micro_div(&r[1], &r[1], &r[0]); 2424 2425 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2426 micro_mul(&r[2], &r[2], &r[1]); 2427 micro_sub(&r[2], &r[2], &r[3]); 2428 STORE(&r[2], 0, CHAN_X); 2429 } 2430 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2431 micro_mul(&r[4], &r[4], &r[1]); 2432 micro_sub(&r[4], &r[4], &r[5]); 2433 STORE(&r[4], 0, CHAN_Y); 2434 } 2435 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2436 micro_mul(&r[6], &r[6], &r[1]); 2437 micro_sub(&r[6], &r[6], &r[7]); 2438 STORE(&r[6], 0, CHAN_Z); 2439 } 2440 } 2441 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2442 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W); 2443 } 2444 break; 2445 2446 case TGSI_OPCODE_SEQ: 2447 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2448 FETCH( &r[0], 0, chan_index ); 2449 FETCH( &r[1], 1, chan_index ); 2450 micro_eq( &r[0], &r[0], &r[1], 2451 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 2452 &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2453 STORE( &r[0], 0, chan_index ); 2454 } 2455 break; 2456 2457 case TGSI_OPCODE_SFL: 2458 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2459 STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index); 2460 } 2461 break; 2462 2463 case TGSI_OPCODE_SGT: 2464 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2465 FETCH( &r[0], 0, chan_index ); 2466 FETCH( &r[1], 1, chan_index ); 2467 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); 2468 STORE( &r[0], 0, chan_index ); 2469 } 2470 break; 2471 2472 case TGSI_OPCODE_SIN: 2473 FETCH( &r[0], 0, CHAN_X ); 2474 micro_sin( &r[0], &r[0] ); 2475 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2476 STORE( &r[0], 0, chan_index ); 2477 } 2478 break; 2479 2480 case TGSI_OPCODE_SLE: 2481 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2482 FETCH( &r[0], 0, chan_index ); 2483 FETCH( &r[1], 1, chan_index ); 2484 micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2485 STORE( &r[0], 0, chan_index ); 2486 } 2487 break; 2488 2489 case TGSI_OPCODE_SNE: 2490 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2491 FETCH( &r[0], 0, chan_index ); 2492 FETCH( &r[1], 1, chan_index ); 2493 micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); 2494 STORE( &r[0], 0, chan_index ); 2495 } 2496 break; 2497 2498 case TGSI_OPCODE_STR: 2499 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2500 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index); 2501 } 2502 break; 2503 2504 case TGSI_OPCODE_TEX: 2505 /* simple texture lookup */ 2506 /* src[0] = texcoord */ 2507 /* src[1] = sampler unit */ 2508 exec_tex(mach, inst, FALSE, FALSE); 2509 break; 2510 2511 case TGSI_OPCODE_TXB: 2512 /* Texture lookup with lod bias */ 2513 /* src[0] = texcoord (src[0].w = LOD bias) */ 2514 /* src[1] = sampler unit */ 2515 exec_tex(mach, inst, TRUE, FALSE); 2516 break; 2517 2518 case TGSI_OPCODE_TXD: 2519 /* Texture lookup with explict partial derivatives */ 2520 /* src[0] = texcoord */ 2521 /* src[1] = d[strq]/dx */ 2522 /* src[2] = d[strq]/dy */ 2523 /* src[3] = sampler unit */ 2524 assert (0); 2525 break; 2526 2527 case TGSI_OPCODE_TXL: 2528 /* Texture lookup with explit LOD */ 2529 /* src[0] = texcoord (src[0].w = LOD) */ 2530 /* src[1] = sampler unit */ 2531 exec_tex(mach, inst, TRUE, FALSE); 2532 break; 2533 2534 case TGSI_OPCODE_TXP: 2535 /* Texture lookup with projection */ 2536 /* src[0] = texcoord (src[0].w = projection) */ 2537 /* src[1] = sampler unit */ 2538 exec_tex(mach, inst, FALSE, TRUE); 2539 break; 2540 2541 case TGSI_OPCODE_UP2H: 2542 assert (0); 2543 break; 2544 2545 case TGSI_OPCODE_UP2US: 2546 assert (0); 2547 break; 2548 2549 case TGSI_OPCODE_UP4B: 2550 assert (0); 2551 break; 2552 2553 case TGSI_OPCODE_UP4UB: 2554 assert (0); 2555 break; 2556 2557 case TGSI_OPCODE_X2D: 2558 FETCH(&r[0], 1, CHAN_X); 2559 FETCH(&r[1], 1, CHAN_Y); 2560 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2561 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2562 FETCH(&r[2], 2, CHAN_X); 2563 micro_mul(&r[2], &r[2], &r[0]); 2564 FETCH(&r[3], 2, CHAN_Y); 2565 micro_mul(&r[3], &r[3], &r[1]); 2566 micro_add(&r[2], &r[2], &r[3]); 2567 FETCH(&r[3], 0, CHAN_X); 2568 micro_add(&r[2], &r[2], &r[3]); 2569 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2570 STORE(&r[2], 0, CHAN_X); 2571 } 2572 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2573 STORE(&r[2], 0, CHAN_Z); 2574 } 2575 } 2576 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2577 IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2578 FETCH(&r[2], 2, CHAN_Z); 2579 micro_mul(&r[2], &r[2], &r[0]); 2580 FETCH(&r[3], 2, CHAN_W); 2581 micro_mul(&r[3], &r[3], &r[1]); 2582 micro_add(&r[2], &r[2], &r[3]); 2583 FETCH(&r[3], 0, CHAN_Y); 2584 micro_add(&r[2], &r[2], &r[3]); 2585 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2586 STORE(&r[2], 0, CHAN_Y); 2587 } 2588 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2589 STORE(&r[2], 0, CHAN_W); 2590 } 2591 } 2592 break; 2593 2594 case TGSI_OPCODE_ARA: 2595 assert (0); 2596 break; 2597 2598 case TGSI_OPCODE_BRA: 2599 assert (0); 2600 break; 2601 2602 case TGSI_OPCODE_CAL: 2603 /* skip the call if no execution channels are enabled */ 2604 if (mach->ExecMask) { 2605 /* do the call */ 2606 2607 /* First, record the depths of the execution stacks. 2608 * This is important for deeply nested/looped return statements. 2609 * We have to unwind the stacks by the correct amount. For a 2610 * real code generator, we could determine the number of entries 2611 * to pop off each stack with simple static analysis and avoid 2612 * implementing this data structure at run time. 2613 */ 2614 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop; 2615 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop; 2616 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop; 2617 /* note that PC was already incremented above */ 2618 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc; 2619 2620 mach->CallStackTop++; 2621 2622 /* Second, push the Cond, Loop, Cont, Func stacks */ 2623 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 2624 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 2625 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2626 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 2627 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2628 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 2629 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); 2630 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; 2631 2632 /* Finally, jump to the subroutine */ 2633 *pc = inst->InstructionExtLabel.Label; 2634 } 2635 break; 2636 2637 case TGSI_OPCODE_RET: 2638 mach->FuncMask &= ~mach->ExecMask; 2639 UPDATE_EXEC_MASK(mach); 2640 2641 if (mach->FuncMask == 0x0) { 2642 /* really return now (otherwise, keep executing */ 2643 2644 if (mach->CallStackTop == 0) { 2645 /* returning from main() */ 2646 *pc = -1; 2647 return; 2648 } 2649 2650 assert(mach->CallStackTop > 0); 2651 mach->CallStackTop--; 2652 2653 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; 2654 mach->CondMask = mach->CondStack[mach->CondStackTop]; 2655 2656 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; 2657 mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; 2658 2659 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; 2660 mach->ContMask = mach->ContStack[mach->ContStackTop]; 2661 2662 assert(mach->FuncStackTop > 0); 2663 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 2664 2665 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; 2666 2667 UPDATE_EXEC_MASK(mach); 2668 } 2669 break; 2670 2671 case TGSI_OPCODE_SSG: 2672 /* TGSI_OPCODE_SGN */ 2673 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2674 FETCH( &r[0], 0, chan_index ); 2675 micro_sgn( &r[0], &r[0] ); 2676 STORE( &r[0], 0, chan_index ); 2677 } 2678 break; 2679 2680 case TGSI_OPCODE_CMP: 2681 if (inst->Flags & SOA_DEPENDENCY_FLAG) { 2682 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2683 FETCH(&r[chan_index], 0, chan_index); 2684 FETCH(&r[chan_index + NUM_CHANNELS], 1, chan_index); 2685 FETCH(&r[chan_index + 2 * NUM_CHANNELS], 2, chan_index); 2686 } 2687 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2688 micro_lt( &r[chan_index], &r[chan_index], 2689 &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[chan_index + NUM_CHANNELS], 2690 &r[chan_index + 2*NUM_CHANNELS] ); 2691 STORE(&r[chan_index], 0, chan_index); 2692 } 2693 } else { 2694 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2695 FETCH(&r[0], 0, chan_index); 2696 FETCH(&r[1], 1, chan_index); 2697 FETCH(&r[2], 2, chan_index); 2698 2699 micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] ); 2700 2701 STORE(&r[0], 0, chan_index); 2702 } 2703 } 2704 break; 2705 2706 case TGSI_OPCODE_SCS: 2707 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { 2708 FETCH( &r[0], 0, CHAN_X ); 2709 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2710 micro_cos(&r[1], &r[0]); 2711 STORE(&r[1], 0, CHAN_X); 2712 } 2713 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2714 micro_sin(&r[1], &r[0]); 2715 STORE(&r[1], 0, CHAN_Y); 2716 } 2717 } 2718 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { 2719 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); 2720 } 2721 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { 2722 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2723 } 2724 break; 2725 2726 case TGSI_OPCODE_NRM: 2727 /* 3-component vector normalize */ 2728 if(IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2729 IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2730 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2731 /* r3 = sqrt(dp3(src0, src0)) */ 2732 FETCH(&r[0], 0, CHAN_X); 2733 micro_mul(&r[3], &r[0], &r[0]); 2734 FETCH(&r[1], 0, CHAN_Y); 2735 micro_mul(&r[4], &r[1], &r[1]); 2736 micro_add(&r[3], &r[3], &r[4]); 2737 FETCH(&r[2], 0, CHAN_Z); 2738 micro_mul(&r[4], &r[2], &r[2]); 2739 micro_add(&r[3], &r[3], &r[4]); 2740 micro_sqrt(&r[3], &r[3]); 2741 2742 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2743 micro_div(&r[0], &r[0], &r[3]); 2744 STORE(&r[0], 0, CHAN_X); 2745 } 2746 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2747 micro_div(&r[1], &r[1], &r[3]); 2748 STORE(&r[1], 0, CHAN_Y); 2749 } 2750 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2751 micro_div(&r[2], &r[2], &r[3]); 2752 STORE(&r[2], 0, CHAN_Z); 2753 } 2754 } 2755 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2756 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W); 2757 } 2758 break; 2759 2760 case TGSI_OPCODE_NRM4: 2761 /* 4-component vector normalize */ 2762 { 2763 union tgsi_exec_channel tmp, dot; 2764 2765 /* tmp = dp4(src0, src0): */ 2766 FETCH( &r[0], 0, CHAN_X ); 2767 micro_mul( &tmp, &r[0], &r[0] ); 2768 2769 FETCH( &r[1], 0, CHAN_Y ); 2770 micro_mul( &dot, &r[1], &r[1] ); 2771 micro_add( &tmp, &tmp, &dot ); 2772 2773 FETCH( &r[2], 0, CHAN_Z ); 2774 micro_mul( &dot, &r[2], &r[2] ); 2775 micro_add( &tmp, &tmp, &dot ); 2776 2777 FETCH( &r[3], 0, CHAN_W ); 2778 micro_mul( &dot, &r[3], &r[3] ); 2779 micro_add( &tmp, &tmp, &dot ); 2780 2781 /* tmp = 1 / sqrt(tmp) */ 2782 micro_sqrt( &tmp, &tmp ); 2783 micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp ); 2784 2785 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2786 /* chan = chan * tmp */ 2787 micro_mul( &r[chan_index], &tmp, &r[chan_index] ); 2788 STORE( &r[chan_index], 0, chan_index ); 2789 } 2790 } 2791 break; 2792 2793 case TGSI_OPCODE_DIV: 2794 assert( 0 ); 2795 break; 2796 2797 case TGSI_OPCODE_DP2: 2798 FETCH( &r[0], 0, CHAN_X ); 2799 FETCH( &r[1], 1, CHAN_X ); 2800 micro_mul( &r[0], &r[0], &r[1] ); 2801 2802 FETCH( &r[1], 0, CHAN_Y ); 2803 FETCH( &r[2], 1, CHAN_Y ); 2804 micro_mul( &r[1], &r[1], &r[2] ); 2805 micro_add( &r[0], &r[0], &r[1] ); 2806 2807 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2808 STORE( &r[0], 0, chan_index ); 2809 } 2810 break; 2811 2812 case TGSI_OPCODE_IF: 2813 /* push CondMask */ 2814 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 2815 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 2816 FETCH( &r[0], 0, CHAN_X ); 2817 /* update CondMask */ 2818 if( ! r[0].u[0] ) { 2819 mach->CondMask &= ~0x1; 2820 } 2821 if( ! r[0].u[1] ) { 2822 mach->CondMask &= ~0x2; 2823 } 2824 if( ! r[0].u[2] ) { 2825 mach->CondMask &= ~0x4; 2826 } 2827 if( ! r[0].u[3] ) { 2828 mach->CondMask &= ~0x8; 2829 } 2830 UPDATE_EXEC_MASK(mach); 2831 /* Todo: If CondMask==0, jump to ELSE */ 2832 break; 2833 2834 case TGSI_OPCODE_ELSE: 2835 /* invert CondMask wrt previous mask */ 2836 { 2837 uint prevMask; 2838 assert(mach->CondStackTop > 0); 2839 prevMask = mach->CondStack[mach->CondStackTop - 1]; 2840 mach->CondMask = ~mach->CondMask & prevMask; 2841 UPDATE_EXEC_MASK(mach); 2842 /* Todo: If CondMask==0, jump to ENDIF */ 2843 } 2844 break; 2845 2846 case TGSI_OPCODE_ENDIF: 2847 /* pop CondMask */ 2848 assert(mach->CondStackTop > 0); 2849 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 2850 UPDATE_EXEC_MASK(mach); 2851 break; 2852 2853 case TGSI_OPCODE_END: 2854 /* halt execution */ 2855 *pc = -1; 2856 break; 2857 2858 case TGSI_OPCODE_REP: 2859 assert (0); 2860 break; 2861 2862 case TGSI_OPCODE_ENDREP: 2863 assert (0); 2864 break; 2865 2866 case TGSI_OPCODE_PUSHA: 2867 assert (0); 2868 break; 2869 2870 case TGSI_OPCODE_POPA: 2871 assert (0); 2872 break; 2873 2874 case TGSI_OPCODE_CEIL: 2875 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2876 FETCH( &r[0], 0, chan_index ); 2877 micro_ceil( &r[0], &r[0] ); 2878 STORE( &r[0], 0, chan_index ); 2879 } 2880 break; 2881 2882 case TGSI_OPCODE_I2F: 2883 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2884 FETCH( &r[0], 0, chan_index ); 2885 micro_i2f( &r[0], &r[0] ); 2886 STORE( &r[0], 0, chan_index ); 2887 } 2888 break; 2889 2890 case TGSI_OPCODE_NOT: 2891 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2892 FETCH( &r[0], 0, chan_index ); 2893 micro_not( &r[0], &r[0] ); 2894 STORE( &r[0], 0, chan_index ); 2895 } 2896 break; 2897 2898 case TGSI_OPCODE_TRUNC: 2899 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2900 FETCH( &r[0], 0, chan_index ); 2901 micro_trunc( &r[0], &r[0] ); 2902 STORE( &r[0], 0, chan_index ); 2903 } 2904 break; 2905 2906 case TGSI_OPCODE_SHL: 2907 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2908 FETCH( &r[0], 0, chan_index ); 2909 FETCH( &r[1], 1, chan_index ); 2910 micro_shl( &r[0], &r[0], &r[1] ); 2911 STORE( &r[0], 0, chan_index ); 2912 } 2913 break; 2914 2915 case TGSI_OPCODE_SHR: 2916 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2917 FETCH( &r[0], 0, chan_index ); 2918 FETCH( &r[1], 1, chan_index ); 2919 micro_ishr( &r[0], &r[0], &r[1] ); 2920 STORE( &r[0], 0, chan_index ); 2921 } 2922 break; 2923 2924 case TGSI_OPCODE_AND: 2925 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2926 FETCH( &r[0], 0, chan_index ); 2927 FETCH( &r[1], 1, chan_index ); 2928 micro_and( &r[0], &r[0], &r[1] ); 2929 STORE( &r[0], 0, chan_index ); 2930 } 2931 break; 2932 2933 case TGSI_OPCODE_OR: 2934 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2935 FETCH( &r[0], 0, chan_index ); 2936 FETCH( &r[1], 1, chan_index ); 2937 micro_or( &r[0], &r[0], &r[1] ); 2938 STORE( &r[0], 0, chan_index ); 2939 } 2940 break; 2941 2942 case TGSI_OPCODE_MOD: 2943 assert (0); 2944 break; 2945 2946 case TGSI_OPCODE_XOR: 2947 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2948 FETCH( &r[0], 0, chan_index ); 2949 FETCH( &r[1], 1, chan_index ); 2950 micro_xor( &r[0], &r[0], &r[1] ); 2951 STORE( &r[0], 0, chan_index ); 2952 } 2953 break; 2954 2955 case TGSI_OPCODE_SAD: 2956 assert (0); 2957 break; 2958 2959 case TGSI_OPCODE_TXF: 2960 assert (0); 2961 break; 2962 2963 case TGSI_OPCODE_TXQ: 2964 assert (0); 2965 break; 2966 2967 case TGSI_OPCODE_EMIT: 2968 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; 2969 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; 2970 break; 2971 2972 case TGSI_OPCODE_ENDPRIM: 2973 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; 2974 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; 2975 break; 2976 2977 case TGSI_OPCODE_BGNFOR: 2978 assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2979 for (chan_index = 0; chan_index < 3; chan_index++) { 2980 FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index ); 2981 } 2982 STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X ); 2983 ++mach->LoopCounterStackTop; 2984 /* fall-through (for now) */ 2985 case TGSI_OPCODE_BGNLOOP: 2986 /* push LoopMask and ContMasks */ 2987 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2988 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 2989 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2990 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 2991 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 2992 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1; 2993 break; 2994 2995 case TGSI_OPCODE_ENDFOR: 2996 assert(mach->LoopCounterStackTop > 0); 2997 micro_sub( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 2998 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 2999 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); 3000 /* update LoopMask */ 3001 if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[0] <= 0) { 3002 mach->LoopMask &= ~0x1; 3003 } 3004 if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[1] <= 0 ) { 3005 mach->LoopMask &= ~0x2; 3006 } 3007 if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[2] <= 0 ) { 3008 mach->LoopMask &= ~0x4; 3009 } 3010 if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[3] <= 0 ) { 3011 mach->LoopMask &= ~0x8; 3012 } 3013 micro_add( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], 3014 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], 3015 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]); 3016 assert(mach->LoopLabelStackTop > 0); 3017 inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1]; 3018 STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X ); 3019 /* Restore ContMask, but don't pop */ 3020 assert(mach->ContStackTop > 0); 3021 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 3022 UPDATE_EXEC_MASK(mach); 3023 if (mach->ExecMask) { 3024 /* repeat loop: jump to instruction just past BGNLOOP */ 3025 assert(mach->LoopLabelStackTop > 0); 3026 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; 3027 } 3028 else { 3029 /* exit loop: pop LoopMask */ 3030 assert(mach->LoopStackTop > 0); 3031 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 3032 /* pop ContMask */ 3033 assert(mach->ContStackTop > 0); 3034 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 3035 assert(mach->LoopLabelStackTop > 0); 3036 --mach->LoopLabelStackTop; 3037 assert(mach->LoopCounterStackTop > 0); 3038 --mach->LoopCounterStackTop; 3039 } 3040 UPDATE_EXEC_MASK(mach); 3041 break; 3042 3043 case TGSI_OPCODE_ENDLOOP: 3044 /* Restore ContMask, but don't pop */ 3045 assert(mach->ContStackTop > 0); 3046 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 3047 UPDATE_EXEC_MASK(mach); 3048 if (mach->ExecMask) { 3049 /* repeat loop: jump to instruction just past BGNLOOP */ 3050 assert(mach->LoopLabelStackTop > 0); 3051 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; 3052 } 3053 else { 3054 /* exit loop: pop LoopMask */ 3055 assert(mach->LoopStackTop > 0); 3056 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 3057 /* pop ContMask */ 3058 assert(mach->ContStackTop > 0); 3059 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 3060 assert(mach->LoopLabelStackTop > 0); 3061 --mach->LoopLabelStackTop; 3062 } 3063 UPDATE_EXEC_MASK(mach); 3064 break; 3065 3066 case TGSI_OPCODE_BRK: 3067 /* turn off loop channels for each enabled exec channel */ 3068 mach->LoopMask &= ~mach->ExecMask; 3069 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3070 UPDATE_EXEC_MASK(mach); 3071 break; 3072 3073 case TGSI_OPCODE_CONT: 3074 /* turn off cont channels for each enabled exec channel */ 3075 mach->ContMask &= ~mach->ExecMask; 3076 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3077 UPDATE_EXEC_MASK(mach); 3078 break; 3079 3080 case TGSI_OPCODE_BGNSUB: 3081 /* no-op */ 3082 break; 3083 3084 case TGSI_OPCODE_ENDSUB: 3085 /* no-op */ 3086 break; 3087 3088 case TGSI_OPCODE_NOP: 3089 break; 3090 3091 default: 3092 assert( 0 ); 3093 } 3094} 3095 3096 3097/** 3098 * Run TGSI interpreter. 3099 * \return bitmask of "alive" quad components 3100 */ 3101uint 3102tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) 3103{ 3104 uint i; 3105 int pc = 0; 3106 3107 mach->CondMask = 0xf; 3108 mach->LoopMask = 0xf; 3109 mach->ContMask = 0xf; 3110 mach->FuncMask = 0xf; 3111 mach->ExecMask = 0xf; 3112 3113 assert(mach->CondStackTop == 0); 3114 assert(mach->LoopStackTop == 0); 3115 assert(mach->ContStackTop == 0); 3116 assert(mach->CallStackTop == 0); 3117 3118 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; 3119 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; 3120 3121 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { 3122 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; 3123 mach->Primitives[0] = 0; 3124 } 3125 3126 for (i = 0; i < QUAD_SIZE; i++) { 3127 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] = 3128 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) | 3129 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) | 3130 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) | 3131 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT); 3132 } 3133 3134 /* execute declarations (interpolants) */ 3135 for (i = 0; i < mach->NumDeclarations; i++) { 3136 exec_declaration( mach, mach->Declarations+i ); 3137 } 3138 3139 /* execute instructions, until pc is set to -1 */ 3140 while (pc != -1) { 3141 assert(pc < (int) mach->NumInstructions); 3142 exec_instruction( mach, mach->Instructions + pc, &pc ); 3143 } 3144 3145#if 0 3146 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ 3147 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { 3148 /* 3149 * Scale back depth component. 3150 */ 3151 for (i = 0; i < 4; i++) 3152 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; 3153 } 3154#endif 3155 3156 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 3157} 3158