tgsi_exec.c revision cdc920a0
1/************************************************************************** 2 * 3 * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. 4 * All Rights Reserved. 5 * Copyright 2009-2010 VMware, Inc. All rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial portions 17 * of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 22 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR 23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 **************************************************************************/ 28 29/** 30 * TGSI interpreter/executor. 31 * 32 * Flow control information: 33 * 34 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) 35 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special 36 * care since a condition may be true for some quad components but false 37 * for other components. 38 * 39 * We basically execute all statements (even if they're in the part of 40 * an IF/ELSE clause that's "not taken") and use a special mask to 41 * control writing to destination registers. This is the ExecMask. 42 * See store_dest(). 43 * 44 * The ExecMask is computed from three other masks (CondMask, LoopMask and 45 * ContMask) which are controlled by the flow control instructions (namely: 46 * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). 47 * 48 * 49 * Authors: 50 * Michal Krol 51 * Brian Paul 52 */ 53 54#include "pipe/p_compiler.h" 55#include "pipe/p_state.h" 56#include "pipe/p_shader_tokens.h" 57#include "tgsi/tgsi_dump.h" 58#include "tgsi/tgsi_parse.h" 59#include "tgsi/tgsi_util.h" 60#include "tgsi_exec.h" 61#include "util/u_memory.h" 62#include "util/u_math.h" 63 64 65#define FAST_MATH 1 66 67#define TILE_TOP_LEFT 0 68#define TILE_TOP_RIGHT 1 69#define TILE_BOTTOM_LEFT 2 70#define TILE_BOTTOM_RIGHT 3 71 72static void 73micro_abs(union tgsi_exec_channel *dst, 74 const union tgsi_exec_channel *src) 75{ 76 dst->f[0] = fabsf(src->f[0]); 77 dst->f[1] = fabsf(src->f[1]); 78 dst->f[2] = fabsf(src->f[2]); 79 dst->f[3] = fabsf(src->f[3]); 80} 81 82static void 83micro_arl(union tgsi_exec_channel *dst, 84 const union tgsi_exec_channel *src) 85{ 86 dst->i[0] = (int)floorf(src->f[0]); 87 dst->i[1] = (int)floorf(src->f[1]); 88 dst->i[2] = (int)floorf(src->f[2]); 89 dst->i[3] = (int)floorf(src->f[3]); 90} 91 92static void 93micro_arr(union tgsi_exec_channel *dst, 94 const union tgsi_exec_channel *src) 95{ 96 dst->i[0] = (int)floorf(src->f[0] + 0.5f); 97 dst->i[1] = (int)floorf(src->f[1] + 0.5f); 98 dst->i[2] = (int)floorf(src->f[2] + 0.5f); 99 dst->i[3] = (int)floorf(src->f[3] + 0.5f); 100} 101 102static void 103micro_ceil(union tgsi_exec_channel *dst, 104 const union tgsi_exec_channel *src) 105{ 106 dst->f[0] = ceilf(src->f[0]); 107 dst->f[1] = ceilf(src->f[1]); 108 dst->f[2] = ceilf(src->f[2]); 109 dst->f[3] = ceilf(src->f[3]); 110} 111 112static void 113micro_clamp(union tgsi_exec_channel *dst, 114 const union tgsi_exec_channel *src0, 115 const union tgsi_exec_channel *src1, 116 const union tgsi_exec_channel *src2) 117{ 118 dst->f[0] = src0->f[0] < src1->f[0] ? src1->f[0] : src0->f[0] > src2->f[0] ? src2->f[0] : src0->f[0]; 119 dst->f[1] = src0->f[1] < src1->f[1] ? src1->f[1] : src0->f[1] > src2->f[1] ? src2->f[1] : src0->f[1]; 120 dst->f[2] = src0->f[2] < src1->f[2] ? src1->f[2] : src0->f[2] > src2->f[2] ? src2->f[2] : src0->f[2]; 121 dst->f[3] = src0->f[3] < src1->f[3] ? src1->f[3] : src0->f[3] > src2->f[3] ? src2->f[3] : src0->f[3]; 122} 123 124static void 125micro_cmp(union tgsi_exec_channel *dst, 126 const union tgsi_exec_channel *src0, 127 const union tgsi_exec_channel *src1, 128 const union tgsi_exec_channel *src2) 129{ 130 dst->f[0] = src0->f[0] < 0.0f ? src1->f[0] : src2->f[0]; 131 dst->f[1] = src0->f[1] < 0.0f ? src1->f[1] : src2->f[1]; 132 dst->f[2] = src0->f[2] < 0.0f ? src1->f[2] : src2->f[2]; 133 dst->f[3] = src0->f[3] < 0.0f ? src1->f[3] : src2->f[3]; 134} 135 136static void 137micro_cnd(union tgsi_exec_channel *dst, 138 const union tgsi_exec_channel *src0, 139 const union tgsi_exec_channel *src1, 140 const union tgsi_exec_channel *src2) 141{ 142 dst->f[0] = src2->f[0] > 0.5f ? src0->f[0] : src1->f[0]; 143 dst->f[1] = src2->f[1] > 0.5f ? src0->f[1] : src1->f[1]; 144 dst->f[2] = src2->f[2] > 0.5f ? src0->f[2] : src1->f[2]; 145 dst->f[3] = src2->f[3] > 0.5f ? src0->f[3] : src1->f[3]; 146} 147 148static void 149micro_cos(union tgsi_exec_channel *dst, 150 const union tgsi_exec_channel *src) 151{ 152 dst->f[0] = cosf(src->f[0]); 153 dst->f[1] = cosf(src->f[1]); 154 dst->f[2] = cosf(src->f[2]); 155 dst->f[3] = cosf(src->f[3]); 156} 157 158static void 159micro_ddx(union tgsi_exec_channel *dst, 160 const union tgsi_exec_channel *src) 161{ 162 dst->f[0] = 163 dst->f[1] = 164 dst->f[2] = 165 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; 166} 167 168static void 169micro_ddy(union tgsi_exec_channel *dst, 170 const union tgsi_exec_channel *src) 171{ 172 dst->f[0] = 173 dst->f[1] = 174 dst->f[2] = 175 dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; 176} 177 178static void 179micro_exp2(union tgsi_exec_channel *dst, 180 const union tgsi_exec_channel *src) 181{ 182#if FAST_MATH 183 dst->f[0] = util_fast_exp2(src->f[0]); 184 dst->f[1] = util_fast_exp2(src->f[1]); 185 dst->f[2] = util_fast_exp2(src->f[2]); 186 dst->f[3] = util_fast_exp2(src->f[3]); 187#else 188#if DEBUG 189 /* Inf is okay for this instruction, so clamp it to silence assertions. */ 190 uint i; 191 union tgsi_exec_channel clamped; 192 193 for (i = 0; i < 4; i++) { 194 if (src->f[i] > 127.99999f) { 195 clamped.f[i] = 127.99999f; 196 } else if (src->f[i] < -126.99999f) { 197 clamped.f[i] = -126.99999f; 198 } else { 199 clamped.f[i] = src->f[i]; 200 } 201 } 202 src = &clamped; 203#endif /* DEBUG */ 204 205 dst->f[0] = powf(2.0f, src->f[0]); 206 dst->f[1] = powf(2.0f, src->f[1]); 207 dst->f[2] = powf(2.0f, src->f[2]); 208 dst->f[3] = powf(2.0f, src->f[3]); 209#endif /* FAST_MATH */ 210} 211 212static void 213micro_flr(union tgsi_exec_channel *dst, 214 const union tgsi_exec_channel *src) 215{ 216 dst->f[0] = floorf(src->f[0]); 217 dst->f[1] = floorf(src->f[1]); 218 dst->f[2] = floorf(src->f[2]); 219 dst->f[3] = floorf(src->f[3]); 220} 221 222static void 223micro_frc(union tgsi_exec_channel *dst, 224 const union tgsi_exec_channel *src) 225{ 226 dst->f[0] = src->f[0] - floorf(src->f[0]); 227 dst->f[1] = src->f[1] - floorf(src->f[1]); 228 dst->f[2] = src->f[2] - floorf(src->f[2]); 229 dst->f[3] = src->f[3] - floorf(src->f[3]); 230} 231 232static void 233micro_iabs(union tgsi_exec_channel *dst, 234 const union tgsi_exec_channel *src) 235{ 236 dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0]; 237 dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1]; 238 dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2]; 239 dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3]; 240} 241 242static void 243micro_ineg(union tgsi_exec_channel *dst, 244 const union tgsi_exec_channel *src) 245{ 246 dst->i[0] = -src->i[0]; 247 dst->i[1] = -src->i[1]; 248 dst->i[2] = -src->i[2]; 249 dst->i[3] = -src->i[3]; 250} 251 252static void 253micro_lg2(union tgsi_exec_channel *dst, 254 const union tgsi_exec_channel *src) 255{ 256#if FAST_MATH 257 dst->f[0] = util_fast_log2(src->f[0]); 258 dst->f[1] = util_fast_log2(src->f[1]); 259 dst->f[2] = util_fast_log2(src->f[2]); 260 dst->f[3] = util_fast_log2(src->f[3]); 261#else 262 dst->f[0] = logf(src->f[0]) * 1.442695f; 263 dst->f[1] = logf(src->f[1]) * 1.442695f; 264 dst->f[2] = logf(src->f[2]) * 1.442695f; 265 dst->f[3] = logf(src->f[3]) * 1.442695f; 266#endif 267} 268 269static void 270micro_lrp(union tgsi_exec_channel *dst, 271 const union tgsi_exec_channel *src0, 272 const union tgsi_exec_channel *src1, 273 const union tgsi_exec_channel *src2) 274{ 275 dst->f[0] = src0->f[0] * (src1->f[0] - src2->f[0]) + src2->f[0]; 276 dst->f[1] = src0->f[1] * (src1->f[1] - src2->f[1]) + src2->f[1]; 277 dst->f[2] = src0->f[2] * (src1->f[2] - src2->f[2]) + src2->f[2]; 278 dst->f[3] = src0->f[3] * (src1->f[3] - src2->f[3]) + src2->f[3]; 279} 280 281static void 282micro_mad(union tgsi_exec_channel *dst, 283 const union tgsi_exec_channel *src0, 284 const union tgsi_exec_channel *src1, 285 const union tgsi_exec_channel *src2) 286{ 287 dst->f[0] = src0->f[0] * src1->f[0] + src2->f[0]; 288 dst->f[1] = src0->f[1] * src1->f[1] + src2->f[1]; 289 dst->f[2] = src0->f[2] * src1->f[2] + src2->f[2]; 290 dst->f[3] = src0->f[3] * src1->f[3] + src2->f[3]; 291} 292 293static void 294micro_mov(union tgsi_exec_channel *dst, 295 const union tgsi_exec_channel *src) 296{ 297 dst->u[0] = src->u[0]; 298 dst->u[1] = src->u[1]; 299 dst->u[2] = src->u[2]; 300 dst->u[3] = src->u[3]; 301} 302 303static void 304micro_rcp(union tgsi_exec_channel *dst, 305 const union tgsi_exec_channel *src) 306{ 307#if 0 /* for debugging */ 308 assert(src->f[0] != 0.0f); 309 assert(src->f[1] != 0.0f); 310 assert(src->f[2] != 0.0f); 311 assert(src->f[3] != 0.0f); 312#endif 313 dst->f[0] = 1.0f / src->f[0]; 314 dst->f[1] = 1.0f / src->f[1]; 315 dst->f[2] = 1.0f / src->f[2]; 316 dst->f[3] = 1.0f / src->f[3]; 317} 318 319static void 320micro_rnd(union tgsi_exec_channel *dst, 321 const union tgsi_exec_channel *src) 322{ 323 dst->f[0] = floorf(src->f[0] + 0.5f); 324 dst->f[1] = floorf(src->f[1] + 0.5f); 325 dst->f[2] = floorf(src->f[2] + 0.5f); 326 dst->f[3] = floorf(src->f[3] + 0.5f); 327} 328 329static void 330micro_rsq(union tgsi_exec_channel *dst, 331 const union tgsi_exec_channel *src) 332{ 333#if 0 /* for debugging */ 334 assert(src->f[0] != 0.0f); 335 assert(src->f[1] != 0.0f); 336 assert(src->f[2] != 0.0f); 337 assert(src->f[3] != 0.0f); 338#endif 339 dst->f[0] = 1.0f / sqrtf(fabsf(src->f[0])); 340 dst->f[1] = 1.0f / sqrtf(fabsf(src->f[1])); 341 dst->f[2] = 1.0f / sqrtf(fabsf(src->f[2])); 342 dst->f[3] = 1.0f / sqrtf(fabsf(src->f[3])); 343} 344 345static void 346micro_seq(union tgsi_exec_channel *dst, 347 const union tgsi_exec_channel *src0, 348 const union tgsi_exec_channel *src1) 349{ 350 dst->f[0] = src0->f[0] == src1->f[0] ? 1.0f : 0.0f; 351 dst->f[1] = src0->f[1] == src1->f[1] ? 1.0f : 0.0f; 352 dst->f[2] = src0->f[2] == src1->f[2] ? 1.0f : 0.0f; 353 dst->f[3] = src0->f[3] == src1->f[3] ? 1.0f : 0.0f; 354} 355 356static void 357micro_sge(union tgsi_exec_channel *dst, 358 const union tgsi_exec_channel *src0, 359 const union tgsi_exec_channel *src1) 360{ 361 dst->f[0] = src0->f[0] >= src1->f[0] ? 1.0f : 0.0f; 362 dst->f[1] = src0->f[1] >= src1->f[1] ? 1.0f : 0.0f; 363 dst->f[2] = src0->f[2] >= src1->f[2] ? 1.0f : 0.0f; 364 dst->f[3] = src0->f[3] >= src1->f[3] ? 1.0f : 0.0f; 365} 366 367static void 368micro_sgn(union tgsi_exec_channel *dst, 369 const union tgsi_exec_channel *src) 370{ 371 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; 372 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; 373 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; 374 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; 375} 376 377static void 378micro_sgt(union tgsi_exec_channel *dst, 379 const union tgsi_exec_channel *src0, 380 const union tgsi_exec_channel *src1) 381{ 382 dst->f[0] = src0->f[0] > src1->f[0] ? 1.0f : 0.0f; 383 dst->f[1] = src0->f[1] > src1->f[1] ? 1.0f : 0.0f; 384 dst->f[2] = src0->f[2] > src1->f[2] ? 1.0f : 0.0f; 385 dst->f[3] = src0->f[3] > src1->f[3] ? 1.0f : 0.0f; 386} 387 388static void 389micro_sin(union tgsi_exec_channel *dst, 390 const union tgsi_exec_channel *src) 391{ 392 dst->f[0] = sinf(src->f[0]); 393 dst->f[1] = sinf(src->f[1]); 394 dst->f[2] = sinf(src->f[2]); 395 dst->f[3] = sinf(src->f[3]); 396} 397 398static void 399micro_sle(union tgsi_exec_channel *dst, 400 const union tgsi_exec_channel *src0, 401 const union tgsi_exec_channel *src1) 402{ 403 dst->f[0] = src0->f[0] <= src1->f[0] ? 1.0f : 0.0f; 404 dst->f[1] = src0->f[1] <= src1->f[1] ? 1.0f : 0.0f; 405 dst->f[2] = src0->f[2] <= src1->f[2] ? 1.0f : 0.0f; 406 dst->f[3] = src0->f[3] <= src1->f[3] ? 1.0f : 0.0f; 407} 408 409static void 410micro_slt(union tgsi_exec_channel *dst, 411 const union tgsi_exec_channel *src0, 412 const union tgsi_exec_channel *src1) 413{ 414 dst->f[0] = src0->f[0] < src1->f[0] ? 1.0f : 0.0f; 415 dst->f[1] = src0->f[1] < src1->f[1] ? 1.0f : 0.0f; 416 dst->f[2] = src0->f[2] < src1->f[2] ? 1.0f : 0.0f; 417 dst->f[3] = src0->f[3] < src1->f[3] ? 1.0f : 0.0f; 418} 419 420static void 421micro_sne(union tgsi_exec_channel *dst, 422 const union tgsi_exec_channel *src0, 423 const union tgsi_exec_channel *src1) 424{ 425 dst->f[0] = src0->f[0] != src1->f[0] ? 1.0f : 0.0f; 426 dst->f[1] = src0->f[1] != src1->f[1] ? 1.0f : 0.0f; 427 dst->f[2] = src0->f[2] != src1->f[2] ? 1.0f : 0.0f; 428 dst->f[3] = src0->f[3] != src1->f[3] ? 1.0f : 0.0f; 429} 430 431static void 432micro_trunc(union tgsi_exec_channel *dst, 433 const union tgsi_exec_channel *src) 434{ 435 dst->f[0] = (float)(int)src->f[0]; 436 dst->f[1] = (float)(int)src->f[1]; 437 dst->f[2] = (float)(int)src->f[2]; 438 dst->f[3] = (float)(int)src->f[3]; 439} 440 441 442#define CHAN_X 0 443#define CHAN_Y 1 444#define CHAN_Z 2 445#define CHAN_W 3 446 447enum tgsi_exec_datatype { 448 TGSI_EXEC_DATA_FLOAT, 449 TGSI_EXEC_DATA_INT, 450 TGSI_EXEC_DATA_UINT 451}; 452 453/* 454 * Shorthand locations of various utility registers (_I = Index, _C = Channel) 455 */ 456#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I 457#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C 458#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I 459#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C 460#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I 461#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C 462#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I 463#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C 464#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I 465#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C 466#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I 467#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C 468#define TEMP_128_I TGSI_EXEC_TEMP_128_I 469#define TEMP_128_C TGSI_EXEC_TEMP_128_C 470#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I 471#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C 472#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I 473#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C 474#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I 475#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C 476#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I 477#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C 478#define TEMP_CC_I TGSI_EXEC_TEMP_CC_I 479#define TEMP_CC_C TGSI_EXEC_TEMP_CC_C 480#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I 481#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C 482#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I 483#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C 484#define TEMP_R0 TGSI_EXEC_TEMP_R0 485#define TEMP_P0 TGSI_EXEC_TEMP_P0 486 487#define IS_CHANNEL_ENABLED(INST, CHAN)\ 488 ((INST).Dst[0].Register.WriteMask & (1 << (CHAN))) 489 490#define IS_CHANNEL_ENABLED2(INST, CHAN)\ 491 ((INST).Dst[1].Register.WriteMask & (1 << (CHAN))) 492 493#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ 494 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 495 if (IS_CHANNEL_ENABLED( INST, CHAN )) 496 497#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ 498 for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ 499 if (IS_CHANNEL_ENABLED2( INST, CHAN )) 500 501 502/** The execution mask depends on the conditional mask and the loop mask */ 503#define UPDATE_EXEC_MASK(MACH) \ 504 MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->Switch.mask & MACH->FuncMask 505 506 507static const union tgsi_exec_channel ZeroVec = 508 { { 0.0, 0.0, 0.0, 0.0 } }; 509 510static const union tgsi_exec_channel OneVec = { 511 {1.0f, 1.0f, 1.0f, 1.0f} 512}; 513 514 515/** 516 * Assert that none of the float values in 'chan' are infinite or NaN. 517 * NaN and Inf may occur normally during program execution and should 518 * not lead to crashes, etc. But when debugging, it's helpful to catch 519 * them. 520 */ 521static INLINE void 522check_inf_or_nan(const union tgsi_exec_channel *chan) 523{ 524 assert(!util_is_inf_or_nan((chan)->f[0])); 525 assert(!util_is_inf_or_nan((chan)->f[1])); 526 assert(!util_is_inf_or_nan((chan)->f[2])); 527 assert(!util_is_inf_or_nan((chan)->f[3])); 528} 529 530 531#ifdef DEBUG 532static void 533print_chan(const char *msg, const union tgsi_exec_channel *chan) 534{ 535 debug_printf("%s = {%f, %f, %f, %f}\n", 536 msg, chan->f[0], chan->f[1], chan->f[2], chan->f[3]); 537} 538#endif 539 540 541#ifdef DEBUG 542static void 543print_temp(const struct tgsi_exec_machine *mach, uint index) 544{ 545 const struct tgsi_exec_vector *tmp = &mach->Temps[index]; 546 int i; 547 debug_printf("Temp[%u] =\n", index); 548 for (i = 0; i < 4; i++) { 549 debug_printf(" %c: { %f, %f, %f, %f }\n", 550 "XYZW"[i], 551 tmp->xyzw[i].f[0], 552 tmp->xyzw[i].f[1], 553 tmp->xyzw[i].f[2], 554 tmp->xyzw[i].f[3]); 555 } 556} 557#endif 558 559 560/** 561 * Check if there's a potential src/dst register data dependency when 562 * using SOA execution. 563 * Example: 564 * MOV T, T.yxwz; 565 * This would expand into: 566 * MOV t0, t1; 567 * MOV t1, t0; 568 * MOV t2, t3; 569 * MOV t3, t2; 570 * The second instruction will have the wrong value for t0 if executed as-is. 571 */ 572boolean 573tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) 574{ 575 uint i, chan; 576 577 uint writemask = inst->Dst[0].Register.WriteMask; 578 if (writemask == TGSI_WRITEMASK_X || 579 writemask == TGSI_WRITEMASK_Y || 580 writemask == TGSI_WRITEMASK_Z || 581 writemask == TGSI_WRITEMASK_W || 582 writemask == TGSI_WRITEMASK_NONE) { 583 /* no chance of data dependency */ 584 return FALSE; 585 } 586 587 /* loop over src regs */ 588 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 589 if ((inst->Src[i].Register.File == 590 inst->Dst[0].Register.File) && 591 (inst->Src[i].Register.Index == 592 inst->Dst[0].Register.Index)) { 593 /* loop over dest channels */ 594 uint channelsWritten = 0x0; 595 FOR_EACH_ENABLED_CHANNEL(*inst, chan) { 596 /* check if we're reading a channel that's been written */ 597 uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan); 598 if (channelsWritten & (1 << swizzle)) { 599 return TRUE; 600 } 601 602 channelsWritten |= (1 << chan); 603 } 604 } 605 } 606 return FALSE; 607} 608 609 610/** 611 * Initialize machine state by expanding tokens to full instructions, 612 * allocating temporary storage, setting up constants, etc. 613 * After this, we can call tgsi_exec_machine_run() many times. 614 */ 615void 616tgsi_exec_machine_bind_shader( 617 struct tgsi_exec_machine *mach, 618 const struct tgsi_token *tokens, 619 uint numSamplers, 620 struct tgsi_sampler **samplers) 621{ 622 uint k; 623 struct tgsi_parse_context parse; 624 struct tgsi_exec_labels *labels = &mach->Labels; 625 struct tgsi_full_instruction *instructions; 626 struct tgsi_full_declaration *declarations; 627 uint maxInstructions = 10, numInstructions = 0; 628 uint maxDeclarations = 10, numDeclarations = 0; 629 uint instno = 0; 630 631#if 0 632 tgsi_dump(tokens, 0); 633#endif 634 635 util_init_math(); 636 637 mach->Tokens = tokens; 638 mach->Samplers = samplers; 639 640 k = tgsi_parse_init (&parse, mach->Tokens); 641 if (k != TGSI_PARSE_OK) { 642 debug_printf( "Problem parsing!\n" ); 643 return; 644 } 645 646 mach->Processor = parse.FullHeader.Processor.Processor; 647 mach->ImmLimit = 0; 648 labels->count = 0; 649 650 declarations = (struct tgsi_full_declaration *) 651 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); 652 653 if (!declarations) { 654 return; 655 } 656 657 instructions = (struct tgsi_full_instruction *) 658 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); 659 660 if (!instructions) { 661 FREE( declarations ); 662 return; 663 } 664 665 while( !tgsi_parse_end_of_tokens( &parse ) ) { 666 uint pointer = parse.Position; 667 uint i; 668 669 tgsi_parse_token( &parse ); 670 switch( parse.FullToken.Token.Type ) { 671 case TGSI_TOKEN_TYPE_DECLARATION: 672 /* save expanded declaration */ 673 if (numDeclarations == maxDeclarations) { 674 declarations = REALLOC(declarations, 675 maxDeclarations 676 * sizeof(struct tgsi_full_declaration), 677 (maxDeclarations + 10) 678 * sizeof(struct tgsi_full_declaration)); 679 maxDeclarations += 10; 680 } 681 if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) { 682 unsigned reg; 683 for (reg = parse.FullToken.FullDeclaration.Range.First; 684 reg <= parse.FullToken.FullDeclaration.Range.Last; 685 ++reg) { 686 ++mach->NumOutputs; 687 } 688 } 689 memcpy(declarations + numDeclarations, 690 &parse.FullToken.FullDeclaration, 691 sizeof(declarations[0])); 692 numDeclarations++; 693 break; 694 695 case TGSI_TOKEN_TYPE_IMMEDIATE: 696 { 697 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 698 assert( size <= 4 ); 699 assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES ); 700 701 for( i = 0; i < size; i++ ) { 702 mach->Imms[mach->ImmLimit][i] = 703 parse.FullToken.FullImmediate.u[i].Float; 704 } 705 mach->ImmLimit += 1; 706 } 707 break; 708 709 case TGSI_TOKEN_TYPE_INSTRUCTION: 710 assert( labels->count < MAX_LABELS ); 711 712 labels->labels[labels->count][0] = instno; 713 labels->labels[labels->count][1] = pointer; 714 labels->count++; 715 716 /* save expanded instruction */ 717 if (numInstructions == maxInstructions) { 718 instructions = REALLOC(instructions, 719 maxInstructions 720 * sizeof(struct tgsi_full_instruction), 721 (maxInstructions + 10) 722 * sizeof(struct tgsi_full_instruction)); 723 maxInstructions += 10; 724 } 725 726 memcpy(instructions + numInstructions, 727 &parse.FullToken.FullInstruction, 728 sizeof(instructions[0])); 729 730 numInstructions++; 731 break; 732 733 case TGSI_TOKEN_TYPE_PROPERTY: 734 break; 735 736 default: 737 assert( 0 ); 738 } 739 } 740 tgsi_parse_free (&parse); 741 742 if (mach->Declarations) { 743 FREE( mach->Declarations ); 744 } 745 mach->Declarations = declarations; 746 mach->NumDeclarations = numDeclarations; 747 748 if (mach->Instructions) { 749 FREE( mach->Instructions ); 750 } 751 mach->Instructions = instructions; 752 mach->NumInstructions = numInstructions; 753} 754 755 756struct tgsi_exec_machine * 757tgsi_exec_machine_create( void ) 758{ 759 struct tgsi_exec_machine *mach; 760 uint i; 761 762 mach = align_malloc( sizeof *mach, 16 ); 763 if (!mach) 764 goto fail; 765 766 memset(mach, 0, sizeof(*mach)); 767 768 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; 769 mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES; 770 mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0]; 771 772 /* Setup constants. */ 773 for( i = 0; i < 4; i++ ) { 774 mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000; 775 mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF; 776 mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000; 777 mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF; 778 mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f; 779 mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f; 780 mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f; 781 mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f; 782 mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f; 783 mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f; 784 } 785 786#ifdef DEBUG 787 /* silence warnings */ 788 (void) print_chan; 789 (void) print_temp; 790#endif 791 792 return mach; 793 794fail: 795 align_free(mach); 796 return NULL; 797} 798 799 800void 801tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach) 802{ 803 if (mach) { 804 FREE(mach->Instructions); 805 FREE(mach->Declarations); 806 } 807 808 align_free(mach); 809} 810 811static void 812micro_add(union tgsi_exec_channel *dst, 813 const union tgsi_exec_channel *src0, 814 const union tgsi_exec_channel *src1) 815{ 816 dst->f[0] = src0->f[0] + src1->f[0]; 817 dst->f[1] = src0->f[1] + src1->f[1]; 818 dst->f[2] = src0->f[2] + src1->f[2]; 819 dst->f[3] = src0->f[3] + src1->f[3]; 820} 821 822static void 823micro_div( 824 union tgsi_exec_channel *dst, 825 const union tgsi_exec_channel *src0, 826 const union tgsi_exec_channel *src1 ) 827{ 828 if (src1->f[0] != 0) { 829 dst->f[0] = src0->f[0] / src1->f[0]; 830 } 831 if (src1->f[1] != 0) { 832 dst->f[1] = src0->f[1] / src1->f[1]; 833 } 834 if (src1->f[2] != 0) { 835 dst->f[2] = src0->f[2] / src1->f[2]; 836 } 837 if (src1->f[3] != 0) { 838 dst->f[3] = src0->f[3] / src1->f[3]; 839 } 840} 841 842static void 843micro_float_clamp(union tgsi_exec_channel *dst, 844 const union tgsi_exec_channel *src) 845{ 846 uint i; 847 848 for (i = 0; i < 4; i++) { 849 if (src->f[i] > 0.0f) { 850 if (src->f[i] > 1.884467e+019f) 851 dst->f[i] = 1.884467e+019f; 852 else if (src->f[i] < 5.42101e-020f) 853 dst->f[i] = 5.42101e-020f; 854 else 855 dst->f[i] = src->f[i]; 856 } 857 else { 858 if (src->f[i] < -1.884467e+019f) 859 dst->f[i] = -1.884467e+019f; 860 else if (src->f[i] > -5.42101e-020f) 861 dst->f[i] = -5.42101e-020f; 862 else 863 dst->f[i] = src->f[i]; 864 } 865 } 866} 867 868static void 869micro_lt( 870 union tgsi_exec_channel *dst, 871 const union tgsi_exec_channel *src0, 872 const union tgsi_exec_channel *src1, 873 const union tgsi_exec_channel *src2, 874 const union tgsi_exec_channel *src3 ) 875{ 876 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; 877 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; 878 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; 879 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; 880} 881 882static void 883micro_max(union tgsi_exec_channel *dst, 884 const union tgsi_exec_channel *src0, 885 const union tgsi_exec_channel *src1) 886{ 887 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; 888 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; 889 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; 890 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; 891} 892 893static void 894micro_min(union tgsi_exec_channel *dst, 895 const union tgsi_exec_channel *src0, 896 const union tgsi_exec_channel *src1) 897{ 898 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; 899 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; 900 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; 901 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; 902} 903 904static void 905micro_mul(union tgsi_exec_channel *dst, 906 const union tgsi_exec_channel *src0, 907 const union tgsi_exec_channel *src1) 908{ 909 dst->f[0] = src0->f[0] * src1->f[0]; 910 dst->f[1] = src0->f[1] * src1->f[1]; 911 dst->f[2] = src0->f[2] * src1->f[2]; 912 dst->f[3] = src0->f[3] * src1->f[3]; 913} 914 915#if 0 916static void 917micro_imul64( 918 union tgsi_exec_channel *dst0, 919 union tgsi_exec_channel *dst1, 920 const union tgsi_exec_channel *src0, 921 const union tgsi_exec_channel *src1 ) 922{ 923 dst1->i[0] = src0->i[0] * src1->i[0]; 924 dst1->i[1] = src0->i[1] * src1->i[1]; 925 dst1->i[2] = src0->i[2] * src1->i[2]; 926 dst1->i[3] = src0->i[3] * src1->i[3]; 927 dst0->i[0] = 0; 928 dst0->i[1] = 0; 929 dst0->i[2] = 0; 930 dst0->i[3] = 0; 931} 932#endif 933 934#if 0 935static void 936micro_umul64( 937 union tgsi_exec_channel *dst0, 938 union tgsi_exec_channel *dst1, 939 const union tgsi_exec_channel *src0, 940 const union tgsi_exec_channel *src1 ) 941{ 942 dst1->u[0] = src0->u[0] * src1->u[0]; 943 dst1->u[1] = src0->u[1] * src1->u[1]; 944 dst1->u[2] = src0->u[2] * src1->u[2]; 945 dst1->u[3] = src0->u[3] * src1->u[3]; 946 dst0->u[0] = 0; 947 dst0->u[1] = 0; 948 dst0->u[2] = 0; 949 dst0->u[3] = 0; 950} 951#endif 952 953 954#if 0 955static void 956micro_movc( 957 union tgsi_exec_channel *dst, 958 const union tgsi_exec_channel *src0, 959 const union tgsi_exec_channel *src1, 960 const union tgsi_exec_channel *src2 ) 961{ 962 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; 963 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; 964 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; 965 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; 966} 967#endif 968 969static void 970micro_neg( 971 union tgsi_exec_channel *dst, 972 const union tgsi_exec_channel *src ) 973{ 974 dst->f[0] = -src->f[0]; 975 dst->f[1] = -src->f[1]; 976 dst->f[2] = -src->f[2]; 977 dst->f[3] = -src->f[3]; 978} 979 980static void 981micro_pow( 982 union tgsi_exec_channel *dst, 983 const union tgsi_exec_channel *src0, 984 const union tgsi_exec_channel *src1 ) 985{ 986#if FAST_MATH 987 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] ); 988 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] ); 989 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] ); 990 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] ); 991#else 992 dst->f[0] = powf( src0->f[0], src1->f[0] ); 993 dst->f[1] = powf( src0->f[1], src1->f[1] ); 994 dst->f[2] = powf( src0->f[2], src1->f[2] ); 995 dst->f[3] = powf( src0->f[3], src1->f[3] ); 996#endif 997} 998 999static void 1000micro_sub(union tgsi_exec_channel *dst, 1001 const union tgsi_exec_channel *src0, 1002 const union tgsi_exec_channel *src1) 1003{ 1004 dst->f[0] = src0->f[0] - src1->f[0]; 1005 dst->f[1] = src0->f[1] - src1->f[1]; 1006 dst->f[2] = src0->f[2] - src1->f[2]; 1007 dst->f[3] = src0->f[3] - src1->f[3]; 1008} 1009 1010static void 1011fetch_src_file_channel(const struct tgsi_exec_machine *mach, 1012 const uint file, 1013 const uint swizzle, 1014 const union tgsi_exec_channel *index, 1015 const union tgsi_exec_channel *index2D, 1016 union tgsi_exec_channel *chan) 1017{ 1018 uint i; 1019 1020 switch (file) { 1021 case TGSI_FILE_CONSTANT: 1022 for (i = 0; i < QUAD_SIZE; i++) { 1023 assert(index2D->i[i] >= 0 && index2D->i[i] < PIPE_MAX_CONSTANT_BUFFERS); 1024 assert(mach->Consts[index2D->i[i]]); 1025 1026 if (index->i[i] < 0) { 1027 chan->u[i] = 0; 1028 } else { 1029 const uint *p = (const uint *)mach->Consts[index2D->i[i]]; 1030 1031 chan->u[i] = p[index->i[i] * 4 + swizzle]; 1032 } 1033 } 1034 break; 1035 1036 case TGSI_FILE_INPUT: 1037 case TGSI_FILE_SYSTEM_VALUE: 1038 for (i = 0; i < QUAD_SIZE; i++) { 1039 /* XXX: 2D indexing */ 1040 chan->u[i] = mach->Inputs[index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i]].xyzw[swizzle].u[i]; 1041 } 1042 break; 1043 1044 case TGSI_FILE_TEMPORARY: 1045 for (i = 0; i < QUAD_SIZE; i++) { 1046 assert(index->i[i] < TGSI_EXEC_NUM_TEMPS); 1047 assert(index2D->i[i] == 0); 1048 1049 chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i]; 1050 } 1051 break; 1052 1053 case TGSI_FILE_IMMEDIATE: 1054 for (i = 0; i < QUAD_SIZE; i++) { 1055 assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit); 1056 assert(index2D->i[i] == 0); 1057 1058 chan->f[i] = mach->Imms[index->i[i]][swizzle]; 1059 } 1060 break; 1061 1062 case TGSI_FILE_ADDRESS: 1063 for (i = 0; i < QUAD_SIZE; i++) { 1064 assert(index->i[i] >= 0); 1065 assert(index2D->i[i] == 0); 1066 1067 chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i]; 1068 } 1069 break; 1070 1071 case TGSI_FILE_PREDICATE: 1072 for (i = 0; i < QUAD_SIZE; i++) { 1073 assert(index->i[i] >= 0 && index->i[i] < TGSI_EXEC_NUM_PREDS); 1074 assert(index2D->i[i] == 0); 1075 1076 chan->u[i] = mach->Predicates[0].xyzw[swizzle].u[i]; 1077 } 1078 break; 1079 1080 case TGSI_FILE_OUTPUT: 1081 /* vertex/fragment output vars can be read too */ 1082 for (i = 0; i < QUAD_SIZE; i++) { 1083 assert(index->i[i] >= 0); 1084 assert(index2D->i[i] == 0); 1085 1086 chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i]; 1087 } 1088 break; 1089 1090 default: 1091 assert(0); 1092 for (i = 0; i < QUAD_SIZE; i++) { 1093 chan->u[i] = 0; 1094 } 1095 } 1096} 1097 1098static void 1099fetch_source(const struct tgsi_exec_machine *mach, 1100 union tgsi_exec_channel *chan, 1101 const struct tgsi_full_src_register *reg, 1102 const uint chan_index, 1103 enum tgsi_exec_datatype src_datatype) 1104{ 1105 union tgsi_exec_channel index; 1106 union tgsi_exec_channel index2D; 1107 uint swizzle; 1108 1109 /* We start with a direct index into a register file. 1110 * 1111 * file[1], 1112 * where: 1113 * file = Register.File 1114 * [1] = Register.Index 1115 */ 1116 index.i[0] = 1117 index.i[1] = 1118 index.i[2] = 1119 index.i[3] = reg->Register.Index; 1120 1121 /* There is an extra source register that indirectly subscripts 1122 * a register file. The direct index now becomes an offset 1123 * that is being added to the indirect register. 1124 * 1125 * file[ind[2].x+1], 1126 * where: 1127 * ind = Indirect.File 1128 * [2] = Indirect.Index 1129 * .x = Indirect.SwizzleX 1130 */ 1131 if (reg->Register.Indirect) { 1132 union tgsi_exec_channel index2; 1133 union tgsi_exec_channel indir_index; 1134 const uint execmask = mach->ExecMask; 1135 uint i; 1136 1137 /* which address register (always zero now) */ 1138 index2.i[0] = 1139 index2.i[1] = 1140 index2.i[2] = 1141 index2.i[3] = reg->Indirect.Index; 1142 1143 /* get current value of address register[swizzle] */ 1144 swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, CHAN_X ); 1145 fetch_src_file_channel(mach, 1146 reg->Indirect.File, 1147 swizzle, 1148 &index2, 1149 &ZeroVec, 1150 &indir_index); 1151 1152 /* add value of address register to the offset */ 1153 index.i[0] += indir_index.i[0]; 1154 index.i[1] += indir_index.i[1]; 1155 index.i[2] += indir_index.i[2]; 1156 index.i[3] += indir_index.i[3]; 1157 1158 /* for disabled execution channels, zero-out the index to 1159 * avoid using a potential garbage value. 1160 */ 1161 for (i = 0; i < QUAD_SIZE; i++) { 1162 if ((execmask & (1 << i)) == 0) 1163 index.i[i] = 0; 1164 } 1165 } 1166 1167 /* There is an extra source register that is a second 1168 * subscript to a register file. Effectively it means that 1169 * the register file is actually a 2D array of registers. 1170 * 1171 * file[3][1], 1172 * where: 1173 * [3] = Dimension.Index 1174 */ 1175 if (reg->Register.Dimension) { 1176 index2D.i[0] = 1177 index2D.i[1] = 1178 index2D.i[2] = 1179 index2D.i[3] = reg->Dimension.Index; 1180 1181 /* Again, the second subscript index can be addressed indirectly 1182 * identically to the first one. 1183 * Nothing stops us from indirectly addressing the indirect register, 1184 * but there is no need for that, so we won't exercise it. 1185 * 1186 * file[ind[4].y+3][1], 1187 * where: 1188 * ind = DimIndirect.File 1189 * [4] = DimIndirect.Index 1190 * .y = DimIndirect.SwizzleX 1191 */ 1192 if (reg->Dimension.Indirect) { 1193 union tgsi_exec_channel index2; 1194 union tgsi_exec_channel indir_index; 1195 const uint execmask = mach->ExecMask; 1196 uint i; 1197 1198 index2.i[0] = 1199 index2.i[1] = 1200 index2.i[2] = 1201 index2.i[3] = reg->DimIndirect.Index; 1202 1203 swizzle = tgsi_util_get_src_register_swizzle( ®->DimIndirect, CHAN_X ); 1204 fetch_src_file_channel(mach, 1205 reg->DimIndirect.File, 1206 swizzle, 1207 &index2, 1208 &ZeroVec, 1209 &indir_index); 1210 1211 index2D.i[0] += indir_index.i[0]; 1212 index2D.i[1] += indir_index.i[1]; 1213 index2D.i[2] += indir_index.i[2]; 1214 index2D.i[3] += indir_index.i[3]; 1215 1216 /* for disabled execution channels, zero-out the index to 1217 * avoid using a potential garbage value. 1218 */ 1219 for (i = 0; i < QUAD_SIZE; i++) { 1220 if ((execmask & (1 << i)) == 0) { 1221 index2D.i[i] = 0; 1222 } 1223 } 1224 } 1225 1226 /* If by any chance there was a need for a 3D array of register 1227 * files, we would have to check whether Dimension is followed 1228 * by a dimension register and continue the saga. 1229 */ 1230 } else { 1231 index2D.i[0] = 1232 index2D.i[1] = 1233 index2D.i[2] = 1234 index2D.i[3] = 0; 1235 } 1236 1237 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 1238 fetch_src_file_channel(mach, 1239 reg->Register.File, 1240 swizzle, 1241 &index, 1242 &index2D, 1243 chan); 1244 1245 if (reg->Register.Absolute) { 1246 if (src_datatype == TGSI_EXEC_DATA_FLOAT) { 1247 micro_abs(chan, chan); 1248 } else { 1249 micro_iabs(chan, chan); 1250 } 1251 } 1252 1253 if (reg->Register.Negate) { 1254 if (src_datatype == TGSI_EXEC_DATA_FLOAT) { 1255 micro_neg(chan, chan); 1256 } else { 1257 micro_ineg(chan, chan); 1258 } 1259 } 1260} 1261 1262static void 1263store_dest(struct tgsi_exec_machine *mach, 1264 const union tgsi_exec_channel *chan, 1265 const struct tgsi_full_dst_register *reg, 1266 const struct tgsi_full_instruction *inst, 1267 uint chan_index, 1268 enum tgsi_exec_datatype dst_datatype) 1269{ 1270 uint i; 1271 union tgsi_exec_channel null; 1272 union tgsi_exec_channel *dst; 1273 uint execmask = mach->ExecMask; 1274 int offset = 0; /* indirection offset */ 1275 int index; 1276 1277 /* for debugging */ 1278 if (0 && dst_datatype == TGSI_EXEC_DATA_FLOAT) { 1279 check_inf_or_nan(chan); 1280 } 1281 1282 /* There is an extra source register that indirectly subscripts 1283 * a register file. The direct index now becomes an offset 1284 * that is being added to the indirect register. 1285 * 1286 * file[ind[2].x+1], 1287 * where: 1288 * ind = Indirect.File 1289 * [2] = Indirect.Index 1290 * .x = Indirect.SwizzleX 1291 */ 1292 if (reg->Register.Indirect) { 1293 union tgsi_exec_channel index; 1294 union tgsi_exec_channel indir_index; 1295 uint swizzle; 1296 1297 /* which address register (always zero for now) */ 1298 index.i[0] = 1299 index.i[1] = 1300 index.i[2] = 1301 index.i[3] = reg->Indirect.Index; 1302 1303 /* get current value of address register[swizzle] */ 1304 swizzle = tgsi_util_get_src_register_swizzle( ®->Indirect, CHAN_X ); 1305 1306 /* fetch values from the address/indirection register */ 1307 fetch_src_file_channel(mach, 1308 reg->Indirect.File, 1309 swizzle, 1310 &index, 1311 &ZeroVec, 1312 &indir_index); 1313 1314 /* save indirection offset */ 1315 offset = indir_index.i[0]; 1316 } 1317 1318 switch (reg->Register.File) { 1319 case TGSI_FILE_NULL: 1320 dst = &null; 1321 break; 1322 1323 case TGSI_FILE_OUTPUT: 1324 index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] 1325 + reg->Register.Index; 1326 dst = &mach->Outputs[offset + index].xyzw[chan_index]; 1327#if 0 1328 if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { 1329 fprintf(stderr, "STORING OUT[%d] mask(%d), = (", offset + index, execmask); 1330 for (i = 0; i < QUAD_SIZE; i++) 1331 if (execmask & (1 << i)) 1332 fprintf(stderr, "%f, ", chan->f[i]); 1333 fprintf(stderr, ")\n"); 1334 } 1335#endif 1336 break; 1337 1338 case TGSI_FILE_TEMPORARY: 1339 index = reg->Register.Index; 1340 assert( index < TGSI_EXEC_NUM_TEMPS ); 1341 dst = &mach->Temps[offset + index].xyzw[chan_index]; 1342 break; 1343 1344 case TGSI_FILE_ADDRESS: 1345 index = reg->Register.Index; 1346 dst = &mach->Addrs[index].xyzw[chan_index]; 1347 break; 1348 1349 case TGSI_FILE_LOOP: 1350 assert(reg->Register.Index == 0); 1351 assert(mach->LoopCounterStackTop > 0); 1352 assert(chan_index == CHAN_X); 1353 dst = &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[chan_index]; 1354 break; 1355 1356 case TGSI_FILE_PREDICATE: 1357 index = reg->Register.Index; 1358 assert(index < TGSI_EXEC_NUM_PREDS); 1359 dst = &mach->Predicates[index].xyzw[chan_index]; 1360 break; 1361 1362 default: 1363 assert( 0 ); 1364 return; 1365 } 1366 1367 if (inst->Instruction.Predicate) { 1368 uint swizzle; 1369 union tgsi_exec_channel *pred; 1370 1371 switch (chan_index) { 1372 case CHAN_X: 1373 swizzle = inst->Predicate.SwizzleX; 1374 break; 1375 case CHAN_Y: 1376 swizzle = inst->Predicate.SwizzleY; 1377 break; 1378 case CHAN_Z: 1379 swizzle = inst->Predicate.SwizzleZ; 1380 break; 1381 case CHAN_W: 1382 swizzle = inst->Predicate.SwizzleW; 1383 break; 1384 default: 1385 assert(0); 1386 return; 1387 } 1388 1389 assert(inst->Predicate.Index == 0); 1390 1391 pred = &mach->Predicates[inst->Predicate.Index].xyzw[swizzle]; 1392 1393 if (inst->Predicate.Negate) { 1394 for (i = 0; i < QUAD_SIZE; i++) { 1395 if (pred->u[i]) { 1396 execmask &= ~(1 << i); 1397 } 1398 } 1399 } else { 1400 for (i = 0; i < QUAD_SIZE; i++) { 1401 if (!pred->u[i]) { 1402 execmask &= ~(1 << i); 1403 } 1404 } 1405 } 1406 } 1407 1408 switch (inst->Instruction.Saturate) { 1409 case TGSI_SAT_NONE: 1410 for (i = 0; i < QUAD_SIZE; i++) 1411 if (execmask & (1 << i)) 1412 dst->i[i] = chan->i[i]; 1413 break; 1414 1415 case TGSI_SAT_ZERO_ONE: 1416 for (i = 0; i < QUAD_SIZE; i++) 1417 if (execmask & (1 << i)) { 1418 if (chan->f[i] < 0.0f) 1419 dst->f[i] = 0.0f; 1420 else if (chan->f[i] > 1.0f) 1421 dst->f[i] = 1.0f; 1422 else 1423 dst->i[i] = chan->i[i]; 1424 } 1425 break; 1426 1427 case TGSI_SAT_MINUS_PLUS_ONE: 1428 for (i = 0; i < QUAD_SIZE; i++) 1429 if (execmask & (1 << i)) { 1430 if (chan->f[i] < -1.0f) 1431 dst->f[i] = -1.0f; 1432 else if (chan->f[i] > 1.0f) 1433 dst->f[i] = 1.0f; 1434 else 1435 dst->i[i] = chan->i[i]; 1436 } 1437 break; 1438 1439 default: 1440 assert( 0 ); 1441 } 1442} 1443 1444#define FETCH(VAL,INDEX,CHAN)\ 1445 fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT) 1446 1447#define STORE(VAL,INDEX,CHAN)\ 1448 store_dest(mach, VAL, &inst->Dst[INDEX], inst, CHAN, TGSI_EXEC_DATA_FLOAT) 1449 1450 1451/** 1452 * Execute ARB-style KIL which is predicated by a src register. 1453 * Kill fragment if any of the four values is less than zero. 1454 */ 1455static void 1456exec_kil(struct tgsi_exec_machine *mach, 1457 const struct tgsi_full_instruction *inst) 1458{ 1459 uint uniquemask; 1460 uint chan_index; 1461 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1462 union tgsi_exec_channel r[1]; 1463 1464 /* This mask stores component bits that were already tested. */ 1465 uniquemask = 0; 1466 1467 for (chan_index = 0; chan_index < 4; chan_index++) 1468 { 1469 uint swizzle; 1470 uint i; 1471 1472 /* unswizzle channel */ 1473 swizzle = tgsi_util_get_full_src_register_swizzle ( 1474 &inst->Src[0], 1475 chan_index); 1476 1477 /* check if the component has not been already tested */ 1478 if (uniquemask & (1 << swizzle)) 1479 continue; 1480 uniquemask |= 1 << swizzle; 1481 1482 FETCH(&r[0], 0, chan_index); 1483 for (i = 0; i < 4; i++) 1484 if (r[0].f[i] < 0.0f) 1485 kilmask |= 1 << i; 1486 } 1487 1488 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1489} 1490 1491/** 1492 * Execute NVIDIA-style KIL which is predicated by a condition code. 1493 * Kill fragment if the condition code is TRUE. 1494 */ 1495static void 1496exec_kilp(struct tgsi_exec_machine *mach, 1497 const struct tgsi_full_instruction *inst) 1498{ 1499 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1500 1501 /* "unconditional" kil */ 1502 kilmask = mach->ExecMask; 1503 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1504} 1505 1506static void 1507emit_vertex(struct tgsi_exec_machine *mach) 1508{ 1509 /* FIXME: check for exec mask correctly 1510 unsigned i; 1511 for (i = 0; i < QUAD_SIZE; ++i) { 1512 if ((mach->ExecMask & (1 << i))) 1513 */ 1514 if (mach->ExecMask) { 1515 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs; 1516 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; 1517 } 1518} 1519 1520static void 1521emit_primitive(struct tgsi_exec_machine *mach) 1522{ 1523 unsigned *prim_count = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]; 1524 /* FIXME: check for exec mask correctly 1525 unsigned i; 1526 for (i = 0; i < QUAD_SIZE; ++i) { 1527 if ((mach->ExecMask & (1 << i))) 1528 */ 1529 if (mach->ExecMask) { 1530 ++(*prim_count); 1531 debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs); 1532 mach->Primitives[*prim_count] = 0; 1533 } 1534} 1535 1536/* 1537 * Fetch four texture samples using STR texture coordinates. 1538 */ 1539static void 1540fetch_texel( struct tgsi_sampler *sampler, 1541 const union tgsi_exec_channel *s, 1542 const union tgsi_exec_channel *t, 1543 const union tgsi_exec_channel *p, 1544 const union tgsi_exec_channel *c0, 1545 enum tgsi_sampler_control control, 1546 union tgsi_exec_channel *r, 1547 union tgsi_exec_channel *g, 1548 union tgsi_exec_channel *b, 1549 union tgsi_exec_channel *a ) 1550{ 1551 uint j; 1552 float rgba[NUM_CHANNELS][QUAD_SIZE]; 1553 1554 sampler->get_samples(sampler, s->f, t->f, p->f, c0->f, control, rgba); 1555 1556 for (j = 0; j < 4; j++) { 1557 r->f[j] = rgba[0][j]; 1558 g->f[j] = rgba[1][j]; 1559 b->f[j] = rgba[2][j]; 1560 a->f[j] = rgba[3][j]; 1561 } 1562} 1563 1564 1565#define TEX_MODIFIER_NONE 0 1566#define TEX_MODIFIER_PROJECTED 1 1567#define TEX_MODIFIER_LOD_BIAS 2 1568#define TEX_MODIFIER_EXPLICIT_LOD 3 1569 1570 1571static void 1572exec_tex(struct tgsi_exec_machine *mach, 1573 const struct tgsi_full_instruction *inst, 1574 uint modifier) 1575{ 1576 const uint unit = inst->Src[1].Register.Index; 1577 union tgsi_exec_channel r[4]; 1578 const union tgsi_exec_channel *lod = &ZeroVec; 1579 enum tgsi_sampler_control control; 1580 uint chan_index; 1581 1582 if (modifier != TEX_MODIFIER_NONE) { 1583 FETCH(&r[3], 0, CHAN_W); 1584 if (modifier != TEX_MODIFIER_PROJECTED) { 1585 lod = &r[3]; 1586 } 1587 } 1588 1589 if (modifier == TEX_MODIFIER_EXPLICIT_LOD) { 1590 control = tgsi_sampler_lod_explicit; 1591 } else { 1592 control = tgsi_sampler_lod_bias; 1593 } 1594 1595 switch (inst->Texture.Texture) { 1596 case TGSI_TEXTURE_1D: 1597 case TGSI_TEXTURE_SHADOW1D: 1598 FETCH(&r[0], 0, CHAN_X); 1599 1600 if (modifier == TEX_MODIFIER_PROJECTED) { 1601 micro_div(&r[0], &r[0], &r[3]); 1602 } 1603 1604 fetch_texel(mach->Samplers[unit], 1605 &r[0], &ZeroVec, &ZeroVec, lod, /* S, T, P, LOD */ 1606 control, 1607 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 1608 break; 1609 1610 case TGSI_TEXTURE_2D: 1611 case TGSI_TEXTURE_RECT: 1612 case TGSI_TEXTURE_SHADOW2D: 1613 case TGSI_TEXTURE_SHADOWRECT: 1614 FETCH(&r[0], 0, CHAN_X); 1615 FETCH(&r[1], 0, CHAN_Y); 1616 FETCH(&r[2], 0, CHAN_Z); 1617 1618 if (modifier == TEX_MODIFIER_PROJECTED) { 1619 micro_div(&r[0], &r[0], &r[3]); 1620 micro_div(&r[1], &r[1], &r[3]); 1621 micro_div(&r[2], &r[2], &r[3]); 1622 } 1623 1624 fetch_texel(mach->Samplers[unit], 1625 &r[0], &r[1], &r[2], lod, /* S, T, P, LOD */ 1626 control, 1627 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 1628 break; 1629 1630 case TGSI_TEXTURE_3D: 1631 case TGSI_TEXTURE_CUBE: 1632 FETCH(&r[0], 0, CHAN_X); 1633 FETCH(&r[1], 0, CHAN_Y); 1634 FETCH(&r[2], 0, CHAN_Z); 1635 1636 if (modifier == TEX_MODIFIER_PROJECTED) { 1637 micro_div(&r[0], &r[0], &r[3]); 1638 micro_div(&r[1], &r[1], &r[3]); 1639 micro_div(&r[2], &r[2], &r[3]); 1640 } 1641 1642 fetch_texel(mach->Samplers[unit], 1643 &r[0], &r[1], &r[2], lod, 1644 control, 1645 &r[0], &r[1], &r[2], &r[3]); 1646 break; 1647 1648 default: 1649 assert(0); 1650 } 1651 1652 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 1653 STORE(&r[chan_index], 0, chan_index); 1654 } 1655} 1656 1657static void 1658exec_txd(struct tgsi_exec_machine *mach, 1659 const struct tgsi_full_instruction *inst) 1660{ 1661 const uint unit = inst->Src[3].Register.Index; 1662 union tgsi_exec_channel r[4]; 1663 uint chan_index; 1664 1665 /* 1666 * XXX: This is fake TXD -- the derivatives are not taken into account, yet. 1667 */ 1668 1669 switch (inst->Texture.Texture) { 1670 case TGSI_TEXTURE_1D: 1671 case TGSI_TEXTURE_SHADOW1D: 1672 1673 FETCH(&r[0], 0, CHAN_X); 1674 1675 fetch_texel(mach->Samplers[unit], 1676 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, BIAS */ 1677 tgsi_sampler_lod_bias, 1678 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 1679 break; 1680 1681 case TGSI_TEXTURE_2D: 1682 case TGSI_TEXTURE_RECT: 1683 case TGSI_TEXTURE_SHADOW2D: 1684 case TGSI_TEXTURE_SHADOWRECT: 1685 1686 FETCH(&r[0], 0, CHAN_X); 1687 FETCH(&r[1], 0, CHAN_Y); 1688 FETCH(&r[2], 0, CHAN_Z); 1689 1690 fetch_texel(mach->Samplers[unit], 1691 &r[0], &r[1], &r[2], &ZeroVec, /* inputs */ 1692 tgsi_sampler_lod_bias, 1693 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 1694 break; 1695 1696 case TGSI_TEXTURE_3D: 1697 case TGSI_TEXTURE_CUBE: 1698 1699 FETCH(&r[0], 0, CHAN_X); 1700 FETCH(&r[1], 0, CHAN_Y); 1701 FETCH(&r[2], 0, CHAN_Z); 1702 1703 fetch_texel(mach->Samplers[unit], 1704 &r[0], &r[1], &r[2], &ZeroVec, 1705 tgsi_sampler_lod_bias, 1706 &r[0], &r[1], &r[2], &r[3]); 1707 break; 1708 1709 default: 1710 assert(0); 1711 } 1712 1713 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 1714 STORE(&r[chan_index], 0, chan_index); 1715 } 1716} 1717 1718 1719/** 1720 * Evaluate a constant-valued coefficient at the position of the 1721 * current quad. 1722 */ 1723static void 1724eval_constant_coef( 1725 struct tgsi_exec_machine *mach, 1726 unsigned attrib, 1727 unsigned chan ) 1728{ 1729 unsigned i; 1730 1731 for( i = 0; i < QUAD_SIZE; i++ ) { 1732 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; 1733 } 1734} 1735 1736/** 1737 * Evaluate a linear-valued coefficient at the position of the 1738 * current quad. 1739 */ 1740static void 1741eval_linear_coef( 1742 struct tgsi_exec_machine *mach, 1743 unsigned attrib, 1744 unsigned chan ) 1745{ 1746 const float x = mach->QuadPos.xyzw[0].f[0]; 1747 const float y = mach->QuadPos.xyzw[1].f[0]; 1748 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1749 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1750 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1751 mach->Inputs[attrib].xyzw[chan].f[0] = a0; 1752 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; 1753 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; 1754 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; 1755} 1756 1757/** 1758 * Evaluate a perspective-valued coefficient at the position of the 1759 * current quad. 1760 */ 1761static void 1762eval_perspective_coef( 1763 struct tgsi_exec_machine *mach, 1764 unsigned attrib, 1765 unsigned chan ) 1766{ 1767 const float x = mach->QuadPos.xyzw[0].f[0]; 1768 const float y = mach->QuadPos.xyzw[1].f[0]; 1769 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 1770 const float dady = mach->InterpCoefs[attrib].dady[chan]; 1771 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 1772 const float *w = mach->QuadPos.xyzw[3].f; 1773 /* divide by W here */ 1774 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; 1775 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; 1776 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; 1777 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; 1778} 1779 1780 1781typedef void (* eval_coef_func)( 1782 struct tgsi_exec_machine *mach, 1783 unsigned attrib, 1784 unsigned chan ); 1785 1786static void 1787exec_declaration(struct tgsi_exec_machine *mach, 1788 const struct tgsi_full_declaration *decl) 1789{ 1790 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { 1791 if (decl->Declaration.File == TGSI_FILE_INPUT || 1792 decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) { 1793 uint first, last, mask; 1794 1795 first = decl->Range.First; 1796 last = decl->Range.Last; 1797 mask = decl->Declaration.UsageMask; 1798 1799 if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) { 1800 uint i; 1801 1802 assert(decl->Semantic.Index == 0); 1803 assert(first == last); 1804 1805 for (i = 0; i < QUAD_SIZE; i++) { 1806 mach->Inputs[first].xyzw[0].f[i] = mach->Face; 1807 } 1808 } else { 1809 eval_coef_func eval; 1810 uint i, j; 1811 1812 switch (decl->Declaration.Interpolate) { 1813 case TGSI_INTERPOLATE_CONSTANT: 1814 eval = eval_constant_coef; 1815 break; 1816 1817 case TGSI_INTERPOLATE_LINEAR: 1818 eval = eval_linear_coef; 1819 break; 1820 1821 case TGSI_INTERPOLATE_PERSPECTIVE: 1822 eval = eval_perspective_coef; 1823 break; 1824 1825 default: 1826 assert(0); 1827 return; 1828 } 1829 1830 for (j = 0; j < NUM_CHANNELS; j++) { 1831 if (mask & (1 << j)) { 1832 for (i = first; i <= last; i++) { 1833 eval(mach, i, j); 1834 } 1835 } 1836 } 1837 } 1838 } 1839 } 1840} 1841 1842typedef void (* micro_unary_op)(union tgsi_exec_channel *dst, 1843 const union tgsi_exec_channel *src); 1844 1845static void 1846exec_scalar_unary(struct tgsi_exec_machine *mach, 1847 const struct tgsi_full_instruction *inst, 1848 micro_unary_op op, 1849 enum tgsi_exec_datatype dst_datatype, 1850 enum tgsi_exec_datatype src_datatype) 1851{ 1852 unsigned int chan; 1853 union tgsi_exec_channel src; 1854 union tgsi_exec_channel dst; 1855 1856 fetch_source(mach, &src, &inst->Src[0], CHAN_X, src_datatype); 1857 op(&dst, &src); 1858 for (chan = 0; chan < NUM_CHANNELS; chan++) { 1859 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 1860 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype); 1861 } 1862 } 1863} 1864 1865static void 1866exec_vector_unary(struct tgsi_exec_machine *mach, 1867 const struct tgsi_full_instruction *inst, 1868 micro_unary_op op, 1869 enum tgsi_exec_datatype dst_datatype, 1870 enum tgsi_exec_datatype src_datatype) 1871{ 1872 unsigned int chan; 1873 struct tgsi_exec_vector dst; 1874 1875 for (chan = 0; chan < NUM_CHANNELS; chan++) { 1876 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 1877 union tgsi_exec_channel src; 1878 1879 fetch_source(mach, &src, &inst->Src[0], chan, src_datatype); 1880 op(&dst.xyzw[chan], &src); 1881 } 1882 } 1883 for (chan = 0; chan < NUM_CHANNELS; chan++) { 1884 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 1885 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); 1886 } 1887 } 1888} 1889 1890typedef void (* micro_binary_op)(union tgsi_exec_channel *dst, 1891 const union tgsi_exec_channel *src0, 1892 const union tgsi_exec_channel *src1); 1893 1894static void 1895exec_vector_binary(struct tgsi_exec_machine *mach, 1896 const struct tgsi_full_instruction *inst, 1897 micro_binary_op op, 1898 enum tgsi_exec_datatype dst_datatype, 1899 enum tgsi_exec_datatype src_datatype) 1900{ 1901 unsigned int chan; 1902 struct tgsi_exec_vector dst; 1903 1904 for (chan = 0; chan < NUM_CHANNELS; chan++) { 1905 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 1906 union tgsi_exec_channel src[2]; 1907 1908 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); 1909 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); 1910 op(&dst.xyzw[chan], &src[0], &src[1]); 1911 } 1912 } 1913 for (chan = 0; chan < NUM_CHANNELS; chan++) { 1914 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 1915 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); 1916 } 1917 } 1918} 1919 1920typedef void (* micro_trinary_op)(union tgsi_exec_channel *dst, 1921 const union tgsi_exec_channel *src0, 1922 const union tgsi_exec_channel *src1, 1923 const union tgsi_exec_channel *src2); 1924 1925static void 1926exec_vector_trinary(struct tgsi_exec_machine *mach, 1927 const struct tgsi_full_instruction *inst, 1928 micro_trinary_op op, 1929 enum tgsi_exec_datatype dst_datatype, 1930 enum tgsi_exec_datatype src_datatype) 1931{ 1932 unsigned int chan; 1933 struct tgsi_exec_vector dst; 1934 1935 for (chan = 0; chan < NUM_CHANNELS; chan++) { 1936 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 1937 union tgsi_exec_channel src[3]; 1938 1939 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); 1940 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); 1941 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype); 1942 op(&dst.xyzw[chan], &src[0], &src[1], &src[2]); 1943 } 1944 } 1945 for (chan = 0; chan < NUM_CHANNELS; chan++) { 1946 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 1947 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); 1948 } 1949 } 1950} 1951 1952static void 1953exec_dp3(struct tgsi_exec_machine *mach, 1954 const struct tgsi_full_instruction *inst) 1955{ 1956 unsigned int chan; 1957 union tgsi_exec_channel arg[3]; 1958 1959 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); 1960 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); 1961 micro_mul(&arg[2], &arg[0], &arg[1]); 1962 1963 for (chan = CHAN_Y; chan <= CHAN_Z; chan++) { 1964 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); 1965 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); 1966 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 1967 } 1968 1969 for (chan = 0; chan < NUM_CHANNELS; chan++) { 1970 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 1971 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 1972 } 1973 } 1974} 1975 1976static void 1977exec_dp4(struct tgsi_exec_machine *mach, 1978 const struct tgsi_full_instruction *inst) 1979{ 1980 unsigned int chan; 1981 union tgsi_exec_channel arg[3]; 1982 1983 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); 1984 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); 1985 micro_mul(&arg[2], &arg[0], &arg[1]); 1986 1987 for (chan = CHAN_Y; chan <= CHAN_W; chan++) { 1988 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); 1989 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); 1990 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 1991 } 1992 1993 for (chan = 0; chan < NUM_CHANNELS; chan++) { 1994 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 1995 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 1996 } 1997 } 1998} 1999 2000static void 2001exec_dp2a(struct tgsi_exec_machine *mach, 2002 const struct tgsi_full_instruction *inst) 2003{ 2004 unsigned int chan; 2005 union tgsi_exec_channel arg[3]; 2006 2007 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2008 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2009 micro_mul(&arg[2], &arg[0], &arg[1]); 2010 2011 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2012 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2013 micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]); 2014 2015 fetch_source(mach, &arg[1], &inst->Src[2], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2016 micro_add(&arg[0], &arg[0], &arg[1]); 2017 2018 for (chan = 0; chan < NUM_CHANNELS; chan++) { 2019 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2020 store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2021 } 2022 } 2023} 2024 2025static void 2026exec_dph(struct tgsi_exec_machine *mach, 2027 const struct tgsi_full_instruction *inst) 2028{ 2029 unsigned int chan; 2030 union tgsi_exec_channel arg[3]; 2031 2032 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2033 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2034 micro_mul(&arg[2], &arg[0], &arg[1]); 2035 2036 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2037 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2038 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 2039 2040 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Z, TGSI_EXEC_DATA_FLOAT); 2041 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Z, TGSI_EXEC_DATA_FLOAT); 2042 micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]); 2043 2044 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_W, TGSI_EXEC_DATA_FLOAT); 2045 micro_add(&arg[0], &arg[0], &arg[1]); 2046 2047 for (chan = 0; chan < NUM_CHANNELS; chan++) { 2048 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2049 store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2050 } 2051 } 2052} 2053 2054static void 2055exec_dp2(struct tgsi_exec_machine *mach, 2056 const struct tgsi_full_instruction *inst) 2057{ 2058 unsigned int chan; 2059 union tgsi_exec_channel arg[3]; 2060 2061 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2062 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2063 micro_mul(&arg[2], &arg[0], &arg[1]); 2064 2065 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2066 fetch_source(mach, &arg[1], &inst->Src[1], CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2067 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 2068 2069 for (chan = 0; chan < NUM_CHANNELS; chan++) { 2070 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2071 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2072 } 2073 } 2074} 2075 2076static void 2077exec_nrm4(struct tgsi_exec_machine *mach, 2078 const struct tgsi_full_instruction *inst) 2079{ 2080 unsigned int chan; 2081 union tgsi_exec_channel arg[4]; 2082 union tgsi_exec_channel scale; 2083 2084 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2085 micro_mul(&scale, &arg[0], &arg[0]); 2086 2087 for (chan = CHAN_Y; chan <= CHAN_W; chan++) { 2088 union tgsi_exec_channel product; 2089 2090 fetch_source(mach, &arg[chan], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); 2091 micro_mul(&product, &arg[chan], &arg[chan]); 2092 micro_add(&scale, &scale, &product); 2093 } 2094 2095 micro_rsq(&scale, &scale); 2096 2097 for (chan = CHAN_X; chan <= CHAN_W; chan++) { 2098 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2099 micro_mul(&arg[chan], &arg[chan], &scale); 2100 store_dest(mach, &arg[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2101 } 2102 } 2103} 2104 2105static void 2106exec_nrm3(struct tgsi_exec_machine *mach, 2107 const struct tgsi_full_instruction *inst) 2108{ 2109 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) { 2110 unsigned int chan; 2111 union tgsi_exec_channel arg[3]; 2112 union tgsi_exec_channel scale; 2113 2114 fetch_source(mach, &arg[0], &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_FLOAT); 2115 micro_mul(&scale, &arg[0], &arg[0]); 2116 2117 for (chan = CHAN_Y; chan <= CHAN_Z; chan++) { 2118 union tgsi_exec_channel product; 2119 2120 fetch_source(mach, &arg[chan], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); 2121 micro_mul(&product, &arg[chan], &arg[chan]); 2122 micro_add(&scale, &scale, &product); 2123 } 2124 2125 micro_rsq(&scale, &scale); 2126 2127 for (chan = CHAN_X; chan <= CHAN_Z; chan++) { 2128 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2129 micro_mul(&arg[chan], &arg[chan], &scale); 2130 store_dest(mach, &arg[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2131 } 2132 } 2133 } 2134 2135 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 2136 store_dest(mach, &OneVec, &inst->Dst[0], inst, CHAN_W, TGSI_EXEC_DATA_FLOAT); 2137 } 2138} 2139 2140static void 2141exec_break(struct tgsi_exec_machine *mach) 2142{ 2143 if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) { 2144 /* turn off loop channels for each enabled exec channel */ 2145 mach->LoopMask &= ~mach->ExecMask; 2146 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 2147 UPDATE_EXEC_MASK(mach); 2148 } else { 2149 assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH); 2150 2151 mach->Switch.mask = 0x0; 2152 2153 UPDATE_EXEC_MASK(mach); 2154 } 2155} 2156 2157static void 2158exec_switch(struct tgsi_exec_machine *mach, 2159 const struct tgsi_full_instruction *inst) 2160{ 2161 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); 2162 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); 2163 2164 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; 2165 fetch_source(mach, &mach->Switch.selector, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT); 2166 mach->Switch.mask = 0x0; 2167 mach->Switch.defaultMask = 0x0; 2168 2169 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; 2170 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH; 2171 2172 UPDATE_EXEC_MASK(mach); 2173} 2174 2175static void 2176exec_case(struct tgsi_exec_machine *mach, 2177 const struct tgsi_full_instruction *inst) 2178{ 2179 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; 2180 union tgsi_exec_channel src; 2181 uint mask = 0; 2182 2183 fetch_source(mach, &src, &inst->Src[0], CHAN_X, TGSI_EXEC_DATA_UINT); 2184 2185 if (mach->Switch.selector.u[0] == src.u[0]) { 2186 mask |= 0x1; 2187 } 2188 if (mach->Switch.selector.u[1] == src.u[1]) { 2189 mask |= 0x2; 2190 } 2191 if (mach->Switch.selector.u[2] == src.u[2]) { 2192 mask |= 0x4; 2193 } 2194 if (mach->Switch.selector.u[3] == src.u[3]) { 2195 mask |= 0x8; 2196 } 2197 2198 mach->Switch.defaultMask |= mask; 2199 2200 mach->Switch.mask |= mask & prevMask; 2201 2202 UPDATE_EXEC_MASK(mach); 2203} 2204 2205static void 2206exec_default(struct tgsi_exec_machine *mach) 2207{ 2208 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; 2209 2210 mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask; 2211 2212 UPDATE_EXEC_MASK(mach); 2213} 2214 2215static void 2216exec_endswitch(struct tgsi_exec_machine *mach) 2217{ 2218 mach->Switch = mach->SwitchStack[--mach->SwitchStackTop]; 2219 mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; 2220 2221 UPDATE_EXEC_MASK(mach); 2222} 2223 2224static void 2225micro_i2f(union tgsi_exec_channel *dst, 2226 const union tgsi_exec_channel *src) 2227{ 2228 dst->f[0] = (float)src->i[0]; 2229 dst->f[1] = (float)src->i[1]; 2230 dst->f[2] = (float)src->i[2]; 2231 dst->f[3] = (float)src->i[3]; 2232} 2233 2234static void 2235micro_not(union tgsi_exec_channel *dst, 2236 const union tgsi_exec_channel *src) 2237{ 2238 dst->u[0] = ~src->u[0]; 2239 dst->u[1] = ~src->u[1]; 2240 dst->u[2] = ~src->u[2]; 2241 dst->u[3] = ~src->u[3]; 2242} 2243 2244static void 2245micro_shl(union tgsi_exec_channel *dst, 2246 const union tgsi_exec_channel *src0, 2247 const union tgsi_exec_channel *src1) 2248{ 2249 dst->u[0] = src0->u[0] << src1->u[0]; 2250 dst->u[1] = src0->u[1] << src1->u[1]; 2251 dst->u[2] = src0->u[2] << src1->u[2]; 2252 dst->u[3] = src0->u[3] << src1->u[3]; 2253} 2254 2255static void 2256micro_and(union tgsi_exec_channel *dst, 2257 const union tgsi_exec_channel *src0, 2258 const union tgsi_exec_channel *src1) 2259{ 2260 dst->u[0] = src0->u[0] & src1->u[0]; 2261 dst->u[1] = src0->u[1] & src1->u[1]; 2262 dst->u[2] = src0->u[2] & src1->u[2]; 2263 dst->u[3] = src0->u[3] & src1->u[3]; 2264} 2265 2266static void 2267micro_or(union tgsi_exec_channel *dst, 2268 const union tgsi_exec_channel *src0, 2269 const union tgsi_exec_channel *src1) 2270{ 2271 dst->u[0] = src0->u[0] | src1->u[0]; 2272 dst->u[1] = src0->u[1] | src1->u[1]; 2273 dst->u[2] = src0->u[2] | src1->u[2]; 2274 dst->u[3] = src0->u[3] | src1->u[3]; 2275} 2276 2277static void 2278micro_xor(union tgsi_exec_channel *dst, 2279 const union tgsi_exec_channel *src0, 2280 const union tgsi_exec_channel *src1) 2281{ 2282 dst->u[0] = src0->u[0] ^ src1->u[0]; 2283 dst->u[1] = src0->u[1] ^ src1->u[1]; 2284 dst->u[2] = src0->u[2] ^ src1->u[2]; 2285 dst->u[3] = src0->u[3] ^ src1->u[3]; 2286} 2287 2288static void 2289micro_f2i(union tgsi_exec_channel *dst, 2290 const union tgsi_exec_channel *src) 2291{ 2292 dst->i[0] = (int)src->f[0]; 2293 dst->i[1] = (int)src->f[1]; 2294 dst->i[2] = (int)src->f[2]; 2295 dst->i[3] = (int)src->f[3]; 2296} 2297 2298static void 2299micro_idiv(union tgsi_exec_channel *dst, 2300 const union tgsi_exec_channel *src0, 2301 const union tgsi_exec_channel *src1) 2302{ 2303 dst->i[0] = src0->i[0] / src1->i[0]; 2304 dst->i[1] = src0->i[1] / src1->i[1]; 2305 dst->i[2] = src0->i[2] / src1->i[2]; 2306 dst->i[3] = src0->i[3] / src1->i[3]; 2307} 2308 2309static void 2310micro_imax(union tgsi_exec_channel *dst, 2311 const union tgsi_exec_channel *src0, 2312 const union tgsi_exec_channel *src1) 2313{ 2314 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; 2315 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; 2316 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; 2317 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; 2318} 2319 2320static void 2321micro_imin(union tgsi_exec_channel *dst, 2322 const union tgsi_exec_channel *src0, 2323 const union tgsi_exec_channel *src1) 2324{ 2325 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; 2326 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; 2327 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; 2328 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; 2329} 2330 2331static void 2332micro_isge(union tgsi_exec_channel *dst, 2333 const union tgsi_exec_channel *src0, 2334 const union tgsi_exec_channel *src1) 2335{ 2336 dst->i[0] = src0->i[0] >= src1->i[0] ? -1 : 0; 2337 dst->i[1] = src0->i[1] >= src1->i[1] ? -1 : 0; 2338 dst->i[2] = src0->i[2] >= src1->i[2] ? -1 : 0; 2339 dst->i[3] = src0->i[3] >= src1->i[3] ? -1 : 0; 2340} 2341 2342static void 2343micro_ishr(union tgsi_exec_channel *dst, 2344 const union tgsi_exec_channel *src0, 2345 const union tgsi_exec_channel *src1) 2346{ 2347 dst->i[0] = src0->i[0] >> src1->i[0]; 2348 dst->i[1] = src0->i[1] >> src1->i[1]; 2349 dst->i[2] = src0->i[2] >> src1->i[2]; 2350 dst->i[3] = src0->i[3] >> src1->i[3]; 2351} 2352 2353static void 2354micro_islt(union tgsi_exec_channel *dst, 2355 const union tgsi_exec_channel *src0, 2356 const union tgsi_exec_channel *src1) 2357{ 2358 dst->i[0] = src0->i[0] < src1->i[0] ? -1 : 0; 2359 dst->i[1] = src0->i[1] < src1->i[1] ? -1 : 0; 2360 dst->i[2] = src0->i[2] < src1->i[2] ? -1 : 0; 2361 dst->i[3] = src0->i[3] < src1->i[3] ? -1 : 0; 2362} 2363 2364static void 2365micro_f2u(union tgsi_exec_channel *dst, 2366 const union tgsi_exec_channel *src) 2367{ 2368 dst->u[0] = (uint)src->f[0]; 2369 dst->u[1] = (uint)src->f[1]; 2370 dst->u[2] = (uint)src->f[2]; 2371 dst->u[3] = (uint)src->f[3]; 2372} 2373 2374static void 2375micro_u2f(union tgsi_exec_channel *dst, 2376 const union tgsi_exec_channel *src) 2377{ 2378 dst->f[0] = (float)src->u[0]; 2379 dst->f[1] = (float)src->u[1]; 2380 dst->f[2] = (float)src->u[2]; 2381 dst->f[3] = (float)src->u[3]; 2382} 2383 2384static void 2385micro_uadd(union tgsi_exec_channel *dst, 2386 const union tgsi_exec_channel *src0, 2387 const union tgsi_exec_channel *src1) 2388{ 2389 dst->u[0] = src0->u[0] + src1->u[0]; 2390 dst->u[1] = src0->u[1] + src1->u[1]; 2391 dst->u[2] = src0->u[2] + src1->u[2]; 2392 dst->u[3] = src0->u[3] + src1->u[3]; 2393} 2394 2395static void 2396micro_udiv(union tgsi_exec_channel *dst, 2397 const union tgsi_exec_channel *src0, 2398 const union tgsi_exec_channel *src1) 2399{ 2400 dst->u[0] = src0->u[0] / src1->u[0]; 2401 dst->u[1] = src0->u[1] / src1->u[1]; 2402 dst->u[2] = src0->u[2] / src1->u[2]; 2403 dst->u[3] = src0->u[3] / src1->u[3]; 2404} 2405 2406static void 2407micro_umad(union tgsi_exec_channel *dst, 2408 const union tgsi_exec_channel *src0, 2409 const union tgsi_exec_channel *src1, 2410 const union tgsi_exec_channel *src2) 2411{ 2412 dst->u[0] = src0->u[0] * src1->u[0] + src2->u[0]; 2413 dst->u[1] = src0->u[1] * src1->u[1] + src2->u[1]; 2414 dst->u[2] = src0->u[2] * src1->u[2] + src2->u[2]; 2415 dst->u[3] = src0->u[3] * src1->u[3] + src2->u[3]; 2416} 2417 2418static void 2419micro_umax(union tgsi_exec_channel *dst, 2420 const union tgsi_exec_channel *src0, 2421 const union tgsi_exec_channel *src1) 2422{ 2423 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; 2424 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; 2425 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; 2426 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; 2427} 2428 2429static void 2430micro_umin(union tgsi_exec_channel *dst, 2431 const union tgsi_exec_channel *src0, 2432 const union tgsi_exec_channel *src1) 2433{ 2434 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; 2435 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; 2436 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; 2437 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; 2438} 2439 2440static void 2441micro_umod(union tgsi_exec_channel *dst, 2442 const union tgsi_exec_channel *src0, 2443 const union tgsi_exec_channel *src1) 2444{ 2445 dst->u[0] = src0->u[0] % src1->u[0]; 2446 dst->u[1] = src0->u[1] % src1->u[1]; 2447 dst->u[2] = src0->u[2] % src1->u[2]; 2448 dst->u[3] = src0->u[3] % src1->u[3]; 2449} 2450 2451static void 2452micro_umul(union tgsi_exec_channel *dst, 2453 const union tgsi_exec_channel *src0, 2454 const union tgsi_exec_channel *src1) 2455{ 2456 dst->u[0] = src0->u[0] * src1->u[0]; 2457 dst->u[1] = src0->u[1] * src1->u[1]; 2458 dst->u[2] = src0->u[2] * src1->u[2]; 2459 dst->u[3] = src0->u[3] * src1->u[3]; 2460} 2461 2462static void 2463micro_useq(union tgsi_exec_channel *dst, 2464 const union tgsi_exec_channel *src0, 2465 const union tgsi_exec_channel *src1) 2466{ 2467 dst->u[0] = src0->u[0] == src1->u[0] ? ~0 : 0; 2468 dst->u[1] = src0->u[1] == src1->u[1] ? ~0 : 0; 2469 dst->u[2] = src0->u[2] == src1->u[2] ? ~0 : 0; 2470 dst->u[3] = src0->u[3] == src1->u[3] ? ~0 : 0; 2471} 2472 2473static void 2474micro_usge(union tgsi_exec_channel *dst, 2475 const union tgsi_exec_channel *src0, 2476 const union tgsi_exec_channel *src1) 2477{ 2478 dst->u[0] = src0->u[0] >= src1->u[0] ? ~0 : 0; 2479 dst->u[1] = src0->u[1] >= src1->u[1] ? ~0 : 0; 2480 dst->u[2] = src0->u[2] >= src1->u[2] ? ~0 : 0; 2481 dst->u[3] = src0->u[3] >= src1->u[3] ? ~0 : 0; 2482} 2483 2484static void 2485micro_ushr(union tgsi_exec_channel *dst, 2486 const union tgsi_exec_channel *src0, 2487 const union tgsi_exec_channel *src1) 2488{ 2489 dst->u[0] = src0->u[0] >> src1->u[0]; 2490 dst->u[1] = src0->u[1] >> src1->u[1]; 2491 dst->u[2] = src0->u[2] >> src1->u[2]; 2492 dst->u[3] = src0->u[3] >> src1->u[3]; 2493} 2494 2495static void 2496micro_uslt(union tgsi_exec_channel *dst, 2497 const union tgsi_exec_channel *src0, 2498 const union tgsi_exec_channel *src1) 2499{ 2500 dst->u[0] = src0->u[0] < src1->u[0] ? ~0 : 0; 2501 dst->u[1] = src0->u[1] < src1->u[1] ? ~0 : 0; 2502 dst->u[2] = src0->u[2] < src1->u[2] ? ~0 : 0; 2503 dst->u[3] = src0->u[3] < src1->u[3] ? ~0 : 0; 2504} 2505 2506static void 2507micro_usne(union tgsi_exec_channel *dst, 2508 const union tgsi_exec_channel *src0, 2509 const union tgsi_exec_channel *src1) 2510{ 2511 dst->u[0] = src0->u[0] != src1->u[0] ? ~0 : 0; 2512 dst->u[1] = src0->u[1] != src1->u[1] ? ~0 : 0; 2513 dst->u[2] = src0->u[2] != src1->u[2] ? ~0 : 0; 2514 dst->u[3] = src0->u[3] != src1->u[3] ? ~0 : 0; 2515} 2516 2517static void 2518exec_instruction( 2519 struct tgsi_exec_machine *mach, 2520 const struct tgsi_full_instruction *inst, 2521 int *pc ) 2522{ 2523 uint chan_index; 2524 union tgsi_exec_channel r[10]; 2525 union tgsi_exec_channel d[8]; 2526 2527 (*pc)++; 2528 2529 switch (inst->Instruction.Opcode) { 2530 case TGSI_OPCODE_ARL: 2531 exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); 2532 break; 2533 2534 case TGSI_OPCODE_MOV: 2535 exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 2536 break; 2537 2538 case TGSI_OPCODE_LIT: 2539 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2540 FETCH( &r[0], 0, CHAN_X ); 2541 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2542 micro_max(&d[CHAN_Y], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 2543 } 2544 2545 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2546 FETCH( &r[1], 0, CHAN_Y ); 2547 micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); 2548 2549 FETCH( &r[2], 0, CHAN_W ); 2550 micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); 2551 micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); 2552 micro_pow( &r[1], &r[1], &r[2] ); 2553 micro_lt(&d[CHAN_Z], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); 2554 } 2555 2556 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2557 STORE(&d[CHAN_Y], 0, CHAN_Y); 2558 } 2559 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2560 STORE(&d[CHAN_Z], 0, CHAN_Z); 2561 } 2562 } 2563 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2564 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); 2565 } 2566 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2567 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2568 } 2569 break; 2570 2571 case TGSI_OPCODE_RCP: 2572 exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2573 break; 2574 2575 case TGSI_OPCODE_RSQ: 2576 exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2577 break; 2578 2579 case TGSI_OPCODE_EXP: 2580 FETCH( &r[0], 0, CHAN_X ); 2581 micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */ 2582 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2583 micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */ 2584 STORE( &r[2], 0, CHAN_X ); /* store r2 */ 2585 } 2586 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2587 micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */ 2588 STORE( &r[2], 0, CHAN_Y ); /* store r2 */ 2589 } 2590 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2591 micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */ 2592 STORE( &r[2], 0, CHAN_Z ); /* store r2 */ 2593 } 2594 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2595 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2596 } 2597 break; 2598 2599 case TGSI_OPCODE_LOG: 2600 FETCH( &r[0], 0, CHAN_X ); 2601 micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */ 2602 micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */ 2603 micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */ 2604 if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { 2605 STORE( &r[0], 0, CHAN_X ); 2606 } 2607 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2608 micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */ 2609 micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */ 2610 STORE( &r[0], 0, CHAN_Y ); 2611 } 2612 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2613 STORE( &r[1], 0, CHAN_Z ); 2614 } 2615 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2616 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2617 } 2618 break; 2619 2620 case TGSI_OPCODE_MUL: 2621 exec_vector_binary(mach, inst, micro_mul, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2622 break; 2623 2624 case TGSI_OPCODE_ADD: 2625 exec_vector_binary(mach, inst, micro_add, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2626 break; 2627 2628 case TGSI_OPCODE_DP3: 2629 exec_dp3(mach, inst); 2630 break; 2631 2632 case TGSI_OPCODE_DP4: 2633 exec_dp4(mach, inst); 2634 break; 2635 2636 case TGSI_OPCODE_DST: 2637 if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { 2638 FETCH( &r[0], 0, CHAN_Y ); 2639 FETCH( &r[1], 1, CHAN_Y); 2640 micro_mul(&d[CHAN_Y], &r[0], &r[1]); 2641 } 2642 if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { 2643 FETCH(&d[CHAN_Z], 0, CHAN_Z); 2644 } 2645 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2646 FETCH(&d[CHAN_W], 1, CHAN_W); 2647 } 2648 2649 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2650 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X); 2651 } 2652 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2653 STORE(&d[CHAN_Y], 0, CHAN_Y); 2654 } 2655 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2656 STORE(&d[CHAN_Z], 0, CHAN_Z); 2657 } 2658 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2659 STORE(&d[CHAN_W], 0, CHAN_W); 2660 } 2661 break; 2662 2663 case TGSI_OPCODE_MIN: 2664 exec_vector_binary(mach, inst, micro_min, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2665 break; 2666 2667 case TGSI_OPCODE_MAX: 2668 exec_vector_binary(mach, inst, micro_max, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2669 break; 2670 2671 case TGSI_OPCODE_SLT: 2672 exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2673 break; 2674 2675 case TGSI_OPCODE_SGE: 2676 exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2677 break; 2678 2679 case TGSI_OPCODE_MAD: 2680 exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2681 break; 2682 2683 case TGSI_OPCODE_SUB: 2684 exec_vector_binary(mach, inst, micro_sub, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2685 break; 2686 2687 case TGSI_OPCODE_LRP: 2688 exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2689 break; 2690 2691 case TGSI_OPCODE_CND: 2692 exec_vector_trinary(mach, inst, micro_cnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2693 break; 2694 2695 case TGSI_OPCODE_DP2A: 2696 exec_dp2a(mach, inst); 2697 break; 2698 2699 case TGSI_OPCODE_FRC: 2700 exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2701 break; 2702 2703 case TGSI_OPCODE_CLAMP: 2704 exec_vector_trinary(mach, inst, micro_clamp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2705 break; 2706 2707 case TGSI_OPCODE_FLR: 2708 exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2709 break; 2710 2711 case TGSI_OPCODE_ROUND: 2712 exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2713 break; 2714 2715 case TGSI_OPCODE_EX2: 2716 exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2717 break; 2718 2719 case TGSI_OPCODE_LG2: 2720 exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2721 break; 2722 2723 case TGSI_OPCODE_POW: 2724 FETCH(&r[0], 0, CHAN_X); 2725 FETCH(&r[1], 1, CHAN_X); 2726 2727 micro_pow( &r[0], &r[0], &r[1] ); 2728 2729 FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { 2730 STORE( &r[0], 0, chan_index ); 2731 } 2732 break; 2733 2734 case TGSI_OPCODE_XPD: 2735 FETCH(&r[0], 0, CHAN_Y); 2736 FETCH(&r[1], 1, CHAN_Z); 2737 2738 micro_mul( &r[2], &r[0], &r[1] ); 2739 2740 FETCH(&r[3], 0, CHAN_Z); 2741 FETCH(&r[4], 1, CHAN_Y); 2742 2743 micro_mul( &r[5], &r[3], &r[4] ); 2744 micro_sub(&d[CHAN_X], &r[2], &r[5]); 2745 2746 FETCH(&r[2], 1, CHAN_X); 2747 2748 micro_mul( &r[3], &r[3], &r[2] ); 2749 2750 FETCH(&r[5], 0, CHAN_X); 2751 2752 micro_mul( &r[1], &r[1], &r[5] ); 2753 micro_sub(&d[CHAN_Y], &r[3], &r[1]); 2754 2755 micro_mul( &r[5], &r[5], &r[4] ); 2756 micro_mul( &r[0], &r[0], &r[2] ); 2757 micro_sub(&d[CHAN_Z], &r[5], &r[0]); 2758 2759 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2760 STORE(&d[CHAN_X], 0, CHAN_X); 2761 } 2762 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2763 STORE(&d[CHAN_Y], 0, CHAN_Y); 2764 } 2765 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2766 STORE(&d[CHAN_Z], 0, CHAN_Z); 2767 } 2768 if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { 2769 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 2770 } 2771 break; 2772 2773 case TGSI_OPCODE_ABS: 2774 exec_vector_unary(mach, inst, micro_abs, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2775 break; 2776 2777 case TGSI_OPCODE_RCC: 2778 FETCH(&r[0], 0, CHAN_X); 2779 micro_div(&r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0]); 2780 micro_float_clamp(&r[0], &r[0]); 2781 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2782 STORE(&r[0], 0, chan_index); 2783 } 2784 break; 2785 2786 case TGSI_OPCODE_DPH: 2787 exec_dph(mach, inst); 2788 break; 2789 2790 case TGSI_OPCODE_COS: 2791 exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2792 break; 2793 2794 case TGSI_OPCODE_DDX: 2795 exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2796 break; 2797 2798 case TGSI_OPCODE_DDY: 2799 exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2800 break; 2801 2802 case TGSI_OPCODE_KILP: 2803 exec_kilp (mach, inst); 2804 break; 2805 2806 case TGSI_OPCODE_KIL: 2807 exec_kil (mach, inst); 2808 break; 2809 2810 case TGSI_OPCODE_PK2H: 2811 assert (0); 2812 break; 2813 2814 case TGSI_OPCODE_PK2US: 2815 assert (0); 2816 break; 2817 2818 case TGSI_OPCODE_PK4B: 2819 assert (0); 2820 break; 2821 2822 case TGSI_OPCODE_PK4UB: 2823 assert (0); 2824 break; 2825 2826 case TGSI_OPCODE_RFL: 2827 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2828 IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2829 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2830 /* r0 = dp3(src0, src0) */ 2831 FETCH(&r[2], 0, CHAN_X); 2832 micro_mul(&r[0], &r[2], &r[2]); 2833 FETCH(&r[4], 0, CHAN_Y); 2834 micro_mul(&r[8], &r[4], &r[4]); 2835 micro_add(&r[0], &r[0], &r[8]); 2836 FETCH(&r[6], 0, CHAN_Z); 2837 micro_mul(&r[8], &r[6], &r[6]); 2838 micro_add(&r[0], &r[0], &r[8]); 2839 2840 /* r1 = dp3(src0, src1) */ 2841 FETCH(&r[3], 1, CHAN_X); 2842 micro_mul(&r[1], &r[2], &r[3]); 2843 FETCH(&r[5], 1, CHAN_Y); 2844 micro_mul(&r[8], &r[4], &r[5]); 2845 micro_add(&r[1], &r[1], &r[8]); 2846 FETCH(&r[7], 1, CHAN_Z); 2847 micro_mul(&r[8], &r[6], &r[7]); 2848 micro_add(&r[1], &r[1], &r[8]); 2849 2850 /* r1 = 2 * r1 / r0 */ 2851 micro_add(&r[1], &r[1], &r[1]); 2852 micro_div(&r[1], &r[1], &r[0]); 2853 2854 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2855 micro_mul(&r[2], &r[2], &r[1]); 2856 micro_sub(&r[2], &r[2], &r[3]); 2857 STORE(&r[2], 0, CHAN_X); 2858 } 2859 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2860 micro_mul(&r[4], &r[4], &r[1]); 2861 micro_sub(&r[4], &r[4], &r[5]); 2862 STORE(&r[4], 0, CHAN_Y); 2863 } 2864 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2865 micro_mul(&r[6], &r[6], &r[1]); 2866 micro_sub(&r[6], &r[6], &r[7]); 2867 STORE(&r[6], 0, CHAN_Z); 2868 } 2869 } 2870 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2871 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W); 2872 } 2873 break; 2874 2875 case TGSI_OPCODE_SEQ: 2876 exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2877 break; 2878 2879 case TGSI_OPCODE_SFL: 2880 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2881 STORE(&mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, chan_index); 2882 } 2883 break; 2884 2885 case TGSI_OPCODE_SGT: 2886 exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2887 break; 2888 2889 case TGSI_OPCODE_SIN: 2890 exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2891 break; 2892 2893 case TGSI_OPCODE_SLE: 2894 exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2895 break; 2896 2897 case TGSI_OPCODE_SNE: 2898 exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 2899 break; 2900 2901 case TGSI_OPCODE_STR: 2902 FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) { 2903 STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, chan_index); 2904 } 2905 break; 2906 2907 case TGSI_OPCODE_TEX: 2908 /* simple texture lookup */ 2909 /* src[0] = texcoord */ 2910 /* src[1] = sampler unit */ 2911 exec_tex(mach, inst, TEX_MODIFIER_NONE); 2912 break; 2913 2914 case TGSI_OPCODE_TXB: 2915 /* Texture lookup with lod bias */ 2916 /* src[0] = texcoord (src[0].w = LOD bias) */ 2917 /* src[1] = sampler unit */ 2918 exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS); 2919 break; 2920 2921 case TGSI_OPCODE_TXD: 2922 /* Texture lookup with explict partial derivatives */ 2923 /* src[0] = texcoord */ 2924 /* src[1] = d[strq]/dx */ 2925 /* src[2] = d[strq]/dy */ 2926 /* src[3] = sampler unit */ 2927 exec_txd(mach, inst); 2928 break; 2929 2930 case TGSI_OPCODE_TXL: 2931 /* Texture lookup with explit LOD */ 2932 /* src[0] = texcoord (src[0].w = LOD) */ 2933 /* src[1] = sampler unit */ 2934 exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD); 2935 break; 2936 2937 case TGSI_OPCODE_TXP: 2938 /* Texture lookup with projection */ 2939 /* src[0] = texcoord (src[0].w = projection) */ 2940 /* src[1] = sampler unit */ 2941 exec_tex(mach, inst, TEX_MODIFIER_PROJECTED); 2942 break; 2943 2944 case TGSI_OPCODE_UP2H: 2945 assert (0); 2946 break; 2947 2948 case TGSI_OPCODE_UP2US: 2949 assert (0); 2950 break; 2951 2952 case TGSI_OPCODE_UP4B: 2953 assert (0); 2954 break; 2955 2956 case TGSI_OPCODE_UP4UB: 2957 assert (0); 2958 break; 2959 2960 case TGSI_OPCODE_X2D: 2961 FETCH(&r[0], 1, CHAN_X); 2962 FETCH(&r[1], 1, CHAN_Y); 2963 if (IS_CHANNEL_ENABLED(*inst, CHAN_X) || 2964 IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2965 FETCH(&r[2], 2, CHAN_X); 2966 micro_mul(&r[2], &r[2], &r[0]); 2967 FETCH(&r[3], 2, CHAN_Y); 2968 micro_mul(&r[3], &r[3], &r[1]); 2969 micro_add(&r[2], &r[2], &r[3]); 2970 FETCH(&r[3], 0, CHAN_X); 2971 micro_add(&d[CHAN_X], &r[2], &r[3]); 2972 2973 } 2974 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) || 2975 IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2976 FETCH(&r[2], 2, CHAN_Z); 2977 micro_mul(&r[2], &r[2], &r[0]); 2978 FETCH(&r[3], 2, CHAN_W); 2979 micro_mul(&r[3], &r[3], &r[1]); 2980 micro_add(&r[2], &r[2], &r[3]); 2981 FETCH(&r[3], 0, CHAN_Y); 2982 micro_add(&d[CHAN_Y], &r[2], &r[3]); 2983 2984 } 2985 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 2986 STORE(&d[CHAN_X], 0, CHAN_X); 2987 } 2988 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 2989 STORE(&d[CHAN_Y], 0, CHAN_Y); 2990 } 2991 if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) { 2992 STORE(&d[CHAN_X], 0, CHAN_Z); 2993 } 2994 if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) { 2995 STORE(&d[CHAN_Y], 0, CHAN_W); 2996 } 2997 break; 2998 2999 case TGSI_OPCODE_ARA: 3000 assert (0); 3001 break; 3002 3003 case TGSI_OPCODE_ARR: 3004 exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); 3005 break; 3006 3007 case TGSI_OPCODE_BRA: 3008 assert (0); 3009 break; 3010 3011 case TGSI_OPCODE_CAL: 3012 /* skip the call if no execution channels are enabled */ 3013 if (mach->ExecMask) { 3014 /* do the call */ 3015 3016 /* First, record the depths of the execution stacks. 3017 * This is important for deeply nested/looped return statements. 3018 * We have to unwind the stacks by the correct amount. For a 3019 * real code generator, we could determine the number of entries 3020 * to pop off each stack with simple static analysis and avoid 3021 * implementing this data structure at run time. 3022 */ 3023 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop; 3024 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop; 3025 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop; 3026 mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop; 3027 mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop; 3028 /* note that PC was already incremented above */ 3029 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc; 3030 3031 mach->CallStackTop++; 3032 3033 /* Second, push the Cond, Loop, Cont, Func stacks */ 3034 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 3035 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3036 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3037 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); 3038 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); 3039 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); 3040 3041 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 3042 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 3043 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 3044 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; 3045 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; 3046 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; 3047 3048 /* Finally, jump to the subroutine */ 3049 *pc = inst->Label.Label; 3050 } 3051 break; 3052 3053 case TGSI_OPCODE_RET: 3054 mach->FuncMask &= ~mach->ExecMask; 3055 UPDATE_EXEC_MASK(mach); 3056 3057 if (mach->FuncMask == 0x0) { 3058 /* really return now (otherwise, keep executing */ 3059 3060 if (mach->CallStackTop == 0) { 3061 /* returning from main() */ 3062 *pc = -1; 3063 return; 3064 } 3065 3066 assert(mach->CallStackTop > 0); 3067 mach->CallStackTop--; 3068 3069 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; 3070 mach->CondMask = mach->CondStack[mach->CondStackTop]; 3071 3072 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; 3073 mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; 3074 3075 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; 3076 mach->ContMask = mach->ContStack[mach->ContStackTop]; 3077 3078 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; 3079 mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; 3080 3081 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; 3082 mach->BreakType = mach->BreakStack[mach->BreakStackTop]; 3083 3084 assert(mach->FuncStackTop > 0); 3085 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 3086 3087 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; 3088 3089 UPDATE_EXEC_MASK(mach); 3090 } 3091 break; 3092 3093 case TGSI_OPCODE_SSG: 3094 exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3095 break; 3096 3097 case TGSI_OPCODE_CMP: 3098 exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3099 break; 3100 3101 case TGSI_OPCODE_SCS: 3102 if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { 3103 FETCH( &r[0], 0, CHAN_X ); 3104 if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) { 3105 micro_cos(&r[1], &r[0]); 3106 STORE(&r[1], 0, CHAN_X); 3107 } 3108 if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) { 3109 micro_sin(&r[1], &r[0]); 3110 STORE(&r[1], 0, CHAN_Y); 3111 } 3112 } 3113 if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { 3114 STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); 3115 } 3116 if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { 3117 STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); 3118 } 3119 break; 3120 3121 case TGSI_OPCODE_NRM: 3122 exec_nrm3(mach, inst); 3123 break; 3124 3125 case TGSI_OPCODE_NRM4: 3126 exec_nrm4(mach, inst); 3127 break; 3128 3129 case TGSI_OPCODE_DIV: 3130 assert( 0 ); 3131 break; 3132 3133 case TGSI_OPCODE_DP2: 3134 exec_dp2(mach, inst); 3135 break; 3136 3137 case TGSI_OPCODE_IF: 3138 /* push CondMask */ 3139 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 3140 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 3141 FETCH( &r[0], 0, CHAN_X ); 3142 /* update CondMask */ 3143 if( ! r[0].u[0] ) { 3144 mach->CondMask &= ~0x1; 3145 } 3146 if( ! r[0].u[1] ) { 3147 mach->CondMask &= ~0x2; 3148 } 3149 if( ! r[0].u[2] ) { 3150 mach->CondMask &= ~0x4; 3151 } 3152 if( ! r[0].u[3] ) { 3153 mach->CondMask &= ~0x8; 3154 } 3155 UPDATE_EXEC_MASK(mach); 3156 /* Todo: If CondMask==0, jump to ELSE */ 3157 break; 3158 3159 case TGSI_OPCODE_ELSE: 3160 /* invert CondMask wrt previous mask */ 3161 { 3162 uint prevMask; 3163 assert(mach->CondStackTop > 0); 3164 prevMask = mach->CondStack[mach->CondStackTop - 1]; 3165 mach->CondMask = ~mach->CondMask & prevMask; 3166 UPDATE_EXEC_MASK(mach); 3167 /* Todo: If CondMask==0, jump to ENDIF */ 3168 } 3169 break; 3170 3171 case TGSI_OPCODE_ENDIF: 3172 /* pop CondMask */ 3173 assert(mach->CondStackTop > 0); 3174 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 3175 UPDATE_EXEC_MASK(mach); 3176 break; 3177 3178 case TGSI_OPCODE_END: 3179 /* halt execution */ 3180 *pc = -1; 3181 break; 3182 3183 case TGSI_OPCODE_REP: 3184 assert (0); 3185 break; 3186 3187 case TGSI_OPCODE_ENDREP: 3188 assert (0); 3189 break; 3190 3191 case TGSI_OPCODE_PUSHA: 3192 assert (0); 3193 break; 3194 3195 case TGSI_OPCODE_POPA: 3196 assert (0); 3197 break; 3198 3199 case TGSI_OPCODE_CEIL: 3200 exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3201 break; 3202 3203 case TGSI_OPCODE_I2F: 3204 exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT); 3205 break; 3206 3207 case TGSI_OPCODE_NOT: 3208 exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3209 break; 3210 3211 case TGSI_OPCODE_TRUNC: 3212 exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3213 break; 3214 3215 case TGSI_OPCODE_SHL: 3216 exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3217 break; 3218 3219 case TGSI_OPCODE_AND: 3220 exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3221 break; 3222 3223 case TGSI_OPCODE_OR: 3224 exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3225 break; 3226 3227 case TGSI_OPCODE_MOD: 3228 assert (0); 3229 break; 3230 3231 case TGSI_OPCODE_XOR: 3232 exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3233 break; 3234 3235 case TGSI_OPCODE_SAD: 3236 assert (0); 3237 break; 3238 3239 case TGSI_OPCODE_TXF: 3240 assert (0); 3241 break; 3242 3243 case TGSI_OPCODE_TXQ: 3244 assert (0); 3245 break; 3246 3247 case TGSI_OPCODE_EMIT: 3248 emit_vertex(mach); 3249 break; 3250 3251 case TGSI_OPCODE_ENDPRIM: 3252 emit_primitive(mach); 3253 break; 3254 3255 case TGSI_OPCODE_BGNFOR: 3256 assert(mach->LoopCounterStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3257 for (chan_index = 0; chan_index < 3; chan_index++) { 3258 FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index ); 3259 } 3260 ++mach->LoopCounterStackTop; 3261 STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 0, CHAN_X); 3262 /* update LoopMask */ 3263 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) { 3264 mach->LoopMask &= ~0x1; 3265 } 3266 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) { 3267 mach->LoopMask &= ~0x2; 3268 } 3269 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) { 3270 mach->LoopMask &= ~0x4; 3271 } 3272 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) { 3273 mach->LoopMask &= ~0x8; 3274 } 3275 /* TODO: if mach->LoopMask == 0, jump to end of loop */ 3276 UPDATE_EXEC_MASK(mach); 3277 /* fall-through (for now) */ 3278 case TGSI_OPCODE_BGNLOOP: 3279 /* push LoopMask and ContMasks */ 3280 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3281 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3282 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 3283 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); 3284 3285 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 3286 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 3287 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1; 3288 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; 3289 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP; 3290 break; 3291 3292 case TGSI_OPCODE_ENDFOR: 3293 assert(mach->LoopCounterStackTop > 0); 3294 micro_sub(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], 3295 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], 3296 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); 3297 /* update LoopMask */ 3298 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) { 3299 mach->LoopMask &= ~0x1; 3300 } 3301 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) { 3302 mach->LoopMask &= ~0x2; 3303 } 3304 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) { 3305 mach->LoopMask &= ~0x4; 3306 } 3307 if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) { 3308 mach->LoopMask &= ~0x8; 3309 } 3310 micro_add(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 3311 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 3312 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]); 3313 assert(mach->LoopLabelStackTop > 0); 3314 inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1]; 3315 STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_X], 0, CHAN_X); 3316 /* Restore ContMask, but don't pop */ 3317 assert(mach->ContStackTop > 0); 3318 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 3319 UPDATE_EXEC_MASK(mach); 3320 if (mach->ExecMask) { 3321 /* repeat loop: jump to instruction just past BGNLOOP */ 3322 assert(mach->LoopLabelStackTop > 0); 3323 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; 3324 } 3325 else { 3326 /* exit loop: pop LoopMask */ 3327 assert(mach->LoopStackTop > 0); 3328 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 3329 /* pop ContMask */ 3330 assert(mach->ContStackTop > 0); 3331 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 3332 assert(mach->LoopLabelStackTop > 0); 3333 --mach->LoopLabelStackTop; 3334 assert(mach->LoopCounterStackTop > 0); 3335 --mach->LoopCounterStackTop; 3336 3337 mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; 3338 } 3339 UPDATE_EXEC_MASK(mach); 3340 break; 3341 3342 case TGSI_OPCODE_ENDLOOP: 3343 /* Restore ContMask, but don't pop */ 3344 assert(mach->ContStackTop > 0); 3345 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 3346 UPDATE_EXEC_MASK(mach); 3347 if (mach->ExecMask) { 3348 /* repeat loop: jump to instruction just past BGNLOOP */ 3349 assert(mach->LoopLabelStackTop > 0); 3350 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; 3351 } 3352 else { 3353 /* exit loop: pop LoopMask */ 3354 assert(mach->LoopStackTop > 0); 3355 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 3356 /* pop ContMask */ 3357 assert(mach->ContStackTop > 0); 3358 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 3359 assert(mach->LoopLabelStackTop > 0); 3360 --mach->LoopLabelStackTop; 3361 3362 mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; 3363 } 3364 UPDATE_EXEC_MASK(mach); 3365 break; 3366 3367 case TGSI_OPCODE_BRK: 3368 exec_break(mach); 3369 break; 3370 3371 case TGSI_OPCODE_CONT: 3372 /* turn off cont channels for each enabled exec channel */ 3373 mach->ContMask &= ~mach->ExecMask; 3374 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3375 UPDATE_EXEC_MASK(mach); 3376 break; 3377 3378 case TGSI_OPCODE_BGNSUB: 3379 /* no-op */ 3380 break; 3381 3382 case TGSI_OPCODE_ENDSUB: 3383 /* 3384 * XXX: This really should be a no-op. We should never reach this opcode. 3385 */ 3386 3387 assert(mach->CallStackTop > 0); 3388 mach->CallStackTop--; 3389 3390 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; 3391 mach->CondMask = mach->CondStack[mach->CondStackTop]; 3392 3393 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; 3394 mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; 3395 3396 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; 3397 mach->ContMask = mach->ContStack[mach->ContStackTop]; 3398 3399 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; 3400 mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; 3401 3402 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; 3403 mach->BreakType = mach->BreakStack[mach->BreakStackTop]; 3404 3405 assert(mach->FuncStackTop > 0); 3406 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 3407 3408 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; 3409 3410 UPDATE_EXEC_MASK(mach); 3411 break; 3412 3413 case TGSI_OPCODE_NOP: 3414 break; 3415 3416 case TGSI_OPCODE_BREAKC: 3417 FETCH(&r[0], 0, CHAN_X); 3418 /* update CondMask */ 3419 if (r[0].u[0] && (mach->ExecMask & 0x1)) { 3420 mach->LoopMask &= ~0x1; 3421 } 3422 if (r[0].u[1] && (mach->ExecMask & 0x2)) { 3423 mach->LoopMask &= ~0x2; 3424 } 3425 if (r[0].u[2] && (mach->ExecMask & 0x4)) { 3426 mach->LoopMask &= ~0x4; 3427 } 3428 if (r[0].u[3] && (mach->ExecMask & 0x8)) { 3429 mach->LoopMask &= ~0x8; 3430 } 3431 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3432 UPDATE_EXEC_MASK(mach); 3433 break; 3434 3435 case TGSI_OPCODE_F2I: 3436 exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); 3437 break; 3438 3439 case TGSI_OPCODE_IDIV: 3440 exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 3441 break; 3442 3443 case TGSI_OPCODE_IMAX: 3444 exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 3445 break; 3446 3447 case TGSI_OPCODE_IMIN: 3448 exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 3449 break; 3450 3451 case TGSI_OPCODE_INEG: 3452 exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 3453 break; 3454 3455 case TGSI_OPCODE_ISGE: 3456 exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 3457 break; 3458 3459 case TGSI_OPCODE_ISHR: 3460 exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 3461 break; 3462 3463 case TGSI_OPCODE_ISLT: 3464 exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 3465 break; 3466 3467 case TGSI_OPCODE_F2U: 3468 exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 3469 break; 3470 3471 case TGSI_OPCODE_U2F: 3472 exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT); 3473 break; 3474 3475 case TGSI_OPCODE_UADD: 3476 exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3477 break; 3478 3479 case TGSI_OPCODE_UDIV: 3480 exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3481 break; 3482 3483 case TGSI_OPCODE_UMAD: 3484 exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3485 break; 3486 3487 case TGSI_OPCODE_UMAX: 3488 exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3489 break; 3490 3491 case TGSI_OPCODE_UMIN: 3492 exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3493 break; 3494 3495 case TGSI_OPCODE_UMOD: 3496 exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3497 break; 3498 3499 case TGSI_OPCODE_UMUL: 3500 exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3501 break; 3502 3503 case TGSI_OPCODE_USEQ: 3504 exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3505 break; 3506 3507 case TGSI_OPCODE_USGE: 3508 exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3509 break; 3510 3511 case TGSI_OPCODE_USHR: 3512 exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3513 break; 3514 3515 case TGSI_OPCODE_USLT: 3516 exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3517 break; 3518 3519 case TGSI_OPCODE_USNE: 3520 exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 3521 break; 3522 3523 case TGSI_OPCODE_SWITCH: 3524 exec_switch(mach, inst); 3525 break; 3526 3527 case TGSI_OPCODE_CASE: 3528 exec_case(mach, inst); 3529 break; 3530 3531 case TGSI_OPCODE_DEFAULT: 3532 exec_default(mach); 3533 break; 3534 3535 case TGSI_OPCODE_ENDSWITCH: 3536 exec_endswitch(mach); 3537 break; 3538 3539 default: 3540 assert( 0 ); 3541 } 3542} 3543 3544 3545#define DEBUG_EXECUTION 0 3546 3547 3548/** 3549 * Run TGSI interpreter. 3550 * \return bitmask of "alive" quad components 3551 */ 3552uint 3553tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) 3554{ 3555 uint i; 3556 int pc = 0; 3557 3558 mach->CondMask = 0xf; 3559 mach->LoopMask = 0xf; 3560 mach->ContMask = 0xf; 3561 mach->FuncMask = 0xf; 3562 mach->ExecMask = 0xf; 3563 3564 mach->Switch.mask = 0xf; 3565 3566 assert(mach->CondStackTop == 0); 3567 assert(mach->LoopStackTop == 0); 3568 assert(mach->ContStackTop == 0); 3569 assert(mach->SwitchStackTop == 0); 3570 assert(mach->BreakStackTop == 0); 3571 assert(mach->CallStackTop == 0); 3572 3573 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; 3574 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; 3575 3576 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { 3577 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; 3578 mach->Primitives[0] = 0; 3579 } 3580 3581 for (i = 0; i < QUAD_SIZE; i++) { 3582 mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] = 3583 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) | 3584 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) | 3585 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) | 3586 (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT); 3587 } 3588 3589 /* execute declarations (interpolants) */ 3590 for (i = 0; i < mach->NumDeclarations; i++) { 3591 exec_declaration( mach, mach->Declarations+i ); 3592 } 3593 3594 { 3595#if DEBUG_EXECUTION 3596 struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS]; 3597 struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS]; 3598 uint inst = 1; 3599 3600 memcpy(temps, mach->Temps, sizeof(temps)); 3601 memcpy(outputs, mach->Outputs, sizeof(outputs)); 3602#endif 3603 3604 /* execute instructions, until pc is set to -1 */ 3605 while (pc != -1) { 3606 3607#if DEBUG_EXECUTION 3608 uint i; 3609 3610 tgsi_dump_instruction(&mach->Instructions[pc], inst++); 3611#endif 3612 3613 assert(pc < (int) mach->NumInstructions); 3614 exec_instruction(mach, mach->Instructions + pc, &pc); 3615 3616#if DEBUG_EXECUTION 3617 for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) { 3618 if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) { 3619 uint j; 3620 3621 memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i])); 3622 debug_printf("TEMP[%2u] = ", i); 3623 for (j = 0; j < 4; j++) { 3624 if (j > 0) { 3625 debug_printf(" "); 3626 } 3627 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", 3628 temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j], 3629 temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j], 3630 temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j], 3631 temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]); 3632 } 3633 } 3634 } 3635 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { 3636 if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) { 3637 uint j; 3638 3639 memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i])); 3640 debug_printf("OUT[%2u] = ", i); 3641 for (j = 0; j < 4; j++) { 3642 if (j > 0) { 3643 debug_printf(" "); 3644 } 3645 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", 3646 outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j], 3647 outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j], 3648 outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j], 3649 outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]); 3650 } 3651 } 3652 } 3653#endif 3654 } 3655 } 3656 3657#if 0 3658 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ 3659 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { 3660 /* 3661 * Scale back depth component. 3662 */ 3663 for (i = 0; i < 4; i++) 3664 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; 3665 } 3666#endif 3667 3668 assert(mach->CondStackTop == 0); 3669 assert(mach->LoopStackTop == 0); 3670 assert(mach->ContStackTop == 0); 3671 assert(mach->SwitchStackTop == 0); 3672 assert(mach->BreakStackTop == 0); 3673 assert(mach->CallStackTop == 0); 3674 3675 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 3676} 3677