/* tgsi_exec.c revision af69d88d */
1/************************************************************************** 2 * 3 * Copyright 2007-2008 VMware, Inc. 4 * All Rights Reserved. 5 * Copyright 2009-2010 VMware, Inc. All rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial portions 17 * of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 **************************************************************************/ 28 29/** 30 * TGSI interpreter/executor. 31 * 32 * Flow control information: 33 * 34 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) 35 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special 36 * care since a condition may be true for some quad components but false 37 * for other components. 38 * 39 * We basically execute all statements (even if they're in the part of 40 * an IF/ELSE clause that's "not taken") and use a special mask to 41 * control writing to destination registers. This is the ExecMask. 42 * See store_dest(). 
43 * 44 * The ExecMask is computed from three other masks (CondMask, LoopMask and 45 * ContMask) which are controlled by the flow control instructions (namely: 46 * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). 47 * 48 * 49 * Authors: 50 * Michal Krol 51 * Brian Paul 52 */ 53 54#include "pipe/p_compiler.h" 55#include "pipe/p_state.h" 56#include "pipe/p_shader_tokens.h" 57#include "tgsi/tgsi_dump.h" 58#include "tgsi/tgsi_parse.h" 59#include "tgsi/tgsi_util.h" 60#include "tgsi_exec.h" 61#include "util/u_memory.h" 62#include "util/u_math.h" 63 64 65#define DEBUG_EXECUTION 0 66 67 68#define FAST_MATH 0 69 70#define TILE_TOP_LEFT 0 71#define TILE_TOP_RIGHT 1 72#define TILE_BOTTOM_LEFT 2 73#define TILE_BOTTOM_RIGHT 3 74 75static void 76micro_abs(union tgsi_exec_channel *dst, 77 const union tgsi_exec_channel *src) 78{ 79 dst->f[0] = fabsf(src->f[0]); 80 dst->f[1] = fabsf(src->f[1]); 81 dst->f[2] = fabsf(src->f[2]); 82 dst->f[3] = fabsf(src->f[3]); 83} 84 85static void 86micro_arl(union tgsi_exec_channel *dst, 87 const union tgsi_exec_channel *src) 88{ 89 dst->i[0] = (int)floorf(src->f[0]); 90 dst->i[1] = (int)floorf(src->f[1]); 91 dst->i[2] = (int)floorf(src->f[2]); 92 dst->i[3] = (int)floorf(src->f[3]); 93} 94 95static void 96micro_arr(union tgsi_exec_channel *dst, 97 const union tgsi_exec_channel *src) 98{ 99 dst->i[0] = (int)floorf(src->f[0] + 0.5f); 100 dst->i[1] = (int)floorf(src->f[1] + 0.5f); 101 dst->i[2] = (int)floorf(src->f[2] + 0.5f); 102 dst->i[3] = (int)floorf(src->f[3] + 0.5f); 103} 104 105static void 106micro_ceil(union tgsi_exec_channel *dst, 107 const union tgsi_exec_channel *src) 108{ 109 dst->f[0] = ceilf(src->f[0]); 110 dst->f[1] = ceilf(src->f[1]); 111 dst->f[2] = ceilf(src->f[2]); 112 dst->f[3] = ceilf(src->f[3]); 113} 114 115static void 116micro_clamp(union tgsi_exec_channel *dst, 117 const union tgsi_exec_channel *src0, 118 const union tgsi_exec_channel *src1, 119 const union tgsi_exec_channel *src2) 120{ 121 dst->f[0] = src0->f[0] < src1->f[0] ? 
src1->f[0] : src0->f[0] > src2->f[0] ? src2->f[0] : src0->f[0]; 122 dst->f[1] = src0->f[1] < src1->f[1] ? src1->f[1] : src0->f[1] > src2->f[1] ? src2->f[1] : src0->f[1]; 123 dst->f[2] = src0->f[2] < src1->f[2] ? src1->f[2] : src0->f[2] > src2->f[2] ? src2->f[2] : src0->f[2]; 124 dst->f[3] = src0->f[3] < src1->f[3] ? src1->f[3] : src0->f[3] > src2->f[3] ? src2->f[3] : src0->f[3]; 125} 126 127static void 128micro_cmp(union tgsi_exec_channel *dst, 129 const union tgsi_exec_channel *src0, 130 const union tgsi_exec_channel *src1, 131 const union tgsi_exec_channel *src2) 132{ 133 dst->f[0] = src0->f[0] < 0.0f ? src1->f[0] : src2->f[0]; 134 dst->f[1] = src0->f[1] < 0.0f ? src1->f[1] : src2->f[1]; 135 dst->f[2] = src0->f[2] < 0.0f ? src1->f[2] : src2->f[2]; 136 dst->f[3] = src0->f[3] < 0.0f ? src1->f[3] : src2->f[3]; 137} 138 139static void 140micro_cnd(union tgsi_exec_channel *dst, 141 const union tgsi_exec_channel *src0, 142 const union tgsi_exec_channel *src1, 143 const union tgsi_exec_channel *src2) 144{ 145 dst->f[0] = src2->f[0] > 0.5f ? src0->f[0] : src1->f[0]; 146 dst->f[1] = src2->f[1] > 0.5f ? src0->f[1] : src1->f[1]; 147 dst->f[2] = src2->f[2] > 0.5f ? src0->f[2] : src1->f[2]; 148 dst->f[3] = src2->f[3] > 0.5f ? 
src0->f[3] : src1->f[3]; 149} 150 151static void 152micro_cos(union tgsi_exec_channel *dst, 153 const union tgsi_exec_channel *src) 154{ 155 dst->f[0] = cosf(src->f[0]); 156 dst->f[1] = cosf(src->f[1]); 157 dst->f[2] = cosf(src->f[2]); 158 dst->f[3] = cosf(src->f[3]); 159} 160 161static void 162micro_ddx(union tgsi_exec_channel *dst, 163 const union tgsi_exec_channel *src) 164{ 165 dst->f[0] = 166 dst->f[1] = 167 dst->f[2] = 168 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; 169} 170 171static void 172micro_ddy(union tgsi_exec_channel *dst, 173 const union tgsi_exec_channel *src) 174{ 175 dst->f[0] = 176 dst->f[1] = 177 dst->f[2] = 178 dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; 179} 180 181static void 182micro_exp2(union tgsi_exec_channel *dst, 183 const union tgsi_exec_channel *src) 184{ 185#if FAST_MATH 186 dst->f[0] = util_fast_exp2(src->f[0]); 187 dst->f[1] = util_fast_exp2(src->f[1]); 188 dst->f[2] = util_fast_exp2(src->f[2]); 189 dst->f[3] = util_fast_exp2(src->f[3]); 190#else 191#if DEBUG 192 /* Inf is okay for this instruction, so clamp it to silence assertions. 
*/ 193 uint i; 194 union tgsi_exec_channel clamped; 195 196 for (i = 0; i < 4; i++) { 197 if (src->f[i] > 127.99999f) { 198 clamped.f[i] = 127.99999f; 199 } else if (src->f[i] < -126.99999f) { 200 clamped.f[i] = -126.99999f; 201 } else { 202 clamped.f[i] = src->f[i]; 203 } 204 } 205 src = &clamped; 206#endif /* DEBUG */ 207 208 dst->f[0] = powf(2.0f, src->f[0]); 209 dst->f[1] = powf(2.0f, src->f[1]); 210 dst->f[2] = powf(2.0f, src->f[2]); 211 dst->f[3] = powf(2.0f, src->f[3]); 212#endif /* FAST_MATH */ 213} 214 215static void 216micro_flr(union tgsi_exec_channel *dst, 217 const union tgsi_exec_channel *src) 218{ 219 dst->f[0] = floorf(src->f[0]); 220 dst->f[1] = floorf(src->f[1]); 221 dst->f[2] = floorf(src->f[2]); 222 dst->f[3] = floorf(src->f[3]); 223} 224 225static void 226micro_frc(union tgsi_exec_channel *dst, 227 const union tgsi_exec_channel *src) 228{ 229 dst->f[0] = src->f[0] - floorf(src->f[0]); 230 dst->f[1] = src->f[1] - floorf(src->f[1]); 231 dst->f[2] = src->f[2] - floorf(src->f[2]); 232 dst->f[3] = src->f[3] - floorf(src->f[3]); 233} 234 235static void 236micro_iabs(union tgsi_exec_channel *dst, 237 const union tgsi_exec_channel *src) 238{ 239 dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0]; 240 dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1]; 241 dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2]; 242 dst->i[3] = src->i[3] >= 0 ? 
src->i[3] : -src->i[3]; 243} 244 245static void 246micro_ineg(union tgsi_exec_channel *dst, 247 const union tgsi_exec_channel *src) 248{ 249 dst->i[0] = -src->i[0]; 250 dst->i[1] = -src->i[1]; 251 dst->i[2] = -src->i[2]; 252 dst->i[3] = -src->i[3]; 253} 254 255static void 256micro_lg2(union tgsi_exec_channel *dst, 257 const union tgsi_exec_channel *src) 258{ 259#if FAST_MATH 260 dst->f[0] = util_fast_log2(src->f[0]); 261 dst->f[1] = util_fast_log2(src->f[1]); 262 dst->f[2] = util_fast_log2(src->f[2]); 263 dst->f[3] = util_fast_log2(src->f[3]); 264#else 265 dst->f[0] = logf(src->f[0]) * 1.442695f; 266 dst->f[1] = logf(src->f[1]) * 1.442695f; 267 dst->f[2] = logf(src->f[2]) * 1.442695f; 268 dst->f[3] = logf(src->f[3]) * 1.442695f; 269#endif 270} 271 272static void 273micro_lrp(union tgsi_exec_channel *dst, 274 const union tgsi_exec_channel *src0, 275 const union tgsi_exec_channel *src1, 276 const union tgsi_exec_channel *src2) 277{ 278 dst->f[0] = src0->f[0] * (src1->f[0] - src2->f[0]) + src2->f[0]; 279 dst->f[1] = src0->f[1] * (src1->f[1] - src2->f[1]) + src2->f[1]; 280 dst->f[2] = src0->f[2] * (src1->f[2] - src2->f[2]) + src2->f[2]; 281 dst->f[3] = src0->f[3] * (src1->f[3] - src2->f[3]) + src2->f[3]; 282} 283 284static void 285micro_mad(union tgsi_exec_channel *dst, 286 const union tgsi_exec_channel *src0, 287 const union tgsi_exec_channel *src1, 288 const union tgsi_exec_channel *src2) 289{ 290 dst->f[0] = src0->f[0] * src1->f[0] + src2->f[0]; 291 dst->f[1] = src0->f[1] * src1->f[1] + src2->f[1]; 292 dst->f[2] = src0->f[2] * src1->f[2] + src2->f[2]; 293 dst->f[3] = src0->f[3] * src1->f[3] + src2->f[3]; 294} 295 296static void 297micro_mov(union tgsi_exec_channel *dst, 298 const union tgsi_exec_channel *src) 299{ 300 dst->u[0] = src->u[0]; 301 dst->u[1] = src->u[1]; 302 dst->u[2] = src->u[2]; 303 dst->u[3] = src->u[3]; 304} 305 306static void 307micro_rcp(union tgsi_exec_channel *dst, 308 const union tgsi_exec_channel *src) 309{ 310#if 0 /* for debugging */ 311 
assert(src->f[0] != 0.0f); 312 assert(src->f[1] != 0.0f); 313 assert(src->f[2] != 0.0f); 314 assert(src->f[3] != 0.0f); 315#endif 316 dst->f[0] = 1.0f / src->f[0]; 317 dst->f[1] = 1.0f / src->f[1]; 318 dst->f[2] = 1.0f / src->f[2]; 319 dst->f[3] = 1.0f / src->f[3]; 320} 321 322static void 323micro_rnd(union tgsi_exec_channel *dst, 324 const union tgsi_exec_channel *src) 325{ 326 dst->f[0] = floorf(src->f[0] + 0.5f); 327 dst->f[1] = floorf(src->f[1] + 0.5f); 328 dst->f[2] = floorf(src->f[2] + 0.5f); 329 dst->f[3] = floorf(src->f[3] + 0.5f); 330} 331 332static void 333micro_rsq(union tgsi_exec_channel *dst, 334 const union tgsi_exec_channel *src) 335{ 336#if 0 /* for debugging */ 337 assert(src->f[0] != 0.0f); 338 assert(src->f[1] != 0.0f); 339 assert(src->f[2] != 0.0f); 340 assert(src->f[3] != 0.0f); 341#endif 342 dst->f[0] = 1.0f / sqrtf(src->f[0]); 343 dst->f[1] = 1.0f / sqrtf(src->f[1]); 344 dst->f[2] = 1.0f / sqrtf(src->f[2]); 345 dst->f[3] = 1.0f / sqrtf(src->f[3]); 346} 347 348static void 349micro_sqrt(union tgsi_exec_channel *dst, 350 const union tgsi_exec_channel *src) 351{ 352 dst->f[0] = sqrtf(src->f[0]); 353 dst->f[1] = sqrtf(src->f[1]); 354 dst->f[2] = sqrtf(src->f[2]); 355 dst->f[3] = sqrtf(src->f[3]); 356} 357 358static void 359micro_seq(union tgsi_exec_channel *dst, 360 const union tgsi_exec_channel *src0, 361 const union tgsi_exec_channel *src1) 362{ 363 dst->f[0] = src0->f[0] == src1->f[0] ? 1.0f : 0.0f; 364 dst->f[1] = src0->f[1] == src1->f[1] ? 1.0f : 0.0f; 365 dst->f[2] = src0->f[2] == src1->f[2] ? 1.0f : 0.0f; 366 dst->f[3] = src0->f[3] == src1->f[3] ? 1.0f : 0.0f; 367} 368 369static void 370micro_sge(union tgsi_exec_channel *dst, 371 const union tgsi_exec_channel *src0, 372 const union tgsi_exec_channel *src1) 373{ 374 dst->f[0] = src0->f[0] >= src1->f[0] ? 1.0f : 0.0f; 375 dst->f[1] = src0->f[1] >= src1->f[1] ? 1.0f : 0.0f; 376 dst->f[2] = src0->f[2] >= src1->f[2] ? 1.0f : 0.0f; 377 dst->f[3] = src0->f[3] >= src1->f[3] ? 
1.0f : 0.0f; 378} 379 380static void 381micro_sgn(union tgsi_exec_channel *dst, 382 const union tgsi_exec_channel *src) 383{ 384 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; 385 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; 386 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; 387 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; 388} 389 390static void 391micro_isgn(union tgsi_exec_channel *dst, 392 const union tgsi_exec_channel *src) 393{ 394 dst->i[0] = src->i[0] < 0 ? -1 : src->i[0] > 0 ? 1 : 0; 395 dst->i[1] = src->i[1] < 0 ? -1 : src->i[1] > 0 ? 1 : 0; 396 dst->i[2] = src->i[2] < 0 ? -1 : src->i[2] > 0 ? 1 : 0; 397 dst->i[3] = src->i[3] < 0 ? -1 : src->i[3] > 0 ? 1 : 0; 398} 399 400static void 401micro_sgt(union tgsi_exec_channel *dst, 402 const union tgsi_exec_channel *src0, 403 const union tgsi_exec_channel *src1) 404{ 405 dst->f[0] = src0->f[0] > src1->f[0] ? 1.0f : 0.0f; 406 dst->f[1] = src0->f[1] > src1->f[1] ? 1.0f : 0.0f; 407 dst->f[2] = src0->f[2] > src1->f[2] ? 1.0f : 0.0f; 408 dst->f[3] = src0->f[3] > src1->f[3] ? 1.0f : 0.0f; 409} 410 411static void 412micro_sin(union tgsi_exec_channel *dst, 413 const union tgsi_exec_channel *src) 414{ 415 dst->f[0] = sinf(src->f[0]); 416 dst->f[1] = sinf(src->f[1]); 417 dst->f[2] = sinf(src->f[2]); 418 dst->f[3] = sinf(src->f[3]); 419} 420 421static void 422micro_sle(union tgsi_exec_channel *dst, 423 const union tgsi_exec_channel *src0, 424 const union tgsi_exec_channel *src1) 425{ 426 dst->f[0] = src0->f[0] <= src1->f[0] ? 1.0f : 0.0f; 427 dst->f[1] = src0->f[1] <= src1->f[1] ? 1.0f : 0.0f; 428 dst->f[2] = src0->f[2] <= src1->f[2] ? 1.0f : 0.0f; 429 dst->f[3] = src0->f[3] <= src1->f[3] ? 1.0f : 0.0f; 430} 431 432static void 433micro_slt(union tgsi_exec_channel *dst, 434 const union tgsi_exec_channel *src0, 435 const union tgsi_exec_channel *src1) 436{ 437 dst->f[0] = src0->f[0] < src1->f[0] ? 
1.0f : 0.0f; 438 dst->f[1] = src0->f[1] < src1->f[1] ? 1.0f : 0.0f; 439 dst->f[2] = src0->f[2] < src1->f[2] ? 1.0f : 0.0f; 440 dst->f[3] = src0->f[3] < src1->f[3] ? 1.0f : 0.0f; 441} 442 443static void 444micro_sne(union tgsi_exec_channel *dst, 445 const union tgsi_exec_channel *src0, 446 const union tgsi_exec_channel *src1) 447{ 448 dst->f[0] = src0->f[0] != src1->f[0] ? 1.0f : 0.0f; 449 dst->f[1] = src0->f[1] != src1->f[1] ? 1.0f : 0.0f; 450 dst->f[2] = src0->f[2] != src1->f[2] ? 1.0f : 0.0f; 451 dst->f[3] = src0->f[3] != src1->f[3] ? 1.0f : 0.0f; 452} 453 454static void 455micro_sfl(union tgsi_exec_channel *dst) 456{ 457 dst->f[0] = 0.0f; 458 dst->f[1] = 0.0f; 459 dst->f[2] = 0.0f; 460 dst->f[3] = 0.0f; 461} 462 463static void 464micro_str(union tgsi_exec_channel *dst) 465{ 466 dst->f[0] = 1.0f; 467 dst->f[1] = 1.0f; 468 dst->f[2] = 1.0f; 469 dst->f[3] = 1.0f; 470} 471 472static void 473micro_trunc(union tgsi_exec_channel *dst, 474 const union tgsi_exec_channel *src) 475{ 476 dst->f[0] = (float)(int)src->f[0]; 477 dst->f[1] = (float)(int)src->f[1]; 478 dst->f[2] = (float)(int)src->f[2]; 479 dst->f[3] = (float)(int)src->f[3]; 480} 481 482 483enum tgsi_exec_datatype { 484 TGSI_EXEC_DATA_FLOAT, 485 TGSI_EXEC_DATA_INT, 486 TGSI_EXEC_DATA_UINT 487}; 488 489/* 490 * Shorthand locations of various utility registers (_I = Index, _C = Channel) 491 */ 492#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I 493#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C 494#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I 495#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C 496#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I 497#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C 498 499 500/** The execution mask depends on the conditional mask and the loop mask */ 501#define UPDATE_EXEC_MASK(MACH) \ 502 MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->Switch.mask & MACH->FuncMask 503 504 505static const union tgsi_exec_channel ZeroVec = 506 { { 0.0, 0.0, 0.0, 0.0 } }; 
507 508static const union tgsi_exec_channel OneVec = { 509 {1.0f, 1.0f, 1.0f, 1.0f} 510}; 511 512static const union tgsi_exec_channel P128Vec = { 513 {128.0f, 128.0f, 128.0f, 128.0f} 514}; 515 516static const union tgsi_exec_channel M128Vec = { 517 {-128.0f, -128.0f, -128.0f, -128.0f} 518}; 519 520 521/** 522 * Assert that none of the float values in 'chan' are infinite or NaN. 523 * NaN and Inf may occur normally during program execution and should 524 * not lead to crashes, etc. But when debugging, it's helpful to catch 525 * them. 526 */ 527static INLINE void 528check_inf_or_nan(const union tgsi_exec_channel *chan) 529{ 530 assert(!util_is_inf_or_nan((chan)->f[0])); 531 assert(!util_is_inf_or_nan((chan)->f[1])); 532 assert(!util_is_inf_or_nan((chan)->f[2])); 533 assert(!util_is_inf_or_nan((chan)->f[3])); 534} 535 536 537#ifdef DEBUG 538static void 539print_chan(const char *msg, const union tgsi_exec_channel *chan) 540{ 541 debug_printf("%s = {%f, %f, %f, %f}\n", 542 msg, chan->f[0], chan->f[1], chan->f[2], chan->f[3]); 543} 544#endif 545 546 547#ifdef DEBUG 548static void 549print_temp(const struct tgsi_exec_machine *mach, uint index) 550{ 551 const struct tgsi_exec_vector *tmp = &mach->Temps[index]; 552 int i; 553 debug_printf("Temp[%u] =\n", index); 554 for (i = 0; i < 4; i++) { 555 debug_printf(" %c: { %f, %f, %f, %f }\n", 556 "XYZW"[i], 557 tmp->xyzw[i].f[0], 558 tmp->xyzw[i].f[1], 559 tmp->xyzw[i].f[2], 560 tmp->xyzw[i].f[3]); 561 } 562} 563#endif 564 565 566void 567tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach, 568 unsigned num_bufs, 569 const void **bufs, 570 const unsigned *buf_sizes) 571{ 572 unsigned i; 573 574 for (i = 0; i < num_bufs; i++) { 575 mach->Consts[i] = bufs[i]; 576 mach->ConstsSize[i] = buf_sizes[i]; 577 } 578} 579 580 581/** 582 * Check if there's a potential src/dst register data dependency when 583 * using SOA execution. 
584 * Example: 585 * MOV T, T.yxwz; 586 * This would expand into: 587 * MOV t0, t1; 588 * MOV t1, t0; 589 * MOV t2, t3; 590 * MOV t3, t2; 591 * The second instruction will have the wrong value for t0 if executed as-is. 592 */ 593boolean 594tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) 595{ 596 uint i, chan; 597 598 uint writemask = inst->Dst[0].Register.WriteMask; 599 if (writemask == TGSI_WRITEMASK_X || 600 writemask == TGSI_WRITEMASK_Y || 601 writemask == TGSI_WRITEMASK_Z || 602 writemask == TGSI_WRITEMASK_W || 603 writemask == TGSI_WRITEMASK_NONE) { 604 /* no chance of data dependency */ 605 return FALSE; 606 } 607 608 /* loop over src regs */ 609 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 610 if ((inst->Src[i].Register.File == 611 inst->Dst[0].Register.File) && 612 ((inst->Src[i].Register.Index == 613 inst->Dst[0].Register.Index) || 614 inst->Src[i].Register.Indirect || 615 inst->Dst[0].Register.Indirect)) { 616 /* loop over dest channels */ 617 uint channelsWritten = 0x0; 618 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 619 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 620 /* check if we're reading a channel that's been written */ 621 uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan); 622 if (channelsWritten & (1 << swizzle)) { 623 return TRUE; 624 } 625 626 channelsWritten |= (1 << chan); 627 } 628 } 629 } 630 } 631 return FALSE; 632} 633 634 635/** 636 * Initialize machine state by expanding tokens to full instructions, 637 * allocating temporary storage, setting up constants, etc. 638 * After this, we can call tgsi_exec_machine_run() many times. 
639 */ 640void 641tgsi_exec_machine_bind_shader( 642 struct tgsi_exec_machine *mach, 643 const struct tgsi_token *tokens, 644 struct tgsi_sampler *sampler) 645{ 646 uint k; 647 struct tgsi_parse_context parse; 648 struct tgsi_full_instruction *instructions; 649 struct tgsi_full_declaration *declarations; 650 uint maxInstructions = 10, numInstructions = 0; 651 uint maxDeclarations = 10, numDeclarations = 0; 652 653#if 0 654 tgsi_dump(tokens, 0); 655#endif 656 657 util_init_math(); 658 659 660 mach->Tokens = tokens; 661 mach->Sampler = sampler; 662 663 if (!tokens) { 664 /* unbind and free all */ 665 FREE(mach->Declarations); 666 mach->Declarations = NULL; 667 mach->NumDeclarations = 0; 668 669 FREE(mach->Instructions); 670 mach->Instructions = NULL; 671 mach->NumInstructions = 0; 672 673 return; 674 } 675 676 k = tgsi_parse_init (&parse, mach->Tokens); 677 if (k != TGSI_PARSE_OK) { 678 debug_printf( "Problem parsing!\n" ); 679 return; 680 } 681 682 mach->Processor = parse.FullHeader.Processor.Processor; 683 mach->ImmLimit = 0; 684 mach->NumOutputs = 0; 685 686 if (mach->Processor == TGSI_PROCESSOR_GEOMETRY && 687 !mach->UsedGeometryShader) { 688 struct tgsi_exec_vector *inputs; 689 struct tgsi_exec_vector *outputs; 690 691 inputs = align_malloc(sizeof(struct tgsi_exec_vector) * 692 TGSI_MAX_PRIM_VERTICES * PIPE_MAX_SHADER_INPUTS, 693 16); 694 695 if (!inputs) 696 return; 697 698 outputs = align_malloc(sizeof(struct tgsi_exec_vector) * 699 TGSI_MAX_TOTAL_VERTICES, 16); 700 701 if (!outputs) { 702 align_free(inputs); 703 return; 704 } 705 706 align_free(mach->Inputs); 707 align_free(mach->Outputs); 708 709 mach->Inputs = inputs; 710 mach->Outputs = outputs; 711 mach->UsedGeometryShader = TRUE; 712 } 713 714 declarations = (struct tgsi_full_declaration *) 715 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); 716 717 if (!declarations) { 718 return; 719 } 720 721 instructions = (struct tgsi_full_instruction *) 722 MALLOC( maxInstructions * sizeof(struct 
tgsi_full_instruction) ); 723 724 if (!instructions) { 725 FREE( declarations ); 726 return; 727 } 728 729 while( !tgsi_parse_end_of_tokens( &parse ) ) { 730 uint i; 731 732 tgsi_parse_token( &parse ); 733 switch( parse.FullToken.Token.Type ) { 734 case TGSI_TOKEN_TYPE_DECLARATION: 735 /* save expanded declaration */ 736 if (numDeclarations == maxDeclarations) { 737 declarations = REALLOC(declarations, 738 maxDeclarations 739 * sizeof(struct tgsi_full_declaration), 740 (maxDeclarations + 10) 741 * sizeof(struct tgsi_full_declaration)); 742 maxDeclarations += 10; 743 } 744 if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) { 745 unsigned reg; 746 for (reg = parse.FullToken.FullDeclaration.Range.First; 747 reg <= parse.FullToken.FullDeclaration.Range.Last; 748 ++reg) { 749 ++mach->NumOutputs; 750 } 751 } 752 memcpy(declarations + numDeclarations, 753 &parse.FullToken.FullDeclaration, 754 sizeof(declarations[0])); 755 numDeclarations++; 756 break; 757 758 case TGSI_TOKEN_TYPE_IMMEDIATE: 759 { 760 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 761 assert( size <= 4 ); 762 assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES ); 763 764 for( i = 0; i < size; i++ ) { 765 mach->Imms[mach->ImmLimit][i] = 766 parse.FullToken.FullImmediate.u[i].Float; 767 } 768 mach->ImmLimit += 1; 769 } 770 break; 771 772 case TGSI_TOKEN_TYPE_INSTRUCTION: 773 774 /* save expanded instruction */ 775 if (numInstructions == maxInstructions) { 776 instructions = REALLOC(instructions, 777 maxInstructions 778 * sizeof(struct tgsi_full_instruction), 779 (maxInstructions + 10) 780 * sizeof(struct tgsi_full_instruction)); 781 maxInstructions += 10; 782 } 783 784 memcpy(instructions + numInstructions, 785 &parse.FullToken.FullInstruction, 786 sizeof(instructions[0])); 787 788 numInstructions++; 789 break; 790 791 case TGSI_TOKEN_TYPE_PROPERTY: 792 if (mach->Processor == TGSI_PROCESSOR_GEOMETRY) { 793 if (parse.FullToken.FullProperty.Property.PropertyName == 
TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) { 794 mach->MaxOutputVertices = parse.FullToken.FullProperty.u[0].Data; 795 } 796 } 797 break; 798 799 default: 800 assert( 0 ); 801 } 802 } 803 tgsi_parse_free (&parse); 804 805 FREE(mach->Declarations); 806 mach->Declarations = declarations; 807 mach->NumDeclarations = numDeclarations; 808 809 FREE(mach->Instructions); 810 mach->Instructions = instructions; 811 mach->NumInstructions = numInstructions; 812} 813 814 815struct tgsi_exec_machine * 816tgsi_exec_machine_create( void ) 817{ 818 struct tgsi_exec_machine *mach; 819 uint i; 820 821 mach = align_malloc( sizeof *mach, 16 ); 822 if (!mach) 823 goto fail; 824 825 memset(mach, 0, sizeof(*mach)); 826 827 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; 828 mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES; 829 mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0]; 830 831 mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_INPUTS, 16); 832 mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_OUTPUTS, 16); 833 if (!mach->Inputs || !mach->Outputs) 834 goto fail; 835 836 /* Setup constants needed by the SSE2 executor. 
*/ 837 for( i = 0; i < 4; i++ ) { 838 mach->Temps[TGSI_EXEC_TEMP_00000000_I].xyzw[TGSI_EXEC_TEMP_00000000_C].u[i] = 0x00000000; 839 mach->Temps[TGSI_EXEC_TEMP_7FFFFFFF_I].xyzw[TGSI_EXEC_TEMP_7FFFFFFF_C].u[i] = 0x7FFFFFFF; 840 mach->Temps[TGSI_EXEC_TEMP_80000000_I].xyzw[TGSI_EXEC_TEMP_80000000_C].u[i] = 0x80000000; 841 mach->Temps[TGSI_EXEC_TEMP_FFFFFFFF_I].xyzw[TGSI_EXEC_TEMP_FFFFFFFF_C].u[i] = 0xFFFFFFFF; /* not used */ 842 mach->Temps[TGSI_EXEC_TEMP_ONE_I].xyzw[TGSI_EXEC_TEMP_ONE_C].f[i] = 1.0f; 843 mach->Temps[TGSI_EXEC_TEMP_TWO_I].xyzw[TGSI_EXEC_TEMP_TWO_C].f[i] = 2.0f; /* not used */ 844 mach->Temps[TGSI_EXEC_TEMP_128_I].xyzw[TGSI_EXEC_TEMP_128_C].f[i] = 128.0f; 845 mach->Temps[TGSI_EXEC_TEMP_MINUS_128_I].xyzw[TGSI_EXEC_TEMP_MINUS_128_C].f[i] = -128.0f; 846 mach->Temps[TGSI_EXEC_TEMP_THREE_I].xyzw[TGSI_EXEC_TEMP_THREE_C].f[i] = 3.0f; 847 mach->Temps[TGSI_EXEC_TEMP_HALF_I].xyzw[TGSI_EXEC_TEMP_HALF_C].f[i] = 0.5f; 848 } 849 850#ifdef DEBUG 851 /* silence warnings */ 852 (void) print_chan; 853 (void) print_temp; 854#endif 855 856 return mach; 857 858fail: 859 if (mach) { 860 align_free(mach->Inputs); 861 align_free(mach->Outputs); 862 align_free(mach); 863 } 864 return NULL; 865} 866 867 868void 869tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach) 870{ 871 if (mach) { 872 FREE(mach->Instructions); 873 FREE(mach->Declarations); 874 875 align_free(mach->Inputs); 876 align_free(mach->Outputs); 877 878 align_free(mach); 879 } 880} 881 882static void 883micro_add(union tgsi_exec_channel *dst, 884 const union tgsi_exec_channel *src0, 885 const union tgsi_exec_channel *src1) 886{ 887 dst->f[0] = src0->f[0] + src1->f[0]; 888 dst->f[1] = src0->f[1] + src1->f[1]; 889 dst->f[2] = src0->f[2] + src1->f[2]; 890 dst->f[3] = src0->f[3] + src1->f[3]; 891} 892 893static void 894micro_div( 895 union tgsi_exec_channel *dst, 896 const union tgsi_exec_channel *src0, 897 const union tgsi_exec_channel *src1 ) 898{ 899 if (src1->f[0] != 0) { 900 dst->f[0] = src0->f[0] / 
src1->f[0]; 901 } 902 if (src1->f[1] != 0) { 903 dst->f[1] = src0->f[1] / src1->f[1]; 904 } 905 if (src1->f[2] != 0) { 906 dst->f[2] = src0->f[2] / src1->f[2]; 907 } 908 if (src1->f[3] != 0) { 909 dst->f[3] = src0->f[3] / src1->f[3]; 910 } 911} 912 913static void 914micro_rcc(union tgsi_exec_channel *dst, 915 const union tgsi_exec_channel *src) 916{ 917 uint i; 918 919 for (i = 0; i < 4; i++) { 920 float recip = 1.0f / src->f[i]; 921 922 if (recip > 0.0f) 923 dst->f[i] = CLAMP(recip, 5.42101e-020f, 1.84467e+019f); 924 else 925 dst->f[i] = CLAMP(recip, -1.84467e+019f, -5.42101e-020f); 926 } 927} 928 929static void 930micro_lt( 931 union tgsi_exec_channel *dst, 932 const union tgsi_exec_channel *src0, 933 const union tgsi_exec_channel *src1, 934 const union tgsi_exec_channel *src2, 935 const union tgsi_exec_channel *src3 ) 936{ 937 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; 938 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; 939 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; 940 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; 941} 942 943static void 944micro_max(union tgsi_exec_channel *dst, 945 const union tgsi_exec_channel *src0, 946 const union tgsi_exec_channel *src1) 947{ 948 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; 949 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; 950 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; 951 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; 952} 953 954static void 955micro_min(union tgsi_exec_channel *dst, 956 const union tgsi_exec_channel *src0, 957 const union tgsi_exec_channel *src1) 958{ 959 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; 960 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; 961 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; 962 dst->f[3] = src0->f[3] < src1->f[3] ? 
src0->f[3] : src1->f[3]; 963} 964 965static void 966micro_mul(union tgsi_exec_channel *dst, 967 const union tgsi_exec_channel *src0, 968 const union tgsi_exec_channel *src1) 969{ 970 dst->f[0] = src0->f[0] * src1->f[0]; 971 dst->f[1] = src0->f[1] * src1->f[1]; 972 dst->f[2] = src0->f[2] * src1->f[2]; 973 dst->f[3] = src0->f[3] * src1->f[3]; 974} 975 976static void 977micro_neg( 978 union tgsi_exec_channel *dst, 979 const union tgsi_exec_channel *src ) 980{ 981 dst->f[0] = -src->f[0]; 982 dst->f[1] = -src->f[1]; 983 dst->f[2] = -src->f[2]; 984 dst->f[3] = -src->f[3]; 985} 986 987static void 988micro_pow( 989 union tgsi_exec_channel *dst, 990 const union tgsi_exec_channel *src0, 991 const union tgsi_exec_channel *src1 ) 992{ 993#if FAST_MATH 994 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] ); 995 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] ); 996 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] ); 997 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] ); 998#else 999 dst->f[0] = powf( src0->f[0], src1->f[0] ); 1000 dst->f[1] = powf( src0->f[1], src1->f[1] ); 1001 dst->f[2] = powf( src0->f[2], src1->f[2] ); 1002 dst->f[3] = powf( src0->f[3], src1->f[3] ); 1003#endif 1004} 1005 1006static void 1007micro_sub(union tgsi_exec_channel *dst, 1008 const union tgsi_exec_channel *src0, 1009 const union tgsi_exec_channel *src1) 1010{ 1011 dst->f[0] = src0->f[0] - src1->f[0]; 1012 dst->f[1] = src0->f[1] - src1->f[1]; 1013 dst->f[2] = src0->f[2] - src1->f[2]; 1014 dst->f[3] = src0->f[3] - src1->f[3]; 1015} 1016 1017static void 1018fetch_src_file_channel(const struct tgsi_exec_machine *mach, 1019 const uint chan_index, 1020 const uint file, 1021 const uint swizzle, 1022 const union tgsi_exec_channel *index, 1023 const union tgsi_exec_channel *index2D, 1024 union tgsi_exec_channel *chan) 1025{ 1026 uint i; 1027 1028 assert(swizzle < 4); 1029 1030 switch (file) { 1031 case TGSI_FILE_CONSTANT: 1032 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1033 assert(index2D->i[i] >= 0 && 
index2D->i[i] < PIPE_MAX_CONSTANT_BUFFERS); 1034 assert(mach->Consts[index2D->i[i]]); 1035 1036 if (index->i[i] < 0) { 1037 chan->u[i] = 0; 1038 } else { 1039 /* NOTE: copying the const value as a uint instead of float */ 1040 const uint constbuf = index2D->i[i]; 1041 const uint *buf = (const uint *)mach->Consts[constbuf]; 1042 const int pos = index->i[i] * 4 + swizzle; 1043 /* const buffer bounds check */ 1044 if (pos < 0 || pos >= (int) mach->ConstsSize[constbuf]) { 1045 if (0) { 1046 /* Debug: print warning */ 1047 static int count = 0; 1048 if (count++ < 100) 1049 debug_printf("TGSI Exec: const buffer index %d" 1050 " out of bounds\n", pos); 1051 } 1052 chan->u[i] = 0; 1053 } 1054 else 1055 chan->u[i] = buf[pos]; 1056 } 1057 } 1058 break; 1059 1060 case TGSI_FILE_INPUT: 1061 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1062 /* 1063 if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { 1064 debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n", 1065 index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i], 1066 index2D->i[i], index->i[i]); 1067 }*/ 1068 int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i]; 1069 assert(pos >= 0); 1070 assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS); 1071 chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i]; 1072 } 1073 break; 1074 1075 case TGSI_FILE_SYSTEM_VALUE: 1076 /* XXX no swizzling at this point. Will be needed if we put 1077 * gl_FragCoord, for example, in a sys value register. 
1078 */ 1079 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1080 chan->u[i] = mach->SystemValue[index->i[i]].u[i]; 1081 } 1082 break; 1083 1084 case TGSI_FILE_TEMPORARY: 1085 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1086 assert(index->i[i] < TGSI_EXEC_NUM_TEMPS); 1087 assert(index2D->i[i] == 0); 1088 1089 chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i]; 1090 } 1091 break; 1092 1093 case TGSI_FILE_IMMEDIATE: 1094 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1095 assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit); 1096 assert(index2D->i[i] == 0); 1097 1098 chan->f[i] = mach->Imms[index->i[i]][swizzle]; 1099 } 1100 break; 1101 1102 case TGSI_FILE_ADDRESS: 1103 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1104 assert(index->i[i] >= 0); 1105 assert(index2D->i[i] == 0); 1106 1107 chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i]; 1108 } 1109 break; 1110 1111 case TGSI_FILE_PREDICATE: 1112 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1113 assert(index->i[i] >= 0 && index->i[i] < TGSI_EXEC_NUM_PREDS); 1114 assert(index2D->i[i] == 0); 1115 1116 chan->u[i] = mach->Predicates[0].xyzw[swizzle].u[i]; 1117 } 1118 break; 1119 1120 case TGSI_FILE_OUTPUT: 1121 /* vertex/fragment output vars can be read too */ 1122 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1123 assert(index->i[i] >= 0); 1124 assert(index2D->i[i] == 0); 1125 1126 chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i]; 1127 } 1128 break; 1129 1130 default: 1131 assert(0); 1132 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1133 chan->u[i] = 0; 1134 } 1135 } 1136} 1137 1138static void 1139fetch_source(const struct tgsi_exec_machine *mach, 1140 union tgsi_exec_channel *chan, 1141 const struct tgsi_full_src_register *reg, 1142 const uint chan_index, 1143 enum tgsi_exec_datatype src_datatype) 1144{ 1145 union tgsi_exec_channel index; 1146 union tgsi_exec_channel index2D; 1147 uint swizzle; 1148 1149 /* We start with a direct index into a register file. 
1150 * 1151 * file[1], 1152 * where: 1153 * file = Register.File 1154 * [1] = Register.Index 1155 */ 1156 index.i[0] = 1157 index.i[1] = 1158 index.i[2] = 1159 index.i[3] = reg->Register.Index; 1160 1161 /* There is an extra source register that indirectly subscripts 1162 * a register file. The direct index now becomes an offset 1163 * that is being added to the indirect register. 1164 * 1165 * file[ind[2].x+1], 1166 * where: 1167 * ind = Indirect.File 1168 * [2] = Indirect.Index 1169 * .x = Indirect.SwizzleX 1170 */ 1171 if (reg->Register.Indirect) { 1172 union tgsi_exec_channel index2; 1173 union tgsi_exec_channel indir_index; 1174 const uint execmask = mach->ExecMask; 1175 uint i; 1176 1177 /* which address register (always zero now) */ 1178 index2.i[0] = 1179 index2.i[1] = 1180 index2.i[2] = 1181 index2.i[3] = reg->Indirect.Index; 1182 /* get current value of address register[swizzle] */ 1183 swizzle = reg->Indirect.Swizzle; 1184 fetch_src_file_channel(mach, 1185 chan_index, 1186 reg->Indirect.File, 1187 swizzle, 1188 &index2, 1189 &ZeroVec, 1190 &indir_index); 1191 1192 /* add value of address register to the offset */ 1193 index.i[0] += indir_index.i[0]; 1194 index.i[1] += indir_index.i[1]; 1195 index.i[2] += indir_index.i[2]; 1196 index.i[3] += indir_index.i[3]; 1197 1198 /* for disabled execution channels, zero-out the index to 1199 * avoid using a potential garbage value. 1200 */ 1201 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1202 if ((execmask & (1 << i)) == 0) 1203 index.i[i] = 0; 1204 } 1205 } 1206 1207 /* There is an extra source register that is a second 1208 * subscript to a register file. Effectively it means that 1209 * the register file is actually a 2D array of registers. 
1210 * 1211 * file[3][1], 1212 * where: 1213 * [3] = Dimension.Index 1214 */ 1215 if (reg->Register.Dimension) { 1216 index2D.i[0] = 1217 index2D.i[1] = 1218 index2D.i[2] = 1219 index2D.i[3] = reg->Dimension.Index; 1220 1221 /* Again, the second subscript index can be addressed indirectly 1222 * identically to the first one. 1223 * Nothing stops us from indirectly addressing the indirect register, 1224 * but there is no need for that, so we won't exercise it. 1225 * 1226 * file[ind[4].y+3][1], 1227 * where: 1228 * ind = DimIndirect.File 1229 * [4] = DimIndirect.Index 1230 * .y = DimIndirect.SwizzleX 1231 */ 1232 if (reg->Dimension.Indirect) { 1233 union tgsi_exec_channel index2; 1234 union tgsi_exec_channel indir_index; 1235 const uint execmask = mach->ExecMask; 1236 uint i; 1237 1238 index2.i[0] = 1239 index2.i[1] = 1240 index2.i[2] = 1241 index2.i[3] = reg->DimIndirect.Index; 1242 1243 swizzle = reg->DimIndirect.Swizzle; 1244 fetch_src_file_channel(mach, 1245 chan_index, 1246 reg->DimIndirect.File, 1247 swizzle, 1248 &index2, 1249 &ZeroVec, 1250 &indir_index); 1251 1252 index2D.i[0] += indir_index.i[0]; 1253 index2D.i[1] += indir_index.i[1]; 1254 index2D.i[2] += indir_index.i[2]; 1255 index2D.i[3] += indir_index.i[3]; 1256 1257 /* for disabled execution channels, zero-out the index to 1258 * avoid using a potential garbage value. 1259 */ 1260 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1261 if ((execmask & (1 << i)) == 0) { 1262 index2D.i[i] = 0; 1263 } 1264 } 1265 } 1266 1267 /* If by any chance there was a need for a 3D array of register 1268 * files, we would have to check whether Dimension is followed 1269 * by a dimension register and continue the saga. 
1270 */ 1271 } else { 1272 index2D.i[0] = 1273 index2D.i[1] = 1274 index2D.i[2] = 1275 index2D.i[3] = 0; 1276 } 1277 1278 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 1279 fetch_src_file_channel(mach, 1280 chan_index, 1281 reg->Register.File, 1282 swizzle, 1283 &index, 1284 &index2D, 1285 chan); 1286 1287 if (reg->Register.Absolute) { 1288 if (src_datatype == TGSI_EXEC_DATA_FLOAT) { 1289 micro_abs(chan, chan); 1290 } else { 1291 micro_iabs(chan, chan); 1292 } 1293 } 1294 1295 if (reg->Register.Negate) { 1296 if (src_datatype == TGSI_EXEC_DATA_FLOAT) { 1297 micro_neg(chan, chan); 1298 } else { 1299 micro_ineg(chan, chan); 1300 } 1301 } 1302} 1303 1304static void 1305store_dest(struct tgsi_exec_machine *mach, 1306 const union tgsi_exec_channel *chan, 1307 const struct tgsi_full_dst_register *reg, 1308 const struct tgsi_full_instruction *inst, 1309 uint chan_index, 1310 enum tgsi_exec_datatype dst_datatype) 1311{ 1312 uint i; 1313 union tgsi_exec_channel null; 1314 union tgsi_exec_channel *dst; 1315 union tgsi_exec_channel index2D; 1316 uint execmask = mach->ExecMask; 1317 int offset = 0; /* indirection offset */ 1318 int index; 1319 1320 /* for debugging */ 1321 if (0 && dst_datatype == TGSI_EXEC_DATA_FLOAT) { 1322 check_inf_or_nan(chan); 1323 } 1324 1325 /* There is an extra source register that indirectly subscripts 1326 * a register file. The direct index now becomes an offset 1327 * that is being added to the indirect register. 
1328 * 1329 * file[ind[2].x+1], 1330 * where: 1331 * ind = Indirect.File 1332 * [2] = Indirect.Index 1333 * .x = Indirect.SwizzleX 1334 */ 1335 if (reg->Register.Indirect) { 1336 union tgsi_exec_channel index; 1337 union tgsi_exec_channel indir_index; 1338 uint swizzle; 1339 1340 /* which address register (always zero for now) */ 1341 index.i[0] = 1342 index.i[1] = 1343 index.i[2] = 1344 index.i[3] = reg->Indirect.Index; 1345 1346 /* get current value of address register[swizzle] */ 1347 swizzle = reg->Indirect.Swizzle; 1348 1349 /* fetch values from the address/indirection register */ 1350 fetch_src_file_channel(mach, 1351 chan_index, 1352 reg->Indirect.File, 1353 swizzle, 1354 &index, 1355 &ZeroVec, 1356 &indir_index); 1357 1358 /* save indirection offset */ 1359 offset = indir_index.i[0]; 1360 } 1361 1362 /* There is an extra source register that is a second 1363 * subscript to a register file. Effectively it means that 1364 * the register file is actually a 2D array of registers. 1365 * 1366 * file[3][1], 1367 * where: 1368 * [3] = Dimension.Index 1369 */ 1370 if (reg->Register.Dimension) { 1371 index2D.i[0] = 1372 index2D.i[1] = 1373 index2D.i[2] = 1374 index2D.i[3] = reg->Dimension.Index; 1375 1376 /* Again, the second subscript index can be addressed indirectly 1377 * identically to the first one. 1378 * Nothing stops us from indirectly addressing the indirect register, 1379 * but there is no need for that, so we won't exercise it. 
1380 * 1381 * file[ind[4].y+3][1], 1382 * where: 1383 * ind = DimIndirect.File 1384 * [4] = DimIndirect.Index 1385 * .y = DimIndirect.SwizzleX 1386 */ 1387 if (reg->Dimension.Indirect) { 1388 union tgsi_exec_channel index2; 1389 union tgsi_exec_channel indir_index; 1390 const uint execmask = mach->ExecMask; 1391 unsigned swizzle; 1392 uint i; 1393 1394 index2.i[0] = 1395 index2.i[1] = 1396 index2.i[2] = 1397 index2.i[3] = reg->DimIndirect.Index; 1398 1399 swizzle = reg->DimIndirect.Swizzle; 1400 fetch_src_file_channel(mach, 1401 chan_index, 1402 reg->DimIndirect.File, 1403 swizzle, 1404 &index2, 1405 &ZeroVec, 1406 &indir_index); 1407 1408 index2D.i[0] += indir_index.i[0]; 1409 index2D.i[1] += indir_index.i[1]; 1410 index2D.i[2] += indir_index.i[2]; 1411 index2D.i[3] += indir_index.i[3]; 1412 1413 /* for disabled execution channels, zero-out the index to 1414 * avoid using a potential garbage value. 1415 */ 1416 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1417 if ((execmask & (1 << i)) == 0) { 1418 index2D.i[i] = 0; 1419 } 1420 } 1421 } 1422 1423 /* If by any chance there was a need for a 3D array of register 1424 * files, we would have to check whether Dimension is followed 1425 * by a dimension register and continue the saga. 
1426 */ 1427 } else { 1428 index2D.i[0] = 1429 index2D.i[1] = 1430 index2D.i[2] = 1431 index2D.i[3] = 0; 1432 } 1433 1434 switch (reg->Register.File) { 1435 case TGSI_FILE_NULL: 1436 dst = &null; 1437 break; 1438 1439 case TGSI_FILE_OUTPUT: 1440 index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] 1441 + reg->Register.Index; 1442 dst = &mach->Outputs[offset + index].xyzw[chan_index]; 1443#if 0 1444 debug_printf("NumOutputs = %d, TEMP_O_C/I = %d, redindex = %d\n", 1445 mach->NumOutputs, mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0], 1446 reg->Register.Index); 1447 if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { 1448 debug_printf("STORING OUT[%d] mask(%d), = (", offset + index, execmask); 1449 for (i = 0; i < TGSI_QUAD_SIZE; i++) 1450 if (execmask & (1 << i)) 1451 debug_printf("%f, ", chan->f[i]); 1452 debug_printf(")\n"); 1453 } 1454#endif 1455 break; 1456 1457 case TGSI_FILE_TEMPORARY: 1458 index = reg->Register.Index; 1459 assert( index < TGSI_EXEC_NUM_TEMPS ); 1460 dst = &mach->Temps[offset + index].xyzw[chan_index]; 1461 break; 1462 1463 case TGSI_FILE_ADDRESS: 1464 index = reg->Register.Index; 1465 dst = &mach->Addrs[index].xyzw[chan_index]; 1466 break; 1467 1468 case TGSI_FILE_PREDICATE: 1469 index = reg->Register.Index; 1470 assert(index < TGSI_EXEC_NUM_PREDS); 1471 dst = &mach->Predicates[index].xyzw[chan_index]; 1472 break; 1473 1474 default: 1475 assert( 0 ); 1476 return; 1477 } 1478 1479 if (inst->Instruction.Predicate) { 1480 uint swizzle; 1481 union tgsi_exec_channel *pred; 1482 1483 switch (chan_index) { 1484 case TGSI_CHAN_X: 1485 swizzle = inst->Predicate.SwizzleX; 1486 break; 1487 case TGSI_CHAN_Y: 1488 swizzle = inst->Predicate.SwizzleY; 1489 break; 1490 case TGSI_CHAN_Z: 1491 swizzle = inst->Predicate.SwizzleZ; 1492 break; 1493 case TGSI_CHAN_W: 1494 swizzle = inst->Predicate.SwizzleW; 1495 break; 1496 default: 1497 assert(0); 1498 return; 1499 } 1500 1501 assert(inst->Predicate.Index == 0); 1502 1503 pred = 
&mach->Predicates[inst->Predicate.Index].xyzw[swizzle]; 1504 1505 if (inst->Predicate.Negate) { 1506 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1507 if (pred->u[i]) { 1508 execmask &= ~(1 << i); 1509 } 1510 } 1511 } else { 1512 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1513 if (!pred->u[i]) { 1514 execmask &= ~(1 << i); 1515 } 1516 } 1517 } 1518 } 1519 1520 switch (inst->Instruction.Saturate) { 1521 case TGSI_SAT_NONE: 1522 for (i = 0; i < TGSI_QUAD_SIZE; i++) 1523 if (execmask & (1 << i)) 1524 dst->i[i] = chan->i[i]; 1525 break; 1526 1527 case TGSI_SAT_ZERO_ONE: 1528 for (i = 0; i < TGSI_QUAD_SIZE; i++) 1529 if (execmask & (1 << i)) { 1530 if (chan->f[i] < 0.0f) 1531 dst->f[i] = 0.0f; 1532 else if (chan->f[i] > 1.0f) 1533 dst->f[i] = 1.0f; 1534 else 1535 dst->i[i] = chan->i[i]; 1536 } 1537 break; 1538 1539 case TGSI_SAT_MINUS_PLUS_ONE: 1540 for (i = 0; i < TGSI_QUAD_SIZE; i++) 1541 if (execmask & (1 << i)) { 1542 if (chan->f[i] < -1.0f) 1543 dst->f[i] = -1.0f; 1544 else if (chan->f[i] > 1.0f) 1545 dst->f[i] = 1.0f; 1546 else 1547 dst->i[i] = chan->i[i]; 1548 } 1549 break; 1550 1551 default: 1552 assert( 0 ); 1553 } 1554} 1555 1556#define FETCH(VAL,INDEX,CHAN)\ 1557 fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT) 1558 1559#define IFETCH(VAL,INDEX,CHAN)\ 1560 fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_INT) 1561 1562 1563/** 1564 * Execute ARB-style KIL which is predicated by a src register. 1565 * Kill fragment if any of the four values is less than zero. 1566 */ 1567static void 1568exec_kill_if(struct tgsi_exec_machine *mach, 1569 const struct tgsi_full_instruction *inst) 1570{ 1571 uint uniquemask; 1572 uint chan_index; 1573 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1574 union tgsi_exec_channel r[1]; 1575 1576 /* This mask stores component bits that were already tested. 
*/ 1577 uniquemask = 0; 1578 1579 for (chan_index = 0; chan_index < 4; chan_index++) 1580 { 1581 uint swizzle; 1582 uint i; 1583 1584 /* unswizzle channel */ 1585 swizzle = tgsi_util_get_full_src_register_swizzle ( 1586 &inst->Src[0], 1587 chan_index); 1588 1589 /* check if the component has not been already tested */ 1590 if (uniquemask & (1 << swizzle)) 1591 continue; 1592 uniquemask |= 1 << swizzle; 1593 1594 FETCH(&r[0], 0, chan_index); 1595 for (i = 0; i < 4; i++) 1596 if (r[0].f[i] < 0.0f) 1597 kilmask |= 1 << i; 1598 } 1599 1600 /* restrict to fragments currently executing */ 1601 kilmask &= mach->ExecMask; 1602 1603 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1604} 1605 1606/** 1607 * Unconditional fragment kill/discard. 1608 */ 1609static void 1610exec_kill(struct tgsi_exec_machine *mach, 1611 const struct tgsi_full_instruction *inst) 1612{ 1613 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1614 1615 /* kill fragment for all fragments currently executing */ 1616 kilmask = mach->ExecMask; 1617 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 1618} 1619 1620static void 1621emit_vertex(struct tgsi_exec_machine *mach) 1622{ 1623 /* FIXME: check for exec mask correctly 1624 unsigned i; 1625 for (i = 0; i < TGSI_QUAD_SIZE; ++i) { 1626 if ((mach->ExecMask & (1 << i))) 1627 */ 1628 if (mach->ExecMask) { 1629 if (mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] >= mach->MaxOutputVertices) 1630 return; 1631 1632 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs; 1633 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; 1634 } 1635} 1636 1637static void 1638emit_primitive(struct tgsi_exec_machine *mach) 1639{ 1640 unsigned *prim_count = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]; 1641 /* FIXME: check for exec mask correctly 1642 unsigned i; 1643 for (i = 0; i < TGSI_QUAD_SIZE; ++i) { 1644 if ((mach->ExecMask & (1 << i))) 1645 
*/ 1646 if (mach->ExecMask) { 1647 ++(*prim_count); 1648 debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs); 1649 mach->Primitives[*prim_count] = 0; 1650 } 1651} 1652 1653static void 1654conditional_emit_primitive(struct tgsi_exec_machine *mach) 1655{ 1656 if (TGSI_PROCESSOR_GEOMETRY == mach->Processor) { 1657 int emitted_verts = 1658 mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]; 1659 if (emitted_verts) { 1660 emit_primitive(mach); 1661 } 1662 } 1663} 1664 1665 1666/* 1667 * Fetch four texture samples using STR texture coordinates. 1668 */ 1669static void 1670fetch_texel( struct tgsi_sampler *sampler, 1671 const unsigned sview_idx, 1672 const unsigned sampler_idx, 1673 const union tgsi_exec_channel *s, 1674 const union tgsi_exec_channel *t, 1675 const union tgsi_exec_channel *p, 1676 const union tgsi_exec_channel *c0, 1677 const union tgsi_exec_channel *c1, 1678 float derivs[3][2][TGSI_QUAD_SIZE], 1679 const int8_t offset[3], 1680 enum tgsi_sampler_control control, 1681 union tgsi_exec_channel *r, 1682 union tgsi_exec_channel *g, 1683 union tgsi_exec_channel *b, 1684 union tgsi_exec_channel *a ) 1685{ 1686 uint j; 1687 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 1688 1689 /* FIXME: handle explicit derivs, offsets */ 1690 sampler->get_samples(sampler, sview_idx, sampler_idx, 1691 s->f, t->f, p->f, c0->f, c1->f, derivs, offset, control, rgba); 1692 1693 for (j = 0; j < 4; j++) { 1694 r->f[j] = rgba[0][j]; 1695 g->f[j] = rgba[1][j]; 1696 b->f[j] = rgba[2][j]; 1697 a->f[j] = rgba[3][j]; 1698 } 1699} 1700 1701 1702#define TEX_MODIFIER_NONE 0 1703#define TEX_MODIFIER_PROJECTED 1 1704#define TEX_MODIFIER_LOD_BIAS 2 1705#define TEX_MODIFIER_EXPLICIT_LOD 3 1706#define TEX_MODIFIER_LEVEL_ZERO 4 1707 1708 1709/* 1710 * Fetch all 3 (for s,t,r coords) texel offsets, put them into int array. 
1711 */ 1712static void 1713fetch_texel_offsets(struct tgsi_exec_machine *mach, 1714 const struct tgsi_full_instruction *inst, 1715 int8_t offsets[3]) 1716{ 1717 if (inst->Texture.NumOffsets == 1) { 1718 union tgsi_exec_channel index; 1719 union tgsi_exec_channel offset[3]; 1720 index.i[0] = index.i[1] = index.i[2] = index.i[3] = inst->TexOffsets[0].Index; 1721 fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File, 1722 inst->TexOffsets[0].SwizzleX, &index, &ZeroVec, &offset[0]); 1723 fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File, 1724 inst->TexOffsets[0].SwizzleY, &index, &ZeroVec, &offset[1]); 1725 fetch_src_file_channel(mach, 0, inst->TexOffsets[0].File, 1726 inst->TexOffsets[0].SwizzleZ, &index, &ZeroVec, &offset[2]); 1727 offsets[0] = offset[0].i[0]; 1728 offsets[1] = offset[1].i[0]; 1729 offsets[2] = offset[2].i[0]; 1730 } else { 1731 assert(inst->Texture.NumOffsets == 0); 1732 offsets[0] = offsets[1] = offsets[2] = 0; 1733 } 1734} 1735 1736 1737/* 1738 * Fetch dx and dy values for one channel (s, t or r). 1739 * Put dx values into one float array, dy values into another. 1740 */ 1741static void 1742fetch_assign_deriv_channel(struct tgsi_exec_machine *mach, 1743 const struct tgsi_full_instruction *inst, 1744 unsigned regdsrcx, 1745 unsigned chan, 1746 float derivs[2][TGSI_QUAD_SIZE]) 1747{ 1748 union tgsi_exec_channel d; 1749 FETCH(&d, regdsrcx, chan); 1750 derivs[0][0] = d.f[0]; 1751 derivs[0][1] = d.f[1]; 1752 derivs[0][2] = d.f[2]; 1753 derivs[0][3] = d.f[3]; 1754 FETCH(&d, regdsrcx + 1, chan); 1755 derivs[1][0] = d.f[0]; 1756 derivs[1][1] = d.f[1]; 1757 derivs[1][2] = d.f[2]; 1758 derivs[1][3] = d.f[3]; 1759} 1760 1761 1762/* 1763 * execute a texture instruction. 1764 * 1765 * modifier is used to control the channel routing for the\ 1766 * instruction variants like proj, lod, and texture with lod bias. 1767 * sampler indicates which src register the sampler is contained in. 
1768 */ 1769static void 1770exec_tex(struct tgsi_exec_machine *mach, 1771 const struct tgsi_full_instruction *inst, 1772 uint modifier, uint sampler) 1773{ 1774 const uint unit = inst->Src[sampler].Register.Index; 1775 const union tgsi_exec_channel *args[5], *proj = NULL; 1776 union tgsi_exec_channel r[5]; 1777 enum tgsi_sampler_control control = tgsi_sampler_lod_none; 1778 uint chan; 1779 int8_t offsets[3]; 1780 int dim, shadow_ref, i; 1781 1782 /* always fetch all 3 offsets, overkill but keeps code simple */ 1783 fetch_texel_offsets(mach, inst, offsets); 1784 1785 assert(modifier != TEX_MODIFIER_LEVEL_ZERO); 1786 assert(inst->Texture.Texture != TGSI_TEXTURE_BUFFER); 1787 1788 dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture, &shadow_ref); 1789 1790 assert(dim <= 4); 1791 if (shadow_ref >= 0) 1792 assert(shadow_ref >= dim && shadow_ref < Elements(args)); 1793 1794 /* fetch modifier to the last argument */ 1795 if (modifier != TEX_MODIFIER_NONE) { 1796 const int last = Elements(args) - 1; 1797 1798 /* fetch modifier from src0.w or src1.x */ 1799 if (sampler == 1) { 1800 assert(dim <= TGSI_CHAN_W && shadow_ref != TGSI_CHAN_W); 1801 FETCH(&r[last], 0, TGSI_CHAN_W); 1802 } 1803 else { 1804 assert(shadow_ref != 4); 1805 FETCH(&r[last], 1, TGSI_CHAN_X); 1806 } 1807 1808 if (modifier != TEX_MODIFIER_PROJECTED) { 1809 args[last] = &r[last]; 1810 } 1811 else { 1812 proj = &r[last]; 1813 args[last] = &ZeroVec; 1814 } 1815 1816 /* point unused arguments to zero vector */ 1817 for (i = dim; i < last; i++) 1818 args[i] = &ZeroVec; 1819 1820 if (modifier == TEX_MODIFIER_EXPLICIT_LOD) 1821 control = tgsi_sampler_lod_explicit; 1822 else if (modifier == TEX_MODIFIER_LOD_BIAS) 1823 control = tgsi_sampler_lod_bias; 1824 } 1825 else { 1826 for (i = dim; i < Elements(args); i++) 1827 args[i] = &ZeroVec; 1828 } 1829 1830 /* fetch coordinates */ 1831 for (i = 0; i < dim; i++) { 1832 FETCH(&r[i], 0, TGSI_CHAN_X + i); 1833 1834 if (proj) 1835 micro_div(&r[i], &r[i], proj); 1836 
1837 args[i] = &r[i]; 1838 } 1839 1840 /* fetch reference value */ 1841 if (shadow_ref >= 0) { 1842 FETCH(&r[shadow_ref], shadow_ref / 4, TGSI_CHAN_X + (shadow_ref % 4)); 1843 1844 if (proj) 1845 micro_div(&r[shadow_ref], &r[shadow_ref], proj); 1846 1847 args[shadow_ref] = &r[shadow_ref]; 1848 } 1849 1850 fetch_texel(mach->Sampler, unit, unit, 1851 args[0], args[1], args[2], args[3], args[4], 1852 NULL, offsets, control, 1853 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 1854 1855#if 0 1856 debug_printf("fetch r: %g %g %g %g\n", 1857 r[0].f[0], r[0].f[1], r[0].f[2], r[0].f[3]); 1858 debug_printf("fetch g: %g %g %g %g\n", 1859 r[1].f[0], r[1].f[1], r[1].f[2], r[1].f[3]); 1860 debug_printf("fetch b: %g %g %g %g\n", 1861 r[2].f[0], r[2].f[1], r[2].f[2], r[2].f[3]); 1862 debug_printf("fetch a: %g %g %g %g\n", 1863 r[3].f[0], r[3].f[1], r[3].f[2], r[3].f[3]); 1864#endif 1865 1866 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 1867 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 1868 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 1869 } 1870 } 1871} 1872 1873 1874static void 1875exec_txd(struct tgsi_exec_machine *mach, 1876 const struct tgsi_full_instruction *inst) 1877{ 1878 const uint unit = inst->Src[3].Register.Index; 1879 union tgsi_exec_channel r[4]; 1880 float derivs[3][2][TGSI_QUAD_SIZE]; 1881 uint chan; 1882 int8_t offsets[3]; 1883 1884 /* always fetch all 3 offsets, overkill but keeps code simple */ 1885 fetch_texel_offsets(mach, inst, offsets); 1886 1887 switch (inst->Texture.Texture) { 1888 case TGSI_TEXTURE_1D: 1889 FETCH(&r[0], 0, TGSI_CHAN_X); 1890 1891 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 1892 1893 fetch_texel(mach->Sampler, unit, unit, 1894 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ 1895 derivs, offsets, tgsi_sampler_derivs_explicit, 1896 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 1897 break; 1898 1899 case TGSI_TEXTURE_SHADOW1D: 1900 case 
TGSI_TEXTURE_1D_ARRAY: 1901 case TGSI_TEXTURE_SHADOW1D_ARRAY: 1902 /* SHADOW1D/1D_ARRAY would not need Y/Z respectively, but don't bother */ 1903 FETCH(&r[0], 0, TGSI_CHAN_X); 1904 FETCH(&r[1], 0, TGSI_CHAN_Y); 1905 FETCH(&r[2], 0, TGSI_CHAN_Z); 1906 1907 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 1908 1909 fetch_texel(mach->Sampler, unit, unit, 1910 &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ 1911 derivs, offsets, tgsi_sampler_derivs_explicit, 1912 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 1913 break; 1914 1915 case TGSI_TEXTURE_2D: 1916 case TGSI_TEXTURE_RECT: 1917 FETCH(&r[0], 0, TGSI_CHAN_X); 1918 FETCH(&r[1], 0, TGSI_CHAN_Y); 1919 1920 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 1921 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]); 1922 1923 fetch_texel(mach->Sampler, unit, unit, 1924 &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ 1925 derivs, offsets, tgsi_sampler_derivs_explicit, 1926 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 1927 break; 1928 1929 1930 case TGSI_TEXTURE_SHADOW2D: 1931 case TGSI_TEXTURE_SHADOWRECT: 1932 case TGSI_TEXTURE_2D_ARRAY: 1933 case TGSI_TEXTURE_SHADOW2D_ARRAY: 1934 /* only SHADOW2D_ARRAY actually needs W */ 1935 FETCH(&r[0], 0, TGSI_CHAN_X); 1936 FETCH(&r[1], 0, TGSI_CHAN_Y); 1937 FETCH(&r[2], 0, TGSI_CHAN_Z); 1938 FETCH(&r[3], 0, TGSI_CHAN_W); 1939 1940 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 1941 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]); 1942 1943 fetch_texel(mach->Sampler, unit, unit, 1944 &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */ 1945 derivs, offsets, tgsi_sampler_derivs_explicit, 1946 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 1947 break; 1948 1949 case TGSI_TEXTURE_3D: 1950 case TGSI_TEXTURE_CUBE: 1951 case TGSI_TEXTURE_CUBE_ARRAY: 1952 /* only TEXTURE_CUBE_ARRAY actually needs W */ 1953 FETCH(&r[0], 0, TGSI_CHAN_X); 1954 FETCH(&r[1], 0, TGSI_CHAN_Y); 1955 
FETCH(&r[2], 0, TGSI_CHAN_Z); 1956 FETCH(&r[3], 0, TGSI_CHAN_W); 1957 1958 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 1959 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]); 1960 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Z, derivs[2]); 1961 1962 fetch_texel(mach->Sampler, unit, unit, 1963 &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */ 1964 derivs, offsets, tgsi_sampler_derivs_explicit, 1965 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 1966 break; 1967 1968 default: 1969 assert(0); 1970 } 1971 1972 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 1973 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 1974 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 1975 } 1976 } 1977} 1978 1979 1980static void 1981exec_txf(struct tgsi_exec_machine *mach, 1982 const struct tgsi_full_instruction *inst) 1983{ 1984 const uint unit = inst->Src[1].Register.Index; 1985 union tgsi_exec_channel r[4]; 1986 uint chan; 1987 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 1988 int j; 1989 int8_t offsets[3]; 1990 unsigned target; 1991 1992 /* always fetch all 3 offsets, overkill but keeps code simple */ 1993 fetch_texel_offsets(mach, inst, offsets); 1994 1995 IFETCH(&r[3], 0, TGSI_CHAN_W); 1996 1997 if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I) { 1998 target = mach->SamplerViews[unit].Resource; 1999 } 2000 else { 2001 target = inst->Texture.Texture; 2002 } 2003 switch(target) { 2004 case TGSI_TEXTURE_3D: 2005 case TGSI_TEXTURE_2D_ARRAY: 2006 case TGSI_TEXTURE_SHADOW2D_ARRAY: 2007 IFETCH(&r[2], 0, TGSI_CHAN_Z); 2008 /* fallthrough */ 2009 case TGSI_TEXTURE_2D: 2010 case TGSI_TEXTURE_RECT: 2011 case TGSI_TEXTURE_SHADOW1D_ARRAY: 2012 case TGSI_TEXTURE_SHADOW2D: 2013 case TGSI_TEXTURE_SHADOWRECT: 2014 case TGSI_TEXTURE_1D_ARRAY: 2015 IFETCH(&r[1], 0, TGSI_CHAN_Y); 2016 /* fallthrough */ 2017 case TGSI_TEXTURE_BUFFER: 2018 case TGSI_TEXTURE_1D: 2019 case TGSI_TEXTURE_SHADOW1D: 2020 IFETCH(&r[0], 0, 
TGSI_CHAN_X); 2021 break; 2022 default: 2023 assert(0); 2024 break; 2025 } 2026 2027 mach->Sampler->get_texel(mach->Sampler, unit, r[0].i, r[1].i, r[2].i, r[3].i, 2028 offsets, rgba); 2029 2030 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 2031 r[0].f[j] = rgba[0][j]; 2032 r[1].f[j] = rgba[1][j]; 2033 r[2].f[j] = rgba[2][j]; 2034 r[3].f[j] = rgba[3][j]; 2035 } 2036 2037 if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I) { 2038 unsigned char swizzles[4]; 2039 swizzles[0] = inst->Src[1].Register.SwizzleX; 2040 swizzles[1] = inst->Src[1].Register.SwizzleY; 2041 swizzles[2] = inst->Src[1].Register.SwizzleZ; 2042 swizzles[3] = inst->Src[1].Register.SwizzleW; 2043 2044 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2045 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2046 store_dest(mach, &r[swizzles[chan]], 2047 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2048 } 2049 } 2050 } 2051 else { 2052 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2053 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2054 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2055 } 2056 } 2057 } 2058} 2059 2060static void 2061exec_txq(struct tgsi_exec_machine *mach, 2062 const struct tgsi_full_instruction *inst) 2063{ 2064 const uint unit = inst->Src[1].Register.Index; 2065 int result[4]; 2066 union tgsi_exec_channel r[4], src; 2067 uint chan; 2068 int i,j; 2069 2070 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT); 2071 2072 /* XXX: This interface can't return per-pixel values */ 2073 mach->Sampler->get_dims(mach->Sampler, unit, src.i[0], result); 2074 2075 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 2076 for (j = 0; j < 4; j++) { 2077 r[j].i[i] = result[j]; 2078 } 2079 } 2080 2081 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2082 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2083 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, 2084 TGSI_EXEC_DATA_INT); 2085 } 2086 } 2087} 2088 2089static void 2090exec_sample(struct 
tgsi_exec_machine *mach, 2091 const struct tgsi_full_instruction *inst, 2092 uint modifier, boolean compare) 2093{ 2094 const uint resource_unit = inst->Src[1].Register.Index; 2095 const uint sampler_unit = inst->Src[2].Register.Index; 2096 union tgsi_exec_channel r[4], c1; 2097 const union tgsi_exec_channel *lod = &ZeroVec; 2098 enum tgsi_sampler_control control = tgsi_sampler_lod_none; 2099 uint chan; 2100 unsigned char swizzles[4]; 2101 int8_t offsets[3]; 2102 2103 /* always fetch all 3 offsets, overkill but keeps code simple */ 2104 fetch_texel_offsets(mach, inst, offsets); 2105 2106 assert(modifier != TEX_MODIFIER_PROJECTED); 2107 2108 if (modifier != TEX_MODIFIER_NONE) { 2109 if (modifier == TEX_MODIFIER_LOD_BIAS) { 2110 FETCH(&c1, 3, TGSI_CHAN_X); 2111 lod = &c1; 2112 control = tgsi_sampler_lod_bias; 2113 } 2114 else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) { 2115 FETCH(&c1, 3, TGSI_CHAN_X); 2116 lod = &c1; 2117 control = tgsi_sampler_lod_explicit; 2118 } 2119 else { 2120 assert(modifier == TEX_MODIFIER_LEVEL_ZERO); 2121 control = tgsi_sampler_lod_zero; 2122 } 2123 } 2124 2125 FETCH(&r[0], 0, TGSI_CHAN_X); 2126 2127 switch (mach->SamplerViews[resource_unit].Resource) { 2128 case TGSI_TEXTURE_1D: 2129 if (compare) { 2130 FETCH(&r[2], 3, TGSI_CHAN_X); 2131 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2132 &r[0], &ZeroVec, &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */ 2133 NULL, offsets, control, 2134 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2135 } 2136 else { 2137 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2138 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */ 2139 NULL, offsets, control, 2140 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2141 } 2142 break; 2143 2144 case TGSI_TEXTURE_1D_ARRAY: 2145 case TGSI_TEXTURE_2D: 2146 case TGSI_TEXTURE_RECT: 2147 FETCH(&r[1], 0, TGSI_CHAN_Y); 2148 if (compare) { 2149 FETCH(&r[2], 3, TGSI_CHAN_X); 2150 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2151 &r[0], &r[1], 
&r[2], &ZeroVec, lod, /* S, T, P, C, LOD */ 2152 NULL, offsets, control, 2153 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2154 } 2155 else { 2156 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2157 &r[0], &r[1], &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */ 2158 NULL, offsets, control, 2159 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2160 } 2161 break; 2162 2163 case TGSI_TEXTURE_2D_ARRAY: 2164 case TGSI_TEXTURE_3D: 2165 case TGSI_TEXTURE_CUBE: 2166 FETCH(&r[1], 0, TGSI_CHAN_Y); 2167 FETCH(&r[2], 0, TGSI_CHAN_Z); 2168 if(compare) { 2169 FETCH(&r[3], 3, TGSI_CHAN_X); 2170 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2171 &r[0], &r[1], &r[2], &r[3], lod, 2172 NULL, offsets, control, 2173 &r[0], &r[1], &r[2], &r[3]); 2174 } 2175 else { 2176 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2177 &r[0], &r[1], &r[2], &ZeroVec, lod, 2178 NULL, offsets, control, 2179 &r[0], &r[1], &r[2], &r[3]); 2180 } 2181 break; 2182 2183 case TGSI_TEXTURE_CUBE_ARRAY: 2184 FETCH(&r[1], 0, TGSI_CHAN_Y); 2185 FETCH(&r[2], 0, TGSI_CHAN_Z); 2186 FETCH(&r[3], 0, TGSI_CHAN_W); 2187 if(compare) { 2188 FETCH(&r[4], 3, TGSI_CHAN_X); 2189 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2190 &r[0], &r[1], &r[2], &r[3], &r[4], 2191 NULL, offsets, control, 2192 &r[0], &r[1], &r[2], &r[3]); 2193 } 2194 else { 2195 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2196 &r[0], &r[1], &r[2], &r[3], lod, 2197 NULL, offsets, control, 2198 &r[0], &r[1], &r[2], &r[3]); 2199 } 2200 break; 2201 2202 2203 default: 2204 assert(0); 2205 } 2206 2207 swizzles[0] = inst->Src[1].Register.SwizzleX; 2208 swizzles[1] = inst->Src[1].Register.SwizzleY; 2209 swizzles[2] = inst->Src[1].Register.SwizzleZ; 2210 swizzles[3] = inst->Src[1].Register.SwizzleW; 2211 2212 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2213 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2214 store_dest(mach, &r[swizzles[chan]], 2215 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2216 } 2217 } 
2218} 2219 2220static void 2221exec_sample_d(struct tgsi_exec_machine *mach, 2222 const struct tgsi_full_instruction *inst) 2223{ 2224 const uint resource_unit = inst->Src[1].Register.Index; 2225 const uint sampler_unit = inst->Src[2].Register.Index; 2226 union tgsi_exec_channel r[4]; 2227 float derivs[3][2][TGSI_QUAD_SIZE]; 2228 uint chan; 2229 unsigned char swizzles[4]; 2230 int8_t offsets[3]; 2231 2232 /* always fetch all 3 offsets, overkill but keeps code simple */ 2233 fetch_texel_offsets(mach, inst, offsets); 2234 2235 FETCH(&r[0], 0, TGSI_CHAN_X); 2236 2237 switch (mach->SamplerViews[resource_unit].Resource) { 2238 case TGSI_TEXTURE_1D: 2239 case TGSI_TEXTURE_1D_ARRAY: 2240 /* only 1D array actually needs Y */ 2241 FETCH(&r[1], 0, TGSI_CHAN_Y); 2242 2243 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]); 2244 2245 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2246 &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ 2247 derivs, offsets, tgsi_sampler_derivs_explicit, 2248 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2249 break; 2250 2251 case TGSI_TEXTURE_2D: 2252 case TGSI_TEXTURE_RECT: 2253 case TGSI_TEXTURE_2D_ARRAY: 2254 /* only 2D array actually needs Z */ 2255 FETCH(&r[1], 0, TGSI_CHAN_Y); 2256 FETCH(&r[2], 0, TGSI_CHAN_Z); 2257 2258 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]); 2259 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]); 2260 2261 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2262 &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* inputs */ 2263 derivs, offsets, tgsi_sampler_derivs_explicit, 2264 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2265 break; 2266 2267 case TGSI_TEXTURE_3D: 2268 case TGSI_TEXTURE_CUBE: 2269 case TGSI_TEXTURE_CUBE_ARRAY: 2270 /* only cube array actually needs W */ 2271 FETCH(&r[1], 0, TGSI_CHAN_Y); 2272 FETCH(&r[2], 0, TGSI_CHAN_Z); 2273 FETCH(&r[3], 0, TGSI_CHAN_W); 2274 2275 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, 
derivs[0]); 2276 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]); 2277 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Z, derivs[2]); 2278 2279 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2280 &r[0], &r[1], &r[2], &r[3], &ZeroVec, 2281 derivs, offsets, tgsi_sampler_derivs_explicit, 2282 &r[0], &r[1], &r[2], &r[3]); 2283 break; 2284 2285 default: 2286 assert(0); 2287 } 2288 2289 swizzles[0] = inst->Src[1].Register.SwizzleX; 2290 swizzles[1] = inst->Src[1].Register.SwizzleY; 2291 swizzles[2] = inst->Src[1].Register.SwizzleZ; 2292 swizzles[3] = inst->Src[1].Register.SwizzleW; 2293 2294 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2295 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2296 store_dest(mach, &r[swizzles[chan]], 2297 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2298 } 2299 } 2300} 2301 2302 2303/** 2304 * Evaluate a constant-valued coefficient at the position of the 2305 * current quad. 2306 */ 2307static void 2308eval_constant_coef( 2309 struct tgsi_exec_machine *mach, 2310 unsigned attrib, 2311 unsigned chan ) 2312{ 2313 unsigned i; 2314 2315 for( i = 0; i < TGSI_QUAD_SIZE; i++ ) { 2316 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; 2317 } 2318} 2319 2320/** 2321 * Evaluate a linear-valued coefficient at the position of the 2322 * current quad. 
2323 */ 2324static void 2325eval_linear_coef( 2326 struct tgsi_exec_machine *mach, 2327 unsigned attrib, 2328 unsigned chan ) 2329{ 2330 const float x = mach->QuadPos.xyzw[0].f[0]; 2331 const float y = mach->QuadPos.xyzw[1].f[0]; 2332 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 2333 const float dady = mach->InterpCoefs[attrib].dady[chan]; 2334 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 2335 mach->Inputs[attrib].xyzw[chan].f[0] = a0; 2336 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; 2337 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; 2338 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; 2339} 2340 2341/** 2342 * Evaluate a perspective-valued coefficient at the position of the 2343 * current quad. 2344 */ 2345static void 2346eval_perspective_coef( 2347 struct tgsi_exec_machine *mach, 2348 unsigned attrib, 2349 unsigned chan ) 2350{ 2351 const float x = mach->QuadPos.xyzw[0].f[0]; 2352 const float y = mach->QuadPos.xyzw[1].f[0]; 2353 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 2354 const float dady = mach->InterpCoefs[attrib].dady[chan]; 2355 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 2356 const float *w = mach->QuadPos.xyzw[3].f; 2357 /* divide by W here */ 2358 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; 2359 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; 2360 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; 2361 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; 2362} 2363 2364 2365typedef void (* eval_coef_func)( 2366 struct tgsi_exec_machine *mach, 2367 unsigned attrib, 2368 unsigned chan ); 2369 2370static void 2371exec_declaration(struct tgsi_exec_machine *mach, 2372 const struct tgsi_full_declaration *decl) 2373{ 2374 if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) { 2375 mach->SamplerViews[decl->Range.First] = decl->SamplerView; 2376 return; 2377 } 2378 2379 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { 
2380 if (decl->Declaration.File == TGSI_FILE_INPUT) { 2381 uint first, last, mask; 2382 2383 first = decl->Range.First; 2384 last = decl->Range.Last; 2385 mask = decl->Declaration.UsageMask; 2386 2387 /* XXX we could remove this special-case code since 2388 * mach->InterpCoefs[first].a0 should already have the 2389 * front/back-face value. But we should first update the 2390 * ureg code to emit the right UsageMask value (WRITEMASK_X). 2391 * Then, we could remove the tgsi_exec_machine::Face field. 2392 */ 2393 /* XXX make FACE a system value */ 2394 if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) { 2395 uint i; 2396 2397 assert(decl->Semantic.Index == 0); 2398 assert(first == last); 2399 2400 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 2401 mach->Inputs[first].xyzw[0].f[i] = mach->Face; 2402 } 2403 } else { 2404 eval_coef_func eval; 2405 uint i, j; 2406 2407 switch (decl->Interp.Interpolate) { 2408 case TGSI_INTERPOLATE_CONSTANT: 2409 eval = eval_constant_coef; 2410 break; 2411 2412 case TGSI_INTERPOLATE_LINEAR: 2413 eval = eval_linear_coef; 2414 break; 2415 2416 case TGSI_INTERPOLATE_PERSPECTIVE: 2417 eval = eval_perspective_coef; 2418 break; 2419 2420 case TGSI_INTERPOLATE_COLOR: 2421 eval = mach->flatshade_color ? 
eval_constant_coef : eval_perspective_coef; 2422 break; 2423 2424 default: 2425 assert(0); 2426 return; 2427 } 2428 2429 for (j = 0; j < TGSI_NUM_CHANNELS; j++) { 2430 if (mask & (1 << j)) { 2431 for (i = first; i <= last; i++) { 2432 eval(mach, i, j); 2433 } 2434 } 2435 } 2436 } 2437 2438 if (DEBUG_EXECUTION) { 2439 uint i, j; 2440 for (i = first; i <= last; ++i) { 2441 debug_printf("IN[%2u] = ", i); 2442 for (j = 0; j < TGSI_NUM_CHANNELS; j++) { 2443 if (j > 0) { 2444 debug_printf(" "); 2445 } 2446 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", 2447 mach->Inputs[i].xyzw[0].f[j], mach->Inputs[i].xyzw[0].u[j], 2448 mach->Inputs[i].xyzw[1].f[j], mach->Inputs[i].xyzw[1].u[j], 2449 mach->Inputs[i].xyzw[2].f[j], mach->Inputs[i].xyzw[2].u[j], 2450 mach->Inputs[i].xyzw[3].f[j], mach->Inputs[i].xyzw[3].u[j]); 2451 } 2452 } 2453 } 2454 } 2455 } 2456 2457 if (decl->Declaration.File == TGSI_FILE_SYSTEM_VALUE) { 2458 mach->SysSemanticToIndex[decl->Declaration.Semantic] = decl->Range.First; 2459 } 2460} 2461 2462 2463typedef void (* micro_op)(union tgsi_exec_channel *dst); 2464 2465static void 2466exec_vector(struct tgsi_exec_machine *mach, 2467 const struct tgsi_full_instruction *inst, 2468 micro_op op, 2469 enum tgsi_exec_datatype dst_datatype) 2470{ 2471 unsigned int chan; 2472 2473 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2474 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2475 union tgsi_exec_channel dst; 2476 2477 op(&dst); 2478 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype); 2479 } 2480 } 2481} 2482 2483typedef void (* micro_unary_op)(union tgsi_exec_channel *dst, 2484 const union tgsi_exec_channel *src); 2485 2486static void 2487exec_scalar_unary(struct tgsi_exec_machine *mach, 2488 const struct tgsi_full_instruction *inst, 2489 micro_unary_op op, 2490 enum tgsi_exec_datatype dst_datatype, 2491 enum tgsi_exec_datatype src_datatype) 2492{ 2493 unsigned int chan; 2494 union tgsi_exec_channel src; 2495 union tgsi_exec_channel dst; 2496 
2497 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype); 2498 op(&dst, &src); 2499 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2500 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2501 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype); 2502 } 2503 } 2504} 2505 2506static void 2507exec_vector_unary(struct tgsi_exec_machine *mach, 2508 const struct tgsi_full_instruction *inst, 2509 micro_unary_op op, 2510 enum tgsi_exec_datatype dst_datatype, 2511 enum tgsi_exec_datatype src_datatype) 2512{ 2513 unsigned int chan; 2514 struct tgsi_exec_vector dst; 2515 2516 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2517 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2518 union tgsi_exec_channel src; 2519 2520 fetch_source(mach, &src, &inst->Src[0], chan, src_datatype); 2521 op(&dst.xyzw[chan], &src); 2522 } 2523 } 2524 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2525 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2526 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); 2527 } 2528 } 2529} 2530 2531typedef void (* micro_binary_op)(union tgsi_exec_channel *dst, 2532 const union tgsi_exec_channel *src0, 2533 const union tgsi_exec_channel *src1); 2534 2535static void 2536exec_scalar_binary(struct tgsi_exec_machine *mach, 2537 const struct tgsi_full_instruction *inst, 2538 micro_binary_op op, 2539 enum tgsi_exec_datatype dst_datatype, 2540 enum tgsi_exec_datatype src_datatype) 2541{ 2542 unsigned int chan; 2543 union tgsi_exec_channel src[2]; 2544 union tgsi_exec_channel dst; 2545 2546 fetch_source(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, src_datatype); 2547 fetch_source(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, src_datatype); 2548 op(&dst, &src[0], &src[1]); 2549 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2550 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2551 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype); 2552 } 2553 } 2554} 2555 2556static void 
2557exec_vector_binary(struct tgsi_exec_machine *mach, 2558 const struct tgsi_full_instruction *inst, 2559 micro_binary_op op, 2560 enum tgsi_exec_datatype dst_datatype, 2561 enum tgsi_exec_datatype src_datatype) 2562{ 2563 unsigned int chan; 2564 struct tgsi_exec_vector dst; 2565 2566 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2567 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2568 union tgsi_exec_channel src[2]; 2569 2570 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); 2571 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); 2572 op(&dst.xyzw[chan], &src[0], &src[1]); 2573 } 2574 } 2575 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2576 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2577 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); 2578 } 2579 } 2580} 2581 2582typedef void (* micro_trinary_op)(union tgsi_exec_channel *dst, 2583 const union tgsi_exec_channel *src0, 2584 const union tgsi_exec_channel *src1, 2585 const union tgsi_exec_channel *src2); 2586 2587static void 2588exec_vector_trinary(struct tgsi_exec_machine *mach, 2589 const struct tgsi_full_instruction *inst, 2590 micro_trinary_op op, 2591 enum tgsi_exec_datatype dst_datatype, 2592 enum tgsi_exec_datatype src_datatype) 2593{ 2594 unsigned int chan; 2595 struct tgsi_exec_vector dst; 2596 2597 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2598 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2599 union tgsi_exec_channel src[3]; 2600 2601 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); 2602 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); 2603 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype); 2604 op(&dst.xyzw[chan], &src[0], &src[1], &src[2]); 2605 } 2606 } 2607 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2608 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2609 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); 2610 } 2611 } 2612} 
2613 2614typedef void (* micro_quaternary_op)(union tgsi_exec_channel *dst, 2615 const union tgsi_exec_channel *src0, 2616 const union tgsi_exec_channel *src1, 2617 const union tgsi_exec_channel *src2, 2618 const union tgsi_exec_channel *src3); 2619 2620static void 2621exec_vector_quaternary(struct tgsi_exec_machine *mach, 2622 const struct tgsi_full_instruction *inst, 2623 micro_quaternary_op op, 2624 enum tgsi_exec_datatype dst_datatype, 2625 enum tgsi_exec_datatype src_datatype) 2626{ 2627 unsigned int chan; 2628 struct tgsi_exec_vector dst; 2629 2630 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2631 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2632 union tgsi_exec_channel src[4]; 2633 2634 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); 2635 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); 2636 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype); 2637 fetch_source(mach, &src[3], &inst->Src[3], chan, src_datatype); 2638 op(&dst.xyzw[chan], &src[0], &src[1], &src[2], &src[3]); 2639 } 2640 } 2641 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2642 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2643 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); 2644 } 2645 } 2646} 2647 2648static void 2649exec_dp3(struct tgsi_exec_machine *mach, 2650 const struct tgsi_full_instruction *inst) 2651{ 2652 unsigned int chan; 2653 union tgsi_exec_channel arg[3]; 2654 2655 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 2656 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 2657 micro_mul(&arg[2], &arg[0], &arg[1]); 2658 2659 for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_Z; chan++) { 2660 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); 2661 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); 2662 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 2663 } 2664 2665 for (chan = 0; chan < 
TGSI_NUM_CHANNELS; chan++) { 2666 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2667 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2668 } 2669 } 2670} 2671 2672static void 2673exec_dp4(struct tgsi_exec_machine *mach, 2674 const struct tgsi_full_instruction *inst) 2675{ 2676 unsigned int chan; 2677 union tgsi_exec_channel arg[3]; 2678 2679 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 2680 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 2681 micro_mul(&arg[2], &arg[0], &arg[1]); 2682 2683 for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_W; chan++) { 2684 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); 2685 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); 2686 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 2687 } 2688 2689 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2690 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2691 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2692 } 2693 } 2694} 2695 2696static void 2697exec_dp2a(struct tgsi_exec_machine *mach, 2698 const struct tgsi_full_instruction *inst) 2699{ 2700 unsigned int chan; 2701 union tgsi_exec_channel arg[3]; 2702 2703 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 2704 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 2705 micro_mul(&arg[2], &arg[0], &arg[1]); 2706 2707 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2708 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2709 micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]); 2710 2711 fetch_source(mach, &arg[1], &inst->Src[2], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 2712 micro_add(&arg[0], &arg[0], &arg[1]); 2713 2714 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2715 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2716 store_dest(mach, &arg[0], 
&inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2717 } 2718 } 2719} 2720 2721static void 2722exec_dph(struct tgsi_exec_machine *mach, 2723 const struct tgsi_full_instruction *inst) 2724{ 2725 unsigned int chan; 2726 union tgsi_exec_channel arg[3]; 2727 2728 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 2729 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 2730 micro_mul(&arg[2], &arg[0], &arg[1]); 2731 2732 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2733 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2734 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 2735 2736 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 2737 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 2738 micro_mad(&arg[0], &arg[0], &arg[1], &arg[2]); 2739 2740 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 2741 micro_add(&arg[0], &arg[0], &arg[1]); 2742 2743 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2744 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2745 store_dest(mach, &arg[0], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2746 } 2747 } 2748} 2749 2750static void 2751exec_dp2(struct tgsi_exec_machine *mach, 2752 const struct tgsi_full_instruction *inst) 2753{ 2754 unsigned int chan; 2755 union tgsi_exec_channel arg[3]; 2756 2757 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 2758 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 2759 micro_mul(&arg[2], &arg[0], &arg[1]); 2760 2761 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2762 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2763 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 2764 2765 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2766 if (inst->Dst[0].Register.WriteMask & (1 << 
chan)) { 2767 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2768 } 2769 } 2770} 2771 2772static void 2773exec_nrm4(struct tgsi_exec_machine *mach, 2774 const struct tgsi_full_instruction *inst) 2775{ 2776 unsigned int chan; 2777 union tgsi_exec_channel arg[4]; 2778 union tgsi_exec_channel scale; 2779 2780 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 2781 micro_mul(&scale, &arg[0], &arg[0]); 2782 2783 for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_W; chan++) { 2784 union tgsi_exec_channel product; 2785 2786 fetch_source(mach, &arg[chan], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); 2787 micro_mul(&product, &arg[chan], &arg[chan]); 2788 micro_add(&scale, &scale, &product); 2789 } 2790 2791 micro_rsq(&scale, &scale); 2792 2793 for (chan = TGSI_CHAN_X; chan <= TGSI_CHAN_W; chan++) { 2794 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2795 micro_mul(&arg[chan], &arg[chan], &scale); 2796 store_dest(mach, &arg[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2797 } 2798 } 2799} 2800 2801static void 2802exec_nrm3(struct tgsi_exec_machine *mach, 2803 const struct tgsi_full_instruction *inst) 2804{ 2805 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) { 2806 unsigned int chan; 2807 union tgsi_exec_channel arg[3]; 2808 union tgsi_exec_channel scale; 2809 2810 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 2811 micro_mul(&scale, &arg[0], &arg[0]); 2812 2813 for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_Z; chan++) { 2814 union tgsi_exec_channel product; 2815 2816 fetch_source(mach, &arg[chan], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); 2817 micro_mul(&product, &arg[chan], &arg[chan]); 2818 micro_add(&scale, &scale, &product); 2819 } 2820 2821 micro_rsq(&scale, &scale); 2822 2823 for (chan = TGSI_CHAN_X; chan <= TGSI_CHAN_Z; chan++) { 2824 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2825 micro_mul(&arg[chan], &arg[chan], &scale); 2826 store_dest(mach, 
&arg[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2827 } 2828 } 2829 } 2830 2831 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 2832 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 2833 } 2834} 2835 2836static void 2837exec_scs(struct tgsi_exec_machine *mach, 2838 const struct tgsi_full_instruction *inst) 2839{ 2840 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) { 2841 union tgsi_exec_channel arg; 2842 union tgsi_exec_channel result; 2843 2844 fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 2845 2846 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 2847 micro_cos(&result, &arg); 2848 store_dest(mach, &result, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 2849 } 2850 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 2851 micro_sin(&result, &arg); 2852 store_dest(mach, &result, &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2853 } 2854 } 2855 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 2856 store_dest(mach, &ZeroVec, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 2857 } 2858 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 2859 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 2860 } 2861} 2862 2863static void 2864exec_x2d(struct tgsi_exec_machine *mach, 2865 const struct tgsi_full_instruction *inst) 2866{ 2867 union tgsi_exec_channel r[4]; 2868 union tgsi_exec_channel d[2]; 2869 2870 fetch_source(mach, &r[0], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 2871 fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2872 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XZ) { 2873 fetch_source(mach, &r[2], &inst->Src[2], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 2874 micro_mul(&r[2], &r[2], &r[0]); 2875 fetch_source(mach, &r[3], &inst->Src[2], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2876 micro_mul(&r[3], &r[3], &r[1]); 2877 micro_add(&r[2], &r[2], 
&r[3]); 2878 fetch_source(mach, &r[3], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 2879 micro_add(&d[0], &r[2], &r[3]); 2880 } 2881 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YW) { 2882 fetch_source(mach, &r[2], &inst->Src[2], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 2883 micro_mul(&r[2], &r[2], &r[0]); 2884 fetch_source(mach, &r[3], &inst->Src[2], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 2885 micro_mul(&r[3], &r[3], &r[1]); 2886 micro_add(&r[2], &r[2], &r[3]); 2887 fetch_source(mach, &r[3], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2888 micro_add(&d[1], &r[2], &r[3]); 2889 } 2890 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 2891 store_dest(mach, &d[0], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 2892 } 2893 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 2894 store_dest(mach, &d[1], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2895 } 2896 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 2897 store_dest(mach, &d[0], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 2898 } 2899 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 2900 store_dest(mach, &d[1], &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 2901 } 2902} 2903 2904static void 2905exec_rfl(struct tgsi_exec_machine *mach, 2906 const struct tgsi_full_instruction *inst) 2907{ 2908 union tgsi_exec_channel r[9]; 2909 2910 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) { 2911 /* r0 = dp3(src0, src0) */ 2912 fetch_source(mach, &r[2], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 2913 micro_mul(&r[0], &r[2], &r[2]); 2914 fetch_source(mach, &r[4], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2915 micro_mul(&r[8], &r[4], &r[4]); 2916 micro_add(&r[0], &r[0], &r[8]); 2917 fetch_source(mach, &r[6], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 2918 micro_mul(&r[8], &r[6], &r[6]); 2919 micro_add(&r[0], &r[0], &r[8]); 2920 2921 /* r1 = dp3(src0, src1) */ 2922 fetch_source(mach, &r[3], &inst->Src[1], 
TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 2923 micro_mul(&r[1], &r[2], &r[3]); 2924 fetch_source(mach, &r[5], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2925 micro_mul(&r[8], &r[4], &r[5]); 2926 micro_add(&r[1], &r[1], &r[8]); 2927 fetch_source(mach, &r[7], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 2928 micro_mul(&r[8], &r[6], &r[7]); 2929 micro_add(&r[1], &r[1], &r[8]); 2930 2931 /* r1 = 2 * r1 / r0 */ 2932 micro_add(&r[1], &r[1], &r[1]); 2933 micro_div(&r[1], &r[1], &r[0]); 2934 2935 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 2936 micro_mul(&r[2], &r[2], &r[1]); 2937 micro_sub(&r[2], &r[2], &r[3]); 2938 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 2939 } 2940 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 2941 micro_mul(&r[4], &r[4], &r[1]); 2942 micro_sub(&r[4], &r[4], &r[5]); 2943 store_dest(mach, &r[4], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2944 } 2945 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 2946 micro_mul(&r[6], &r[6], &r[1]); 2947 micro_sub(&r[6], &r[6], &r[7]); 2948 store_dest(mach, &r[6], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 2949 } 2950 } 2951 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 2952 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 2953 } 2954} 2955 2956static void 2957exec_xpd(struct tgsi_exec_machine *mach, 2958 const struct tgsi_full_instruction *inst) 2959{ 2960 union tgsi_exec_channel r[6]; 2961 union tgsi_exec_channel d[3]; 2962 2963 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2964 fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 2965 2966 micro_mul(&r[2], &r[0], &r[1]); 2967 2968 fetch_source(mach, &r[3], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 2969 fetch_source(mach, &r[4], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2970 2971 micro_mul(&r[5], &r[3], &r[4] ); 2972 
micro_sub(&d[TGSI_CHAN_X], &r[2], &r[5]); 2973 2974 fetch_source(mach, &r[2], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 2975 2976 micro_mul(&r[3], &r[3], &r[2]); 2977 2978 fetch_source(mach, &r[5], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 2979 2980 micro_mul(&r[1], &r[1], &r[5]); 2981 micro_sub(&d[TGSI_CHAN_Y], &r[3], &r[1]); 2982 2983 micro_mul(&r[5], &r[5], &r[4]); 2984 micro_mul(&r[0], &r[0], &r[2]); 2985 micro_sub(&d[TGSI_CHAN_Z], &r[5], &r[0]); 2986 2987 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 2988 store_dest(mach, &d[TGSI_CHAN_X], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 2989 } 2990 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 2991 store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 2992 } 2993 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 2994 store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 2995 } 2996 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 2997 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 2998 } 2999} 3000 3001static void 3002exec_dst(struct tgsi_exec_machine *mach, 3003 const struct tgsi_full_instruction *inst) 3004{ 3005 union tgsi_exec_channel r[2]; 3006 union tgsi_exec_channel d[4]; 3007 3008 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3009 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3010 fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3011 micro_mul(&d[TGSI_CHAN_Y], &r[0], &r[1]); 3012 } 3013 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3014 fetch_source(mach, &d[TGSI_CHAN_Z], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 3015 } 3016 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3017 fetch_source(mach, &d[TGSI_CHAN_W], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3018 } 3019 3020 if (inst->Dst[0].Register.WriteMask & 
TGSI_WRITEMASK_X) { 3021 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3022 } 3023 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3024 store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3025 } 3026 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3027 store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 3028 } 3029 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3030 store_dest(mach, &d[TGSI_CHAN_W], &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3031 } 3032} 3033 3034static void 3035exec_log(struct tgsi_exec_machine *mach, 3036 const struct tgsi_full_instruction *inst) 3037{ 3038 union tgsi_exec_channel r[3]; 3039 3040 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3041 micro_abs(&r[2], &r[0]); /* r2 = abs(r0) */ 3042 micro_lg2(&r[1], &r[2]); /* r1 = lg2(r2) */ 3043 micro_flr(&r[0], &r[1]); /* r0 = floor(r1) */ 3044 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3045 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3046 } 3047 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3048 micro_exp2(&r[0], &r[0]); /* r0 = 2 ^ r0 */ 3049 micro_div(&r[0], &r[2], &r[0]); /* r0 = r2 / r0 */ 3050 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3051 } 3052 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3053 store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 3054 } 3055 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3056 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3057 } 3058} 3059 3060static void 3061exec_exp(struct tgsi_exec_machine *mach, 3062 const struct tgsi_full_instruction *inst) 3063{ 3064 union tgsi_exec_channel r[3]; 3065 3066 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3067 
micro_flr(&r[1], &r[0]); /* r1 = floor(r0) */ 3068 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3069 micro_exp2(&r[2], &r[1]); /* r2 = 2 ^ r1 */ 3070 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3071 } 3072 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3073 micro_sub(&r[2], &r[0], &r[1]); /* r2 = r0 - r1 */ 3074 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3075 } 3076 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3077 micro_exp2(&r[2], &r[0]); /* r2 = 2 ^ r0 */ 3078 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 3079 } 3080 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3081 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3082 } 3083} 3084 3085static void 3086exec_lit(struct tgsi_exec_machine *mach, 3087 const struct tgsi_full_instruction *inst) 3088{ 3089 union tgsi_exec_channel r[3]; 3090 union tgsi_exec_channel d[3]; 3091 3092 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YZ) { 3093 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3094 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3095 fetch_source(mach, &r[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3096 micro_max(&r[1], &r[1], &ZeroVec); 3097 3098 fetch_source(mach, &r[2], &inst->Src[0], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3099 micro_min(&r[2], &r[2], &P128Vec); 3100 micro_max(&r[2], &r[2], &M128Vec); 3101 micro_pow(&r[1], &r[1], &r[2]); 3102 micro_lt(&d[TGSI_CHAN_Z], &ZeroVec, &r[0], &r[1], &ZeroVec); 3103 store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 3104 } 3105 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3106 micro_max(&d[TGSI_CHAN_Y], &r[0], &ZeroVec); 3107 store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3108 } 3109 } 3110 if (inst->Dst[0].Register.WriteMask & 
TGSI_WRITEMASK_X) { 3111 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3112 } 3113 3114 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3115 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3116 } 3117} 3118 3119static void 3120exec_break(struct tgsi_exec_machine *mach) 3121{ 3122 if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) { 3123 /* turn off loop channels for each enabled exec channel */ 3124 mach->LoopMask &= ~mach->ExecMask; 3125 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3126 UPDATE_EXEC_MASK(mach); 3127 } else { 3128 assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH); 3129 3130 mach->Switch.mask = 0x0; 3131 3132 UPDATE_EXEC_MASK(mach); 3133 } 3134} 3135 3136static void 3137exec_switch(struct tgsi_exec_machine *mach, 3138 const struct tgsi_full_instruction *inst) 3139{ 3140 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); 3141 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); 3142 3143 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; 3144 fetch_source(mach, &mach->Switch.selector, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT); 3145 mach->Switch.mask = 0x0; 3146 mach->Switch.defaultMask = 0x0; 3147 3148 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; 3149 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH; 3150 3151 UPDATE_EXEC_MASK(mach); 3152} 3153 3154static void 3155exec_case(struct tgsi_exec_machine *mach, 3156 const struct tgsi_full_instruction *inst) 3157{ 3158 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; 3159 union tgsi_exec_channel src; 3160 uint mask = 0; 3161 3162 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT); 3163 3164 if (mach->Switch.selector.u[0] == src.u[0]) { 3165 mask |= 0x1; 3166 } 3167 if (mach->Switch.selector.u[1] == src.u[1]) { 3168 mask |= 0x2; 3169 } 3170 if (mach->Switch.selector.u[2] == src.u[2]) { 3171 mask |= 0x4; 3172 } 3173 if 
(mach->Switch.selector.u[3] == src.u[3]) { 3174 mask |= 0x8; 3175 } 3176 3177 mach->Switch.defaultMask |= mask; 3178 3179 mach->Switch.mask |= mask & prevMask; 3180 3181 UPDATE_EXEC_MASK(mach); 3182} 3183 3184/* FIXME: this will only work if default is last */ 3185static void 3186exec_default(struct tgsi_exec_machine *mach) 3187{ 3188 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; 3189 3190 mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask; 3191 3192 UPDATE_EXEC_MASK(mach); 3193} 3194 3195static void 3196exec_endswitch(struct tgsi_exec_machine *mach) 3197{ 3198 mach->Switch = mach->SwitchStack[--mach->SwitchStackTop]; 3199 mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; 3200 3201 UPDATE_EXEC_MASK(mach); 3202} 3203 3204static void 3205micro_i2f(union tgsi_exec_channel *dst, 3206 const union tgsi_exec_channel *src) 3207{ 3208 dst->f[0] = (float)src->i[0]; 3209 dst->f[1] = (float)src->i[1]; 3210 dst->f[2] = (float)src->i[2]; 3211 dst->f[3] = (float)src->i[3]; 3212} 3213 3214static void 3215micro_not(union tgsi_exec_channel *dst, 3216 const union tgsi_exec_channel *src) 3217{ 3218 dst->u[0] = ~src->u[0]; 3219 dst->u[1] = ~src->u[1]; 3220 dst->u[2] = ~src->u[2]; 3221 dst->u[3] = ~src->u[3]; 3222} 3223 3224static void 3225micro_shl(union tgsi_exec_channel *dst, 3226 const union tgsi_exec_channel *src0, 3227 const union tgsi_exec_channel *src1) 3228{ 3229 unsigned masked_count; 3230 masked_count = src1->u[0] & 0x1f; 3231 dst->u[0] = src0->u[0] << masked_count; 3232 masked_count = src1->u[1] & 0x1f; 3233 dst->u[1] = src0->u[1] << masked_count; 3234 masked_count = src1->u[2] & 0x1f; 3235 dst->u[2] = src0->u[2] << masked_count; 3236 masked_count = src1->u[3] & 0x1f; 3237 dst->u[3] = src0->u[3] << masked_count; 3238} 3239 3240static void 3241micro_and(union tgsi_exec_channel *dst, 3242 const union tgsi_exec_channel *src0, 3243 const union tgsi_exec_channel *src1) 3244{ 3245 dst->u[0] = src0->u[0] & src1->u[0]; 3246 dst->u[1] = src0->u[1] 
& src1->u[1]; 3247 dst->u[2] = src0->u[2] & src1->u[2]; 3248 dst->u[3] = src0->u[3] & src1->u[3]; 3249} 3250 3251static void 3252micro_or(union tgsi_exec_channel *dst, 3253 const union tgsi_exec_channel *src0, 3254 const union tgsi_exec_channel *src1) 3255{ 3256 dst->u[0] = src0->u[0] | src1->u[0]; 3257 dst->u[1] = src0->u[1] | src1->u[1]; 3258 dst->u[2] = src0->u[2] | src1->u[2]; 3259 dst->u[3] = src0->u[3] | src1->u[3]; 3260} 3261 3262static void 3263micro_xor(union tgsi_exec_channel *dst, 3264 const union tgsi_exec_channel *src0, 3265 const union tgsi_exec_channel *src1) 3266{ 3267 dst->u[0] = src0->u[0] ^ src1->u[0]; 3268 dst->u[1] = src0->u[1] ^ src1->u[1]; 3269 dst->u[2] = src0->u[2] ^ src1->u[2]; 3270 dst->u[3] = src0->u[3] ^ src1->u[3]; 3271} 3272 3273static void 3274micro_mod(union tgsi_exec_channel *dst, 3275 const union tgsi_exec_channel *src0, 3276 const union tgsi_exec_channel *src1) 3277{ 3278 dst->i[0] = src0->i[0] % src1->i[0]; 3279 dst->i[1] = src0->i[1] % src1->i[1]; 3280 dst->i[2] = src0->i[2] % src1->i[2]; 3281 dst->i[3] = src0->i[3] % src1->i[3]; 3282} 3283 3284static void 3285micro_f2i(union tgsi_exec_channel *dst, 3286 const union tgsi_exec_channel *src) 3287{ 3288 dst->i[0] = (int)src->f[0]; 3289 dst->i[1] = (int)src->f[1]; 3290 dst->i[2] = (int)src->f[2]; 3291 dst->i[3] = (int)src->f[3]; 3292} 3293 3294static void 3295micro_fseq(union tgsi_exec_channel *dst, 3296 const union tgsi_exec_channel *src0, 3297 const union tgsi_exec_channel *src1) 3298{ 3299 dst->u[0] = src0->f[0] == src1->f[0] ? ~0 : 0; 3300 dst->u[1] = src0->f[1] == src1->f[1] ? ~0 : 0; 3301 dst->u[2] = src0->f[2] == src1->f[2] ? ~0 : 0; 3302 dst->u[3] = src0->f[3] == src1->f[3] ? ~0 : 0; 3303} 3304 3305static void 3306micro_fsge(union tgsi_exec_channel *dst, 3307 const union tgsi_exec_channel *src0, 3308 const union tgsi_exec_channel *src1) 3309{ 3310 dst->u[0] = src0->f[0] >= src1->f[0] ? ~0 : 0; 3311 dst->u[1] = src0->f[1] >= src1->f[1] ? 
~0 : 0; 3312 dst->u[2] = src0->f[2] >= src1->f[2] ? ~0 : 0; 3313 dst->u[3] = src0->f[3] >= src1->f[3] ? ~0 : 0; 3314} 3315 3316static void 3317micro_fslt(union tgsi_exec_channel *dst, 3318 const union tgsi_exec_channel *src0, 3319 const union tgsi_exec_channel *src1) 3320{ 3321 dst->u[0] = src0->f[0] < src1->f[0] ? ~0 : 0; 3322 dst->u[1] = src0->f[1] < src1->f[1] ? ~0 : 0; 3323 dst->u[2] = src0->f[2] < src1->f[2] ? ~0 : 0; 3324 dst->u[3] = src0->f[3] < src1->f[3] ? ~0 : 0; 3325} 3326 3327static void 3328micro_fsne(union tgsi_exec_channel *dst, 3329 const union tgsi_exec_channel *src0, 3330 const union tgsi_exec_channel *src1) 3331{ 3332 dst->u[0] = src0->f[0] != src1->f[0] ? ~0 : 0; 3333 dst->u[1] = src0->f[1] != src1->f[1] ? ~0 : 0; 3334 dst->u[2] = src0->f[2] != src1->f[2] ? ~0 : 0; 3335 dst->u[3] = src0->f[3] != src1->f[3] ? ~0 : 0; 3336} 3337 3338static void 3339micro_idiv(union tgsi_exec_channel *dst, 3340 const union tgsi_exec_channel *src0, 3341 const union tgsi_exec_channel *src1) 3342{ 3343 dst->i[0] = src1->i[0] ? src0->i[0] / src1->i[0] : 0; 3344 dst->i[1] = src1->i[1] ? src0->i[1] / src1->i[1] : 0; 3345 dst->i[2] = src1->i[2] ? src0->i[2] / src1->i[2] : 0; 3346 dst->i[3] = src1->i[3] ? src0->i[3] / src1->i[3] : 0; 3347} 3348 3349static void 3350micro_imax(union tgsi_exec_channel *dst, 3351 const union tgsi_exec_channel *src0, 3352 const union tgsi_exec_channel *src1) 3353{ 3354 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; 3355 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; 3356 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; 3357 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; 3358} 3359 3360static void 3361micro_imin(union tgsi_exec_channel *dst, 3362 const union tgsi_exec_channel *src0, 3363 const union tgsi_exec_channel *src1) 3364{ 3365 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; 3366 dst->i[1] = src0->i[1] < src1->i[1] ? 
src0->i[1] : src1->i[1]; 3367 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; 3368 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; 3369} 3370 3371static void 3372micro_isge(union tgsi_exec_channel *dst, 3373 const union tgsi_exec_channel *src0, 3374 const union tgsi_exec_channel *src1) 3375{ 3376 dst->i[0] = src0->i[0] >= src1->i[0] ? -1 : 0; 3377 dst->i[1] = src0->i[1] >= src1->i[1] ? -1 : 0; 3378 dst->i[2] = src0->i[2] >= src1->i[2] ? -1 : 0; 3379 dst->i[3] = src0->i[3] >= src1->i[3] ? -1 : 0; 3380} 3381 3382static void 3383micro_ishr(union tgsi_exec_channel *dst, 3384 const union tgsi_exec_channel *src0, 3385 const union tgsi_exec_channel *src1) 3386{ 3387 unsigned masked_count; 3388 masked_count = src1->i[0] & 0x1f; 3389 dst->i[0] = src0->i[0] >> masked_count; 3390 masked_count = src1->i[1] & 0x1f; 3391 dst->i[1] = src0->i[1] >> masked_count; 3392 masked_count = src1->i[2] & 0x1f; 3393 dst->i[2] = src0->i[2] >> masked_count; 3394 masked_count = src1->i[3] & 0x1f; 3395 dst->i[3] = src0->i[3] >> masked_count; 3396} 3397 3398static void 3399micro_islt(union tgsi_exec_channel *dst, 3400 const union tgsi_exec_channel *src0, 3401 const union tgsi_exec_channel *src1) 3402{ 3403 dst->i[0] = src0->i[0] < src1->i[0] ? -1 : 0; 3404 dst->i[1] = src0->i[1] < src1->i[1] ? -1 : 0; 3405 dst->i[2] = src0->i[2] < src1->i[2] ? -1 : 0; 3406 dst->i[3] = src0->i[3] < src1->i[3] ? 
-1 : 0; 3407} 3408 3409static void 3410micro_f2u(union tgsi_exec_channel *dst, 3411 const union tgsi_exec_channel *src) 3412{ 3413 dst->u[0] = (uint)src->f[0]; 3414 dst->u[1] = (uint)src->f[1]; 3415 dst->u[2] = (uint)src->f[2]; 3416 dst->u[3] = (uint)src->f[3]; 3417} 3418 3419static void 3420micro_u2f(union tgsi_exec_channel *dst, 3421 const union tgsi_exec_channel *src) 3422{ 3423 dst->f[0] = (float)src->u[0]; 3424 dst->f[1] = (float)src->u[1]; 3425 dst->f[2] = (float)src->u[2]; 3426 dst->f[3] = (float)src->u[3]; 3427} 3428 3429static void 3430micro_uadd(union tgsi_exec_channel *dst, 3431 const union tgsi_exec_channel *src0, 3432 const union tgsi_exec_channel *src1) 3433{ 3434 dst->u[0] = src0->u[0] + src1->u[0]; 3435 dst->u[1] = src0->u[1] + src1->u[1]; 3436 dst->u[2] = src0->u[2] + src1->u[2]; 3437 dst->u[3] = src0->u[3] + src1->u[3]; 3438} 3439 3440static void 3441micro_udiv(union tgsi_exec_channel *dst, 3442 const union tgsi_exec_channel *src0, 3443 const union tgsi_exec_channel *src1) 3444{ 3445 dst->u[0] = src1->u[0] ? src0->u[0] / src1->u[0] : ~0u; 3446 dst->u[1] = src1->u[1] ? src0->u[1] / src1->u[1] : ~0u; 3447 dst->u[2] = src1->u[2] ? src0->u[2] / src1->u[2] : ~0u; 3448 dst->u[3] = src1->u[3] ? src0->u[3] / src1->u[3] : ~0u; 3449} 3450 3451static void 3452micro_umad(union tgsi_exec_channel *dst, 3453 const union tgsi_exec_channel *src0, 3454 const union tgsi_exec_channel *src1, 3455 const union tgsi_exec_channel *src2) 3456{ 3457 dst->u[0] = src0->u[0] * src1->u[0] + src2->u[0]; 3458 dst->u[1] = src0->u[1] * src1->u[1] + src2->u[1]; 3459 dst->u[2] = src0->u[2] * src1->u[2] + src2->u[2]; 3460 dst->u[3] = src0->u[3] * src1->u[3] + src2->u[3]; 3461} 3462 3463static void 3464micro_umax(union tgsi_exec_channel *dst, 3465 const union tgsi_exec_channel *src0, 3466 const union tgsi_exec_channel *src1) 3467{ 3468 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; 3469 dst->u[1] = src0->u[1] > src1->u[1] ? 
src0->u[1] : src1->u[1]; 3470 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; 3471 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; 3472} 3473 3474static void 3475micro_umin(union tgsi_exec_channel *dst, 3476 const union tgsi_exec_channel *src0, 3477 const union tgsi_exec_channel *src1) 3478{ 3479 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; 3480 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; 3481 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; 3482 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; 3483} 3484 3485static void 3486micro_umod(union tgsi_exec_channel *dst, 3487 const union tgsi_exec_channel *src0, 3488 const union tgsi_exec_channel *src1) 3489{ 3490 dst->u[0] = src1->u[0] ? src0->u[0] % src1->u[0] : ~0u; 3491 dst->u[1] = src1->u[1] ? src0->u[1] % src1->u[1] : ~0u; 3492 dst->u[2] = src1->u[2] ? src0->u[2] % src1->u[2] : ~0u; 3493 dst->u[3] = src1->u[3] ? src0->u[3] % src1->u[3] : ~0u; 3494} 3495 3496static void 3497micro_umul(union tgsi_exec_channel *dst, 3498 const union tgsi_exec_channel *src0, 3499 const union tgsi_exec_channel *src1) 3500{ 3501 dst->u[0] = src0->u[0] * src1->u[0]; 3502 dst->u[1] = src0->u[1] * src1->u[1]; 3503 dst->u[2] = src0->u[2] * src1->u[2]; 3504 dst->u[3] = src0->u[3] * src1->u[3]; 3505} 3506 3507static void 3508micro_imul_hi(union tgsi_exec_channel *dst, 3509 const union tgsi_exec_channel *src0, 3510 const union tgsi_exec_channel *src1) 3511{ 3512#define I64M(x, y) ((((int64_t)x) * ((int64_t)y)) >> 32) 3513 dst->i[0] = I64M(src0->i[0], src1->i[0]); 3514 dst->i[1] = I64M(src0->i[1], src1->i[1]); 3515 dst->i[2] = I64M(src0->i[2], src1->i[2]); 3516 dst->i[3] = I64M(src0->i[3], src1->i[3]); 3517#undef I64M 3518} 3519 3520static void 3521micro_umul_hi(union tgsi_exec_channel *dst, 3522 const union tgsi_exec_channel *src0, 3523 const union tgsi_exec_channel *src1) 3524{ 3525#define U64M(x, y) ((((uint64_t)x) * ((uint64_t)y)) >> 32) 3526 
dst->u[0] = U64M(src0->u[0], src1->u[0]); 3527 dst->u[1] = U64M(src0->u[1], src1->u[1]); 3528 dst->u[2] = U64M(src0->u[2], src1->u[2]); 3529 dst->u[3] = U64M(src0->u[3], src1->u[3]); 3530#undef U64M 3531} 3532 3533static void 3534micro_useq(union tgsi_exec_channel *dst, 3535 const union tgsi_exec_channel *src0, 3536 const union tgsi_exec_channel *src1) 3537{ 3538 dst->u[0] = src0->u[0] == src1->u[0] ? ~0 : 0; 3539 dst->u[1] = src0->u[1] == src1->u[1] ? ~0 : 0; 3540 dst->u[2] = src0->u[2] == src1->u[2] ? ~0 : 0; 3541 dst->u[3] = src0->u[3] == src1->u[3] ? ~0 : 0; 3542} 3543 3544static void 3545micro_usge(union tgsi_exec_channel *dst, 3546 const union tgsi_exec_channel *src0, 3547 const union tgsi_exec_channel *src1) 3548{ 3549 dst->u[0] = src0->u[0] >= src1->u[0] ? ~0 : 0; 3550 dst->u[1] = src0->u[1] >= src1->u[1] ? ~0 : 0; 3551 dst->u[2] = src0->u[2] >= src1->u[2] ? ~0 : 0; 3552 dst->u[3] = src0->u[3] >= src1->u[3] ? ~0 : 0; 3553} 3554 3555static void 3556micro_ushr(union tgsi_exec_channel *dst, 3557 const union tgsi_exec_channel *src0, 3558 const union tgsi_exec_channel *src1) 3559{ 3560 unsigned masked_count; 3561 masked_count = src1->u[0] & 0x1f; 3562 dst->u[0] = src0->u[0] >> masked_count; 3563 masked_count = src1->u[1] & 0x1f; 3564 dst->u[1] = src0->u[1] >> masked_count; 3565 masked_count = src1->u[2] & 0x1f; 3566 dst->u[2] = src0->u[2] >> masked_count; 3567 masked_count = src1->u[3] & 0x1f; 3568 dst->u[3] = src0->u[3] >> masked_count; 3569} 3570 3571static void 3572micro_uslt(union tgsi_exec_channel *dst, 3573 const union tgsi_exec_channel *src0, 3574 const union tgsi_exec_channel *src1) 3575{ 3576 dst->u[0] = src0->u[0] < src1->u[0] ? ~0 : 0; 3577 dst->u[1] = src0->u[1] < src1->u[1] ? ~0 : 0; 3578 dst->u[2] = src0->u[2] < src1->u[2] ? ~0 : 0; 3579 dst->u[3] = src0->u[3] < src1->u[3] ? 
~0 : 0; 3580} 3581 3582static void 3583micro_usne(union tgsi_exec_channel *dst, 3584 const union tgsi_exec_channel *src0, 3585 const union tgsi_exec_channel *src1) 3586{ 3587 dst->u[0] = src0->u[0] != src1->u[0] ? ~0 : 0; 3588 dst->u[1] = src0->u[1] != src1->u[1] ? ~0 : 0; 3589 dst->u[2] = src0->u[2] != src1->u[2] ? ~0 : 0; 3590 dst->u[3] = src0->u[3] != src1->u[3] ? ~0 : 0; 3591} 3592 3593static void 3594micro_uarl(union tgsi_exec_channel *dst, 3595 const union tgsi_exec_channel *src) 3596{ 3597 dst->i[0] = src->u[0]; 3598 dst->i[1] = src->u[1]; 3599 dst->i[2] = src->u[2]; 3600 dst->i[3] = src->u[3]; 3601} 3602 3603static void 3604micro_ucmp(union tgsi_exec_channel *dst, 3605 const union tgsi_exec_channel *src0, 3606 const union tgsi_exec_channel *src1, 3607 const union tgsi_exec_channel *src2) 3608{ 3609 dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; 3610 dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; 3611 dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; 3612 dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; 3613} 3614 3615/** 3616 * Signed bitfield extract (i.e. 
sign-extend the extracted bits) 3617 */ 3618static void 3619micro_ibfe(union tgsi_exec_channel *dst, 3620 const union tgsi_exec_channel *src0, 3621 const union tgsi_exec_channel *src1, 3622 const union tgsi_exec_channel *src2) 3623{ 3624 int i; 3625 for (i = 0; i < 4; i++) { 3626 int width = src2->i[i] & 0x1f; 3627 int offset = src1->i[i] & 0x1f; 3628 if (width == 0) 3629 dst->i[i] = 0; 3630 else if (width + offset < 32) 3631 dst->i[i] = (src0->i[i] << (32 - width - offset)) >> (32 - width); 3632 else 3633 dst->i[i] = src0->i[i] >> offset; 3634 } 3635} 3636 3637/** 3638 * Unsigned bitfield extract 3639 */ 3640static void 3641micro_ubfe(union tgsi_exec_channel *dst, 3642 const union tgsi_exec_channel *src0, 3643 const union tgsi_exec_channel *src1, 3644 const union tgsi_exec_channel *src2) 3645{ 3646 int i; 3647 for (i = 0; i < 4; i++) { 3648 int width = src2->u[i] & 0x1f; 3649 int offset = src1->u[i] & 0x1f; 3650 if (width == 0) 3651 dst->u[i] = 0; 3652 else if (width + offset < 32) 3653 dst->u[i] = (src0->u[i] << (32 - width - offset)) >> (32 - width); 3654 else 3655 dst->u[i] = src0->u[i] >> offset; 3656 } 3657} 3658 3659/** 3660 * Bitfield insert: copy low bits from src1 into a region of src0. 
3661 */ 3662static void 3663micro_bfi(union tgsi_exec_channel *dst, 3664 const union tgsi_exec_channel *src0, 3665 const union tgsi_exec_channel *src1, 3666 const union tgsi_exec_channel *src2, 3667 const union tgsi_exec_channel *src3) 3668{ 3669 int i; 3670 for (i = 0; i < 4; i++) { 3671 int width = src3->u[i] & 0x1f; 3672 int offset = src2->u[i] & 0x1f; 3673 int bitmask = ((1 << width) - 1) << offset; 3674 dst->u[i] = ((src1->u[i] << offset) & bitmask) | (src0->u[i] & ~bitmask); 3675 } 3676} 3677 3678static void 3679micro_brev(union tgsi_exec_channel *dst, 3680 const union tgsi_exec_channel *src) 3681{ 3682 dst->u[0] = util_bitreverse(src->u[0]); 3683 dst->u[1] = util_bitreverse(src->u[1]); 3684 dst->u[2] = util_bitreverse(src->u[2]); 3685 dst->u[3] = util_bitreverse(src->u[3]); 3686} 3687 3688static void 3689micro_popc(union tgsi_exec_channel *dst, 3690 const union tgsi_exec_channel *src) 3691{ 3692 dst->u[0] = util_bitcount(src->u[0]); 3693 dst->u[1] = util_bitcount(src->u[1]); 3694 dst->u[2] = util_bitcount(src->u[2]); 3695 dst->u[3] = util_bitcount(src->u[3]); 3696} 3697 3698static void 3699micro_lsb(union tgsi_exec_channel *dst, 3700 const union tgsi_exec_channel *src) 3701{ 3702 dst->i[0] = ffs(src->u[0]) - 1; 3703 dst->i[1] = ffs(src->u[1]) - 1; 3704 dst->i[2] = ffs(src->u[2]) - 1; 3705 dst->i[3] = ffs(src->u[3]) - 1; 3706} 3707 3708static void 3709micro_imsb(union tgsi_exec_channel *dst, 3710 const union tgsi_exec_channel *src) 3711{ 3712 dst->i[0] = util_last_bit_signed(src->i[0]) - 1; 3713 dst->i[1] = util_last_bit_signed(src->i[1]) - 1; 3714 dst->i[2] = util_last_bit_signed(src->i[2]) - 1; 3715 dst->i[3] = util_last_bit_signed(src->i[3]) - 1; 3716} 3717 3718static void 3719micro_umsb(union tgsi_exec_channel *dst, 3720 const union tgsi_exec_channel *src) 3721{ 3722 dst->i[0] = util_last_bit(src->u[0]) - 1; 3723 dst->i[1] = util_last_bit(src->u[1]) - 1; 3724 dst->i[2] = util_last_bit(src->u[2]) - 1; 3725 dst->i[3] = util_last_bit(src->u[3]) - 1; 3726} 
3727 3728static void 3729exec_instruction( 3730 struct tgsi_exec_machine *mach, 3731 const struct tgsi_full_instruction *inst, 3732 int *pc ) 3733{ 3734 union tgsi_exec_channel r[10]; 3735 3736 (*pc)++; 3737 3738 switch (inst->Instruction.Opcode) { 3739 case TGSI_OPCODE_ARL: 3740 exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); 3741 break; 3742 3743 case TGSI_OPCODE_MOV: 3744 exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 3745 break; 3746 3747 case TGSI_OPCODE_LIT: 3748 exec_lit(mach, inst); 3749 break; 3750 3751 case TGSI_OPCODE_RCP: 3752 exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3753 break; 3754 3755 case TGSI_OPCODE_RSQ: 3756 exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3757 break; 3758 3759 case TGSI_OPCODE_EXP: 3760 exec_exp(mach, inst); 3761 break; 3762 3763 case TGSI_OPCODE_LOG: 3764 exec_log(mach, inst); 3765 break; 3766 3767 case TGSI_OPCODE_MUL: 3768 exec_vector_binary(mach, inst, micro_mul, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3769 break; 3770 3771 case TGSI_OPCODE_ADD: 3772 exec_vector_binary(mach, inst, micro_add, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3773 break; 3774 3775 case TGSI_OPCODE_DP3: 3776 exec_dp3(mach, inst); 3777 break; 3778 3779 case TGSI_OPCODE_DP4: 3780 exec_dp4(mach, inst); 3781 break; 3782 3783 case TGSI_OPCODE_DST: 3784 exec_dst(mach, inst); 3785 break; 3786 3787 case TGSI_OPCODE_MIN: 3788 exec_vector_binary(mach, inst, micro_min, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3789 break; 3790 3791 case TGSI_OPCODE_MAX: 3792 exec_vector_binary(mach, inst, micro_max, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3793 break; 3794 3795 case TGSI_OPCODE_SLT: 3796 exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3797 break; 3798 3799 case TGSI_OPCODE_SGE: 3800 exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT, 
TGSI_EXEC_DATA_FLOAT); 3801 break; 3802 3803 case TGSI_OPCODE_MAD: 3804 exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3805 break; 3806 3807 case TGSI_OPCODE_SUB: 3808 exec_vector_binary(mach, inst, micro_sub, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3809 break; 3810 3811 case TGSI_OPCODE_LRP: 3812 exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3813 break; 3814 3815 case TGSI_OPCODE_CND: 3816 exec_vector_trinary(mach, inst, micro_cnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3817 break; 3818 3819 case TGSI_OPCODE_SQRT: 3820 exec_scalar_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3821 break; 3822 3823 case TGSI_OPCODE_DP2A: 3824 exec_dp2a(mach, inst); 3825 break; 3826 3827 case TGSI_OPCODE_FRC: 3828 exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3829 break; 3830 3831 case TGSI_OPCODE_CLAMP: 3832 exec_vector_trinary(mach, inst, micro_clamp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3833 break; 3834 3835 case TGSI_OPCODE_FLR: 3836 exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3837 break; 3838 3839 case TGSI_OPCODE_ROUND: 3840 exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3841 break; 3842 3843 case TGSI_OPCODE_EX2: 3844 exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3845 break; 3846 3847 case TGSI_OPCODE_LG2: 3848 exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3849 break; 3850 3851 case TGSI_OPCODE_POW: 3852 exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3853 break; 3854 3855 case TGSI_OPCODE_XPD: 3856 exec_xpd(mach, inst); 3857 break; 3858 3859 case TGSI_OPCODE_ABS: 3860 exec_vector_unary(mach, inst, micro_abs, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3861 break; 3862 3863 case TGSI_OPCODE_RCC: 3864 
exec_scalar_unary(mach, inst, micro_rcc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3865 break; 3866 3867 case TGSI_OPCODE_DPH: 3868 exec_dph(mach, inst); 3869 break; 3870 3871 case TGSI_OPCODE_COS: 3872 exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3873 break; 3874 3875 case TGSI_OPCODE_DDX: 3876 exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3877 break; 3878 3879 case TGSI_OPCODE_DDY: 3880 exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3881 break; 3882 3883 case TGSI_OPCODE_KILL: 3884 exec_kill (mach, inst); 3885 break; 3886 3887 case TGSI_OPCODE_KILL_IF: 3888 exec_kill_if (mach, inst); 3889 break; 3890 3891 case TGSI_OPCODE_PK2H: 3892 assert (0); 3893 break; 3894 3895 case TGSI_OPCODE_PK2US: 3896 assert (0); 3897 break; 3898 3899 case TGSI_OPCODE_PK4B: 3900 assert (0); 3901 break; 3902 3903 case TGSI_OPCODE_PK4UB: 3904 assert (0); 3905 break; 3906 3907 case TGSI_OPCODE_RFL: 3908 exec_rfl(mach, inst); 3909 break; 3910 3911 case TGSI_OPCODE_SEQ: 3912 exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3913 break; 3914 3915 case TGSI_OPCODE_SFL: 3916 exec_vector(mach, inst, micro_sfl, TGSI_EXEC_DATA_FLOAT); 3917 break; 3918 3919 case TGSI_OPCODE_SGT: 3920 exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3921 break; 3922 3923 case TGSI_OPCODE_SIN: 3924 exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3925 break; 3926 3927 case TGSI_OPCODE_SLE: 3928 exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3929 break; 3930 3931 case TGSI_OPCODE_SNE: 3932 exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 3933 break; 3934 3935 case TGSI_OPCODE_STR: 3936 exec_vector(mach, inst, micro_str, TGSI_EXEC_DATA_FLOAT); 3937 break; 3938 3939 case TGSI_OPCODE_TEX: 3940 /* simple texture 
lookup */ 3941 /* src[0] = texcoord */ 3942 /* src[1] = sampler unit */ 3943 exec_tex(mach, inst, TEX_MODIFIER_NONE, 1); 3944 break; 3945 3946 case TGSI_OPCODE_TXB: 3947 /* Texture lookup with lod bias */ 3948 /* src[0] = texcoord (src[0].w = LOD bias) */ 3949 /* src[1] = sampler unit */ 3950 exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 1); 3951 break; 3952 3953 case TGSI_OPCODE_TXD: 3954 /* Texture lookup with explict partial derivatives */ 3955 /* src[0] = texcoord */ 3956 /* src[1] = d[strq]/dx */ 3957 /* src[2] = d[strq]/dy */ 3958 /* src[3] = sampler unit */ 3959 exec_txd(mach, inst); 3960 break; 3961 3962 case TGSI_OPCODE_TXL: 3963 /* Texture lookup with explit LOD */ 3964 /* src[0] = texcoord (src[0].w = LOD) */ 3965 /* src[1] = sampler unit */ 3966 exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 1); 3967 break; 3968 3969 case TGSI_OPCODE_TXP: 3970 /* Texture lookup with projection */ 3971 /* src[0] = texcoord (src[0].w = projection) */ 3972 /* src[1] = sampler unit */ 3973 exec_tex(mach, inst, TEX_MODIFIER_PROJECTED, 1); 3974 break; 3975 3976 case TGSI_OPCODE_UP2H: 3977 assert (0); 3978 break; 3979 3980 case TGSI_OPCODE_UP2US: 3981 assert (0); 3982 break; 3983 3984 case TGSI_OPCODE_UP4B: 3985 assert (0); 3986 break; 3987 3988 case TGSI_OPCODE_UP4UB: 3989 assert (0); 3990 break; 3991 3992 case TGSI_OPCODE_X2D: 3993 exec_x2d(mach, inst); 3994 break; 3995 3996 case TGSI_OPCODE_ARA: 3997 assert (0); 3998 break; 3999 4000 case TGSI_OPCODE_ARR: 4001 exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); 4002 break; 4003 4004 case TGSI_OPCODE_BRA: 4005 assert (0); 4006 break; 4007 4008 case TGSI_OPCODE_CAL: 4009 /* skip the call if no execution channels are enabled */ 4010 if (mach->ExecMask) { 4011 /* do the call */ 4012 4013 /* First, record the depths of the execution stacks. 4014 * This is important for deeply nested/looped return statements. 4015 * We have to unwind the stacks by the correct amount. 
For a 4016 * real code generator, we could determine the number of entries 4017 * to pop off each stack with simple static analysis and avoid 4018 * implementing this data structure at run time. 4019 */ 4020 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop; 4021 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop; 4022 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop; 4023 mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop; 4024 mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop; 4025 /* note that PC was already incremented above */ 4026 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc; 4027 4028 mach->CallStackTop++; 4029 4030 /* Second, push the Cond, Loop, Cont, Func stacks */ 4031 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 4032 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 4033 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 4034 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); 4035 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); 4036 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); 4037 4038 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 4039 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 4040 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 4041 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; 4042 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; 4043 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; 4044 4045 /* Finally, jump to the subroutine */ 4046 *pc = inst->Label.Label; 4047 } 4048 break; 4049 4050 case TGSI_OPCODE_RET: 4051 mach->FuncMask &= ~mach->ExecMask; 4052 UPDATE_EXEC_MASK(mach); 4053 4054 if (mach->FuncMask == 0x0) { 4055 /* really return now (otherwise, keep executing */ 4056 4057 if (mach->CallStackTop == 0) { 4058 /* returning from main() */ 4059 mach->CondStackTop = 0; 4060 mach->LoopStackTop = 0; 4061 *pc = -1; 4062 
return; 4063 } 4064 4065 assert(mach->CallStackTop > 0); 4066 mach->CallStackTop--; 4067 4068 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; 4069 mach->CondMask = mach->CondStack[mach->CondStackTop]; 4070 4071 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; 4072 mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; 4073 4074 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; 4075 mach->ContMask = mach->ContStack[mach->ContStackTop]; 4076 4077 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; 4078 mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; 4079 4080 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; 4081 mach->BreakType = mach->BreakStack[mach->BreakStackTop]; 4082 4083 assert(mach->FuncStackTop > 0); 4084 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 4085 4086 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; 4087 4088 UPDATE_EXEC_MASK(mach); 4089 } 4090 break; 4091 4092 case TGSI_OPCODE_SSG: 4093 exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 4094 break; 4095 4096 case TGSI_OPCODE_CMP: 4097 exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 4098 break; 4099 4100 case TGSI_OPCODE_SCS: 4101 exec_scs(mach, inst); 4102 break; 4103 4104 case TGSI_OPCODE_NRM: 4105 exec_nrm3(mach, inst); 4106 break; 4107 4108 case TGSI_OPCODE_NRM4: 4109 exec_nrm4(mach, inst); 4110 break; 4111 4112 case TGSI_OPCODE_DIV: 4113 exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 4114 break; 4115 4116 case TGSI_OPCODE_DP2: 4117 exec_dp2(mach, inst); 4118 break; 4119 4120 case TGSI_OPCODE_IF: 4121 /* push CondMask */ 4122 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 4123 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 4124 FETCH( &r[0], 0, TGSI_CHAN_X ); 4125 /* update CondMask */ 4126 if( ! 
r[0].f[0] ) { 4127 mach->CondMask &= ~0x1; 4128 } 4129 if( ! r[0].f[1] ) { 4130 mach->CondMask &= ~0x2; 4131 } 4132 if( ! r[0].f[2] ) { 4133 mach->CondMask &= ~0x4; 4134 } 4135 if( ! r[0].f[3] ) { 4136 mach->CondMask &= ~0x8; 4137 } 4138 UPDATE_EXEC_MASK(mach); 4139 /* Todo: If CondMask==0, jump to ELSE */ 4140 break; 4141 4142 case TGSI_OPCODE_UIF: 4143 /* push CondMask */ 4144 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 4145 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 4146 IFETCH( &r[0], 0, TGSI_CHAN_X ); 4147 /* update CondMask */ 4148 if( ! r[0].u[0] ) { 4149 mach->CondMask &= ~0x1; 4150 } 4151 if( ! r[0].u[1] ) { 4152 mach->CondMask &= ~0x2; 4153 } 4154 if( ! r[0].u[2] ) { 4155 mach->CondMask &= ~0x4; 4156 } 4157 if( ! r[0].u[3] ) { 4158 mach->CondMask &= ~0x8; 4159 } 4160 UPDATE_EXEC_MASK(mach); 4161 /* Todo: If CondMask==0, jump to ELSE */ 4162 break; 4163 4164 case TGSI_OPCODE_ELSE: 4165 /* invert CondMask wrt previous mask */ 4166 { 4167 uint prevMask; 4168 assert(mach->CondStackTop > 0); 4169 prevMask = mach->CondStack[mach->CondStackTop - 1]; 4170 mach->CondMask = ~mach->CondMask & prevMask; 4171 UPDATE_EXEC_MASK(mach); 4172 /* Todo: If CondMask==0, jump to ENDIF */ 4173 } 4174 break; 4175 4176 case TGSI_OPCODE_ENDIF: 4177 /* pop CondMask */ 4178 assert(mach->CondStackTop > 0); 4179 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 4180 UPDATE_EXEC_MASK(mach); 4181 break; 4182 4183 case TGSI_OPCODE_END: 4184 /* make sure we end primitives which haven't 4185 * been explicitly emitted */ 4186 conditional_emit_primitive(mach); 4187 /* halt execution */ 4188 *pc = -1; 4189 break; 4190 4191 case TGSI_OPCODE_PUSHA: 4192 assert (0); 4193 break; 4194 4195 case TGSI_OPCODE_POPA: 4196 assert (0); 4197 break; 4198 4199 case TGSI_OPCODE_CEIL: 4200 exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 4201 break; 4202 4203 case TGSI_OPCODE_I2F: 4204 exec_vector_unary(mach, inst, micro_i2f, 
TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT); 4205 break; 4206 4207 case TGSI_OPCODE_NOT: 4208 exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 4209 break; 4210 4211 case TGSI_OPCODE_TRUNC: 4212 exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 4213 break; 4214 4215 case TGSI_OPCODE_SHL: 4216 exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 4217 break; 4218 4219 case TGSI_OPCODE_AND: 4220 exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 4221 break; 4222 4223 case TGSI_OPCODE_OR: 4224 exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 4225 break; 4226 4227 case TGSI_OPCODE_MOD: 4228 exec_vector_binary(mach, inst, micro_mod, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 4229 break; 4230 4231 case TGSI_OPCODE_XOR: 4232 exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 4233 break; 4234 4235 case TGSI_OPCODE_SAD: 4236 assert (0); 4237 break; 4238 4239 case TGSI_OPCODE_TXF: 4240 exec_txf(mach, inst); 4241 break; 4242 4243 case TGSI_OPCODE_TXQ: 4244 exec_txq(mach, inst); 4245 break; 4246 4247 case TGSI_OPCODE_EMIT: 4248 emit_vertex(mach); 4249 break; 4250 4251 case TGSI_OPCODE_ENDPRIM: 4252 emit_primitive(mach); 4253 break; 4254 4255 case TGSI_OPCODE_BGNLOOP: 4256 /* push LoopMask and ContMasks */ 4257 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 4258 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 4259 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 4260 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); 4261 4262 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 4263 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 4264 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1; 4265 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; 4266 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP; 4267 break; 4268 4269 
case TGSI_OPCODE_ENDLOOP: 4270 /* Restore ContMask, but don't pop */ 4271 assert(mach->ContStackTop > 0); 4272 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 4273 UPDATE_EXEC_MASK(mach); 4274 if (mach->ExecMask) { 4275 /* repeat loop: jump to instruction just past BGNLOOP */ 4276 assert(mach->LoopLabelStackTop > 0); 4277 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; 4278 } 4279 else { 4280 /* exit loop: pop LoopMask */ 4281 assert(mach->LoopStackTop > 0); 4282 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 4283 /* pop ContMask */ 4284 assert(mach->ContStackTop > 0); 4285 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 4286 assert(mach->LoopLabelStackTop > 0); 4287 --mach->LoopLabelStackTop; 4288 4289 mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; 4290 } 4291 UPDATE_EXEC_MASK(mach); 4292 break; 4293 4294 case TGSI_OPCODE_BRK: 4295 exec_break(mach); 4296 break; 4297 4298 case TGSI_OPCODE_CONT: 4299 /* turn off cont channels for each enabled exec channel */ 4300 mach->ContMask &= ~mach->ExecMask; 4301 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 4302 UPDATE_EXEC_MASK(mach); 4303 break; 4304 4305 case TGSI_OPCODE_BGNSUB: 4306 /* no-op */ 4307 break; 4308 4309 case TGSI_OPCODE_ENDSUB: 4310 /* 4311 * XXX: This really should be a no-op. We should never reach this opcode. 
4312 */ 4313 4314 assert(mach->CallStackTop > 0); 4315 mach->CallStackTop--; 4316 4317 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; 4318 mach->CondMask = mach->CondStack[mach->CondStackTop]; 4319 4320 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; 4321 mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; 4322 4323 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; 4324 mach->ContMask = mach->ContStack[mach->ContStackTop]; 4325 4326 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; 4327 mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; 4328 4329 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; 4330 mach->BreakType = mach->BreakStack[mach->BreakStackTop]; 4331 4332 assert(mach->FuncStackTop > 0); 4333 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 4334 4335 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; 4336 4337 UPDATE_EXEC_MASK(mach); 4338 break; 4339 4340 case TGSI_OPCODE_NOP: 4341 break; 4342 4343 case TGSI_OPCODE_BREAKC: 4344 IFETCH(&r[0], 0, TGSI_CHAN_X); 4345 /* update CondMask */ 4346 if (r[0].u[0] && (mach->ExecMask & 0x1)) { 4347 mach->LoopMask &= ~0x1; 4348 } 4349 if (r[0].u[1] && (mach->ExecMask & 0x2)) { 4350 mach->LoopMask &= ~0x2; 4351 } 4352 if (r[0].u[2] && (mach->ExecMask & 0x4)) { 4353 mach->LoopMask &= ~0x4; 4354 } 4355 if (r[0].u[3] && (mach->ExecMask & 0x8)) { 4356 mach->LoopMask &= ~0x8; 4357 } 4358 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 4359 UPDATE_EXEC_MASK(mach); 4360 break; 4361 4362 case TGSI_OPCODE_F2I: 4363 exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); 4364 break; 4365 4366 case TGSI_OPCODE_FSEQ: 4367 exec_vector_binary(mach, inst, micro_fseq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 4368 break; 4369 4370 case TGSI_OPCODE_FSGE: 4371 exec_vector_binary(mach, inst, micro_fsge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 4372 break; 4373 4374 
case TGSI_OPCODE_FSLT: 4375 exec_vector_binary(mach, inst, micro_fslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 4376 break; 4377 4378 case TGSI_OPCODE_FSNE: 4379 exec_vector_binary(mach, inst, micro_fsne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 4380 break; 4381 4382 case TGSI_OPCODE_IDIV: 4383 exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 4384 break; 4385 4386 case TGSI_OPCODE_IMAX: 4387 exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 4388 break; 4389 4390 case TGSI_OPCODE_IMIN: 4391 exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 4392 break; 4393 4394 case TGSI_OPCODE_INEG: 4395 exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 4396 break; 4397 4398 case TGSI_OPCODE_ISGE: 4399 exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 4400 break; 4401 4402 case TGSI_OPCODE_ISHR: 4403 exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 4404 break; 4405 4406 case TGSI_OPCODE_ISLT: 4407 exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 4408 break; 4409 4410 case TGSI_OPCODE_F2U: 4411 exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 4412 break; 4413 4414 case TGSI_OPCODE_U2F: 4415 exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT); 4416 break; 4417 4418 case TGSI_OPCODE_UADD: 4419 exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 4420 break; 4421 4422 case TGSI_OPCODE_UDIV: 4423 exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 4424 break; 4425 4426 case TGSI_OPCODE_UMAD: 4427 exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 4428 break; 4429 4430 case TGSI_OPCODE_UMAX: 4431 exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 
4432 break; 4433 4434 case TGSI_OPCODE_UMIN: 4435 exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 4436 break; 4437 4438 case TGSI_OPCODE_UMOD: 4439 exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 4440 break; 4441 4442 case TGSI_OPCODE_UMUL: 4443 exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 4444 break; 4445 4446 case TGSI_OPCODE_IMUL_HI: 4447 exec_vector_binary(mach, inst, micro_imul_hi, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 4448 break; 4449 4450 case TGSI_OPCODE_UMUL_HI: 4451 exec_vector_binary(mach, inst, micro_umul_hi, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 4452 break; 4453 4454 case TGSI_OPCODE_USEQ: 4455 exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 4456 break; 4457 4458 case TGSI_OPCODE_USGE: 4459 exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 4460 break; 4461 4462 case TGSI_OPCODE_USHR: 4463 exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 4464 break; 4465 4466 case TGSI_OPCODE_USLT: 4467 exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 4468 break; 4469 4470 case TGSI_OPCODE_USNE: 4471 exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 4472 break; 4473 4474 case TGSI_OPCODE_SWITCH: 4475 exec_switch(mach, inst); 4476 break; 4477 4478 case TGSI_OPCODE_CASE: 4479 exec_case(mach, inst); 4480 break; 4481 4482 case TGSI_OPCODE_DEFAULT: 4483 exec_default(mach); 4484 break; 4485 4486 case TGSI_OPCODE_ENDSWITCH: 4487 exec_endswitch(mach); 4488 break; 4489 4490 case TGSI_OPCODE_SAMPLE_I: 4491 exec_txf(mach, inst); 4492 break; 4493 4494 case TGSI_OPCODE_SAMPLE_I_MS: 4495 assert(0); 4496 break; 4497 4498 case TGSI_OPCODE_SAMPLE: 4499 exec_sample(mach, inst, TEX_MODIFIER_NONE, FALSE); 4500 break; 4501 4502 case TGSI_OPCODE_SAMPLE_B: 4503 exec_sample(mach, inst, 
TEX_MODIFIER_LOD_BIAS, FALSE); 4504 break; 4505 4506 case TGSI_OPCODE_SAMPLE_C: 4507 exec_sample(mach, inst, TEX_MODIFIER_NONE, TRUE); 4508 break; 4509 4510 case TGSI_OPCODE_SAMPLE_C_LZ: 4511 exec_sample(mach, inst, TEX_MODIFIER_LEVEL_ZERO, TRUE); 4512 break; 4513 4514 case TGSI_OPCODE_SAMPLE_D: 4515 exec_sample_d(mach, inst); 4516 break; 4517 4518 case TGSI_OPCODE_SAMPLE_L: 4519 exec_sample(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, FALSE); 4520 break; 4521 4522 case TGSI_OPCODE_GATHER4: 4523 assert(0); 4524 break; 4525 4526 case TGSI_OPCODE_SVIEWINFO: 4527 exec_txq(mach, inst); 4528 break; 4529 4530 case TGSI_OPCODE_SAMPLE_POS: 4531 assert(0); 4532 break; 4533 4534 case TGSI_OPCODE_SAMPLE_INFO: 4535 assert(0); 4536 break; 4537 4538 case TGSI_OPCODE_UARL: 4539 exec_vector_unary(mach, inst, micro_uarl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT); 4540 break; 4541 4542 case TGSI_OPCODE_UCMP: 4543 exec_vector_trinary(mach, inst, micro_ucmp, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 4544 break; 4545 4546 case TGSI_OPCODE_IABS: 4547 exec_vector_unary(mach, inst, micro_iabs, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 4548 break; 4549 4550 case TGSI_OPCODE_ISSG: 4551 exec_vector_unary(mach, inst, micro_isgn, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 4552 break; 4553 4554 case TGSI_OPCODE_TEX2: 4555 /* simple texture lookup */ 4556 /* src[0] = texcoord */ 4557 /* src[1] = compare */ 4558 /* src[2] = sampler unit */ 4559 exec_tex(mach, inst, TEX_MODIFIER_NONE, 2); 4560 break; 4561 case TGSI_OPCODE_TXB2: 4562 /* simple texture lookup */ 4563 /* src[0] = texcoord */ 4564 /* src[1] = bias */ 4565 /* src[2] = sampler unit */ 4566 exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 2); 4567 break; 4568 case TGSI_OPCODE_TXL2: 4569 /* simple texture lookup */ 4570 /* src[0] = texcoord */ 4571 /* src[1] = lod */ 4572 /* src[2] = sampler unit */ 4573 exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 2); 4574 break; 4575 4576 case TGSI_OPCODE_IBFE: 4577 exec_vector_trinary(mach, inst, micro_ibfe, 
TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 4578 break; 4579 case TGSI_OPCODE_UBFE: 4580 exec_vector_trinary(mach, inst, micro_ubfe, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 4581 break; 4582 case TGSI_OPCODE_BFI: 4583 exec_vector_quaternary(mach, inst, micro_bfi, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 4584 break; 4585 case TGSI_OPCODE_BREV: 4586 exec_vector_unary(mach, inst, micro_brev, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 4587 break; 4588 case TGSI_OPCODE_POPC: 4589 exec_vector_unary(mach, inst, micro_popc, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 4590 break; 4591 case TGSI_OPCODE_LSB: 4592 exec_vector_unary(mach, inst, micro_lsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT); 4593 break; 4594 case TGSI_OPCODE_IMSB: 4595 exec_vector_unary(mach, inst, micro_imsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 4596 break; 4597 case TGSI_OPCODE_UMSB: 4598 exec_vector_unary(mach, inst, micro_umsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT); 4599 break; 4600 default: 4601 assert( 0 ); 4602 } 4603} 4604 4605 4606/** 4607 * Run TGSI interpreter. 
4608 * \return bitmask of "alive" quad components 4609 */ 4610uint 4611tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) 4612{ 4613 uint i; 4614 int pc = 0; 4615 uint default_mask = 0xf; 4616 4617 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; 4618 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; 4619 4620 if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { 4621 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; 4622 mach->Primitives[0] = 0; 4623 /* GS runs on a single primitive for now */ 4624 default_mask = 0x1; 4625 } 4626 4627 mach->CondMask = default_mask; 4628 mach->LoopMask = default_mask; 4629 mach->ContMask = default_mask; 4630 mach->FuncMask = default_mask; 4631 mach->ExecMask = default_mask; 4632 4633 mach->Switch.mask = default_mask; 4634 4635 assert(mach->CondStackTop == 0); 4636 assert(mach->LoopStackTop == 0); 4637 assert(mach->ContStackTop == 0); 4638 assert(mach->SwitchStackTop == 0); 4639 assert(mach->BreakStackTop == 0); 4640 assert(mach->CallStackTop == 0); 4641 4642 4643 /* execute declarations (interpolants) */ 4644 for (i = 0; i < mach->NumDeclarations; i++) { 4645 exec_declaration( mach, mach->Declarations+i ); 4646 } 4647 4648 { 4649#if DEBUG_EXECUTION 4650 struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS]; 4651 struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS]; 4652 uint inst = 1; 4653 4654 memset(mach->Temps, 0, sizeof(temps)); 4655 memset(mach->Outputs, 0, sizeof(outputs)); 4656 memset(temps, 0, sizeof(temps)); 4657 memset(outputs, 0, sizeof(outputs)); 4658#endif 4659 4660 /* execute instructions, until pc is set to -1 */ 4661 while (pc != -1) { 4662 4663#if DEBUG_EXECUTION 4664 uint i; 4665 4666 tgsi_dump_instruction(&mach->Instructions[pc], inst++); 4667#endif 4668 4669 assert(pc < (int) mach->NumInstructions); 4670 exec_instruction(mach, mach->Instructions + pc, &pc); 4671 4672#if DEBUG_EXECUTION 4673 for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) 
{ 4674 if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) { 4675 uint j; 4676 4677 memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i])); 4678 debug_printf("TEMP[%2u] = ", i); 4679 for (j = 0; j < 4; j++) { 4680 if (j > 0) { 4681 debug_printf(" "); 4682 } 4683 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", 4684 temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j], 4685 temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j], 4686 temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j], 4687 temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]); 4688 } 4689 } 4690 } 4691 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { 4692 if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) { 4693 uint j; 4694 4695 memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i])); 4696 debug_printf("OUT[%2u] = ", i); 4697 for (j = 0; j < 4; j++) { 4698 if (j > 0) { 4699 debug_printf(" "); 4700 } 4701 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", 4702 outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j], 4703 outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j], 4704 outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j], 4705 outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]); 4706 } 4707 } 4708 } 4709#endif 4710 } 4711 } 4712 4713#if 0 4714 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ 4715 if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { 4716 /* 4717 * Scale back depth component. 4718 */ 4719 for (i = 0; i < 4; i++) 4720 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; 4721 } 4722#endif 4723 4724 /* Strictly speaking, these assertions aren't really needed but they 4725 * can potentially catch some bugs in the control flow code. 4726 */ 4727 assert(mach->CondStackTop == 0); 4728 assert(mach->LoopStackTop == 0); 4729 assert(mach->ContStackTop == 0); 4730 assert(mach->SwitchStackTop == 0); 4731 assert(mach->BreakStackTop == 0); 4732 assert(mach->CallStackTop == 0); 4733 4734 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 4735} 4736