tgsi_exec.c revision 01e04c3f
1/************************************************************************** 2 * 3 * Copyright 2007-2008 VMware, Inc. 4 * All Rights Reserved. 5 * Copyright 2009-2010 VMware, Inc. All rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial portions 17 * of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 **************************************************************************/ 28 29/** 30 * TGSI interpreter/executor. 31 * 32 * Flow control information: 33 * 34 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) 35 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special 36 * care since a condition may be true for some quad components but false 37 * for other components. 38 * 39 * We basically execute all statements (even if they're in the part of 40 * an IF/ELSE clause that's "not taken") and use a special mask to 41 * control writing to destination registers. This is the ExecMask. 42 * See store_dest(). 
43 * 44 * The ExecMask is computed from three other masks (CondMask, LoopMask and 45 * ContMask) which are controlled by the flow control instructions (namely: 46 * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). 47 * 48 * 49 * Authors: 50 * Michal Krol 51 * Brian Paul 52 */ 53 54#include "pipe/p_compiler.h" 55#include "pipe/p_state.h" 56#include "pipe/p_shader_tokens.h" 57#include "tgsi/tgsi_dump.h" 58#include "tgsi/tgsi_parse.h" 59#include "tgsi/tgsi_util.h" 60#include "tgsi_exec.h" 61#include "util/u_half.h" 62#include "util/u_memory.h" 63#include "util/u_math.h" 64#include "util/rounding.h" 65 66 67#define DEBUG_EXECUTION 0 68 69 70#define FAST_MATH 0 71 72#define TILE_TOP_LEFT 0 73#define TILE_TOP_RIGHT 1 74#define TILE_BOTTOM_LEFT 2 75#define TILE_BOTTOM_RIGHT 3 76 77union tgsi_double_channel { 78 double d[TGSI_QUAD_SIZE]; 79 unsigned u[TGSI_QUAD_SIZE][2]; 80 uint64_t u64[TGSI_QUAD_SIZE]; 81 int64_t i64[TGSI_QUAD_SIZE]; 82}; 83 84struct tgsi_double_vector { 85 union tgsi_double_channel xy; 86 union tgsi_double_channel zw; 87}; 88 89static void 90micro_abs(union tgsi_exec_channel *dst, 91 const union tgsi_exec_channel *src) 92{ 93 dst->f[0] = fabsf(src->f[0]); 94 dst->f[1] = fabsf(src->f[1]); 95 dst->f[2] = fabsf(src->f[2]); 96 dst->f[3] = fabsf(src->f[3]); 97} 98 99static void 100micro_arl(union tgsi_exec_channel *dst, 101 const union tgsi_exec_channel *src) 102{ 103 dst->i[0] = (int)floorf(src->f[0]); 104 dst->i[1] = (int)floorf(src->f[1]); 105 dst->i[2] = (int)floorf(src->f[2]); 106 dst->i[3] = (int)floorf(src->f[3]); 107} 108 109static void 110micro_arr(union tgsi_exec_channel *dst, 111 const union tgsi_exec_channel *src) 112{ 113 dst->i[0] = (int)floorf(src->f[0] + 0.5f); 114 dst->i[1] = (int)floorf(src->f[1] + 0.5f); 115 dst->i[2] = (int)floorf(src->f[2] + 0.5f); 116 dst->i[3] = (int)floorf(src->f[3] + 0.5f); 117} 118 119static void 120micro_ceil(union tgsi_exec_channel *dst, 121 const union tgsi_exec_channel *src) 122{ 123 dst->f[0] = ceilf(src->f[0]); 124 
dst->f[1] = ceilf(src->f[1]); 125 dst->f[2] = ceilf(src->f[2]); 126 dst->f[3] = ceilf(src->f[3]); 127} 128 129static void 130micro_cmp(union tgsi_exec_channel *dst, 131 const union tgsi_exec_channel *src0, 132 const union tgsi_exec_channel *src1, 133 const union tgsi_exec_channel *src2) 134{ 135 dst->f[0] = src0->f[0] < 0.0f ? src1->f[0] : src2->f[0]; 136 dst->f[1] = src0->f[1] < 0.0f ? src1->f[1] : src2->f[1]; 137 dst->f[2] = src0->f[2] < 0.0f ? src1->f[2] : src2->f[2]; 138 dst->f[3] = src0->f[3] < 0.0f ? src1->f[3] : src2->f[3]; 139} 140 141static void 142micro_cos(union tgsi_exec_channel *dst, 143 const union tgsi_exec_channel *src) 144{ 145 dst->f[0] = cosf(src->f[0]); 146 dst->f[1] = cosf(src->f[1]); 147 dst->f[2] = cosf(src->f[2]); 148 dst->f[3] = cosf(src->f[3]); 149} 150 151static void 152micro_d2f(union tgsi_exec_channel *dst, 153 const union tgsi_double_channel *src) 154{ 155 dst->f[0] = (float)src->d[0]; 156 dst->f[1] = (float)src->d[1]; 157 dst->f[2] = (float)src->d[2]; 158 dst->f[3] = (float)src->d[3]; 159} 160 161static void 162micro_d2i(union tgsi_exec_channel *dst, 163 const union tgsi_double_channel *src) 164{ 165 dst->i[0] = (int)src->d[0]; 166 dst->i[1] = (int)src->d[1]; 167 dst->i[2] = (int)src->d[2]; 168 dst->i[3] = (int)src->d[3]; 169} 170 171static void 172micro_d2u(union tgsi_exec_channel *dst, 173 const union tgsi_double_channel *src) 174{ 175 dst->u[0] = (unsigned)src->d[0]; 176 dst->u[1] = (unsigned)src->d[1]; 177 dst->u[2] = (unsigned)src->d[2]; 178 dst->u[3] = (unsigned)src->d[3]; 179} 180static void 181micro_dabs(union tgsi_double_channel *dst, 182 const union tgsi_double_channel *src) 183{ 184 dst->d[0] = src->d[0] >= 0.0 ? src->d[0] : -src->d[0]; 185 dst->d[1] = src->d[1] >= 0.0 ? src->d[1] : -src->d[1]; 186 dst->d[2] = src->d[2] >= 0.0 ? src->d[2] : -src->d[2]; 187 dst->d[3] = src->d[3] >= 0.0 ? 
src->d[3] : -src->d[3]; 188} 189 190static void 191micro_dadd(union tgsi_double_channel *dst, 192 const union tgsi_double_channel *src) 193{ 194 dst->d[0] = src[0].d[0] + src[1].d[0]; 195 dst->d[1] = src[0].d[1] + src[1].d[1]; 196 dst->d[2] = src[0].d[2] + src[1].d[2]; 197 dst->d[3] = src[0].d[3] + src[1].d[3]; 198} 199 200static void 201micro_ddiv(union tgsi_double_channel *dst, 202 const union tgsi_double_channel *src) 203{ 204 dst->d[0] = src[0].d[0] / src[1].d[0]; 205 dst->d[1] = src[0].d[1] / src[1].d[1]; 206 dst->d[2] = src[0].d[2] / src[1].d[2]; 207 dst->d[3] = src[0].d[3] / src[1].d[3]; 208} 209 210static void 211micro_ddx(union tgsi_exec_channel *dst, 212 const union tgsi_exec_channel *src) 213{ 214 dst->f[0] = 215 dst->f[1] = 216 dst->f[2] = 217 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; 218} 219 220static void 221micro_ddy(union tgsi_exec_channel *dst, 222 const union tgsi_exec_channel *src) 223{ 224 dst->f[0] = 225 dst->f[1] = 226 dst->f[2] = 227 dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; 228} 229 230static void 231micro_dmul(union tgsi_double_channel *dst, 232 const union tgsi_double_channel *src) 233{ 234 dst->d[0] = src[0].d[0] * src[1].d[0]; 235 dst->d[1] = src[0].d[1] * src[1].d[1]; 236 dst->d[2] = src[0].d[2] * src[1].d[2]; 237 dst->d[3] = src[0].d[3] * src[1].d[3]; 238} 239 240static void 241micro_dmax(union tgsi_double_channel *dst, 242 const union tgsi_double_channel *src) 243{ 244 dst->d[0] = src[0].d[0] > src[1].d[0] ? src[0].d[0] : src[1].d[0]; 245 dst->d[1] = src[0].d[1] > src[1].d[1] ? src[0].d[1] : src[1].d[1]; 246 dst->d[2] = src[0].d[2] > src[1].d[2] ? src[0].d[2] : src[1].d[2]; 247 dst->d[3] = src[0].d[3] > src[1].d[3] ? src[0].d[3] : src[1].d[3]; 248} 249 250static void 251micro_dmin(union tgsi_double_channel *dst, 252 const union tgsi_double_channel *src) 253{ 254 dst->d[0] = src[0].d[0] < src[1].d[0] ? src[0].d[0] : src[1].d[0]; 255 dst->d[1] = src[0].d[1] < src[1].d[1] ? 
src[0].d[1] : src[1].d[1]; 256 dst->d[2] = src[0].d[2] < src[1].d[2] ? src[0].d[2] : src[1].d[2]; 257 dst->d[3] = src[0].d[3] < src[1].d[3] ? src[0].d[3] : src[1].d[3]; 258} 259 260static void 261micro_dneg(union tgsi_double_channel *dst, 262 const union tgsi_double_channel *src) 263{ 264 dst->d[0] = -src->d[0]; 265 dst->d[1] = -src->d[1]; 266 dst->d[2] = -src->d[2]; 267 dst->d[3] = -src->d[3]; 268} 269 270static void 271micro_dslt(union tgsi_double_channel *dst, 272 const union tgsi_double_channel *src) 273{ 274 dst->u[0][0] = src[0].d[0] < src[1].d[0] ? ~0U : 0U; 275 dst->u[1][0] = src[0].d[1] < src[1].d[1] ? ~0U : 0U; 276 dst->u[2][0] = src[0].d[2] < src[1].d[2] ? ~0U : 0U; 277 dst->u[3][0] = src[0].d[3] < src[1].d[3] ? ~0U : 0U; 278} 279 280static void 281micro_dsne(union tgsi_double_channel *dst, 282 const union tgsi_double_channel *src) 283{ 284 dst->u[0][0] = src[0].d[0] != src[1].d[0] ? ~0U : 0U; 285 dst->u[1][0] = src[0].d[1] != src[1].d[1] ? ~0U : 0U; 286 dst->u[2][0] = src[0].d[2] != src[1].d[2] ? ~0U : 0U; 287 dst->u[3][0] = src[0].d[3] != src[1].d[3] ? ~0U : 0U; 288} 289 290static void 291micro_dsge(union tgsi_double_channel *dst, 292 const union tgsi_double_channel *src) 293{ 294 dst->u[0][0] = src[0].d[0] >= src[1].d[0] ? ~0U : 0U; 295 dst->u[1][0] = src[0].d[1] >= src[1].d[1] ? ~0U : 0U; 296 dst->u[2][0] = src[0].d[2] >= src[1].d[2] ? ~0U : 0U; 297 dst->u[3][0] = src[0].d[3] >= src[1].d[3] ? ~0U : 0U; 298} 299 300static void 301micro_dseq(union tgsi_double_channel *dst, 302 const union tgsi_double_channel *src) 303{ 304 dst->u[0][0] = src[0].d[0] == src[1].d[0] ? ~0U : 0U; 305 dst->u[1][0] = src[0].d[1] == src[1].d[1] ? ~0U : 0U; 306 dst->u[2][0] = src[0].d[2] == src[1].d[2] ? ~0U : 0U; 307 dst->u[3][0] = src[0].d[3] == src[1].d[3] ? 
~0U : 0U; 308} 309 310static void 311micro_drcp(union tgsi_double_channel *dst, 312 const union tgsi_double_channel *src) 313{ 314 dst->d[0] = 1.0 / src->d[0]; 315 dst->d[1] = 1.0 / src->d[1]; 316 dst->d[2] = 1.0 / src->d[2]; 317 dst->d[3] = 1.0 / src->d[3]; 318} 319 320static void 321micro_dsqrt(union tgsi_double_channel *dst, 322 const union tgsi_double_channel *src) 323{ 324 dst->d[0] = sqrt(src->d[0]); 325 dst->d[1] = sqrt(src->d[1]); 326 dst->d[2] = sqrt(src->d[2]); 327 dst->d[3] = sqrt(src->d[3]); 328} 329 330static void 331micro_drsq(union tgsi_double_channel *dst, 332 const union tgsi_double_channel *src) 333{ 334 dst->d[0] = 1.0 / sqrt(src->d[0]); 335 dst->d[1] = 1.0 / sqrt(src->d[1]); 336 dst->d[2] = 1.0 / sqrt(src->d[2]); 337 dst->d[3] = 1.0 / sqrt(src->d[3]); 338} 339 340static void 341micro_dmad(union tgsi_double_channel *dst, 342 const union tgsi_double_channel *src) 343{ 344 dst->d[0] = src[0].d[0] * src[1].d[0] + src[2].d[0]; 345 dst->d[1] = src[0].d[1] * src[1].d[1] + src[2].d[1]; 346 dst->d[2] = src[0].d[2] * src[1].d[2] + src[2].d[2]; 347 dst->d[3] = src[0].d[3] * src[1].d[3] + src[2].d[3]; 348} 349 350static void 351micro_dfrac(union tgsi_double_channel *dst, 352 const union tgsi_double_channel *src) 353{ 354 dst->d[0] = src->d[0] - floor(src->d[0]); 355 dst->d[1] = src->d[1] - floor(src->d[1]); 356 dst->d[2] = src->d[2] - floor(src->d[2]); 357 dst->d[3] = src->d[3] - floor(src->d[3]); 358} 359 360static void 361micro_dldexp(union tgsi_double_channel *dst, 362 const union tgsi_double_channel *src0, 363 union tgsi_exec_channel *src1) 364{ 365 dst->d[0] = ldexp(src0->d[0], src1->i[0]); 366 dst->d[1] = ldexp(src0->d[1], src1->i[1]); 367 dst->d[2] = ldexp(src0->d[2], src1->i[2]); 368 dst->d[3] = ldexp(src0->d[3], src1->i[3]); 369} 370 371static void 372micro_dfracexp(union tgsi_double_channel *dst, 373 union tgsi_exec_channel *dst_exp, 374 const union tgsi_double_channel *src) 375{ 376 dst->d[0] = frexp(src->d[0], &dst_exp->i[0]); 377 dst->d[1] = 
frexp(src->d[1], &dst_exp->i[1]); 378 dst->d[2] = frexp(src->d[2], &dst_exp->i[2]); 379 dst->d[3] = frexp(src->d[3], &dst_exp->i[3]); 380} 381 382static void 383micro_exp2(union tgsi_exec_channel *dst, 384 const union tgsi_exec_channel *src) 385{ 386#if FAST_MATH 387 dst->f[0] = util_fast_exp2(src->f[0]); 388 dst->f[1] = util_fast_exp2(src->f[1]); 389 dst->f[2] = util_fast_exp2(src->f[2]); 390 dst->f[3] = util_fast_exp2(src->f[3]); 391#else 392#if DEBUG 393 /* Inf is okay for this instruction, so clamp it to silence assertions. */ 394 uint i; 395 union tgsi_exec_channel clamped; 396 397 for (i = 0; i < 4; i++) { 398 if (src->f[i] > 127.99999f) { 399 clamped.f[i] = 127.99999f; 400 } else if (src->f[i] < -126.99999f) { 401 clamped.f[i] = -126.99999f; 402 } else { 403 clamped.f[i] = src->f[i]; 404 } 405 } 406 src = &clamped; 407#endif /* DEBUG */ 408 409 dst->f[0] = powf(2.0f, src->f[0]); 410 dst->f[1] = powf(2.0f, src->f[1]); 411 dst->f[2] = powf(2.0f, src->f[2]); 412 dst->f[3] = powf(2.0f, src->f[3]); 413#endif /* FAST_MATH */ 414} 415 416static void 417micro_f2d(union tgsi_double_channel *dst, 418 const union tgsi_exec_channel *src) 419{ 420 dst->d[0] = (double)src->f[0]; 421 dst->d[1] = (double)src->f[1]; 422 dst->d[2] = (double)src->f[2]; 423 dst->d[3] = (double)src->f[3]; 424} 425 426static void 427micro_flr(union tgsi_exec_channel *dst, 428 const union tgsi_exec_channel *src) 429{ 430 dst->f[0] = floorf(src->f[0]); 431 dst->f[1] = floorf(src->f[1]); 432 dst->f[2] = floorf(src->f[2]); 433 dst->f[3] = floorf(src->f[3]); 434} 435 436static void 437micro_frc(union tgsi_exec_channel *dst, 438 const union tgsi_exec_channel *src) 439{ 440 dst->f[0] = src->f[0] - floorf(src->f[0]); 441 dst->f[1] = src->f[1] - floorf(src->f[1]); 442 dst->f[2] = src->f[2] - floorf(src->f[2]); 443 dst->f[3] = src->f[3] - floorf(src->f[3]); 444} 445 446static void 447micro_i2d(union tgsi_double_channel *dst, 448 const union tgsi_exec_channel *src) 449{ 450 dst->d[0] = (double)src->i[0]; 
451 dst->d[1] = (double)src->i[1]; 452 dst->d[2] = (double)src->i[2]; 453 dst->d[3] = (double)src->i[3]; 454} 455 456static void 457micro_iabs(union tgsi_exec_channel *dst, 458 const union tgsi_exec_channel *src) 459{ 460 dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0]; 461 dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1]; 462 dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2]; 463 dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3]; 464} 465 466static void 467micro_ineg(union tgsi_exec_channel *dst, 468 const union tgsi_exec_channel *src) 469{ 470 dst->i[0] = -src->i[0]; 471 dst->i[1] = -src->i[1]; 472 dst->i[2] = -src->i[2]; 473 dst->i[3] = -src->i[3]; 474} 475 476static void 477micro_lg2(union tgsi_exec_channel *dst, 478 const union tgsi_exec_channel *src) 479{ 480#if FAST_MATH 481 dst->f[0] = util_fast_log2(src->f[0]); 482 dst->f[1] = util_fast_log2(src->f[1]); 483 dst->f[2] = util_fast_log2(src->f[2]); 484 dst->f[3] = util_fast_log2(src->f[3]); 485#else 486 dst->f[0] = logf(src->f[0]) * 1.442695f; 487 dst->f[1] = logf(src->f[1]) * 1.442695f; 488 dst->f[2] = logf(src->f[2]) * 1.442695f; 489 dst->f[3] = logf(src->f[3]) * 1.442695f; 490#endif 491} 492 493static void 494micro_lrp(union tgsi_exec_channel *dst, 495 const union tgsi_exec_channel *src0, 496 const union tgsi_exec_channel *src1, 497 const union tgsi_exec_channel *src2) 498{ 499 dst->f[0] = src0->f[0] * (src1->f[0] - src2->f[0]) + src2->f[0]; 500 dst->f[1] = src0->f[1] * (src1->f[1] - src2->f[1]) + src2->f[1]; 501 dst->f[2] = src0->f[2] * (src1->f[2] - src2->f[2]) + src2->f[2]; 502 dst->f[3] = src0->f[3] * (src1->f[3] - src2->f[3]) + src2->f[3]; 503} 504 505static void 506micro_mad(union tgsi_exec_channel *dst, 507 const union tgsi_exec_channel *src0, 508 const union tgsi_exec_channel *src1, 509 const union tgsi_exec_channel *src2) 510{ 511 dst->f[0] = src0->f[0] * src1->f[0] + src2->f[0]; 512 dst->f[1] = src0->f[1] * src1->f[1] + src2->f[1]; 513 dst->f[2] = src0->f[2] * src1->f[2] + src2->f[2]; 
514 dst->f[3] = src0->f[3] * src1->f[3] + src2->f[3]; 515} 516 517static void 518micro_mov(union tgsi_exec_channel *dst, 519 const union tgsi_exec_channel *src) 520{ 521 dst->u[0] = src->u[0]; 522 dst->u[1] = src->u[1]; 523 dst->u[2] = src->u[2]; 524 dst->u[3] = src->u[3]; 525} 526 527static void 528micro_rcp(union tgsi_exec_channel *dst, 529 const union tgsi_exec_channel *src) 530{ 531#if 0 /* for debugging */ 532 assert(src->f[0] != 0.0f); 533 assert(src->f[1] != 0.0f); 534 assert(src->f[2] != 0.0f); 535 assert(src->f[3] != 0.0f); 536#endif 537 dst->f[0] = 1.0f / src->f[0]; 538 dst->f[1] = 1.0f / src->f[1]; 539 dst->f[2] = 1.0f / src->f[2]; 540 dst->f[3] = 1.0f / src->f[3]; 541} 542 543static void 544micro_rnd(union tgsi_exec_channel *dst, 545 const union tgsi_exec_channel *src) 546{ 547 dst->f[0] = _mesa_roundevenf(src->f[0]); 548 dst->f[1] = _mesa_roundevenf(src->f[1]); 549 dst->f[2] = _mesa_roundevenf(src->f[2]); 550 dst->f[3] = _mesa_roundevenf(src->f[3]); 551} 552 553static void 554micro_rsq(union tgsi_exec_channel *dst, 555 const union tgsi_exec_channel *src) 556{ 557#if 0 /* for debugging */ 558 assert(src->f[0] != 0.0f); 559 assert(src->f[1] != 0.0f); 560 assert(src->f[2] != 0.0f); 561 assert(src->f[3] != 0.0f); 562#endif 563 dst->f[0] = 1.0f / sqrtf(src->f[0]); 564 dst->f[1] = 1.0f / sqrtf(src->f[1]); 565 dst->f[2] = 1.0f / sqrtf(src->f[2]); 566 dst->f[3] = 1.0f / sqrtf(src->f[3]); 567} 568 569static void 570micro_sqrt(union tgsi_exec_channel *dst, 571 const union tgsi_exec_channel *src) 572{ 573 dst->f[0] = sqrtf(src->f[0]); 574 dst->f[1] = sqrtf(src->f[1]); 575 dst->f[2] = sqrtf(src->f[2]); 576 dst->f[3] = sqrtf(src->f[3]); 577} 578 579static void 580micro_seq(union tgsi_exec_channel *dst, 581 const union tgsi_exec_channel *src0, 582 const union tgsi_exec_channel *src1) 583{ 584 dst->f[0] = src0->f[0] == src1->f[0] ? 1.0f : 0.0f; 585 dst->f[1] = src0->f[1] == src1->f[1] ? 1.0f : 0.0f; 586 dst->f[2] = src0->f[2] == src1->f[2] ? 
1.0f : 0.0f; 587 dst->f[3] = src0->f[3] == src1->f[3] ? 1.0f : 0.0f; 588} 589 590static void 591micro_sge(union tgsi_exec_channel *dst, 592 const union tgsi_exec_channel *src0, 593 const union tgsi_exec_channel *src1) 594{ 595 dst->f[0] = src0->f[0] >= src1->f[0] ? 1.0f : 0.0f; 596 dst->f[1] = src0->f[1] >= src1->f[1] ? 1.0f : 0.0f; 597 dst->f[2] = src0->f[2] >= src1->f[2] ? 1.0f : 0.0f; 598 dst->f[3] = src0->f[3] >= src1->f[3] ? 1.0f : 0.0f; 599} 600 601static void 602micro_sgn(union tgsi_exec_channel *dst, 603 const union tgsi_exec_channel *src) 604{ 605 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; 606 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; 607 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; 608 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; 609} 610 611static void 612micro_isgn(union tgsi_exec_channel *dst, 613 const union tgsi_exec_channel *src) 614{ 615 dst->i[0] = src->i[0] < 0 ? -1 : src->i[0] > 0 ? 1 : 0; 616 dst->i[1] = src->i[1] < 0 ? -1 : src->i[1] > 0 ? 1 : 0; 617 dst->i[2] = src->i[2] < 0 ? -1 : src->i[2] > 0 ? 1 : 0; 618 dst->i[3] = src->i[3] < 0 ? -1 : src->i[3] > 0 ? 1 : 0; 619} 620 621static void 622micro_sgt(union tgsi_exec_channel *dst, 623 const union tgsi_exec_channel *src0, 624 const union tgsi_exec_channel *src1) 625{ 626 dst->f[0] = src0->f[0] > src1->f[0] ? 1.0f : 0.0f; 627 dst->f[1] = src0->f[1] > src1->f[1] ? 1.0f : 0.0f; 628 dst->f[2] = src0->f[2] > src1->f[2] ? 1.0f : 0.0f; 629 dst->f[3] = src0->f[3] > src1->f[3] ? 
1.0f : 0.0f; 630} 631 632static void 633micro_sin(union tgsi_exec_channel *dst, 634 const union tgsi_exec_channel *src) 635{ 636 dst->f[0] = sinf(src->f[0]); 637 dst->f[1] = sinf(src->f[1]); 638 dst->f[2] = sinf(src->f[2]); 639 dst->f[3] = sinf(src->f[3]); 640} 641 642static void 643micro_sle(union tgsi_exec_channel *dst, 644 const union tgsi_exec_channel *src0, 645 const union tgsi_exec_channel *src1) 646{ 647 dst->f[0] = src0->f[0] <= src1->f[0] ? 1.0f : 0.0f; 648 dst->f[1] = src0->f[1] <= src1->f[1] ? 1.0f : 0.0f; 649 dst->f[2] = src0->f[2] <= src1->f[2] ? 1.0f : 0.0f; 650 dst->f[3] = src0->f[3] <= src1->f[3] ? 1.0f : 0.0f; 651} 652 653static void 654micro_slt(union tgsi_exec_channel *dst, 655 const union tgsi_exec_channel *src0, 656 const union tgsi_exec_channel *src1) 657{ 658 dst->f[0] = src0->f[0] < src1->f[0] ? 1.0f : 0.0f; 659 dst->f[1] = src0->f[1] < src1->f[1] ? 1.0f : 0.0f; 660 dst->f[2] = src0->f[2] < src1->f[2] ? 1.0f : 0.0f; 661 dst->f[3] = src0->f[3] < src1->f[3] ? 1.0f : 0.0f; 662} 663 664static void 665micro_sne(union tgsi_exec_channel *dst, 666 const union tgsi_exec_channel *src0, 667 const union tgsi_exec_channel *src1) 668{ 669 dst->f[0] = src0->f[0] != src1->f[0] ? 1.0f : 0.0f; 670 dst->f[1] = src0->f[1] != src1->f[1] ? 1.0f : 0.0f; 671 dst->f[2] = src0->f[2] != src1->f[2] ? 1.0f : 0.0f; 672 dst->f[3] = src0->f[3] != src1->f[3] ? 
1.0f : 0.0f; 673} 674 675static void 676micro_trunc(union tgsi_exec_channel *dst, 677 const union tgsi_exec_channel *src) 678{ 679 dst->f[0] = truncf(src->f[0]); 680 dst->f[1] = truncf(src->f[1]); 681 dst->f[2] = truncf(src->f[2]); 682 dst->f[3] = truncf(src->f[3]); 683} 684 685static void 686micro_u2d(union tgsi_double_channel *dst, 687 const union tgsi_exec_channel *src) 688{ 689 dst->d[0] = (double)src->u[0]; 690 dst->d[1] = (double)src->u[1]; 691 dst->d[2] = (double)src->u[2]; 692 dst->d[3] = (double)src->u[3]; 693} 694 695static void 696micro_i64abs(union tgsi_double_channel *dst, 697 const union tgsi_double_channel *src) 698{ 699 dst->i64[0] = src->i64[0] >= 0.0 ? src->i64[0] : -src->i64[0]; 700 dst->i64[1] = src->i64[1] >= 0.0 ? src->i64[1] : -src->i64[1]; 701 dst->i64[2] = src->i64[2] >= 0.0 ? src->i64[2] : -src->i64[2]; 702 dst->i64[3] = src->i64[3] >= 0.0 ? src->i64[3] : -src->i64[3]; 703} 704 705static void 706micro_i64sgn(union tgsi_double_channel *dst, 707 const union tgsi_double_channel *src) 708{ 709 dst->i64[0] = src->i64[0] < 0 ? -1 : src->i64[0] > 0 ? 1 : 0; 710 dst->i64[1] = src->i64[1] < 0 ? -1 : src->i64[1] > 0 ? 1 : 0; 711 dst->i64[2] = src->i64[2] < 0 ? -1 : src->i64[2] > 0 ? 1 : 0; 712 dst->i64[3] = src->i64[3] < 0 ? -1 : src->i64[3] > 0 ? 1 : 0; 713} 714 715static void 716micro_i64neg(union tgsi_double_channel *dst, 717 const union tgsi_double_channel *src) 718{ 719 dst->i64[0] = -src->i64[0]; 720 dst->i64[1] = -src->i64[1]; 721 dst->i64[2] = -src->i64[2]; 722 dst->i64[3] = -src->i64[3]; 723} 724 725static void 726micro_u64seq(union tgsi_double_channel *dst, 727 const union tgsi_double_channel *src) 728{ 729 dst->u[0][0] = src[0].u64[0] == src[1].u64[0] ? ~0U : 0U; 730 dst->u[1][0] = src[0].u64[1] == src[1].u64[1] ? ~0U : 0U; 731 dst->u[2][0] = src[0].u64[2] == src[1].u64[2] ? ~0U : 0U; 732 dst->u[3][0] = src[0].u64[3] == src[1].u64[3] ? 
~0U : 0U; 733} 734 735static void 736micro_u64sne(union tgsi_double_channel *dst, 737 const union tgsi_double_channel *src) 738{ 739 dst->u[0][0] = src[0].u64[0] != src[1].u64[0] ? ~0U : 0U; 740 dst->u[1][0] = src[0].u64[1] != src[1].u64[1] ? ~0U : 0U; 741 dst->u[2][0] = src[0].u64[2] != src[1].u64[2] ? ~0U : 0U; 742 dst->u[3][0] = src[0].u64[3] != src[1].u64[3] ? ~0U : 0U; 743} 744 745static void 746micro_i64slt(union tgsi_double_channel *dst, 747 const union tgsi_double_channel *src) 748{ 749 dst->u[0][0] = src[0].i64[0] < src[1].i64[0] ? ~0U : 0U; 750 dst->u[1][0] = src[0].i64[1] < src[1].i64[1] ? ~0U : 0U; 751 dst->u[2][0] = src[0].i64[2] < src[1].i64[2] ? ~0U : 0U; 752 dst->u[3][0] = src[0].i64[3] < src[1].i64[3] ? ~0U : 0U; 753} 754 755static void 756micro_u64slt(union tgsi_double_channel *dst, 757 const union tgsi_double_channel *src) 758{ 759 dst->u[0][0] = src[0].u64[0] < src[1].u64[0] ? ~0U : 0U; 760 dst->u[1][0] = src[0].u64[1] < src[1].u64[1] ? ~0U : 0U; 761 dst->u[2][0] = src[0].u64[2] < src[1].u64[2] ? ~0U : 0U; 762 dst->u[3][0] = src[0].u64[3] < src[1].u64[3] ? ~0U : 0U; 763} 764 765static void 766micro_i64sge(union tgsi_double_channel *dst, 767 const union tgsi_double_channel *src) 768{ 769 dst->u[0][0] = src[0].i64[0] >= src[1].i64[0] ? ~0U : 0U; 770 dst->u[1][0] = src[0].i64[1] >= src[1].i64[1] ? ~0U : 0U; 771 dst->u[2][0] = src[0].i64[2] >= src[1].i64[2] ? ~0U : 0U; 772 dst->u[3][0] = src[0].i64[3] >= src[1].i64[3] ? ~0U : 0U; 773} 774 775static void 776micro_u64sge(union tgsi_double_channel *dst, 777 const union tgsi_double_channel *src) 778{ 779 dst->u[0][0] = src[0].u64[0] >= src[1].u64[0] ? ~0U : 0U; 780 dst->u[1][0] = src[0].u64[1] >= src[1].u64[1] ? ~0U : 0U; 781 dst->u[2][0] = src[0].u64[2] >= src[1].u64[2] ? ~0U : 0U; 782 dst->u[3][0] = src[0].u64[3] >= src[1].u64[3] ? 
~0U : 0U; 783} 784 785static void 786micro_u64max(union tgsi_double_channel *dst, 787 const union tgsi_double_channel *src) 788{ 789 dst->u64[0] = src[0].u64[0] > src[1].u64[0] ? src[0].u64[0] : src[1].u64[0]; 790 dst->u64[1] = src[0].u64[1] > src[1].u64[1] ? src[0].u64[1] : src[1].u64[1]; 791 dst->u64[2] = src[0].u64[2] > src[1].u64[2] ? src[0].u64[2] : src[1].u64[2]; 792 dst->u64[3] = src[0].u64[3] > src[1].u64[3] ? src[0].u64[3] : src[1].u64[3]; 793} 794 795static void 796micro_i64max(union tgsi_double_channel *dst, 797 const union tgsi_double_channel *src) 798{ 799 dst->i64[0] = src[0].i64[0] > src[1].i64[0] ? src[0].i64[0] : src[1].i64[0]; 800 dst->i64[1] = src[0].i64[1] > src[1].i64[1] ? src[0].i64[1] : src[1].i64[1]; 801 dst->i64[2] = src[0].i64[2] > src[1].i64[2] ? src[0].i64[2] : src[1].i64[2]; 802 dst->i64[3] = src[0].i64[3] > src[1].i64[3] ? src[0].i64[3] : src[1].i64[3]; 803} 804 805static void 806micro_u64min(union tgsi_double_channel *dst, 807 const union tgsi_double_channel *src) 808{ 809 dst->u64[0] = src[0].u64[0] < src[1].u64[0] ? src[0].u64[0] : src[1].u64[0]; 810 dst->u64[1] = src[0].u64[1] < src[1].u64[1] ? src[0].u64[1] : src[1].u64[1]; 811 dst->u64[2] = src[0].u64[2] < src[1].u64[2] ? src[0].u64[2] : src[1].u64[2]; 812 dst->u64[3] = src[0].u64[3] < src[1].u64[3] ? src[0].u64[3] : src[1].u64[3]; 813} 814 815static void 816micro_i64min(union tgsi_double_channel *dst, 817 const union tgsi_double_channel *src) 818{ 819 dst->i64[0] = src[0].i64[0] < src[1].i64[0] ? src[0].i64[0] : src[1].i64[0]; 820 dst->i64[1] = src[0].i64[1] < src[1].i64[1] ? src[0].i64[1] : src[1].i64[1]; 821 dst->i64[2] = src[0].i64[2] < src[1].i64[2] ? src[0].i64[2] : src[1].i64[2]; 822 dst->i64[3] = src[0].i64[3] < src[1].i64[3] ? 
src[0].i64[3] : src[1].i64[3]; 823} 824 825static void 826micro_u64add(union tgsi_double_channel *dst, 827 const union tgsi_double_channel *src) 828{ 829 dst->u64[0] = src[0].u64[0] + src[1].u64[0]; 830 dst->u64[1] = src[0].u64[1] + src[1].u64[1]; 831 dst->u64[2] = src[0].u64[2] + src[1].u64[2]; 832 dst->u64[3] = src[0].u64[3] + src[1].u64[3]; 833} 834 835static void 836micro_u64mul(union tgsi_double_channel *dst, 837 const union tgsi_double_channel *src) 838{ 839 dst->u64[0] = src[0].u64[0] * src[1].u64[0]; 840 dst->u64[1] = src[0].u64[1] * src[1].u64[1]; 841 dst->u64[2] = src[0].u64[2] * src[1].u64[2]; 842 dst->u64[3] = src[0].u64[3] * src[1].u64[3]; 843} 844 845static void 846micro_u64div(union tgsi_double_channel *dst, 847 const union tgsi_double_channel *src) 848{ 849 dst->u64[0] = src[1].u64[0] ? src[0].u64[0] / src[1].u64[0] : ~0ull; 850 dst->u64[1] = src[1].u64[1] ? src[0].u64[1] / src[1].u64[1] : ~0ull; 851 dst->u64[2] = src[1].u64[2] ? src[0].u64[2] / src[1].u64[2] : ~0ull; 852 dst->u64[3] = src[1].u64[3] ? src[0].u64[3] / src[1].u64[3] : ~0ull; 853} 854 855static void 856micro_i64div(union tgsi_double_channel *dst, 857 const union tgsi_double_channel *src) 858{ 859 dst->i64[0] = src[1].i64[0] ? src[0].i64[0] / src[1].i64[0] : 0; 860 dst->i64[1] = src[1].i64[1] ? src[0].i64[1] / src[1].i64[1] : 0; 861 dst->i64[2] = src[1].i64[2] ? src[0].i64[2] / src[1].i64[2] : 0; 862 dst->i64[3] = src[1].i64[3] ? src[0].i64[3] / src[1].i64[3] : 0; 863} 864 865static void 866micro_u64mod(union tgsi_double_channel *dst, 867 const union tgsi_double_channel *src) 868{ 869 dst->u64[0] = src[1].u64[0] ? src[0].u64[0] % src[1].u64[0] : ~0ull; 870 dst->u64[1] = src[1].u64[1] ? src[0].u64[1] % src[1].u64[1] : ~0ull; 871 dst->u64[2] = src[1].u64[2] ? src[0].u64[2] % src[1].u64[2] : ~0ull; 872 dst->u64[3] = src[1].u64[3] ? 
src[0].u64[3] % src[1].u64[3] : ~0ull; 873} 874 875static void 876micro_i64mod(union tgsi_double_channel *dst, 877 const union tgsi_double_channel *src) 878{ 879 dst->i64[0] = src[1].i64[0] ? src[0].i64[0] % src[1].i64[0] : ~0ll; 880 dst->i64[1] = src[1].i64[1] ? src[0].i64[1] % src[1].i64[1] : ~0ll; 881 dst->i64[2] = src[1].i64[2] ? src[0].i64[2] % src[1].i64[2] : ~0ll; 882 dst->i64[3] = src[1].i64[3] ? src[0].i64[3] % src[1].i64[3] : ~0ll; 883} 884 885static void 886micro_u64shl(union tgsi_double_channel *dst, 887 const union tgsi_double_channel *src0, 888 union tgsi_exec_channel *src1) 889{ 890 unsigned masked_count; 891 masked_count = src1->u[0] & 0x3f; 892 dst->u64[0] = src0->u64[0] << masked_count; 893 masked_count = src1->u[1] & 0x3f; 894 dst->u64[1] = src0->u64[1] << masked_count; 895 masked_count = src1->u[2] & 0x3f; 896 dst->u64[2] = src0->u64[2] << masked_count; 897 masked_count = src1->u[3] & 0x3f; 898 dst->u64[3] = src0->u64[3] << masked_count; 899} 900 901static void 902micro_i64shr(union tgsi_double_channel *dst, 903 const union tgsi_double_channel *src0, 904 union tgsi_exec_channel *src1) 905{ 906 unsigned masked_count; 907 masked_count = src1->u[0] & 0x3f; 908 dst->i64[0] = src0->i64[0] >> masked_count; 909 masked_count = src1->u[1] & 0x3f; 910 dst->i64[1] = src0->i64[1] >> masked_count; 911 masked_count = src1->u[2] & 0x3f; 912 dst->i64[2] = src0->i64[2] >> masked_count; 913 masked_count = src1->u[3] & 0x3f; 914 dst->i64[3] = src0->i64[3] >> masked_count; 915} 916 917static void 918micro_u64shr(union tgsi_double_channel *dst, 919 const union tgsi_double_channel *src0, 920 union tgsi_exec_channel *src1) 921{ 922 unsigned masked_count; 923 masked_count = src1->u[0] & 0x3f; 924 dst->u64[0] = src0->u64[0] >> masked_count; 925 masked_count = src1->u[1] & 0x3f; 926 dst->u64[1] = src0->u64[1] >> masked_count; 927 masked_count = src1->u[2] & 0x3f; 928 dst->u64[2] = src0->u64[2] >> masked_count; 929 masked_count = src1->u[3] & 0x3f; 930 dst->u64[3] = 
src0->u64[3] >> masked_count; 931} 932 933enum tgsi_exec_datatype { 934 TGSI_EXEC_DATA_FLOAT, 935 TGSI_EXEC_DATA_INT, 936 TGSI_EXEC_DATA_UINT, 937 TGSI_EXEC_DATA_DOUBLE, 938 TGSI_EXEC_DATA_INT64, 939 TGSI_EXEC_DATA_UINT64, 940}; 941 942/* 943 * Shorthand locations of various utility registers (_I = Index, _C = Channel) 944 */ 945#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I 946#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C 947#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I 948#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C 949#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I 950#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C 951 952 953/** The execution mask depends on the conditional mask and the loop mask */ 954#define UPDATE_EXEC_MASK(MACH) \ 955 MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->Switch.mask & MACH->FuncMask 956 957 958static const union tgsi_exec_channel ZeroVec = 959 { { 0.0, 0.0, 0.0, 0.0 } }; 960 961static const union tgsi_exec_channel OneVec = { 962 {1.0f, 1.0f, 1.0f, 1.0f} 963}; 964 965static const union tgsi_exec_channel P128Vec = { 966 {128.0f, 128.0f, 128.0f, 128.0f} 967}; 968 969static const union tgsi_exec_channel M128Vec = { 970 {-128.0f, -128.0f, -128.0f, -128.0f} 971}; 972 973 974/** 975 * Assert that none of the float values in 'chan' are infinite or NaN. 976 * NaN and Inf may occur normally during program execution and should 977 * not lead to crashes, etc. But when debugging, it's helpful to catch 978 * them. 
979 */ 980static inline void 981check_inf_or_nan(const union tgsi_exec_channel *chan) 982{ 983 assert(!util_is_inf_or_nan((chan)->f[0])); 984 assert(!util_is_inf_or_nan((chan)->f[1])); 985 assert(!util_is_inf_or_nan((chan)->f[2])); 986 assert(!util_is_inf_or_nan((chan)->f[3])); 987} 988 989 990#ifdef DEBUG 991static void 992print_chan(const char *msg, const union tgsi_exec_channel *chan) 993{ 994 debug_printf("%s = {%f, %f, %f, %f}\n", 995 msg, chan->f[0], chan->f[1], chan->f[2], chan->f[3]); 996} 997#endif 998 999 1000#ifdef DEBUG 1001static void 1002print_temp(const struct tgsi_exec_machine *mach, uint index) 1003{ 1004 const struct tgsi_exec_vector *tmp = &mach->Temps[index]; 1005 int i; 1006 debug_printf("Temp[%u] =\n", index); 1007 for (i = 0; i < 4; i++) { 1008 debug_printf(" %c: { %f, %f, %f, %f }\n", 1009 "XYZW"[i], 1010 tmp->xyzw[i].f[0], 1011 tmp->xyzw[i].f[1], 1012 tmp->xyzw[i].f[2], 1013 tmp->xyzw[i].f[3]); 1014 } 1015} 1016#endif 1017 1018 1019void 1020tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach, 1021 unsigned num_bufs, 1022 const void **bufs, 1023 const unsigned *buf_sizes) 1024{ 1025 unsigned i; 1026 1027 for (i = 0; i < num_bufs; i++) { 1028 mach->Consts[i] = bufs[i]; 1029 mach->ConstsSize[i] = buf_sizes[i]; 1030 } 1031} 1032 1033 1034/** 1035 * Check if there's a potential src/dst register data dependency when 1036 * using SOA execution. 1037 * Example: 1038 * MOV T, T.yxwz; 1039 * This would expand into: 1040 * MOV t0, t1; 1041 * MOV t1, t0; 1042 * MOV t2, t3; 1043 * MOV t3, t2; 1044 * The second instruction will have the wrong value for t0 if executed as-is. 
1045 */ 1046boolean 1047tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) 1048{ 1049 uint i, chan; 1050 1051 uint writemask = inst->Dst[0].Register.WriteMask; 1052 if (writemask == TGSI_WRITEMASK_X || 1053 writemask == TGSI_WRITEMASK_Y || 1054 writemask == TGSI_WRITEMASK_Z || 1055 writemask == TGSI_WRITEMASK_W || 1056 writemask == TGSI_WRITEMASK_NONE) { 1057 /* no chance of data dependency */ 1058 return FALSE; 1059 } 1060 1061 /* loop over src regs */ 1062 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1063 if ((inst->Src[i].Register.File == 1064 inst->Dst[0].Register.File) && 1065 ((inst->Src[i].Register.Index == 1066 inst->Dst[0].Register.Index) || 1067 inst->Src[i].Register.Indirect || 1068 inst->Dst[0].Register.Indirect)) { 1069 /* loop over dest channels */ 1070 uint channelsWritten = 0x0; 1071 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 1072 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 1073 /* check if we're reading a channel that's been written */ 1074 uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan); 1075 if (channelsWritten & (1 << swizzle)) { 1076 return TRUE; 1077 } 1078 1079 channelsWritten |= (1 << chan); 1080 } 1081 } 1082 } 1083 } 1084 return FALSE; 1085} 1086 1087 1088/** 1089 * Initialize machine state by expanding tokens to full instructions, 1090 * allocating temporary storage, setting up constants, etc. 1091 * After this, we can call tgsi_exec_machine_run() many times. 
1092 */ 1093void 1094tgsi_exec_machine_bind_shader( 1095 struct tgsi_exec_machine *mach, 1096 const struct tgsi_token *tokens, 1097 struct tgsi_sampler *sampler, 1098 struct tgsi_image *image, 1099 struct tgsi_buffer *buffer) 1100{ 1101 uint k; 1102 struct tgsi_parse_context parse; 1103 struct tgsi_full_instruction *instructions; 1104 struct tgsi_full_declaration *declarations; 1105 uint maxInstructions = 10, numInstructions = 0; 1106 uint maxDeclarations = 10, numDeclarations = 0; 1107 1108#if 0 1109 tgsi_dump(tokens, 0); 1110#endif 1111 1112 util_init_math(); 1113 1114 1115 mach->Tokens = tokens; 1116 mach->Sampler = sampler; 1117 mach->Image = image; 1118 mach->Buffer = buffer; 1119 1120 if (!tokens) { 1121 /* unbind and free all */ 1122 FREE(mach->Declarations); 1123 mach->Declarations = NULL; 1124 mach->NumDeclarations = 0; 1125 1126 FREE(mach->Instructions); 1127 mach->Instructions = NULL; 1128 mach->NumInstructions = 0; 1129 1130 return; 1131 } 1132 1133 k = tgsi_parse_init (&parse, mach->Tokens); 1134 if (k != TGSI_PARSE_OK) { 1135 debug_printf( "Problem parsing!\n" ); 1136 return; 1137 } 1138 1139 mach->ImmLimit = 0; 1140 mach->NumOutputs = 0; 1141 1142 for (k = 0; k < TGSI_SEMANTIC_COUNT; k++) 1143 mach->SysSemanticToIndex[k] = -1; 1144 1145 if (mach->ShaderType == PIPE_SHADER_GEOMETRY && 1146 !mach->UsedGeometryShader) { 1147 struct tgsi_exec_vector *inputs; 1148 struct tgsi_exec_vector *outputs; 1149 1150 inputs = align_malloc(sizeof(struct tgsi_exec_vector) * 1151 TGSI_MAX_PRIM_VERTICES * PIPE_MAX_SHADER_INPUTS, 1152 16); 1153 1154 if (!inputs) 1155 return; 1156 1157 outputs = align_malloc(sizeof(struct tgsi_exec_vector) * 1158 TGSI_MAX_TOTAL_VERTICES, 16); 1159 1160 if (!outputs) { 1161 align_free(inputs); 1162 return; 1163 } 1164 1165 align_free(mach->Inputs); 1166 align_free(mach->Outputs); 1167 1168 mach->Inputs = inputs; 1169 mach->Outputs = outputs; 1170 mach->UsedGeometryShader = TRUE; 1171 } 1172 1173 declarations = (struct 
tgsi_full_declaration *) 1174 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); 1175 1176 if (!declarations) { 1177 return; 1178 } 1179 1180 instructions = (struct tgsi_full_instruction *) 1181 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); 1182 1183 if (!instructions) { 1184 FREE( declarations ); 1185 return; 1186 } 1187 1188 while( !tgsi_parse_end_of_tokens( &parse ) ) { 1189 uint i; 1190 1191 tgsi_parse_token( &parse ); 1192 switch( parse.FullToken.Token.Type ) { 1193 case TGSI_TOKEN_TYPE_DECLARATION: 1194 /* save expanded declaration */ 1195 if (numDeclarations == maxDeclarations) { 1196 declarations = REALLOC(declarations, 1197 maxDeclarations 1198 * sizeof(struct tgsi_full_declaration), 1199 (maxDeclarations + 10) 1200 * sizeof(struct tgsi_full_declaration)); 1201 maxDeclarations += 10; 1202 } 1203 if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) { 1204 unsigned reg; 1205 for (reg = parse.FullToken.FullDeclaration.Range.First; 1206 reg <= parse.FullToken.FullDeclaration.Range.Last; 1207 ++reg) { 1208 ++mach->NumOutputs; 1209 } 1210 } 1211 else if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_SYSTEM_VALUE) { 1212 const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; 1213 mach->SysSemanticToIndex[decl->Semantic.Name] = decl->Range.First; 1214 } 1215 1216 memcpy(declarations + numDeclarations, 1217 &parse.FullToken.FullDeclaration, 1218 sizeof(declarations[0])); 1219 numDeclarations++; 1220 break; 1221 1222 case TGSI_TOKEN_TYPE_IMMEDIATE: 1223 { 1224 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 1225 assert( size <= 4 ); 1226 if (mach->ImmLimit >= mach->ImmsReserved) { 1227 unsigned newReserved = mach->ImmsReserved ? 
2 * mach->ImmsReserved : 128; 1228 float4 *imms = REALLOC(mach->Imms, mach->ImmsReserved, newReserved * sizeof(float4)); 1229 if (imms) { 1230 mach->ImmsReserved = newReserved; 1231 mach->Imms = imms; 1232 } else { 1233 debug_printf("Unable to (re)allocate space for immidiate constants\n"); 1234 break; 1235 } 1236 } 1237 1238 for( i = 0; i < size; i++ ) { 1239 mach->Imms[mach->ImmLimit][i] = 1240 parse.FullToken.FullImmediate.u[i].Float; 1241 } 1242 mach->ImmLimit += 1; 1243 } 1244 break; 1245 1246 case TGSI_TOKEN_TYPE_INSTRUCTION: 1247 1248 /* save expanded instruction */ 1249 if (numInstructions == maxInstructions) { 1250 instructions = REALLOC(instructions, 1251 maxInstructions 1252 * sizeof(struct tgsi_full_instruction), 1253 (maxInstructions + 10) 1254 * sizeof(struct tgsi_full_instruction)); 1255 maxInstructions += 10; 1256 } 1257 1258 memcpy(instructions + numInstructions, 1259 &parse.FullToken.FullInstruction, 1260 sizeof(instructions[0])); 1261 1262 numInstructions++; 1263 break; 1264 1265 case TGSI_TOKEN_TYPE_PROPERTY: 1266 if (mach->ShaderType == PIPE_SHADER_GEOMETRY) { 1267 if (parse.FullToken.FullProperty.Property.PropertyName == TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) { 1268 mach->MaxOutputVertices = parse.FullToken.FullProperty.u[0].Data; 1269 } 1270 } 1271 break; 1272 1273 default: 1274 assert( 0 ); 1275 } 1276 } 1277 tgsi_parse_free (&parse); 1278 1279 FREE(mach->Declarations); 1280 mach->Declarations = declarations; 1281 mach->NumDeclarations = numDeclarations; 1282 1283 FREE(mach->Instructions); 1284 mach->Instructions = instructions; 1285 mach->NumInstructions = numInstructions; 1286} 1287 1288 1289struct tgsi_exec_machine * 1290tgsi_exec_machine_create(enum pipe_shader_type shader_type) 1291{ 1292 struct tgsi_exec_machine *mach; 1293 uint i; 1294 1295 mach = align_malloc( sizeof *mach, 16 ); 1296 if (!mach) 1297 goto fail; 1298 1299 memset(mach, 0, sizeof(*mach)); 1300 1301 mach->ShaderType = shader_type; 1302 mach->Addrs = 
&mach->Temps[TGSI_EXEC_TEMP_ADDR]; 1303 mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES; 1304 1305 if (shader_type != PIPE_SHADER_COMPUTE) { 1306 mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_INPUTS, 16); 1307 mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_OUTPUTS, 16); 1308 if (!mach->Inputs || !mach->Outputs) 1309 goto fail; 1310 } 1311 1312 /* Setup constants needed by the SSE2 executor. */ 1313 for( i = 0; i < 4; i++ ) { 1314 mach->Temps[TGSI_EXEC_TEMP_00000000_I].xyzw[TGSI_EXEC_TEMP_00000000_C].u[i] = 0x00000000; 1315 mach->Temps[TGSI_EXEC_TEMP_7FFFFFFF_I].xyzw[TGSI_EXEC_TEMP_7FFFFFFF_C].u[i] = 0x7FFFFFFF; 1316 mach->Temps[TGSI_EXEC_TEMP_80000000_I].xyzw[TGSI_EXEC_TEMP_80000000_C].u[i] = 0x80000000; 1317 mach->Temps[TGSI_EXEC_TEMP_FFFFFFFF_I].xyzw[TGSI_EXEC_TEMP_FFFFFFFF_C].u[i] = 0xFFFFFFFF; /* not used */ 1318 mach->Temps[TGSI_EXEC_TEMP_ONE_I].xyzw[TGSI_EXEC_TEMP_ONE_C].f[i] = 1.0f; 1319 mach->Temps[TGSI_EXEC_TEMP_TWO_I].xyzw[TGSI_EXEC_TEMP_TWO_C].f[i] = 2.0f; /* not used */ 1320 mach->Temps[TGSI_EXEC_TEMP_128_I].xyzw[TGSI_EXEC_TEMP_128_C].f[i] = 128.0f; 1321 mach->Temps[TGSI_EXEC_TEMP_MINUS_128_I].xyzw[TGSI_EXEC_TEMP_MINUS_128_C].f[i] = -128.0f; 1322 mach->Temps[TGSI_EXEC_TEMP_THREE_I].xyzw[TGSI_EXEC_TEMP_THREE_C].f[i] = 3.0f; 1323 mach->Temps[TGSI_EXEC_TEMP_HALF_I].xyzw[TGSI_EXEC_TEMP_HALF_C].f[i] = 0.5f; 1324 } 1325 1326#ifdef DEBUG 1327 /* silence warnings */ 1328 (void) print_chan; 1329 (void) print_temp; 1330#endif 1331 1332 return mach; 1333 1334fail: 1335 if (mach) { 1336 align_free(mach->Inputs); 1337 align_free(mach->Outputs); 1338 align_free(mach); 1339 } 1340 return NULL; 1341} 1342 1343 1344void 1345tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach) 1346{ 1347 if (mach) { 1348 FREE(mach->Instructions); 1349 FREE(mach->Declarations); 1350 FREE(mach->Imms); 1351 1352 align_free(mach->Inputs); 1353 align_free(mach->Outputs); 1354 1355 align_free(mach); 1356 } 1357} 1358 
/*
 * Per-lane float arithmetic helpers.  Each operates on the four lanes of a
 * channel independently and writes the result to 'dst'.
 */

/** dst = src0 + src1 */
static void
micro_add(union tgsi_exec_channel *dst,
          const union tgsi_exec_channel *src0,
          const union tgsi_exec_channel *src1)
{
   dst->f[0] = src0->f[0] + src1->f[0];
   dst->f[1] = src0->f[1] + src1->f[1];
   dst->f[2] = src0->f[2] + src1->f[2];
   dst->f[3] = src0->f[3] + src1->f[3];
}

/**
 * dst = src0 / src1.
 * A lane whose divisor compares equal to zero is skipped: the corresponding
 * lane of 'dst' is left exactly as it was on entry.
 */
static void
micro_div(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
   if (src1->f[0] != 0) {
      dst->f[0] = src0->f[0] / src1->f[0];
   }
   if (src1->f[1] != 0) {
      dst->f[1] = src0->f[1] / src1->f[1];
   }
   if (src1->f[2] != 0) {
      dst->f[2] = src0->f[2] / src1->f[2];
   }
   if (src1->f[3] != 0) {
      dst->f[3] = src0->f[3] / src1->f[3];
   }
}

/** Per-lane select: dst = (src0 < src1) ? src2 : src3 */
static void
micro_lt(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1,
   const union tgsi_exec_channel *src2,
   const union tgsi_exec_channel *src3 )
{
   dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
   dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
   dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
   dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
}

/** dst = max(src0, src1); src1 is chosen whenever "src0 > src1" is false. */
static void
micro_max(union tgsi_exec_channel *dst,
          const union tgsi_exec_channel *src0,
          const union tgsi_exec_channel *src1)
{
   dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0];
   dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1];
   dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2];
   dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3];
}

/** dst = min(src0, src1); src1 is chosen whenever "src0 < src1" is false. */
static void
micro_min(union tgsi_exec_channel *dst,
          const union tgsi_exec_channel *src0,
          const union tgsi_exec_channel *src1)
{
   dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0];
   dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1];
   dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2];
   dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3];
}

/** dst = src0 * src1 */
static void
micro_mul(union tgsi_exec_channel *dst,
          const union tgsi_exec_channel *src0,
          const union tgsi_exec_channel *src1)
{
   dst->f[0] = src0->f[0] * src1->f[0];
   dst->f[1] = src0->f[1] * src1->f[1];
   dst->f[2] = src0->f[2] * src1->f[2];
   dst->f[3] = src0->f[3] * src1->f[3];
}

/** dst = -src (float negate) */
static void
micro_neg(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src )
{
   dst->f[0] = -src->f[0];
   dst->f[1] = -src->f[1];
   dst->f[2] = -src->f[2];
   dst->f[3] = -src->f[3];
}

/**
 * dst = src0 raised to the power src1.
 * Uses util_fast_pow() when FAST_MATH is non-zero, powf() otherwise.
 */
static void
micro_pow(
   union tgsi_exec_channel *dst,
   const union tgsi_exec_channel *src0,
   const union tgsi_exec_channel *src1 )
{
#if FAST_MATH
   dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] );
   dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] );
   dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] );
   dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] );
#else
   dst->f[0] = powf( src0->f[0], src1->f[0] );
   dst->f[1] = powf( src0->f[1], src1->f[1] );
   dst->f[2] = powf( src0->f[2], src1->f[2] );
   dst->f[3] = powf( src0->f[3], src1->f[3] );
#endif
}

/** dst = ldexpf(src0, src1); the exponent is read from src1's integer view. */
static void
micro_ldexp(union tgsi_exec_channel *dst,
            const union tgsi_exec_channel *src0,
            const union tgsi_exec_channel *src1)
{
   dst->f[0] = ldexpf(src0->f[0], src1->i[0]);
   dst->f[1] = ldexpf(src0->f[1], src1->i[1]);
   dst->f[2] = ldexpf(src0->f[2], src1->i[2]);
   dst->f[3] = ldexpf(src0->f[3], src1->i[3]);
}

/** dst = src0 - src1 */
static void
micro_sub(union tgsi_exec_channel *dst,
          const union tgsi_exec_channel *src0,
          const union tgsi_exec_channel *src1)
{
   dst->f[0] = src0->f[0] - src1->f[0];
   dst->f[1] = src0->f[1] - src1->f[1];
   dst->f[2] = src0->f[2] - src1->f[2];
   dst->f[3] = src0->f[3] - src1->f[3];
}

/**
 * Read one channel (selected by 'swizzle') of a register for all lanes of
 * the quad.
 *
 * \param mach     machine whose register files are read
 * \param file     TGSI_FILE_x register file to read from
 * \param swizzle  channel to read, 0..3
 * \param index    per-lane register index within the file
 * \param index2D  per-lane second subscript: the constant buffer number for
 *                 TGSI_FILE_CONSTANT and the row for TGSI_FILE_INPUT; the
 *                 other files that check it assert it is zero
 * \param chan     receives the fetched value of each lane
 *
 * Only constant fetches are range checked at run time (out-of-range reads
 * yield 0); every other file relies on assert() alone.  An unknown file
 * asserts and yields all zeros.
 */
static void
fetch_src_file_channel(const struct tgsi_exec_machine *mach,
                       const uint file,
                       const uint swizzle,
                       const union tgsi_exec_channel *index,
                       const union tgsi_exec_channel *index2D,
                       union tgsi_exec_channel *chan)
{
   uint i;

   assert(swizzle < 4);

   switch (file) {
   case TGSI_FILE_CONSTANT:
      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
         assert(index2D->i[i] >= 0 && index2D->i[i] < PIPE_MAX_CONSTANT_BUFFERS);
         assert(mach->Consts[index2D->i[i]]);

         if (index->i[i] < 0) {
            chan->u[i] = 0;
         } else {
            /* NOTE: copying the const value as a uint instead of float */
            const uint constbuf = index2D->i[i];
            const uint *buf = (const uint *)mach->Consts[constbuf];
            /* position in 32-bit words: four words per register */
            const int pos = index->i[i] * 4 + swizzle;
            /* const buffer bounds check */
            /* NOTE(review): 'pos' counts 32-bit words; the unit of
             * ConstsSize is not visible from here - confirm they match.
             */
            if (pos < 0 || pos >= (int) mach->ConstsSize[constbuf]) {
               if (0) {
                  /* Debug: print warning */
                  static int count = 0;
                  if (count++ < 100)
                     debug_printf("TGSI Exec: const buffer index %d"
                                  " out of bounds\n", pos);
               }
               chan->u[i] = 0;
            }
            else
               chan->u[i] = buf[pos];
         }
      }
      break;

   case TGSI_FILE_INPUT:
      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
         /*
         if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
            debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n",
                         index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i],
                         index2D->i[i], index->i[i]);
                         }*/
         /* inputs are laid out as rows of TGSI_EXEC_MAX_INPUT_ATTRIBS */
         int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i];
         assert(pos >= 0);
         assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS);
         chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i];
      }
      break;

   case TGSI_FILE_SYSTEM_VALUE:
      /* XXX no swizzling at this point.  Will be needed if we put
       * gl_FragCoord, for example, in a sys value register.
       */
      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
         chan->u[i] = mach->SystemValue[index->i[i]].xyzw[swizzle].u[i];
      }
      break;

   case TGSI_FILE_TEMPORARY:
      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
         /* NOTE(review): only the upper bound is asserted here */
         assert(index->i[i] < TGSI_EXEC_NUM_TEMPS);
         assert(index2D->i[i] == 0);

         chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i];
      }
      break;

   case TGSI_FILE_IMMEDIATE:
      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
         assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit);
         assert(index2D->i[i] == 0);

         chan->f[i] = mach->Imms[index->i[i]][swizzle];
      }
      break;

   case TGSI_FILE_ADDRESS:
      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
         assert(index->i[i] >= 0);
         assert(index2D->i[i] == 0);

         chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i];
      }
      break;

   case TGSI_FILE_OUTPUT:
      /* vertex/fragment output vars can be read too */
      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
         assert(index->i[i] >= 0);
         assert(index2D->i[i] == 0);

         chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i];
      }
      break;

   default:
      assert(0);
      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
         chan->u[i] = 0;
      }
   }
}

/**
 * Fetch channel 'chan_index' of source operand 'reg' into 'chan', resolving
 * indirect (address-relative) and two-dimensional addressing and the source
 * swizzle.  The abs/negate modifiers are NOT applied here; see
 * fetch_source().
 */
static void
fetch_source_d(const struct tgsi_exec_machine *mach,
               union tgsi_exec_channel *chan,
               const struct tgsi_full_src_register *reg,
               const uint chan_index)
{
   union tgsi_exec_channel index;
   union tgsi_exec_channel index2D;
   uint swizzle;

   /* We start with a direct index into a register file.
    *
    *    file[1],
    *    where:
    *       file = Register.File
    *       [1] = Register.Index
    */
   index.i[0] =
   index.i[1] =
   index.i[2] =
   index.i[3] = reg->Register.Index;

   /* There is an extra source register that indirectly subscripts
    * a register file. The direct index now becomes an offset
    * that is being added to the indirect register.
    *
    *    file[ind[2].x+1],
    *    where:
    *       ind = Indirect.File
    *       [2] = Indirect.Index
    *       .x = Indirect.Swizzle
    */
   if (reg->Register.Indirect) {
      union tgsi_exec_channel index2;
      union tgsi_exec_channel indir_index;
      const uint execmask = mach->ExecMask;
      uint i;

      /* which address register (always zero now) */
      index2.i[0] =
      index2.i[1] =
      index2.i[2] =
      index2.i[3] = reg->Indirect.Index;
      /* get current value of address register[swizzle] */
      swizzle = reg->Indirect.Swizzle;
      fetch_src_file_channel(mach,
                             reg->Indirect.File,
                             swizzle,
                             &index2,
                             &ZeroVec,
                             &indir_index);

      /* add value of address register to the offset */
      index.i[0] += indir_index.i[0];
      index.i[1] += indir_index.i[1];
      index.i[2] += indir_index.i[2];
      index.i[3] += indir_index.i[3];

      /* for disabled execution channels, zero-out the index to
       * avoid using a potential garbage value.
       */
      for (i = 0; i < TGSI_QUAD_SIZE; i++) {
         if ((execmask & (1 << i)) == 0)
            index.i[i] = 0;
      }
   }

   /* There is an extra source register that is a second
    * subscript to a register file. Effectively it means that
    * the register file is actually a 2D array of registers.
    *
    *    file[3][1],
    *    where:
    *       [3] = Dimension.Index
    */
   if (reg->Register.Dimension) {
      index2D.i[0] =
      index2D.i[1] =
      index2D.i[2] =
      index2D.i[3] = reg->Dimension.Index;

      /* Again, the second subscript index can be addressed indirectly
       * identically to the first one.
       * Nothing stops us from indirectly addressing the indirect register,
       * but there is no need for that, so we won't exercise it.
       *
       *    file[ind[4].y+3][1],
       *    where:
       *       ind = DimIndirect.File
       *       [4] = DimIndirect.Index
       *       .y = DimIndirect.Swizzle
       */
      if (reg->Dimension.Indirect) {
         union tgsi_exec_channel index2;
         union tgsi_exec_channel indir_index;
         const uint execmask = mach->ExecMask;
         uint i;

         index2.i[0] =
         index2.i[1] =
         index2.i[2] =
         index2.i[3] = reg->DimIndirect.Index;

         swizzle = reg->DimIndirect.Swizzle;
         fetch_src_file_channel(mach,
                                reg->DimIndirect.File,
                                swizzle,
                                &index2,
                                &ZeroVec,
                                &indir_index);

         index2D.i[0] += indir_index.i[0];
         index2D.i[1] += indir_index.i[1];
         index2D.i[2] += indir_index.i[2];
         index2D.i[3] += indir_index.i[3];

         /* for disabled execution channels, zero-out the index to
          * avoid using a potential garbage value.
          */
         for (i = 0; i < TGSI_QUAD_SIZE; i++) {
            if ((execmask & (1 << i)) == 0) {
               index2D.i[i] = 0;
            }
         }
      }

      /* If by any chance there was a need for a 3D array of register
       * files, we would have to check whether Dimension is followed
       * by a dimension register and continue the saga.
       */
   } else {
      index2D.i[0] =
      index2D.i[1] =
      index2D.i[2] =
      index2D.i[3] = 0;
   }

   /* finally read the channel the operand's swizzle maps chan_index to */
   swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
   fetch_src_file_channel(mach,
                          reg->Register.File,
                          swizzle,
                          &index,
                          &index2D,
                          chan);
}

/**
 * Fetch channel 'chan_index' of source operand 'reg' and apply its source
 * modifiers: absolute value first, then negation.  The float variants are
 * used when src_datatype is TGSI_EXEC_DATA_FLOAT, the integer variants for
 * every other data type.
 */
static void
fetch_source(const struct tgsi_exec_machine *mach,
             union tgsi_exec_channel *chan,
             const struct tgsi_full_src_register *reg,
             const uint chan_index,
             enum tgsi_exec_datatype src_datatype)
{
   fetch_source_d(mach, chan, reg, chan_index);

   if (reg->Register.Absolute) {
      if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
         micro_abs(chan, chan);
      } else {
         micro_iabs(chan, chan);
      }
   }

   if (reg->Register.Negate) {
      if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
         micro_neg(chan, chan);
      } else {
         micro_ineg(chan, chan);
      }
   }
}

/**
 * Resolve destination operand 'reg' to the channel that must be written.
 *
 * \return pointer to channel 'chan_index' of the destination register; a
 *         static scratch channel for TGSI_FILE_NULL; NULL (after asserting)
 *         for any other unsupported file.
 *
 * Nothing is written here.  'chan' and 'dst_datatype' are only referenced
 * by the disabled debugging check at the top.
 *
 * NOTE(review): for indirect destinations only lane 0 of the address value
 * is used as the offset, the offset is not applied to TGSI_FILE_ADDRESS,
 * and index2D is computed but not consumed by the switch below.
 */
static union tgsi_exec_channel *
store_dest_dstret(struct tgsi_exec_machine *mach,
                 const union tgsi_exec_channel *chan,
                 const struct tgsi_full_dst_register *reg,
                 uint chan_index,
                 enum tgsi_exec_datatype dst_datatype)
{
   static union tgsi_exec_channel null;
   union tgsi_exec_channel *dst;
   union tgsi_exec_channel index2D;
   int offset = 0;  /* indirection offset */
   int index;

   /* for debugging */
   if (0 && dst_datatype == TGSI_EXEC_DATA_FLOAT) {
      check_inf_or_nan(chan);
   }

   /* There is an extra source register that indirectly subscripts
    * a register file. The direct index now becomes an offset
    * that is being added to the indirect register.
    *
    *    file[ind[2].x+1],
    *    where:
    *       ind = Indirect.File
    *       [2] = Indirect.Index
    *       .x = Indirect.Swizzle
    */
   if (reg->Register.Indirect) {
      union tgsi_exec_channel index;
      union tgsi_exec_channel indir_index;
      uint swizzle;

      /* which address register (always zero for now) */
      index.i[0] =
      index.i[1] =
      index.i[2] =
      index.i[3] = reg->Indirect.Index;

      /* get current value of address register[swizzle] */
      swizzle = reg->Indirect.Swizzle;

      /* fetch values from the address/indirection register */
      fetch_src_file_channel(mach,
                             reg->Indirect.File,
                             swizzle,
                             &index,
                             &ZeroVec,
                             &indir_index);

      /* save indirection offset */
      offset = indir_index.i[0];
   }

   /* There is an extra source register that is a second
    * subscript to a register file. Effectively it means that
    * the register file is actually a 2D array of registers.
    *
    *    file[3][1],
    *    where:
    *       [3] = Dimension.Index
    */
   if (reg->Register.Dimension) {
      index2D.i[0] =
      index2D.i[1] =
      index2D.i[2] =
      index2D.i[3] = reg->Dimension.Index;

      /* Again, the second subscript index can be addressed indirectly
       * identically to the first one.
       * Nothing stops us from indirectly addressing the indirect register,
       * but there is no need for that, so we won't exercise it.
       *
       *    file[ind[4].y+3][1],
       *    where:
       *       ind = DimIndirect.File
       *       [4] = DimIndirect.Index
       *       .y = DimIndirect.Swizzle
       */
      if (reg->Dimension.Indirect) {
         union tgsi_exec_channel index2;
         union tgsi_exec_channel indir_index;
         const uint execmask = mach->ExecMask;
         unsigned swizzle;
         uint i;

         index2.i[0] =
         index2.i[1] =
         index2.i[2] =
         index2.i[3] = reg->DimIndirect.Index;

         swizzle = reg->DimIndirect.Swizzle;
         fetch_src_file_channel(mach,
                                reg->DimIndirect.File,
                                swizzle,
                                &index2,
                                &ZeroVec,
                                &indir_index);

         index2D.i[0] += indir_index.i[0];
         index2D.i[1] += indir_index.i[1];
         index2D.i[2] += indir_index.i[2];
         index2D.i[3] += indir_index.i[3];

         /* for disabled execution channels, zero-out the index to
          * avoid using a potential garbage value.
          */
         for (i = 0; i < TGSI_QUAD_SIZE; i++) {
            if ((execmask & (1 << i)) == 0) {
               index2D.i[i] = 0;
            }
         }
      }

      /* If by any chance there was a need for a 3D array of register
       * files, we would have to check whether Dimension is followed
       * by a dimension register and continue the saga.
       */
   } else {
      index2D.i[0] =
      index2D.i[1] =
      index2D.i[2] =
      index2D.i[3] = 0;
   }

   switch (reg->Register.File) {
   case TGSI_FILE_NULL:
      /* writes to the null file land in a throw-away channel */
      dst = &null;
      break;

   case TGSI_FILE_OUTPUT:
      /* outputs are based at the running output offset kept in the
       * TEMP_OUTPUT utility register (advanced by emit_vertex())
       */
      index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
         + reg->Register.Index;
      dst = &mach->Outputs[offset + index].xyzw[chan_index];
#if 0
      debug_printf("NumOutputs = %d, TEMP_O_C/I = %d, redindex = %d\n",
                   mach->NumOutputs, mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0],
                   reg->Register.Index);
      if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
         debug_printf("STORING OUT[%d] mask(%d), = (", offset + index, execmask);
         for (i = 0; i < TGSI_QUAD_SIZE; i++)
            if (execmask & (1 << i))
               debug_printf("%f, ", chan->f[i]);
         debug_printf(")\n");
      }
#endif
      break;

   case TGSI_FILE_TEMPORARY:
      index = reg->Register.Index;
      /* NOTE(review): the assertion covers the direct index only, not
       * offset + index
       */
      assert( index < TGSI_EXEC_NUM_TEMPS );
      dst = &mach->Temps[offset + index].xyzw[chan_index];
      break;

   case TGSI_FILE_ADDRESS:
      index = reg->Register.Index;
      dst = &mach->Addrs[index].xyzw[chan_index];
      break;

   default:
      assert( 0 );
      return NULL;
   }

   return dst;
}

/**
 * Store 'chan' to destination 'reg' without saturation: the raw bits of
 * every lane enabled in mach->ExecMask are copied, disabled lanes keep their
 * old contents.  Does nothing if the destination cannot be resolved.
 */
static void
store_dest_double(struct tgsi_exec_machine *mach,
                 const union tgsi_exec_channel *chan,
                 const struct tgsi_full_dst_register *reg,
                 uint chan_index,
                 enum tgsi_exec_datatype dst_datatype)
{
   union tgsi_exec_channel *dst;
   const uint execmask = mach->ExecMask;
   int i;

   dst = store_dest_dstret(mach, chan, reg, chan_index, dst_datatype);
   if (!dst)
      return;

   /* doubles path */
   for (i = 0; i < TGSI_QUAD_SIZE; i++)
      if (execmask & (1 << i))
         dst->i[i] = chan->i[i];
}

/**
 * Store 'chan' to destination 'reg', honouring the execution mask and the
 * instruction's Saturate flag.
 *
 * Without saturation the raw bits are copied.  With saturation the float
 * value of each enabled lane is clamped to [0.0, 1.0]; a value for which
 * neither comparison is true is stored unchanged.
 */
static void
store_dest(struct tgsi_exec_machine *mach,
           const union tgsi_exec_channel *chan,
           const struct tgsi_full_dst_register *reg,
           const struct tgsi_full_instruction *inst,
           uint chan_index,
           enum tgsi_exec_datatype dst_datatype)
{
   union tgsi_exec_channel *dst;
   const uint execmask = mach->ExecMask;
   int i;

   dst = store_dest_dstret(mach, chan, reg, chan_index, dst_datatype);
   if (!dst)
      return;

   if (!inst->Instruction.Saturate) {
      for (i = 0; i < TGSI_QUAD_SIZE; i++)
         if (execmask & (1 << i))
            dst->i[i] = chan->i[i];
   }
   else {
      for (i = 0; i < TGSI_QUAD_SIZE; i++)
         if (execmask & (1 << i)) {
            if (chan->f[i] < 0.0f)
               dst->f[i] = 0.0f;
            else if (chan->f[i] > 1.0f)
               dst->f[i] = 1.0f;
            else
               dst->i[i] = chan->i[i];
         }
   }
}

/*
 * Convenience wrappers around fetch_source() for float and integer
 * operands.  They expand using the identifiers 'mach' and 'inst', which must
 * be in scope at the point of use.
 */
#define FETCH(VAL,INDEX,CHAN)\
    fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT)

#define IFETCH(VAL,INDEX,CHAN)\
    fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_INT)


/**
 * Execute ARB-style KIL which is predicated by a src register.
 * Kill fragment if any of the four values is less than zero.
 *
 * Source components selected more than once by the swizzle are tested only
 * once.  The resulting per-fragment bits are restricted to the fragments
 * currently executing and OR'ed into the kill-mask utility register.
 */
static void
exec_kill_if(struct tgsi_exec_machine *mach,
             const struct tgsi_full_instruction *inst)
{
   uint uniquemask;
   uint chan_index;
   uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
   union tgsi_exec_channel r[1];

   /* This mask stores component bits that were already tested. */
   uniquemask = 0;

   for (chan_index = 0; chan_index < 4; chan_index++)
   {
      uint swizzle;
      uint i;

      /* unswizzle channel */
      swizzle = tgsi_util_get_full_src_register_swizzle (
                        &inst->Src[0],
                        chan_index);

      /* check if the component has not been already tested */
      if (uniquemask & (1 << swizzle))
         continue;
      uniquemask |= 1 << swizzle;

      FETCH(&r[0], 0, chan_index);
      for (i = 0; i < 4; i++)
         if (r[0].f[i] < 0.0f)
            kilmask |= 1 << i;
   }

   /* restrict to fragments currently executing */
   kilmask &= mach->ExecMask;

   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
}

/**
 * Unconditional fragment kill/discard.
 */
static void
exec_kill(struct tgsi_exec_machine *mach)
{
   uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */

   /* kill fragment for all fragments currently executing */
   kilmask = mach->ExecMask;
   mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask;
}

/**
 * Finish the current output vertex: advance the output base offset by
 * NumOutputs registers and count one more vertex in the current primitive.
 *
 * Nothing happens when no lane is executing, or when the current primitive
 * already holds MaxOutputVertices vertices.
 */
static void
emit_vertex(struct tgsi_exec_machine *mach)
{
   /* FIXME: check for exec mask correctly
   unsigned i;
   for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
         if ((mach->ExecMask & (1 << i)))
   */
   if (mach->ExecMask) {
      if (mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] >= mach->MaxOutputVertices)
         return;

      mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs;
      mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++;
   }
}

/**
 * Close the current primitive and start a new, empty one: the primitive
 * counter is incremented and the vertex count of the new primitive is reset
 * to zero.  Nothing happens when no lane is executing.
 */
static void
emit_primitive(struct tgsi_exec_machine *mach)
{
   unsigned *prim_count = &mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0];
   /* FIXME: check for exec mask correctly
   unsigned i;
   for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
         if ((mach->ExecMask & (1 << i)))
   */
   if (mach->ExecMask) {
      ++(*prim_count);
      debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs);
      mach->Primitives[*prim_count] = 0;
   }
}

/**
 * For geometry shaders only: close the current primitive if at least one
 * vertex has been emitted into it.
 */
static void
conditional_emit_primitive(struct tgsi_exec_machine *mach)
{
   if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
      int emitted_verts =
         mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]];
      if (emitted_verts) {
         emit_primitive(mach);
      }
   }
}


/*
 * Fetch four texture samples using STR texture coordinates.
 *
 * All arguments are forwarded to sampler->get_samples(); the returned
 * rgba[channel][lane] array is then split into the r, g, b and a output
 * channels.
 */
static void
fetch_texel( struct tgsi_sampler *sampler,
             const unsigned sview_idx,
             const unsigned sampler_idx,
             const union tgsi_exec_channel *s,
             const union tgsi_exec_channel *t,
             const union tgsi_exec_channel *p,
             const union tgsi_exec_channel *c0,
             const union tgsi_exec_channel *c1,
             float derivs[3][2][TGSI_QUAD_SIZE],
             const int8_t offset[3],
             enum tgsi_sampler_control control,
             union tgsi_exec_channel *r,
             union tgsi_exec_channel *g,
             union tgsi_exec_channel *b,
             union tgsi_exec_channel *a )
{
   uint j;
   float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];

   /* FIXME: handle explicit derivs, offsets */
   sampler->get_samples(sampler, sview_idx, sampler_idx,
                        s->f, t->f, p->f, c0->f, c1->f, derivs, offset, control, rgba);

   for (j = 0; j < 4; j++) {
      r->f[j] = rgba[0][j];
      g->f[j] = rgba[1][j];
      b->f[j] = rgba[2][j];
      a->f[j] = rgba[3][j];
   }
}


/* Values of the 'modifier' argument of exec_tex(). */
#define TEX_MODIFIER_NONE           0
#define TEX_MODIFIER_PROJECTED      1
#define TEX_MODIFIER_LOD_BIAS       2
#define TEX_MODIFIER_EXPLICIT_LOD   3
#define TEX_MODIFIER_LEVEL_ZERO     4
#define TEX_MODIFIER_GATHER         5

/*
 * Fetch all 3 (for s,t,r coords) texel offsets, put them into int array.
2148 */ 2149static void 2150fetch_texel_offsets(struct tgsi_exec_machine *mach, 2151 const struct tgsi_full_instruction *inst, 2152 int8_t offsets[3]) 2153{ 2154 if (inst->Texture.NumOffsets == 1) { 2155 union tgsi_exec_channel index; 2156 union tgsi_exec_channel offset[3]; 2157 index.i[0] = index.i[1] = index.i[2] = index.i[3] = inst->TexOffsets[0].Index; 2158 fetch_src_file_channel(mach, inst->TexOffsets[0].File, 2159 inst->TexOffsets[0].SwizzleX, &index, &ZeroVec, &offset[0]); 2160 fetch_src_file_channel(mach, inst->TexOffsets[0].File, 2161 inst->TexOffsets[0].SwizzleY, &index, &ZeroVec, &offset[1]); 2162 fetch_src_file_channel(mach, inst->TexOffsets[0].File, 2163 inst->TexOffsets[0].SwizzleZ, &index, &ZeroVec, &offset[2]); 2164 offsets[0] = offset[0].i[0]; 2165 offsets[1] = offset[1].i[0]; 2166 offsets[2] = offset[2].i[0]; 2167 } else { 2168 assert(inst->Texture.NumOffsets == 0); 2169 offsets[0] = offsets[1] = offsets[2] = 0; 2170 } 2171} 2172 2173 2174/* 2175 * Fetch dx and dy values for one channel (s, t or r). 2176 * Put dx values into one float array, dy values into another. 
2177 */ 2178static void 2179fetch_assign_deriv_channel(struct tgsi_exec_machine *mach, 2180 const struct tgsi_full_instruction *inst, 2181 unsigned regdsrcx, 2182 unsigned chan, 2183 float derivs[2][TGSI_QUAD_SIZE]) 2184{ 2185 union tgsi_exec_channel d; 2186 FETCH(&d, regdsrcx, chan); 2187 derivs[0][0] = d.f[0]; 2188 derivs[0][1] = d.f[1]; 2189 derivs[0][2] = d.f[2]; 2190 derivs[0][3] = d.f[3]; 2191 FETCH(&d, regdsrcx + 1, chan); 2192 derivs[1][0] = d.f[0]; 2193 derivs[1][1] = d.f[1]; 2194 derivs[1][2] = d.f[2]; 2195 derivs[1][3] = d.f[3]; 2196} 2197 2198static uint 2199fetch_sampler_unit(struct tgsi_exec_machine *mach, 2200 const struct tgsi_full_instruction *inst, 2201 uint sampler) 2202{ 2203 uint unit = 0; 2204 int i; 2205 if (inst->Src[sampler].Register.Indirect) { 2206 const struct tgsi_full_src_register *reg = &inst->Src[sampler]; 2207 union tgsi_exec_channel indir_index, index2; 2208 const uint execmask = mach->ExecMask; 2209 index2.i[0] = 2210 index2.i[1] = 2211 index2.i[2] = 2212 index2.i[3] = reg->Indirect.Index; 2213 2214 fetch_src_file_channel(mach, 2215 reg->Indirect.File, 2216 reg->Indirect.Swizzle, 2217 &index2, 2218 &ZeroVec, 2219 &indir_index); 2220 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 2221 if (execmask & (1 << i)) { 2222 unit = inst->Src[sampler].Register.Index + indir_index.i[i]; 2223 break; 2224 } 2225 } 2226 2227 } else { 2228 unit = inst->Src[sampler].Register.Index; 2229 } 2230 return unit; 2231} 2232 2233/* 2234 * execute a texture instruction. 2235 * 2236 * modifier is used to control the channel routing for the 2237 * instruction variants like proj, lod, and texture with lod bias. 2238 * sampler indicates which src register the sampler is contained in. 
2239 */ 2240static void 2241exec_tex(struct tgsi_exec_machine *mach, 2242 const struct tgsi_full_instruction *inst, 2243 uint modifier, uint sampler) 2244{ 2245 const union tgsi_exec_channel *args[5], *proj = NULL; 2246 union tgsi_exec_channel r[5]; 2247 enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE; 2248 uint chan; 2249 uint unit; 2250 int8_t offsets[3]; 2251 int dim, shadow_ref, i; 2252 2253 unit = fetch_sampler_unit(mach, inst, sampler); 2254 /* always fetch all 3 offsets, overkill but keeps code simple */ 2255 fetch_texel_offsets(mach, inst, offsets); 2256 2257 assert(modifier != TEX_MODIFIER_LEVEL_ZERO); 2258 assert(inst->Texture.Texture != TGSI_TEXTURE_BUFFER); 2259 2260 dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture); 2261 shadow_ref = tgsi_util_get_shadow_ref_src_index(inst->Texture.Texture); 2262 2263 assert(dim <= 4); 2264 if (shadow_ref >= 0) 2265 assert(shadow_ref >= dim && shadow_ref < (int)ARRAY_SIZE(args)); 2266 2267 /* fetch modifier to the last argument */ 2268 if (modifier != TEX_MODIFIER_NONE) { 2269 const int last = ARRAY_SIZE(args) - 1; 2270 2271 /* fetch modifier from src0.w or src1.x */ 2272 if (sampler == 1) { 2273 assert(dim <= TGSI_CHAN_W && shadow_ref != TGSI_CHAN_W); 2274 FETCH(&r[last], 0, TGSI_CHAN_W); 2275 } 2276 else { 2277 assert(shadow_ref != 4); 2278 FETCH(&r[last], 1, TGSI_CHAN_X); 2279 } 2280 2281 if (modifier != TEX_MODIFIER_PROJECTED) { 2282 args[last] = &r[last]; 2283 } 2284 else { 2285 proj = &r[last]; 2286 args[last] = &ZeroVec; 2287 } 2288 2289 /* point unused arguments to zero vector */ 2290 for (i = dim; i < last; i++) 2291 args[i] = &ZeroVec; 2292 2293 if (modifier == TEX_MODIFIER_EXPLICIT_LOD) 2294 control = TGSI_SAMPLER_LOD_EXPLICIT; 2295 else if (modifier == TEX_MODIFIER_LOD_BIAS) 2296 control = TGSI_SAMPLER_LOD_BIAS; 2297 else if (modifier == TEX_MODIFIER_GATHER) 2298 control = TGSI_SAMPLER_GATHER; 2299 } 2300 else { 2301 for (i = dim; i < (int)ARRAY_SIZE(args); i++) 2302 args[i] = &ZeroVec; 
2303 } 2304 2305 /* fetch coordinates */ 2306 for (i = 0; i < dim; i++) { 2307 FETCH(&r[i], 0, TGSI_CHAN_X + i); 2308 2309 if (proj) 2310 micro_div(&r[i], &r[i], proj); 2311 2312 args[i] = &r[i]; 2313 } 2314 2315 /* fetch reference value */ 2316 if (shadow_ref >= 0) { 2317 FETCH(&r[shadow_ref], shadow_ref / 4, TGSI_CHAN_X + (shadow_ref % 4)); 2318 2319 if (proj) 2320 micro_div(&r[shadow_ref], &r[shadow_ref], proj); 2321 2322 args[shadow_ref] = &r[shadow_ref]; 2323 } 2324 2325 fetch_texel(mach->Sampler, unit, unit, 2326 args[0], args[1], args[2], args[3], args[4], 2327 NULL, offsets, control, 2328 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2329 2330#if 0 2331 debug_printf("fetch r: %g %g %g %g\n", 2332 r[0].f[0], r[0].f[1], r[0].f[2], r[0].f[3]); 2333 debug_printf("fetch g: %g %g %g %g\n", 2334 r[1].f[0], r[1].f[1], r[1].f[2], r[1].f[3]); 2335 debug_printf("fetch b: %g %g %g %g\n", 2336 r[2].f[0], r[2].f[1], r[2].f[2], r[2].f[3]); 2337 debug_printf("fetch a: %g %g %g %g\n", 2338 r[3].f[0], r[3].f[1], r[3].f[2], r[3].f[3]); 2339#endif 2340 2341 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2342 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2343 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2344 } 2345 } 2346} 2347 2348static void 2349exec_lodq(struct tgsi_exec_machine *mach, 2350 const struct tgsi_full_instruction *inst) 2351{ 2352 uint resource_unit, sampler_unit; 2353 unsigned dim; 2354 unsigned i; 2355 union tgsi_exec_channel coords[4]; 2356 const union tgsi_exec_channel *args[ARRAY_SIZE(coords)]; 2357 union tgsi_exec_channel r[2]; 2358 2359 resource_unit = fetch_sampler_unit(mach, inst, 1); 2360 if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) { 2361 uint target = mach->SamplerViews[resource_unit].Resource; 2362 dim = tgsi_util_get_texture_coord_dim(target); 2363 sampler_unit = fetch_sampler_unit(mach, inst, 2); 2364 } else { 2365 dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture); 2366 sampler_unit = 
resource_unit; 2367 } 2368 assert(dim <= ARRAY_SIZE(coords)); 2369 /* fetch coordinates */ 2370 for (i = 0; i < dim; i++) { 2371 FETCH(&coords[i], 0, TGSI_CHAN_X + i); 2372 args[i] = &coords[i]; 2373 } 2374 for (i = dim; i < ARRAY_SIZE(coords); i++) { 2375 args[i] = &ZeroVec; 2376 } 2377 mach->Sampler->query_lod(mach->Sampler, resource_unit, sampler_unit, 2378 args[0]->f, 2379 args[1]->f, 2380 args[2]->f, 2381 args[3]->f, 2382 TGSI_SAMPLER_LOD_NONE, 2383 r[0].f, 2384 r[1].f); 2385 2386 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 2387 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X, 2388 TGSI_EXEC_DATA_FLOAT); 2389 } 2390 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 2391 store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y, 2392 TGSI_EXEC_DATA_FLOAT); 2393 } 2394 if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) { 2395 unsigned char swizzles[4]; 2396 unsigned chan; 2397 swizzles[0] = inst->Src[1].Register.SwizzleX; 2398 swizzles[1] = inst->Src[1].Register.SwizzleY; 2399 swizzles[2] = inst->Src[1].Register.SwizzleZ; 2400 swizzles[3] = inst->Src[1].Register.SwizzleW; 2401 2402 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2403 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2404 if (swizzles[chan] >= 2) { 2405 store_dest(mach, &ZeroVec, 2406 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2407 } else { 2408 store_dest(mach, &r[swizzles[chan]], 2409 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2410 } 2411 } 2412 } 2413 } else { 2414 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 2415 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X, 2416 TGSI_EXEC_DATA_FLOAT); 2417 } 2418 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 2419 store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y, 2420 TGSI_EXEC_DATA_FLOAT); 2421 } 2422 } 2423} 2424 2425static void 2426exec_txd(struct tgsi_exec_machine *mach, 2427 const struct tgsi_full_instruction *inst) 2428{ 2429 union tgsi_exec_channel r[4]; 2430 
float derivs[3][2][TGSI_QUAD_SIZE]; 2431 uint chan; 2432 uint unit; 2433 int8_t offsets[3]; 2434 2435 unit = fetch_sampler_unit(mach, inst, 3); 2436 /* always fetch all 3 offsets, overkill but keeps code simple */ 2437 fetch_texel_offsets(mach, inst, offsets); 2438 2439 switch (inst->Texture.Texture) { 2440 case TGSI_TEXTURE_1D: 2441 FETCH(&r[0], 0, TGSI_CHAN_X); 2442 2443 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 2444 2445 fetch_texel(mach->Sampler, unit, unit, 2446 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ 2447 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2448 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2449 break; 2450 2451 case TGSI_TEXTURE_SHADOW1D: 2452 case TGSI_TEXTURE_1D_ARRAY: 2453 case TGSI_TEXTURE_SHADOW1D_ARRAY: 2454 /* SHADOW1D/1D_ARRAY would not need Y/Z respectively, but don't bother */ 2455 FETCH(&r[0], 0, TGSI_CHAN_X); 2456 FETCH(&r[1], 0, TGSI_CHAN_Y); 2457 FETCH(&r[2], 0, TGSI_CHAN_Z); 2458 2459 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 2460 2461 fetch_texel(mach->Sampler, unit, unit, 2462 &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ 2463 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2464 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2465 break; 2466 2467 case TGSI_TEXTURE_2D: 2468 case TGSI_TEXTURE_RECT: 2469 FETCH(&r[0], 0, TGSI_CHAN_X); 2470 FETCH(&r[1], 0, TGSI_CHAN_Y); 2471 2472 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 2473 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]); 2474 2475 fetch_texel(mach->Sampler, unit, unit, 2476 &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ 2477 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2478 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2479 break; 2480 2481 2482 case TGSI_TEXTURE_SHADOW2D: 2483 case TGSI_TEXTURE_SHADOWRECT: 2484 case TGSI_TEXTURE_2D_ARRAY: 2485 case TGSI_TEXTURE_SHADOW2D_ARRAY: 2486 /* only SHADOW2D_ARRAY actually needs W */ 
2487 FETCH(&r[0], 0, TGSI_CHAN_X); 2488 FETCH(&r[1], 0, TGSI_CHAN_Y); 2489 FETCH(&r[2], 0, TGSI_CHAN_Z); 2490 FETCH(&r[3], 0, TGSI_CHAN_W); 2491 2492 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 2493 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]); 2494 2495 fetch_texel(mach->Sampler, unit, unit, 2496 &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */ 2497 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2498 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2499 break; 2500 2501 case TGSI_TEXTURE_3D: 2502 case TGSI_TEXTURE_CUBE: 2503 case TGSI_TEXTURE_CUBE_ARRAY: 2504 case TGSI_TEXTURE_SHADOWCUBE: 2505 /* only TEXTURE_CUBE_ARRAY and TEXTURE_SHADOWCUBE actually need W */ 2506 FETCH(&r[0], 0, TGSI_CHAN_X); 2507 FETCH(&r[1], 0, TGSI_CHAN_Y); 2508 FETCH(&r[2], 0, TGSI_CHAN_Z); 2509 FETCH(&r[3], 0, TGSI_CHAN_W); 2510 2511 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 2512 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]); 2513 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Z, derivs[2]); 2514 2515 fetch_texel(mach->Sampler, unit, unit, 2516 &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */ 2517 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2518 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2519 break; 2520 2521 default: 2522 assert(0); 2523 } 2524 2525 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2526 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2527 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2528 } 2529 } 2530} 2531 2532 2533static void 2534exec_txf(struct tgsi_exec_machine *mach, 2535 const struct tgsi_full_instruction *inst) 2536{ 2537 union tgsi_exec_channel r[4]; 2538 uint chan; 2539 uint unit; 2540 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 2541 int j; 2542 int8_t offsets[3]; 2543 unsigned target; 2544 2545 unit = fetch_sampler_unit(mach, inst, 1); 2546 /* always fetch all 3 offsets, overkill but keeps code simple */ 2547 
fetch_texel_offsets(mach, inst, offsets); 2548 2549 IFETCH(&r[3], 0, TGSI_CHAN_W); 2550 2551 if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I || 2552 inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) { 2553 target = mach->SamplerViews[unit].Resource; 2554 } 2555 else { 2556 target = inst->Texture.Texture; 2557 } 2558 switch(target) { 2559 case TGSI_TEXTURE_3D: 2560 case TGSI_TEXTURE_2D_ARRAY: 2561 case TGSI_TEXTURE_SHADOW2D_ARRAY: 2562 case TGSI_TEXTURE_2D_ARRAY_MSAA: 2563 IFETCH(&r[2], 0, TGSI_CHAN_Z); 2564 /* fallthrough */ 2565 case TGSI_TEXTURE_2D: 2566 case TGSI_TEXTURE_RECT: 2567 case TGSI_TEXTURE_SHADOW1D_ARRAY: 2568 case TGSI_TEXTURE_SHADOW2D: 2569 case TGSI_TEXTURE_SHADOWRECT: 2570 case TGSI_TEXTURE_1D_ARRAY: 2571 case TGSI_TEXTURE_2D_MSAA: 2572 IFETCH(&r[1], 0, TGSI_CHAN_Y); 2573 /* fallthrough */ 2574 case TGSI_TEXTURE_BUFFER: 2575 case TGSI_TEXTURE_1D: 2576 case TGSI_TEXTURE_SHADOW1D: 2577 IFETCH(&r[0], 0, TGSI_CHAN_X); 2578 break; 2579 default: 2580 assert(0); 2581 break; 2582 } 2583 2584 mach->Sampler->get_texel(mach->Sampler, unit, r[0].i, r[1].i, r[2].i, r[3].i, 2585 offsets, rgba); 2586 2587 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 2588 r[0].f[j] = rgba[0][j]; 2589 r[1].f[j] = rgba[1][j]; 2590 r[2].f[j] = rgba[2][j]; 2591 r[3].f[j] = rgba[3][j]; 2592 } 2593 2594 if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I || 2595 inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) { 2596 unsigned char swizzles[4]; 2597 swizzles[0] = inst->Src[1].Register.SwizzleX; 2598 swizzles[1] = inst->Src[1].Register.SwizzleY; 2599 swizzles[2] = inst->Src[1].Register.SwizzleZ; 2600 swizzles[3] = inst->Src[1].Register.SwizzleW; 2601 2602 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2603 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2604 store_dest(mach, &r[swizzles[chan]], 2605 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2606 } 2607 } 2608 } 2609 else { 2610 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2611 if 
(inst->Dst[0].Register.WriteMask & (1 << chan)) { 2612 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2613 } 2614 } 2615 } 2616} 2617 2618static void 2619exec_txq(struct tgsi_exec_machine *mach, 2620 const struct tgsi_full_instruction *inst) 2621{ 2622 int result[4]; 2623 union tgsi_exec_channel r[4], src; 2624 uint chan; 2625 uint unit; 2626 int i,j; 2627 2628 unit = fetch_sampler_unit(mach, inst, 1); 2629 2630 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT); 2631 2632 /* XXX: This interface can't return per-pixel values */ 2633 mach->Sampler->get_dims(mach->Sampler, unit, src.i[0], result); 2634 2635 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 2636 for (j = 0; j < 4; j++) { 2637 r[j].i[i] = result[j]; 2638 } 2639 } 2640 2641 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2642 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2643 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, 2644 TGSI_EXEC_DATA_INT); 2645 } 2646 } 2647} 2648 2649static void 2650exec_sample(struct tgsi_exec_machine *mach, 2651 const struct tgsi_full_instruction *inst, 2652 uint modifier, boolean compare) 2653{ 2654 const uint resource_unit = inst->Src[1].Register.Index; 2655 const uint sampler_unit = inst->Src[2].Register.Index; 2656 union tgsi_exec_channel r[5], c1; 2657 const union tgsi_exec_channel *lod = &ZeroVec; 2658 enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE; 2659 uint chan; 2660 unsigned char swizzles[4]; 2661 int8_t offsets[3]; 2662 2663 /* always fetch all 3 offsets, overkill but keeps code simple */ 2664 fetch_texel_offsets(mach, inst, offsets); 2665 2666 assert(modifier != TEX_MODIFIER_PROJECTED); 2667 2668 if (modifier != TEX_MODIFIER_NONE) { 2669 if (modifier == TEX_MODIFIER_LOD_BIAS) { 2670 FETCH(&c1, 3, TGSI_CHAN_X); 2671 lod = &c1; 2672 control = TGSI_SAMPLER_LOD_BIAS; 2673 } 2674 else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) { 2675 FETCH(&c1, 3, TGSI_CHAN_X); 2676 lod = &c1; 2677 control = 
TGSI_SAMPLER_LOD_EXPLICIT; 2678 } 2679 else if (modifier == TEX_MODIFIER_GATHER) { 2680 control = TGSI_SAMPLER_GATHER; 2681 } 2682 else { 2683 assert(modifier == TEX_MODIFIER_LEVEL_ZERO); 2684 control = TGSI_SAMPLER_LOD_ZERO; 2685 } 2686 } 2687 2688 FETCH(&r[0], 0, TGSI_CHAN_X); 2689 2690 switch (mach->SamplerViews[resource_unit].Resource) { 2691 case TGSI_TEXTURE_1D: 2692 if (compare) { 2693 FETCH(&r[2], 3, TGSI_CHAN_X); 2694 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2695 &r[0], &ZeroVec, &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */ 2696 NULL, offsets, control, 2697 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2698 } 2699 else { 2700 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2701 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */ 2702 NULL, offsets, control, 2703 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2704 } 2705 break; 2706 2707 case TGSI_TEXTURE_1D_ARRAY: 2708 case TGSI_TEXTURE_2D: 2709 case TGSI_TEXTURE_RECT: 2710 FETCH(&r[1], 0, TGSI_CHAN_Y); 2711 if (compare) { 2712 FETCH(&r[2], 3, TGSI_CHAN_X); 2713 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2714 &r[0], &r[1], &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */ 2715 NULL, offsets, control, 2716 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2717 } 2718 else { 2719 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2720 &r[0], &r[1], &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */ 2721 NULL, offsets, control, 2722 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2723 } 2724 break; 2725 2726 case TGSI_TEXTURE_2D_ARRAY: 2727 case TGSI_TEXTURE_3D: 2728 case TGSI_TEXTURE_CUBE: 2729 FETCH(&r[1], 0, TGSI_CHAN_Y); 2730 FETCH(&r[2], 0, TGSI_CHAN_Z); 2731 if(compare) { 2732 FETCH(&r[3], 3, TGSI_CHAN_X); 2733 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2734 &r[0], &r[1], &r[2], &r[3], lod, 2735 NULL, offsets, control, 2736 &r[0], &r[1], &r[2], &r[3]); 2737 } 2738 else { 2739 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2740 &r[0], &r[1], &r[2], 
&ZeroVec, lod, 2741 NULL, offsets, control, 2742 &r[0], &r[1], &r[2], &r[3]); 2743 } 2744 break; 2745 2746 case TGSI_TEXTURE_CUBE_ARRAY: 2747 FETCH(&r[1], 0, TGSI_CHAN_Y); 2748 FETCH(&r[2], 0, TGSI_CHAN_Z); 2749 FETCH(&r[3], 0, TGSI_CHAN_W); 2750 if(compare) { 2751 FETCH(&r[4], 3, TGSI_CHAN_X); 2752 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2753 &r[0], &r[1], &r[2], &r[3], &r[4], 2754 NULL, offsets, control, 2755 &r[0], &r[1], &r[2], &r[3]); 2756 } 2757 else { 2758 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2759 &r[0], &r[1], &r[2], &r[3], lod, 2760 NULL, offsets, control, 2761 &r[0], &r[1], &r[2], &r[3]); 2762 } 2763 break; 2764 2765 2766 default: 2767 assert(0); 2768 } 2769 2770 swizzles[0] = inst->Src[1].Register.SwizzleX; 2771 swizzles[1] = inst->Src[1].Register.SwizzleY; 2772 swizzles[2] = inst->Src[1].Register.SwizzleZ; 2773 swizzles[3] = inst->Src[1].Register.SwizzleW; 2774 2775 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2776 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2777 store_dest(mach, &r[swizzles[chan]], 2778 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2779 } 2780 } 2781} 2782 2783static void 2784exec_sample_d(struct tgsi_exec_machine *mach, 2785 const struct tgsi_full_instruction *inst) 2786{ 2787 const uint resource_unit = inst->Src[1].Register.Index; 2788 const uint sampler_unit = inst->Src[2].Register.Index; 2789 union tgsi_exec_channel r[4]; 2790 float derivs[3][2][TGSI_QUAD_SIZE]; 2791 uint chan; 2792 unsigned char swizzles[4]; 2793 int8_t offsets[3]; 2794 2795 /* always fetch all 3 offsets, overkill but keeps code simple */ 2796 fetch_texel_offsets(mach, inst, offsets); 2797 2798 FETCH(&r[0], 0, TGSI_CHAN_X); 2799 2800 switch (mach->SamplerViews[resource_unit].Resource) { 2801 case TGSI_TEXTURE_1D: 2802 case TGSI_TEXTURE_1D_ARRAY: 2803 /* only 1D array actually needs Y */ 2804 FETCH(&r[1], 0, TGSI_CHAN_Y); 2805 2806 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]); 2807 2808 
fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2809 &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ 2810 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2811 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2812 break; 2813 2814 case TGSI_TEXTURE_2D: 2815 case TGSI_TEXTURE_RECT: 2816 case TGSI_TEXTURE_2D_ARRAY: 2817 /* only 2D array actually needs Z */ 2818 FETCH(&r[1], 0, TGSI_CHAN_Y); 2819 FETCH(&r[2], 0, TGSI_CHAN_Z); 2820 2821 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]); 2822 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]); 2823 2824 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2825 &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* inputs */ 2826 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2827 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2828 break; 2829 2830 case TGSI_TEXTURE_3D: 2831 case TGSI_TEXTURE_CUBE: 2832 case TGSI_TEXTURE_CUBE_ARRAY: 2833 /* only cube array actually needs W */ 2834 FETCH(&r[1], 0, TGSI_CHAN_Y); 2835 FETCH(&r[2], 0, TGSI_CHAN_Z); 2836 FETCH(&r[3], 0, TGSI_CHAN_W); 2837 2838 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]); 2839 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]); 2840 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Z, derivs[2]); 2841 2842 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2843 &r[0], &r[1], &r[2], &r[3], &ZeroVec, 2844 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2845 &r[0], &r[1], &r[2], &r[3]); 2846 break; 2847 2848 default: 2849 assert(0); 2850 } 2851 2852 swizzles[0] = inst->Src[1].Register.SwizzleX; 2853 swizzles[1] = inst->Src[1].Register.SwizzleY; 2854 swizzles[2] = inst->Src[1].Register.SwizzleZ; 2855 swizzles[3] = inst->Src[1].Register.SwizzleW; 2856 2857 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2858 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2859 store_dest(mach, &r[swizzles[chan]], 2860 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2861 } 2862 } 2863} 2864 
2865 2866/** 2867 * Evaluate a constant-valued coefficient at the position of the 2868 * current quad. 2869 */ 2870static void 2871eval_constant_coef( 2872 struct tgsi_exec_machine *mach, 2873 unsigned attrib, 2874 unsigned chan ) 2875{ 2876 unsigned i; 2877 2878 for( i = 0; i < TGSI_QUAD_SIZE; i++ ) { 2879 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; 2880 } 2881} 2882 2883/** 2884 * Evaluate a linear-valued coefficient at the position of the 2885 * current quad. 2886 */ 2887static void 2888eval_linear_coef( 2889 struct tgsi_exec_machine *mach, 2890 unsigned attrib, 2891 unsigned chan ) 2892{ 2893 const float x = mach->QuadPos.xyzw[0].f[0]; 2894 const float y = mach->QuadPos.xyzw[1].f[0]; 2895 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 2896 const float dady = mach->InterpCoefs[attrib].dady[chan]; 2897 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 2898 mach->Inputs[attrib].xyzw[chan].f[0] = a0; 2899 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; 2900 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; 2901 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; 2902} 2903 2904/** 2905 * Evaluate a perspective-valued coefficient at the position of the 2906 * current quad. 
2907 */ 2908static void 2909eval_perspective_coef( 2910 struct tgsi_exec_machine *mach, 2911 unsigned attrib, 2912 unsigned chan ) 2913{ 2914 const float x = mach->QuadPos.xyzw[0].f[0]; 2915 const float y = mach->QuadPos.xyzw[1].f[0]; 2916 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 2917 const float dady = mach->InterpCoefs[attrib].dady[chan]; 2918 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 2919 const float *w = mach->QuadPos.xyzw[3].f; 2920 /* divide by W here */ 2921 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; 2922 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; 2923 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; 2924 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; 2925} 2926 2927 2928typedef void (* eval_coef_func)( 2929 struct tgsi_exec_machine *mach, 2930 unsigned attrib, 2931 unsigned chan ); 2932 2933static void 2934exec_declaration(struct tgsi_exec_machine *mach, 2935 const struct tgsi_full_declaration *decl) 2936{ 2937 if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) { 2938 mach->SamplerViews[decl->Range.First] = decl->SamplerView; 2939 return; 2940 } 2941 2942 if (mach->ShaderType == PIPE_SHADER_FRAGMENT) { 2943 if (decl->Declaration.File == TGSI_FILE_INPUT) { 2944 uint first, last, mask; 2945 2946 first = decl->Range.First; 2947 last = decl->Range.Last; 2948 mask = decl->Declaration.UsageMask; 2949 2950 /* XXX we could remove this special-case code since 2951 * mach->InterpCoefs[first].a0 should already have the 2952 * front/back-face value. But we should first update the 2953 * ureg code to emit the right UsageMask value (WRITEMASK_X). 2954 * Then, we could remove the tgsi_exec_machine::Face field. 
2955 */ 2956 /* XXX make FACE a system value */ 2957 if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) { 2958 uint i; 2959 2960 assert(decl->Semantic.Index == 0); 2961 assert(first == last); 2962 2963 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 2964 mach->Inputs[first].xyzw[0].f[i] = mach->Face; 2965 } 2966 } else { 2967 eval_coef_func eval; 2968 uint i, j; 2969 2970 switch (decl->Interp.Interpolate) { 2971 case TGSI_INTERPOLATE_CONSTANT: 2972 eval = eval_constant_coef; 2973 break; 2974 2975 case TGSI_INTERPOLATE_LINEAR: 2976 eval = eval_linear_coef; 2977 break; 2978 2979 case TGSI_INTERPOLATE_PERSPECTIVE: 2980 eval = eval_perspective_coef; 2981 break; 2982 2983 case TGSI_INTERPOLATE_COLOR: 2984 eval = mach->flatshade_color ? eval_constant_coef : eval_perspective_coef; 2985 break; 2986 2987 default: 2988 assert(0); 2989 return; 2990 } 2991 2992 for (j = 0; j < TGSI_NUM_CHANNELS; j++) { 2993 if (mask & (1 << j)) { 2994 for (i = first; i <= last; i++) { 2995 eval(mach, i, j); 2996 } 2997 } 2998 } 2999 } 3000 3001 if (DEBUG_EXECUTION) { 3002 uint i, j; 3003 for (i = first; i <= last; ++i) { 3004 debug_printf("IN[%2u] = ", i); 3005 for (j = 0; j < TGSI_NUM_CHANNELS; j++) { 3006 if (j > 0) { 3007 debug_printf(" "); 3008 } 3009 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", 3010 mach->Inputs[i].xyzw[0].f[j], mach->Inputs[i].xyzw[0].u[j], 3011 mach->Inputs[i].xyzw[1].f[j], mach->Inputs[i].xyzw[1].u[j], 3012 mach->Inputs[i].xyzw[2].f[j], mach->Inputs[i].xyzw[2].u[j], 3013 mach->Inputs[i].xyzw[3].f[j], mach->Inputs[i].xyzw[3].u[j]); 3014 } 3015 } 3016 } 3017 } 3018 } 3019 3020} 3021 3022typedef void (* micro_unary_op)(union tgsi_exec_channel *dst, 3023 const union tgsi_exec_channel *src); 3024 3025static void 3026exec_scalar_unary(struct tgsi_exec_machine *mach, 3027 const struct tgsi_full_instruction *inst, 3028 micro_unary_op op, 3029 enum tgsi_exec_datatype dst_datatype, 3030 enum tgsi_exec_datatype src_datatype) 3031{ 3032 unsigned int chan; 3033 union tgsi_exec_channel 
src; 3034 union tgsi_exec_channel dst; 3035 3036 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype); 3037 op(&dst, &src); 3038 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3039 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3040 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype); 3041 } 3042 } 3043} 3044 3045static void 3046exec_vector_unary(struct tgsi_exec_machine *mach, 3047 const struct tgsi_full_instruction *inst, 3048 micro_unary_op op, 3049 enum tgsi_exec_datatype dst_datatype, 3050 enum tgsi_exec_datatype src_datatype) 3051{ 3052 unsigned int chan; 3053 struct tgsi_exec_vector dst; 3054 3055 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3056 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3057 union tgsi_exec_channel src; 3058 3059 fetch_source(mach, &src, &inst->Src[0], chan, src_datatype); 3060 op(&dst.xyzw[chan], &src); 3061 } 3062 } 3063 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3064 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3065 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); 3066 } 3067 } 3068} 3069 3070typedef void (* micro_binary_op)(union tgsi_exec_channel *dst, 3071 const union tgsi_exec_channel *src0, 3072 const union tgsi_exec_channel *src1); 3073 3074static void 3075exec_scalar_binary(struct tgsi_exec_machine *mach, 3076 const struct tgsi_full_instruction *inst, 3077 micro_binary_op op, 3078 enum tgsi_exec_datatype dst_datatype, 3079 enum tgsi_exec_datatype src_datatype) 3080{ 3081 unsigned int chan; 3082 union tgsi_exec_channel src[2]; 3083 union tgsi_exec_channel dst; 3084 3085 fetch_source(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, src_datatype); 3086 fetch_source(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, src_datatype); 3087 op(&dst, &src[0], &src[1]); 3088 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3089 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3090 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype); 3091 } 
3092 } 3093} 3094 3095static void 3096exec_vector_binary(struct tgsi_exec_machine *mach, 3097 const struct tgsi_full_instruction *inst, 3098 micro_binary_op op, 3099 enum tgsi_exec_datatype dst_datatype, 3100 enum tgsi_exec_datatype src_datatype) 3101{ 3102 unsigned int chan; 3103 struct tgsi_exec_vector dst; 3104 3105 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3106 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3107 union tgsi_exec_channel src[2]; 3108 3109 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); 3110 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); 3111 op(&dst.xyzw[chan], &src[0], &src[1]); 3112 } 3113 } 3114 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3115 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3116 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); 3117 } 3118 } 3119} 3120 3121typedef void (* micro_trinary_op)(union tgsi_exec_channel *dst, 3122 const union tgsi_exec_channel *src0, 3123 const union tgsi_exec_channel *src1, 3124 const union tgsi_exec_channel *src2); 3125 3126static void 3127exec_vector_trinary(struct tgsi_exec_machine *mach, 3128 const struct tgsi_full_instruction *inst, 3129 micro_trinary_op op, 3130 enum tgsi_exec_datatype dst_datatype, 3131 enum tgsi_exec_datatype src_datatype) 3132{ 3133 unsigned int chan; 3134 struct tgsi_exec_vector dst; 3135 3136 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3137 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3138 union tgsi_exec_channel src[3]; 3139 3140 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); 3141 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); 3142 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype); 3143 op(&dst.xyzw[chan], &src[0], &src[1], &src[2]); 3144 } 3145 } 3146 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3147 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3148 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, 
dst_datatype); 3149 } 3150 } 3151} 3152 3153typedef void (* micro_quaternary_op)(union tgsi_exec_channel *dst, 3154 const union tgsi_exec_channel *src0, 3155 const union tgsi_exec_channel *src1, 3156 const union tgsi_exec_channel *src2, 3157 const union tgsi_exec_channel *src3); 3158 3159static void 3160exec_vector_quaternary(struct tgsi_exec_machine *mach, 3161 const struct tgsi_full_instruction *inst, 3162 micro_quaternary_op op, 3163 enum tgsi_exec_datatype dst_datatype, 3164 enum tgsi_exec_datatype src_datatype) 3165{ 3166 unsigned int chan; 3167 struct tgsi_exec_vector dst; 3168 3169 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3170 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3171 union tgsi_exec_channel src[4]; 3172 3173 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); 3174 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); 3175 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype); 3176 fetch_source(mach, &src[3], &inst->Src[3], chan, src_datatype); 3177 op(&dst.xyzw[chan], &src[0], &src[1], &src[2], &src[3]); 3178 } 3179 } 3180 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3181 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3182 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); 3183 } 3184 } 3185} 3186 3187static void 3188exec_dp3(struct tgsi_exec_machine *mach, 3189 const struct tgsi_full_instruction *inst) 3190{ 3191 unsigned int chan; 3192 union tgsi_exec_channel arg[3]; 3193 3194 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3195 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3196 micro_mul(&arg[2], &arg[0], &arg[1]); 3197 3198 for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_Z; chan++) { 3199 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); 3200 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); 3201 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 3202 } 3203 3204 
for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3205 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3206 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 3207 } 3208 } 3209} 3210 3211static void 3212exec_dp4(struct tgsi_exec_machine *mach, 3213 const struct tgsi_full_instruction *inst) 3214{ 3215 unsigned int chan; 3216 union tgsi_exec_channel arg[3]; 3217 3218 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3219 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3220 micro_mul(&arg[2], &arg[0], &arg[1]); 3221 3222 for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_W; chan++) { 3223 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); 3224 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); 3225 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 3226 } 3227 3228 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3229 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3230 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 3231 } 3232 } 3233} 3234 3235static void 3236exec_dp2(struct tgsi_exec_machine *mach, 3237 const struct tgsi_full_instruction *inst) 3238{ 3239 unsigned int chan; 3240 union tgsi_exec_channel arg[3]; 3241 3242 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3243 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3244 micro_mul(&arg[2], &arg[0], &arg[1]); 3245 3246 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3247 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3248 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 3249 3250 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3251 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3252 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 3253 } 3254 } 3255} 3256 3257static void 3258exec_pk2h(struct 
tgsi_exec_machine *mach, 3259 const struct tgsi_full_instruction *inst) 3260{ 3261 unsigned chan; 3262 union tgsi_exec_channel arg[2], dst; 3263 3264 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3265 fetch_source(mach, &arg[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3266 for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) { 3267 dst.u[chan] = util_float_to_half(arg[0].f[chan]) | 3268 (util_float_to_half(arg[1].f[chan]) << 16); 3269 } 3270 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3271 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3272 store_dest(mach, &dst, &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_UINT); 3273 } 3274 } 3275} 3276 3277static void 3278exec_up2h(struct tgsi_exec_machine *mach, 3279 const struct tgsi_full_instruction *inst) 3280{ 3281 unsigned chan; 3282 union tgsi_exec_channel arg, dst[2]; 3283 3284 fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT); 3285 for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) { 3286 dst[0].f[chan] = util_half_to_float(arg.u[chan] & 0xffff); 3287 dst[1].f[chan] = util_half_to_float(arg.u[chan] >> 16); 3288 } 3289 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3290 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3291 store_dest(mach, &dst[chan & 1], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 3292 } 3293 } 3294} 3295 3296static void 3297micro_ucmp(union tgsi_exec_channel *dst, 3298 const union tgsi_exec_channel *src0, 3299 const union tgsi_exec_channel *src1, 3300 const union tgsi_exec_channel *src2) 3301{ 3302 dst->f[0] = src0->u[0] ? src1->f[0] : src2->f[0]; 3303 dst->f[1] = src0->u[1] ? src1->f[1] : src2->f[1]; 3304 dst->f[2] = src0->u[2] ? src1->f[2] : src2->f[2]; 3305 dst->f[3] = src0->u[3] ? 
src1->f[3] : src2->f[3]; 3306} 3307 3308static void 3309exec_ucmp(struct tgsi_exec_machine *mach, 3310 const struct tgsi_full_instruction *inst) 3311{ 3312 unsigned int chan; 3313 struct tgsi_exec_vector dst; 3314 3315 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3316 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3317 union tgsi_exec_channel src[3]; 3318 3319 fetch_source(mach, &src[0], &inst->Src[0], chan, 3320 TGSI_EXEC_DATA_UINT); 3321 fetch_source(mach, &src[1], &inst->Src[1], chan, 3322 TGSI_EXEC_DATA_FLOAT); 3323 fetch_source(mach, &src[2], &inst->Src[2], chan, 3324 TGSI_EXEC_DATA_FLOAT); 3325 micro_ucmp(&dst.xyzw[chan], &src[0], &src[1], &src[2]); 3326 } 3327 } 3328 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3329 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3330 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, 3331 TGSI_EXEC_DATA_FLOAT); 3332 } 3333 } 3334} 3335 3336static void 3337exec_dst(struct tgsi_exec_machine *mach, 3338 const struct tgsi_full_instruction *inst) 3339{ 3340 union tgsi_exec_channel r[2]; 3341 union tgsi_exec_channel d[4]; 3342 3343 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3344 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3345 fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3346 micro_mul(&d[TGSI_CHAN_Y], &r[0], &r[1]); 3347 } 3348 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3349 fetch_source(mach, &d[TGSI_CHAN_Z], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 3350 } 3351 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3352 fetch_source(mach, &d[TGSI_CHAN_W], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3353 } 3354 3355 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3356 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3357 } 3358 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3359 store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], 
inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3360 } 3361 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3362 store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 3363 } 3364 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3365 store_dest(mach, &d[TGSI_CHAN_W], &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3366 } 3367} 3368 3369static void 3370exec_log(struct tgsi_exec_machine *mach, 3371 const struct tgsi_full_instruction *inst) 3372{ 3373 union tgsi_exec_channel r[3]; 3374 3375 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3376 micro_abs(&r[2], &r[0]); /* r2 = abs(r0) */ 3377 micro_lg2(&r[1], &r[2]); /* r1 = lg2(r2) */ 3378 micro_flr(&r[0], &r[1]); /* r0 = floor(r1) */ 3379 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3380 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3381 } 3382 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3383 micro_exp2(&r[0], &r[0]); /* r0 = 2 ^ r0 */ 3384 micro_div(&r[0], &r[2], &r[0]); /* r0 = r2 / r0 */ 3385 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3386 } 3387 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3388 store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 3389 } 3390 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3391 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3392 } 3393} 3394 3395static void 3396exec_exp(struct tgsi_exec_machine *mach, 3397 const struct tgsi_full_instruction *inst) 3398{ 3399 union tgsi_exec_channel r[3]; 3400 3401 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3402 micro_flr(&r[1], &r[0]); /* r1 = floor(r0) */ 3403 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3404 micro_exp2(&r[2], &r[1]); /* r2 = 2 ^ r1 */ 3405 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_X, 
TGSI_EXEC_DATA_FLOAT); 3406 } 3407 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3408 micro_sub(&r[2], &r[0], &r[1]); /* r2 = r0 - r1 */ 3409 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3410 } 3411 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3412 micro_exp2(&r[2], &r[0]); /* r2 = 2 ^ r0 */ 3413 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 3414 } 3415 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3416 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3417 } 3418} 3419 3420static void 3421exec_lit(struct tgsi_exec_machine *mach, 3422 const struct tgsi_full_instruction *inst) 3423{ 3424 union tgsi_exec_channel r[3]; 3425 union tgsi_exec_channel d[3]; 3426 3427 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YZ) { 3428 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3429 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3430 fetch_source(mach, &r[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3431 micro_max(&r[1], &r[1], &ZeroVec); 3432 3433 fetch_source(mach, &r[2], &inst->Src[0], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3434 micro_min(&r[2], &r[2], &P128Vec); 3435 micro_max(&r[2], &r[2], &M128Vec); 3436 micro_pow(&r[1], &r[1], &r[2]); 3437 micro_lt(&d[TGSI_CHAN_Z], &ZeroVec, &r[0], &r[1], &ZeroVec); 3438 store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 3439 } 3440 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3441 micro_max(&d[TGSI_CHAN_Y], &r[0], &ZeroVec); 3442 store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3443 } 3444 } 3445 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3446 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3447 } 3448 3449 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3450 store_dest(mach, &OneVec, 
&inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3451 } 3452} 3453 3454static void 3455exec_break(struct tgsi_exec_machine *mach) 3456{ 3457 if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) { 3458 /* turn off loop channels for each enabled exec channel */ 3459 mach->LoopMask &= ~mach->ExecMask; 3460 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3461 UPDATE_EXEC_MASK(mach); 3462 } else { 3463 assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH); 3464 3465 mach->Switch.mask = 0x0; 3466 3467 UPDATE_EXEC_MASK(mach); 3468 } 3469} 3470 3471static void 3472exec_switch(struct tgsi_exec_machine *mach, 3473 const struct tgsi_full_instruction *inst) 3474{ 3475 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); 3476 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); 3477 3478 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; 3479 fetch_source(mach, &mach->Switch.selector, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT); 3480 mach->Switch.mask = 0x0; 3481 mach->Switch.defaultMask = 0x0; 3482 3483 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; 3484 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH; 3485 3486 UPDATE_EXEC_MASK(mach); 3487} 3488 3489static void 3490exec_case(struct tgsi_exec_machine *mach, 3491 const struct tgsi_full_instruction *inst) 3492{ 3493 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; 3494 union tgsi_exec_channel src; 3495 uint mask = 0; 3496 3497 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT); 3498 3499 if (mach->Switch.selector.u[0] == src.u[0]) { 3500 mask |= 0x1; 3501 } 3502 if (mach->Switch.selector.u[1] == src.u[1]) { 3503 mask |= 0x2; 3504 } 3505 if (mach->Switch.selector.u[2] == src.u[2]) { 3506 mask |= 0x4; 3507 } 3508 if (mach->Switch.selector.u[3] == src.u[3]) { 3509 mask |= 0x8; 3510 } 3511 3512 mach->Switch.defaultMask |= mask; 3513 3514 mach->Switch.mask |= mask & prevMask; 3515 3516 UPDATE_EXEC_MASK(mach); 3517} 3518 3519/* FIXME: this 
will only work if default is last */ 3520static void 3521exec_default(struct tgsi_exec_machine *mach) 3522{ 3523 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; 3524 3525 mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask; 3526 3527 UPDATE_EXEC_MASK(mach); 3528} 3529 3530static void 3531exec_endswitch(struct tgsi_exec_machine *mach) 3532{ 3533 mach->Switch = mach->SwitchStack[--mach->SwitchStackTop]; 3534 mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; 3535 3536 UPDATE_EXEC_MASK(mach); 3537} 3538 3539typedef void (* micro_dop)(union tgsi_double_channel *dst, 3540 const union tgsi_double_channel *src); 3541 3542typedef void (* micro_dop_sop)(union tgsi_double_channel *dst, 3543 const union tgsi_double_channel *src0, 3544 union tgsi_exec_channel *src1); 3545 3546typedef void (* micro_dop_s)(union tgsi_double_channel *dst, 3547 const union tgsi_exec_channel *src); 3548 3549typedef void (* micro_sop_d)(union tgsi_exec_channel *dst, 3550 const union tgsi_double_channel *src); 3551 3552static void 3553fetch_double_channel(struct tgsi_exec_machine *mach, 3554 union tgsi_double_channel *chan, 3555 const struct tgsi_full_src_register *reg, 3556 uint chan_0, 3557 uint chan_1) 3558{ 3559 union tgsi_exec_channel src[2]; 3560 uint i; 3561 3562 fetch_source_d(mach, &src[0], reg, chan_0); 3563 fetch_source_d(mach, &src[1], reg, chan_1); 3564 3565 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 3566 chan->u[i][0] = src[0].u[i]; 3567 chan->u[i][1] = src[1].u[i]; 3568 } 3569 if (reg->Register.Absolute) { 3570 micro_dabs(chan, chan); 3571 } 3572 if (reg->Register.Negate) { 3573 micro_dneg(chan, chan); 3574 } 3575} 3576 3577static void 3578store_double_channel(struct tgsi_exec_machine *mach, 3579 const union tgsi_double_channel *chan, 3580 const struct tgsi_full_dst_register *reg, 3581 const struct tgsi_full_instruction *inst, 3582 uint chan_0, 3583 uint chan_1) 3584{ 3585 union tgsi_exec_channel dst[2]; 3586 uint i; 3587 union tgsi_double_channel temp; 3588 
const uint execmask = mach->ExecMask; 3589 3590 if (!inst->Instruction.Saturate) { 3591 for (i = 0; i < TGSI_QUAD_SIZE; i++) 3592 if (execmask & (1 << i)) { 3593 dst[0].u[i] = chan->u[i][0]; 3594 dst[1].u[i] = chan->u[i][1]; 3595 } 3596 } 3597 else { 3598 for (i = 0; i < TGSI_QUAD_SIZE; i++) 3599 if (execmask & (1 << i)) { 3600 if (chan->d[i] < 0.0) 3601 temp.d[i] = 0.0; 3602 else if (chan->d[i] > 1.0) 3603 temp.d[i] = 1.0; 3604 else 3605 temp.d[i] = chan->d[i]; 3606 3607 dst[0].u[i] = temp.u[i][0]; 3608 dst[1].u[i] = temp.u[i][1]; 3609 } 3610 } 3611 3612 store_dest_double(mach, &dst[0], reg, chan_0, TGSI_EXEC_DATA_UINT); 3613 if (chan_1 != (unsigned)-1) 3614 store_dest_double(mach, &dst[1], reg, chan_1, TGSI_EXEC_DATA_UINT); 3615} 3616 3617static void 3618exec_double_unary(struct tgsi_exec_machine *mach, 3619 const struct tgsi_full_instruction *inst, 3620 micro_dop op) 3621{ 3622 union tgsi_double_channel src; 3623 union tgsi_double_channel dst; 3624 3625 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) { 3626 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 3627 op(&dst, &src); 3628 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 3629 } 3630 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) { 3631 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 3632 op(&dst, &src); 3633 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 3634 } 3635} 3636 3637static void 3638exec_double_binary(struct tgsi_exec_machine *mach, 3639 const struct tgsi_full_instruction *inst, 3640 micro_dop op, 3641 enum tgsi_exec_datatype dst_datatype) 3642{ 3643 union tgsi_double_channel src[2]; 3644 union tgsi_double_channel dst; 3645 int first_dest_chan, second_dest_chan; 3646 int wmask; 3647 3648 wmask = inst->Dst[0].Register.WriteMask; 3649 /* these are & because of the way DSLT etc store their destinations */ 3650 if 
(wmask & TGSI_WRITEMASK_XY) { 3651 first_dest_chan = TGSI_CHAN_X; 3652 second_dest_chan = TGSI_CHAN_Y; 3653 if (dst_datatype == TGSI_EXEC_DATA_UINT) { 3654 first_dest_chan = (wmask & TGSI_WRITEMASK_X) ? TGSI_CHAN_X : TGSI_CHAN_Y; 3655 second_dest_chan = -1; 3656 } 3657 3658 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 3659 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y); 3660 op(&dst, src); 3661 store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan); 3662 } 3663 3664 if (wmask & TGSI_WRITEMASK_ZW) { 3665 first_dest_chan = TGSI_CHAN_Z; 3666 second_dest_chan = TGSI_CHAN_W; 3667 if (dst_datatype == TGSI_EXEC_DATA_UINT) { 3668 first_dest_chan = (wmask & TGSI_WRITEMASK_Z) ? TGSI_CHAN_Z : TGSI_CHAN_W; 3669 second_dest_chan = -1; 3670 } 3671 3672 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 3673 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W); 3674 op(&dst, src); 3675 store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan); 3676 } 3677} 3678 3679static void 3680exec_double_trinary(struct tgsi_exec_machine *mach, 3681 const struct tgsi_full_instruction *inst, 3682 micro_dop op) 3683{ 3684 union tgsi_double_channel src[3]; 3685 union tgsi_double_channel dst; 3686 3687 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) { 3688 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 3689 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y); 3690 fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_X, TGSI_CHAN_Y); 3691 op(&dst, src); 3692 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 3693 } 3694 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) { 3695 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 3696 
fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W); 3697 fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_Z, TGSI_CHAN_W); 3698 op(&dst, src); 3699 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 3700 } 3701} 3702 3703static void 3704exec_dldexp(struct tgsi_exec_machine *mach, 3705 const struct tgsi_full_instruction *inst) 3706{ 3707 union tgsi_double_channel src0; 3708 union tgsi_exec_channel src1; 3709 union tgsi_double_channel dst; 3710 int wmask; 3711 3712 wmask = inst->Dst[0].Register.WriteMask; 3713 if (wmask & TGSI_WRITEMASK_XY) { 3714 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 3715 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT); 3716 micro_dldexp(&dst, &src0, &src1); 3717 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 3718 } 3719 3720 if (wmask & TGSI_WRITEMASK_ZW) { 3721 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 3722 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT); 3723 micro_dldexp(&dst, &src0, &src1); 3724 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 3725 } 3726} 3727 3728static void 3729exec_dfracexp(struct tgsi_exec_machine *mach, 3730 const struct tgsi_full_instruction *inst) 3731{ 3732 union tgsi_double_channel src; 3733 union tgsi_double_channel dst; 3734 union tgsi_exec_channel dst_exp; 3735 3736 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 3737 micro_dfracexp(&dst, &dst_exp, &src); 3738 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) 3739 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 3740 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) 3741 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 3742 for (unsigned chan = 0; chan < 
TGSI_NUM_CHANNELS; chan++) { 3743 if (inst->Dst[1].Register.WriteMask & (1 << chan)) 3744 store_dest(mach, &dst_exp, &inst->Dst[1], inst, chan, TGSI_EXEC_DATA_INT); 3745 } 3746} 3747 3748static void 3749exec_arg0_64_arg1_32(struct tgsi_exec_machine *mach, 3750 const struct tgsi_full_instruction *inst, 3751 micro_dop_sop op) 3752{ 3753 union tgsi_double_channel src0; 3754 union tgsi_exec_channel src1; 3755 union tgsi_double_channel dst; 3756 int wmask; 3757 3758 wmask = inst->Dst[0].Register.WriteMask; 3759 if (wmask & TGSI_WRITEMASK_XY) { 3760 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 3761 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT); 3762 op(&dst, &src0, &src1); 3763 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 3764 } 3765 3766 if (wmask & TGSI_WRITEMASK_ZW) { 3767 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 3768 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT); 3769 op(&dst, &src0, &src1); 3770 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 3771 } 3772} 3773 3774static int 3775get_image_coord_dim(unsigned tgsi_tex) 3776{ 3777 int dim; 3778 switch (tgsi_tex) { 3779 case TGSI_TEXTURE_BUFFER: 3780 case TGSI_TEXTURE_1D: 3781 dim = 1; 3782 break; 3783 case TGSI_TEXTURE_2D: 3784 case TGSI_TEXTURE_RECT: 3785 case TGSI_TEXTURE_1D_ARRAY: 3786 case TGSI_TEXTURE_2D_MSAA: 3787 dim = 2; 3788 break; 3789 case TGSI_TEXTURE_3D: 3790 case TGSI_TEXTURE_CUBE: 3791 case TGSI_TEXTURE_2D_ARRAY: 3792 case TGSI_TEXTURE_2D_ARRAY_MSAA: 3793 case TGSI_TEXTURE_CUBE_ARRAY: 3794 dim = 3; 3795 break; 3796 default: 3797 assert(!"unknown texture target"); 3798 dim = 0; 3799 break; 3800 } 3801 3802 return dim; 3803} 3804 3805static int 3806get_image_coord_sample(unsigned tgsi_tex) 3807{ 3808 int sample = 0; 3809 switch (tgsi_tex) { 3810 case TGSI_TEXTURE_2D_MSAA: 3811 sample = 3; 3812 break; 3813 case 
TGSI_TEXTURE_2D_ARRAY_MSAA: 3814 sample = 4; 3815 break; 3816 default: 3817 break; 3818 } 3819 return sample; 3820} 3821 3822static void 3823exec_load_img(struct tgsi_exec_machine *mach, 3824 const struct tgsi_full_instruction *inst) 3825{ 3826 union tgsi_exec_channel r[4], sample_r; 3827 uint unit; 3828 int sample; 3829 int i, j; 3830 int dim; 3831 uint chan; 3832 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 3833 struct tgsi_image_params params; 3834 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 3835 3836 unit = fetch_sampler_unit(mach, inst, 0); 3837 dim = get_image_coord_dim(inst->Memory.Texture); 3838 sample = get_image_coord_sample(inst->Memory.Texture); 3839 assert(dim <= 3); 3840 3841 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 3842 params.unit = unit; 3843 params.tgsi_tex_instr = inst->Memory.Texture; 3844 params.format = inst->Memory.Format; 3845 3846 for (i = 0; i < dim; i++) { 3847 IFETCH(&r[i], 1, TGSI_CHAN_X + i); 3848 } 3849 3850 if (sample) 3851 IFETCH(&sample_r, 1, TGSI_CHAN_X + sample); 3852 3853 mach->Image->load(mach->Image, ¶ms, 3854 r[0].i, r[1].i, r[2].i, sample_r.i, 3855 rgba); 3856 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 3857 r[0].f[j] = rgba[0][j]; 3858 r[1].f[j] = rgba[1][j]; 3859 r[2].f[j] = rgba[2][j]; 3860 r[3].f[j] = rgba[3][j]; 3861 } 3862 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3863 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3864 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 3865 } 3866 } 3867} 3868 3869static void 3870exec_load_buf(struct tgsi_exec_machine *mach, 3871 const struct tgsi_full_instruction *inst) 3872{ 3873 union tgsi_exec_channel r[4]; 3874 uint unit; 3875 int j; 3876 uint chan; 3877 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 3878 struct tgsi_buffer_params params; 3879 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 3880 3881 unit = fetch_sampler_unit(mach, inst, 0); 3882 3883 params.execmask = 
mach->ExecMask & mach->NonHelperMask & ~kilmask; 3884 params.unit = unit; 3885 IFETCH(&r[0], 1, TGSI_CHAN_X); 3886 3887 mach->Buffer->load(mach->Buffer, ¶ms, 3888 r[0].i, rgba); 3889 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 3890 r[0].f[j] = rgba[0][j]; 3891 r[1].f[j] = rgba[1][j]; 3892 r[2].f[j] = rgba[2][j]; 3893 r[3].f[j] = rgba[3][j]; 3894 } 3895 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3896 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3897 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 3898 } 3899 } 3900} 3901 3902static void 3903exec_load_mem(struct tgsi_exec_machine *mach, 3904 const struct tgsi_full_instruction *inst) 3905{ 3906 union tgsi_exec_channel r[4]; 3907 uint chan; 3908 char *ptr = mach->LocalMem; 3909 uint32_t offset; 3910 int j; 3911 3912 IFETCH(&r[0], 1, TGSI_CHAN_X); 3913 if (r[0].u[0] >= mach->LocalMemSize) 3914 return; 3915 3916 offset = r[0].u[0]; 3917 ptr += offset; 3918 3919 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 3920 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3921 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3922 memcpy(&r[chan].u[j], ptr + (4 * chan), 4); 3923 } 3924 } 3925 } 3926 3927 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3928 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3929 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 3930 } 3931 } 3932} 3933 3934static void 3935exec_load(struct tgsi_exec_machine *mach, 3936 const struct tgsi_full_instruction *inst) 3937{ 3938 if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) 3939 exec_load_img(mach, inst); 3940 else if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) 3941 exec_load_buf(mach, inst); 3942 else if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) 3943 exec_load_mem(mach, inst); 3944} 3945 3946static void 3947exec_store_img(struct tgsi_exec_machine *mach, 3948 const struct tgsi_full_instruction *inst) 3949{ 3950 union tgsi_exec_channel r[3], sample_r; 3951 union 
tgsi_exec_channel value[4]; 3952 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 3953 struct tgsi_image_params params; 3954 int dim; 3955 int sample; 3956 int i, j; 3957 uint unit; 3958 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 3959 unit = inst->Dst[0].Register.Index; 3960 dim = get_image_coord_dim(inst->Memory.Texture); 3961 sample = get_image_coord_sample(inst->Memory.Texture); 3962 assert(dim <= 3); 3963 3964 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 3965 params.unit = unit; 3966 params.tgsi_tex_instr = inst->Memory.Texture; 3967 params.format = inst->Memory.Format; 3968 3969 for (i = 0; i < dim; i++) { 3970 IFETCH(&r[i], 0, TGSI_CHAN_X + i); 3971 } 3972 3973 for (i = 0; i < 4; i++) { 3974 FETCH(&value[i], 1, TGSI_CHAN_X + i); 3975 } 3976 if (sample) 3977 IFETCH(&sample_r, 0, TGSI_CHAN_X + sample); 3978 3979 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 3980 rgba[0][j] = value[0].f[j]; 3981 rgba[1][j] = value[1].f[j]; 3982 rgba[2][j] = value[2].f[j]; 3983 rgba[3][j] = value[3].f[j]; 3984 } 3985 3986 mach->Image->store(mach->Image, ¶ms, 3987 r[0].i, r[1].i, r[2].i, sample_r.i, 3988 rgba); 3989} 3990 3991static void 3992exec_store_buf(struct tgsi_exec_machine *mach, 3993 const struct tgsi_full_instruction *inst) 3994{ 3995 union tgsi_exec_channel r[3]; 3996 union tgsi_exec_channel value[4]; 3997 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 3998 struct tgsi_buffer_params params; 3999 int i, j; 4000 uint unit; 4001 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 4002 4003 unit = inst->Dst[0].Register.Index; 4004 4005 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 4006 params.unit = unit; 4007 params.writemask = inst->Dst[0].Register.WriteMask; 4008 4009 IFETCH(&r[0], 0, TGSI_CHAN_X); 4010 for (i = 0; i < 4; i++) { 4011 FETCH(&value[i], 1, TGSI_CHAN_X + i); 4012 } 4013 4014 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 4015 rgba[0][j] = value[0].f[j]; 4016 rgba[1][j] = value[1].f[j]; 4017 
rgba[2][j] = value[2].f[j]; 4018 rgba[3][j] = value[3].f[j]; 4019 } 4020 4021 mach->Buffer->store(mach->Buffer, ¶ms, 4022 r[0].i, 4023 rgba); 4024} 4025 4026static void 4027exec_store_mem(struct tgsi_exec_machine *mach, 4028 const struct tgsi_full_instruction *inst) 4029{ 4030 union tgsi_exec_channel r[3]; 4031 union tgsi_exec_channel value[4]; 4032 uint i, chan; 4033 char *ptr = mach->LocalMem; 4034 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 4035 int execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 4036 4037 IFETCH(&r[0], 0, TGSI_CHAN_X); 4038 4039 for (i = 0; i < 4; i++) { 4040 FETCH(&value[i], 1, TGSI_CHAN_X + i); 4041 } 4042 4043 if (r[0].u[0] >= mach->LocalMemSize) 4044 return; 4045 ptr += r[0].u[0]; 4046 4047 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 4048 if (execmask & (1 << i)) { 4049 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4050 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 4051 memcpy(ptr + (chan * 4), &value[chan].u[0], 4); 4052 } 4053 } 4054 } 4055 } 4056} 4057 4058static void 4059exec_store(struct tgsi_exec_machine *mach, 4060 const struct tgsi_full_instruction *inst) 4061{ 4062 if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE) 4063 exec_store_img(mach, inst); 4064 else if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) 4065 exec_store_buf(mach, inst); 4066 else if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) 4067 exec_store_mem(mach, inst); 4068} 4069 4070static void 4071exec_atomop_img(struct tgsi_exec_machine *mach, 4072 const struct tgsi_full_instruction *inst) 4073{ 4074 union tgsi_exec_channel r[4], sample_r; 4075 union tgsi_exec_channel value[4], value2[4]; 4076 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 4077 float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 4078 struct tgsi_image_params params; 4079 int dim; 4080 int sample; 4081 int i, j; 4082 uint unit, chan; 4083 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 4084 unit = fetch_sampler_unit(mach, inst, 0); 4085 
dim = get_image_coord_dim(inst->Memory.Texture); 4086 sample = get_image_coord_sample(inst->Memory.Texture); 4087 assert(dim <= 3); 4088 4089 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 4090 params.unit = unit; 4091 params.tgsi_tex_instr = inst->Memory.Texture; 4092 params.format = inst->Memory.Format; 4093 4094 for (i = 0; i < dim; i++) { 4095 IFETCH(&r[i], 1, TGSI_CHAN_X + i); 4096 } 4097 4098 for (i = 0; i < 4; i++) { 4099 FETCH(&value[i], 2, TGSI_CHAN_X + i); 4100 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) 4101 FETCH(&value2[i], 3, TGSI_CHAN_X + i); 4102 } 4103 if (sample) 4104 IFETCH(&sample_r, 1, TGSI_CHAN_X + sample); 4105 4106 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 4107 rgba[0][j] = value[0].f[j]; 4108 rgba[1][j] = value[1].f[j]; 4109 rgba[2][j] = value[2].f[j]; 4110 rgba[3][j] = value[3].f[j]; 4111 } 4112 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) { 4113 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 4114 rgba2[0][j] = value2[0].f[j]; 4115 rgba2[1][j] = value2[1].f[j]; 4116 rgba2[2][j] = value2[2].f[j]; 4117 rgba2[3][j] = value2[3].f[j]; 4118 } 4119 } 4120 4121 mach->Image->op(mach->Image, ¶ms, inst->Instruction.Opcode, 4122 r[0].i, r[1].i, r[2].i, sample_r.i, 4123 rgba, rgba2); 4124 4125 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 4126 r[0].f[j] = rgba[0][j]; 4127 r[1].f[j] = rgba[1][j]; 4128 r[2].f[j] = rgba[2][j]; 4129 r[3].f[j] = rgba[3][j]; 4130 } 4131 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4132 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 4133 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 4134 } 4135 } 4136} 4137 4138static void 4139exec_atomop_buf(struct tgsi_exec_machine *mach, 4140 const struct tgsi_full_instruction *inst) 4141{ 4142 union tgsi_exec_channel r[4]; 4143 union tgsi_exec_channel value[4], value2[4]; 4144 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 4145 float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 4146 struct tgsi_buffer_params params; 4147 int i, j; 
4148 uint unit, chan; 4149 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 4150 4151 unit = fetch_sampler_unit(mach, inst, 0); 4152 4153 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 4154 params.unit = unit; 4155 params.writemask = inst->Dst[0].Register.WriteMask; 4156 4157 IFETCH(&r[0], 1, TGSI_CHAN_X); 4158 4159 for (i = 0; i < 4; i++) { 4160 FETCH(&value[i], 2, TGSI_CHAN_X + i); 4161 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) 4162 FETCH(&value2[i], 3, TGSI_CHAN_X + i); 4163 } 4164 4165 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 4166 rgba[0][j] = value[0].f[j]; 4167 rgba[1][j] = value[1].f[j]; 4168 rgba[2][j] = value[2].f[j]; 4169 rgba[3][j] = value[3].f[j]; 4170 } 4171 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) { 4172 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 4173 rgba2[0][j] = value2[0].f[j]; 4174 rgba2[1][j] = value2[1].f[j]; 4175 rgba2[2][j] = value2[2].f[j]; 4176 rgba2[3][j] = value2[3].f[j]; 4177 } 4178 } 4179 4180 mach->Buffer->op(mach->Buffer, ¶ms, inst->Instruction.Opcode, 4181 r[0].i, 4182 rgba, rgba2); 4183 4184 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 4185 r[0].f[j] = rgba[0][j]; 4186 r[1].f[j] = rgba[1][j]; 4187 r[2].f[j] = rgba[2][j]; 4188 r[3].f[j] = rgba[3][j]; 4189 } 4190 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4191 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 4192 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 4193 } 4194 } 4195} 4196 4197static void 4198exec_atomop_mem(struct tgsi_exec_machine *mach, 4199 const struct tgsi_full_instruction *inst) 4200{ 4201 union tgsi_exec_channel r[4]; 4202 union tgsi_exec_channel value[4], value2[4]; 4203 char *ptr = mach->LocalMem; 4204 uint32_t val; 4205 uint chan, i; 4206 uint32_t offset; 4207 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 4208 int execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 4209 IFETCH(&r[0], 1, TGSI_CHAN_X); 4210 4211 if (r[0].u[0] >= 
mach->LocalMemSize) 4212 return; 4213 4214 offset = r[0].u[0]; 4215 ptr += offset; 4216 for (i = 0; i < 4; i++) { 4217 FETCH(&value[i], 2, TGSI_CHAN_X + i); 4218 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) 4219 FETCH(&value2[i], 3, TGSI_CHAN_X + i); 4220 } 4221 4222 memcpy(&r[0].u[0], ptr, 4); 4223 val = r[0].u[0]; 4224 switch (inst->Instruction.Opcode) { 4225 case TGSI_OPCODE_ATOMUADD: 4226 val += value[0].u[0]; 4227 break; 4228 case TGSI_OPCODE_ATOMXOR: 4229 val ^= value[0].u[0]; 4230 break; 4231 case TGSI_OPCODE_ATOMOR: 4232 val |= value[0].u[0]; 4233 break; 4234 case TGSI_OPCODE_ATOMAND: 4235 val &= value[0].u[0]; 4236 break; 4237 case TGSI_OPCODE_ATOMUMIN: 4238 val = MIN2(val, value[0].u[0]); 4239 break; 4240 case TGSI_OPCODE_ATOMUMAX: 4241 val = MAX2(val, value[0].u[0]); 4242 break; 4243 case TGSI_OPCODE_ATOMIMIN: 4244 val = MIN2(r[0].i[0], value[0].i[0]); 4245 break; 4246 case TGSI_OPCODE_ATOMIMAX: 4247 val = MAX2(r[0].i[0], value[0].i[0]); 4248 break; 4249 case TGSI_OPCODE_ATOMXCHG: 4250 val = value[0].i[0]; 4251 break; 4252 case TGSI_OPCODE_ATOMCAS: 4253 if (val == value[0].u[0]) 4254 val = value2[0].u[0]; 4255 break; 4256 default: 4257 break; 4258 } 4259 for (i = 0; i < TGSI_QUAD_SIZE; i++) 4260 if (execmask & (1 << i)) 4261 memcpy(ptr, &val, 4); 4262 4263 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4264 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 4265 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 4266 } 4267 } 4268} 4269 4270static void 4271exec_atomop(struct tgsi_exec_machine *mach, 4272 const struct tgsi_full_instruction *inst) 4273{ 4274 if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) 4275 exec_atomop_img(mach, inst); 4276 else if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) 4277 exec_atomop_buf(mach, inst); 4278 else if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) 4279 exec_atomop_mem(mach, inst); 4280} 4281 4282static void 4283exec_resq_img(struct tgsi_exec_machine *mach, 4284 const 
struct tgsi_full_instruction *inst) 4285{ 4286 int result[4]; 4287 union tgsi_exec_channel r[4]; 4288 uint unit; 4289 int i, chan, j; 4290 struct tgsi_image_params params; 4291 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 4292 4293 unit = fetch_sampler_unit(mach, inst, 0); 4294 4295 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 4296 params.unit = unit; 4297 params.tgsi_tex_instr = inst->Memory.Texture; 4298 params.format = inst->Memory.Format; 4299 4300 mach->Image->get_dims(mach->Image, ¶ms, result); 4301 4302 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 4303 for (j = 0; j < 4; j++) { 4304 r[j].i[i] = result[j]; 4305 } 4306 } 4307 4308 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4309 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 4310 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, 4311 TGSI_EXEC_DATA_INT); 4312 } 4313 } 4314} 4315 4316static void 4317exec_resq_buf(struct tgsi_exec_machine *mach, 4318 const struct tgsi_full_instruction *inst) 4319{ 4320 int result; 4321 union tgsi_exec_channel r[4]; 4322 uint unit; 4323 int i, chan; 4324 struct tgsi_buffer_params params; 4325 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 4326 4327 unit = fetch_sampler_unit(mach, inst, 0); 4328 4329 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 4330 params.unit = unit; 4331 4332 mach->Buffer->get_dims(mach->Buffer, ¶ms, &result); 4333 4334 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 4335 r[0].i[i] = result; 4336 } 4337 4338 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4339 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 4340 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, 4341 TGSI_EXEC_DATA_INT); 4342 } 4343 } 4344} 4345 4346static void 4347exec_resq(struct tgsi_exec_machine *mach, 4348 const struct tgsi_full_instruction *inst) 4349{ 4350 if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) 4351 exec_resq_img(mach, inst); 4352 else 4353 exec_resq_buf(mach, inst); 4354} 
4355 4356static void 4357micro_f2u64(union tgsi_double_channel *dst, 4358 const union tgsi_exec_channel *src) 4359{ 4360 dst->u64[0] = (uint64_t)src->f[0]; 4361 dst->u64[1] = (uint64_t)src->f[1]; 4362 dst->u64[2] = (uint64_t)src->f[2]; 4363 dst->u64[3] = (uint64_t)src->f[3]; 4364} 4365 4366static void 4367micro_f2i64(union tgsi_double_channel *dst, 4368 const union tgsi_exec_channel *src) 4369{ 4370 dst->i64[0] = (int64_t)src->f[0]; 4371 dst->i64[1] = (int64_t)src->f[1]; 4372 dst->i64[2] = (int64_t)src->f[2]; 4373 dst->i64[3] = (int64_t)src->f[3]; 4374} 4375 4376static void 4377micro_u2i64(union tgsi_double_channel *dst, 4378 const union tgsi_exec_channel *src) 4379{ 4380 dst->u64[0] = (uint64_t)src->u[0]; 4381 dst->u64[1] = (uint64_t)src->u[1]; 4382 dst->u64[2] = (uint64_t)src->u[2]; 4383 dst->u64[3] = (uint64_t)src->u[3]; 4384} 4385 4386static void 4387micro_i2i64(union tgsi_double_channel *dst, 4388 const union tgsi_exec_channel *src) 4389{ 4390 dst->i64[0] = (int64_t)src->i[0]; 4391 dst->i64[1] = (int64_t)src->i[1]; 4392 dst->i64[2] = (int64_t)src->i[2]; 4393 dst->i64[3] = (int64_t)src->i[3]; 4394} 4395 4396static void 4397micro_d2u64(union tgsi_double_channel *dst, 4398 const union tgsi_double_channel *src) 4399{ 4400 dst->u64[0] = (uint64_t)src->d[0]; 4401 dst->u64[1] = (uint64_t)src->d[1]; 4402 dst->u64[2] = (uint64_t)src->d[2]; 4403 dst->u64[3] = (uint64_t)src->d[3]; 4404} 4405 4406static void 4407micro_d2i64(union tgsi_double_channel *dst, 4408 const union tgsi_double_channel *src) 4409{ 4410 dst->i64[0] = (int64_t)src->d[0]; 4411 dst->i64[1] = (int64_t)src->d[1]; 4412 dst->i64[2] = (int64_t)src->d[2]; 4413 dst->i64[3] = (int64_t)src->d[3]; 4414} 4415 4416static void 4417micro_u642d(union tgsi_double_channel *dst, 4418 const union tgsi_double_channel *src) 4419{ 4420 dst->d[0] = (double)src->u64[0]; 4421 dst->d[1] = (double)src->u64[1]; 4422 dst->d[2] = (double)src->u64[2]; 4423 dst->d[3] = (double)src->u64[3]; 4424} 4425 4426static void 
4427micro_i642d(union tgsi_double_channel *dst, 4428 const union tgsi_double_channel *src) 4429{ 4430 dst->d[0] = (double)src->i64[0]; 4431 dst->d[1] = (double)src->i64[1]; 4432 dst->d[2] = (double)src->i64[2]; 4433 dst->d[3] = (double)src->i64[3]; 4434} 4435 4436static void 4437micro_u642f(union tgsi_exec_channel *dst, 4438 const union tgsi_double_channel *src) 4439{ 4440 dst->f[0] = (float)src->u64[0]; 4441 dst->f[1] = (float)src->u64[1]; 4442 dst->f[2] = (float)src->u64[2]; 4443 dst->f[3] = (float)src->u64[3]; 4444} 4445 4446static void 4447micro_i642f(union tgsi_exec_channel *dst, 4448 const union tgsi_double_channel *src) 4449{ 4450 dst->f[0] = (float)src->i64[0]; 4451 dst->f[1] = (float)src->i64[1]; 4452 dst->f[2] = (float)src->i64[2]; 4453 dst->f[3] = (float)src->i64[3]; 4454} 4455 4456static void 4457exec_t_2_64(struct tgsi_exec_machine *mach, 4458 const struct tgsi_full_instruction *inst, 4459 micro_dop_s op, 4460 enum tgsi_exec_datatype src_datatype) 4461{ 4462 union tgsi_exec_channel src; 4463 union tgsi_double_channel dst; 4464 4465 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) { 4466 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype); 4467 op(&dst, &src); 4468 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 4469 } 4470 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) { 4471 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, src_datatype); 4472 op(&dst, &src); 4473 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 4474 } 4475} 4476 4477static void 4478exec_64_2_t(struct tgsi_exec_machine *mach, 4479 const struct tgsi_full_instruction *inst, 4480 micro_sop_d op, 4481 enum tgsi_exec_datatype dst_datatype) 4482{ 4483 union tgsi_double_channel src; 4484 union tgsi_exec_channel dst; 4485 int wm = inst->Dst[0].Register.WriteMask; 4486 int i; 4487 int bit; 4488 for (i = 0; i < 2; i++) { 4489 bit = ffs(wm); 4490 if 
(bit) { 4491 wm &= ~(1 << (bit - 1)); 4492 if (i == 0) 4493 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 4494 else 4495 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 4496 op(&dst, &src); 4497 store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1, dst_datatype); 4498 } 4499 } 4500} 4501 4502static void 4503micro_i2f(union tgsi_exec_channel *dst, 4504 const union tgsi_exec_channel *src) 4505{ 4506 dst->f[0] = (float)src->i[0]; 4507 dst->f[1] = (float)src->i[1]; 4508 dst->f[2] = (float)src->i[2]; 4509 dst->f[3] = (float)src->i[3]; 4510} 4511 4512static void 4513micro_not(union tgsi_exec_channel *dst, 4514 const union tgsi_exec_channel *src) 4515{ 4516 dst->u[0] = ~src->u[0]; 4517 dst->u[1] = ~src->u[1]; 4518 dst->u[2] = ~src->u[2]; 4519 dst->u[3] = ~src->u[3]; 4520} 4521 4522static void 4523micro_shl(union tgsi_exec_channel *dst, 4524 const union tgsi_exec_channel *src0, 4525 const union tgsi_exec_channel *src1) 4526{ 4527 unsigned masked_count; 4528 masked_count = src1->u[0] & 0x1f; 4529 dst->u[0] = src0->u[0] << masked_count; 4530 masked_count = src1->u[1] & 0x1f; 4531 dst->u[1] = src0->u[1] << masked_count; 4532 masked_count = src1->u[2] & 0x1f; 4533 dst->u[2] = src0->u[2] << masked_count; 4534 masked_count = src1->u[3] & 0x1f; 4535 dst->u[3] = src0->u[3] << masked_count; 4536} 4537 4538static void 4539micro_and(union tgsi_exec_channel *dst, 4540 const union tgsi_exec_channel *src0, 4541 const union tgsi_exec_channel *src1) 4542{ 4543 dst->u[0] = src0->u[0] & src1->u[0]; 4544 dst->u[1] = src0->u[1] & src1->u[1]; 4545 dst->u[2] = src0->u[2] & src1->u[2]; 4546 dst->u[3] = src0->u[3] & src1->u[3]; 4547} 4548 4549static void 4550micro_or(union tgsi_exec_channel *dst, 4551 const union tgsi_exec_channel *src0, 4552 const union tgsi_exec_channel *src1) 4553{ 4554 dst->u[0] = src0->u[0] | src1->u[0]; 4555 dst->u[1] = src0->u[1] | src1->u[1]; 4556 dst->u[2] = src0->u[2] | src1->u[2]; 4557 dst->u[3] = src0->u[3] | 
src1->u[3]; 4558} 4559 4560static void 4561micro_xor(union tgsi_exec_channel *dst, 4562 const union tgsi_exec_channel *src0, 4563 const union tgsi_exec_channel *src1) 4564{ 4565 dst->u[0] = src0->u[0] ^ src1->u[0]; 4566 dst->u[1] = src0->u[1] ^ src1->u[1]; 4567 dst->u[2] = src0->u[2] ^ src1->u[2]; 4568 dst->u[3] = src0->u[3] ^ src1->u[3]; 4569} 4570 4571static void 4572micro_mod(union tgsi_exec_channel *dst, 4573 const union tgsi_exec_channel *src0, 4574 const union tgsi_exec_channel *src1) 4575{ 4576 dst->i[0] = src1->i[0] ? src0->i[0] % src1->i[0] : ~0; 4577 dst->i[1] = src1->i[1] ? src0->i[1] % src1->i[1] : ~0; 4578 dst->i[2] = src1->i[2] ? src0->i[2] % src1->i[2] : ~0; 4579 dst->i[3] = src1->i[3] ? src0->i[3] % src1->i[3] : ~0; 4580} 4581 4582static void 4583micro_f2i(union tgsi_exec_channel *dst, 4584 const union tgsi_exec_channel *src) 4585{ 4586 dst->i[0] = (int)src->f[0]; 4587 dst->i[1] = (int)src->f[1]; 4588 dst->i[2] = (int)src->f[2]; 4589 dst->i[3] = (int)src->f[3]; 4590} 4591 4592static void 4593micro_fseq(union tgsi_exec_channel *dst, 4594 const union tgsi_exec_channel *src0, 4595 const union tgsi_exec_channel *src1) 4596{ 4597 dst->u[0] = src0->f[0] == src1->f[0] ? ~0 : 0; 4598 dst->u[1] = src0->f[1] == src1->f[1] ? ~0 : 0; 4599 dst->u[2] = src0->f[2] == src1->f[2] ? ~0 : 0; 4600 dst->u[3] = src0->f[3] == src1->f[3] ? ~0 : 0; 4601} 4602 4603static void 4604micro_fsge(union tgsi_exec_channel *dst, 4605 const union tgsi_exec_channel *src0, 4606 const union tgsi_exec_channel *src1) 4607{ 4608 dst->u[0] = src0->f[0] >= src1->f[0] ? ~0 : 0; 4609 dst->u[1] = src0->f[1] >= src1->f[1] ? ~0 : 0; 4610 dst->u[2] = src0->f[2] >= src1->f[2] ? ~0 : 0; 4611 dst->u[3] = src0->f[3] >= src1->f[3] ? ~0 : 0; 4612} 4613 4614static void 4615micro_fslt(union tgsi_exec_channel *dst, 4616 const union tgsi_exec_channel *src0, 4617 const union tgsi_exec_channel *src1) 4618{ 4619 dst->u[0] = src0->f[0] < src1->f[0] ? ~0 : 0; 4620 dst->u[1] = src0->f[1] < src1->f[1] ? 
~0 : 0; 4621 dst->u[2] = src0->f[2] < src1->f[2] ? ~0 : 0; 4622 dst->u[3] = src0->f[3] < src1->f[3] ? ~0 : 0; 4623} 4624 4625static void 4626micro_fsne(union tgsi_exec_channel *dst, 4627 const union tgsi_exec_channel *src0, 4628 const union tgsi_exec_channel *src1) 4629{ 4630 dst->u[0] = src0->f[0] != src1->f[0] ? ~0 : 0; 4631 dst->u[1] = src0->f[1] != src1->f[1] ? ~0 : 0; 4632 dst->u[2] = src0->f[2] != src1->f[2] ? ~0 : 0; 4633 dst->u[3] = src0->f[3] != src1->f[3] ? ~0 : 0; 4634} 4635 4636static void 4637micro_idiv(union tgsi_exec_channel *dst, 4638 const union tgsi_exec_channel *src0, 4639 const union tgsi_exec_channel *src1) 4640{ 4641 dst->i[0] = src1->i[0] ? src0->i[0] / src1->i[0] : 0; 4642 dst->i[1] = src1->i[1] ? src0->i[1] / src1->i[1] : 0; 4643 dst->i[2] = src1->i[2] ? src0->i[2] / src1->i[2] : 0; 4644 dst->i[3] = src1->i[3] ? src0->i[3] / src1->i[3] : 0; 4645} 4646 4647static void 4648micro_imax(union tgsi_exec_channel *dst, 4649 const union tgsi_exec_channel *src0, 4650 const union tgsi_exec_channel *src1) 4651{ 4652 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; 4653 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; 4654 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; 4655 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; 4656} 4657 4658static void 4659micro_imin(union tgsi_exec_channel *dst, 4660 const union tgsi_exec_channel *src0, 4661 const union tgsi_exec_channel *src1) 4662{ 4663 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; 4664 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; 4665 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; 4666 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; 4667} 4668 4669static void 4670micro_isge(union tgsi_exec_channel *dst, 4671 const union tgsi_exec_channel *src0, 4672 const union tgsi_exec_channel *src1) 4673{ 4674 dst->i[0] = src0->i[0] >= src1->i[0] ? 
-1 : 0; 4675 dst->i[1] = src0->i[1] >= src1->i[1] ? -1 : 0; 4676 dst->i[2] = src0->i[2] >= src1->i[2] ? -1 : 0; 4677 dst->i[3] = src0->i[3] >= src1->i[3] ? -1 : 0; 4678} 4679 4680static void 4681micro_ishr(union tgsi_exec_channel *dst, 4682 const union tgsi_exec_channel *src0, 4683 const union tgsi_exec_channel *src1) 4684{ 4685 unsigned masked_count; 4686 masked_count = src1->i[0] & 0x1f; 4687 dst->i[0] = src0->i[0] >> masked_count; 4688 masked_count = src1->i[1] & 0x1f; 4689 dst->i[1] = src0->i[1] >> masked_count; 4690 masked_count = src1->i[2] & 0x1f; 4691 dst->i[2] = src0->i[2] >> masked_count; 4692 masked_count = src1->i[3] & 0x1f; 4693 dst->i[3] = src0->i[3] >> masked_count; 4694} 4695 4696static void 4697micro_islt(union tgsi_exec_channel *dst, 4698 const union tgsi_exec_channel *src0, 4699 const union tgsi_exec_channel *src1) 4700{ 4701 dst->i[0] = src0->i[0] < src1->i[0] ? -1 : 0; 4702 dst->i[1] = src0->i[1] < src1->i[1] ? -1 : 0; 4703 dst->i[2] = src0->i[2] < src1->i[2] ? -1 : 0; 4704 dst->i[3] = src0->i[3] < src1->i[3] ? 
-1 : 0; 4705} 4706 4707static void 4708micro_f2u(union tgsi_exec_channel *dst, 4709 const union tgsi_exec_channel *src) 4710{ 4711 dst->u[0] = (uint)src->f[0]; 4712 dst->u[1] = (uint)src->f[1]; 4713 dst->u[2] = (uint)src->f[2]; 4714 dst->u[3] = (uint)src->f[3]; 4715} 4716 4717static void 4718micro_u2f(union tgsi_exec_channel *dst, 4719 const union tgsi_exec_channel *src) 4720{ 4721 dst->f[0] = (float)src->u[0]; 4722 dst->f[1] = (float)src->u[1]; 4723 dst->f[2] = (float)src->u[2]; 4724 dst->f[3] = (float)src->u[3]; 4725} 4726 4727static void 4728micro_uadd(union tgsi_exec_channel *dst, 4729 const union tgsi_exec_channel *src0, 4730 const union tgsi_exec_channel *src1) 4731{ 4732 dst->u[0] = src0->u[0] + src1->u[0]; 4733 dst->u[1] = src0->u[1] + src1->u[1]; 4734 dst->u[2] = src0->u[2] + src1->u[2]; 4735 dst->u[3] = src0->u[3] + src1->u[3]; 4736} 4737 4738static void 4739micro_udiv(union tgsi_exec_channel *dst, 4740 const union tgsi_exec_channel *src0, 4741 const union tgsi_exec_channel *src1) 4742{ 4743 dst->u[0] = src1->u[0] ? src0->u[0] / src1->u[0] : ~0u; 4744 dst->u[1] = src1->u[1] ? src0->u[1] / src1->u[1] : ~0u; 4745 dst->u[2] = src1->u[2] ? src0->u[2] / src1->u[2] : ~0u; 4746 dst->u[3] = src1->u[3] ? src0->u[3] / src1->u[3] : ~0u; 4747} 4748 4749static void 4750micro_umad(union tgsi_exec_channel *dst, 4751 const union tgsi_exec_channel *src0, 4752 const union tgsi_exec_channel *src1, 4753 const union tgsi_exec_channel *src2) 4754{ 4755 dst->u[0] = src0->u[0] * src1->u[0] + src2->u[0]; 4756 dst->u[1] = src0->u[1] * src1->u[1] + src2->u[1]; 4757 dst->u[2] = src0->u[2] * src1->u[2] + src2->u[2]; 4758 dst->u[3] = src0->u[3] * src1->u[3] + src2->u[3]; 4759} 4760 4761static void 4762micro_umax(union tgsi_exec_channel *dst, 4763 const union tgsi_exec_channel *src0, 4764 const union tgsi_exec_channel *src1) 4765{ 4766 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; 4767 dst->u[1] = src0->u[1] > src1->u[1] ? 
src0->u[1] : src1->u[1]; 4768 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; 4769 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; 4770} 4771 4772static void 4773micro_umin(union tgsi_exec_channel *dst, 4774 const union tgsi_exec_channel *src0, 4775 const union tgsi_exec_channel *src1) 4776{ 4777 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; 4778 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; 4779 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; 4780 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; 4781} 4782 4783static void 4784micro_umod(union tgsi_exec_channel *dst, 4785 const union tgsi_exec_channel *src0, 4786 const union tgsi_exec_channel *src1) 4787{ 4788 dst->u[0] = src1->u[0] ? src0->u[0] % src1->u[0] : ~0u; 4789 dst->u[1] = src1->u[1] ? src0->u[1] % src1->u[1] : ~0u; 4790 dst->u[2] = src1->u[2] ? src0->u[2] % src1->u[2] : ~0u; 4791 dst->u[3] = src1->u[3] ? src0->u[3] % src1->u[3] : ~0u; 4792} 4793 4794static void 4795micro_umul(union tgsi_exec_channel *dst, 4796 const union tgsi_exec_channel *src0, 4797 const union tgsi_exec_channel *src1) 4798{ 4799 dst->u[0] = src0->u[0] * src1->u[0]; 4800 dst->u[1] = src0->u[1] * src1->u[1]; 4801 dst->u[2] = src0->u[2] * src1->u[2]; 4802 dst->u[3] = src0->u[3] * src1->u[3]; 4803} 4804 4805static void 4806micro_imul_hi(union tgsi_exec_channel *dst, 4807 const union tgsi_exec_channel *src0, 4808 const union tgsi_exec_channel *src1) 4809{ 4810#define I64M(x, y) ((((int64_t)x) * ((int64_t)y)) >> 32) 4811 dst->i[0] = I64M(src0->i[0], src1->i[0]); 4812 dst->i[1] = I64M(src0->i[1], src1->i[1]); 4813 dst->i[2] = I64M(src0->i[2], src1->i[2]); 4814 dst->i[3] = I64M(src0->i[3], src1->i[3]); 4815#undef I64M 4816} 4817 4818static void 4819micro_umul_hi(union tgsi_exec_channel *dst, 4820 const union tgsi_exec_channel *src0, 4821 const union tgsi_exec_channel *src1) 4822{ 4823#define U64M(x, y) ((((uint64_t)x) * ((uint64_t)y)) >> 32) 4824 
dst->u[0] = U64M(src0->u[0], src1->u[0]); 4825 dst->u[1] = U64M(src0->u[1], src1->u[1]); 4826 dst->u[2] = U64M(src0->u[2], src1->u[2]); 4827 dst->u[3] = U64M(src0->u[3], src1->u[3]); 4828#undef U64M 4829} 4830 4831static void 4832micro_useq(union tgsi_exec_channel *dst, 4833 const union tgsi_exec_channel *src0, 4834 const union tgsi_exec_channel *src1) 4835{ 4836 dst->u[0] = src0->u[0] == src1->u[0] ? ~0 : 0; 4837 dst->u[1] = src0->u[1] == src1->u[1] ? ~0 : 0; 4838 dst->u[2] = src0->u[2] == src1->u[2] ? ~0 : 0; 4839 dst->u[3] = src0->u[3] == src1->u[3] ? ~0 : 0; 4840} 4841 4842static void 4843micro_usge(union tgsi_exec_channel *dst, 4844 const union tgsi_exec_channel *src0, 4845 const union tgsi_exec_channel *src1) 4846{ 4847 dst->u[0] = src0->u[0] >= src1->u[0] ? ~0 : 0; 4848 dst->u[1] = src0->u[1] >= src1->u[1] ? ~0 : 0; 4849 dst->u[2] = src0->u[2] >= src1->u[2] ? ~0 : 0; 4850 dst->u[3] = src0->u[3] >= src1->u[3] ? ~0 : 0; 4851} 4852 4853static void 4854micro_ushr(union tgsi_exec_channel *dst, 4855 const union tgsi_exec_channel *src0, 4856 const union tgsi_exec_channel *src1) 4857{ 4858 unsigned masked_count; 4859 masked_count = src1->u[0] & 0x1f; 4860 dst->u[0] = src0->u[0] >> masked_count; 4861 masked_count = src1->u[1] & 0x1f; 4862 dst->u[1] = src0->u[1] >> masked_count; 4863 masked_count = src1->u[2] & 0x1f; 4864 dst->u[2] = src0->u[2] >> masked_count; 4865 masked_count = src1->u[3] & 0x1f; 4866 dst->u[3] = src0->u[3] >> masked_count; 4867} 4868 4869static void 4870micro_uslt(union tgsi_exec_channel *dst, 4871 const union tgsi_exec_channel *src0, 4872 const union tgsi_exec_channel *src1) 4873{ 4874 dst->u[0] = src0->u[0] < src1->u[0] ? ~0 : 0; 4875 dst->u[1] = src0->u[1] < src1->u[1] ? ~0 : 0; 4876 dst->u[2] = src0->u[2] < src1->u[2] ? ~0 : 0; 4877 dst->u[3] = src0->u[3] < src1->u[3] ? 
~0 : 0; 4878} 4879 4880static void 4881micro_usne(union tgsi_exec_channel *dst, 4882 const union tgsi_exec_channel *src0, 4883 const union tgsi_exec_channel *src1) 4884{ 4885 dst->u[0] = src0->u[0] != src1->u[0] ? ~0 : 0; 4886 dst->u[1] = src0->u[1] != src1->u[1] ? ~0 : 0; 4887 dst->u[2] = src0->u[2] != src1->u[2] ? ~0 : 0; 4888 dst->u[3] = src0->u[3] != src1->u[3] ? ~0 : 0; 4889} 4890 4891static void 4892micro_uarl(union tgsi_exec_channel *dst, 4893 const union tgsi_exec_channel *src) 4894{ 4895 dst->i[0] = src->u[0]; 4896 dst->i[1] = src->u[1]; 4897 dst->i[2] = src->u[2]; 4898 dst->i[3] = src->u[3]; 4899} 4900 4901/** 4902 * Signed bitfield extract (i.e. sign-extend the extracted bits) 4903 */ 4904static void 4905micro_ibfe(union tgsi_exec_channel *dst, 4906 const union tgsi_exec_channel *src0, 4907 const union tgsi_exec_channel *src1, 4908 const union tgsi_exec_channel *src2) 4909{ 4910 int i; 4911 for (i = 0; i < 4; i++) { 4912 int width = src2->i[i] & 0x1f; 4913 int offset = src1->i[i] & 0x1f; 4914 if (width == 0) 4915 dst->i[i] = 0; 4916 else if (width + offset < 32) 4917 dst->i[i] = (src0->i[i] << (32 - width - offset)) >> (32 - width); 4918 else 4919 dst->i[i] = src0->i[i] >> offset; 4920 } 4921} 4922 4923/** 4924 * Unsigned bitfield extract 4925 */ 4926static void 4927micro_ubfe(union tgsi_exec_channel *dst, 4928 const union tgsi_exec_channel *src0, 4929 const union tgsi_exec_channel *src1, 4930 const union tgsi_exec_channel *src2) 4931{ 4932 int i; 4933 for (i = 0; i < 4; i++) { 4934 int width = src2->u[i] & 0x1f; 4935 int offset = src1->u[i] & 0x1f; 4936 if (width == 0) 4937 dst->u[i] = 0; 4938 else if (width + offset < 32) 4939 dst->u[i] = (src0->u[i] << (32 - width - offset)) >> (32 - width); 4940 else 4941 dst->u[i] = src0->u[i] >> offset; 4942 } 4943} 4944 4945/** 4946 * Bitfield insert: copy low bits from src1 into a region of src0. 
4947 */ 4948static void 4949micro_bfi(union tgsi_exec_channel *dst, 4950 const union tgsi_exec_channel *src0, 4951 const union tgsi_exec_channel *src1, 4952 const union tgsi_exec_channel *src2, 4953 const union tgsi_exec_channel *src3) 4954{ 4955 int i; 4956 for (i = 0; i < 4; i++) { 4957 int width = src3->u[i] & 0x1f; 4958 int offset = src2->u[i] & 0x1f; 4959 int bitmask = ((1 << width) - 1) << offset; 4960 dst->u[i] = ((src1->u[i] << offset) & bitmask) | (src0->u[i] & ~bitmask); 4961 } 4962} 4963 4964static void 4965micro_brev(union tgsi_exec_channel *dst, 4966 const union tgsi_exec_channel *src) 4967{ 4968 dst->u[0] = util_bitreverse(src->u[0]); 4969 dst->u[1] = util_bitreverse(src->u[1]); 4970 dst->u[2] = util_bitreverse(src->u[2]); 4971 dst->u[3] = util_bitreverse(src->u[3]); 4972} 4973 4974static void 4975micro_popc(union tgsi_exec_channel *dst, 4976 const union tgsi_exec_channel *src) 4977{ 4978 dst->u[0] = util_bitcount(src->u[0]); 4979 dst->u[1] = util_bitcount(src->u[1]); 4980 dst->u[2] = util_bitcount(src->u[2]); 4981 dst->u[3] = util_bitcount(src->u[3]); 4982} 4983 4984static void 4985micro_lsb(union tgsi_exec_channel *dst, 4986 const union tgsi_exec_channel *src) 4987{ 4988 dst->i[0] = ffs(src->u[0]) - 1; 4989 dst->i[1] = ffs(src->u[1]) - 1; 4990 dst->i[2] = ffs(src->u[2]) - 1; 4991 dst->i[3] = ffs(src->u[3]) - 1; 4992} 4993 4994static void 4995micro_imsb(union tgsi_exec_channel *dst, 4996 const union tgsi_exec_channel *src) 4997{ 4998 dst->i[0] = util_last_bit_signed(src->i[0]) - 1; 4999 dst->i[1] = util_last_bit_signed(src->i[1]) - 1; 5000 dst->i[2] = util_last_bit_signed(src->i[2]) - 1; 5001 dst->i[3] = util_last_bit_signed(src->i[3]) - 1; 5002} 5003 5004static void 5005micro_umsb(union tgsi_exec_channel *dst, 5006 const union tgsi_exec_channel *src) 5007{ 5008 dst->i[0] = util_last_bit(src->u[0]) - 1; 5009 dst->i[1] = util_last_bit(src->u[1]) - 1; 5010 dst->i[2] = util_last_bit(src->u[2]) - 1; 5011 dst->i[3] = util_last_bit(src->u[3]) - 1; 5012} 
5013 5014/** 5015 * Execute a TGSI instruction. 5016 * Returns TRUE if a barrier instruction is hit, 5017 * otherwise FALSE. 5018 */ 5019static boolean 5020exec_instruction( 5021 struct tgsi_exec_machine *mach, 5022 const struct tgsi_full_instruction *inst, 5023 int *pc ) 5024{ 5025 union tgsi_exec_channel r[10]; 5026 5027 (*pc)++; 5028 5029 switch (inst->Instruction.Opcode) { 5030 case TGSI_OPCODE_ARL: 5031 exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); 5032 break; 5033 5034 case TGSI_OPCODE_MOV: 5035 exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 5036 break; 5037 5038 case TGSI_OPCODE_LIT: 5039 exec_lit(mach, inst); 5040 break; 5041 5042 case TGSI_OPCODE_RCP: 5043 exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5044 break; 5045 5046 case TGSI_OPCODE_RSQ: 5047 exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5048 break; 5049 5050 case TGSI_OPCODE_EXP: 5051 exec_exp(mach, inst); 5052 break; 5053 5054 case TGSI_OPCODE_LOG: 5055 exec_log(mach, inst); 5056 break; 5057 5058 case TGSI_OPCODE_MUL: 5059 exec_vector_binary(mach, inst, micro_mul, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5060 break; 5061 5062 case TGSI_OPCODE_ADD: 5063 exec_vector_binary(mach, inst, micro_add, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5064 break; 5065 5066 case TGSI_OPCODE_DP3: 5067 exec_dp3(mach, inst); 5068 break; 5069 5070 case TGSI_OPCODE_DP4: 5071 exec_dp4(mach, inst); 5072 break; 5073 5074 case TGSI_OPCODE_DST: 5075 exec_dst(mach, inst); 5076 break; 5077 5078 case TGSI_OPCODE_MIN: 5079 exec_vector_binary(mach, inst, micro_min, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5080 break; 5081 5082 case TGSI_OPCODE_MAX: 5083 exec_vector_binary(mach, inst, micro_max, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5084 break; 5085 5086 case TGSI_OPCODE_SLT: 5087 exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT, 
TGSI_EXEC_DATA_FLOAT); 5088 break; 5089 5090 case TGSI_OPCODE_SGE: 5091 exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5092 break; 5093 5094 case TGSI_OPCODE_MAD: 5095 exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5096 break; 5097 5098 case TGSI_OPCODE_LRP: 5099 exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5100 break; 5101 5102 case TGSI_OPCODE_SQRT: 5103 exec_scalar_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5104 break; 5105 5106 case TGSI_OPCODE_FRC: 5107 exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5108 break; 5109 5110 case TGSI_OPCODE_FLR: 5111 exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5112 break; 5113 5114 case TGSI_OPCODE_ROUND: 5115 exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5116 break; 5117 5118 case TGSI_OPCODE_EX2: 5119 exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5120 break; 5121 5122 case TGSI_OPCODE_LG2: 5123 exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5124 break; 5125 5126 case TGSI_OPCODE_POW: 5127 exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5128 break; 5129 5130 case TGSI_OPCODE_LDEXP: 5131 exec_vector_binary(mach, inst, micro_ldexp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5132 break; 5133 5134 case TGSI_OPCODE_COS: 5135 exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5136 break; 5137 5138 case TGSI_OPCODE_DDX: 5139 exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5140 break; 5141 5142 case TGSI_OPCODE_DDY: 5143 exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5144 break; 5145 5146 case TGSI_OPCODE_KILL: 5147 exec_kill (mach); 
5148 break; 5149 5150 case TGSI_OPCODE_KILL_IF: 5151 exec_kill_if (mach, inst); 5152 break; 5153 5154 case TGSI_OPCODE_PK2H: 5155 exec_pk2h(mach, inst); 5156 break; 5157 5158 case TGSI_OPCODE_PK2US: 5159 assert (0); 5160 break; 5161 5162 case TGSI_OPCODE_PK4B: 5163 assert (0); 5164 break; 5165 5166 case TGSI_OPCODE_PK4UB: 5167 assert (0); 5168 break; 5169 5170 case TGSI_OPCODE_SEQ: 5171 exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5172 break; 5173 5174 case TGSI_OPCODE_SGT: 5175 exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5176 break; 5177 5178 case TGSI_OPCODE_SIN: 5179 exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5180 break; 5181 5182 case TGSI_OPCODE_SLE: 5183 exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5184 break; 5185 5186 case TGSI_OPCODE_SNE: 5187 exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5188 break; 5189 5190 case TGSI_OPCODE_TEX: 5191 /* simple texture lookup */ 5192 /* src[0] = texcoord */ 5193 /* src[1] = sampler unit */ 5194 exec_tex(mach, inst, TEX_MODIFIER_NONE, 1); 5195 break; 5196 5197 case TGSI_OPCODE_TXB: 5198 /* Texture lookup with lod bias */ 5199 /* src[0] = texcoord (src[0].w = LOD bias) */ 5200 /* src[1] = sampler unit */ 5201 exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 1); 5202 break; 5203 5204 case TGSI_OPCODE_TXD: 5205 /* Texture lookup with explict partial derivatives */ 5206 /* src[0] = texcoord */ 5207 /* src[1] = d[strq]/dx */ 5208 /* src[2] = d[strq]/dy */ 5209 /* src[3] = sampler unit */ 5210 exec_txd(mach, inst); 5211 break; 5212 5213 case TGSI_OPCODE_TXL: 5214 /* Texture lookup with explit LOD */ 5215 /* src[0] = texcoord (src[0].w = LOD) */ 5216 /* src[1] = sampler unit */ 5217 exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 1); 5218 break; 5219 5220 case TGSI_OPCODE_TXP: 5221 /* Texture lookup with projection */ 5222 /* 
src[0] = texcoord (src[0].w = projection) */ 5223 /* src[1] = sampler unit */ 5224 exec_tex(mach, inst, TEX_MODIFIER_PROJECTED, 1); 5225 break; 5226 5227 case TGSI_OPCODE_TG4: 5228 /* src[0] = texcoord */ 5229 /* src[1] = component */ 5230 /* src[2] = sampler unit */ 5231 exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2); 5232 break; 5233 5234 case TGSI_OPCODE_LODQ: 5235 /* src[0] = texcoord */ 5236 /* src[1] = sampler unit */ 5237 exec_lodq(mach, inst); 5238 break; 5239 5240 case TGSI_OPCODE_UP2H: 5241 exec_up2h(mach, inst); 5242 break; 5243 5244 case TGSI_OPCODE_UP2US: 5245 assert (0); 5246 break; 5247 5248 case TGSI_OPCODE_UP4B: 5249 assert (0); 5250 break; 5251 5252 case TGSI_OPCODE_UP4UB: 5253 assert (0); 5254 break; 5255 5256 case TGSI_OPCODE_ARR: 5257 exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); 5258 break; 5259 5260 case TGSI_OPCODE_CAL: 5261 /* skip the call if no execution channels are enabled */ 5262 if (mach->ExecMask) { 5263 /* do the call */ 5264 5265 /* First, record the depths of the execution stacks. 5266 * This is important for deeply nested/looped return statements. 5267 * We have to unwind the stacks by the correct amount. For a 5268 * real code generator, we could determine the number of entries 5269 * to pop off each stack with simple static analysis and avoid 5270 * implementing this data structure at run time. 
5271 */ 5272 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop; 5273 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop; 5274 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop; 5275 mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop; 5276 mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop; 5277 /* note that PC was already incremented above */ 5278 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc; 5279 5280 mach->CallStackTop++; 5281 5282 /* Second, push the Cond, Loop, Cont, Func stacks */ 5283 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 5284 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 5285 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 5286 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); 5287 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); 5288 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); 5289 5290 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 5291 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 5292 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 5293 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; 5294 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; 5295 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; 5296 5297 /* Finally, jump to the subroutine. The label is a pointer 5298 * (an instruction number) to the BGNSUB instruction. 
5299 */ 5300 *pc = inst->Label.Label; 5301 assert(mach->Instructions[*pc].Instruction.Opcode 5302 == TGSI_OPCODE_BGNSUB); 5303 } 5304 break; 5305 5306 case TGSI_OPCODE_RET: 5307 mach->FuncMask &= ~mach->ExecMask; 5308 UPDATE_EXEC_MASK(mach); 5309 5310 if (mach->FuncMask == 0x0) { 5311 /* really return now (otherwise, keep executing */ 5312 5313 if (mach->CallStackTop == 0) { 5314 /* returning from main() */ 5315 mach->CondStackTop = 0; 5316 mach->LoopStackTop = 0; 5317 mach->ContStackTop = 0; 5318 mach->LoopLabelStackTop = 0; 5319 mach->SwitchStackTop = 0; 5320 mach->BreakStackTop = 0; 5321 *pc = -1; 5322 return FALSE; 5323 } 5324 5325 assert(mach->CallStackTop > 0); 5326 mach->CallStackTop--; 5327 5328 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; 5329 mach->CondMask = mach->CondStack[mach->CondStackTop]; 5330 5331 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; 5332 mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; 5333 5334 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; 5335 mach->ContMask = mach->ContStack[mach->ContStackTop]; 5336 5337 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; 5338 mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; 5339 5340 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; 5341 mach->BreakType = mach->BreakStack[mach->BreakStackTop]; 5342 5343 assert(mach->FuncStackTop > 0); 5344 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 5345 5346 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; 5347 5348 UPDATE_EXEC_MASK(mach); 5349 } 5350 break; 5351 5352 case TGSI_OPCODE_SSG: 5353 exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5354 break; 5355 5356 case TGSI_OPCODE_CMP: 5357 exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5358 break; 5359 5360 case TGSI_OPCODE_DIV: 5361 exec_vector_binary(mach, inst, micro_div, 
TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5362 break; 5363 5364 case TGSI_OPCODE_DP2: 5365 exec_dp2(mach, inst); 5366 break; 5367 5368 case TGSI_OPCODE_IF: 5369 /* push CondMask */ 5370 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 5371 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 5372 FETCH( &r[0], 0, TGSI_CHAN_X ); 5373 /* update CondMask */ 5374 if( ! r[0].f[0] ) { 5375 mach->CondMask &= ~0x1; 5376 } 5377 if( ! r[0].f[1] ) { 5378 mach->CondMask &= ~0x2; 5379 } 5380 if( ! r[0].f[2] ) { 5381 mach->CondMask &= ~0x4; 5382 } 5383 if( ! r[0].f[3] ) { 5384 mach->CondMask &= ~0x8; 5385 } 5386 UPDATE_EXEC_MASK(mach); 5387 /* Todo: If CondMask==0, jump to ELSE */ 5388 break; 5389 5390 case TGSI_OPCODE_UIF: 5391 /* push CondMask */ 5392 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 5393 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 5394 IFETCH( &r[0], 0, TGSI_CHAN_X ); 5395 /* update CondMask */ 5396 if( ! r[0].u[0] ) { 5397 mach->CondMask &= ~0x1; 5398 } 5399 if( ! r[0].u[1] ) { 5400 mach->CondMask &= ~0x2; 5401 } 5402 if( ! r[0].u[2] ) { 5403 mach->CondMask &= ~0x4; 5404 } 5405 if( ! 
r[0].u[3] ) { 5406 mach->CondMask &= ~0x8; 5407 } 5408 UPDATE_EXEC_MASK(mach); 5409 /* Todo: If CondMask==0, jump to ELSE */ 5410 break; 5411 5412 case TGSI_OPCODE_ELSE: 5413 /* invert CondMask wrt previous mask */ 5414 { 5415 uint prevMask; 5416 assert(mach->CondStackTop > 0); 5417 prevMask = mach->CondStack[mach->CondStackTop - 1]; 5418 mach->CondMask = ~mach->CondMask & prevMask; 5419 UPDATE_EXEC_MASK(mach); 5420 /* Todo: If CondMask==0, jump to ENDIF */ 5421 } 5422 break; 5423 5424 case TGSI_OPCODE_ENDIF: 5425 /* pop CondMask */ 5426 assert(mach->CondStackTop > 0); 5427 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 5428 UPDATE_EXEC_MASK(mach); 5429 break; 5430 5431 case TGSI_OPCODE_END: 5432 /* make sure we end primitives which haven't 5433 * been explicitly emitted */ 5434 conditional_emit_primitive(mach); 5435 /* halt execution */ 5436 *pc = -1; 5437 break; 5438 5439 case TGSI_OPCODE_CEIL: 5440 exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5441 break; 5442 5443 case TGSI_OPCODE_I2F: 5444 exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT); 5445 break; 5446 5447 case TGSI_OPCODE_NOT: 5448 exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5449 break; 5450 5451 case TGSI_OPCODE_TRUNC: 5452 exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5453 break; 5454 5455 case TGSI_OPCODE_SHL: 5456 exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5457 break; 5458 5459 case TGSI_OPCODE_AND: 5460 exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5461 break; 5462 5463 case TGSI_OPCODE_OR: 5464 exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5465 break; 5466 5467 case TGSI_OPCODE_MOD: 5468 exec_vector_binary(mach, inst, micro_mod, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5469 break; 5470 5471 case TGSI_OPCODE_XOR: 5472 
exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5473 break; 5474 5475 case TGSI_OPCODE_TXF: 5476 exec_txf(mach, inst); 5477 break; 5478 5479 case TGSI_OPCODE_TXQ: 5480 exec_txq(mach, inst); 5481 break; 5482 5483 case TGSI_OPCODE_EMIT: 5484 emit_vertex(mach); 5485 break; 5486 5487 case TGSI_OPCODE_ENDPRIM: 5488 emit_primitive(mach); 5489 break; 5490 5491 case TGSI_OPCODE_BGNLOOP: 5492 /* push LoopMask and ContMasks */ 5493 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 5494 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 5495 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 5496 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); 5497 5498 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 5499 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 5500 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1; 5501 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; 5502 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP; 5503 break; 5504 5505 case TGSI_OPCODE_ENDLOOP: 5506 /* Restore ContMask, but don't pop */ 5507 assert(mach->ContStackTop > 0); 5508 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 5509 UPDATE_EXEC_MASK(mach); 5510 if (mach->ExecMask) { 5511 /* repeat loop: jump to instruction just past BGNLOOP */ 5512 assert(mach->LoopLabelStackTop > 0); 5513 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; 5514 } 5515 else { 5516 /* exit loop: pop LoopMask */ 5517 assert(mach->LoopStackTop > 0); 5518 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 5519 /* pop ContMask */ 5520 assert(mach->ContStackTop > 0); 5521 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 5522 assert(mach->LoopLabelStackTop > 0); 5523 --mach->LoopLabelStackTop; 5524 5525 mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; 5526 } 5527 UPDATE_EXEC_MASK(mach); 5528 break; 5529 5530 case TGSI_OPCODE_BRK: 5531 exec_break(mach); 5532 break; 5533 5534 case TGSI_OPCODE_CONT: 
5535 /* turn off cont channels for each enabled exec channel */ 5536 mach->ContMask &= ~mach->ExecMask; 5537 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 5538 UPDATE_EXEC_MASK(mach); 5539 break; 5540 5541 case TGSI_OPCODE_BGNSUB: 5542 /* no-op */ 5543 break; 5544 5545 case TGSI_OPCODE_ENDSUB: 5546 /* 5547 * XXX: This really should be a no-op. We should never reach this opcode. 5548 */ 5549 5550 assert(mach->CallStackTop > 0); 5551 mach->CallStackTop--; 5552 5553 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; 5554 mach->CondMask = mach->CondStack[mach->CondStackTop]; 5555 5556 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; 5557 mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; 5558 5559 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; 5560 mach->ContMask = mach->ContStack[mach->ContStackTop]; 5561 5562 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; 5563 mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; 5564 5565 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; 5566 mach->BreakType = mach->BreakStack[mach->BreakStackTop]; 5567 5568 assert(mach->FuncStackTop > 0); 5569 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 5570 5571 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; 5572 5573 UPDATE_EXEC_MASK(mach); 5574 break; 5575 5576 case TGSI_OPCODE_NOP: 5577 break; 5578 5579 case TGSI_OPCODE_F2I: 5580 exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); 5581 break; 5582 5583 case TGSI_OPCODE_FSEQ: 5584 exec_vector_binary(mach, inst, micro_fseq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 5585 break; 5586 5587 case TGSI_OPCODE_FSGE: 5588 exec_vector_binary(mach, inst, micro_fsge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 5589 break; 5590 5591 case TGSI_OPCODE_FSLT: 5592 exec_vector_binary(mach, inst, micro_fslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 5593 break; 5594 5595 case 
TGSI_OPCODE_FSNE: 5596 exec_vector_binary(mach, inst, micro_fsne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 5597 break; 5598 5599 case TGSI_OPCODE_IDIV: 5600 exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5601 break; 5602 5603 case TGSI_OPCODE_IMAX: 5604 exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5605 break; 5606 5607 case TGSI_OPCODE_IMIN: 5608 exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5609 break; 5610 5611 case TGSI_OPCODE_INEG: 5612 exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5613 break; 5614 5615 case TGSI_OPCODE_ISGE: 5616 exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5617 break; 5618 5619 case TGSI_OPCODE_ISHR: 5620 exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5621 break; 5622 5623 case TGSI_OPCODE_ISLT: 5624 exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5625 break; 5626 5627 case TGSI_OPCODE_F2U: 5628 exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 5629 break; 5630 5631 case TGSI_OPCODE_U2F: 5632 exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT); 5633 break; 5634 5635 case TGSI_OPCODE_UADD: 5636 exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5637 break; 5638 5639 case TGSI_OPCODE_UDIV: 5640 exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5641 break; 5642 5643 case TGSI_OPCODE_UMAD: 5644 exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5645 break; 5646 5647 case TGSI_OPCODE_UMAX: 5648 exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5649 break; 5650 5651 case TGSI_OPCODE_UMIN: 5652 exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5653 
break; 5654 5655 case TGSI_OPCODE_UMOD: 5656 exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5657 break; 5658 5659 case TGSI_OPCODE_UMUL: 5660 exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5661 break; 5662 5663 case TGSI_OPCODE_IMUL_HI: 5664 exec_vector_binary(mach, inst, micro_imul_hi, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5665 break; 5666 5667 case TGSI_OPCODE_UMUL_HI: 5668 exec_vector_binary(mach, inst, micro_umul_hi, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5669 break; 5670 5671 case TGSI_OPCODE_USEQ: 5672 exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5673 break; 5674 5675 case TGSI_OPCODE_USGE: 5676 exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5677 break; 5678 5679 case TGSI_OPCODE_USHR: 5680 exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5681 break; 5682 5683 case TGSI_OPCODE_USLT: 5684 exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5685 break; 5686 5687 case TGSI_OPCODE_USNE: 5688 exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5689 break; 5690 5691 case TGSI_OPCODE_SWITCH: 5692 exec_switch(mach, inst); 5693 break; 5694 5695 case TGSI_OPCODE_CASE: 5696 exec_case(mach, inst); 5697 break; 5698 5699 case TGSI_OPCODE_DEFAULT: 5700 exec_default(mach); 5701 break; 5702 5703 case TGSI_OPCODE_ENDSWITCH: 5704 exec_endswitch(mach); 5705 break; 5706 5707 case TGSI_OPCODE_SAMPLE_I: 5708 exec_txf(mach, inst); 5709 break; 5710 5711 case TGSI_OPCODE_SAMPLE_I_MS: 5712 exec_txf(mach, inst); 5713 break; 5714 5715 case TGSI_OPCODE_SAMPLE: 5716 exec_sample(mach, inst, TEX_MODIFIER_NONE, FALSE); 5717 break; 5718 5719 case TGSI_OPCODE_SAMPLE_B: 5720 exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS, FALSE); 5721 break; 5722 5723 case TGSI_OPCODE_SAMPLE_C: 5724 exec_sample(mach, inst, TEX_MODIFIER_NONE, TRUE); 
5725 break; 5726 5727 case TGSI_OPCODE_SAMPLE_C_LZ: 5728 exec_sample(mach, inst, TEX_MODIFIER_LEVEL_ZERO, TRUE); 5729 break; 5730 5731 case TGSI_OPCODE_SAMPLE_D: 5732 exec_sample_d(mach, inst); 5733 break; 5734 5735 case TGSI_OPCODE_SAMPLE_L: 5736 exec_sample(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, FALSE); 5737 break; 5738 5739 case TGSI_OPCODE_GATHER4: 5740 exec_sample(mach, inst, TEX_MODIFIER_GATHER, FALSE); 5741 break; 5742 5743 case TGSI_OPCODE_SVIEWINFO: 5744 exec_txq(mach, inst); 5745 break; 5746 5747 case TGSI_OPCODE_SAMPLE_POS: 5748 assert(0); 5749 break; 5750 5751 case TGSI_OPCODE_SAMPLE_INFO: 5752 assert(0); 5753 break; 5754 5755 case TGSI_OPCODE_LOD: 5756 exec_lodq(mach, inst); 5757 break; 5758 5759 case TGSI_OPCODE_UARL: 5760 exec_vector_unary(mach, inst, micro_uarl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT); 5761 break; 5762 5763 case TGSI_OPCODE_UCMP: 5764 exec_ucmp(mach, inst); 5765 break; 5766 5767 case TGSI_OPCODE_IABS: 5768 exec_vector_unary(mach, inst, micro_iabs, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5769 break; 5770 5771 case TGSI_OPCODE_ISSG: 5772 exec_vector_unary(mach, inst, micro_isgn, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5773 break; 5774 5775 case TGSI_OPCODE_TEX2: 5776 /* simple texture lookup */ 5777 /* src[0] = texcoord */ 5778 /* src[1] = compare */ 5779 /* src[2] = sampler unit */ 5780 exec_tex(mach, inst, TEX_MODIFIER_NONE, 2); 5781 break; 5782 case TGSI_OPCODE_TXB2: 5783 /* simple texture lookup */ 5784 /* src[0] = texcoord */ 5785 /* src[1] = bias */ 5786 /* src[2] = sampler unit */ 5787 exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 2); 5788 break; 5789 case TGSI_OPCODE_TXL2: 5790 /* simple texture lookup */ 5791 /* src[0] = texcoord */ 5792 /* src[1] = lod */ 5793 /* src[2] = sampler unit */ 5794 exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 2); 5795 break; 5796 5797 case TGSI_OPCODE_IBFE: 5798 exec_vector_trinary(mach, inst, micro_ibfe, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5799 break; 5800 case TGSI_OPCODE_UBFE: 5801 
exec_vector_trinary(mach, inst, micro_ubfe, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5802 break; 5803 case TGSI_OPCODE_BFI: 5804 exec_vector_quaternary(mach, inst, micro_bfi, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5805 break; 5806 case TGSI_OPCODE_BREV: 5807 exec_vector_unary(mach, inst, micro_brev, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5808 break; 5809 case TGSI_OPCODE_POPC: 5810 exec_vector_unary(mach, inst, micro_popc, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5811 break; 5812 case TGSI_OPCODE_LSB: 5813 exec_vector_unary(mach, inst, micro_lsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT); 5814 break; 5815 case TGSI_OPCODE_IMSB: 5816 exec_vector_unary(mach, inst, micro_imsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5817 break; 5818 case TGSI_OPCODE_UMSB: 5819 exec_vector_unary(mach, inst, micro_umsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT); 5820 break; 5821 5822 case TGSI_OPCODE_F2D: 5823 exec_t_2_64(mach, inst, micro_f2d, TGSI_EXEC_DATA_FLOAT); 5824 break; 5825 5826 case TGSI_OPCODE_D2F: 5827 exec_64_2_t(mach, inst, micro_d2f, TGSI_EXEC_DATA_FLOAT); 5828 break; 5829 5830 case TGSI_OPCODE_DABS: 5831 exec_double_unary(mach, inst, micro_dabs); 5832 break; 5833 5834 case TGSI_OPCODE_DNEG: 5835 exec_double_unary(mach, inst, micro_dneg); 5836 break; 5837 5838 case TGSI_OPCODE_DADD: 5839 exec_double_binary(mach, inst, micro_dadd, TGSI_EXEC_DATA_DOUBLE); 5840 break; 5841 5842 case TGSI_OPCODE_DDIV: 5843 exec_double_binary(mach, inst, micro_ddiv, TGSI_EXEC_DATA_DOUBLE); 5844 break; 5845 5846 case TGSI_OPCODE_DMUL: 5847 exec_double_binary(mach, inst, micro_dmul, TGSI_EXEC_DATA_DOUBLE); 5848 break; 5849 5850 case TGSI_OPCODE_DMAX: 5851 exec_double_binary(mach, inst, micro_dmax, TGSI_EXEC_DATA_DOUBLE); 5852 break; 5853 5854 case TGSI_OPCODE_DMIN: 5855 exec_double_binary(mach, inst, micro_dmin, TGSI_EXEC_DATA_DOUBLE); 5856 break; 5857 5858 case TGSI_OPCODE_DSLT: 5859 exec_double_binary(mach, inst, micro_dslt, TGSI_EXEC_DATA_UINT); 5860 break; 5861 5862 case 
TGSI_OPCODE_DSGE: 5863 exec_double_binary(mach, inst, micro_dsge, TGSI_EXEC_DATA_UINT); 5864 break; 5865 5866 case TGSI_OPCODE_DSEQ: 5867 exec_double_binary(mach, inst, micro_dseq, TGSI_EXEC_DATA_UINT); 5868 break; 5869 5870 case TGSI_OPCODE_DSNE: 5871 exec_double_binary(mach, inst, micro_dsne, TGSI_EXEC_DATA_UINT); 5872 break; 5873 5874 case TGSI_OPCODE_DRCP: 5875 exec_double_unary(mach, inst, micro_drcp); 5876 break; 5877 5878 case TGSI_OPCODE_DSQRT: 5879 exec_double_unary(mach, inst, micro_dsqrt); 5880 break; 5881 5882 case TGSI_OPCODE_DRSQ: 5883 exec_double_unary(mach, inst, micro_drsq); 5884 break; 5885 5886 case TGSI_OPCODE_DMAD: 5887 exec_double_trinary(mach, inst, micro_dmad); 5888 break; 5889 5890 case TGSI_OPCODE_DFRAC: 5891 exec_double_unary(mach, inst, micro_dfrac); 5892 break; 5893 5894 case TGSI_OPCODE_DLDEXP: 5895 exec_dldexp(mach, inst); 5896 break; 5897 5898 case TGSI_OPCODE_DFRACEXP: 5899 exec_dfracexp(mach, inst); 5900 break; 5901 5902 case TGSI_OPCODE_I2D: 5903 exec_t_2_64(mach, inst, micro_i2d, TGSI_EXEC_DATA_INT); 5904 break; 5905 5906 case TGSI_OPCODE_D2I: 5907 exec_64_2_t(mach, inst, micro_d2i, TGSI_EXEC_DATA_INT); 5908 break; 5909 5910 case TGSI_OPCODE_U2D: 5911 exec_t_2_64(mach, inst, micro_u2d, TGSI_EXEC_DATA_UINT); 5912 break; 5913 5914 case TGSI_OPCODE_D2U: 5915 exec_64_2_t(mach, inst, micro_d2u, TGSI_EXEC_DATA_INT); 5916 break; 5917 5918 case TGSI_OPCODE_LOAD: 5919 exec_load(mach, inst); 5920 break; 5921 5922 case TGSI_OPCODE_STORE: 5923 exec_store(mach, inst); 5924 break; 5925 5926 case TGSI_OPCODE_ATOMUADD: 5927 case TGSI_OPCODE_ATOMXCHG: 5928 case TGSI_OPCODE_ATOMCAS: 5929 case TGSI_OPCODE_ATOMAND: 5930 case TGSI_OPCODE_ATOMOR: 5931 case TGSI_OPCODE_ATOMXOR: 5932 case TGSI_OPCODE_ATOMUMIN: 5933 case TGSI_OPCODE_ATOMUMAX: 5934 case TGSI_OPCODE_ATOMIMIN: 5935 case TGSI_OPCODE_ATOMIMAX: 5936 exec_atomop(mach, inst); 5937 break; 5938 5939 case TGSI_OPCODE_RESQ: 5940 exec_resq(mach, inst); 5941 break; 5942 case TGSI_OPCODE_BARRIER: 5943 
case TGSI_OPCODE_MEMBAR: 5944 return TRUE; 5945 break; 5946 5947 case TGSI_OPCODE_I64ABS: 5948 exec_double_unary(mach, inst, micro_i64abs); 5949 break; 5950 5951 case TGSI_OPCODE_I64SSG: 5952 exec_double_unary(mach, inst, micro_i64sgn); 5953 break; 5954 5955 case TGSI_OPCODE_I64NEG: 5956 exec_double_unary(mach, inst, micro_i64neg); 5957 break; 5958 5959 case TGSI_OPCODE_U64SEQ: 5960 exec_double_binary(mach, inst, micro_u64seq, TGSI_EXEC_DATA_UINT); 5961 break; 5962 5963 case TGSI_OPCODE_U64SNE: 5964 exec_double_binary(mach, inst, micro_u64sne, TGSI_EXEC_DATA_UINT); 5965 break; 5966 5967 case TGSI_OPCODE_I64SLT: 5968 exec_double_binary(mach, inst, micro_i64slt, TGSI_EXEC_DATA_UINT); 5969 break; 5970 case TGSI_OPCODE_U64SLT: 5971 exec_double_binary(mach, inst, micro_u64slt, TGSI_EXEC_DATA_UINT); 5972 break; 5973 5974 case TGSI_OPCODE_I64SGE: 5975 exec_double_binary(mach, inst, micro_i64sge, TGSI_EXEC_DATA_UINT); 5976 break; 5977 case TGSI_OPCODE_U64SGE: 5978 exec_double_binary(mach, inst, micro_u64sge, TGSI_EXEC_DATA_UINT); 5979 break; 5980 5981 case TGSI_OPCODE_I64MIN: 5982 exec_double_binary(mach, inst, micro_i64min, TGSI_EXEC_DATA_INT64); 5983 break; 5984 case TGSI_OPCODE_U64MIN: 5985 exec_double_binary(mach, inst, micro_u64min, TGSI_EXEC_DATA_UINT64); 5986 break; 5987 case TGSI_OPCODE_I64MAX: 5988 exec_double_binary(mach, inst, micro_i64max, TGSI_EXEC_DATA_INT64); 5989 break; 5990 case TGSI_OPCODE_U64MAX: 5991 exec_double_binary(mach, inst, micro_u64max, TGSI_EXEC_DATA_UINT64); 5992 break; 5993 case TGSI_OPCODE_U64ADD: 5994 exec_double_binary(mach, inst, micro_u64add, TGSI_EXEC_DATA_UINT64); 5995 break; 5996 case TGSI_OPCODE_U64MUL: 5997 exec_double_binary(mach, inst, micro_u64mul, TGSI_EXEC_DATA_UINT64); 5998 break; 5999 case TGSI_OPCODE_U64SHL: 6000 exec_arg0_64_arg1_32(mach, inst, micro_u64shl); 6001 break; 6002 case TGSI_OPCODE_I64SHR: 6003 exec_arg0_64_arg1_32(mach, inst, micro_i64shr); 6004 break; 6005 case TGSI_OPCODE_U64SHR: 6006 
exec_arg0_64_arg1_32(mach, inst, micro_u64shr); 6007 break; 6008 case TGSI_OPCODE_U64DIV: 6009 exec_double_binary(mach, inst, micro_u64div, TGSI_EXEC_DATA_UINT64); 6010 break; 6011 case TGSI_OPCODE_I64DIV: 6012 exec_double_binary(mach, inst, micro_i64div, TGSI_EXEC_DATA_INT64); 6013 break; 6014 case TGSI_OPCODE_U64MOD: 6015 exec_double_binary(mach, inst, micro_u64mod, TGSI_EXEC_DATA_UINT64); 6016 break; 6017 case TGSI_OPCODE_I64MOD: 6018 exec_double_binary(mach, inst, micro_i64mod, TGSI_EXEC_DATA_INT64); 6019 break; 6020 6021 case TGSI_OPCODE_F2U64: 6022 exec_t_2_64(mach, inst, micro_f2u64, TGSI_EXEC_DATA_FLOAT); 6023 break; 6024 6025 case TGSI_OPCODE_F2I64: 6026 exec_t_2_64(mach, inst, micro_f2i64, TGSI_EXEC_DATA_FLOAT); 6027 break; 6028 6029 case TGSI_OPCODE_U2I64: 6030 exec_t_2_64(mach, inst, micro_u2i64, TGSI_EXEC_DATA_INT); 6031 break; 6032 case TGSI_OPCODE_I2I64: 6033 exec_t_2_64(mach, inst, micro_i2i64, TGSI_EXEC_DATA_INT); 6034 break; 6035 6036 case TGSI_OPCODE_D2U64: 6037 exec_double_unary(mach, inst, micro_d2u64); 6038 break; 6039 6040 case TGSI_OPCODE_D2I64: 6041 exec_double_unary(mach, inst, micro_d2i64); 6042 break; 6043 6044 case TGSI_OPCODE_U642F: 6045 exec_64_2_t(mach, inst, micro_u642f, TGSI_EXEC_DATA_FLOAT); 6046 break; 6047 case TGSI_OPCODE_I642F: 6048 exec_64_2_t(mach, inst, micro_i642f, TGSI_EXEC_DATA_FLOAT); 6049 break; 6050 6051 case TGSI_OPCODE_U642D: 6052 exec_double_unary(mach, inst, micro_u642d); 6053 break; 6054 case TGSI_OPCODE_I642D: 6055 exec_double_unary(mach, inst, micro_i642d); 6056 break; 6057 6058 default: 6059 assert( 0 ); 6060 } 6061 return FALSE; 6062} 6063 6064static void 6065tgsi_exec_machine_setup_masks(struct tgsi_exec_machine *mach) 6066{ 6067 uint default_mask = 0xf; 6068 6069 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; 6070 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; 6071 6072 if (mach->ShaderType == PIPE_SHADER_GEOMETRY) { 6073 mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; 
6074 mach->Primitives[0] = 0; 6075 /* GS runs on a single primitive for now */ 6076 default_mask = 0x1; 6077 } 6078 6079 if (mach->NonHelperMask == 0) 6080 mach->NonHelperMask = default_mask; 6081 mach->CondMask = default_mask; 6082 mach->LoopMask = default_mask; 6083 mach->ContMask = default_mask; 6084 mach->FuncMask = default_mask; 6085 mach->ExecMask = default_mask; 6086 6087 mach->Switch.mask = default_mask; 6088 6089 assert(mach->CondStackTop == 0); 6090 assert(mach->LoopStackTop == 0); 6091 assert(mach->ContStackTop == 0); 6092 assert(mach->SwitchStackTop == 0); 6093 assert(mach->BreakStackTop == 0); 6094 assert(mach->CallStackTop == 0); 6095} 6096 6097/** 6098 * Run TGSI interpreter. 6099 * \return bitmask of "alive" quad components 6100 */ 6101uint 6102tgsi_exec_machine_run( struct tgsi_exec_machine *mach, int start_pc ) 6103{ 6104 uint i; 6105 6106 mach->pc = start_pc; 6107 6108 if (!start_pc) { 6109 tgsi_exec_machine_setup_masks(mach); 6110 6111 /* execute declarations (interpolants) */ 6112 for (i = 0; i < mach->NumDeclarations; i++) { 6113 exec_declaration( mach, mach->Declarations+i ); 6114 } 6115 } 6116 6117 { 6118#if DEBUG_EXECUTION 6119 struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS]; 6120 struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS]; 6121 uint inst = 1; 6122 6123 if (!start_pc) { 6124 memset(mach->Temps, 0, sizeof(temps)); 6125 if (mach->Outputs) 6126 memset(mach->Outputs, 0, sizeof(outputs)); 6127 memset(temps, 0, sizeof(temps)); 6128 memset(outputs, 0, sizeof(outputs)); 6129 } 6130#endif 6131 6132 /* execute instructions, until pc is set to -1 */ 6133 while (mach->pc != -1) { 6134 boolean barrier_hit; 6135#if DEBUG_EXECUTION 6136 uint i; 6137 6138 tgsi_dump_instruction(&mach->Instructions[mach->pc], inst++); 6139#endif 6140 6141 assert(mach->pc < (int) mach->NumInstructions); 6142 barrier_hit = exec_instruction(mach, mach->Instructions + mach->pc, &mach->pc); 6143 6144 /* for compute shaders if we hit a barrier 
return now for later rescheduling */ 6145 if (barrier_hit && mach->ShaderType == PIPE_SHADER_COMPUTE) 6146 return 0; 6147 6148#if DEBUG_EXECUTION 6149 for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) { 6150 if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) { 6151 uint j; 6152 6153 memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i])); 6154 debug_printf("TEMP[%2u] = ", i); 6155 for (j = 0; j < 4; j++) { 6156 if (j > 0) { 6157 debug_printf(" "); 6158 } 6159 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", 6160 temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j], 6161 temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j], 6162 temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j], 6163 temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]); 6164 } 6165 } 6166 } 6167 if (mach->Outputs) { 6168 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { 6169 if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) { 6170 uint j; 6171 6172 memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i])); 6173 debug_printf("OUT[%2u] = ", i); 6174 for (j = 0; j < 4; j++) { 6175 if (j > 0) { 6176 debug_printf(" "); 6177 } 6178 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", 6179 outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j], 6180 outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j], 6181 outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j], 6182 outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]); 6183 } 6184 } 6185 } 6186 } 6187#endif 6188 } 6189 } 6190 6191#if 0 6192 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ 6193 if (mach->ShaderType == PIPE_SHADER_FRAGMENT) { 6194 /* 6195 * Scale back depth component. 6196 */ 6197 for (i = 0; i < 4; i++) 6198 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; 6199 } 6200#endif 6201 6202 /* Strictly speaking, these assertions aren't really needed but they 6203 * can potentially catch some bugs in the control flow code. 
6204 */ 6205 assert(mach->CondStackTop == 0); 6206 assert(mach->LoopStackTop == 0); 6207 assert(mach->ContStackTop == 0); 6208 assert(mach->SwitchStackTop == 0); 6209 assert(mach->BreakStackTop == 0); 6210 assert(mach->CallStackTop == 0); 6211 6212 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 6213} 6214