1/************************************************************************** 2 * 3 * Copyright 2007-2008 VMware, Inc. 4 * All Rights Reserved. 5 * Copyright 2009-2010 VMware, Inc. All rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial portions 17 * of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 **************************************************************************/ 28 29/** 30 * TGSI interpreter/executor. 31 * 32 * Flow control information: 33 * 34 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) 35 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special 36 * care since a condition may be true for some quad components but false 37 * for other components. 38 * 39 * We basically execute all statements (even if they're in the part of 40 * an IF/ELSE clause that's "not taken") and use a special mask to 41 * control writing to destination registers. This is the ExecMask. 42 * See store_dest(). 43 * 44 * The ExecMask is computed from three other masks (CondMask, LoopMask and 45 * ContMask) which are controlled by the flow control instructions (namely: 46 * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). 47 * 48 * 49 * Authors: 50 * Michal Krol 51 * Brian Paul 52 */ 53 54#include "pipe/p_compiler.h" 55#include "pipe/p_state.h" 56#include "pipe/p_shader_tokens.h" 57#include "tgsi/tgsi_dump.h" 58#include "tgsi/tgsi_parse.h" 59#include "tgsi/tgsi_util.h" 60#include "tgsi_exec.h" 61#include "util/u_half.h" 62#include "util/u_memory.h" 63#include "util/u_math.h" 64#include "util/rounding.h" 65 66 67#define DEBUG_EXECUTION 0 68 69 70#define FAST_MATH 0 71 72#define TILE_TOP_LEFT 0 73#define TILE_TOP_RIGHT 1 74#define TILE_BOTTOM_LEFT 2 75#define TILE_BOTTOM_RIGHT 3 76 77union tgsi_double_channel { 78 double d[TGSI_QUAD_SIZE]; 79 unsigned u[TGSI_QUAD_SIZE][2]; 80 uint64_t u64[TGSI_QUAD_SIZE]; 81 int64_t i64[TGSI_QUAD_SIZE]; 82}; 83 84struct tgsi_double_vector { 85 union tgsi_double_channel xy; 86 union tgsi_double_channel zw; 87}; 88 89static void 90micro_abs(union tgsi_exec_channel *dst, 91 const union tgsi_exec_channel *src) 92{ 93 dst->f[0] = fabsf(src->f[0]); 94 dst->f[1] = fabsf(src->f[1]); 95 dst->f[2] = fabsf(src->f[2]); 96 dst->f[3] = fabsf(src->f[3]); 97} 98 99static void 100micro_arl(union tgsi_exec_channel *dst, 101 const union tgsi_exec_channel *src) 102{ 103 dst->i[0] = (int)floorf(src->f[0]); 104 dst->i[1] = (int)floorf(src->f[1]); 105 dst->i[2] = (int)floorf(src->f[2]); 106 dst->i[3] = (int)floorf(src->f[3]); 107} 108 109static void 110micro_arr(union tgsi_exec_channel *dst, 111 const union tgsi_exec_channel *src) 112{ 113 dst->i[0] = (int)floorf(src->f[0] + 0.5f); 114 dst->i[1] = (int)floorf(src->f[1] + 0.5f); 115 dst->i[2] = (int)floorf(src->f[2] + 0.5f); 116 dst->i[3] = (int)floorf(src->f[3] + 0.5f); 117} 118 119static void 120micro_ceil(union tgsi_exec_channel *dst, 121 const union tgsi_exec_channel *src) 122{ 123 dst->f[0] = ceilf(src->f[0]); 124 dst->f[1] = ceilf(src->f[1]); 125 dst->f[2] = ceilf(src->f[2]); 126 dst->f[3] = ceilf(src->f[3]); 127} 128 129static void 130micro_cmp(union tgsi_exec_channel *dst, 131 const union tgsi_exec_channel *src0, 132 const union tgsi_exec_channel *src1, 133 const union tgsi_exec_channel *src2) 134{ 135 dst->f[0] = src0->f[0] < 0.0f ? src1->f[0] : src2->f[0]; 136 dst->f[1] = src0->f[1] < 0.0f ? src1->f[1] : src2->f[1]; 137 dst->f[2] = src0->f[2] < 0.0f ? src1->f[2] : src2->f[2]; 138 dst->f[3] = src0->f[3] < 0.0f ? src1->f[3] : src2->f[3]; 139} 140 141static void 142micro_cos(union tgsi_exec_channel *dst, 143 const union tgsi_exec_channel *src) 144{ 145 dst->f[0] = cosf(src->f[0]); 146 dst->f[1] = cosf(src->f[1]); 147 dst->f[2] = cosf(src->f[2]); 148 dst->f[3] = cosf(src->f[3]); 149} 150 151static void 152micro_d2f(union tgsi_exec_channel *dst, 153 const union tgsi_double_channel *src) 154{ 155 dst->f[0] = (float)src->d[0]; 156 dst->f[1] = (float)src->d[1]; 157 dst->f[2] = (float)src->d[2]; 158 dst->f[3] = (float)src->d[3]; 159} 160 161static void 162micro_d2i(union tgsi_exec_channel *dst, 163 const union tgsi_double_channel *src) 164{ 165 dst->i[0] = (int)src->d[0]; 166 dst->i[1] = (int)src->d[1]; 167 dst->i[2] = (int)src->d[2]; 168 dst->i[3] = (int)src->d[3]; 169} 170 171static void 172micro_d2u(union tgsi_exec_channel *dst, 173 const union tgsi_double_channel *src) 174{ 175 dst->u[0] = (unsigned)src->d[0]; 176 dst->u[1] = (unsigned)src->d[1]; 177 dst->u[2] = (unsigned)src->d[2]; 178 dst->u[3] = (unsigned)src->d[3]; 179} 180static void 181micro_dabs(union tgsi_double_channel *dst, 182 const union tgsi_double_channel *src) 183{ 184 dst->d[0] = src->d[0] >= 0.0 ? src->d[0] : -src->d[0]; 185 dst->d[1] = src->d[1] >= 0.0 ? src->d[1] : -src->d[1]; 186 dst->d[2] = src->d[2] >= 0.0 ? src->d[2] : -src->d[2]; 187 dst->d[3] = src->d[3] >= 0.0 ? src->d[3] : -src->d[3]; 188} 189 190static void 191micro_dadd(union tgsi_double_channel *dst, 192 const union tgsi_double_channel *src) 193{ 194 dst->d[0] = src[0].d[0] + src[1].d[0]; 195 dst->d[1] = src[0].d[1] + src[1].d[1]; 196 dst->d[2] = src[0].d[2] + src[1].d[2]; 197 dst->d[3] = src[0].d[3] + src[1].d[3]; 198} 199 200static void 201micro_ddiv(union tgsi_double_channel *dst, 202 const union tgsi_double_channel *src) 203{ 204 dst->d[0] = src[0].d[0] / src[1].d[0]; 205 dst->d[1] = src[0].d[1] / src[1].d[1]; 206 dst->d[2] = src[0].d[2] / src[1].d[2]; 207 dst->d[3] = src[0].d[3] / src[1].d[3]; 208} 209 210static void 211micro_ddx(union tgsi_exec_channel *dst, 212 const union tgsi_exec_channel *src) 213{ 214 dst->f[0] = 215 dst->f[1] = 216 dst->f[2] = 217 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; 218} 219 220static void 221micro_ddy(union tgsi_exec_channel *dst, 222 const union tgsi_exec_channel *src) 223{ 224 dst->f[0] = 225 dst->f[1] = 226 dst->f[2] = 227 dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; 228} 229 230static void 231micro_dmul(union tgsi_double_channel *dst, 232 const union tgsi_double_channel *src) 233{ 234 dst->d[0] = src[0].d[0] * src[1].d[0]; 235 dst->d[1] = src[0].d[1] * src[1].d[1]; 236 dst->d[2] = src[0].d[2] * src[1].d[2]; 237 dst->d[3] = src[0].d[3] * src[1].d[3]; 238} 239 240static void 241micro_dmax(union tgsi_double_channel *dst, 242 const union tgsi_double_channel *src) 243{ 244 dst->d[0] = src[0].d[0] > src[1].d[0] ? src[0].d[0] : src[1].d[0]; 245 dst->d[1] = src[0].d[1] > src[1].d[1] ? src[0].d[1] : src[1].d[1]; 246 dst->d[2] = src[0].d[2] > src[1].d[2] ? src[0].d[2] : src[1].d[2]; 247 dst->d[3] = src[0].d[3] > src[1].d[3] ? src[0].d[3] : src[1].d[3]; 248} 249 250static void 251micro_dmin(union tgsi_double_channel *dst, 252 const union tgsi_double_channel *src) 253{ 254 dst->d[0] = src[0].d[0] < src[1].d[0] ? src[0].d[0] : src[1].d[0]; 255 dst->d[1] = src[0].d[1] < src[1].d[1] ? src[0].d[1] : src[1].d[1]; 256 dst->d[2] = src[0].d[2] < src[1].d[2] ? src[0].d[2] : src[1].d[2]; 257 dst->d[3] = src[0].d[3] < src[1].d[3] ? src[0].d[3] : src[1].d[3]; 258} 259 260static void 261micro_dneg(union tgsi_double_channel *dst, 262 const union tgsi_double_channel *src) 263{ 264 dst->d[0] = -src->d[0]; 265 dst->d[1] = -src->d[1]; 266 dst->d[2] = -src->d[2]; 267 dst->d[3] = -src->d[3]; 268} 269 270static void 271micro_dslt(union tgsi_double_channel *dst, 272 const union tgsi_double_channel *src) 273{ 274 dst->u[0][0] = src[0].d[0] < src[1].d[0] ? ~0U : 0U; 275 dst->u[1][0] = src[0].d[1] < src[1].d[1] ? ~0U : 0U; 276 dst->u[2][0] = src[0].d[2] < src[1].d[2] ? ~0U : 0U; 277 dst->u[3][0] = src[0].d[3] < src[1].d[3] ? ~0U : 0U; 278} 279 280static void 281micro_dsne(union tgsi_double_channel *dst, 282 const union tgsi_double_channel *src) 283{ 284 dst->u[0][0] = src[0].d[0] != src[1].d[0] ? ~0U : 0U; 285 dst->u[1][0] = src[0].d[1] != src[1].d[1] ? ~0U : 0U; 286 dst->u[2][0] = src[0].d[2] != src[1].d[2] ? ~0U : 0U; 287 dst->u[3][0] = src[0].d[3] != src[1].d[3] ? ~0U : 0U; 288} 289 290static void 291micro_dsge(union tgsi_double_channel *dst, 292 const union tgsi_double_channel *src) 293{ 294 dst->u[0][0] = src[0].d[0] >= src[1].d[0] ? ~0U : 0U; 295 dst->u[1][0] = src[0].d[1] >= src[1].d[1] ? ~0U : 0U; 296 dst->u[2][0] = src[0].d[2] >= src[1].d[2] ? ~0U : 0U; 297 dst->u[3][0] = src[0].d[3] >= src[1].d[3] ? ~0U : 0U; 298} 299 300static void 301micro_dseq(union tgsi_double_channel *dst, 302 const union tgsi_double_channel *src) 303{ 304 dst->u[0][0] = src[0].d[0] == src[1].d[0] ? ~0U : 0U; 305 dst->u[1][0] = src[0].d[1] == src[1].d[1] ? ~0U : 0U; 306 dst->u[2][0] = src[0].d[2] == src[1].d[2] ? ~0U : 0U; 307 dst->u[3][0] = src[0].d[3] == src[1].d[3] ? ~0U : 0U; 308} 309 310static void 311micro_drcp(union tgsi_double_channel *dst, 312 const union tgsi_double_channel *src) 313{ 314 dst->d[0] = 1.0 / src->d[0]; 315 dst->d[1] = 1.0 / src->d[1]; 316 dst->d[2] = 1.0 / src->d[2]; 317 dst->d[3] = 1.0 / src->d[3]; 318} 319 320static void 321micro_dsqrt(union tgsi_double_channel *dst, 322 const union tgsi_double_channel *src) 323{ 324 dst->d[0] = sqrt(src->d[0]); 325 dst->d[1] = sqrt(src->d[1]); 326 dst->d[2] = sqrt(src->d[2]); 327 dst->d[3] = sqrt(src->d[3]); 328} 329 330static void 331micro_drsq(union tgsi_double_channel *dst, 332 const union tgsi_double_channel *src) 333{ 334 dst->d[0] = 1.0 / sqrt(src->d[0]); 335 dst->d[1] = 1.0 / sqrt(src->d[1]); 336 dst->d[2] = 1.0 / sqrt(src->d[2]); 337 dst->d[3] = 1.0 / sqrt(src->d[3]); 338} 339 340static void 341micro_dmad(union tgsi_double_channel *dst, 342 const union tgsi_double_channel *src) 343{ 344 dst->d[0] = src[0].d[0] * src[1].d[0] + src[2].d[0]; 345 dst->d[1] = src[0].d[1] * src[1].d[1] + src[2].d[1]; 346 dst->d[2] = src[0].d[2] * src[1].d[2] + src[2].d[2]; 347 dst->d[3] = src[0].d[3] * src[1].d[3] + src[2].d[3]; 348} 349 350static void 351micro_dfrac(union tgsi_double_channel *dst, 352 const union tgsi_double_channel *src) 353{ 354 dst->d[0] = src->d[0] - floor(src->d[0]); 355 dst->d[1] = src->d[1] - floor(src->d[1]); 356 dst->d[2] = src->d[2] - floor(src->d[2]); 357 dst->d[3] = src->d[3] - floor(src->d[3]); 358} 359 360static void 361micro_dldexp(union tgsi_double_channel *dst, 362 const union tgsi_double_channel *src0, 363 union tgsi_exec_channel *src1) 364{ 365 dst->d[0] = ldexp(src0->d[0], src1->i[0]); 366 dst->d[1] = ldexp(src0->d[1], src1->i[1]); 367 dst->d[2] = ldexp(src0->d[2], src1->i[2]); 368 dst->d[3] = ldexp(src0->d[3], src1->i[3]); 369} 370 371static void 372micro_dfracexp(union tgsi_double_channel *dst, 373 union tgsi_exec_channel *dst_exp, 374 const union tgsi_double_channel *src) 375{ 376 dst->d[0] = frexp(src->d[0], &dst_exp->i[0]); 377 dst->d[1] = frexp(src->d[1], &dst_exp->i[1]); 378 dst->d[2] = frexp(src->d[2], &dst_exp->i[2]); 379 dst->d[3] = frexp(src->d[3], &dst_exp->i[3]); 380} 381 382static void 383micro_exp2(union tgsi_exec_channel *dst, 384 const union tgsi_exec_channel *src) 385{ 386#if FAST_MATH 387 dst->f[0] = util_fast_exp2(src->f[0]); 388 dst->f[1] = util_fast_exp2(src->f[1]); 389 dst->f[2] = util_fast_exp2(src->f[2]); 390 dst->f[3] = util_fast_exp2(src->f[3]); 391#else 392#if DEBUG 393 /* Inf is okay for this instruction, so clamp it to silence assertions. */ 394 uint i; 395 union tgsi_exec_channel clamped; 396 397 for (i = 0; i < 4; i++) { 398 if (src->f[i] > 127.99999f) { 399 clamped.f[i] = 127.99999f; 400 } else if (src->f[i] < -126.99999f) { 401 clamped.f[i] = -126.99999f; 402 } else { 403 clamped.f[i] = src->f[i]; 404 } 405 } 406 src = &clamped; 407#endif /* DEBUG */ 408 409 dst->f[0] = powf(2.0f, src->f[0]); 410 dst->f[1] = powf(2.0f, src->f[1]); 411 dst->f[2] = powf(2.0f, src->f[2]); 412 dst->f[3] = powf(2.0f, src->f[3]); 413#endif /* FAST_MATH */ 414} 415 416static void 417micro_f2d(union tgsi_double_channel *dst, 418 const union tgsi_exec_channel *src) 419{ 420 dst->d[0] = (double)src->f[0]; 421 dst->d[1] = (double)src->f[1]; 422 dst->d[2] = (double)src->f[2]; 423 dst->d[3] = (double)src->f[3]; 424} 425 426static void 427micro_flr(union tgsi_exec_channel *dst, 428 const union tgsi_exec_channel *src) 429{ 430 dst->f[0] = floorf(src->f[0]); 431 dst->f[1] = floorf(src->f[1]); 432 dst->f[2] = floorf(src->f[2]); 433 dst->f[3] = floorf(src->f[3]); 434} 435 436static void 437micro_frc(union tgsi_exec_channel *dst, 438 const union tgsi_exec_channel *src) 439{ 440 dst->f[0] = src->f[0] - floorf(src->f[0]); 441 dst->f[1] = src->f[1] - floorf(src->f[1]); 442 dst->f[2] = src->f[2] - floorf(src->f[2]); 443 dst->f[3] = src->f[3] - floorf(src->f[3]); 444} 445 446static void 447micro_i2d(union tgsi_double_channel *dst, 448 const union tgsi_exec_channel *src) 449{ 450 dst->d[0] = (double)src->i[0]; 451 dst->d[1] = (double)src->i[1]; 452 dst->d[2] = (double)src->i[2]; 453 dst->d[3] = (double)src->i[3]; 454} 455 456static void 457micro_iabs(union tgsi_exec_channel *dst, 458 const union tgsi_exec_channel *src) 459{ 460 dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0]; 461 dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1]; 462 dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2]; 463 dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3]; 464} 465 466static void 467micro_ineg(union tgsi_exec_channel *dst, 468 const union tgsi_exec_channel *src) 469{ 470 dst->i[0] = -src->i[0]; 471 dst->i[1] = -src->i[1]; 472 dst->i[2] = -src->i[2]; 473 dst->i[3] = -src->i[3]; 474} 475 476static void 477micro_lg2(union tgsi_exec_channel *dst, 478 const union tgsi_exec_channel *src) 479{ 480#if FAST_MATH 481 dst->f[0] = util_fast_log2(src->f[0]); 482 dst->f[1] = util_fast_log2(src->f[1]); 483 dst->f[2] = util_fast_log2(src->f[2]); 484 dst->f[3] = util_fast_log2(src->f[3]); 485#else 486 dst->f[0] = logf(src->f[0]) * 1.442695f; 487 dst->f[1] = logf(src->f[1]) * 1.442695f; 488 dst->f[2] = logf(src->f[2]) * 1.442695f; 489 dst->f[3] = logf(src->f[3]) * 1.442695f; 490#endif 491} 492 493static void 494micro_lrp(union tgsi_exec_channel *dst, 495 const union tgsi_exec_channel *src0, 496 const union tgsi_exec_channel *src1, 497 const union tgsi_exec_channel *src2) 498{ 499 dst->f[0] = src0->f[0] * (src1->f[0] - src2->f[0]) + src2->f[0]; 500 dst->f[1] = src0->f[1] * (src1->f[1] - src2->f[1]) + src2->f[1]; 501 dst->f[2] = src0->f[2] * (src1->f[2] - src2->f[2]) + src2->f[2]; 502 dst->f[3] = src0->f[3] * (src1->f[3] - src2->f[3]) + src2->f[3]; 503} 504 505static void 506micro_mad(union tgsi_exec_channel *dst, 507 const union tgsi_exec_channel *src0, 508 const union tgsi_exec_channel *src1, 509 const union tgsi_exec_channel *src2) 510{ 511 dst->f[0] = src0->f[0] * src1->f[0] + src2->f[0]; 512 dst->f[1] = src0->f[1] * src1->f[1] + src2->f[1]; 513 dst->f[2] = src0->f[2] * src1->f[2] + src2->f[2]; 514 dst->f[3] = src0->f[3] * src1->f[3] + src2->f[3]; 515} 516 517static void 518micro_mov(union tgsi_exec_channel *dst, 519 const union tgsi_exec_channel *src) 520{ 521 dst->u[0] = src->u[0]; 522 dst->u[1] = src->u[1]; 523 dst->u[2] = src->u[2]; 524 dst->u[3] = src->u[3]; 525} 526 527static void 528micro_rcp(union tgsi_exec_channel *dst, 529 const union tgsi_exec_channel *src) 530{ 531#if 0 /* for debugging */ 532 assert(src->f[0] != 0.0f); 533 assert(src->f[1] != 0.0f); 534 assert(src->f[2] != 0.0f); 535 assert(src->f[3] != 0.0f); 536#endif 537 dst->f[0] = 1.0f / src->f[0]; 538 dst->f[1] = 1.0f / src->f[1]; 539 dst->f[2] = 1.0f / src->f[2]; 540 dst->f[3] = 1.0f / src->f[3]; 541} 542 543static void 544micro_rnd(union tgsi_exec_channel *dst, 545 const union tgsi_exec_channel *src) 546{ 547 dst->f[0] = _mesa_roundevenf(src->f[0]); 548 dst->f[1] = _mesa_roundevenf(src->f[1]); 549 dst->f[2] = _mesa_roundevenf(src->f[2]); 550 dst->f[3] = _mesa_roundevenf(src->f[3]); 551} 552 553static void 554micro_rsq(union tgsi_exec_channel *dst, 555 const union tgsi_exec_channel *src) 556{ 557#if 0 /* for debugging */ 558 assert(src->f[0] != 0.0f); 559 assert(src->f[1] != 0.0f); 560 assert(src->f[2] != 0.0f); 561 assert(src->f[3] != 0.0f); 562#endif 563 dst->f[0] = 1.0f / sqrtf(src->f[0]); 564 dst->f[1] = 1.0f / sqrtf(src->f[1]); 565 dst->f[2] = 1.0f / sqrtf(src->f[2]); 566 dst->f[3] = 1.0f / sqrtf(src->f[3]); 567} 568 569static void 570micro_sqrt(union tgsi_exec_channel *dst, 571 const union tgsi_exec_channel *src) 572{ 573 dst->f[0] = sqrtf(src->f[0]); 574 dst->f[1] = sqrtf(src->f[1]); 575 dst->f[2] = sqrtf(src->f[2]); 576 dst->f[3] = sqrtf(src->f[3]); 577} 578 579static void 580micro_seq(union tgsi_exec_channel *dst, 581 const union tgsi_exec_channel *src0, 582 const union tgsi_exec_channel *src1) 583{ 584 dst->f[0] = src0->f[0] == src1->f[0] ? 1.0f : 0.0f; 585 dst->f[1] = src0->f[1] == src1->f[1] ? 1.0f : 0.0f; 586 dst->f[2] = src0->f[2] == src1->f[2] ? 1.0f : 0.0f; 587 dst->f[3] = src0->f[3] == src1->f[3] ? 1.0f : 0.0f; 588} 589 590static void 591micro_sge(union tgsi_exec_channel *dst, 592 const union tgsi_exec_channel *src0, 593 const union tgsi_exec_channel *src1) 594{ 595 dst->f[0] = src0->f[0] >= src1->f[0] ? 1.0f : 0.0f; 596 dst->f[1] = src0->f[1] >= src1->f[1] ? 1.0f : 0.0f; 597 dst->f[2] = src0->f[2] >= src1->f[2] ? 1.0f : 0.0f; 598 dst->f[3] = src0->f[3] >= src1->f[3] ? 1.0f : 0.0f; 599} 600 601static void 602micro_sgn(union tgsi_exec_channel *dst, 603 const union tgsi_exec_channel *src) 604{ 605 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; 606 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; 607 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; 608 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; 609} 610 611static void 612micro_isgn(union tgsi_exec_channel *dst, 613 const union tgsi_exec_channel *src) 614{ 615 dst->i[0] = src->i[0] < 0 ? -1 : src->i[0] > 0 ? 1 : 0; 616 dst->i[1] = src->i[1] < 0 ? -1 : src->i[1] > 0 ? 1 : 0; 617 dst->i[2] = src->i[2] < 0 ? -1 : src->i[2] > 0 ? 1 : 0; 618 dst->i[3] = src->i[3] < 0 ? -1 : src->i[3] > 0 ? 1 : 0; 619} 620 621static void 622micro_sgt(union tgsi_exec_channel *dst, 623 const union tgsi_exec_channel *src0, 624 const union tgsi_exec_channel *src1) 625{ 626 dst->f[0] = src0->f[0] > src1->f[0] ? 1.0f : 0.0f; 627 dst->f[1] = src0->f[1] > src1->f[1] ? 1.0f : 0.0f; 628 dst->f[2] = src0->f[2] > src1->f[2] ? 1.0f : 0.0f; 629 dst->f[3] = src0->f[3] > src1->f[3] ? 1.0f : 0.0f; 630} 631 632static void 633micro_sin(union tgsi_exec_channel *dst, 634 const union tgsi_exec_channel *src) 635{ 636 dst->f[0] = sinf(src->f[0]); 637 dst->f[1] = sinf(src->f[1]); 638 dst->f[2] = sinf(src->f[2]); 639 dst->f[3] = sinf(src->f[3]); 640} 641 642static void 643micro_sle(union tgsi_exec_channel *dst, 644 const union tgsi_exec_channel *src0, 645 const union tgsi_exec_channel *src1) 646{ 647 dst->f[0] = src0->f[0] <= src1->f[0] ? 1.0f : 0.0f; 648 dst->f[1] = src0->f[1] <= src1->f[1] ? 1.0f : 0.0f; 649 dst->f[2] = src0->f[2] <= src1->f[2] ? 1.0f : 0.0f; 650 dst->f[3] = src0->f[3] <= src1->f[3] ? 1.0f : 0.0f; 651} 652 653static void 654micro_slt(union tgsi_exec_channel *dst, 655 const union tgsi_exec_channel *src0, 656 const union tgsi_exec_channel *src1) 657{ 658 dst->f[0] = src0->f[0] < src1->f[0] ? 1.0f : 0.0f; 659 dst->f[1] = src0->f[1] < src1->f[1] ? 1.0f : 0.0f; 660 dst->f[2] = src0->f[2] < src1->f[2] ? 1.0f : 0.0f; 661 dst->f[3] = src0->f[3] < src1->f[3] ? 1.0f : 0.0f; 662} 663 664static void 665micro_sne(union tgsi_exec_channel *dst, 666 const union tgsi_exec_channel *src0, 667 const union tgsi_exec_channel *src1) 668{ 669 dst->f[0] = src0->f[0] != src1->f[0] ? 1.0f : 0.0f; 670 dst->f[1] = src0->f[1] != src1->f[1] ? 1.0f : 0.0f; 671 dst->f[2] = src0->f[2] != src1->f[2] ? 1.0f : 0.0f; 672 dst->f[3] = src0->f[3] != src1->f[3] ? 1.0f : 0.0f; 673} 674 675static void 676micro_trunc(union tgsi_exec_channel *dst, 677 const union tgsi_exec_channel *src) 678{ 679 dst->f[0] = truncf(src->f[0]); 680 dst->f[1] = truncf(src->f[1]); 681 dst->f[2] = truncf(src->f[2]); 682 dst->f[3] = truncf(src->f[3]); 683} 684 685static void 686micro_u2d(union tgsi_double_channel *dst, 687 const union tgsi_exec_channel *src) 688{ 689 dst->d[0] = (double)src->u[0]; 690 dst->d[1] = (double)src->u[1]; 691 dst->d[2] = (double)src->u[2]; 692 dst->d[3] = (double)src->u[3]; 693} 694 695static void 696micro_i64abs(union tgsi_double_channel *dst, 697 const union tgsi_double_channel *src) 698{ 699 dst->i64[0] = src->i64[0] >= 0.0 ? src->i64[0] : -src->i64[0]; 700 dst->i64[1] = src->i64[1] >= 0.0 ? src->i64[1] : -src->i64[1]; 701 dst->i64[2] = src->i64[2] >= 0.0 ? src->i64[2] : -src->i64[2]; 702 dst->i64[3] = src->i64[3] >= 0.0 ? src->i64[3] : -src->i64[3]; 703} 704 705static void 706micro_i64sgn(union tgsi_double_channel *dst, 707 const union tgsi_double_channel *src) 708{ 709 dst->i64[0] = src->i64[0] < 0 ? -1 : src->i64[0] > 0 ? 1 : 0; 710 dst->i64[1] = src->i64[1] < 0 ? -1 : src->i64[1] > 0 ? 1 : 0; 711 dst->i64[2] = src->i64[2] < 0 ? -1 : src->i64[2] > 0 ? 1 : 0; 712 dst->i64[3] = src->i64[3] < 0 ? -1 : src->i64[3] > 0 ? 1 : 0; 713} 714 715static void 716micro_i64neg(union tgsi_double_channel *dst, 717 const union tgsi_double_channel *src) 718{ 719 dst->i64[0] = -src->i64[0]; 720 dst->i64[1] = -src->i64[1]; 721 dst->i64[2] = -src->i64[2]; 722 dst->i64[3] = -src->i64[3]; 723} 724 725static void 726micro_u64seq(union tgsi_double_channel *dst, 727 const union tgsi_double_channel *src) 728{ 729 dst->u[0][0] = src[0].u64[0] == src[1].u64[0] ? ~0U : 0U; 730 dst->u[1][0] = src[0].u64[1] == src[1].u64[1] ? ~0U : 0U; 731 dst->u[2][0] = src[0].u64[2] == src[1].u64[2] ? ~0U : 0U; 732 dst->u[3][0] = src[0].u64[3] == src[1].u64[3] ? ~0U : 0U; 733} 734 735static void 736micro_u64sne(union tgsi_double_channel *dst, 737 const union tgsi_double_channel *src) 738{ 739 dst->u[0][0] = src[0].u64[0] != src[1].u64[0] ? ~0U : 0U; 740 dst->u[1][0] = src[0].u64[1] != src[1].u64[1] ? ~0U : 0U; 741 dst->u[2][0] = src[0].u64[2] != src[1].u64[2] ? ~0U : 0U; 742 dst->u[3][0] = src[0].u64[3] != src[1].u64[3] ? ~0U : 0U; 743} 744 745static void 746micro_i64slt(union tgsi_double_channel *dst, 747 const union tgsi_double_channel *src) 748{ 749 dst->u[0][0] = src[0].i64[0] < src[1].i64[0] ? ~0U : 0U; 750 dst->u[1][0] = src[0].i64[1] < src[1].i64[1] ? ~0U : 0U; 751 dst->u[2][0] = src[0].i64[2] < src[1].i64[2] ? ~0U : 0U; 752 dst->u[3][0] = src[0].i64[3] < src[1].i64[3] ? ~0U : 0U; 753} 754 755static void 756micro_u64slt(union tgsi_double_channel *dst, 757 const union tgsi_double_channel *src) 758{ 759 dst->u[0][0] = src[0].u64[0] < src[1].u64[0] ? ~0U : 0U; 760 dst->u[1][0] = src[0].u64[1] < src[1].u64[1] ? ~0U : 0U; 761 dst->u[2][0] = src[0].u64[2] < src[1].u64[2] ? ~0U : 0U; 762 dst->u[3][0] = src[0].u64[3] < src[1].u64[3] ? ~0U : 0U; 763} 764 765static void 766micro_i64sge(union tgsi_double_channel *dst, 767 const union tgsi_double_channel *src) 768{ 769 dst->u[0][0] = src[0].i64[0] >= src[1].i64[0] ? ~0U : 0U; 770 dst->u[1][0] = src[0].i64[1] >= src[1].i64[1] ? ~0U : 0U; 771 dst->u[2][0] = src[0].i64[2] >= src[1].i64[2] ? ~0U : 0U; 772 dst->u[3][0] = src[0].i64[3] >= src[1].i64[3] ? ~0U : 0U; 773} 774 775static void 776micro_u64sge(union tgsi_double_channel *dst, 777 const union tgsi_double_channel *src) 778{ 779 dst->u[0][0] = src[0].u64[0] >= src[1].u64[0] ? ~0U : 0U; 780 dst->u[1][0] = src[0].u64[1] >= src[1].u64[1] ? ~0U : 0U; 781 dst->u[2][0] = src[0].u64[2] >= src[1].u64[2] ? ~0U : 0U; 782 dst->u[3][0] = src[0].u64[3] >= src[1].u64[3] ? ~0U : 0U; 783} 784 785static void 786micro_u64max(union tgsi_double_channel *dst, 787 const union tgsi_double_channel *src) 788{ 789 dst->u64[0] = src[0].u64[0] > src[1].u64[0] ? src[0].u64[0] : src[1].u64[0]; 790 dst->u64[1] = src[0].u64[1] > src[1].u64[1] ? src[0].u64[1] : src[1].u64[1]; 791 dst->u64[2] = src[0].u64[2] > src[1].u64[2] ? src[0].u64[2] : src[1].u64[2]; 792 dst->u64[3] = src[0].u64[3] > src[1].u64[3] ? src[0].u64[3] : src[1].u64[3]; 793} 794 795static void 796micro_i64max(union tgsi_double_channel *dst, 797 const union tgsi_double_channel *src) 798{ 799 dst->i64[0] = src[0].i64[0] > src[1].i64[0] ? src[0].i64[0] : src[1].i64[0]; 800 dst->i64[1] = src[0].i64[1] > src[1].i64[1] ? src[0].i64[1] : src[1].i64[1]; 801 dst->i64[2] = src[0].i64[2] > src[1].i64[2] ? src[0].i64[2] : src[1].i64[2]; 802 dst->i64[3] = src[0].i64[3] > src[1].i64[3] ? src[0].i64[3] : src[1].i64[3]; 803} 804 805static void 806micro_u64min(union tgsi_double_channel *dst, 807 const union tgsi_double_channel *src) 808{ 809 dst->u64[0] = src[0].u64[0] < src[1].u64[0] ? src[0].u64[0] : src[1].u64[0]; 810 dst->u64[1] = src[0].u64[1] < src[1].u64[1] ? src[0].u64[1] : src[1].u64[1]; 811 dst->u64[2] = src[0].u64[2] < src[1].u64[2] ? src[0].u64[2] : src[1].u64[2]; 812 dst->u64[3] = src[0].u64[3] < src[1].u64[3] ? src[0].u64[3] : src[1].u64[3]; 813} 814 815static void 816micro_i64min(union tgsi_double_channel *dst, 817 const union tgsi_double_channel *src) 818{ 819 dst->i64[0] = src[0].i64[0] < src[1].i64[0] ? src[0].i64[0] : src[1].i64[0]; 820 dst->i64[1] = src[0].i64[1] < src[1].i64[1] ? src[0].i64[1] : src[1].i64[1]; 821 dst->i64[2] = src[0].i64[2] < src[1].i64[2] ? src[0].i64[2] : src[1].i64[2]; 822 dst->i64[3] = src[0].i64[3] < src[1].i64[3] ? src[0].i64[3] : src[1].i64[3]; 823} 824 825static void 826micro_u64add(union tgsi_double_channel *dst, 827 const union tgsi_double_channel *src) 828{ 829 dst->u64[0] = src[0].u64[0] + src[1].u64[0]; 830 dst->u64[1] = src[0].u64[1] + src[1].u64[1]; 831 dst->u64[2] = src[0].u64[2] + src[1].u64[2]; 832 dst->u64[3] = src[0].u64[3] + src[1].u64[3]; 833} 834 835static void 836micro_u64mul(union tgsi_double_channel *dst, 837 const union tgsi_double_channel *src) 838{ 839 dst->u64[0] = src[0].u64[0] * src[1].u64[0]; 840 dst->u64[1] = src[0].u64[1] * src[1].u64[1]; 841 dst->u64[2] = src[0].u64[2] * src[1].u64[2]; 842 dst->u64[3] = src[0].u64[3] * src[1].u64[3]; 843} 844 845static void 846micro_u64div(union tgsi_double_channel *dst, 847 const union tgsi_double_channel *src) 848{ 849 dst->u64[0] = src[1].u64[0] ? src[0].u64[0] / src[1].u64[0] : ~0ull; 850 dst->u64[1] = src[1].u64[1] ? src[0].u64[1] / src[1].u64[1] : ~0ull; 851 dst->u64[2] = src[1].u64[2] ? src[0].u64[2] / src[1].u64[2] : ~0ull; 852 dst->u64[3] = src[1].u64[3] ? src[0].u64[3] / src[1].u64[3] : ~0ull; 853} 854 855static void 856micro_i64div(union tgsi_double_channel *dst, 857 const union tgsi_double_channel *src) 858{ 859 dst->i64[0] = src[1].i64[0] ? src[0].i64[0] / src[1].i64[0] : 0; 860 dst->i64[1] = src[1].i64[1] ? src[0].i64[1] / src[1].i64[1] : 0; 861 dst->i64[2] = src[1].i64[2] ? src[0].i64[2] / src[1].i64[2] : 0; 862 dst->i64[3] = src[1].i64[3] ? src[0].i64[3] / src[1].i64[3] : 0; 863} 864 865static void 866micro_u64mod(union tgsi_double_channel *dst, 867 const union tgsi_double_channel *src) 868{ 869 dst->u64[0] = src[1].u64[0] ? src[0].u64[0] % src[1].u64[0] : ~0ull; 870 dst->u64[1] = src[1].u64[1] ? src[0].u64[1] % src[1].u64[1] : ~0ull; 871 dst->u64[2] = src[1].u64[2] ? src[0].u64[2] % src[1].u64[2] : ~0ull; 872 dst->u64[3] = src[1].u64[3] ? src[0].u64[3] % src[1].u64[3] : ~0ull; 873} 874 875static void 876micro_i64mod(union tgsi_double_channel *dst, 877 const union tgsi_double_channel *src) 878{ 879 dst->i64[0] = src[1].i64[0] ? src[0].i64[0] % src[1].i64[0] : ~0ll; 880 dst->i64[1] = src[1].i64[1] ? src[0].i64[1] % src[1].i64[1] : ~0ll; 881 dst->i64[2] = src[1].i64[2] ? src[0].i64[2] % src[1].i64[2] : ~0ll; 882 dst->i64[3] = src[1].i64[3] ? src[0].i64[3] % src[1].i64[3] : ~0ll; 883} 884 885static void 886micro_u64shl(union tgsi_double_channel *dst, 887 const union tgsi_double_channel *src0, 888 union tgsi_exec_channel *src1) 889{ 890 unsigned masked_count; 891 masked_count = src1->u[0] & 0x3f; 892 dst->u64[0] = src0->u64[0] << masked_count; 893 masked_count = src1->u[1] & 0x3f; 894 dst->u64[1] = src0->u64[1] << masked_count; 895 masked_count = src1->u[2] & 0x3f; 896 dst->u64[2] = src0->u64[2] << masked_count; 897 masked_count = src1->u[3] & 0x3f; 898 dst->u64[3] = src0->u64[3] << masked_count; 899} 900 901static void 902micro_i64shr(union tgsi_double_channel *dst, 903 const union tgsi_double_channel *src0, 904 union tgsi_exec_channel *src1) 905{ 906 unsigned masked_count; 907 masked_count = src1->u[0] & 0x3f; 908 dst->i64[0] = src0->i64[0] >> masked_count; 909 masked_count = src1->u[1] & 0x3f; 910 dst->i64[1] = src0->i64[1] >> masked_count; 911 masked_count = src1->u[2] & 0x3f; 912 dst->i64[2] = src0->i64[2] >> masked_count; 913 masked_count = src1->u[3] & 0x3f; 914 dst->i64[3] = src0->i64[3] >> masked_count; 915} 916 917static void 918micro_u64shr(union tgsi_double_channel *dst, 919 const union tgsi_double_channel *src0, 920 union tgsi_exec_channel *src1) 921{ 922 unsigned masked_count; 923 masked_count = src1->u[0] & 0x3f; 924 dst->u64[0] = src0->u64[0] >> masked_count; 925 masked_count = src1->u[1] & 0x3f; 926 dst->u64[1] = src0->u64[1] >> masked_count; 927 masked_count = src1->u[2] & 0x3f; 928 dst->u64[2] = src0->u64[2] >> masked_count; 929 masked_count = src1->u[3] & 0x3f; 930 dst->u64[3] = src0->u64[3] >> masked_count; 931} 932 933enum tgsi_exec_datatype { 934 TGSI_EXEC_DATA_FLOAT, 935 TGSI_EXEC_DATA_INT, 936 TGSI_EXEC_DATA_UINT, 937 TGSI_EXEC_DATA_DOUBLE, 938 TGSI_EXEC_DATA_INT64, 939 TGSI_EXEC_DATA_UINT64, 940}; 941 942/* 943 * Shorthand locations of various utility registers (_I = Index, _C = Channel) 944 */ 945#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I 946#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C 947#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I 948#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C 949#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I 950#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C 951#define TEMP_PRIMITIVE_S1_I TGSI_EXEC_TEMP_PRIMITIVE_S1_I 952#define TEMP_PRIMITIVE_S1_C TGSI_EXEC_TEMP_PRIMITIVE_S1_C 953#define TEMP_PRIMITIVE_S2_I TGSI_EXEC_TEMP_PRIMITIVE_S2_I 954#define TEMP_PRIMITIVE_S2_C TGSI_EXEC_TEMP_PRIMITIVE_S2_C 955#define TEMP_PRIMITIVE_S3_I TGSI_EXEC_TEMP_PRIMITIVE_S3_I 956#define TEMP_PRIMITIVE_S3_C TGSI_EXEC_TEMP_PRIMITIVE_S3_C 957 958static const struct { 959 int idx; 960 int chan; 961} temp_prim_idxs[] = { 962 { TEMP_PRIMITIVE_I, TEMP_PRIMITIVE_C }, 963 { TEMP_PRIMITIVE_S1_I, TEMP_PRIMITIVE_S1_C }, 964 { TEMP_PRIMITIVE_S2_I, TEMP_PRIMITIVE_S2_C }, 965 { TEMP_PRIMITIVE_S3_I, TEMP_PRIMITIVE_S3_C }, 966}; 967 968/** The execution mask depends on the conditional mask and the loop mask */ 969#define UPDATE_EXEC_MASK(MACH) \ 970 MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->Switch.mask & MACH->FuncMask 971 972 973static const union tgsi_exec_channel ZeroVec = 974 { { 0.0, 0.0, 0.0, 0.0 } }; 975 976static const union tgsi_exec_channel OneVec = { 977 {1.0f, 1.0f, 1.0f, 1.0f} 978}; 979 980static const union tgsi_exec_channel P128Vec = { 981 {128.0f, 128.0f, 128.0f, 128.0f} 982}; 983 984static const union tgsi_exec_channel M128Vec = { 985 {-128.0f, -128.0f, -128.0f, -128.0f} 986}; 987 988 989/** 990 * Assert that none of the float values in 'chan' are infinite or NaN. 991 * NaN and Inf may occur normally during program execution and should 992 * not lead to crashes, etc. But when debugging, it's helpful to catch 993 * them. 994 */ 995static inline void 996check_inf_or_nan(const union tgsi_exec_channel *chan) 997{ 998 assert(!util_is_inf_or_nan((chan)->f[0])); 999 assert(!util_is_inf_or_nan((chan)->f[1])); 1000 assert(!util_is_inf_or_nan((chan)->f[2])); 1001 assert(!util_is_inf_or_nan((chan)->f[3])); 1002} 1003 1004 1005#ifdef DEBUG 1006static void 1007print_chan(const char *msg, const union tgsi_exec_channel *chan) 1008{ 1009 debug_printf("%s = {%f, %f, %f, %f}\n", 1010 msg, chan->f[0], chan->f[1], chan->f[2], chan->f[3]); 1011} 1012#endif 1013 1014 1015#ifdef DEBUG 1016static void 1017print_temp(const struct tgsi_exec_machine *mach, uint index) 1018{ 1019 const struct tgsi_exec_vector *tmp = &mach->Temps[index]; 1020 int i; 1021 debug_printf("Temp[%u] =\n", index); 1022 for (i = 0; i < 4; i++) { 1023 debug_printf(" %c: { %f, %f, %f, %f }\n", 1024 "XYZW"[i], 1025 tmp->xyzw[i].f[0], 1026 tmp->xyzw[i].f[1], 1027 tmp->xyzw[i].f[2], 1028 tmp->xyzw[i].f[3]); 1029 } 1030} 1031#endif 1032 1033 1034void 1035tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach, 1036 unsigned num_bufs, 1037 const void **bufs, 1038 const unsigned *buf_sizes) 1039{ 1040 unsigned i; 1041 1042 for (i = 0; i < num_bufs; i++) { 1043 mach->Consts[i] = bufs[i]; 1044 mach->ConstsSize[i] = buf_sizes[i]; 1045 } 1046} 1047 1048 1049/** 1050 * Check if there's a potential src/dst register data dependency when 1051 * using SOA execution. 1052 * Example: 1053 * MOV T, T.yxwz; 1054 * This would expand into: 1055 * MOV t0, t1; 1056 * MOV t1, t0; 1057 * MOV t2, t3; 1058 * MOV t3, t2; 1059 * The second instruction will have the wrong value for t0 if executed as-is. 1060 */ 1061boolean 1062tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst) 1063{ 1064 uint i, chan; 1065 1066 uint writemask = inst->Dst[0].Register.WriteMask; 1067 if (writemask == TGSI_WRITEMASK_X || 1068 writemask == TGSI_WRITEMASK_Y || 1069 writemask == TGSI_WRITEMASK_Z || 1070 writemask == TGSI_WRITEMASK_W || 1071 writemask == TGSI_WRITEMASK_NONE) { 1072 /* no chance of data dependency */ 1073 return FALSE; 1074 } 1075 1076 /* loop over src regs */ 1077 for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { 1078 if ((inst->Src[i].Register.File == 1079 inst->Dst[0].Register.File) && 1080 ((inst->Src[i].Register.Index == 1081 inst->Dst[0].Register.Index) || 1082 inst->Src[i].Register.Indirect || 1083 inst->Dst[0].Register.Indirect)) { 1084 /* loop over dest channels */ 1085 uint channelsWritten = 0x0; 1086 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 1087 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 1088 /* check if we're reading a channel that's been written */ 1089 uint swizzle = tgsi_util_get_full_src_register_swizzle(&inst->Src[i], chan); 1090 if (channelsWritten & (1 << swizzle)) { 1091 return TRUE; 1092 } 1093 1094 channelsWritten |= (1 << chan); 1095 } 1096 } 1097 } 1098 } 1099 return FALSE; 1100} 1101 1102 1103/** 1104 * Initialize machine state by expanding tokens to full instructions, 1105 * allocating temporary storage, setting up constants, etc. 1106 * After this, we can call tgsi_exec_machine_run() many times. 1107 */ 1108void 1109tgsi_exec_machine_bind_shader( 1110 struct tgsi_exec_machine *mach, 1111 const struct tgsi_token *tokens, 1112 struct tgsi_sampler *sampler, 1113 struct tgsi_image *image, 1114 struct tgsi_buffer *buffer) 1115{ 1116 uint k; 1117 struct tgsi_parse_context parse; 1118 struct tgsi_full_instruction *instructions; 1119 struct tgsi_full_declaration *declarations; 1120 uint maxInstructions = 10, numInstructions = 0; 1121 uint maxDeclarations = 10, numDeclarations = 0; 1122 1123#if 0 1124 tgsi_dump(tokens, 0); 1125#endif 1126 1127 util_init_math(); 1128 1129 1130 mach->Tokens = tokens; 1131 mach->Sampler = sampler; 1132 mach->Image = image; 1133 mach->Buffer = buffer; 1134 1135 if (!tokens) { 1136 /* unbind and free all */ 1137 FREE(mach->Declarations); 1138 mach->Declarations = NULL; 1139 mach->NumDeclarations = 0; 1140 1141 FREE(mach->Instructions); 1142 mach->Instructions = NULL; 1143 mach->NumInstructions = 0; 1144 1145 return; 1146 } 1147 1148 k = tgsi_parse_init (&parse, mach->Tokens); 1149 if (k != TGSI_PARSE_OK) { 1150 debug_printf( "Problem parsing!\n" ); 1151 return; 1152 } 1153 1154 mach->ImmLimit = 0; 1155 mach->NumOutputs = 0; 1156 1157 for (k = 0; k < TGSI_SEMANTIC_COUNT; k++) 1158 mach->SysSemanticToIndex[k] = -1; 1159 1160 if (mach->ShaderType == PIPE_SHADER_GEOMETRY && 1161 !mach->UsedGeometryShader) { 1162 struct tgsi_exec_vector *inputs; 1163 struct tgsi_exec_vector *outputs; 1164 1165 inputs = align_malloc(sizeof(struct tgsi_exec_vector) * 1166 TGSI_MAX_PRIM_VERTICES * PIPE_MAX_SHADER_INPUTS, 1167 16); 1168 1169 if (!inputs) 1170 return; 1171 1172 outputs = align_malloc(sizeof(struct tgsi_exec_vector) * 1173 TGSI_MAX_TOTAL_VERTICES, 16); 1174 1175 if (!outputs) { 1176 align_free(inputs); 1177 return; 1178 } 1179 1180 align_free(mach->Inputs); 1181 align_free(mach->Outputs); 1182 1183 mach->Inputs = inputs; 1184 mach->Outputs = outputs; 1185 mach->UsedGeometryShader = TRUE; 1186 } 1187 1188 declarations = (struct tgsi_full_declaration *) 1189 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); 1190 1191 if (!declarations) { 1192 return; 1193 } 1194 1195 instructions = (struct tgsi_full_instruction *) 1196 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); 1197 1198 if (!instructions) { 1199 FREE( declarations ); 1200 return; 1201 } 1202 1203 while( !tgsi_parse_end_of_tokens( &parse ) ) { 1204 uint i; 1205 1206 tgsi_parse_token( &parse ); 1207 switch( parse.FullToken.Token.Type ) { 1208 case TGSI_TOKEN_TYPE_DECLARATION: 1209 /* save expanded declaration */ 1210 if (numDeclarations == maxDeclarations) { 1211 declarations = REALLOC(declarations, 1212 maxDeclarations 1213 * sizeof(struct tgsi_full_declaration), 1214 (maxDeclarations + 10) 1215 * sizeof(struct tgsi_full_declaration)); 1216 maxDeclarations += 10; 1217 } 1218 if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) { 1219 unsigned reg; 1220 for (reg = parse.FullToken.FullDeclaration.Range.First; 1221 reg <= parse.FullToken.FullDeclaration.Range.Last; 1222 ++reg) { 1223 ++mach->NumOutputs; 1224 } 1225 } 1226 else if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_SYSTEM_VALUE) { 1227 const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; 1228 mach->SysSemanticToIndex[decl->Semantic.Name] = decl->Range.First; 1229 } 1230 1231 memcpy(declarations + numDeclarations, 1232 &parse.FullToken.FullDeclaration, 1233 sizeof(declarations[0])); 1234 numDeclarations++; 1235 break; 1236 1237 case TGSI_TOKEN_TYPE_IMMEDIATE: 1238 { 1239 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 1240 assert( size <= 4 ); 1241 if (mach->ImmLimit >= mach->ImmsReserved) { 1242 unsigned newReserved = mach->ImmsReserved ? 2 * mach->ImmsReserved : 128; 1243 float4 *imms = REALLOC(mach->Imms, mach->ImmsReserved, newReserved * sizeof(float4)); 1244 if (imms) { 1245 mach->ImmsReserved = newReserved; 1246 mach->Imms = imms; 1247 } else { 1248 debug_printf("Unable to (re)allocate space for immidiate constants\n"); 1249 break; 1250 } 1251 } 1252 1253 for( i = 0; i < size; i++ ) { 1254 mach->Imms[mach->ImmLimit][i] = 1255 parse.FullToken.FullImmediate.u[i].Float; 1256 } 1257 mach->ImmLimit += 1; 1258 } 1259 break; 1260 1261 case TGSI_TOKEN_TYPE_INSTRUCTION: 1262 1263 /* save expanded instruction */ 1264 if (numInstructions == maxInstructions) { 1265 instructions = REALLOC(instructions, 1266 maxInstructions 1267 * sizeof(struct tgsi_full_instruction), 1268 (maxInstructions + 10) 1269 * sizeof(struct tgsi_full_instruction)); 1270 maxInstructions += 10; 1271 } 1272 1273 memcpy(instructions + numInstructions, 1274 &parse.FullToken.FullInstruction, 1275 sizeof(instructions[0])); 1276 1277 numInstructions++; 1278 break; 1279 1280 case TGSI_TOKEN_TYPE_PROPERTY: 1281 if (mach->ShaderType == PIPE_SHADER_GEOMETRY) { 1282 if (parse.FullToken.FullProperty.Property.PropertyName == TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) { 1283 mach->MaxOutputVertices = parse.FullToken.FullProperty.u[0].Data; 1284 } 1285 } 1286 break; 1287 1288 default: 1289 assert( 0 ); 1290 } 1291 } 1292 tgsi_parse_free (&parse); 1293 1294 FREE(mach->Declarations); 1295 mach->Declarations = declarations; 1296 mach->NumDeclarations = numDeclarations; 1297 1298 FREE(mach->Instructions); 1299 mach->Instructions = instructions; 1300 mach->NumInstructions = numInstructions; 1301} 1302 1303 1304struct tgsi_exec_machine * 1305tgsi_exec_machine_create(enum pipe_shader_type shader_type) 1306{ 1307 struct tgsi_exec_machine *mach; 1308 uint i; 1309 1310 mach = align_malloc( sizeof *mach, 16 ); 1311 if (!mach) 1312 goto fail; 1313 1314 memset(mach, 0, sizeof(*mach)); 1315 1316 mach->ShaderType = shader_type; 1317 mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; 1318 mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES; 1319 1320 if (shader_type != PIPE_SHADER_COMPUTE) { 1321 mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_INPUTS, 16); 1322 mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_OUTPUTS, 16); 1323 if (!mach->Inputs || !mach->Outputs) 1324 goto fail; 1325 } 1326 1327 if (shader_type == PIPE_SHADER_FRAGMENT) { 1328 mach->InputSampleOffsetApply = align_malloc(sizeof(apply_sample_offset_func) * PIPE_MAX_SHADER_INPUTS, 16); 1329 if (!mach->InputSampleOffsetApply) 1330 goto fail; 1331 } 1332 1333 /* Setup constants needed by the SSE2 executor. */ 1334 for( i = 0; i < 4; i++ ) { 1335 mach->Temps[TGSI_EXEC_TEMP_00000000_I].xyzw[TGSI_EXEC_TEMP_00000000_C].u[i] = 0x00000000; 1336 mach->Temps[TGSI_EXEC_TEMP_7FFFFFFF_I].xyzw[TGSI_EXEC_TEMP_7FFFFFFF_C].u[i] = 0x7FFFFFFF; 1337 mach->Temps[TGSI_EXEC_TEMP_80000000_I].xyzw[TGSI_EXEC_TEMP_80000000_C].u[i] = 0x80000000; 1338 mach->Temps[TGSI_EXEC_TEMP_FFFFFFFF_I].xyzw[TGSI_EXEC_TEMP_FFFFFFFF_C].u[i] = 0xFFFFFFFF; /* not used */ 1339 mach->Temps[TGSI_EXEC_TEMP_ONE_I].xyzw[TGSI_EXEC_TEMP_ONE_C].f[i] = 1.0f; 1340 mach->Temps[TGSI_EXEC_TEMP_TWO_I].xyzw[TGSI_EXEC_TEMP_TWO_C].f[i] = 2.0f; /* not used */ 1341 mach->Temps[TGSI_EXEC_TEMP_128_I].xyzw[TGSI_EXEC_TEMP_128_C].f[i] = 128.0f; 1342 mach->Temps[TGSI_EXEC_TEMP_MINUS_128_I].xyzw[TGSI_EXEC_TEMP_MINUS_128_C].f[i] = -128.0f; 1343 mach->Temps[TGSI_EXEC_TEMP_THREE_I].xyzw[TGSI_EXEC_TEMP_THREE_C].f[i] = 3.0f; 1344 mach->Temps[TGSI_EXEC_TEMP_HALF_I].xyzw[TGSI_EXEC_TEMP_HALF_C].f[i] = 0.5f; 1345 } 1346 1347#ifdef DEBUG 1348 /* silence warnings */ 1349 (void) print_chan; 1350 (void) print_temp; 1351#endif 1352 1353 return mach; 1354 1355fail: 1356 if (mach) { 1357 align_free(mach->InputSampleOffsetApply); 1358 align_free(mach->Inputs); 1359 align_free(mach->Outputs); 1360 align_free(mach); 1361 } 1362 return NULL; 1363} 1364 1365 1366void 1367tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach) 1368{ 1369 if (mach) { 1370 FREE(mach->Instructions); 1371 FREE(mach->Declarations); 1372 FREE(mach->Imms); 1373 1374 align_free(mach->InputSampleOffsetApply); 1375 align_free(mach->Inputs); 1376 align_free(mach->Outputs); 1377 1378 align_free(mach); 1379 } 1380} 1381 1382static void 1383micro_add(union tgsi_exec_channel *dst, 1384 const union tgsi_exec_channel *src0, 1385 const union tgsi_exec_channel *src1) 1386{ 1387 dst->f[0] = src0->f[0] + src1->f[0]; 1388 dst->f[1] = src0->f[1] + src1->f[1]; 1389 dst->f[2] = src0->f[2] + src1->f[2]; 1390 dst->f[3] = src0->f[3] + src1->f[3]; 1391} 1392 1393static void 1394micro_div( 1395 union tgsi_exec_channel *dst, 1396 const union tgsi_exec_channel *src0, 1397 const union tgsi_exec_channel *src1 ) 1398{ 1399 if (src1->f[0] != 0) { 1400 dst->f[0] = src0->f[0] / src1->f[0]; 1401 } 1402 if (src1->f[1] != 0) { 1403 dst->f[1] = src0->f[1] / src1->f[1]; 1404 } 1405 if (src1->f[2] != 0) { 1406 dst->f[2] = src0->f[2] / src1->f[2]; 1407 } 1408 if (src1->f[3] != 0) { 1409 dst->f[3] = src0->f[3] / src1->f[3]; 1410 } 1411} 1412 1413static void 1414micro_lt( 1415 union tgsi_exec_channel *dst, 1416 const union tgsi_exec_channel *src0, 1417 const union tgsi_exec_channel *src1, 1418 const union tgsi_exec_channel *src2, 1419 const union tgsi_exec_channel *src3 ) 1420{ 1421 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; 1422 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; 1423 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; 1424 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; 1425} 1426 1427static void 1428micro_max(union tgsi_exec_channel *dst, 1429 const union tgsi_exec_channel *src0, 1430 const union tgsi_exec_channel *src1) 1431{ 1432 dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; 1433 dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; 1434 dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; 1435 dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; 1436} 1437 1438static void 1439micro_min(union tgsi_exec_channel *dst, 1440 const union tgsi_exec_channel *src0, 1441 const union tgsi_exec_channel *src1) 1442{ 1443 dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; 1444 dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; 1445 dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; 1446 dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; 1447} 1448 1449static void 1450micro_mul(union tgsi_exec_channel *dst, 1451 const union tgsi_exec_channel *src0, 1452 const union tgsi_exec_channel *src1) 1453{ 1454 dst->f[0] = src0->f[0] * src1->f[0]; 1455 dst->f[1] = src0->f[1] * src1->f[1]; 1456 dst->f[2] = src0->f[2] * src1->f[2]; 1457 dst->f[3] = src0->f[3] * src1->f[3]; 1458} 1459 1460static void 1461micro_neg( 1462 union tgsi_exec_channel *dst, 1463 const union tgsi_exec_channel *src ) 1464{ 1465 dst->f[0] = -src->f[0]; 1466 dst->f[1] = -src->f[1]; 1467 dst->f[2] = -src->f[2]; 1468 dst->f[3] = -src->f[3]; 1469} 1470 1471static void 1472micro_pow( 1473 union tgsi_exec_channel *dst, 1474 const union tgsi_exec_channel *src0, 1475 const union tgsi_exec_channel *src1 ) 1476{ 1477#if FAST_MATH 1478 dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] ); 1479 dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] ); 1480 dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] ); 1481 dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] ); 1482#else 1483 dst->f[0] = powf( src0->f[0], src1->f[0] ); 1484 dst->f[1] = powf( src0->f[1], src1->f[1] ); 1485 dst->f[2] = powf( src0->f[2], src1->f[2] ); 1486 dst->f[3] = powf( src0->f[3], src1->f[3] ); 1487#endif 1488} 1489 1490static void 1491micro_ldexp(union tgsi_exec_channel *dst, 1492 const union tgsi_exec_channel *src0, 1493 const union tgsi_exec_channel *src1) 1494{ 1495 dst->f[0] = ldexpf(src0->f[0], src1->i[0]); 1496 dst->f[1] = ldexpf(src0->f[1], src1->i[1]); 1497 dst->f[2] = ldexpf(src0->f[2], src1->i[2]); 1498 dst->f[3] = ldexpf(src0->f[3], src1->i[3]); 1499} 1500 1501static void 1502micro_sub(union tgsi_exec_channel *dst, 1503 const union tgsi_exec_channel *src0, 1504 const union tgsi_exec_channel *src1) 1505{ 1506 dst->f[0] = src0->f[0] - src1->f[0]; 1507 dst->f[1] = src0->f[1] - src1->f[1]; 1508 dst->f[2] = src0->f[2] - src1->f[2]; 1509 dst->f[3] = src0->f[3] - src1->f[3]; 1510} 1511 1512static void 1513fetch_src_file_channel(const struct tgsi_exec_machine *mach, 1514 const uint file, 1515 const uint swizzle, 1516 const union tgsi_exec_channel *index, 1517 const union tgsi_exec_channel *index2D, 1518 union tgsi_exec_channel *chan) 1519{ 1520 uint i; 1521 1522 assert(swizzle < 4); 1523 1524 switch (file) { 1525 case TGSI_FILE_CONSTANT: 1526 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1527 assert(index2D->i[i] >= 0 && index2D->i[i] < PIPE_MAX_CONSTANT_BUFFERS); 1528 assert(mach->Consts[index2D->i[i]]); 1529 1530 if (index->i[i] < 0) { 1531 chan->u[i] = 0; 1532 } else { 1533 /* NOTE: copying the const value as a uint instead of float */ 1534 const uint constbuf = index2D->i[i]; 1535 const uint *buf = (const uint *)mach->Consts[constbuf]; 1536 const int pos = index->i[i] * 4 + swizzle; 1537 /* const buffer bounds check */ 1538 if (pos < 0 || pos >= (int) mach->ConstsSize[constbuf]) { 1539 if (0) { 1540 /* Debug: print warning */ 1541 static int count = 0; 1542 if (count++ < 100) 1543 debug_printf("TGSI Exec: const buffer index %d" 1544 " out of bounds\n", pos); 1545 } 1546 chan->u[i] = 0; 1547 } 1548 else 1549 chan->u[i] = buf[pos]; 1550 } 1551 } 1552 break; 1553 1554 case TGSI_FILE_INPUT: 1555 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1556 /* 1557 if (PIPE_SHADER_GEOMETRY == mach->ShaderType) { 1558 debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n", 1559 index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i], 1560 index2D->i[i], index->i[i]); 1561 }*/ 1562 int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i]; 1563 assert(pos >= 0); 1564 assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS); 1565 chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i]; 1566 } 1567 break; 1568 1569 case TGSI_FILE_SYSTEM_VALUE: 1570 /* XXX no swizzling at this point. Will be needed if we put 1571 * gl_FragCoord, for example, in a sys value register. 1572 */ 1573 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1574 chan->u[i] = mach->SystemValue[index->i[i]].xyzw[swizzle].u[i]; 1575 } 1576 break; 1577 1578 case TGSI_FILE_TEMPORARY: 1579 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1580 assert(index->i[i] < TGSI_EXEC_NUM_TEMPS); 1581 assert(index2D->i[i] == 0); 1582 1583 chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i]; 1584 } 1585 break; 1586 1587 case TGSI_FILE_IMMEDIATE: 1588 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1589 assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit); 1590 assert(index2D->i[i] == 0); 1591 1592 chan->f[i] = mach->Imms[index->i[i]][swizzle]; 1593 } 1594 break; 1595 1596 case TGSI_FILE_ADDRESS: 1597 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1598 assert(index->i[i] >= 0); 1599 assert(index2D->i[i] == 0); 1600 1601 chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i]; 1602 } 1603 break; 1604 1605 case TGSI_FILE_OUTPUT: 1606 /* vertex/fragment output vars can be read too */ 1607 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1608 assert(index->i[i] >= 0); 1609 assert(index2D->i[i] == 0); 1610 1611 chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i]; 1612 } 1613 break; 1614 1615 default: 1616 assert(0); 1617 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1618 chan->u[i] = 0; 1619 } 1620 } 1621} 1622 1623static void 1624get_index_registers(const struct tgsi_exec_machine *mach, 1625 const struct tgsi_full_src_register *reg, 1626 union tgsi_exec_channel *index, 1627 union tgsi_exec_channel *index2D) 1628{ 1629 uint swizzle; 1630 1631 /* We start with a direct index into a register file. 1632 * 1633 * file[1], 1634 * where: 1635 * file = Register.File 1636 * [1] = Register.Index 1637 */ 1638 index->i[0] = 1639 index->i[1] = 1640 index->i[2] = 1641 index->i[3] = reg->Register.Index; 1642 1643 /* There is an extra source register that indirectly subscripts 1644 * a register file. The direct index now becomes an offset 1645 * that is being added to the indirect register. 1646 * 1647 * file[ind[2].x+1], 1648 * where: 1649 * ind = Indirect.File 1650 * [2] = Indirect.Index 1651 * .x = Indirect.SwizzleX 1652 */ 1653 if (reg->Register.Indirect) { 1654 union tgsi_exec_channel index2; 1655 union tgsi_exec_channel indir_index; 1656 const uint execmask = mach->ExecMask; 1657 uint i; 1658 1659 /* which address register (always zero now) */ 1660 index2.i[0] = 1661 index2.i[1] = 1662 index2.i[2] = 1663 index2.i[3] = reg->Indirect.Index; 1664 /* get current value of address register[swizzle] */ 1665 swizzle = reg->Indirect.Swizzle; 1666 fetch_src_file_channel(mach, 1667 reg->Indirect.File, 1668 swizzle, 1669 &index2, 1670 &ZeroVec, 1671 &indir_index); 1672 1673 /* add value of address register to the offset */ 1674 index->i[0] += indir_index.i[0]; 1675 index->i[1] += indir_index.i[1]; 1676 index->i[2] += indir_index.i[2]; 1677 index->i[3] += indir_index.i[3]; 1678 1679 /* for disabled execution channels, zero-out the index to 1680 * avoid using a potential garbage value. 1681 */ 1682 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1683 if ((execmask & (1 << i)) == 0) 1684 index->i[i] = 0; 1685 } 1686 } 1687 1688 /* There is an extra source register that is a second 1689 * subscript to a register file. Effectively it means that 1690 * the register file is actually a 2D array of registers. 1691 * 1692 * file[3][1], 1693 * where: 1694 * [3] = Dimension.Index 1695 */ 1696 if (reg->Register.Dimension) { 1697 index2D->i[0] = 1698 index2D->i[1] = 1699 index2D->i[2] = 1700 index2D->i[3] = reg->Dimension.Index; 1701 1702 /* Again, the second subscript index can be addressed indirectly 1703 * identically to the first one. 1704 * Nothing stops us from indirectly addressing the indirect register, 1705 * but there is no need for that, so we won't exercise it. 1706 * 1707 * file[ind[4].y+3][1], 1708 * where: 1709 * ind = DimIndirect.File 1710 * [4] = DimIndirect.Index 1711 * .y = DimIndirect.SwizzleX 1712 */ 1713 if (reg->Dimension.Indirect) { 1714 union tgsi_exec_channel index2; 1715 union tgsi_exec_channel indir_index; 1716 const uint execmask = mach->ExecMask; 1717 uint i; 1718 1719 index2.i[0] = 1720 index2.i[1] = 1721 index2.i[2] = 1722 index2.i[3] = reg->DimIndirect.Index; 1723 1724 swizzle = reg->DimIndirect.Swizzle; 1725 fetch_src_file_channel(mach, 1726 reg->DimIndirect.File, 1727 swizzle, 1728 &index2, 1729 &ZeroVec, 1730 &indir_index); 1731 1732 index2D->i[0] += indir_index.i[0]; 1733 index2D->i[1] += indir_index.i[1]; 1734 index2D->i[2] += indir_index.i[2]; 1735 index2D->i[3] += indir_index.i[3]; 1736 1737 /* for disabled execution channels, zero-out the index to 1738 * avoid using a potential garbage value. 1739 */ 1740 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1741 if ((execmask & (1 << i)) == 0) { 1742 index2D->i[i] = 0; 1743 } 1744 } 1745 } 1746 1747 /* If by any chance there was a need for a 3D array of register 1748 * files, we would have to check whether Dimension is followed 1749 * by a dimension register and continue the saga. 1750 */ 1751 } else { 1752 index2D->i[0] = 1753 index2D->i[1] = 1754 index2D->i[2] = 1755 index2D->i[3] = 0; 1756 } 1757} 1758 1759 1760static void 1761fetch_source_d(const struct tgsi_exec_machine *mach, 1762 union tgsi_exec_channel *chan, 1763 const struct tgsi_full_src_register *reg, 1764 const uint chan_index) 1765{ 1766 union tgsi_exec_channel index; 1767 union tgsi_exec_channel index2D; 1768 uint swizzle; 1769 1770 get_index_registers(mach, reg, &index, &index2D); 1771 1772 1773 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 1774 fetch_src_file_channel(mach, 1775 reg->Register.File, 1776 swizzle, 1777 &index, 1778 &index2D, 1779 chan); 1780} 1781 1782static void 1783fetch_source(const struct tgsi_exec_machine *mach, 1784 union tgsi_exec_channel *chan, 1785 const struct tgsi_full_src_register *reg, 1786 const uint chan_index, 1787 enum tgsi_exec_datatype src_datatype) 1788{ 1789 fetch_source_d(mach, chan, reg, chan_index); 1790 1791 if (reg->Register.Absolute) { 1792 if (src_datatype == TGSI_EXEC_DATA_FLOAT) { 1793 micro_abs(chan, chan); 1794 } else { 1795 micro_iabs(chan, chan); 1796 } 1797 } 1798 1799 if (reg->Register.Negate) { 1800 if (src_datatype == TGSI_EXEC_DATA_FLOAT) { 1801 micro_neg(chan, chan); 1802 } else { 1803 micro_ineg(chan, chan); 1804 } 1805 } 1806} 1807 1808static union tgsi_exec_channel * 1809store_dest_dstret(struct tgsi_exec_machine *mach, 1810 const union tgsi_exec_channel *chan, 1811 const struct tgsi_full_dst_register *reg, 1812 uint chan_index, 1813 enum tgsi_exec_datatype dst_datatype) 1814{ 1815 static union tgsi_exec_channel null; 1816 union tgsi_exec_channel *dst; 1817 union tgsi_exec_channel index2D; 1818 int offset = 0; /* indirection offset */ 1819 int index; 1820 1821 /* for debugging */ 1822 if (0 && dst_datatype == TGSI_EXEC_DATA_FLOAT) { 1823 check_inf_or_nan(chan); 1824 } 1825 1826 /* There is an extra source register that indirectly subscripts 1827 * a register file. The direct index now becomes an offset 1828 * that is being added to the indirect register. 1829 * 1830 * file[ind[2].x+1], 1831 * where: 1832 * ind = Indirect.File 1833 * [2] = Indirect.Index 1834 * .x = Indirect.SwizzleX 1835 */ 1836 if (reg->Register.Indirect) { 1837 union tgsi_exec_channel index; 1838 union tgsi_exec_channel indir_index; 1839 uint swizzle; 1840 1841 /* which address register (always zero for now) */ 1842 index.i[0] = 1843 index.i[1] = 1844 index.i[2] = 1845 index.i[3] = reg->Indirect.Index; 1846 1847 /* get current value of address register[swizzle] */ 1848 swizzle = reg->Indirect.Swizzle; 1849 1850 /* fetch values from the address/indirection register */ 1851 fetch_src_file_channel(mach, 1852 reg->Indirect.File, 1853 swizzle, 1854 &index, 1855 &ZeroVec, 1856 &indir_index); 1857 1858 /* save indirection offset */ 1859 offset = indir_index.i[0]; 1860 } 1861 1862 /* There is an extra source register that is a second 1863 * subscript to a register file. Effectively it means that 1864 * the register file is actually a 2D array of registers. 1865 * 1866 * file[3][1], 1867 * where: 1868 * [3] = Dimension.Index 1869 */ 1870 if (reg->Register.Dimension) { 1871 index2D.i[0] = 1872 index2D.i[1] = 1873 index2D.i[2] = 1874 index2D.i[3] = reg->Dimension.Index; 1875 1876 /* Again, the second subscript index can be addressed indirectly 1877 * identically to the first one. 1878 * Nothing stops us from indirectly addressing the indirect register, 1879 * but there is no need for that, so we won't exercise it. 1880 * 1881 * file[ind[4].y+3][1], 1882 * where: 1883 * ind = DimIndirect.File 1884 * [4] = DimIndirect.Index 1885 * .y = DimIndirect.SwizzleX 1886 */ 1887 if (reg->Dimension.Indirect) { 1888 union tgsi_exec_channel index2; 1889 union tgsi_exec_channel indir_index; 1890 const uint execmask = mach->ExecMask; 1891 unsigned swizzle; 1892 uint i; 1893 1894 index2.i[0] = 1895 index2.i[1] = 1896 index2.i[2] = 1897 index2.i[3] = reg->DimIndirect.Index; 1898 1899 swizzle = reg->DimIndirect.Swizzle; 1900 fetch_src_file_channel(mach, 1901 reg->DimIndirect.File, 1902 swizzle, 1903 &index2, 1904 &ZeroVec, 1905 &indir_index); 1906 1907 index2D.i[0] += indir_index.i[0]; 1908 index2D.i[1] += indir_index.i[1]; 1909 index2D.i[2] += indir_index.i[2]; 1910 index2D.i[3] += indir_index.i[3]; 1911 1912 /* for disabled execution channels, zero-out the index to 1913 * avoid using a potential garbage value. 1914 */ 1915 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1916 if ((execmask & (1 << i)) == 0) { 1917 index2D.i[i] = 0; 1918 } 1919 } 1920 } 1921 1922 /* If by any chance there was a need for a 3D array of register 1923 * files, we would have to check whether Dimension is followed 1924 * by a dimension register and continue the saga. 1925 */ 1926 } else { 1927 index2D.i[0] = 1928 index2D.i[1] = 1929 index2D.i[2] = 1930 index2D.i[3] = 0; 1931 } 1932 1933 switch (reg->Register.File) { 1934 case TGSI_FILE_NULL: 1935 dst = &null; 1936 break; 1937 1938 case TGSI_FILE_OUTPUT: 1939 index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] 1940 + reg->Register.Index; 1941 dst = &mach->Outputs[offset + index].xyzw[chan_index]; 1942#if 0 1943 debug_printf("NumOutputs = %d, TEMP_O_C/I = %d, redindex = %d\n", 1944 mach->NumOutputs, mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0], 1945 reg->Register.Index); 1946 if (PIPE_SHADER_GEOMETRY == mach->ShaderType) { 1947 debug_printf("STORING OUT[%d] mask(%d), = (", offset + index, execmask); 1948 for (i = 0; i < TGSI_QUAD_SIZE; i++) 1949 if (execmask & (1 << i)) 1950 debug_printf("%f, ", chan->f[i]); 1951 debug_printf(")\n"); 1952 } 1953#endif 1954 break; 1955 1956 case TGSI_FILE_TEMPORARY: 1957 index = reg->Register.Index; 1958 assert( index < TGSI_EXEC_NUM_TEMPS ); 1959 dst = &mach->Temps[offset + index].xyzw[chan_index]; 1960 break; 1961 1962 case TGSI_FILE_ADDRESS: 1963 index = reg->Register.Index; 1964 dst = &mach->Addrs[index].xyzw[chan_index]; 1965 break; 1966 1967 default: 1968 assert( 0 ); 1969 return NULL; 1970 } 1971 1972 return dst; 1973} 1974 1975static void 1976store_dest_double(struct tgsi_exec_machine *mach, 1977 const union tgsi_exec_channel *chan, 1978 const struct tgsi_full_dst_register *reg, 1979 uint chan_index, 1980 enum tgsi_exec_datatype dst_datatype) 1981{ 1982 union tgsi_exec_channel *dst; 1983 const uint execmask = mach->ExecMask; 1984 int i; 1985 1986 dst = store_dest_dstret(mach, chan, reg, chan_index, dst_datatype); 1987 if (!dst) 1988 return; 1989 1990 /* doubles path */ 1991 for (i = 0; i < TGSI_QUAD_SIZE; i++) 1992 if (execmask & (1 << i)) 1993 dst->i[i] = chan->i[i]; 1994} 1995 1996static void 1997store_dest(struct tgsi_exec_machine *mach, 1998 const union tgsi_exec_channel *chan, 1999 const struct tgsi_full_dst_register *reg, 2000 const struct tgsi_full_instruction *inst, 2001 uint chan_index, 2002 enum tgsi_exec_datatype dst_datatype) 2003{ 2004 union tgsi_exec_channel *dst; 2005 const uint execmask = mach->ExecMask; 2006 int i; 2007 2008 dst = store_dest_dstret(mach, chan, reg, chan_index, dst_datatype); 2009 if (!dst) 2010 return; 2011 2012 if (!inst->Instruction.Saturate) { 2013 for (i = 0; i < TGSI_QUAD_SIZE; i++) 2014 if (execmask & (1 << i)) 2015 dst->i[i] = chan->i[i]; 2016 } 2017 else { 2018 for (i = 0; i < TGSI_QUAD_SIZE; i++) 2019 if (execmask & (1 << i)) { 2020 if (chan->f[i] < 0.0f) 2021 dst->f[i] = 0.0f; 2022 else if (chan->f[i] > 1.0f) 2023 dst->f[i] = 1.0f; 2024 else 2025 dst->i[i] = chan->i[i]; 2026 } 2027 } 2028} 2029 2030#define FETCH(VAL,INDEX,CHAN)\ 2031 fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT) 2032 2033#define IFETCH(VAL,INDEX,CHAN)\ 2034 fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_INT) 2035 2036 2037/** 2038 * Execute ARB-style KIL which is predicated by a src register. 2039 * Kill fragment if any of the four values is less than zero. 2040 */ 2041static void 2042exec_kill_if(struct tgsi_exec_machine *mach, 2043 const struct tgsi_full_instruction *inst) 2044{ 2045 uint uniquemask; 2046 uint chan_index; 2047 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 2048 union tgsi_exec_channel r[1]; 2049 2050 /* This mask stores component bits that were already tested. */ 2051 uniquemask = 0; 2052 2053 for (chan_index = 0; chan_index < 4; chan_index++) 2054 { 2055 uint swizzle; 2056 uint i; 2057 2058 /* unswizzle channel */ 2059 swizzle = tgsi_util_get_full_src_register_swizzle ( 2060 &inst->Src[0], 2061 chan_index); 2062 2063 /* check if the component has not been already tested */ 2064 if (uniquemask & (1 << swizzle)) 2065 continue; 2066 uniquemask |= 1 << swizzle; 2067 2068 FETCH(&r[0], 0, chan_index); 2069 for (i = 0; i < 4; i++) 2070 if (r[0].f[i] < 0.0f) 2071 kilmask |= 1 << i; 2072 } 2073 2074 /* restrict to fragments currently executing */ 2075 kilmask &= mach->ExecMask; 2076 2077 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 2078} 2079 2080/** 2081 * Unconditional fragment kill/discard. 2082 */ 2083static void 2084exec_kill(struct tgsi_exec_machine *mach) 2085{ 2086 uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 2087 2088 /* kill fragment for all fragments currently executing */ 2089 kilmask = mach->ExecMask; 2090 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; 2091} 2092 2093static void 2094emit_vertex(struct tgsi_exec_machine *mach, 2095 const struct tgsi_full_instruction *inst) 2096{ 2097 union tgsi_exec_channel r[1]; 2098 unsigned stream_id; 2099 unsigned *prim_count; 2100 /* FIXME: check for exec mask correctly 2101 unsigned i; 2102 for (i = 0; i < TGSI_QUAD_SIZE; ++i) { 2103 if ((mach->ExecMask & (1 << i))) 2104 */ 2105 IFETCH(&r[0], 0, TGSI_CHAN_X); 2106 stream_id = r[0].u[0]; 2107 prim_count = &mach->Temps[temp_prim_idxs[stream_id].idx].xyzw[temp_prim_idxs[stream_id].chan].u[0]; 2108 if (mach->ExecMask) { 2109 if (mach->Primitives[stream_id][*prim_count] >= mach->MaxOutputVertices) 2110 return; 2111 2112 if (mach->Primitives[stream_id][*prim_count] == 0) 2113 mach->PrimitiveOffsets[stream_id][*prim_count] = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]; 2114 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += mach->NumOutputs; 2115 mach->Primitives[stream_id][*prim_count]++; 2116 } 2117} 2118 2119static void 2120emit_primitive(struct tgsi_exec_machine *mach, 2121 const struct tgsi_full_instruction *inst) 2122{ 2123 unsigned *prim_count; 2124 union tgsi_exec_channel r[1]; 2125 unsigned stream_id = 0; 2126 /* FIXME: check for exec mask correctly 2127 unsigned i; 2128 for (i = 0; i < TGSI_QUAD_SIZE; ++i) { 2129 if ((mach->ExecMask & (1 << i))) 2130 */ 2131 if (inst) { 2132 IFETCH(&r[0], 0, TGSI_CHAN_X); 2133 stream_id = r[0].u[0]; 2134 } 2135 prim_count = &mach->Temps[temp_prim_idxs[stream_id].idx].xyzw[temp_prim_idxs[stream_id].chan].u[0]; 2136 if (mach->ExecMask) { 2137 ++(*prim_count); 2138 debug_assert((*prim_count * mach->NumOutputs) < mach->MaxGeometryShaderOutputs); 2139 mach->Primitives[stream_id][*prim_count] = 0; 2140 } 2141} 2142 2143static void 2144conditional_emit_primitive(struct tgsi_exec_machine *mach) 2145{ 2146 if (PIPE_SHADER_GEOMETRY == mach->ShaderType) { 2147 int emitted_verts = 2148 mach->Primitives[0][mach->Temps[temp_prim_idxs[0].idx].xyzw[temp_prim_idxs[0].chan].u[0]]; 2149 if (emitted_verts) { 2150 emit_primitive(mach, NULL); 2151 } 2152 } 2153} 2154 2155 2156/* 2157 * Fetch four texture samples using STR texture coordinates. 2158 */ 2159static void 2160fetch_texel( struct tgsi_sampler *sampler, 2161 const unsigned sview_idx, 2162 const unsigned sampler_idx, 2163 const union tgsi_exec_channel *s, 2164 const union tgsi_exec_channel *t, 2165 const union tgsi_exec_channel *p, 2166 const union tgsi_exec_channel *c0, 2167 const union tgsi_exec_channel *c1, 2168 float derivs[3][2][TGSI_QUAD_SIZE], 2169 const int8_t offset[3], 2170 enum tgsi_sampler_control control, 2171 union tgsi_exec_channel *r, 2172 union tgsi_exec_channel *g, 2173 union tgsi_exec_channel *b, 2174 union tgsi_exec_channel *a ) 2175{ 2176 uint j; 2177 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 2178 2179 /* FIXME: handle explicit derivs, offsets */ 2180 sampler->get_samples(sampler, sview_idx, sampler_idx, 2181 s->f, t->f, p->f, c0->f, c1->f, derivs, offset, control, rgba); 2182 2183 for (j = 0; j < 4; j++) { 2184 r->f[j] = rgba[0][j]; 2185 g->f[j] = rgba[1][j]; 2186 b->f[j] = rgba[2][j]; 2187 a->f[j] = rgba[3][j]; 2188 } 2189} 2190 2191 2192#define TEX_MODIFIER_NONE 0 2193#define TEX_MODIFIER_PROJECTED 1 2194#define TEX_MODIFIER_LOD_BIAS 2 2195#define TEX_MODIFIER_EXPLICIT_LOD 3 2196#define TEX_MODIFIER_LEVEL_ZERO 4 2197#define TEX_MODIFIER_GATHER 5 2198 2199/* 2200 * Fetch all 3 (for s,t,r coords) texel offsets, put them into int array. 2201 */ 2202static void 2203fetch_texel_offsets(struct tgsi_exec_machine *mach, 2204 const struct tgsi_full_instruction *inst, 2205 int8_t offsets[3]) 2206{ 2207 if (inst->Texture.NumOffsets == 1) { 2208 union tgsi_exec_channel index; 2209 union tgsi_exec_channel offset[3]; 2210 index.i[0] = index.i[1] = index.i[2] = index.i[3] = inst->TexOffsets[0].Index; 2211 fetch_src_file_channel(mach, inst->TexOffsets[0].File, 2212 inst->TexOffsets[0].SwizzleX, &index, &ZeroVec, &offset[0]); 2213 fetch_src_file_channel(mach, inst->TexOffsets[0].File, 2214 inst->TexOffsets[0].SwizzleY, &index, &ZeroVec, &offset[1]); 2215 fetch_src_file_channel(mach, inst->TexOffsets[0].File, 2216 inst->TexOffsets[0].SwizzleZ, &index, &ZeroVec, &offset[2]); 2217 offsets[0] = offset[0].i[0]; 2218 offsets[1] = offset[1].i[0]; 2219 offsets[2] = offset[2].i[0]; 2220 } else { 2221 assert(inst->Texture.NumOffsets == 0); 2222 offsets[0] = offsets[1] = offsets[2] = 0; 2223 } 2224} 2225 2226 2227/* 2228 * Fetch dx and dy values for one channel (s, t or r). 2229 * Put dx values into one float array, dy values into another. 2230 */ 2231static void 2232fetch_assign_deriv_channel(struct tgsi_exec_machine *mach, 2233 const struct tgsi_full_instruction *inst, 2234 unsigned regdsrcx, 2235 unsigned chan, 2236 float derivs[2][TGSI_QUAD_SIZE]) 2237{ 2238 union tgsi_exec_channel d; 2239 FETCH(&d, regdsrcx, chan); 2240 derivs[0][0] = d.f[0]; 2241 derivs[0][1] = d.f[1]; 2242 derivs[0][2] = d.f[2]; 2243 derivs[0][3] = d.f[3]; 2244 FETCH(&d, regdsrcx + 1, chan); 2245 derivs[1][0] = d.f[0]; 2246 derivs[1][1] = d.f[1]; 2247 derivs[1][2] = d.f[2]; 2248 derivs[1][3] = d.f[3]; 2249} 2250 2251static uint 2252fetch_sampler_unit(struct tgsi_exec_machine *mach, 2253 const struct tgsi_full_instruction *inst, 2254 uint sampler) 2255{ 2256 uint unit = 0; 2257 int i; 2258 if (inst->Src[sampler].Register.Indirect) { 2259 const struct tgsi_full_src_register *reg = &inst->Src[sampler]; 2260 union tgsi_exec_channel indir_index, index2; 2261 const uint execmask = mach->ExecMask; 2262 index2.i[0] = 2263 index2.i[1] = 2264 index2.i[2] = 2265 index2.i[3] = reg->Indirect.Index; 2266 2267 fetch_src_file_channel(mach, 2268 reg->Indirect.File, 2269 reg->Indirect.Swizzle, 2270 &index2, 2271 &ZeroVec, 2272 &indir_index); 2273 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 2274 if (execmask & (1 << i)) { 2275 unit = inst->Src[sampler].Register.Index + indir_index.i[i]; 2276 break; 2277 } 2278 } 2279 2280 } else { 2281 unit = inst->Src[sampler].Register.Index; 2282 } 2283 return unit; 2284} 2285 2286/* 2287 * execute a texture instruction. 2288 * 2289 * modifier is used to control the channel routing for the 2290 * instruction variants like proj, lod, and texture with lod bias. 2291 * sampler indicates which src register the sampler is contained in. 2292 */ 2293static void 2294exec_tex(struct tgsi_exec_machine *mach, 2295 const struct tgsi_full_instruction *inst, 2296 uint modifier, uint sampler) 2297{ 2298 const union tgsi_exec_channel *args[5], *proj = NULL; 2299 union tgsi_exec_channel r[5]; 2300 enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE; 2301 uint chan; 2302 uint unit; 2303 int8_t offsets[3]; 2304 int dim, shadow_ref, i; 2305 2306 unit = fetch_sampler_unit(mach, inst, sampler); 2307 /* always fetch all 3 offsets, overkill but keeps code simple */ 2308 fetch_texel_offsets(mach, inst, offsets); 2309 2310 assert(modifier != TEX_MODIFIER_LEVEL_ZERO); 2311 assert(inst->Texture.Texture != TGSI_TEXTURE_BUFFER); 2312 2313 dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture); 2314 shadow_ref = tgsi_util_get_shadow_ref_src_index(inst->Texture.Texture); 2315 2316 assert(dim <= 4); 2317 if (shadow_ref >= 0) 2318 assert(shadow_ref >= dim && shadow_ref < (int)ARRAY_SIZE(args)); 2319 2320 /* fetch modifier to the last argument */ 2321 if (modifier != TEX_MODIFIER_NONE) { 2322 const int last = ARRAY_SIZE(args) - 1; 2323 2324 /* fetch modifier from src0.w or src1.x */ 2325 if (sampler == 1) { 2326 assert(dim <= TGSI_CHAN_W && shadow_ref != TGSI_CHAN_W); 2327 FETCH(&r[last], 0, TGSI_CHAN_W); 2328 } 2329 else { 2330 FETCH(&r[last], 1, TGSI_CHAN_X); 2331 } 2332 2333 if (modifier != TEX_MODIFIER_PROJECTED) { 2334 args[last] = &r[last]; 2335 } 2336 else { 2337 proj = &r[last]; 2338 args[last] = &ZeroVec; 2339 } 2340 2341 /* point unused arguments to zero vector */ 2342 for (i = dim; i < last; i++) 2343 args[i] = &ZeroVec; 2344 2345 if (modifier == TEX_MODIFIER_EXPLICIT_LOD) 2346 control = TGSI_SAMPLER_LOD_EXPLICIT; 2347 else if (modifier == TEX_MODIFIER_LOD_BIAS) 2348 control = TGSI_SAMPLER_LOD_BIAS; 2349 else if (modifier == TEX_MODIFIER_GATHER) 2350 control = TGSI_SAMPLER_GATHER; 2351 } 2352 else { 2353 for (i = dim; i < (int)ARRAY_SIZE(args); i++) 2354 args[i] = &ZeroVec; 2355 } 2356 2357 /* fetch coordinates */ 2358 for (i = 0; i < dim; i++) { 2359 FETCH(&r[i], 0, TGSI_CHAN_X + i); 2360 2361 if (proj) 2362 micro_div(&r[i], &r[i], proj); 2363 2364 args[i] = &r[i]; 2365 } 2366 2367 /* fetch reference value */ 2368 if (shadow_ref >= 0) { 2369 FETCH(&r[shadow_ref], shadow_ref / 4, TGSI_CHAN_X + (shadow_ref % 4)); 2370 2371 if (proj) 2372 micro_div(&r[shadow_ref], &r[shadow_ref], proj); 2373 2374 args[shadow_ref] = &r[shadow_ref]; 2375 } 2376 2377 fetch_texel(mach->Sampler, unit, unit, 2378 args[0], args[1], args[2], args[3], args[4], 2379 NULL, offsets, control, 2380 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2381 2382#if 0 2383 debug_printf("fetch r: %g %g %g %g\n", 2384 r[0].f[0], r[0].f[1], r[0].f[2], r[0].f[3]); 2385 debug_printf("fetch g: %g %g %g %g\n", 2386 r[1].f[0], r[1].f[1], r[1].f[2], r[1].f[3]); 2387 debug_printf("fetch b: %g %g %g %g\n", 2388 r[2].f[0], r[2].f[1], r[2].f[2], r[2].f[3]); 2389 debug_printf("fetch a: %g %g %g %g\n", 2390 r[3].f[0], r[3].f[1], r[3].f[2], r[3].f[3]); 2391#endif 2392 2393 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2394 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2395 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2396 } 2397 } 2398} 2399 2400static void 2401exec_lodq(struct tgsi_exec_machine *mach, 2402 const struct tgsi_full_instruction *inst) 2403{ 2404 uint resource_unit, sampler_unit; 2405 unsigned dim; 2406 unsigned i; 2407 union tgsi_exec_channel coords[4]; 2408 const union tgsi_exec_channel *args[ARRAY_SIZE(coords)]; 2409 union tgsi_exec_channel r[2]; 2410 2411 resource_unit = fetch_sampler_unit(mach, inst, 1); 2412 if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) { 2413 uint target = mach->SamplerViews[resource_unit].Resource; 2414 dim = tgsi_util_get_texture_coord_dim(target); 2415 sampler_unit = fetch_sampler_unit(mach, inst, 2); 2416 } else { 2417 dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture); 2418 sampler_unit = resource_unit; 2419 } 2420 assert(dim <= ARRAY_SIZE(coords)); 2421 /* fetch coordinates */ 2422 for (i = 0; i < dim; i++) { 2423 FETCH(&coords[i], 0, TGSI_CHAN_X + i); 2424 args[i] = &coords[i]; 2425 } 2426 for (i = dim; i < ARRAY_SIZE(coords); i++) { 2427 args[i] = &ZeroVec; 2428 } 2429 mach->Sampler->query_lod(mach->Sampler, resource_unit, sampler_unit, 2430 args[0]->f, 2431 args[1]->f, 2432 args[2]->f, 2433 args[3]->f, 2434 TGSI_SAMPLER_LOD_NONE, 2435 r[0].f, 2436 r[1].f); 2437 2438 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 2439 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X, 2440 TGSI_EXEC_DATA_FLOAT); 2441 } 2442 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 2443 store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y, 2444 TGSI_EXEC_DATA_FLOAT); 2445 } 2446 if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) { 2447 unsigned char swizzles[4]; 2448 unsigned chan; 2449 swizzles[0] = inst->Src[1].Register.SwizzleX; 2450 swizzles[1] = inst->Src[1].Register.SwizzleY; 2451 swizzles[2] = inst->Src[1].Register.SwizzleZ; 2452 swizzles[3] = inst->Src[1].Register.SwizzleW; 2453 2454 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2455 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2456 if (swizzles[chan] >= 2) { 2457 store_dest(mach, &ZeroVec, 2458 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2459 } else { 2460 store_dest(mach, &r[swizzles[chan]], 2461 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2462 } 2463 } 2464 } 2465 } else { 2466 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 2467 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X, 2468 TGSI_EXEC_DATA_FLOAT); 2469 } 2470 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 2471 store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y, 2472 TGSI_EXEC_DATA_FLOAT); 2473 } 2474 } 2475} 2476 2477static void 2478exec_txd(struct tgsi_exec_machine *mach, 2479 const struct tgsi_full_instruction *inst) 2480{ 2481 union tgsi_exec_channel r[4]; 2482 float derivs[3][2][TGSI_QUAD_SIZE]; 2483 uint chan; 2484 uint unit; 2485 int8_t offsets[3]; 2486 2487 unit = fetch_sampler_unit(mach, inst, 3); 2488 /* always fetch all 3 offsets, overkill but keeps code simple */ 2489 fetch_texel_offsets(mach, inst, offsets); 2490 2491 switch (inst->Texture.Texture) { 2492 case TGSI_TEXTURE_1D: 2493 FETCH(&r[0], 0, TGSI_CHAN_X); 2494 2495 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 2496 2497 fetch_texel(mach->Sampler, unit, unit, 2498 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ 2499 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2500 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2501 break; 2502 2503 case TGSI_TEXTURE_SHADOW1D: 2504 case TGSI_TEXTURE_1D_ARRAY: 2505 case TGSI_TEXTURE_SHADOW1D_ARRAY: 2506 /* SHADOW1D/1D_ARRAY would not need Y/Z respectively, but don't bother */ 2507 FETCH(&r[0], 0, TGSI_CHAN_X); 2508 FETCH(&r[1], 0, TGSI_CHAN_Y); 2509 FETCH(&r[2], 0, TGSI_CHAN_Z); 2510 2511 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 2512 2513 fetch_texel(mach->Sampler, unit, unit, 2514 &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ 2515 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2516 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2517 break; 2518 2519 case TGSI_TEXTURE_2D: 2520 case TGSI_TEXTURE_RECT: 2521 FETCH(&r[0], 0, TGSI_CHAN_X); 2522 FETCH(&r[1], 0, TGSI_CHAN_Y); 2523 2524 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 2525 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]); 2526 2527 fetch_texel(mach->Sampler, unit, unit, 2528 &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ 2529 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2530 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2531 break; 2532 2533 2534 case TGSI_TEXTURE_SHADOW2D: 2535 case TGSI_TEXTURE_SHADOWRECT: 2536 case TGSI_TEXTURE_2D_ARRAY: 2537 case TGSI_TEXTURE_SHADOW2D_ARRAY: 2538 /* only SHADOW2D_ARRAY actually needs W */ 2539 FETCH(&r[0], 0, TGSI_CHAN_X); 2540 FETCH(&r[1], 0, TGSI_CHAN_Y); 2541 FETCH(&r[2], 0, TGSI_CHAN_Z); 2542 FETCH(&r[3], 0, TGSI_CHAN_W); 2543 2544 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 2545 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]); 2546 2547 fetch_texel(mach->Sampler, unit, unit, 2548 &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */ 2549 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2550 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2551 break; 2552 2553 case TGSI_TEXTURE_3D: 2554 case TGSI_TEXTURE_CUBE: 2555 case TGSI_TEXTURE_CUBE_ARRAY: 2556 case TGSI_TEXTURE_SHADOWCUBE: 2557 /* only TEXTURE_CUBE_ARRAY and TEXTURE_SHADOWCUBE actually need W */ 2558 FETCH(&r[0], 0, TGSI_CHAN_X); 2559 FETCH(&r[1], 0, TGSI_CHAN_Y); 2560 FETCH(&r[2], 0, TGSI_CHAN_Z); 2561 FETCH(&r[3], 0, TGSI_CHAN_W); 2562 2563 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 2564 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]); 2565 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Z, derivs[2]); 2566 2567 fetch_texel(mach->Sampler, unit, unit, 2568 &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */ 2569 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2570 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2571 break; 2572 2573 default: 2574 assert(0); 2575 } 2576 2577 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2578 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2579 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2580 } 2581 } 2582} 2583 2584 2585static void 2586exec_txf(struct tgsi_exec_machine *mach, 2587 const struct tgsi_full_instruction *inst) 2588{ 2589 union tgsi_exec_channel r[4]; 2590 uint chan; 2591 uint unit; 2592 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 2593 int j; 2594 int8_t offsets[3]; 2595 unsigned target; 2596 2597 unit = fetch_sampler_unit(mach, inst, 1); 2598 /* always fetch all 3 offsets, overkill but keeps code simple */ 2599 fetch_texel_offsets(mach, inst, offsets); 2600 2601 IFETCH(&r[3], 0, TGSI_CHAN_W); 2602 2603 if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I || 2604 inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) { 2605 target = mach->SamplerViews[unit].Resource; 2606 } 2607 else { 2608 target = inst->Texture.Texture; 2609 } 2610 switch(target) { 2611 case TGSI_TEXTURE_3D: 2612 case TGSI_TEXTURE_2D_ARRAY: 2613 case TGSI_TEXTURE_SHADOW2D_ARRAY: 2614 case TGSI_TEXTURE_2D_ARRAY_MSAA: 2615 IFETCH(&r[2], 0, TGSI_CHAN_Z); 2616 /* fallthrough */ 2617 case TGSI_TEXTURE_2D: 2618 case TGSI_TEXTURE_RECT: 2619 case TGSI_TEXTURE_SHADOW1D_ARRAY: 2620 case TGSI_TEXTURE_SHADOW2D: 2621 case TGSI_TEXTURE_SHADOWRECT: 2622 case TGSI_TEXTURE_1D_ARRAY: 2623 case TGSI_TEXTURE_2D_MSAA: 2624 IFETCH(&r[1], 0, TGSI_CHAN_Y); 2625 /* fallthrough */ 2626 case TGSI_TEXTURE_BUFFER: 2627 case TGSI_TEXTURE_1D: 2628 case TGSI_TEXTURE_SHADOW1D: 2629 IFETCH(&r[0], 0, TGSI_CHAN_X); 2630 break; 2631 default: 2632 assert(0); 2633 break; 2634 } 2635 2636 mach->Sampler->get_texel(mach->Sampler, unit, r[0].i, r[1].i, r[2].i, r[3].i, 2637 offsets, rgba); 2638 2639 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 2640 r[0].f[j] = rgba[0][j]; 2641 r[1].f[j] = rgba[1][j]; 2642 r[2].f[j] = rgba[2][j]; 2643 r[3].f[j] = rgba[3][j]; 2644 } 2645 2646 if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I || 2647 inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) { 2648 unsigned char swizzles[4]; 2649 swizzles[0] = inst->Src[1].Register.SwizzleX; 2650 swizzles[1] = inst->Src[1].Register.SwizzleY; 2651 swizzles[2] = inst->Src[1].Register.SwizzleZ; 2652 swizzles[3] = inst->Src[1].Register.SwizzleW; 2653 2654 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2655 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2656 store_dest(mach, &r[swizzles[chan]], 2657 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2658 } 2659 } 2660 } 2661 else { 2662 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2663 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2664 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2665 } 2666 } 2667 } 2668} 2669 2670static void 2671exec_txq(struct tgsi_exec_machine *mach, 2672 const struct tgsi_full_instruction *inst) 2673{ 2674 int result[4]; 2675 union tgsi_exec_channel r[4], src; 2676 uint chan; 2677 uint unit; 2678 int i,j; 2679 2680 unit = fetch_sampler_unit(mach, inst, 1); 2681 2682 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT); 2683 2684 /* XXX: This interface can't return per-pixel values */ 2685 mach->Sampler->get_dims(mach->Sampler, unit, src.i[0], result); 2686 2687 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 2688 for (j = 0; j < 4; j++) { 2689 r[j].i[i] = result[j]; 2690 } 2691 } 2692 2693 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2694 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2695 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, 2696 TGSI_EXEC_DATA_INT); 2697 } 2698 } 2699} 2700 2701static void 2702exec_sample(struct tgsi_exec_machine *mach, 2703 const struct tgsi_full_instruction *inst, 2704 uint modifier, boolean compare) 2705{ 2706 const uint resource_unit = inst->Src[1].Register.Index; 2707 const uint sampler_unit = inst->Src[2].Register.Index; 2708 union tgsi_exec_channel r[5], c1; 2709 const union tgsi_exec_channel *lod = &ZeroVec; 2710 enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE; 2711 uint chan; 2712 unsigned char swizzles[4]; 2713 int8_t offsets[3]; 2714 2715 /* always fetch all 3 offsets, overkill but keeps code simple */ 2716 fetch_texel_offsets(mach, inst, offsets); 2717 2718 assert(modifier != TEX_MODIFIER_PROJECTED); 2719 2720 if (modifier != TEX_MODIFIER_NONE) { 2721 if (modifier == TEX_MODIFIER_LOD_BIAS) { 2722 FETCH(&c1, 3, TGSI_CHAN_X); 2723 lod = &c1; 2724 control = TGSI_SAMPLER_LOD_BIAS; 2725 } 2726 else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) { 2727 FETCH(&c1, 3, TGSI_CHAN_X); 2728 lod = &c1; 2729 control = TGSI_SAMPLER_LOD_EXPLICIT; 2730 } 2731 else if (modifier == TEX_MODIFIER_GATHER) { 2732 control = TGSI_SAMPLER_GATHER; 2733 } 2734 else { 2735 assert(modifier == TEX_MODIFIER_LEVEL_ZERO); 2736 control = TGSI_SAMPLER_LOD_ZERO; 2737 } 2738 } 2739 2740 FETCH(&r[0], 0, TGSI_CHAN_X); 2741 2742 switch (mach->SamplerViews[resource_unit].Resource) { 2743 case TGSI_TEXTURE_1D: 2744 if (compare) { 2745 FETCH(&r[2], 3, TGSI_CHAN_X); 2746 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2747 &r[0], &ZeroVec, &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */ 2748 NULL, offsets, control, 2749 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2750 } 2751 else { 2752 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2753 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */ 2754 NULL, offsets, control, 2755 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2756 } 2757 break; 2758 2759 case TGSI_TEXTURE_1D_ARRAY: 2760 case TGSI_TEXTURE_2D: 2761 case TGSI_TEXTURE_RECT: 2762 FETCH(&r[1], 0, TGSI_CHAN_Y); 2763 if (compare) { 2764 FETCH(&r[2], 3, TGSI_CHAN_X); 2765 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2766 &r[0], &r[1], &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */ 2767 NULL, offsets, control, 2768 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2769 } 2770 else { 2771 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2772 &r[0], &r[1], &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */ 2773 NULL, offsets, control, 2774 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2775 } 2776 break; 2777 2778 case TGSI_TEXTURE_2D_ARRAY: 2779 case TGSI_TEXTURE_3D: 2780 case TGSI_TEXTURE_CUBE: 2781 FETCH(&r[1], 0, TGSI_CHAN_Y); 2782 FETCH(&r[2], 0, TGSI_CHAN_Z); 2783 if(compare) { 2784 FETCH(&r[3], 3, TGSI_CHAN_X); 2785 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2786 &r[0], &r[1], &r[2], &r[3], lod, 2787 NULL, offsets, control, 2788 &r[0], &r[1], &r[2], &r[3]); 2789 } 2790 else { 2791 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2792 &r[0], &r[1], &r[2], &ZeroVec, lod, 2793 NULL, offsets, control, 2794 &r[0], &r[1], &r[2], &r[3]); 2795 } 2796 break; 2797 2798 case TGSI_TEXTURE_CUBE_ARRAY: 2799 FETCH(&r[1], 0, TGSI_CHAN_Y); 2800 FETCH(&r[2], 0, TGSI_CHAN_Z); 2801 FETCH(&r[3], 0, TGSI_CHAN_W); 2802 if(compare) { 2803 FETCH(&r[4], 3, TGSI_CHAN_X); 2804 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2805 &r[0], &r[1], &r[2], &r[3], &r[4], 2806 NULL, offsets, control, 2807 &r[0], &r[1], &r[2], &r[3]); 2808 } 2809 else { 2810 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2811 &r[0], &r[1], &r[2], &r[3], lod, 2812 NULL, offsets, control, 2813 &r[0], &r[1], &r[2], &r[3]); 2814 } 2815 break; 2816 2817 2818 default: 2819 assert(0); 2820 } 2821 2822 swizzles[0] = inst->Src[1].Register.SwizzleX; 2823 swizzles[1] = inst->Src[1].Register.SwizzleY; 2824 swizzles[2] = inst->Src[1].Register.SwizzleZ; 2825 swizzles[3] = inst->Src[1].Register.SwizzleW; 2826 2827 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2828 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2829 store_dest(mach, &r[swizzles[chan]], 2830 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2831 } 2832 } 2833} 2834 2835static void 2836exec_sample_d(struct tgsi_exec_machine *mach, 2837 const struct tgsi_full_instruction *inst) 2838{ 2839 const uint resource_unit = inst->Src[1].Register.Index; 2840 const uint sampler_unit = inst->Src[2].Register.Index; 2841 union tgsi_exec_channel r[4]; 2842 float derivs[3][2][TGSI_QUAD_SIZE]; 2843 uint chan; 2844 unsigned char swizzles[4]; 2845 int8_t offsets[3]; 2846 2847 /* always fetch all 3 offsets, overkill but keeps code simple */ 2848 fetch_texel_offsets(mach, inst, offsets); 2849 2850 FETCH(&r[0], 0, TGSI_CHAN_X); 2851 2852 switch (mach->SamplerViews[resource_unit].Resource) { 2853 case TGSI_TEXTURE_1D: 2854 case TGSI_TEXTURE_1D_ARRAY: 2855 /* only 1D array actually needs Y */ 2856 FETCH(&r[1], 0, TGSI_CHAN_Y); 2857 2858 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]); 2859 2860 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2861 &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ 2862 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2863 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2864 break; 2865 2866 case TGSI_TEXTURE_2D: 2867 case TGSI_TEXTURE_RECT: 2868 case TGSI_TEXTURE_2D_ARRAY: 2869 /* only 2D array actually needs Z */ 2870 FETCH(&r[1], 0, TGSI_CHAN_Y); 2871 FETCH(&r[2], 0, TGSI_CHAN_Z); 2872 2873 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]); 2874 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]); 2875 2876 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2877 &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* inputs */ 2878 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2879 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2880 break; 2881 2882 case TGSI_TEXTURE_3D: 2883 case TGSI_TEXTURE_CUBE: 2884 case TGSI_TEXTURE_CUBE_ARRAY: 2885 /* only cube array actually needs W */ 2886 FETCH(&r[1], 0, TGSI_CHAN_Y); 2887 FETCH(&r[2], 0, TGSI_CHAN_Z); 2888 FETCH(&r[3], 0, TGSI_CHAN_W); 2889 2890 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]); 2891 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]); 2892 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Z, derivs[2]); 2893 2894 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2895 &r[0], &r[1], &r[2], &r[3], &ZeroVec, 2896 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2897 &r[0], &r[1], &r[2], &r[3]); 2898 break; 2899 2900 default: 2901 assert(0); 2902 } 2903 2904 swizzles[0] = inst->Src[1].Register.SwizzleX; 2905 swizzles[1] = inst->Src[1].Register.SwizzleY; 2906 swizzles[2] = inst->Src[1].Register.SwizzleZ; 2907 swizzles[3] = inst->Src[1].Register.SwizzleW; 2908 2909 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2910 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2911 store_dest(mach, &r[swizzles[chan]], 2912 &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 2913 } 2914 } 2915} 2916 2917 2918/** 2919 * Evaluate a constant-valued coefficient at the position of the 2920 * current quad. 2921 */ 2922static void 2923eval_constant_coef( 2924 struct tgsi_exec_machine *mach, 2925 unsigned attrib, 2926 unsigned chan ) 2927{ 2928 unsigned i; 2929 2930 for( i = 0; i < TGSI_QUAD_SIZE; i++ ) { 2931 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; 2932 } 2933} 2934 2935static void 2936interp_constant_offset( 2937 UNUSED const struct tgsi_exec_machine *mach, 2938 UNUSED unsigned attrib, 2939 UNUSED unsigned chan, 2940 UNUSED float ofs_x, 2941 UNUSED float ofs_y, 2942 UNUSED union tgsi_exec_channel *out_chan) 2943{ 2944} 2945 2946/** 2947 * Evaluate a linear-valued coefficient at the position of the 2948 * current quad. 2949 */ 2950static void 2951interp_linear_offset( 2952 const struct tgsi_exec_machine *mach, 2953 unsigned attrib, 2954 unsigned chan, 2955 float ofs_x, 2956 float ofs_y, 2957 union tgsi_exec_channel *out_chan) 2958{ 2959 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 2960 const float dady = mach->InterpCoefs[attrib].dady[chan]; 2961 const float delta = ofs_x * dadx + ofs_y * dady; 2962 out_chan->f[0] += delta; 2963 out_chan->f[1] += delta; 2964 out_chan->f[2] += delta; 2965 out_chan->f[3] += delta; 2966} 2967 2968static void 2969eval_linear_coef(struct tgsi_exec_machine *mach, 2970 unsigned attrib, 2971 unsigned chan) 2972{ 2973 const float x = mach->QuadPos.xyzw[0].f[0]; 2974 const float y = mach->QuadPos.xyzw[1].f[0]; 2975 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 2976 const float dady = mach->InterpCoefs[attrib].dady[chan]; 2977 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 2978 2979 mach->Inputs[attrib].xyzw[chan].f[0] = a0; 2980 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; 2981 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; 2982 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; 2983} 2984 2985/** 2986 * Evaluate a perspective-valued coefficient at the position of the 2987 * current quad. 2988 */ 2989 2990static void 2991interp_perspective_offset( 2992 const struct tgsi_exec_machine *mach, 2993 unsigned attrib, 2994 unsigned chan, 2995 float ofs_x, 2996 float ofs_y, 2997 union tgsi_exec_channel *out_chan) 2998{ 2999 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 3000 const float dady = mach->InterpCoefs[attrib].dady[chan]; 3001 const float *w = mach->QuadPos.xyzw[3].f; 3002 const float delta = ofs_x * dadx + ofs_y * dady; 3003 out_chan->f[0] += delta / w[0]; 3004 out_chan->f[1] += delta / w[1]; 3005 out_chan->f[2] += delta / w[2]; 3006 out_chan->f[3] += delta / w[3]; 3007} 3008 3009static void 3010eval_perspective_coef( 3011 struct tgsi_exec_machine *mach, 3012 unsigned attrib, 3013 unsigned chan ) 3014{ 3015 const float x = mach->QuadPos.xyzw[0].f[0]; 3016 const float y = mach->QuadPos.xyzw[1].f[0]; 3017 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 3018 const float dady = mach->InterpCoefs[attrib].dady[chan]; 3019 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 3020 const float *w = mach->QuadPos.xyzw[3].f; 3021 /* divide by W here */ 3022 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; 3023 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; 3024 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; 3025 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; 3026} 3027 3028 3029typedef void (* eval_coef_func)( 3030 struct tgsi_exec_machine *mach, 3031 unsigned attrib, 3032 unsigned chan ); 3033 3034static void 3035exec_declaration(struct tgsi_exec_machine *mach, 3036 const struct tgsi_full_declaration *decl) 3037{ 3038 if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) { 3039 mach->SamplerViews[decl->Range.First] = decl->SamplerView; 3040 return; 3041 } 3042 3043 if (mach->ShaderType == PIPE_SHADER_FRAGMENT) { 3044 if (decl->Declaration.File == TGSI_FILE_INPUT) { 3045 uint first, last, mask; 3046 3047 first = decl->Range.First; 3048 last = decl->Range.Last; 3049 mask = decl->Declaration.UsageMask; 3050 3051 /* XXX we could remove this special-case code since 3052 * mach->InterpCoefs[first].a0 should already have the 3053 * front/back-face value. But we should first update the 3054 * ureg code to emit the right UsageMask value (WRITEMASK_X). 3055 * Then, we could remove the tgsi_exec_machine::Face field. 3056 */ 3057 /* XXX make FACE a system value */ 3058 if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) { 3059 uint i; 3060 3061 assert(decl->Semantic.Index == 0); 3062 assert(first == last); 3063 3064 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 3065 mach->Inputs[first].xyzw[0].f[i] = mach->Face; 3066 } 3067 } else { 3068 eval_coef_func eval; 3069 apply_sample_offset_func interp; 3070 uint i, j; 3071 3072 switch (decl->Interp.Interpolate) { 3073 case TGSI_INTERPOLATE_CONSTANT: 3074 eval = eval_constant_coef; 3075 interp = interp_constant_offset; 3076 break; 3077 3078 case TGSI_INTERPOLATE_LINEAR: 3079 eval = eval_linear_coef; 3080 interp = interp_linear_offset; 3081 break; 3082 3083 case TGSI_INTERPOLATE_PERSPECTIVE: 3084 eval = eval_perspective_coef; 3085 interp = interp_perspective_offset; 3086 break; 3087 3088 case TGSI_INTERPOLATE_COLOR: 3089 eval = mach->flatshade_color ? eval_constant_coef : eval_perspective_coef; 3090 break; 3091 3092 default: 3093 assert(0); 3094 return; 3095 } 3096 3097 for (i = first; i <= last; i++) 3098 mach->InputSampleOffsetApply[i] = interp; 3099 3100 for (j = 0; j < TGSI_NUM_CHANNELS; j++) { 3101 if (mask & (1 << j)) { 3102 for (i = first; i <= last; i++) { 3103 eval(mach, i, j); 3104 } 3105 } 3106 } 3107 } 3108 3109 if (DEBUG_EXECUTION) { 3110 uint i, j; 3111 for (i = first; i <= last; ++i) { 3112 debug_printf("IN[%2u] = ", i); 3113 for (j = 0; j < TGSI_NUM_CHANNELS; j++) { 3114 if (j > 0) { 3115 debug_printf(" "); 3116 } 3117 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", 3118 mach->Inputs[i].xyzw[0].f[j], mach->Inputs[i].xyzw[0].u[j], 3119 mach->Inputs[i].xyzw[1].f[j], mach->Inputs[i].xyzw[1].u[j], 3120 mach->Inputs[i].xyzw[2].f[j], mach->Inputs[i].xyzw[2].u[j], 3121 mach->Inputs[i].xyzw[3].f[j], mach->Inputs[i].xyzw[3].u[j]); 3122 } 3123 } 3124 } 3125 } 3126 } 3127 3128} 3129 3130typedef void (* micro_unary_op)(union tgsi_exec_channel *dst, 3131 const union tgsi_exec_channel *src); 3132 3133static void 3134exec_scalar_unary(struct tgsi_exec_machine *mach, 3135 const struct tgsi_full_instruction *inst, 3136 micro_unary_op op, 3137 enum tgsi_exec_datatype dst_datatype, 3138 enum tgsi_exec_datatype src_datatype) 3139{ 3140 unsigned int chan; 3141 union tgsi_exec_channel src; 3142 union tgsi_exec_channel dst; 3143 3144 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype); 3145 op(&dst, &src); 3146 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3147 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3148 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype); 3149 } 3150 } 3151} 3152 3153static void 3154exec_vector_unary(struct tgsi_exec_machine *mach, 3155 const struct tgsi_full_instruction *inst, 3156 micro_unary_op op, 3157 enum tgsi_exec_datatype dst_datatype, 3158 enum tgsi_exec_datatype src_datatype) 3159{ 3160 unsigned int chan; 3161 struct tgsi_exec_vector dst; 3162 3163 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3164 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3165 union tgsi_exec_channel src; 3166 3167 fetch_source(mach, &src, &inst->Src[0], chan, src_datatype); 3168 op(&dst.xyzw[chan], &src); 3169 } 3170 } 3171 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3172 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3173 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); 3174 } 3175 } 3176} 3177 3178typedef void (* micro_binary_op)(union tgsi_exec_channel *dst, 3179 const union tgsi_exec_channel *src0, 3180 const union tgsi_exec_channel *src1); 3181 3182static void 3183exec_scalar_binary(struct tgsi_exec_machine *mach, 3184 const struct tgsi_full_instruction *inst, 3185 micro_binary_op op, 3186 enum tgsi_exec_datatype dst_datatype, 3187 enum tgsi_exec_datatype src_datatype) 3188{ 3189 unsigned int chan; 3190 union tgsi_exec_channel src[2]; 3191 union tgsi_exec_channel dst; 3192 3193 fetch_source(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, src_datatype); 3194 fetch_source(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, src_datatype); 3195 op(&dst, &src[0], &src[1]); 3196 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3197 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3198 store_dest(mach, &dst, &inst->Dst[0], inst, chan, dst_datatype); 3199 } 3200 } 3201} 3202 3203static void 3204exec_vector_binary(struct tgsi_exec_machine *mach, 3205 const struct tgsi_full_instruction *inst, 3206 micro_binary_op op, 3207 enum tgsi_exec_datatype dst_datatype, 3208 enum tgsi_exec_datatype src_datatype) 3209{ 3210 unsigned int chan; 3211 struct tgsi_exec_vector dst; 3212 3213 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3214 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3215 union tgsi_exec_channel src[2]; 3216 3217 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); 3218 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); 3219 op(&dst.xyzw[chan], &src[0], &src[1]); 3220 } 3221 } 3222 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3223 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3224 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); 3225 } 3226 } 3227} 3228 3229typedef void (* micro_trinary_op)(union tgsi_exec_channel *dst, 3230 const union tgsi_exec_channel *src0, 3231 const union tgsi_exec_channel *src1, 3232 const union tgsi_exec_channel *src2); 3233 3234static void 3235exec_vector_trinary(struct tgsi_exec_machine *mach, 3236 const struct tgsi_full_instruction *inst, 3237 micro_trinary_op op, 3238 enum tgsi_exec_datatype dst_datatype, 3239 enum tgsi_exec_datatype src_datatype) 3240{ 3241 unsigned int chan; 3242 struct tgsi_exec_vector dst; 3243 3244 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3245 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3246 union tgsi_exec_channel src[3]; 3247 3248 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); 3249 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); 3250 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype); 3251 op(&dst.xyzw[chan], &src[0], &src[1], &src[2]); 3252 } 3253 } 3254 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3255 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3256 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); 3257 } 3258 } 3259} 3260 3261typedef void (* micro_quaternary_op)(union tgsi_exec_channel *dst, 3262 const union tgsi_exec_channel *src0, 3263 const union tgsi_exec_channel *src1, 3264 const union tgsi_exec_channel *src2, 3265 const union tgsi_exec_channel *src3); 3266 3267static void 3268exec_vector_quaternary(struct tgsi_exec_machine *mach, 3269 const struct tgsi_full_instruction *inst, 3270 micro_quaternary_op op, 3271 enum tgsi_exec_datatype dst_datatype, 3272 enum tgsi_exec_datatype src_datatype) 3273{ 3274 unsigned int chan; 3275 struct tgsi_exec_vector dst; 3276 3277 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3278 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3279 union tgsi_exec_channel src[4]; 3280 3281 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); 3282 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); 3283 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype); 3284 fetch_source(mach, &src[3], &inst->Src[3], chan, src_datatype); 3285 op(&dst.xyzw[chan], &src[0], &src[1], &src[2], &src[3]); 3286 } 3287 } 3288 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3289 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3290 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, dst_datatype); 3291 } 3292 } 3293} 3294 3295static void 3296exec_dp3(struct tgsi_exec_machine *mach, 3297 const struct tgsi_full_instruction *inst) 3298{ 3299 unsigned int chan; 3300 union tgsi_exec_channel arg[3]; 3301 3302 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3303 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3304 micro_mul(&arg[2], &arg[0], &arg[1]); 3305 3306 for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_Z; chan++) { 3307 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); 3308 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); 3309 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 3310 } 3311 3312 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3313 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3314 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 3315 } 3316 } 3317} 3318 3319static void 3320exec_dp4(struct tgsi_exec_machine *mach, 3321 const struct tgsi_full_instruction *inst) 3322{ 3323 unsigned int chan; 3324 union tgsi_exec_channel arg[3]; 3325 3326 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3327 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3328 micro_mul(&arg[2], &arg[0], &arg[1]); 3329 3330 for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_W; chan++) { 3331 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); 3332 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); 3333 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 3334 } 3335 3336 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3337 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3338 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 3339 } 3340 } 3341} 3342 3343static void 3344exec_dp2(struct tgsi_exec_machine *mach, 3345 const struct tgsi_full_instruction *inst) 3346{ 3347 unsigned int chan; 3348 union tgsi_exec_channel arg[3]; 3349 3350 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3351 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3352 micro_mul(&arg[2], &arg[0], &arg[1]); 3353 3354 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3355 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3356 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 3357 3358 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3359 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3360 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 3361 } 3362 } 3363} 3364 3365static void 3366exec_pk2h(struct tgsi_exec_machine *mach, 3367 const struct tgsi_full_instruction *inst) 3368{ 3369 unsigned chan; 3370 union tgsi_exec_channel arg[2], dst; 3371 3372 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3373 fetch_source(mach, &arg[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3374 for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) { 3375 dst.u[chan] = util_float_to_half(arg[0].f[chan]) | 3376 (util_float_to_half(arg[1].f[chan]) << 16); 3377 } 3378 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3379 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3380 store_dest(mach, &dst, &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_UINT); 3381 } 3382 } 3383} 3384 3385static void 3386exec_up2h(struct tgsi_exec_machine *mach, 3387 const struct tgsi_full_instruction *inst) 3388{ 3389 unsigned chan; 3390 union tgsi_exec_channel arg, dst[2]; 3391 3392 fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT); 3393 for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) { 3394 dst[0].f[chan] = util_half_to_float(arg.u[chan] & 0xffff); 3395 dst[1].f[chan] = util_half_to_float(arg.u[chan] >> 16); 3396 } 3397 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3398 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3399 store_dest(mach, &dst[chan & 1], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 3400 } 3401 } 3402} 3403 3404static void 3405micro_ucmp(union tgsi_exec_channel *dst, 3406 const union tgsi_exec_channel *src0, 3407 const union tgsi_exec_channel *src1, 3408 const union tgsi_exec_channel *src2) 3409{ 3410 dst->f[0] = src0->u[0] ? src1->f[0] : src2->f[0]; 3411 dst->f[1] = src0->u[1] ? src1->f[1] : src2->f[1]; 3412 dst->f[2] = src0->u[2] ? src1->f[2] : src2->f[2]; 3413 dst->f[3] = src0->u[3] ? src1->f[3] : src2->f[3]; 3414} 3415 3416static void 3417exec_ucmp(struct tgsi_exec_machine *mach, 3418 const struct tgsi_full_instruction *inst) 3419{ 3420 unsigned int chan; 3421 struct tgsi_exec_vector dst; 3422 3423 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3424 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3425 union tgsi_exec_channel src[3]; 3426 3427 fetch_source(mach, &src[0], &inst->Src[0], chan, 3428 TGSI_EXEC_DATA_UINT); 3429 fetch_source(mach, &src[1], &inst->Src[1], chan, 3430 TGSI_EXEC_DATA_FLOAT); 3431 fetch_source(mach, &src[2], &inst->Src[2], chan, 3432 TGSI_EXEC_DATA_FLOAT); 3433 micro_ucmp(&dst.xyzw[chan], &src[0], &src[1], &src[2]); 3434 } 3435 } 3436 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3437 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3438 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan, 3439 TGSI_EXEC_DATA_FLOAT); 3440 } 3441 } 3442} 3443 3444static void 3445exec_dst(struct tgsi_exec_machine *mach, 3446 const struct tgsi_full_instruction *inst) 3447{ 3448 union tgsi_exec_channel r[2]; 3449 union tgsi_exec_channel d[4]; 3450 3451 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3452 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3453 fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3454 micro_mul(&d[TGSI_CHAN_Y], &r[0], &r[1]); 3455 } 3456 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3457 fetch_source(mach, &d[TGSI_CHAN_Z], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 3458 } 3459 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3460 fetch_source(mach, &d[TGSI_CHAN_W], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3461 } 3462 3463 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3464 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3465 } 3466 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3467 store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3468 } 3469 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3470 store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 3471 } 3472 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3473 store_dest(mach, &d[TGSI_CHAN_W], &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3474 } 3475} 3476 3477static void 3478exec_log(struct tgsi_exec_machine *mach, 3479 const struct tgsi_full_instruction *inst) 3480{ 3481 union tgsi_exec_channel r[3]; 3482 3483 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3484 micro_abs(&r[2], &r[0]); /* r2 = abs(r0) */ 3485 micro_lg2(&r[1], &r[2]); /* r1 = lg2(r2) */ 3486 micro_flr(&r[0], &r[1]); /* r0 = floor(r1) */ 3487 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3488 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3489 } 3490 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3491 micro_exp2(&r[0], &r[0]); /* r0 = 2 ^ r0 */ 3492 micro_div(&r[0], &r[2], &r[0]); /* r0 = r2 / r0 */ 3493 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3494 } 3495 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3496 store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 3497 } 3498 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3499 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3500 } 3501} 3502 3503static void 3504exec_exp(struct tgsi_exec_machine *mach, 3505 const struct tgsi_full_instruction *inst) 3506{ 3507 union tgsi_exec_channel r[3]; 3508 3509 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3510 micro_flr(&r[1], &r[0]); /* r1 = floor(r0) */ 3511 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3512 micro_exp2(&r[2], &r[1]); /* r2 = 2 ^ r1 */ 3513 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3514 } 3515 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3516 micro_sub(&r[2], &r[0], &r[1]); /* r2 = r0 - r1 */ 3517 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3518 } 3519 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3520 micro_exp2(&r[2], &r[0]); /* r2 = 2 ^ r0 */ 3521 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 3522 } 3523 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3524 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3525 } 3526} 3527 3528static void 3529exec_lit(struct tgsi_exec_machine *mach, 3530 const struct tgsi_full_instruction *inst) 3531{ 3532 union tgsi_exec_channel r[3]; 3533 union tgsi_exec_channel d[3]; 3534 3535 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YZ) { 3536 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3537 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3538 fetch_source(mach, &r[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3539 micro_max(&r[1], &r[1], &ZeroVec); 3540 3541 fetch_source(mach, &r[2], &inst->Src[0], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3542 micro_min(&r[2], &r[2], &P128Vec); 3543 micro_max(&r[2], &r[2], &M128Vec); 3544 micro_pow(&r[1], &r[1], &r[2]); 3545 micro_lt(&d[TGSI_CHAN_Z], &ZeroVec, &r[0], &r[1], &ZeroVec); 3546 store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 3547 } 3548 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3549 micro_max(&d[TGSI_CHAN_Y], &r[0], &ZeroVec); 3550 store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3551 } 3552 } 3553 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3554 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3555 } 3556 3557 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3558 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3559 } 3560} 3561 3562static void 3563exec_break(struct tgsi_exec_machine *mach) 3564{ 3565 if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) { 3566 /* turn off loop channels for each enabled exec channel */ 3567 mach->LoopMask &= ~mach->ExecMask; 3568 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3569 UPDATE_EXEC_MASK(mach); 3570 } else { 3571 assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH); 3572 3573 mach->Switch.mask = 0x0; 3574 3575 UPDATE_EXEC_MASK(mach); 3576 } 3577} 3578 3579static void 3580exec_switch(struct tgsi_exec_machine *mach, 3581 const struct tgsi_full_instruction *inst) 3582{ 3583 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); 3584 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); 3585 3586 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; 3587 fetch_source(mach, &mach->Switch.selector, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT); 3588 mach->Switch.mask = 0x0; 3589 mach->Switch.defaultMask = 0x0; 3590 3591 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; 3592 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH; 3593 3594 UPDATE_EXEC_MASK(mach); 3595} 3596 3597static void 3598exec_case(struct tgsi_exec_machine *mach, 3599 const struct tgsi_full_instruction *inst) 3600{ 3601 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; 3602 union tgsi_exec_channel src; 3603 uint mask = 0; 3604 3605 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT); 3606 3607 if (mach->Switch.selector.u[0] == src.u[0]) { 3608 mask |= 0x1; 3609 } 3610 if (mach->Switch.selector.u[1] == src.u[1]) { 3611 mask |= 0x2; 3612 } 3613 if (mach->Switch.selector.u[2] == src.u[2]) { 3614 mask |= 0x4; 3615 } 3616 if (mach->Switch.selector.u[3] == src.u[3]) { 3617 mask |= 0x8; 3618 } 3619 3620 mach->Switch.defaultMask |= mask; 3621 3622 mach->Switch.mask |= mask & prevMask; 3623 3624 UPDATE_EXEC_MASK(mach); 3625} 3626 3627/* FIXME: this will only work if default is last */ 3628static void 3629exec_default(struct tgsi_exec_machine *mach) 3630{ 3631 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; 3632 3633 mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask; 3634 3635 UPDATE_EXEC_MASK(mach); 3636} 3637 3638static void 3639exec_endswitch(struct tgsi_exec_machine *mach) 3640{ 3641 mach->Switch = mach->SwitchStack[--mach->SwitchStackTop]; 3642 mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; 3643 3644 UPDATE_EXEC_MASK(mach); 3645} 3646 3647typedef void (* micro_dop)(union tgsi_double_channel *dst, 3648 const union tgsi_double_channel *src); 3649 3650typedef void (* micro_dop_sop)(union tgsi_double_channel *dst, 3651 const union tgsi_double_channel *src0, 3652 union tgsi_exec_channel *src1); 3653 3654typedef void (* micro_dop_s)(union tgsi_double_channel *dst, 3655 const union tgsi_exec_channel *src); 3656 3657typedef void (* micro_sop_d)(union tgsi_exec_channel *dst, 3658 const union tgsi_double_channel *src); 3659 3660static void 3661fetch_double_channel(struct tgsi_exec_machine *mach, 3662 union tgsi_double_channel *chan, 3663 const struct tgsi_full_src_register *reg, 3664 uint chan_0, 3665 uint chan_1) 3666{ 3667 union tgsi_exec_channel src[2]; 3668 uint i; 3669 3670 fetch_source_d(mach, &src[0], reg, chan_0); 3671 fetch_source_d(mach, &src[1], reg, chan_1); 3672 3673 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 3674 chan->u[i][0] = src[0].u[i]; 3675 chan->u[i][1] = src[1].u[i]; 3676 } 3677 if (reg->Register.Absolute) { 3678 micro_dabs(chan, chan); 3679 } 3680 if (reg->Register.Negate) { 3681 micro_dneg(chan, chan); 3682 } 3683} 3684 3685static void 3686store_double_channel(struct tgsi_exec_machine *mach, 3687 const union tgsi_double_channel *chan, 3688 const struct tgsi_full_dst_register *reg, 3689 const struct tgsi_full_instruction *inst, 3690 uint chan_0, 3691 uint chan_1) 3692{ 3693 union tgsi_exec_channel dst[2]; 3694 uint i; 3695 union tgsi_double_channel temp; 3696 const uint execmask = mach->ExecMask; 3697 3698 if (!inst->Instruction.Saturate) { 3699 for (i = 0; i < TGSI_QUAD_SIZE; i++) 3700 if (execmask & (1 << i)) { 3701 dst[0].u[i] = chan->u[i][0]; 3702 dst[1].u[i] = chan->u[i][1]; 3703 } 3704 } 3705 else { 3706 for (i = 0; i < TGSI_QUAD_SIZE; i++) 3707 if (execmask & (1 << i)) { 3708 if (chan->d[i] < 0.0) 3709 temp.d[i] = 0.0; 3710 else if (chan->d[i] > 1.0) 3711 temp.d[i] = 1.0; 3712 else 3713 temp.d[i] = chan->d[i]; 3714 3715 dst[0].u[i] = temp.u[i][0]; 3716 dst[1].u[i] = temp.u[i][1]; 3717 } 3718 } 3719 3720 store_dest_double(mach, &dst[0], reg, chan_0, TGSI_EXEC_DATA_UINT); 3721 if (chan_1 != (unsigned)-1) 3722 store_dest_double(mach, &dst[1], reg, chan_1, TGSI_EXEC_DATA_UINT); 3723} 3724 3725static void 3726exec_double_unary(struct tgsi_exec_machine *mach, 3727 const struct tgsi_full_instruction *inst, 3728 micro_dop op) 3729{ 3730 union tgsi_double_channel src; 3731 union tgsi_double_channel dst; 3732 3733 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) { 3734 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 3735 op(&dst, &src); 3736 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 3737 } 3738 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) { 3739 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 3740 op(&dst, &src); 3741 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 3742 } 3743} 3744 3745static void 3746exec_double_binary(struct tgsi_exec_machine *mach, 3747 const struct tgsi_full_instruction *inst, 3748 micro_dop op, 3749 enum tgsi_exec_datatype dst_datatype) 3750{ 3751 union tgsi_double_channel src[2]; 3752 union tgsi_double_channel dst; 3753 int first_dest_chan, second_dest_chan; 3754 int wmask; 3755 3756 wmask = inst->Dst[0].Register.WriteMask; 3757 /* these are & because of the way DSLT etc store their destinations */ 3758 if (wmask & TGSI_WRITEMASK_XY) { 3759 first_dest_chan = TGSI_CHAN_X; 3760 second_dest_chan = TGSI_CHAN_Y; 3761 if (dst_datatype == TGSI_EXEC_DATA_UINT) { 3762 first_dest_chan = (wmask & TGSI_WRITEMASK_X) ? TGSI_CHAN_X : TGSI_CHAN_Y; 3763 second_dest_chan = -1; 3764 } 3765 3766 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 3767 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y); 3768 op(&dst, src); 3769 store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan); 3770 } 3771 3772 if (wmask & TGSI_WRITEMASK_ZW) { 3773 first_dest_chan = TGSI_CHAN_Z; 3774 second_dest_chan = TGSI_CHAN_W; 3775 if (dst_datatype == TGSI_EXEC_DATA_UINT) { 3776 first_dest_chan = (wmask & TGSI_WRITEMASK_Z) ? TGSI_CHAN_Z : TGSI_CHAN_W; 3777 second_dest_chan = -1; 3778 } 3779 3780 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 3781 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W); 3782 op(&dst, src); 3783 store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan); 3784 } 3785} 3786 3787static void 3788exec_double_trinary(struct tgsi_exec_machine *mach, 3789 const struct tgsi_full_instruction *inst, 3790 micro_dop op) 3791{ 3792 union tgsi_double_channel src[3]; 3793 union tgsi_double_channel dst; 3794 3795 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) { 3796 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 3797 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y); 3798 fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_X, TGSI_CHAN_Y); 3799 op(&dst, src); 3800 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 3801 } 3802 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) { 3803 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 3804 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W); 3805 fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_Z, TGSI_CHAN_W); 3806 op(&dst, src); 3807 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 3808 } 3809} 3810 3811static void 3812exec_dldexp(struct tgsi_exec_machine *mach, 3813 const struct tgsi_full_instruction *inst) 3814{ 3815 union tgsi_double_channel src0; 3816 union tgsi_exec_channel src1; 3817 union tgsi_double_channel dst; 3818 int wmask; 3819 3820 wmask = inst->Dst[0].Register.WriteMask; 3821 if (wmask & TGSI_WRITEMASK_XY) { 3822 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 3823 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT); 3824 micro_dldexp(&dst, &src0, &src1); 3825 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 3826 } 3827 3828 if (wmask & TGSI_WRITEMASK_ZW) { 3829 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 3830 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT); 3831 micro_dldexp(&dst, &src0, &src1); 3832 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 3833 } 3834} 3835 3836static void 3837exec_dfracexp(struct tgsi_exec_machine *mach, 3838 const struct tgsi_full_instruction *inst) 3839{ 3840 union tgsi_double_channel src; 3841 union tgsi_double_channel dst; 3842 union tgsi_exec_channel dst_exp; 3843 3844 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 3845 micro_dfracexp(&dst, &dst_exp, &src); 3846 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) 3847 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 3848 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) 3849 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 3850 for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3851 if (inst->Dst[1].Register.WriteMask & (1 << chan)) 3852 store_dest(mach, &dst_exp, &inst->Dst[1], inst, chan, TGSI_EXEC_DATA_INT); 3853 } 3854} 3855 3856static void 3857exec_arg0_64_arg1_32(struct tgsi_exec_machine *mach, 3858 const struct tgsi_full_instruction *inst, 3859 micro_dop_sop op) 3860{ 3861 union tgsi_double_channel src0; 3862 union tgsi_exec_channel src1; 3863 union tgsi_double_channel dst; 3864 int wmask; 3865 3866 wmask = inst->Dst[0].Register.WriteMask; 3867 if (wmask & TGSI_WRITEMASK_XY) { 3868 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 3869 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT); 3870 op(&dst, &src0, &src1); 3871 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 3872 } 3873 3874 if (wmask & TGSI_WRITEMASK_ZW) { 3875 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 3876 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT); 3877 op(&dst, &src0, &src1); 3878 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 3879 } 3880} 3881 3882static int 3883get_image_coord_dim(unsigned tgsi_tex) 3884{ 3885 int dim; 3886 switch (tgsi_tex) { 3887 case TGSI_TEXTURE_BUFFER: 3888 case TGSI_TEXTURE_1D: 3889 dim = 1; 3890 break; 3891 case TGSI_TEXTURE_2D: 3892 case TGSI_TEXTURE_RECT: 3893 case TGSI_TEXTURE_1D_ARRAY: 3894 case TGSI_TEXTURE_2D_MSAA: 3895 dim = 2; 3896 break; 3897 case TGSI_TEXTURE_3D: 3898 case TGSI_TEXTURE_CUBE: 3899 case TGSI_TEXTURE_2D_ARRAY: 3900 case TGSI_TEXTURE_2D_ARRAY_MSAA: 3901 case TGSI_TEXTURE_CUBE_ARRAY: 3902 dim = 3; 3903 break; 3904 default: 3905 assert(!"unknown texture target"); 3906 dim = 0; 3907 break; 3908 } 3909 3910 return dim; 3911} 3912 3913static int 3914get_image_coord_sample(unsigned tgsi_tex) 3915{ 3916 int sample = 0; 3917 switch (tgsi_tex) { 3918 case TGSI_TEXTURE_2D_MSAA: 3919 sample = 3; 3920 break; 3921 case TGSI_TEXTURE_2D_ARRAY_MSAA: 3922 sample = 4; 3923 break; 3924 default: 3925 break; 3926 } 3927 return sample; 3928} 3929 3930static void 3931exec_load_img(struct tgsi_exec_machine *mach, 3932 const struct tgsi_full_instruction *inst) 3933{ 3934 union tgsi_exec_channel r[4], sample_r; 3935 uint unit; 3936 int sample; 3937 int i, j; 3938 int dim; 3939 uint chan; 3940 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 3941 struct tgsi_image_params params; 3942 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 3943 3944 unit = fetch_sampler_unit(mach, inst, 0); 3945 dim = get_image_coord_dim(inst->Memory.Texture); 3946 sample = get_image_coord_sample(inst->Memory.Texture); 3947 assert(dim <= 3); 3948 3949 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 3950 params.unit = unit; 3951 params.tgsi_tex_instr = inst->Memory.Texture; 3952 params.format = inst->Memory.Format; 3953 3954 for (i = 0; i < dim; i++) { 3955 IFETCH(&r[i], 1, TGSI_CHAN_X + i); 3956 } 3957 3958 if (sample) 3959 IFETCH(&sample_r, 1, TGSI_CHAN_X + sample); 3960 3961 mach->Image->load(mach->Image, ¶ms, 3962 r[0].i, r[1].i, r[2].i, sample_r.i, 3963 rgba); 3964 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 3965 r[0].f[j] = rgba[0][j]; 3966 r[1].f[j] = rgba[1][j]; 3967 r[2].f[j] = rgba[2][j]; 3968 r[3].f[j] = rgba[3][j]; 3969 } 3970 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3971 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3972 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 3973 } 3974 } 3975} 3976 3977static void 3978exec_load_buf(struct tgsi_exec_machine *mach, 3979 const struct tgsi_full_instruction *inst) 3980{ 3981 union tgsi_exec_channel r[4]; 3982 uint unit; 3983 int j; 3984 uint chan; 3985 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 3986 struct tgsi_buffer_params params; 3987 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 3988 3989 unit = fetch_sampler_unit(mach, inst, 0); 3990 3991 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 3992 params.unit = unit; 3993 IFETCH(&r[0], 1, TGSI_CHAN_X); 3994 3995 mach->Buffer->load(mach->Buffer, ¶ms, 3996 r[0].i, rgba); 3997 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 3998 r[0].f[j] = rgba[0][j]; 3999 r[1].f[j] = rgba[1][j]; 4000 r[2].f[j] = rgba[2][j]; 4001 r[3].f[j] = rgba[3][j]; 4002 } 4003 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4004 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 4005 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 4006 } 4007 } 4008} 4009 4010static void 4011exec_load_mem(struct tgsi_exec_machine *mach, 4012 const struct tgsi_full_instruction *inst) 4013{ 4014 union tgsi_exec_channel r[4]; 4015 uint chan; 4016 char *ptr = mach->LocalMem; 4017 uint32_t offset; 4018 int j; 4019 4020 IFETCH(&r[0], 1, TGSI_CHAN_X); 4021 if (r[0].u[0] >= mach->LocalMemSize) 4022 return; 4023 4024 offset = r[0].u[0]; 4025 ptr += offset; 4026 4027 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 4028 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4029 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 4030 memcpy(&r[chan].u[j], ptr + (4 * chan), 4); 4031 } 4032 } 4033 } 4034 4035 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4036 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 4037 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 4038 } 4039 } 4040} 4041 4042static void 4043exec_load(struct tgsi_exec_machine *mach, 4044 const struct tgsi_full_instruction *inst) 4045{ 4046 if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) 4047 exec_load_img(mach, inst); 4048 else if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) 4049 exec_load_buf(mach, inst); 4050 else if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) 4051 exec_load_mem(mach, inst); 4052} 4053 4054static uint 4055fetch_store_img_unit(struct tgsi_exec_machine *mach, 4056 const struct tgsi_full_dst_register *dst) 4057{ 4058 uint unit = 0; 4059 int i; 4060 if (dst->Register.Indirect) { 4061 union tgsi_exec_channel indir_index, index2; 4062 const uint execmask = mach->ExecMask; 4063 index2.i[0] = 4064 index2.i[1] = 4065 index2.i[2] = 4066 index2.i[3] = dst->Indirect.Index; 4067 4068 fetch_src_file_channel(mach, 4069 dst->Indirect.File, 4070 dst->Indirect.Swizzle, 4071 &index2, 4072 &ZeroVec, 4073 &indir_index); 4074 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 4075 if (execmask & (1 << i)) { 4076 unit = dst->Register.Index + indir_index.i[i]; 4077 break; 4078 } 4079 } 4080 } else { 4081 unit = dst->Register.Index; 4082 } 4083 return unit; 4084} 4085 4086static void 4087exec_store_img(struct tgsi_exec_machine *mach, 4088 const struct tgsi_full_instruction *inst) 4089{ 4090 union tgsi_exec_channel r[3], sample_r; 4091 union tgsi_exec_channel value[4]; 4092 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 4093 struct tgsi_image_params params; 4094 int dim; 4095 int sample; 4096 int i, j; 4097 uint unit; 4098 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 4099 unit = fetch_store_img_unit(mach, &inst->Dst[0]); 4100 dim = get_image_coord_dim(inst->Memory.Texture); 4101 sample = get_image_coord_sample(inst->Memory.Texture); 4102 assert(dim <= 3); 4103 4104 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 4105 params.unit = unit; 4106 params.tgsi_tex_instr = inst->Memory.Texture; 4107 params.format = inst->Memory.Format; 4108 4109 for (i = 0; i < dim; i++) { 4110 IFETCH(&r[i], 0, TGSI_CHAN_X + i); 4111 } 4112 4113 for (i = 0; i < 4; i++) { 4114 FETCH(&value[i], 1, TGSI_CHAN_X + i); 4115 } 4116 if (sample) 4117 IFETCH(&sample_r, 0, TGSI_CHAN_X + sample); 4118 4119 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 4120 rgba[0][j] = value[0].f[j]; 4121 rgba[1][j] = value[1].f[j]; 4122 rgba[2][j] = value[2].f[j]; 4123 rgba[3][j] = value[3].f[j]; 4124 } 4125 4126 mach->Image->store(mach->Image, ¶ms, 4127 r[0].i, r[1].i, r[2].i, sample_r.i, 4128 rgba); 4129} 4130 4131static void 4132exec_store_buf(struct tgsi_exec_machine *mach, 4133 const struct tgsi_full_instruction *inst) 4134{ 4135 union tgsi_exec_channel r[3]; 4136 union tgsi_exec_channel value[4]; 4137 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 4138 struct tgsi_buffer_params params; 4139 int i, j; 4140 uint unit; 4141 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 4142 4143 unit = fetch_store_img_unit(mach, &inst->Dst[0]); 4144 4145 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 4146 params.unit = unit; 4147 params.writemask = inst->Dst[0].Register.WriteMask; 4148 4149 IFETCH(&r[0], 0, TGSI_CHAN_X); 4150 for (i = 0; i < 4; i++) { 4151 FETCH(&value[i], 1, TGSI_CHAN_X + i); 4152 } 4153 4154 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 4155 rgba[0][j] = value[0].f[j]; 4156 rgba[1][j] = value[1].f[j]; 4157 rgba[2][j] = value[2].f[j]; 4158 rgba[3][j] = value[3].f[j]; 4159 } 4160 4161 mach->Buffer->store(mach->Buffer, ¶ms, 4162 r[0].i, 4163 rgba); 4164} 4165 4166static void 4167exec_store_mem(struct tgsi_exec_machine *mach, 4168 const struct tgsi_full_instruction *inst) 4169{ 4170 union tgsi_exec_channel r[3]; 4171 union tgsi_exec_channel value[4]; 4172 uint i, chan; 4173 char *ptr = mach->LocalMem; 4174 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 4175 int execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 4176 4177 IFETCH(&r[0], 0, TGSI_CHAN_X); 4178 4179 for (i = 0; i < 4; i++) { 4180 FETCH(&value[i], 1, TGSI_CHAN_X + i); 4181 } 4182 4183 if (r[0].u[0] >= mach->LocalMemSize) 4184 return; 4185 ptr += r[0].u[0]; 4186 4187 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 4188 if (execmask & (1 << i)) { 4189 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4190 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 4191 memcpy(ptr + (chan * 4), &value[chan].u[0], 4); 4192 } 4193 } 4194 } 4195 } 4196} 4197 4198static void 4199exec_store(struct tgsi_exec_machine *mach, 4200 const struct tgsi_full_instruction *inst) 4201{ 4202 if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE) 4203 exec_store_img(mach, inst); 4204 else if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) 4205 exec_store_buf(mach, inst); 4206 else if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) 4207 exec_store_mem(mach, inst); 4208} 4209 4210static void 4211exec_atomop_img(struct tgsi_exec_machine *mach, 4212 const struct tgsi_full_instruction *inst) 4213{ 4214 union tgsi_exec_channel r[4], sample_r; 4215 union tgsi_exec_channel value[4], value2[4]; 4216 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 4217 float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 4218 struct tgsi_image_params params; 4219 int dim; 4220 int sample; 4221 int i, j; 4222 uint unit, chan; 4223 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 4224 unit = fetch_sampler_unit(mach, inst, 0); 4225 dim = get_image_coord_dim(inst->Memory.Texture); 4226 sample = get_image_coord_sample(inst->Memory.Texture); 4227 assert(dim <= 3); 4228 4229 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 4230 params.unit = unit; 4231 params.tgsi_tex_instr = inst->Memory.Texture; 4232 params.format = inst->Memory.Format; 4233 4234 for (i = 0; i < dim; i++) { 4235 IFETCH(&r[i], 1, TGSI_CHAN_X + i); 4236 } 4237 4238 for (i = 0; i < 4; i++) { 4239 FETCH(&value[i], 2, TGSI_CHAN_X + i); 4240 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) 4241 FETCH(&value2[i], 3, TGSI_CHAN_X + i); 4242 } 4243 if (sample) 4244 IFETCH(&sample_r, 1, TGSI_CHAN_X + sample); 4245 4246 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 4247 rgba[0][j] = value[0].f[j]; 4248 rgba[1][j] = value[1].f[j]; 4249 rgba[2][j] = value[2].f[j]; 4250 rgba[3][j] = value[3].f[j]; 4251 } 4252 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) { 4253 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 4254 rgba2[0][j] = value2[0].f[j]; 4255 rgba2[1][j] = value2[1].f[j]; 4256 rgba2[2][j] = value2[2].f[j]; 4257 rgba2[3][j] = value2[3].f[j]; 4258 } 4259 } 4260 4261 mach->Image->op(mach->Image, ¶ms, inst->Instruction.Opcode, 4262 r[0].i, r[1].i, r[2].i, sample_r.i, 4263 rgba, rgba2); 4264 4265 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 4266 r[0].f[j] = rgba[0][j]; 4267 r[1].f[j] = rgba[1][j]; 4268 r[2].f[j] = rgba[2][j]; 4269 r[3].f[j] = rgba[3][j]; 4270 } 4271 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4272 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 4273 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 4274 } 4275 } 4276} 4277 4278static void 4279exec_atomop_buf(struct tgsi_exec_machine *mach, 4280 const struct tgsi_full_instruction *inst) 4281{ 4282 union tgsi_exec_channel r[4]; 4283 union tgsi_exec_channel value[4], value2[4]; 4284 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 4285 float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 4286 struct tgsi_buffer_params params; 4287 int i, j; 4288 uint unit, chan; 4289 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 4290 4291 unit = fetch_sampler_unit(mach, inst, 0); 4292 4293 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 4294 params.unit = unit; 4295 params.writemask = inst->Dst[0].Register.WriteMask; 4296 4297 IFETCH(&r[0], 1, TGSI_CHAN_X); 4298 4299 for (i = 0; i < 4; i++) { 4300 FETCH(&value[i], 2, TGSI_CHAN_X + i); 4301 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) 4302 FETCH(&value2[i], 3, TGSI_CHAN_X + i); 4303 } 4304 4305 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 4306 rgba[0][j] = value[0].f[j]; 4307 rgba[1][j] = value[1].f[j]; 4308 rgba[2][j] = value[2].f[j]; 4309 rgba[3][j] = value[3].f[j]; 4310 } 4311 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) { 4312 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 4313 rgba2[0][j] = value2[0].f[j]; 4314 rgba2[1][j] = value2[1].f[j]; 4315 rgba2[2][j] = value2[2].f[j]; 4316 rgba2[3][j] = value2[3].f[j]; 4317 } 4318 } 4319 4320 mach->Buffer->op(mach->Buffer, ¶ms, inst->Instruction.Opcode, 4321 r[0].i, 4322 rgba, rgba2); 4323 4324 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 4325 r[0].f[j] = rgba[0][j]; 4326 r[1].f[j] = rgba[1][j]; 4327 r[2].f[j] = rgba[2][j]; 4328 r[3].f[j] = rgba[3][j]; 4329 } 4330 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4331 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 4332 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 4333 } 4334 } 4335} 4336 4337static void 4338exec_atomop_mem(struct tgsi_exec_machine *mach, 4339 const struct tgsi_full_instruction *inst) 4340{ 4341 union tgsi_exec_channel r[4]; 4342 union tgsi_exec_channel value[4], value2[4]; 4343 char *ptr = mach->LocalMem; 4344 uint32_t val; 4345 uint chan, i; 4346 uint32_t offset; 4347 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 4348 int execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 4349 IFETCH(&r[0], 1, TGSI_CHAN_X); 4350 4351 if (r[0].u[0] >= mach->LocalMemSize) 4352 return; 4353 4354 offset = r[0].u[0]; 4355 ptr += offset; 4356 for (i = 0; i < 4; i++) { 4357 FETCH(&value[i], 2, TGSI_CHAN_X + i); 4358 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) 4359 FETCH(&value2[i], 3, TGSI_CHAN_X + i); 4360 } 4361 4362 memcpy(&r[0].u[0], ptr, 4); 4363 val = r[0].u[0]; 4364 switch (inst->Instruction.Opcode) { 4365 case TGSI_OPCODE_ATOMUADD: 4366 val += value[0].u[0]; 4367 break; 4368 case TGSI_OPCODE_ATOMXOR: 4369 val ^= value[0].u[0]; 4370 break; 4371 case TGSI_OPCODE_ATOMOR: 4372 val |= value[0].u[0]; 4373 break; 4374 case TGSI_OPCODE_ATOMAND: 4375 val &= value[0].u[0]; 4376 break; 4377 case TGSI_OPCODE_ATOMUMIN: 4378 val = MIN2(val, value[0].u[0]); 4379 break; 4380 case TGSI_OPCODE_ATOMUMAX: 4381 val = MAX2(val, value[0].u[0]); 4382 break; 4383 case TGSI_OPCODE_ATOMIMIN: 4384 val = MIN2(r[0].i[0], value[0].i[0]); 4385 break; 4386 case TGSI_OPCODE_ATOMIMAX: 4387 val = MAX2(r[0].i[0], value[0].i[0]); 4388 break; 4389 case TGSI_OPCODE_ATOMXCHG: 4390 val = value[0].i[0]; 4391 break; 4392 case TGSI_OPCODE_ATOMCAS: 4393 if (val == value[0].u[0]) 4394 val = value2[0].u[0]; 4395 break; 4396 case TGSI_OPCODE_ATOMFADD: 4397 val = fui(r[0].f[0] + value[0].f[0]); 4398 break; 4399 default: 4400 break; 4401 } 4402 for (i = 0; i < TGSI_QUAD_SIZE; i++) 4403 if (execmask & (1 << i)) 4404 memcpy(ptr, &val, 4); 4405 4406 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4407 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 4408 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 4409 } 4410 } 4411} 4412 4413static void 4414exec_atomop(struct tgsi_exec_machine *mach, 4415 const struct tgsi_full_instruction *inst) 4416{ 4417 if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) 4418 exec_atomop_img(mach, inst); 4419 else if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) 4420 exec_atomop_buf(mach, inst); 4421 else if (inst->Src[0].Register.File == TGSI_FILE_MEMORY) 4422 exec_atomop_mem(mach, inst); 4423} 4424 4425static void 4426exec_resq_img(struct tgsi_exec_machine *mach, 4427 const struct tgsi_full_instruction *inst) 4428{ 4429 int result[4]; 4430 union tgsi_exec_channel r[4]; 4431 uint unit; 4432 int i, chan, j; 4433 struct tgsi_image_params params; 4434 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 4435 4436 unit = fetch_sampler_unit(mach, inst, 0); 4437 4438 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 4439 params.unit = unit; 4440 params.tgsi_tex_instr = inst->Memory.Texture; 4441 params.format = inst->Memory.Format; 4442 4443 mach->Image->get_dims(mach->Image, ¶ms, result); 4444 4445 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 4446 for (j = 0; j < 4; j++) { 4447 r[j].i[i] = result[j]; 4448 } 4449 } 4450 4451 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4452 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 4453 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, 4454 TGSI_EXEC_DATA_INT); 4455 } 4456 } 4457} 4458 4459static void 4460exec_resq_buf(struct tgsi_exec_machine *mach, 4461 const struct tgsi_full_instruction *inst) 4462{ 4463 int result; 4464 union tgsi_exec_channel r[4]; 4465 uint unit; 4466 int i, chan; 4467 struct tgsi_buffer_params params; 4468 int kilmask = mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 4469 4470 unit = fetch_sampler_unit(mach, inst, 0); 4471 4472 params.execmask = mach->ExecMask & mach->NonHelperMask & ~kilmask; 4473 params.unit = unit; 4474 4475 mach->Buffer->get_dims(mach->Buffer, ¶ms, &result); 4476 4477 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 4478 r[0].i[i] = result; 4479 } 4480 4481 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4482 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 4483 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan, 4484 TGSI_EXEC_DATA_INT); 4485 } 4486 } 4487} 4488 4489static void 4490exec_resq(struct tgsi_exec_machine *mach, 4491 const struct tgsi_full_instruction *inst) 4492{ 4493 if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) 4494 exec_resq_img(mach, inst); 4495 else 4496 exec_resq_buf(mach, inst); 4497} 4498 4499static void 4500micro_f2u64(union tgsi_double_channel *dst, 4501 const union tgsi_exec_channel *src) 4502{ 4503 dst->u64[0] = (uint64_t)src->f[0]; 4504 dst->u64[1] = (uint64_t)src->f[1]; 4505 dst->u64[2] = (uint64_t)src->f[2]; 4506 dst->u64[3] = (uint64_t)src->f[3]; 4507} 4508 4509static void 4510micro_f2i64(union tgsi_double_channel *dst, 4511 const union tgsi_exec_channel *src) 4512{ 4513 dst->i64[0] = (int64_t)src->f[0]; 4514 dst->i64[1] = (int64_t)src->f[1]; 4515 dst->i64[2] = (int64_t)src->f[2]; 4516 dst->i64[3] = (int64_t)src->f[3]; 4517} 4518 4519static void 4520micro_u2i64(union tgsi_double_channel *dst, 4521 const union tgsi_exec_channel *src) 4522{ 4523 dst->u64[0] = (uint64_t)src->u[0]; 4524 dst->u64[1] = (uint64_t)src->u[1]; 4525 dst->u64[2] = (uint64_t)src->u[2]; 4526 dst->u64[3] = (uint64_t)src->u[3]; 4527} 4528 4529static void 4530micro_i2i64(union tgsi_double_channel *dst, 4531 const union tgsi_exec_channel *src) 4532{ 4533 dst->i64[0] = (int64_t)src->i[0]; 4534 dst->i64[1] = (int64_t)src->i[1]; 4535 dst->i64[2] = (int64_t)src->i[2]; 4536 dst->i64[3] = (int64_t)src->i[3]; 4537} 4538 4539static void 4540micro_d2u64(union tgsi_double_channel *dst, 4541 const union tgsi_double_channel *src) 4542{ 4543 dst->u64[0] = (uint64_t)src->d[0]; 4544 dst->u64[1] = (uint64_t)src->d[1]; 4545 dst->u64[2] = (uint64_t)src->d[2]; 4546 dst->u64[3] = (uint64_t)src->d[3]; 4547} 4548 4549static void 4550micro_d2i64(union tgsi_double_channel *dst, 4551 const union tgsi_double_channel *src) 4552{ 4553 dst->i64[0] = (int64_t)src->d[0]; 4554 dst->i64[1] = (int64_t)src->d[1]; 4555 dst->i64[2] = (int64_t)src->d[2]; 4556 dst->i64[3] = (int64_t)src->d[3]; 4557} 4558 4559static void 4560micro_u642d(union tgsi_double_channel *dst, 4561 const union tgsi_double_channel *src) 4562{ 4563 dst->d[0] = (double)src->u64[0]; 4564 dst->d[1] = (double)src->u64[1]; 4565 dst->d[2] = (double)src->u64[2]; 4566 dst->d[3] = (double)src->u64[3]; 4567} 4568 4569static void 4570micro_i642d(union tgsi_double_channel *dst, 4571 const union tgsi_double_channel *src) 4572{ 4573 dst->d[0] = (double)src->i64[0]; 4574 dst->d[1] = (double)src->i64[1]; 4575 dst->d[2] = (double)src->i64[2]; 4576 dst->d[3] = (double)src->i64[3]; 4577} 4578 4579static void 4580micro_u642f(union tgsi_exec_channel *dst, 4581 const union tgsi_double_channel *src) 4582{ 4583 dst->f[0] = (float)src->u64[0]; 4584 dst->f[1] = (float)src->u64[1]; 4585 dst->f[2] = (float)src->u64[2]; 4586 dst->f[3] = (float)src->u64[3]; 4587} 4588 4589static void 4590micro_i642f(union tgsi_exec_channel *dst, 4591 const union tgsi_double_channel *src) 4592{ 4593 dst->f[0] = (float)src->i64[0]; 4594 dst->f[1] = (float)src->i64[1]; 4595 dst->f[2] = (float)src->i64[2]; 4596 dst->f[3] = (float)src->i64[3]; 4597} 4598 4599static void 4600exec_t_2_64(struct tgsi_exec_machine *mach, 4601 const struct tgsi_full_instruction *inst, 4602 micro_dop_s op, 4603 enum tgsi_exec_datatype src_datatype) 4604{ 4605 union tgsi_exec_channel src; 4606 union tgsi_double_channel dst; 4607 4608 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) { 4609 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype); 4610 op(&dst, &src); 4611 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 4612 } 4613 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) { 4614 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, src_datatype); 4615 op(&dst, &src); 4616 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 4617 } 4618} 4619 4620static void 4621exec_64_2_t(struct tgsi_exec_machine *mach, 4622 const struct tgsi_full_instruction *inst, 4623 micro_sop_d op, 4624 enum tgsi_exec_datatype dst_datatype) 4625{ 4626 union tgsi_double_channel src; 4627 union tgsi_exec_channel dst; 4628 int wm = inst->Dst[0].Register.WriteMask; 4629 int i; 4630 int bit; 4631 for (i = 0; i < 2; i++) { 4632 bit = ffs(wm); 4633 if (bit) { 4634 wm &= ~(1 << (bit - 1)); 4635 if (i == 0) 4636 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 4637 else 4638 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 4639 op(&dst, &src); 4640 store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1, dst_datatype); 4641 } 4642 } 4643} 4644 4645static void 4646micro_i2f(union tgsi_exec_channel *dst, 4647 const union tgsi_exec_channel *src) 4648{ 4649 dst->f[0] = (float)src->i[0]; 4650 dst->f[1] = (float)src->i[1]; 4651 dst->f[2] = (float)src->i[2]; 4652 dst->f[3] = (float)src->i[3]; 4653} 4654 4655static void 4656micro_not(union tgsi_exec_channel *dst, 4657 const union tgsi_exec_channel *src) 4658{ 4659 dst->u[0] = ~src->u[0]; 4660 dst->u[1] = ~src->u[1]; 4661 dst->u[2] = ~src->u[2]; 4662 dst->u[3] = ~src->u[3]; 4663} 4664 4665static void 4666micro_shl(union tgsi_exec_channel *dst, 4667 const union tgsi_exec_channel *src0, 4668 const union tgsi_exec_channel *src1) 4669{ 4670 unsigned masked_count; 4671 masked_count = src1->u[0] & 0x1f; 4672 dst->u[0] = src0->u[0] << masked_count; 4673 masked_count = src1->u[1] & 0x1f; 4674 dst->u[1] = src0->u[1] << masked_count; 4675 masked_count = src1->u[2] & 0x1f; 4676 dst->u[2] = src0->u[2] << masked_count; 4677 masked_count = src1->u[3] & 0x1f; 4678 dst->u[3] = src0->u[3] << masked_count; 4679} 4680 4681static void 4682micro_and(union tgsi_exec_channel *dst, 4683 const union tgsi_exec_channel *src0, 4684 const union tgsi_exec_channel *src1) 4685{ 4686 dst->u[0] = src0->u[0] & src1->u[0]; 4687 dst->u[1] = src0->u[1] & src1->u[1]; 4688 dst->u[2] = src0->u[2] & src1->u[2]; 4689 dst->u[3] = src0->u[3] & src1->u[3]; 4690} 4691 4692static void 4693micro_or(union tgsi_exec_channel *dst, 4694 const union tgsi_exec_channel *src0, 4695 const union tgsi_exec_channel *src1) 4696{ 4697 dst->u[0] = src0->u[0] | src1->u[0]; 4698 dst->u[1] = src0->u[1] | src1->u[1]; 4699 dst->u[2] = src0->u[2] | src1->u[2]; 4700 dst->u[3] = src0->u[3] | src1->u[3]; 4701} 4702 4703static void 4704micro_xor(union tgsi_exec_channel *dst, 4705 const union tgsi_exec_channel *src0, 4706 const union tgsi_exec_channel *src1) 4707{ 4708 dst->u[0] = src0->u[0] ^ src1->u[0]; 4709 dst->u[1] = src0->u[1] ^ src1->u[1]; 4710 dst->u[2] = src0->u[2] ^ src1->u[2]; 4711 dst->u[3] = src0->u[3] ^ src1->u[3]; 4712} 4713 4714static void 4715micro_mod(union tgsi_exec_channel *dst, 4716 const union tgsi_exec_channel *src0, 4717 const union tgsi_exec_channel *src1) 4718{ 4719 dst->i[0] = src1->i[0] ? src0->i[0] % src1->i[0] : ~0; 4720 dst->i[1] = src1->i[1] ? src0->i[1] % src1->i[1] : ~0; 4721 dst->i[2] = src1->i[2] ? src0->i[2] % src1->i[2] : ~0; 4722 dst->i[3] = src1->i[3] ? src0->i[3] % src1->i[3] : ~0; 4723} 4724 4725static void 4726micro_f2i(union tgsi_exec_channel *dst, 4727 const union tgsi_exec_channel *src) 4728{ 4729 dst->i[0] = (int)src->f[0]; 4730 dst->i[1] = (int)src->f[1]; 4731 dst->i[2] = (int)src->f[2]; 4732 dst->i[3] = (int)src->f[3]; 4733} 4734 4735static void 4736micro_fseq(union tgsi_exec_channel *dst, 4737 const union tgsi_exec_channel *src0, 4738 const union tgsi_exec_channel *src1) 4739{ 4740 dst->u[0] = src0->f[0] == src1->f[0] ? ~0 : 0; 4741 dst->u[1] = src0->f[1] == src1->f[1] ? ~0 : 0; 4742 dst->u[2] = src0->f[2] == src1->f[2] ? ~0 : 0; 4743 dst->u[3] = src0->f[3] == src1->f[3] ? ~0 : 0; 4744} 4745 4746static void 4747micro_fsge(union tgsi_exec_channel *dst, 4748 const union tgsi_exec_channel *src0, 4749 const union tgsi_exec_channel *src1) 4750{ 4751 dst->u[0] = src0->f[0] >= src1->f[0] ? ~0 : 0; 4752 dst->u[1] = src0->f[1] >= src1->f[1] ? ~0 : 0; 4753 dst->u[2] = src0->f[2] >= src1->f[2] ? ~0 : 0; 4754 dst->u[3] = src0->f[3] >= src1->f[3] ? ~0 : 0; 4755} 4756 4757static void 4758micro_fslt(union tgsi_exec_channel *dst, 4759 const union tgsi_exec_channel *src0, 4760 const union tgsi_exec_channel *src1) 4761{ 4762 dst->u[0] = src0->f[0] < src1->f[0] ? ~0 : 0; 4763 dst->u[1] = src0->f[1] < src1->f[1] ? ~0 : 0; 4764 dst->u[2] = src0->f[2] < src1->f[2] ? ~0 : 0; 4765 dst->u[3] = src0->f[3] < src1->f[3] ? ~0 : 0; 4766} 4767 4768static void 4769micro_fsne(union tgsi_exec_channel *dst, 4770 const union tgsi_exec_channel *src0, 4771 const union tgsi_exec_channel *src1) 4772{ 4773 dst->u[0] = src0->f[0] != src1->f[0] ? ~0 : 0; 4774 dst->u[1] = src0->f[1] != src1->f[1] ? ~0 : 0; 4775 dst->u[2] = src0->f[2] != src1->f[2] ? ~0 : 0; 4776 dst->u[3] = src0->f[3] != src1->f[3] ? ~0 : 0; 4777} 4778 4779static void 4780micro_idiv(union tgsi_exec_channel *dst, 4781 const union tgsi_exec_channel *src0, 4782 const union tgsi_exec_channel *src1) 4783{ 4784 dst->i[0] = src1->i[0] ? src0->i[0] / src1->i[0] : 0; 4785 dst->i[1] = src1->i[1] ? src0->i[1] / src1->i[1] : 0; 4786 dst->i[2] = src1->i[2] ? src0->i[2] / src1->i[2] : 0; 4787 dst->i[3] = src1->i[3] ? src0->i[3] / src1->i[3] : 0; 4788} 4789 4790static void 4791micro_imax(union tgsi_exec_channel *dst, 4792 const union tgsi_exec_channel *src0, 4793 const union tgsi_exec_channel *src1) 4794{ 4795 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; 4796 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; 4797 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; 4798 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; 4799} 4800 4801static void 4802micro_imin(union tgsi_exec_channel *dst, 4803 const union tgsi_exec_channel *src0, 4804 const union tgsi_exec_channel *src1) 4805{ 4806 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; 4807 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; 4808 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; 4809 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; 4810} 4811 4812static void 4813micro_isge(union tgsi_exec_channel *dst, 4814 const union tgsi_exec_channel *src0, 4815 const union tgsi_exec_channel *src1) 4816{ 4817 dst->i[0] = src0->i[0] >= src1->i[0] ? -1 : 0; 4818 dst->i[1] = src0->i[1] >= src1->i[1] ? -1 : 0; 4819 dst->i[2] = src0->i[2] >= src1->i[2] ? -1 : 0; 4820 dst->i[3] = src0->i[3] >= src1->i[3] ? -1 : 0; 4821} 4822 4823static void 4824micro_ishr(union tgsi_exec_channel *dst, 4825 const union tgsi_exec_channel *src0, 4826 const union tgsi_exec_channel *src1) 4827{ 4828 unsigned masked_count; 4829 masked_count = src1->i[0] & 0x1f; 4830 dst->i[0] = src0->i[0] >> masked_count; 4831 masked_count = src1->i[1] & 0x1f; 4832 dst->i[1] = src0->i[1] >> masked_count; 4833 masked_count = src1->i[2] & 0x1f; 4834 dst->i[2] = src0->i[2] >> masked_count; 4835 masked_count = src1->i[3] & 0x1f; 4836 dst->i[3] = src0->i[3] >> masked_count; 4837} 4838 4839static void 4840micro_islt(union tgsi_exec_channel *dst, 4841 const union tgsi_exec_channel *src0, 4842 const union tgsi_exec_channel *src1) 4843{ 4844 dst->i[0] = src0->i[0] < src1->i[0] ? -1 : 0; 4845 dst->i[1] = src0->i[1] < src1->i[1] ? -1 : 0; 4846 dst->i[2] = src0->i[2] < src1->i[2] ? -1 : 0; 4847 dst->i[3] = src0->i[3] < src1->i[3] ? -1 : 0; 4848} 4849 4850static void 4851micro_f2u(union tgsi_exec_channel *dst, 4852 const union tgsi_exec_channel *src) 4853{ 4854 dst->u[0] = (uint)src->f[0]; 4855 dst->u[1] = (uint)src->f[1]; 4856 dst->u[2] = (uint)src->f[2]; 4857 dst->u[3] = (uint)src->f[3]; 4858} 4859 4860static void 4861micro_u2f(union tgsi_exec_channel *dst, 4862 const union tgsi_exec_channel *src) 4863{ 4864 dst->f[0] = (float)src->u[0]; 4865 dst->f[1] = (float)src->u[1]; 4866 dst->f[2] = (float)src->u[2]; 4867 dst->f[3] = (float)src->u[3]; 4868} 4869 4870static void 4871micro_uadd(union tgsi_exec_channel *dst, 4872 const union tgsi_exec_channel *src0, 4873 const union tgsi_exec_channel *src1) 4874{ 4875 dst->u[0] = src0->u[0] + src1->u[0]; 4876 dst->u[1] = src0->u[1] + src1->u[1]; 4877 dst->u[2] = src0->u[2] + src1->u[2]; 4878 dst->u[3] = src0->u[3] + src1->u[3]; 4879} 4880 4881static void 4882micro_udiv(union tgsi_exec_channel *dst, 4883 const union tgsi_exec_channel *src0, 4884 const union tgsi_exec_channel *src1) 4885{ 4886 dst->u[0] = src1->u[0] ? src0->u[0] / src1->u[0] : ~0u; 4887 dst->u[1] = src1->u[1] ? src0->u[1] / src1->u[1] : ~0u; 4888 dst->u[2] = src1->u[2] ? src0->u[2] / src1->u[2] : ~0u; 4889 dst->u[3] = src1->u[3] ? src0->u[3] / src1->u[3] : ~0u; 4890} 4891 4892static void 4893micro_umad(union tgsi_exec_channel *dst, 4894 const union tgsi_exec_channel *src0, 4895 const union tgsi_exec_channel *src1, 4896 const union tgsi_exec_channel *src2) 4897{ 4898 dst->u[0] = src0->u[0] * src1->u[0] + src2->u[0]; 4899 dst->u[1] = src0->u[1] * src1->u[1] + src2->u[1]; 4900 dst->u[2] = src0->u[2] * src1->u[2] + src2->u[2]; 4901 dst->u[3] = src0->u[3] * src1->u[3] + src2->u[3]; 4902} 4903 4904static void 4905micro_umax(union tgsi_exec_channel *dst, 4906 const union tgsi_exec_channel *src0, 4907 const union tgsi_exec_channel *src1) 4908{ 4909 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; 4910 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; 4911 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; 4912 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; 4913} 4914 4915static void 4916micro_umin(union tgsi_exec_channel *dst, 4917 const union tgsi_exec_channel *src0, 4918 const union tgsi_exec_channel *src1) 4919{ 4920 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; 4921 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; 4922 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; 4923 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; 4924} 4925 4926static void 4927micro_umod(union tgsi_exec_channel *dst, 4928 const union tgsi_exec_channel *src0, 4929 const union tgsi_exec_channel *src1) 4930{ 4931 dst->u[0] = src1->u[0] ? src0->u[0] % src1->u[0] : ~0u; 4932 dst->u[1] = src1->u[1] ? src0->u[1] % src1->u[1] : ~0u; 4933 dst->u[2] = src1->u[2] ? src0->u[2] % src1->u[2] : ~0u; 4934 dst->u[3] = src1->u[3] ? src0->u[3] % src1->u[3] : ~0u; 4935} 4936 4937static void 4938micro_umul(union tgsi_exec_channel *dst, 4939 const union tgsi_exec_channel *src0, 4940 const union tgsi_exec_channel *src1) 4941{ 4942 dst->u[0] = src0->u[0] * src1->u[0]; 4943 dst->u[1] = src0->u[1] * src1->u[1]; 4944 dst->u[2] = src0->u[2] * src1->u[2]; 4945 dst->u[3] = src0->u[3] * src1->u[3]; 4946} 4947 4948static void 4949micro_imul_hi(union tgsi_exec_channel *dst, 4950 const union tgsi_exec_channel *src0, 4951 const union tgsi_exec_channel *src1) 4952{ 4953#define I64M(x, y) ((((int64_t)x) * ((int64_t)y)) >> 32) 4954 dst->i[0] = I64M(src0->i[0], src1->i[0]); 4955 dst->i[1] = I64M(src0->i[1], src1->i[1]); 4956 dst->i[2] = I64M(src0->i[2], src1->i[2]); 4957 dst->i[3] = I64M(src0->i[3], src1->i[3]); 4958#undef I64M 4959} 4960 4961static void 4962micro_umul_hi(union tgsi_exec_channel *dst, 4963 const union tgsi_exec_channel *src0, 4964 const union tgsi_exec_channel *src1) 4965{ 4966#define U64M(x, y) ((((uint64_t)x) * ((uint64_t)y)) >> 32) 4967 dst->u[0] = U64M(src0->u[0], src1->u[0]); 4968 dst->u[1] = U64M(src0->u[1], src1->u[1]); 4969 dst->u[2] = U64M(src0->u[2], src1->u[2]); 4970 dst->u[3] = U64M(src0->u[3], src1->u[3]); 4971#undef U64M 4972} 4973 4974static void 4975micro_useq(union tgsi_exec_channel *dst, 4976 const union tgsi_exec_channel *src0, 4977 const union tgsi_exec_channel *src1) 4978{ 4979 dst->u[0] = src0->u[0] == src1->u[0] ? ~0 : 0; 4980 dst->u[1] = src0->u[1] == src1->u[1] ? ~0 : 0; 4981 dst->u[2] = src0->u[2] == src1->u[2] ? ~0 : 0; 4982 dst->u[3] = src0->u[3] == src1->u[3] ? ~0 : 0; 4983} 4984 4985static void 4986micro_usge(union tgsi_exec_channel *dst, 4987 const union tgsi_exec_channel *src0, 4988 const union tgsi_exec_channel *src1) 4989{ 4990 dst->u[0] = src0->u[0] >= src1->u[0] ? ~0 : 0; 4991 dst->u[1] = src0->u[1] >= src1->u[1] ? ~0 : 0; 4992 dst->u[2] = src0->u[2] >= src1->u[2] ? ~0 : 0; 4993 dst->u[3] = src0->u[3] >= src1->u[3] ? ~0 : 0; 4994} 4995 4996static void 4997micro_ushr(union tgsi_exec_channel *dst, 4998 const union tgsi_exec_channel *src0, 4999 const union tgsi_exec_channel *src1) 5000{ 5001 unsigned masked_count; 5002 masked_count = src1->u[0] & 0x1f; 5003 dst->u[0] = src0->u[0] >> masked_count; 5004 masked_count = src1->u[1] & 0x1f; 5005 dst->u[1] = src0->u[1] >> masked_count; 5006 masked_count = src1->u[2] & 0x1f; 5007 dst->u[2] = src0->u[2] >> masked_count; 5008 masked_count = src1->u[3] & 0x1f; 5009 dst->u[3] = src0->u[3] >> masked_count; 5010} 5011 5012static void 5013micro_uslt(union tgsi_exec_channel *dst, 5014 const union tgsi_exec_channel *src0, 5015 const union tgsi_exec_channel *src1) 5016{ 5017 dst->u[0] = src0->u[0] < src1->u[0] ? ~0 : 0; 5018 dst->u[1] = src0->u[1] < src1->u[1] ? ~0 : 0; 5019 dst->u[2] = src0->u[2] < src1->u[2] ? ~0 : 0; 5020 dst->u[3] = src0->u[3] < src1->u[3] ? ~0 : 0; 5021} 5022 5023static void 5024micro_usne(union tgsi_exec_channel *dst, 5025 const union tgsi_exec_channel *src0, 5026 const union tgsi_exec_channel *src1) 5027{ 5028 dst->u[0] = src0->u[0] != src1->u[0] ? ~0 : 0; 5029 dst->u[1] = src0->u[1] != src1->u[1] ? ~0 : 0; 5030 dst->u[2] = src0->u[2] != src1->u[2] ? ~0 : 0; 5031 dst->u[3] = src0->u[3] != src1->u[3] ? ~0 : 0; 5032} 5033 5034static void 5035micro_uarl(union tgsi_exec_channel *dst, 5036 const union tgsi_exec_channel *src) 5037{ 5038 dst->i[0] = src->u[0]; 5039 dst->i[1] = src->u[1]; 5040 dst->i[2] = src->u[2]; 5041 dst->i[3] = src->u[3]; 5042} 5043 5044/** 5045 * Signed bitfield extract (i.e. sign-extend the extracted bits) 5046 */ 5047static void 5048micro_ibfe(union tgsi_exec_channel *dst, 5049 const union tgsi_exec_channel *src0, 5050 const union tgsi_exec_channel *src1, 5051 const union tgsi_exec_channel *src2) 5052{ 5053 int i; 5054 for (i = 0; i < 4; i++) { 5055 int width = src2->i[i]; 5056 int offset = src1->i[i] & 0x1f; 5057 if (width == 32 && offset == 0) { 5058 dst->i[i] = src0->i[i]; 5059 continue; 5060 } 5061 width &= 0x1f; 5062 if (width == 0) 5063 dst->i[i] = 0; 5064 else if (width + offset < 32) 5065 dst->i[i] = (src0->i[i] << (32 - width - offset)) >> (32 - width); 5066 else 5067 dst->i[i] = src0->i[i] >> offset; 5068 } 5069} 5070 5071/** 5072 * Unsigned bitfield extract 5073 */ 5074static void 5075micro_ubfe(union tgsi_exec_channel *dst, 5076 const union tgsi_exec_channel *src0, 5077 const union tgsi_exec_channel *src1, 5078 const union tgsi_exec_channel *src2) 5079{ 5080 int i; 5081 for (i = 0; i < 4; i++) { 5082 int width = src2->u[i]; 5083 int offset = src1->u[i] & 0x1f; 5084 if (width == 32 && offset == 0) { 5085 dst->u[i] = src0->u[i]; 5086 continue; 5087 } 5088 width &= 0x1f; 5089 if (width == 0) 5090 dst->u[i] = 0; 5091 else if (width + offset < 32) 5092 dst->u[i] = (src0->u[i] << (32 - width - offset)) >> (32 - width); 5093 else 5094 dst->u[i] = src0->u[i] >> offset; 5095 } 5096} 5097 5098/** 5099 * Bitfield insert: copy low bits from src1 into a region of src0. 5100 */ 5101static void 5102micro_bfi(union tgsi_exec_channel *dst, 5103 const union tgsi_exec_channel *src0, 5104 const union tgsi_exec_channel *src1, 5105 const union tgsi_exec_channel *src2, 5106 const union tgsi_exec_channel *src3) 5107{ 5108 int i; 5109 for (i = 0; i < 4; i++) { 5110 int width = src3->u[i]; 5111 int offset = src2->u[i] & 0x1f; 5112 if (width == 32) { 5113 dst->u[i] = src1->u[i]; 5114 } else { 5115 int bitmask = ((1 << width) - 1) << offset; 5116 dst->u[i] = ((src1->u[i] << offset) & bitmask) | (src0->u[i] & ~bitmask); 5117 } 5118 } 5119} 5120 5121static void 5122micro_brev(union tgsi_exec_channel *dst, 5123 const union tgsi_exec_channel *src) 5124{ 5125 dst->u[0] = util_bitreverse(src->u[0]); 5126 dst->u[1] = util_bitreverse(src->u[1]); 5127 dst->u[2] = util_bitreverse(src->u[2]); 5128 dst->u[3] = util_bitreverse(src->u[3]); 5129} 5130 5131static void 5132micro_popc(union tgsi_exec_channel *dst, 5133 const union tgsi_exec_channel *src) 5134{ 5135 dst->u[0] = util_bitcount(src->u[0]); 5136 dst->u[1] = util_bitcount(src->u[1]); 5137 dst->u[2] = util_bitcount(src->u[2]); 5138 dst->u[3] = util_bitcount(src->u[3]); 5139} 5140 5141static void 5142micro_lsb(union tgsi_exec_channel *dst, 5143 const union tgsi_exec_channel *src) 5144{ 5145 dst->i[0] = ffs(src->u[0]) - 1; 5146 dst->i[1] = ffs(src->u[1]) - 1; 5147 dst->i[2] = ffs(src->u[2]) - 1; 5148 dst->i[3] = ffs(src->u[3]) - 1; 5149} 5150 5151static void 5152micro_imsb(union tgsi_exec_channel *dst, 5153 const union tgsi_exec_channel *src) 5154{ 5155 dst->i[0] = util_last_bit_signed(src->i[0]) - 1; 5156 dst->i[1] = util_last_bit_signed(src->i[1]) - 1; 5157 dst->i[2] = util_last_bit_signed(src->i[2]) - 1; 5158 dst->i[3] = util_last_bit_signed(src->i[3]) - 1; 5159} 5160 5161static void 5162micro_umsb(union tgsi_exec_channel *dst, 5163 const union tgsi_exec_channel *src) 5164{ 5165 dst->i[0] = util_last_bit(src->u[0]) - 1; 5166 dst->i[1] = util_last_bit(src->u[1]) - 1; 5167 dst->i[2] = util_last_bit(src->u[2]) - 1; 5168 dst->i[3] = util_last_bit(src->u[3]) - 1; 5169} 5170 5171 5172static void 5173exec_interp_at_sample(struct tgsi_exec_machine *mach, 5174 const struct tgsi_full_instruction *inst) 5175{ 5176 union tgsi_exec_channel index; 5177 union tgsi_exec_channel index2D; 5178 union tgsi_exec_channel result[TGSI_NUM_CHANNELS]; 5179 const struct tgsi_full_src_register *reg = &inst->Src[0]; 5180 5181 assert(reg->Register.File == TGSI_FILE_INPUT); 5182 assert(inst->Src[1].Register.File == TGSI_FILE_IMMEDIATE); 5183 5184 get_index_registers(mach, reg, &index, &index2D); 5185 float sample = mach->Imms[inst->Src[1].Register.Index][inst->Src[1].Register.SwizzleX]; 5186 5187 /* Short cut: sample 0 is like a normal fetch */ 5188 for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 5189 if (!(inst->Dst[0].Register.WriteMask & (1 << chan))) 5190 continue; 5191 5192 fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D, 5193 &result[chan]); 5194 if (sample != 0.0f) { 5195 5196 /* TODO: define the samples > 0, but so far we only do fake MSAA */ 5197 float x = 0; 5198 float y = 0; 5199 5200 unsigned pos = index2D.i[chan] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index.i[chan]; 5201 assert(pos >= 0); 5202 assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS); 5203 mach->InputSampleOffsetApply[pos](mach, pos, chan, x, y, &result[chan]); 5204 } 5205 store_dest(mach, &result[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 5206 } 5207} 5208 5209 5210static void 5211exec_interp_at_offset(struct tgsi_exec_machine *mach, 5212 const struct tgsi_full_instruction *inst) 5213{ 5214 union tgsi_exec_channel index; 5215 union tgsi_exec_channel index2D; 5216 union tgsi_exec_channel ofsx; 5217 union tgsi_exec_channel ofsy; 5218 const struct tgsi_full_src_register *reg = &inst->Src[0]; 5219 5220 assert(reg->Register.File == TGSI_FILE_INPUT); 5221 5222 get_index_registers(mach, reg, &index, &index2D); 5223 unsigned pos = index2D.i[0] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index.i[0]; 5224 5225 fetch_source(mach, &ofsx, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 5226 fetch_source(mach, &ofsy, &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 5227 5228 for (int chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 5229 if (!(inst->Dst[0].Register.WriteMask & (1 << chan))) 5230 continue; 5231 union tgsi_exec_channel result; 5232 fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D, &result); 5233 mach->InputSampleOffsetApply[pos](mach, pos, chan, ofsx.f[chan], ofsy.f[chan], &result); 5234 store_dest(mach, &result, &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 5235 } 5236} 5237 5238 5239static void 5240exec_interp_at_centroid(struct tgsi_exec_machine *mach, 5241 const struct tgsi_full_instruction *inst) 5242{ 5243 union tgsi_exec_channel index; 5244 union tgsi_exec_channel index2D; 5245 union tgsi_exec_channel result[TGSI_NUM_CHANNELS]; 5246 const struct tgsi_full_src_register *reg = &inst->Src[0]; 5247 5248 assert(reg->Register.File == TGSI_FILE_INPUT); 5249 get_index_registers(mach, reg, &index, &index2D); 5250 5251 for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 5252 if (!(inst->Dst[0].Register.WriteMask & (1 << chan))) 5253 continue; 5254 5255 /* Here we should add the change to use a sample that lies within the 5256 * primitive (Section 15.2): 5257 * 5258 * "When interpolating variables declared using centroid in , 5259 * the variable is sampled at a location within the pixel covered 5260 * by the primitive generating the fragment. 5261 * ... 5262 * The built-in functions interpolateAtCentroid ... will sample 5263 * variables as though they were declared with the centroid ... 5264 * qualifier[s]." 5265 * 5266 * Since we only support 1 sample currently, this is just a pass-through. 5267 */ 5268 fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D, 5269 &result[chan]); 5270 store_dest(mach, &result[chan], &inst->Dst[0], inst, chan, TGSI_EXEC_DATA_FLOAT); 5271 } 5272 5273} 5274 5275 5276/** 5277 * Execute a TGSI instruction. 5278 * Returns TRUE if a barrier instruction is hit, 5279 * otherwise FALSE. 5280 */ 5281static boolean 5282exec_instruction( 5283 struct tgsi_exec_machine *mach, 5284 const struct tgsi_full_instruction *inst, 5285 int *pc ) 5286{ 5287 union tgsi_exec_channel r[10]; 5288 5289 (*pc)++; 5290 5291 switch (inst->Instruction.Opcode) { 5292 case TGSI_OPCODE_ARL: 5293 exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); 5294 break; 5295 5296 case TGSI_OPCODE_MOV: 5297 exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 5298 break; 5299 5300 case TGSI_OPCODE_LIT: 5301 exec_lit(mach, inst); 5302 break; 5303 5304 case TGSI_OPCODE_RCP: 5305 exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5306 break; 5307 5308 case TGSI_OPCODE_RSQ: 5309 exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5310 break; 5311 5312 case TGSI_OPCODE_EXP: 5313 exec_exp(mach, inst); 5314 break; 5315 5316 case TGSI_OPCODE_LOG: 5317 exec_log(mach, inst); 5318 break; 5319 5320 case TGSI_OPCODE_MUL: 5321 exec_vector_binary(mach, inst, micro_mul, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5322 break; 5323 5324 case TGSI_OPCODE_ADD: 5325 exec_vector_binary(mach, inst, micro_add, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5326 break; 5327 5328 case TGSI_OPCODE_DP3: 5329 exec_dp3(mach, inst); 5330 break; 5331 5332 case TGSI_OPCODE_DP4: 5333 exec_dp4(mach, inst); 5334 break; 5335 5336 case TGSI_OPCODE_DST: 5337 exec_dst(mach, inst); 5338 break; 5339 5340 case TGSI_OPCODE_MIN: 5341 exec_vector_binary(mach, inst, micro_min, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5342 break; 5343 5344 case TGSI_OPCODE_MAX: 5345 exec_vector_binary(mach, inst, micro_max, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5346 break; 5347 5348 case TGSI_OPCODE_SLT: 5349 exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5350 break; 5351 5352 case TGSI_OPCODE_SGE: 5353 exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5354 break; 5355 5356 case TGSI_OPCODE_MAD: 5357 exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5358 break; 5359 5360 case TGSI_OPCODE_LRP: 5361 exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5362 break; 5363 5364 case TGSI_OPCODE_SQRT: 5365 exec_scalar_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5366 break; 5367 5368 case TGSI_OPCODE_FRC: 5369 exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5370 break; 5371 5372 case TGSI_OPCODE_FLR: 5373 exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5374 break; 5375 5376 case TGSI_OPCODE_ROUND: 5377 exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5378 break; 5379 5380 case TGSI_OPCODE_EX2: 5381 exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5382 break; 5383 5384 case TGSI_OPCODE_LG2: 5385 exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5386 break; 5387 5388 case TGSI_OPCODE_POW: 5389 exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5390 break; 5391 5392 case TGSI_OPCODE_LDEXP: 5393 exec_vector_binary(mach, inst, micro_ldexp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5394 break; 5395 5396 case TGSI_OPCODE_COS: 5397 exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5398 break; 5399 5400 case TGSI_OPCODE_DDX: 5401 exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5402 break; 5403 5404 case TGSI_OPCODE_DDY: 5405 exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5406 break; 5407 5408 case TGSI_OPCODE_KILL: 5409 exec_kill (mach); 5410 break; 5411 5412 case TGSI_OPCODE_KILL_IF: 5413 exec_kill_if (mach, inst); 5414 break; 5415 5416 case TGSI_OPCODE_PK2H: 5417 exec_pk2h(mach, inst); 5418 break; 5419 5420 case TGSI_OPCODE_PK2US: 5421 assert (0); 5422 break; 5423 5424 case TGSI_OPCODE_PK4B: 5425 assert (0); 5426 break; 5427 5428 case TGSI_OPCODE_PK4UB: 5429 assert (0); 5430 break; 5431 5432 case TGSI_OPCODE_SEQ: 5433 exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5434 break; 5435 5436 case TGSI_OPCODE_SGT: 5437 exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5438 break; 5439 5440 case TGSI_OPCODE_SIN: 5441 exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5442 break; 5443 5444 case TGSI_OPCODE_SLE: 5445 exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5446 break; 5447 5448 case TGSI_OPCODE_SNE: 5449 exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5450 break; 5451 5452 case TGSI_OPCODE_TEX: 5453 /* simple texture lookup */ 5454 /* src[0] = texcoord */ 5455 /* src[1] = sampler unit */ 5456 exec_tex(mach, inst, TEX_MODIFIER_NONE, 1); 5457 break; 5458 5459 case TGSI_OPCODE_TXB: 5460 /* Texture lookup with lod bias */ 5461 /* src[0] = texcoord (src[0].w = LOD bias) */ 5462 /* src[1] = sampler unit */ 5463 exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 1); 5464 break; 5465 5466 case TGSI_OPCODE_TXD: 5467 /* Texture lookup with explict partial derivatives */ 5468 /* src[0] = texcoord */ 5469 /* src[1] = d[strq]/dx */ 5470 /* src[2] = d[strq]/dy */ 5471 /* src[3] = sampler unit */ 5472 exec_txd(mach, inst); 5473 break; 5474 5475 case TGSI_OPCODE_TXL: 5476 /* Texture lookup with explit LOD */ 5477 /* src[0] = texcoord (src[0].w = LOD) */ 5478 /* src[1] = sampler unit */ 5479 exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 1); 5480 break; 5481 5482 case TGSI_OPCODE_TXP: 5483 /* Texture lookup with projection */ 5484 /* src[0] = texcoord (src[0].w = projection) */ 5485 /* src[1] = sampler unit */ 5486 exec_tex(mach, inst, TEX_MODIFIER_PROJECTED, 1); 5487 break; 5488 5489 case TGSI_OPCODE_TG4: 5490 /* src[0] = texcoord */ 5491 /* src[1] = component */ 5492 /* src[2] = sampler unit */ 5493 exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2); 5494 break; 5495 5496 case TGSI_OPCODE_LODQ: 5497 /* src[0] = texcoord */ 5498 /* src[1] = sampler unit */ 5499 exec_lodq(mach, inst); 5500 break; 5501 5502 case TGSI_OPCODE_UP2H: 5503 exec_up2h(mach, inst); 5504 break; 5505 5506 case TGSI_OPCODE_UP2US: 5507 assert (0); 5508 break; 5509 5510 case TGSI_OPCODE_UP4B: 5511 assert (0); 5512 break; 5513 5514 case TGSI_OPCODE_UP4UB: 5515 assert (0); 5516 break; 5517 5518 case TGSI_OPCODE_ARR: 5519 exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); 5520 break; 5521 5522 case TGSI_OPCODE_CAL: 5523 /* skip the call if no execution channels are enabled */ 5524 if (mach->ExecMask) { 5525 /* do the call */ 5526 5527 /* First, record the depths of the execution stacks. 5528 * This is important for deeply nested/looped return statements. 5529 * We have to unwind the stacks by the correct amount. For a 5530 * real code generator, we could determine the number of entries 5531 * to pop off each stack with simple static analysis and avoid 5532 * implementing this data structure at run time. 5533 */ 5534 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop; 5535 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop; 5536 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop; 5537 mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop; 5538 mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop; 5539 /* note that PC was already incremented above */ 5540 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc; 5541 5542 mach->CallStackTop++; 5543 5544 /* Second, push the Cond, Loop, Cont, Func stacks */ 5545 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 5546 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 5547 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 5548 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); 5549 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); 5550 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); 5551 5552 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 5553 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 5554 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 5555 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; 5556 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; 5557 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; 5558 5559 /* Finally, jump to the subroutine. The label is a pointer 5560 * (an instruction number) to the BGNSUB instruction. 5561 */ 5562 *pc = inst->Label.Label; 5563 assert(mach->Instructions[*pc].Instruction.Opcode 5564 == TGSI_OPCODE_BGNSUB); 5565 } 5566 break; 5567 5568 case TGSI_OPCODE_RET: 5569 mach->FuncMask &= ~mach->ExecMask; 5570 UPDATE_EXEC_MASK(mach); 5571 5572 if (mach->FuncMask == 0x0) { 5573 /* really return now (otherwise, keep executing */ 5574 5575 if (mach->CallStackTop == 0) { 5576 /* returning from main() */ 5577 mach->CondStackTop = 0; 5578 mach->LoopStackTop = 0; 5579 mach->ContStackTop = 0; 5580 mach->LoopLabelStackTop = 0; 5581 mach->SwitchStackTop = 0; 5582 mach->BreakStackTop = 0; 5583 *pc = -1; 5584 return FALSE; 5585 } 5586 5587 assert(mach->CallStackTop > 0); 5588 mach->CallStackTop--; 5589 5590 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; 5591 mach->CondMask = mach->CondStack[mach->CondStackTop]; 5592 5593 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; 5594 mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; 5595 5596 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; 5597 mach->ContMask = mach->ContStack[mach->ContStackTop]; 5598 5599 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; 5600 mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; 5601 5602 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; 5603 mach->BreakType = mach->BreakStack[mach->BreakStackTop]; 5604 5605 assert(mach->FuncStackTop > 0); 5606 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 5607 5608 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; 5609 5610 UPDATE_EXEC_MASK(mach); 5611 } 5612 break; 5613 5614 case TGSI_OPCODE_SSG: 5615 exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5616 break; 5617 5618 case TGSI_OPCODE_CMP: 5619 exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5620 break; 5621 5622 case TGSI_OPCODE_DIV: 5623 exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5624 break; 5625 5626 case TGSI_OPCODE_DP2: 5627 exec_dp2(mach, inst); 5628 break; 5629 5630 case TGSI_OPCODE_IF: 5631 /* push CondMask */ 5632 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 5633 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 5634 FETCH( &r[0], 0, TGSI_CHAN_X ); 5635 /* update CondMask */ 5636 if( ! r[0].f[0] ) { 5637 mach->CondMask &= ~0x1; 5638 } 5639 if( ! r[0].f[1] ) { 5640 mach->CondMask &= ~0x2; 5641 } 5642 if( ! r[0].f[2] ) { 5643 mach->CondMask &= ~0x4; 5644 } 5645 if( ! r[0].f[3] ) { 5646 mach->CondMask &= ~0x8; 5647 } 5648 UPDATE_EXEC_MASK(mach); 5649 /* Todo: If CondMask==0, jump to ELSE */ 5650 break; 5651 5652 case TGSI_OPCODE_UIF: 5653 /* push CondMask */ 5654 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 5655 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 5656 IFETCH( &r[0], 0, TGSI_CHAN_X ); 5657 /* update CondMask */ 5658 if( ! r[0].u[0] ) { 5659 mach->CondMask &= ~0x1; 5660 } 5661 if( ! r[0].u[1] ) { 5662 mach->CondMask &= ~0x2; 5663 } 5664 if( ! r[0].u[2] ) { 5665 mach->CondMask &= ~0x4; 5666 } 5667 if( ! r[0].u[3] ) { 5668 mach->CondMask &= ~0x8; 5669 } 5670 UPDATE_EXEC_MASK(mach); 5671 /* Todo: If CondMask==0, jump to ELSE */ 5672 break; 5673 5674 case TGSI_OPCODE_ELSE: 5675 /* invert CondMask wrt previous mask */ 5676 { 5677 uint prevMask; 5678 assert(mach->CondStackTop > 0); 5679 prevMask = mach->CondStack[mach->CondStackTop - 1]; 5680 mach->CondMask = ~mach->CondMask & prevMask; 5681 UPDATE_EXEC_MASK(mach); 5682 /* Todo: If CondMask==0, jump to ENDIF */ 5683 } 5684 break; 5685 5686 case TGSI_OPCODE_ENDIF: 5687 /* pop CondMask */ 5688 assert(mach->CondStackTop > 0); 5689 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 5690 UPDATE_EXEC_MASK(mach); 5691 break; 5692 5693 case TGSI_OPCODE_END: 5694 /* make sure we end primitives which haven't 5695 * been explicitly emitted */ 5696 conditional_emit_primitive(mach); 5697 /* halt execution */ 5698 *pc = -1; 5699 break; 5700 5701 case TGSI_OPCODE_CEIL: 5702 exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5703 break; 5704 5705 case TGSI_OPCODE_I2F: 5706 exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_INT); 5707 break; 5708 5709 case TGSI_OPCODE_NOT: 5710 exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5711 break; 5712 5713 case TGSI_OPCODE_TRUNC: 5714 exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_FLOAT); 5715 break; 5716 5717 case TGSI_OPCODE_SHL: 5718 exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5719 break; 5720 5721 case TGSI_OPCODE_AND: 5722 exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5723 break; 5724 5725 case TGSI_OPCODE_OR: 5726 exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5727 break; 5728 5729 case TGSI_OPCODE_MOD: 5730 exec_vector_binary(mach, inst, micro_mod, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5731 break; 5732 5733 case TGSI_OPCODE_XOR: 5734 exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5735 break; 5736 5737 case TGSI_OPCODE_TXF: 5738 exec_txf(mach, inst); 5739 break; 5740 5741 case TGSI_OPCODE_TXQ: 5742 exec_txq(mach, inst); 5743 break; 5744 5745 case TGSI_OPCODE_EMIT: 5746 emit_vertex(mach, inst); 5747 break; 5748 5749 case TGSI_OPCODE_ENDPRIM: 5750 emit_primitive(mach, inst); 5751 break; 5752 5753 case TGSI_OPCODE_BGNLOOP: 5754 /* push LoopMask and ContMasks */ 5755 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 5756 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 5757 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 5758 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); 5759 5760 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 5761 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 5762 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1; 5763 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; 5764 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP; 5765 break; 5766 5767 case TGSI_OPCODE_ENDLOOP: 5768 /* Restore ContMask, but don't pop */ 5769 assert(mach->ContStackTop > 0); 5770 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 5771 UPDATE_EXEC_MASK(mach); 5772 if (mach->ExecMask) { 5773 /* repeat loop: jump to instruction just past BGNLOOP */ 5774 assert(mach->LoopLabelStackTop > 0); 5775 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; 5776 } 5777 else { 5778 /* exit loop: pop LoopMask */ 5779 assert(mach->LoopStackTop > 0); 5780 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 5781 /* pop ContMask */ 5782 assert(mach->ContStackTop > 0); 5783 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 5784 assert(mach->LoopLabelStackTop > 0); 5785 --mach->LoopLabelStackTop; 5786 5787 mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; 5788 } 5789 UPDATE_EXEC_MASK(mach); 5790 break; 5791 5792 case TGSI_OPCODE_BRK: 5793 exec_break(mach); 5794 break; 5795 5796 case TGSI_OPCODE_CONT: 5797 /* turn off cont channels for each enabled exec channel */ 5798 mach->ContMask &= ~mach->ExecMask; 5799 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 5800 UPDATE_EXEC_MASK(mach); 5801 break; 5802 5803 case TGSI_OPCODE_BGNSUB: 5804 /* no-op */ 5805 break; 5806 5807 case TGSI_OPCODE_ENDSUB: 5808 /* 5809 * XXX: This really should be a no-op. We should never reach this opcode. 5810 */ 5811 5812 assert(mach->CallStackTop > 0); 5813 mach->CallStackTop--; 5814 5815 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; 5816 mach->CondMask = mach->CondStack[mach->CondStackTop]; 5817 5818 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; 5819 mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; 5820 5821 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; 5822 mach->ContMask = mach->ContStack[mach->ContStackTop]; 5823 5824 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; 5825 mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; 5826 5827 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; 5828 mach->BreakType = mach->BreakStack[mach->BreakStackTop]; 5829 5830 assert(mach->FuncStackTop > 0); 5831 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 5832 5833 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; 5834 5835 UPDATE_EXEC_MASK(mach); 5836 break; 5837 5838 case TGSI_OPCODE_NOP: 5839 break; 5840 5841 case TGSI_OPCODE_F2I: 5842 exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_FLOAT); 5843 break; 5844 5845 case TGSI_OPCODE_FSEQ: 5846 exec_vector_binary(mach, inst, micro_fseq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 5847 break; 5848 5849 case TGSI_OPCODE_FSGE: 5850 exec_vector_binary(mach, inst, micro_fsge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 5851 break; 5852 5853 case TGSI_OPCODE_FSLT: 5854 exec_vector_binary(mach, inst, micro_fslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 5855 break; 5856 5857 case TGSI_OPCODE_FSNE: 5858 exec_vector_binary(mach, inst, micro_fsne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 5859 break; 5860 5861 case TGSI_OPCODE_IDIV: 5862 exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5863 break; 5864 5865 case TGSI_OPCODE_IMAX: 5866 exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5867 break; 5868 5869 case TGSI_OPCODE_IMIN: 5870 exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5871 break; 5872 5873 case TGSI_OPCODE_INEG: 5874 exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5875 break; 5876 5877 case TGSI_OPCODE_ISGE: 5878 exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5879 break; 5880 5881 case TGSI_OPCODE_ISHR: 5882 exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5883 break; 5884 5885 case TGSI_OPCODE_ISLT: 5886 exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5887 break; 5888 5889 case TGSI_OPCODE_F2U: 5890 exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_FLOAT); 5891 break; 5892 5893 case TGSI_OPCODE_U2F: 5894 exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_FLOAT, TGSI_EXEC_DATA_UINT); 5895 break; 5896 5897 case TGSI_OPCODE_UADD: 5898 exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5899 break; 5900 5901 case TGSI_OPCODE_UDIV: 5902 exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5903 break; 5904 5905 case TGSI_OPCODE_UMAD: 5906 exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5907 break; 5908 5909 case TGSI_OPCODE_UMAX: 5910 exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5911 break; 5912 5913 case TGSI_OPCODE_UMIN: 5914 exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5915 break; 5916 5917 case TGSI_OPCODE_UMOD: 5918 exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5919 break; 5920 5921 case TGSI_OPCODE_UMUL: 5922 exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5923 break; 5924 5925 case TGSI_OPCODE_IMUL_HI: 5926 exec_vector_binary(mach, inst, micro_imul_hi, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 5927 break; 5928 5929 case TGSI_OPCODE_UMUL_HI: 5930 exec_vector_binary(mach, inst, micro_umul_hi, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5931 break; 5932 5933 case TGSI_OPCODE_USEQ: 5934 exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5935 break; 5936 5937 case TGSI_OPCODE_USGE: 5938 exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5939 break; 5940 5941 case TGSI_OPCODE_USHR: 5942 exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5943 break; 5944 5945 case TGSI_OPCODE_USLT: 5946 exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5947 break; 5948 5949 case TGSI_OPCODE_USNE: 5950 exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 5951 break; 5952 5953 case TGSI_OPCODE_SWITCH: 5954 exec_switch(mach, inst); 5955 break; 5956 5957 case TGSI_OPCODE_CASE: 5958 exec_case(mach, inst); 5959 break; 5960 5961 case TGSI_OPCODE_DEFAULT: 5962 exec_default(mach); 5963 break; 5964 5965 case TGSI_OPCODE_ENDSWITCH: 5966 exec_endswitch(mach); 5967 break; 5968 5969 case TGSI_OPCODE_SAMPLE_I: 5970 exec_txf(mach, inst); 5971 break; 5972 5973 case TGSI_OPCODE_SAMPLE_I_MS: 5974 exec_txf(mach, inst); 5975 break; 5976 5977 case TGSI_OPCODE_SAMPLE: 5978 exec_sample(mach, inst, TEX_MODIFIER_NONE, FALSE); 5979 break; 5980 5981 case TGSI_OPCODE_SAMPLE_B: 5982 exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS, FALSE); 5983 break; 5984 5985 case TGSI_OPCODE_SAMPLE_C: 5986 exec_sample(mach, inst, TEX_MODIFIER_NONE, TRUE); 5987 break; 5988 5989 case TGSI_OPCODE_SAMPLE_C_LZ: 5990 exec_sample(mach, inst, TEX_MODIFIER_LEVEL_ZERO, TRUE); 5991 break; 5992 5993 case TGSI_OPCODE_SAMPLE_D: 5994 exec_sample_d(mach, inst); 5995 break; 5996 5997 case TGSI_OPCODE_SAMPLE_L: 5998 exec_sample(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, FALSE); 5999 break; 6000 6001 case TGSI_OPCODE_GATHER4: 6002 exec_sample(mach, inst, TEX_MODIFIER_GATHER, FALSE); 6003 break; 6004 6005 case TGSI_OPCODE_SVIEWINFO: 6006 exec_txq(mach, inst); 6007 break; 6008 6009 case TGSI_OPCODE_SAMPLE_POS: 6010 assert(0); 6011 break; 6012 6013 case TGSI_OPCODE_SAMPLE_INFO: 6014 assert(0); 6015 break; 6016 6017 case TGSI_OPCODE_LOD: 6018 exec_lodq(mach, inst); 6019 break; 6020 6021 case TGSI_OPCODE_UARL: 6022 exec_vector_unary(mach, inst, micro_uarl, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT); 6023 break; 6024 6025 case TGSI_OPCODE_UCMP: 6026 exec_ucmp(mach, inst); 6027 break; 6028 6029 case TGSI_OPCODE_IABS: 6030 exec_vector_unary(mach, inst, micro_iabs, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 6031 break; 6032 6033 case TGSI_OPCODE_ISSG: 6034 exec_vector_unary(mach, inst, micro_isgn, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 6035 break; 6036 6037 case TGSI_OPCODE_TEX2: 6038 /* simple texture lookup */ 6039 /* src[0] = texcoord */ 6040 /* src[1] = compare */ 6041 /* src[2] = sampler unit */ 6042 exec_tex(mach, inst, TEX_MODIFIER_NONE, 2); 6043 break; 6044 case TGSI_OPCODE_TXB2: 6045 /* simple texture lookup */ 6046 /* src[0] = texcoord */ 6047 /* src[1] = bias */ 6048 /* src[2] = sampler unit */ 6049 exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 2); 6050 break; 6051 case TGSI_OPCODE_TXL2: 6052 /* simple texture lookup */ 6053 /* src[0] = texcoord */ 6054 /* src[1] = lod */ 6055 /* src[2] = sampler unit */ 6056 exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 2); 6057 break; 6058 6059 case TGSI_OPCODE_IBFE: 6060 exec_vector_trinary(mach, inst, micro_ibfe, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 6061 break; 6062 case TGSI_OPCODE_UBFE: 6063 exec_vector_trinary(mach, inst, micro_ubfe, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 6064 break; 6065 case TGSI_OPCODE_BFI: 6066 exec_vector_quaternary(mach, inst, micro_bfi, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 6067 break; 6068 case TGSI_OPCODE_BREV: 6069 exec_vector_unary(mach, inst, micro_brev, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 6070 break; 6071 case TGSI_OPCODE_POPC: 6072 exec_vector_unary(mach, inst, micro_popc, TGSI_EXEC_DATA_UINT, TGSI_EXEC_DATA_UINT); 6073 break; 6074 case TGSI_OPCODE_LSB: 6075 exec_vector_unary(mach, inst, micro_lsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT); 6076 break; 6077 case TGSI_OPCODE_IMSB: 6078 exec_vector_unary(mach, inst, micro_imsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_INT); 6079 break; 6080 case TGSI_OPCODE_UMSB: 6081 exec_vector_unary(mach, inst, micro_umsb, TGSI_EXEC_DATA_INT, TGSI_EXEC_DATA_UINT); 6082 break; 6083 6084 case TGSI_OPCODE_F2D: 6085 exec_t_2_64(mach, inst, micro_f2d, TGSI_EXEC_DATA_FLOAT); 6086 break; 6087 6088 case TGSI_OPCODE_D2F: 6089 exec_64_2_t(mach, inst, micro_d2f, TGSI_EXEC_DATA_FLOAT); 6090 break; 6091 6092 case TGSI_OPCODE_DABS: 6093 exec_double_unary(mach, inst, micro_dabs); 6094 break; 6095 6096 case TGSI_OPCODE_DNEG: 6097 exec_double_unary(mach, inst, micro_dneg); 6098 break; 6099 6100 case TGSI_OPCODE_DADD: 6101 exec_double_binary(mach, inst, micro_dadd, TGSI_EXEC_DATA_DOUBLE); 6102 break; 6103 6104 case TGSI_OPCODE_DDIV: 6105 exec_double_binary(mach, inst, micro_ddiv, TGSI_EXEC_DATA_DOUBLE); 6106 break; 6107 6108 case TGSI_OPCODE_DMUL: 6109 exec_double_binary(mach, inst, micro_dmul, TGSI_EXEC_DATA_DOUBLE); 6110 break; 6111 6112 case TGSI_OPCODE_DMAX: 6113 exec_double_binary(mach, inst, micro_dmax, TGSI_EXEC_DATA_DOUBLE); 6114 break; 6115 6116 case TGSI_OPCODE_DMIN: 6117 exec_double_binary(mach, inst, micro_dmin, TGSI_EXEC_DATA_DOUBLE); 6118 break; 6119 6120 case TGSI_OPCODE_DSLT: 6121 exec_double_binary(mach, inst, micro_dslt, TGSI_EXEC_DATA_UINT); 6122 break; 6123 6124 case TGSI_OPCODE_DSGE: 6125 exec_double_binary(mach, inst, micro_dsge, TGSI_EXEC_DATA_UINT); 6126 break; 6127 6128 case TGSI_OPCODE_DSEQ: 6129 exec_double_binary(mach, inst, micro_dseq, TGSI_EXEC_DATA_UINT); 6130 break; 6131 6132 case TGSI_OPCODE_DSNE: 6133 exec_double_binary(mach, inst, micro_dsne, TGSI_EXEC_DATA_UINT); 6134 break; 6135 6136 case TGSI_OPCODE_DRCP: 6137 exec_double_unary(mach, inst, micro_drcp); 6138 break; 6139 6140 case TGSI_OPCODE_DSQRT: 6141 exec_double_unary(mach, inst, micro_dsqrt); 6142 break; 6143 6144 case TGSI_OPCODE_DRSQ: 6145 exec_double_unary(mach, inst, micro_drsq); 6146 break; 6147 6148 case TGSI_OPCODE_DMAD: 6149 exec_double_trinary(mach, inst, micro_dmad); 6150 break; 6151 6152 case TGSI_OPCODE_DFRAC: 6153 exec_double_unary(mach, inst, micro_dfrac); 6154 break; 6155 6156 case TGSI_OPCODE_DLDEXP: 6157 exec_dldexp(mach, inst); 6158 break; 6159 6160 case TGSI_OPCODE_DFRACEXP: 6161 exec_dfracexp(mach, inst); 6162 break; 6163 6164 case TGSI_OPCODE_I2D: 6165 exec_t_2_64(mach, inst, micro_i2d, TGSI_EXEC_DATA_INT); 6166 break; 6167 6168 case TGSI_OPCODE_D2I: 6169 exec_64_2_t(mach, inst, micro_d2i, TGSI_EXEC_DATA_INT); 6170 break; 6171 6172 case TGSI_OPCODE_U2D: 6173 exec_t_2_64(mach, inst, micro_u2d, TGSI_EXEC_DATA_UINT); 6174 break; 6175 6176 case TGSI_OPCODE_D2U: 6177 exec_64_2_t(mach, inst, micro_d2u, TGSI_EXEC_DATA_INT); 6178 break; 6179 6180 case TGSI_OPCODE_LOAD: 6181 exec_load(mach, inst); 6182 break; 6183 6184 case TGSI_OPCODE_STORE: 6185 exec_store(mach, inst); 6186 break; 6187 6188 case TGSI_OPCODE_ATOMUADD: 6189 case TGSI_OPCODE_ATOMXCHG: 6190 case TGSI_OPCODE_ATOMCAS: 6191 case TGSI_OPCODE_ATOMAND: 6192 case TGSI_OPCODE_ATOMOR: 6193 case TGSI_OPCODE_ATOMXOR: 6194 case TGSI_OPCODE_ATOMUMIN: 6195 case TGSI_OPCODE_ATOMUMAX: 6196 case TGSI_OPCODE_ATOMIMIN: 6197 case TGSI_OPCODE_ATOMIMAX: 6198 case TGSI_OPCODE_ATOMFADD: 6199 exec_atomop(mach, inst); 6200 break; 6201 6202 case TGSI_OPCODE_RESQ: 6203 exec_resq(mach, inst); 6204 break; 6205 case TGSI_OPCODE_BARRIER: 6206 case TGSI_OPCODE_MEMBAR: 6207 return TRUE; 6208 break; 6209 6210 case TGSI_OPCODE_I64ABS: 6211 exec_double_unary(mach, inst, micro_i64abs); 6212 break; 6213 6214 case TGSI_OPCODE_I64SSG: 6215 exec_double_unary(mach, inst, micro_i64sgn); 6216 break; 6217 6218 case TGSI_OPCODE_I64NEG: 6219 exec_double_unary(mach, inst, micro_i64neg); 6220 break; 6221 6222 case TGSI_OPCODE_U64SEQ: 6223 exec_double_binary(mach, inst, micro_u64seq, TGSI_EXEC_DATA_UINT); 6224 break; 6225 6226 case TGSI_OPCODE_U64SNE: 6227 exec_double_binary(mach, inst, micro_u64sne, TGSI_EXEC_DATA_UINT); 6228 break; 6229 6230 case TGSI_OPCODE_I64SLT: 6231 exec_double_binary(mach, inst, micro_i64slt, TGSI_EXEC_DATA_UINT); 6232 break; 6233 case TGSI_OPCODE_U64SLT: 6234 exec_double_binary(mach, inst, micro_u64slt, TGSI_EXEC_DATA_UINT); 6235 break; 6236 6237 case TGSI_OPCODE_I64SGE: 6238 exec_double_binary(mach, inst, micro_i64sge, TGSI_EXEC_DATA_UINT); 6239 break; 6240 case TGSI_OPCODE_U64SGE: 6241 exec_double_binary(mach, inst, micro_u64sge, TGSI_EXEC_DATA_UINT); 6242 break; 6243 6244 case TGSI_OPCODE_I64MIN: 6245 exec_double_binary(mach, inst, micro_i64min, TGSI_EXEC_DATA_INT64); 6246 break; 6247 case TGSI_OPCODE_U64MIN: 6248 exec_double_binary(mach, inst, micro_u64min, TGSI_EXEC_DATA_UINT64); 6249 break; 6250 case TGSI_OPCODE_I64MAX: 6251 exec_double_binary(mach, inst, micro_i64max, TGSI_EXEC_DATA_INT64); 6252 break; 6253 case TGSI_OPCODE_U64MAX: 6254 exec_double_binary(mach, inst, micro_u64max, TGSI_EXEC_DATA_UINT64); 6255 break; 6256 case TGSI_OPCODE_U64ADD: 6257 exec_double_binary(mach, inst, micro_u64add, TGSI_EXEC_DATA_UINT64); 6258 break; 6259 case TGSI_OPCODE_U64MUL: 6260 exec_double_binary(mach, inst, micro_u64mul, TGSI_EXEC_DATA_UINT64); 6261 break; 6262 case TGSI_OPCODE_U64SHL: 6263 exec_arg0_64_arg1_32(mach, inst, micro_u64shl); 6264 break; 6265 case TGSI_OPCODE_I64SHR: 6266 exec_arg0_64_arg1_32(mach, inst, micro_i64shr); 6267 break; 6268 case TGSI_OPCODE_U64SHR: 6269 exec_arg0_64_arg1_32(mach, inst, micro_u64shr); 6270 break; 6271 case TGSI_OPCODE_U64DIV: 6272 exec_double_binary(mach, inst, micro_u64div, TGSI_EXEC_DATA_UINT64); 6273 break; 6274 case TGSI_OPCODE_I64DIV: 6275 exec_double_binary(mach, inst, micro_i64div, TGSI_EXEC_DATA_INT64); 6276 break; 6277 case TGSI_OPCODE_U64MOD: 6278 exec_double_binary(mach, inst, micro_u64mod, TGSI_EXEC_DATA_UINT64); 6279 break; 6280 case TGSI_OPCODE_I64MOD: 6281 exec_double_binary(mach, inst, micro_i64mod, TGSI_EXEC_DATA_INT64); 6282 break; 6283 6284 case TGSI_OPCODE_F2U64: 6285 exec_t_2_64(mach, inst, micro_f2u64, TGSI_EXEC_DATA_FLOAT); 6286 break; 6287 6288 case TGSI_OPCODE_F2I64: 6289 exec_t_2_64(mach, inst, micro_f2i64, TGSI_EXEC_DATA_FLOAT); 6290 break; 6291 6292 case TGSI_OPCODE_U2I64: 6293 exec_t_2_64(mach, inst, micro_u2i64, TGSI_EXEC_DATA_INT); 6294 break; 6295 case TGSI_OPCODE_I2I64: 6296 exec_t_2_64(mach, inst, micro_i2i64, TGSI_EXEC_DATA_INT); 6297 break; 6298 6299 case TGSI_OPCODE_D2U64: 6300 exec_double_unary(mach, inst, micro_d2u64); 6301 break; 6302 6303 case TGSI_OPCODE_D2I64: 6304 exec_double_unary(mach, inst, micro_d2i64); 6305 break; 6306 6307 case TGSI_OPCODE_U642F: 6308 exec_64_2_t(mach, inst, micro_u642f, TGSI_EXEC_DATA_FLOAT); 6309 break; 6310 case TGSI_OPCODE_I642F: 6311 exec_64_2_t(mach, inst, micro_i642f, TGSI_EXEC_DATA_FLOAT); 6312 break; 6313 6314 case TGSI_OPCODE_U642D: 6315 exec_double_unary(mach, inst, micro_u642d); 6316 break; 6317 case TGSI_OPCODE_I642D: 6318 exec_double_unary(mach, inst, micro_i642d); 6319 break; 6320 case TGSI_OPCODE_INTERP_SAMPLE: 6321 exec_interp_at_sample(mach, inst); 6322 break; 6323 case TGSI_OPCODE_INTERP_OFFSET: 6324 exec_interp_at_offset(mach, inst); 6325 break; 6326 case TGSI_OPCODE_INTERP_CENTROID: 6327 exec_interp_at_centroid(mach, inst); 6328 break; 6329 default: 6330 assert( 0 ); 6331 } 6332 return FALSE; 6333} 6334 6335static void 6336tgsi_exec_machine_setup_masks(struct tgsi_exec_machine *mach) 6337{ 6338 uint default_mask = 0xf; 6339 6340 mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; 6341 mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; 6342 6343 if (mach->ShaderType == PIPE_SHADER_GEOMETRY) { 6344 for (unsigned i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++) { 6345 mach->Temps[temp_prim_idxs[i].idx].xyzw[temp_prim_idxs[i].chan].u[0] = 0; 6346 mach->Primitives[i][0] = 0; 6347 } 6348 /* GS runs on a single primitive for now */ 6349 default_mask = 0x1; 6350 } 6351 6352 if (mach->NonHelperMask == 0) 6353 mach->NonHelperMask = default_mask; 6354 mach->CondMask = default_mask; 6355 mach->LoopMask = default_mask; 6356 mach->ContMask = default_mask; 6357 mach->FuncMask = default_mask; 6358 mach->ExecMask = default_mask; 6359 6360 mach->Switch.mask = default_mask; 6361 6362 assert(mach->CondStackTop == 0); 6363 assert(mach->LoopStackTop == 0); 6364 assert(mach->ContStackTop == 0); 6365 assert(mach->SwitchStackTop == 0); 6366 assert(mach->BreakStackTop == 0); 6367 assert(mach->CallStackTop == 0); 6368} 6369 6370/** 6371 * Run TGSI interpreter. 6372 * \return bitmask of "alive" quad components 6373 */ 6374uint 6375tgsi_exec_machine_run( struct tgsi_exec_machine *mach, int start_pc ) 6376{ 6377 uint i; 6378 6379 mach->pc = start_pc; 6380 6381 if (!start_pc) { 6382 tgsi_exec_machine_setup_masks(mach); 6383 6384 /* execute declarations (interpolants) */ 6385 for (i = 0; i < mach->NumDeclarations; i++) { 6386 exec_declaration( mach, mach->Declarations+i ); 6387 } 6388 } 6389 6390 { 6391#if DEBUG_EXECUTION 6392 struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS]; 6393 struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS]; 6394 uint inst = 1; 6395 6396 if (!start_pc) { 6397 memset(mach->Temps, 0, sizeof(temps)); 6398 if (mach->Outputs) 6399 memset(mach->Outputs, 0, sizeof(outputs)); 6400 memset(temps, 0, sizeof(temps)); 6401 memset(outputs, 0, sizeof(outputs)); 6402 } 6403#endif 6404 6405 /* execute instructions, until pc is set to -1 */ 6406 while (mach->pc != -1) { 6407 boolean barrier_hit; 6408#if DEBUG_EXECUTION 6409 uint i; 6410 6411 tgsi_dump_instruction(&mach->Instructions[mach->pc], inst++); 6412#endif 6413 6414 assert(mach->pc < (int) mach->NumInstructions); 6415 barrier_hit = exec_instruction(mach, mach->Instructions + mach->pc, &mach->pc); 6416 6417 /* for compute shaders if we hit a barrier return now for later rescheduling */ 6418 if (barrier_hit && mach->ShaderType == PIPE_SHADER_COMPUTE) 6419 return 0; 6420 6421#if DEBUG_EXECUTION 6422 for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) { 6423 if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) { 6424 uint j; 6425 6426 memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i])); 6427 debug_printf("TEMP[%2u] = ", i); 6428 for (j = 0; j < 4; j++) { 6429 if (j > 0) { 6430 debug_printf(" "); 6431 } 6432 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", 6433 temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j], 6434 temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j], 6435 temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j], 6436 temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]); 6437 } 6438 } 6439 } 6440 if (mach->Outputs) { 6441 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { 6442 if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) { 6443 uint j; 6444 6445 memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i])); 6446 debug_printf("OUT[%2u] = ", i); 6447 for (j = 0; j < 4; j++) { 6448 if (j > 0) { 6449 debug_printf(" "); 6450 } 6451 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", 6452 outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j], 6453 outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j], 6454 outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j], 6455 outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]); 6456 } 6457 } 6458 } 6459 } 6460#endif 6461 } 6462 } 6463 6464#if 0 6465 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ 6466 if (mach->ShaderType == PIPE_SHADER_FRAGMENT) { 6467 /* 6468 * Scale back depth component. 6469 */ 6470 for (i = 0; i < 4; i++) 6471 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; 6472 } 6473#endif 6474 6475 /* Strictly speaking, these assertions aren't really needed but they 6476 * can potentially catch some bugs in the control flow code. 6477 */ 6478 assert(mach->CondStackTop == 0); 6479 assert(mach->LoopStackTop == 0); 6480 assert(mach->ContStackTop == 0); 6481 assert(mach->SwitchStackTop == 0); 6482 assert(mach->BreakStackTop == 0); 6483 assert(mach->CallStackTop == 0); 6484 6485 return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; 6486} 6487