tgsi_exec.c revision 7ec681f3
1/************************************************************************** 2 * 3 * Copyright 2007-2008 VMware, Inc. 4 * All Rights Reserved. 5 * Copyright 2009-2010 VMware, Inc. All rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sub license, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial portions 17 * of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR 23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 **************************************************************************/ 28 29/** 30 * TGSI interpreter/executor. 31 * 32 * Flow control information: 33 * 34 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) 35 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special 36 * care since a condition may be true for some quad components but false 37 * for other components. 38 * 39 * We basically execute all statements (even if they're in the part of 40 * an IF/ELSE clause that's "not taken") and use a special mask to 41 * control writing to destination registers. This is the ExecMask. 42 * See store_dest(). 43 * 44 * The ExecMask is computed from three other masks (CondMask, LoopMask and 45 * ContMask) which are controlled by the flow control instructions (namely: 46 * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). 47 * 48 * 49 * Authors: 50 * Michal Krol 51 * Brian Paul 52 */ 53 54#include "pipe/p_compiler.h" 55#include "pipe/p_state.h" 56#include "pipe/p_shader_tokens.h" 57#include "tgsi/tgsi_dump.h" 58#include "tgsi/tgsi_parse.h" 59#include "tgsi/tgsi_util.h" 60#include "tgsi_exec.h" 61#include "util/compiler.h" 62#include "util/half_float.h" 63#include "util/u_memory.h" 64#include "util/u_math.h" 65#include "util/rounding.h" 66 67 68#define DEBUG_EXECUTION 0 69 70 71#define TILE_TOP_LEFT 0 72#define TILE_TOP_RIGHT 1 73#define TILE_BOTTOM_LEFT 2 74#define TILE_BOTTOM_RIGHT 3 75 76union tgsi_double_channel { 77 double d[TGSI_QUAD_SIZE]; 78 unsigned u[TGSI_QUAD_SIZE][2]; 79 uint64_t u64[TGSI_QUAD_SIZE]; 80 int64_t i64[TGSI_QUAD_SIZE]; 81} ALIGN16; 82 83struct ALIGN16 tgsi_double_vector { 84 union tgsi_double_channel xy; 85 union tgsi_double_channel zw; 86}; 87 88static void 89micro_abs(union tgsi_exec_channel *dst, 90 const union tgsi_exec_channel *src) 91{ 92 dst->f[0] = fabsf(src->f[0]); 93 dst->f[1] = fabsf(src->f[1]); 94 dst->f[2] = fabsf(src->f[2]); 95 dst->f[3] = fabsf(src->f[3]); 96} 97 98static void 99micro_arl(union tgsi_exec_channel *dst, 100 const union tgsi_exec_channel *src) 101{ 102 dst->i[0] = (int)floorf(src->f[0]); 103 dst->i[1] = (int)floorf(src->f[1]); 104 dst->i[2] = (int)floorf(src->f[2]); 105 dst->i[3] = (int)floorf(src->f[3]); 106} 107 108static void 109micro_arr(union tgsi_exec_channel *dst, 110 const union tgsi_exec_channel *src) 111{ 112 dst->i[0] = (int)floorf(src->f[0] + 0.5f); 113 dst->i[1] = (int)floorf(src->f[1] + 0.5f); 114 dst->i[2] = (int)floorf(src->f[2] + 0.5f); 115 dst->i[3] = (int)floorf(src->f[3] + 0.5f); 116} 117 118static void 119micro_ceil(union tgsi_exec_channel *dst, 120 const union tgsi_exec_channel *src) 121{ 122 dst->f[0] = ceilf(src->f[0]); 123 dst->f[1] = ceilf(src->f[1]); 124 dst->f[2] = ceilf(src->f[2]); 125 dst->f[3] = ceilf(src->f[3]); 126} 127 128static void 129micro_cmp(union tgsi_exec_channel *dst, 130 const union tgsi_exec_channel *src0, 131 const union tgsi_exec_channel *src1, 132 const union tgsi_exec_channel *src2) 133{ 134 dst->f[0] = src0->f[0] < 0.0f ? src1->f[0] : src2->f[0]; 135 dst->f[1] = src0->f[1] < 0.0f ? src1->f[1] : src2->f[1]; 136 dst->f[2] = src0->f[2] < 0.0f ? src1->f[2] : src2->f[2]; 137 dst->f[3] = src0->f[3] < 0.0f ? src1->f[3] : src2->f[3]; 138} 139 140static void 141micro_cos(union tgsi_exec_channel *dst, 142 const union tgsi_exec_channel *src) 143{ 144 dst->f[0] = cosf(src->f[0]); 145 dst->f[1] = cosf(src->f[1]); 146 dst->f[2] = cosf(src->f[2]); 147 dst->f[3] = cosf(src->f[3]); 148} 149 150static void 151micro_d2f(union tgsi_exec_channel *dst, 152 const union tgsi_double_channel *src) 153{ 154 dst->f[0] = (float)src->d[0]; 155 dst->f[1] = (float)src->d[1]; 156 dst->f[2] = (float)src->d[2]; 157 dst->f[3] = (float)src->d[3]; 158} 159 160static void 161micro_d2i(union tgsi_exec_channel *dst, 162 const union tgsi_double_channel *src) 163{ 164 dst->i[0] = (int)src->d[0]; 165 dst->i[1] = (int)src->d[1]; 166 dst->i[2] = (int)src->d[2]; 167 dst->i[3] = (int)src->d[3]; 168} 169 170static void 171micro_d2u(union tgsi_exec_channel *dst, 172 const union tgsi_double_channel *src) 173{ 174 dst->u[0] = (unsigned)src->d[0]; 175 dst->u[1] = (unsigned)src->d[1]; 176 dst->u[2] = (unsigned)src->d[2]; 177 dst->u[3] = (unsigned)src->d[3]; 178} 179static void 180micro_dabs(union tgsi_double_channel *dst, 181 const union tgsi_double_channel *src) 182{ 183 dst->d[0] = src->d[0] >= 0.0 ? src->d[0] : -src->d[0]; 184 dst->d[1] = src->d[1] >= 0.0 ? src->d[1] : -src->d[1]; 185 dst->d[2] = src->d[2] >= 0.0 ? src->d[2] : -src->d[2]; 186 dst->d[3] = src->d[3] >= 0.0 ? src->d[3] : -src->d[3]; 187} 188 189static void 190micro_dadd(union tgsi_double_channel *dst, 191 const union tgsi_double_channel *src) 192{ 193 dst->d[0] = src[0].d[0] + src[1].d[0]; 194 dst->d[1] = src[0].d[1] + src[1].d[1]; 195 dst->d[2] = src[0].d[2] + src[1].d[2]; 196 dst->d[3] = src[0].d[3] + src[1].d[3]; 197} 198 199static void 200micro_ddiv(union tgsi_double_channel *dst, 201 const union tgsi_double_channel *src) 202{ 203 dst->d[0] = src[0].d[0] / src[1].d[0]; 204 dst->d[1] = src[0].d[1] / src[1].d[1]; 205 dst->d[2] = src[0].d[2] / src[1].d[2]; 206 dst->d[3] = src[0].d[3] / src[1].d[3]; 207} 208 209static void 210micro_ddx(union tgsi_exec_channel *dst, 211 const union tgsi_exec_channel *src) 212{ 213 dst->f[0] = 214 dst->f[1] = 215 dst->f[2] = 216 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; 217} 218 219static void 220micro_ddx_fine(union tgsi_exec_channel *dst, 221 const union tgsi_exec_channel *src) 222{ 223 dst->f[0] = 224 dst->f[1] = src->f[TILE_TOP_RIGHT] - src->f[TILE_TOP_LEFT]; 225 dst->f[2] = 226 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; 227} 228 229 230static void 231micro_ddy(union tgsi_exec_channel *dst, 232 const union tgsi_exec_channel *src) 233{ 234 dst->f[0] = 235 dst->f[1] = 236 dst->f[2] = 237 dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; 238} 239 240static void 241micro_ddy_fine(union tgsi_exec_channel *dst, 242 const union tgsi_exec_channel *src) 243{ 244 dst->f[0] = 245 dst->f[2] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT]; 246 dst->f[1] = 247 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_TOP_RIGHT]; 248} 249 250static void 251micro_dmul(union tgsi_double_channel *dst, 252 const union tgsi_double_channel *src) 253{ 254 dst->d[0] = src[0].d[0] * src[1].d[0]; 255 dst->d[1] = src[0].d[1] * src[1].d[1]; 256 dst->d[2] = src[0].d[2] * src[1].d[2]; 257 dst->d[3] = src[0].d[3] * src[1].d[3]; 258} 259 260static void 261micro_dmax(union tgsi_double_channel *dst, 262 const union tgsi_double_channel *src) 263{ 264 dst->d[0] = fmax(src[0].d[0], src[1].d[0]); 265 dst->d[1] = fmax(src[0].d[1], src[1].d[1]); 266 dst->d[2] = fmax(src[0].d[2], src[1].d[2]); 267 dst->d[3] = fmax(src[0].d[3], src[1].d[3]); 268} 269 270static void 271micro_dmin(union tgsi_double_channel *dst, 272 const union tgsi_double_channel *src) 273{ 274 dst->d[0] = fmin(src[0].d[0], src[1].d[0]); 275 dst->d[1] = fmin(src[0].d[1], src[1].d[1]); 276 dst->d[2] = fmin(src[0].d[2], src[1].d[2]); 277 dst->d[3] = fmin(src[0].d[3], src[1].d[3]); 278} 279 280static void 281micro_dneg(union tgsi_double_channel *dst, 282 const union tgsi_double_channel *src) 283{ 284 dst->d[0] = -src->d[0]; 285 dst->d[1] = -src->d[1]; 286 dst->d[2] = -src->d[2]; 287 dst->d[3] = -src->d[3]; 288} 289 290static void 291micro_dslt(union tgsi_double_channel *dst, 292 const union tgsi_double_channel *src) 293{ 294 dst->u[0][0] = src[0].d[0] < src[1].d[0] ? ~0U : 0U; 295 dst->u[1][0] = src[0].d[1] < src[1].d[1] ? ~0U : 0U; 296 dst->u[2][0] = src[0].d[2] < src[1].d[2] ? ~0U : 0U; 297 dst->u[3][0] = src[0].d[3] < src[1].d[3] ? ~0U : 0U; 298} 299 300static void 301micro_dsne(union tgsi_double_channel *dst, 302 const union tgsi_double_channel *src) 303{ 304 dst->u[0][0] = src[0].d[0] != src[1].d[0] ? ~0U : 0U; 305 dst->u[1][0] = src[0].d[1] != src[1].d[1] ? ~0U : 0U; 306 dst->u[2][0] = src[0].d[2] != src[1].d[2] ? ~0U : 0U; 307 dst->u[3][0] = src[0].d[3] != src[1].d[3] ? ~0U : 0U; 308} 309 310static void 311micro_dsge(union tgsi_double_channel *dst, 312 const union tgsi_double_channel *src) 313{ 314 dst->u[0][0] = src[0].d[0] >= src[1].d[0] ? ~0U : 0U; 315 dst->u[1][0] = src[0].d[1] >= src[1].d[1] ? ~0U : 0U; 316 dst->u[2][0] = src[0].d[2] >= src[1].d[2] ? ~0U : 0U; 317 dst->u[3][0] = src[0].d[3] >= src[1].d[3] ? ~0U : 0U; 318} 319 320static void 321micro_dseq(union tgsi_double_channel *dst, 322 const union tgsi_double_channel *src) 323{ 324 dst->u[0][0] = src[0].d[0] == src[1].d[0] ? ~0U : 0U; 325 dst->u[1][0] = src[0].d[1] == src[1].d[1] ? ~0U : 0U; 326 dst->u[2][0] = src[0].d[2] == src[1].d[2] ? ~0U : 0U; 327 dst->u[3][0] = src[0].d[3] == src[1].d[3] ? ~0U : 0U; 328} 329 330static void 331micro_drcp(union tgsi_double_channel *dst, 332 const union tgsi_double_channel *src) 333{ 334 dst->d[0] = 1.0 / src->d[0]; 335 dst->d[1] = 1.0 / src->d[1]; 336 dst->d[2] = 1.0 / src->d[2]; 337 dst->d[3] = 1.0 / src->d[3]; 338} 339 340static void 341micro_dsqrt(union tgsi_double_channel *dst, 342 const union tgsi_double_channel *src) 343{ 344 dst->d[0] = sqrt(src->d[0]); 345 dst->d[1] = sqrt(src->d[1]); 346 dst->d[2] = sqrt(src->d[2]); 347 dst->d[3] = sqrt(src->d[3]); 348} 349 350static void 351micro_drsq(union tgsi_double_channel *dst, 352 const union tgsi_double_channel *src) 353{ 354 dst->d[0] = 1.0 / sqrt(src->d[0]); 355 dst->d[1] = 1.0 / sqrt(src->d[1]); 356 dst->d[2] = 1.0 / sqrt(src->d[2]); 357 dst->d[3] = 1.0 / sqrt(src->d[3]); 358} 359 360static void 361micro_dmad(union tgsi_double_channel *dst, 362 const union tgsi_double_channel *src) 363{ 364 dst->d[0] = src[0].d[0] * src[1].d[0] + src[2].d[0]; 365 dst->d[1] = src[0].d[1] * src[1].d[1] + src[2].d[1]; 366 dst->d[2] = src[0].d[2] * src[1].d[2] + src[2].d[2]; 367 dst->d[3] = src[0].d[3] * src[1].d[3] + src[2].d[3]; 368} 369 370static void 371micro_dfrac(union tgsi_double_channel *dst, 372 const union tgsi_double_channel *src) 373{ 374 dst->d[0] = src->d[0] - floor(src->d[0]); 375 dst->d[1] = src->d[1] - floor(src->d[1]); 376 dst->d[2] = src->d[2] - floor(src->d[2]); 377 dst->d[3] = src->d[3] - floor(src->d[3]); 378} 379 380static void 381micro_dflr(union tgsi_double_channel *dst, 382 const union tgsi_double_channel *src) 383{ 384 dst->d[0] = floor(src->d[0]); 385 dst->d[1] = floor(src->d[1]); 386 dst->d[2] = floor(src->d[2]); 387 dst->d[3] = floor(src->d[3]); 388} 389 390static void 391micro_dldexp(union tgsi_double_channel *dst, 392 const union tgsi_double_channel *src0, 393 union tgsi_exec_channel *src1) 394{ 395 dst->d[0] = ldexp(src0->d[0], src1->i[0]); 396 dst->d[1] = ldexp(src0->d[1], src1->i[1]); 397 dst->d[2] = ldexp(src0->d[2], src1->i[2]); 398 dst->d[3] = ldexp(src0->d[3], src1->i[3]); 399} 400 401static void 402micro_dfracexp(union tgsi_double_channel *dst, 403 union tgsi_exec_channel *dst_exp, 404 const union tgsi_double_channel *src) 405{ 406 dst->d[0] = frexp(src->d[0], &dst_exp->i[0]); 407 dst->d[1] = frexp(src->d[1], &dst_exp->i[1]); 408 dst->d[2] = frexp(src->d[2], &dst_exp->i[2]); 409 dst->d[3] = frexp(src->d[3], &dst_exp->i[3]); 410} 411 412static void 413micro_exp2(union tgsi_exec_channel *dst, 414 const union tgsi_exec_channel *src) 415{ 416#if DEBUG 417 /* Inf is okay for this instruction, so clamp it to silence assertions. */ 418 uint i; 419 union tgsi_exec_channel clamped; 420 421 for (i = 0; i < 4; i++) { 422 if (src->f[i] > 127.99999f) { 423 clamped.f[i] = 127.99999f; 424 } else if (src->f[i] < -126.99999f) { 425 clamped.f[i] = -126.99999f; 426 } else { 427 clamped.f[i] = src->f[i]; 428 } 429 } 430 src = &clamped; 431#endif /* DEBUG */ 432 433 dst->f[0] = powf(2.0f, src->f[0]); 434 dst->f[1] = powf(2.0f, src->f[1]); 435 dst->f[2] = powf(2.0f, src->f[2]); 436 dst->f[3] = powf(2.0f, src->f[3]); 437} 438 439static void 440micro_f2d(union tgsi_double_channel *dst, 441 const union tgsi_exec_channel *src) 442{ 443 dst->d[0] = (double)src->f[0]; 444 dst->d[1] = (double)src->f[1]; 445 dst->d[2] = (double)src->f[2]; 446 dst->d[3] = (double)src->f[3]; 447} 448 449static void 450micro_flr(union tgsi_exec_channel *dst, 451 const union tgsi_exec_channel *src) 452{ 453 dst->f[0] = floorf(src->f[0]); 454 dst->f[1] = floorf(src->f[1]); 455 dst->f[2] = floorf(src->f[2]); 456 dst->f[3] = floorf(src->f[3]); 457} 458 459static void 460micro_frc(union tgsi_exec_channel *dst, 461 const union tgsi_exec_channel *src) 462{ 463 dst->f[0] = src->f[0] - floorf(src->f[0]); 464 dst->f[1] = src->f[1] - floorf(src->f[1]); 465 dst->f[2] = src->f[2] - floorf(src->f[2]); 466 dst->f[3] = src->f[3] - floorf(src->f[3]); 467} 468 469static void 470micro_i2d(union tgsi_double_channel *dst, 471 const union tgsi_exec_channel *src) 472{ 473 dst->d[0] = (double)src->i[0]; 474 dst->d[1] = (double)src->i[1]; 475 dst->d[2] = (double)src->i[2]; 476 dst->d[3] = (double)src->i[3]; 477} 478 479static void 480micro_iabs(union tgsi_exec_channel *dst, 481 const union tgsi_exec_channel *src) 482{ 483 dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0]; 484 dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1]; 485 dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2]; 486 dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3]; 487} 488 489static void 490micro_ineg(union tgsi_exec_channel *dst, 491 const union tgsi_exec_channel *src) 492{ 493 dst->i[0] = -src->i[0]; 494 dst->i[1] = -src->i[1]; 495 dst->i[2] = -src->i[2]; 496 dst->i[3] = -src->i[3]; 497} 498 499static void 500micro_lg2(union tgsi_exec_channel *dst, 501 const union tgsi_exec_channel *src) 502{ 503 dst->f[0] = logf(src->f[0]) * 1.442695f; 504 dst->f[1] = logf(src->f[1]) * 1.442695f; 505 dst->f[2] = logf(src->f[2]) * 1.442695f; 506 dst->f[3] = logf(src->f[3]) * 1.442695f; 507} 508 509static void 510micro_lrp(union tgsi_exec_channel *dst, 511 const union tgsi_exec_channel *src0, 512 const union tgsi_exec_channel *src1, 513 const union tgsi_exec_channel *src2) 514{ 515 dst->f[0] = src0->f[0] * (src1->f[0] - src2->f[0]) + src2->f[0]; 516 dst->f[1] = src0->f[1] * (src1->f[1] - src2->f[1]) + src2->f[1]; 517 dst->f[2] = src0->f[2] * (src1->f[2] - src2->f[2]) + src2->f[2]; 518 dst->f[3] = src0->f[3] * (src1->f[3] - src2->f[3]) + src2->f[3]; 519} 520 521static void 522micro_mad(union tgsi_exec_channel *dst, 523 const union tgsi_exec_channel *src0, 524 const union tgsi_exec_channel *src1, 525 const union tgsi_exec_channel *src2) 526{ 527 dst->f[0] = src0->f[0] * src1->f[0] + src2->f[0]; 528 dst->f[1] = src0->f[1] * src1->f[1] + src2->f[1]; 529 dst->f[2] = src0->f[2] * src1->f[2] + src2->f[2]; 530 dst->f[3] = src0->f[3] * src1->f[3] + src2->f[3]; 531} 532 533static void 534micro_mov(union tgsi_exec_channel *dst, 535 const union tgsi_exec_channel *src) 536{ 537 dst->u[0] = src->u[0]; 538 dst->u[1] = src->u[1]; 539 dst->u[2] = src->u[2]; 540 dst->u[3] = src->u[3]; 541} 542 543static void 544micro_rcp(union tgsi_exec_channel *dst, 545 const union tgsi_exec_channel *src) 546{ 547#if 0 /* for debugging */ 548 assert(src->f[0] != 0.0f); 549 assert(src->f[1] != 0.0f); 550 assert(src->f[2] != 0.0f); 551 assert(src->f[3] != 0.0f); 552#endif 553 dst->f[0] = 1.0f / src->f[0]; 554 dst->f[1] = 1.0f / src->f[1]; 555 dst->f[2] = 1.0f / src->f[2]; 556 dst->f[3] = 1.0f / src->f[3]; 557} 558 559static void 560micro_rnd(union tgsi_exec_channel *dst, 561 const union tgsi_exec_channel *src) 562{ 563 dst->f[0] = _mesa_roundevenf(src->f[0]); 564 dst->f[1] = _mesa_roundevenf(src->f[1]); 565 dst->f[2] = _mesa_roundevenf(src->f[2]); 566 dst->f[3] = _mesa_roundevenf(src->f[3]); 567} 568 569static void 570micro_rsq(union tgsi_exec_channel *dst, 571 const union tgsi_exec_channel *src) 572{ 573#if 0 /* for debugging */ 574 assert(src->f[0] != 0.0f); 575 assert(src->f[1] != 0.0f); 576 assert(src->f[2] != 0.0f); 577 assert(src->f[3] != 0.0f); 578#endif 579 dst->f[0] = 1.0f / sqrtf(src->f[0]); 580 dst->f[1] = 1.0f / sqrtf(src->f[1]); 581 dst->f[2] = 1.0f / sqrtf(src->f[2]); 582 dst->f[3] = 1.0f / sqrtf(src->f[3]); 583} 584 585static void 586micro_sqrt(union tgsi_exec_channel *dst, 587 const union tgsi_exec_channel *src) 588{ 589 dst->f[0] = sqrtf(src->f[0]); 590 dst->f[1] = sqrtf(src->f[1]); 591 dst->f[2] = sqrtf(src->f[2]); 592 dst->f[3] = sqrtf(src->f[3]); 593} 594 595static void 596micro_seq(union tgsi_exec_channel *dst, 597 const union tgsi_exec_channel *src0, 598 const union tgsi_exec_channel *src1) 599{ 600 dst->f[0] = src0->f[0] == src1->f[0] ? 1.0f : 0.0f; 601 dst->f[1] = src0->f[1] == src1->f[1] ? 1.0f : 0.0f; 602 dst->f[2] = src0->f[2] == src1->f[2] ? 1.0f : 0.0f; 603 dst->f[3] = src0->f[3] == src1->f[3] ? 1.0f : 0.0f; 604} 605 606static void 607micro_sge(union tgsi_exec_channel *dst, 608 const union tgsi_exec_channel *src0, 609 const union tgsi_exec_channel *src1) 610{ 611 dst->f[0] = src0->f[0] >= src1->f[0] ? 1.0f : 0.0f; 612 dst->f[1] = src0->f[1] >= src1->f[1] ? 1.0f : 0.0f; 613 dst->f[2] = src0->f[2] >= src1->f[2] ? 1.0f : 0.0f; 614 dst->f[3] = src0->f[3] >= src1->f[3] ? 1.0f : 0.0f; 615} 616 617static void 618micro_sgn(union tgsi_exec_channel *dst, 619 const union tgsi_exec_channel *src) 620{ 621 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; 622 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; 623 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; 624 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; 625} 626 627static void 628micro_isgn(union tgsi_exec_channel *dst, 629 const union tgsi_exec_channel *src) 630{ 631 dst->i[0] = src->i[0] < 0 ? -1 : src->i[0] > 0 ? 1 : 0; 632 dst->i[1] = src->i[1] < 0 ? -1 : src->i[1] > 0 ? 1 : 0; 633 dst->i[2] = src->i[2] < 0 ? -1 : src->i[2] > 0 ? 1 : 0; 634 dst->i[3] = src->i[3] < 0 ? -1 : src->i[3] > 0 ? 1 : 0; 635} 636 637static void 638micro_sgt(union tgsi_exec_channel *dst, 639 const union tgsi_exec_channel *src0, 640 const union tgsi_exec_channel *src1) 641{ 642 dst->f[0] = src0->f[0] > src1->f[0] ? 1.0f : 0.0f; 643 dst->f[1] = src0->f[1] > src1->f[1] ? 1.0f : 0.0f; 644 dst->f[2] = src0->f[2] > src1->f[2] ? 1.0f : 0.0f; 645 dst->f[3] = src0->f[3] > src1->f[3] ? 1.0f : 0.0f; 646} 647 648static void 649micro_sin(union tgsi_exec_channel *dst, 650 const union tgsi_exec_channel *src) 651{ 652 dst->f[0] = sinf(src->f[0]); 653 dst->f[1] = sinf(src->f[1]); 654 dst->f[2] = sinf(src->f[2]); 655 dst->f[3] = sinf(src->f[3]); 656} 657 658static void 659micro_sle(union tgsi_exec_channel *dst, 660 const union tgsi_exec_channel *src0, 661 const union tgsi_exec_channel *src1) 662{ 663 dst->f[0] = src0->f[0] <= src1->f[0] ? 1.0f : 0.0f; 664 dst->f[1] = src0->f[1] <= src1->f[1] ? 1.0f : 0.0f; 665 dst->f[2] = src0->f[2] <= src1->f[2] ? 1.0f : 0.0f; 666 dst->f[3] = src0->f[3] <= src1->f[3] ? 1.0f : 0.0f; 667} 668 669static void 670micro_slt(union tgsi_exec_channel *dst, 671 const union tgsi_exec_channel *src0, 672 const union tgsi_exec_channel *src1) 673{ 674 dst->f[0] = src0->f[0] < src1->f[0] ? 1.0f : 0.0f; 675 dst->f[1] = src0->f[1] < src1->f[1] ? 1.0f : 0.0f; 676 dst->f[2] = src0->f[2] < src1->f[2] ? 1.0f : 0.0f; 677 dst->f[3] = src0->f[3] < src1->f[3] ? 1.0f : 0.0f; 678} 679 680static void 681micro_sne(union tgsi_exec_channel *dst, 682 const union tgsi_exec_channel *src0, 683 const union tgsi_exec_channel *src1) 684{ 685 dst->f[0] = src0->f[0] != src1->f[0] ? 1.0f : 0.0f; 686 dst->f[1] = src0->f[1] != src1->f[1] ? 1.0f : 0.0f; 687 dst->f[2] = src0->f[2] != src1->f[2] ? 1.0f : 0.0f; 688 dst->f[3] = src0->f[3] != src1->f[3] ? 1.0f : 0.0f; 689} 690 691static void 692micro_trunc(union tgsi_exec_channel *dst, 693 const union tgsi_exec_channel *src) 694{ 695 dst->f[0] = truncf(src->f[0]); 696 dst->f[1] = truncf(src->f[1]); 697 dst->f[2] = truncf(src->f[2]); 698 dst->f[3] = truncf(src->f[3]); 699} 700 701static void 702micro_u2d(union tgsi_double_channel *dst, 703 const union tgsi_exec_channel *src) 704{ 705 dst->d[0] = (double)src->u[0]; 706 dst->d[1] = (double)src->u[1]; 707 dst->d[2] = (double)src->u[2]; 708 dst->d[3] = (double)src->u[3]; 709} 710 711static void 712micro_i64abs(union tgsi_double_channel *dst, 713 const union tgsi_double_channel *src) 714{ 715 dst->i64[0] = src->i64[0] >= 0.0 ? src->i64[0] : -src->i64[0]; 716 dst->i64[1] = src->i64[1] >= 0.0 ? src->i64[1] : -src->i64[1]; 717 dst->i64[2] = src->i64[2] >= 0.0 ? src->i64[2] : -src->i64[2]; 718 dst->i64[3] = src->i64[3] >= 0.0 ? src->i64[3] : -src->i64[3]; 719} 720 721static void 722micro_i64sgn(union tgsi_double_channel *dst, 723 const union tgsi_double_channel *src) 724{ 725 dst->i64[0] = src->i64[0] < 0 ? -1 : src->i64[0] > 0 ? 1 : 0; 726 dst->i64[1] = src->i64[1] < 0 ? -1 : src->i64[1] > 0 ? 1 : 0; 727 dst->i64[2] = src->i64[2] < 0 ? -1 : src->i64[2] > 0 ? 1 : 0; 728 dst->i64[3] = src->i64[3] < 0 ? -1 : src->i64[3] > 0 ? 1 : 0; 729} 730 731static void 732micro_i64neg(union tgsi_double_channel *dst, 733 const union tgsi_double_channel *src) 734{ 735 dst->i64[0] = -src->i64[0]; 736 dst->i64[1] = -src->i64[1]; 737 dst->i64[2] = -src->i64[2]; 738 dst->i64[3] = -src->i64[3]; 739} 740 741static void 742micro_u64seq(union tgsi_double_channel *dst, 743 const union tgsi_double_channel *src) 744{ 745 dst->u[0][0] = src[0].u64[0] == src[1].u64[0] ? ~0U : 0U; 746 dst->u[1][0] = src[0].u64[1] == src[1].u64[1] ? ~0U : 0U; 747 dst->u[2][0] = src[0].u64[2] == src[1].u64[2] ? ~0U : 0U; 748 dst->u[3][0] = src[0].u64[3] == src[1].u64[3] ? ~0U : 0U; 749} 750 751static void 752micro_u64sne(union tgsi_double_channel *dst, 753 const union tgsi_double_channel *src) 754{ 755 dst->u[0][0] = src[0].u64[0] != src[1].u64[0] ? ~0U : 0U; 756 dst->u[1][0] = src[0].u64[1] != src[1].u64[1] ? ~0U : 0U; 757 dst->u[2][0] = src[0].u64[2] != src[1].u64[2] ? ~0U : 0U; 758 dst->u[3][0] = src[0].u64[3] != src[1].u64[3] ? ~0U : 0U; 759} 760 761static void 762micro_i64slt(union tgsi_double_channel *dst, 763 const union tgsi_double_channel *src) 764{ 765 dst->u[0][0] = src[0].i64[0] < src[1].i64[0] ? ~0U : 0U; 766 dst->u[1][0] = src[0].i64[1] < src[1].i64[1] ? ~0U : 0U; 767 dst->u[2][0] = src[0].i64[2] < src[1].i64[2] ? ~0U : 0U; 768 dst->u[3][0] = src[0].i64[3] < src[1].i64[3] ? ~0U : 0U; 769} 770 771static void 772micro_u64slt(union tgsi_double_channel *dst, 773 const union tgsi_double_channel *src) 774{ 775 dst->u[0][0] = src[0].u64[0] < src[1].u64[0] ? ~0U : 0U; 776 dst->u[1][0] = src[0].u64[1] < src[1].u64[1] ? ~0U : 0U; 777 dst->u[2][0] = src[0].u64[2] < src[1].u64[2] ? ~0U : 0U; 778 dst->u[3][0] = src[0].u64[3] < src[1].u64[3] ? ~0U : 0U; 779} 780 781static void 782micro_i64sge(union tgsi_double_channel *dst, 783 const union tgsi_double_channel *src) 784{ 785 dst->u[0][0] = src[0].i64[0] >= src[1].i64[0] ? ~0U : 0U; 786 dst->u[1][0] = src[0].i64[1] >= src[1].i64[1] ? ~0U : 0U; 787 dst->u[2][0] = src[0].i64[2] >= src[1].i64[2] ? ~0U : 0U; 788 dst->u[3][0] = src[0].i64[3] >= src[1].i64[3] ? ~0U : 0U; 789} 790 791static void 792micro_u64sge(union tgsi_double_channel *dst, 793 const union tgsi_double_channel *src) 794{ 795 dst->u[0][0] = src[0].u64[0] >= src[1].u64[0] ? ~0U : 0U; 796 dst->u[1][0] = src[0].u64[1] >= src[1].u64[1] ? ~0U : 0U; 797 dst->u[2][0] = src[0].u64[2] >= src[1].u64[2] ? ~0U : 0U; 798 dst->u[3][0] = src[0].u64[3] >= src[1].u64[3] ? ~0U : 0U; 799} 800 801static void 802micro_u64max(union tgsi_double_channel *dst, 803 const union tgsi_double_channel *src) 804{ 805 dst->u64[0] = src[0].u64[0] > src[1].u64[0] ? src[0].u64[0] : src[1].u64[0]; 806 dst->u64[1] = src[0].u64[1] > src[1].u64[1] ? src[0].u64[1] : src[1].u64[1]; 807 dst->u64[2] = src[0].u64[2] > src[1].u64[2] ? src[0].u64[2] : src[1].u64[2]; 808 dst->u64[3] = src[0].u64[3] > src[1].u64[3] ? src[0].u64[3] : src[1].u64[3]; 809} 810 811static void 812micro_i64max(union tgsi_double_channel *dst, 813 const union tgsi_double_channel *src) 814{ 815 dst->i64[0] = src[0].i64[0] > src[1].i64[0] ? src[0].i64[0] : src[1].i64[0]; 816 dst->i64[1] = src[0].i64[1] > src[1].i64[1] ? src[0].i64[1] : src[1].i64[1]; 817 dst->i64[2] = src[0].i64[2] > src[1].i64[2] ? src[0].i64[2] : src[1].i64[2]; 818 dst->i64[3] = src[0].i64[3] > src[1].i64[3] ? src[0].i64[3] : src[1].i64[3]; 819} 820 821static void 822micro_u64min(union tgsi_double_channel *dst, 823 const union tgsi_double_channel *src) 824{ 825 dst->u64[0] = src[0].u64[0] < src[1].u64[0] ? src[0].u64[0] : src[1].u64[0]; 826 dst->u64[1] = src[0].u64[1] < src[1].u64[1] ? src[0].u64[1] : src[1].u64[1]; 827 dst->u64[2] = src[0].u64[2] < src[1].u64[2] ? src[0].u64[2] : src[1].u64[2]; 828 dst->u64[3] = src[0].u64[3] < src[1].u64[3] ? src[0].u64[3] : src[1].u64[3]; 829} 830 831static void 832micro_i64min(union tgsi_double_channel *dst, 833 const union tgsi_double_channel *src) 834{ 835 dst->i64[0] = src[0].i64[0] < src[1].i64[0] ? src[0].i64[0] : src[1].i64[0]; 836 dst->i64[1] = src[0].i64[1] < src[1].i64[1] ? src[0].i64[1] : src[1].i64[1]; 837 dst->i64[2] = src[0].i64[2] < src[1].i64[2] ? src[0].i64[2] : src[1].i64[2]; 838 dst->i64[3] = src[0].i64[3] < src[1].i64[3] ? src[0].i64[3] : src[1].i64[3]; 839} 840 841static void 842micro_u64add(union tgsi_double_channel *dst, 843 const union tgsi_double_channel *src) 844{ 845 dst->u64[0] = src[0].u64[0] + src[1].u64[0]; 846 dst->u64[1] = src[0].u64[1] + src[1].u64[1]; 847 dst->u64[2] = src[0].u64[2] + src[1].u64[2]; 848 dst->u64[3] = src[0].u64[3] + src[1].u64[3]; 849} 850 851static void 852micro_u64mul(union tgsi_double_channel *dst, 853 const union tgsi_double_channel *src) 854{ 855 dst->u64[0] = src[0].u64[0] * src[1].u64[0]; 856 dst->u64[1] = src[0].u64[1] * src[1].u64[1]; 857 dst->u64[2] = src[0].u64[2] * src[1].u64[2]; 858 dst->u64[3] = src[0].u64[3] * src[1].u64[3]; 859} 860 861static void 862micro_u64div(union tgsi_double_channel *dst, 863 const union tgsi_double_channel *src) 864{ 865 dst->u64[0] = src[1].u64[0] ? src[0].u64[0] / src[1].u64[0] : ~0ull; 866 dst->u64[1] = src[1].u64[1] ? src[0].u64[1] / src[1].u64[1] : ~0ull; 867 dst->u64[2] = src[1].u64[2] ? src[0].u64[2] / src[1].u64[2] : ~0ull; 868 dst->u64[3] = src[1].u64[3] ? src[0].u64[3] / src[1].u64[3] : ~0ull; 869} 870 871static void 872micro_i64div(union tgsi_double_channel *dst, 873 const union tgsi_double_channel *src) 874{ 875 dst->i64[0] = src[1].i64[0] ? src[0].i64[0] / src[1].i64[0] : 0; 876 dst->i64[1] = src[1].i64[1] ? src[0].i64[1] / src[1].i64[1] : 0; 877 dst->i64[2] = src[1].i64[2] ? src[0].i64[2] / src[1].i64[2] : 0; 878 dst->i64[3] = src[1].i64[3] ? src[0].i64[3] / src[1].i64[3] : 0; 879} 880 881static void 882micro_u64mod(union tgsi_double_channel *dst, 883 const union tgsi_double_channel *src) 884{ 885 dst->u64[0] = src[1].u64[0] ? src[0].u64[0] % src[1].u64[0] : ~0ull; 886 dst->u64[1] = src[1].u64[1] ? src[0].u64[1] % src[1].u64[1] : ~0ull; 887 dst->u64[2] = src[1].u64[2] ? src[0].u64[2] % src[1].u64[2] : ~0ull; 888 dst->u64[3] = src[1].u64[3] ? src[0].u64[3] % src[1].u64[3] : ~0ull; 889} 890 891static void 892micro_i64mod(union tgsi_double_channel *dst, 893 const union tgsi_double_channel *src) 894{ 895 dst->i64[0] = src[1].i64[0] ? src[0].i64[0] % src[1].i64[0] : ~0ll; 896 dst->i64[1] = src[1].i64[1] ? src[0].i64[1] % src[1].i64[1] : ~0ll; 897 dst->i64[2] = src[1].i64[2] ? src[0].i64[2] % src[1].i64[2] : ~0ll; 898 dst->i64[3] = src[1].i64[3] ? src[0].i64[3] % src[1].i64[3] : ~0ll; 899} 900 901static void 902micro_u64shl(union tgsi_double_channel *dst, 903 const union tgsi_double_channel *src0, 904 union tgsi_exec_channel *src1) 905{ 906 unsigned masked_count; 907 masked_count = src1->u[0] & 0x3f; 908 dst->u64[0] = src0->u64[0] << masked_count; 909 masked_count = src1->u[1] & 0x3f; 910 dst->u64[1] = src0->u64[1] << masked_count; 911 masked_count = src1->u[2] & 0x3f; 912 dst->u64[2] = src0->u64[2] << masked_count; 913 masked_count = src1->u[3] & 0x3f; 914 dst->u64[3] = src0->u64[3] << masked_count; 915} 916 917static void 918micro_i64shr(union tgsi_double_channel *dst, 919 const union tgsi_double_channel *src0, 920 union tgsi_exec_channel *src1) 921{ 922 unsigned masked_count; 923 masked_count = src1->u[0] & 0x3f; 924 dst->i64[0] = src0->i64[0] >> masked_count; 925 masked_count = src1->u[1] & 0x3f; 926 dst->i64[1] = src0->i64[1] >> masked_count; 927 masked_count = src1->u[2] & 0x3f; 928 dst->i64[2] = src0->i64[2] >> masked_count; 929 masked_count = src1->u[3] & 0x3f; 930 dst->i64[3] = src0->i64[3] >> masked_count; 931} 932 933static void 934micro_u64shr(union tgsi_double_channel *dst, 935 const union tgsi_double_channel *src0, 936 union tgsi_exec_channel *src1) 937{ 938 unsigned masked_count; 939 masked_count = src1->u[0] & 0x3f; 940 dst->u64[0] = src0->u64[0] >> masked_count; 941 masked_count = src1->u[1] & 0x3f; 942 dst->u64[1] = src0->u64[1] >> masked_count; 943 masked_count = src1->u[2] & 0x3f; 944 dst->u64[2] = src0->u64[2] >> masked_count; 945 masked_count = src1->u[3] & 0x3f; 946 dst->u64[3] = src0->u64[3] >> masked_count; 947} 948 949enum tgsi_exec_datatype { 950 TGSI_EXEC_DATA_FLOAT, 951 TGSI_EXEC_DATA_INT, 952 TGSI_EXEC_DATA_UINT, 953 TGSI_EXEC_DATA_DOUBLE, 954 TGSI_EXEC_DATA_INT64, 955 TGSI_EXEC_DATA_UINT64, 956}; 957 958/** The execution mask depends on the conditional mask and the loop mask */ 959#define UPDATE_EXEC_MASK(MACH) \ 960 MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->Switch.mask & MACH->FuncMask 961 962 963static const union tgsi_exec_channel ZeroVec = 964 { { 0.0, 0.0, 0.0, 0.0 } }; 965 966static const union tgsi_exec_channel OneVec = { 967 {1.0f, 1.0f, 1.0f, 1.0f} 968}; 969 970static const union tgsi_exec_channel P128Vec = { 971 {128.0f, 128.0f, 128.0f, 128.0f} 972}; 973 974static const union tgsi_exec_channel M128Vec = { 975 {-128.0f, -128.0f, -128.0f, -128.0f} 976}; 977 978 979/** 980 * Assert that none of the float values in 'chan' are infinite or NaN. 981 * NaN and Inf may occur normally during program execution and should 982 * not lead to crashes, etc. But when debugging, it's helpful to catch 983 * them. 984 */ 985static inline void 986check_inf_or_nan(const union tgsi_exec_channel *chan) 987{ 988 assert(!util_is_inf_or_nan((chan)->f[0])); 989 assert(!util_is_inf_or_nan((chan)->f[1])); 990 assert(!util_is_inf_or_nan((chan)->f[2])); 991 assert(!util_is_inf_or_nan((chan)->f[3])); 992} 993 994 995#ifdef DEBUG 996static void 997print_chan(const char *msg, const union tgsi_exec_channel *chan) 998{ 999 debug_printf("%s = {%f, %f, %f, %f}\n", 1000 msg, chan->f[0], chan->f[1], chan->f[2], chan->f[3]); 1001} 1002#endif 1003 1004 1005#ifdef DEBUG 1006static void 1007print_temp(const struct tgsi_exec_machine *mach, uint index) 1008{ 1009 const struct tgsi_exec_vector *tmp = &mach->Temps[index]; 1010 int i; 1011 debug_printf("Temp[%u] =\n", index); 1012 for (i = 0; i < 4; i++) { 1013 debug_printf(" %c: { %f, %f, %f, %f }\n", 1014 "XYZW"[i], 1015 tmp->xyzw[i].f[0], 1016 tmp->xyzw[i].f[1], 1017 tmp->xyzw[i].f[2], 1018 tmp->xyzw[i].f[3]); 1019 } 1020} 1021#endif 1022 1023 1024void 1025tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach, 1026 unsigned num_bufs, 1027 const void **bufs, 1028 const unsigned *buf_sizes) 1029{ 1030 unsigned i; 1031 1032 for (i = 0; i < num_bufs; i++) { 1033 mach->Consts[i] = bufs[i]; 1034 mach->ConstsSize[i] = buf_sizes[i]; 1035 } 1036} 1037 1038/** 1039 * Initialize machine state by expanding tokens to full instructions, 1040 * allocating temporary storage, setting up constants, etc. 1041 * After this, we can call tgsi_exec_machine_run() many times. 1042 */ 1043void 1044tgsi_exec_machine_bind_shader( 1045 struct tgsi_exec_machine *mach, 1046 const struct tgsi_token *tokens, 1047 struct tgsi_sampler *sampler, 1048 struct tgsi_image *image, 1049 struct tgsi_buffer *buffer) 1050{ 1051 uint k; 1052 struct tgsi_parse_context parse; 1053 struct tgsi_full_instruction *instructions; 1054 struct tgsi_full_declaration *declarations; 1055 uint maxInstructions = 10, numInstructions = 0; 1056 uint maxDeclarations = 10, numDeclarations = 0; 1057 1058#if 0 1059 tgsi_dump(tokens, 0); 1060#endif 1061 1062 mach->Tokens = tokens; 1063 mach->Sampler = sampler; 1064 mach->Image = image; 1065 mach->Buffer = buffer; 1066 1067 if (!tokens) { 1068 /* unbind and free all */ 1069 FREE(mach->Declarations); 1070 mach->Declarations = NULL; 1071 mach->NumDeclarations = 0; 1072 1073 FREE(mach->Instructions); 1074 mach->Instructions = NULL; 1075 mach->NumInstructions = 0; 1076 1077 return; 1078 } 1079 1080 k = tgsi_parse_init (&parse, mach->Tokens); 1081 if (k != TGSI_PARSE_OK) { 1082 debug_printf( "Problem parsing!\n" ); 1083 return; 1084 } 1085 1086 mach->ImmLimit = 0; 1087 mach->NumOutputs = 0; 1088 1089 for (k = 0; k < TGSI_SEMANTIC_COUNT; k++) 1090 mach->SysSemanticToIndex[k] = -1; 1091 1092 if (mach->ShaderType == PIPE_SHADER_GEOMETRY && 1093 !mach->UsedGeometryShader) { 1094 struct tgsi_exec_vector *inputs; 1095 struct tgsi_exec_vector *outputs; 1096 1097 inputs = align_malloc(sizeof(struct tgsi_exec_vector) * 1098 TGSI_MAX_PRIM_VERTICES * PIPE_MAX_SHADER_INPUTS, 1099 16); 1100 1101 if (!inputs) 1102 return; 1103 1104 outputs = align_malloc(sizeof(struct tgsi_exec_vector) * 1105 TGSI_MAX_TOTAL_VERTICES, 16); 1106 1107 if (!outputs) { 1108 align_free(inputs); 1109 return; 1110 } 1111 1112 align_free(mach->Inputs); 1113 align_free(mach->Outputs); 1114 1115 mach->Inputs = inputs; 1116 mach->Outputs = outputs; 1117 mach->UsedGeometryShader = TRUE; 1118 } 1119 1120 declarations = (struct tgsi_full_declaration *) 1121 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); 1122 1123 if (!declarations) { 1124 return; 1125 } 1126 1127 instructions = (struct tgsi_full_instruction *) 1128 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); 1129 1130 if (!instructions) { 1131 FREE( declarations ); 1132 return; 1133 } 1134 1135 while( !tgsi_parse_end_of_tokens( &parse ) ) { 1136 uint i; 1137 1138 tgsi_parse_token( &parse ); 1139 switch( parse.FullToken.Token.Type ) { 1140 case TGSI_TOKEN_TYPE_DECLARATION: 1141 /* save expanded declaration */ 1142 if (numDeclarations == maxDeclarations) { 1143 declarations = REALLOC(declarations, 1144 maxDeclarations 1145 * sizeof(struct tgsi_full_declaration), 1146 (maxDeclarations + 10) 1147 * sizeof(struct tgsi_full_declaration)); 1148 maxDeclarations += 10; 1149 } 1150 if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT) 1151 mach->NumOutputs = MAX2(mach->NumOutputs, parse.FullToken.FullDeclaration.Range.Last + 1); 1152 else if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_SYSTEM_VALUE) { 1153 const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration; 1154 mach->SysSemanticToIndex[decl->Semantic.Name] = decl->Range.First; 1155 } 1156 1157 memcpy(declarations + numDeclarations, 1158 &parse.FullToken.FullDeclaration, 1159 sizeof(declarations[0])); 1160 numDeclarations++; 1161 break; 1162 1163 case TGSI_TOKEN_TYPE_IMMEDIATE: 1164 { 1165 uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1; 1166 assert( size <= 4 ); 1167 if (mach->ImmLimit >= mach->ImmsReserved) { 1168 unsigned newReserved = mach->ImmsReserved ? 2 * mach->ImmsReserved : 128; 1169 float4 *imms = REALLOC(mach->Imms, mach->ImmsReserved, newReserved * sizeof(float4)); 1170 if (imms) { 1171 mach->ImmsReserved = newReserved; 1172 mach->Imms = imms; 1173 } else { 1174 debug_printf("Unable to (re)allocate space for immidiate constants\n"); 1175 break; 1176 } 1177 } 1178 1179 for( i = 0; i < size; i++ ) { 1180 mach->Imms[mach->ImmLimit][i] = 1181 parse.FullToken.FullImmediate.u[i].Float; 1182 } 1183 mach->ImmLimit += 1; 1184 } 1185 break; 1186 1187 case TGSI_TOKEN_TYPE_INSTRUCTION: 1188 1189 /* save expanded instruction */ 1190 if (numInstructions == maxInstructions) { 1191 instructions = REALLOC(instructions, 1192 maxInstructions 1193 * sizeof(struct tgsi_full_instruction), 1194 (maxInstructions + 10) 1195 * sizeof(struct tgsi_full_instruction)); 1196 maxInstructions += 10; 1197 } 1198 1199 memcpy(instructions + numInstructions, 1200 &parse.FullToken.FullInstruction, 1201 sizeof(instructions[0])); 1202 1203 numInstructions++; 1204 break; 1205 1206 case TGSI_TOKEN_TYPE_PROPERTY: 1207 if (mach->ShaderType == PIPE_SHADER_GEOMETRY) { 1208 if (parse.FullToken.FullProperty.Property.PropertyName == TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) { 1209 mach->MaxOutputVertices = parse.FullToken.FullProperty.u[0].Data; 1210 } 1211 } 1212 break; 1213 1214 default: 1215 assert( 0 ); 1216 } 1217 } 1218 tgsi_parse_free (&parse); 1219 1220 FREE(mach->Declarations); 1221 mach->Declarations = declarations; 1222 mach->NumDeclarations = numDeclarations; 1223 1224 FREE(mach->Instructions); 1225 mach->Instructions = instructions; 1226 mach->NumInstructions = numInstructions; 1227} 1228 1229 1230struct tgsi_exec_machine * 1231tgsi_exec_machine_create(enum pipe_shader_type shader_type) 1232{ 1233 struct tgsi_exec_machine *mach; 1234 1235 mach = align_malloc( sizeof *mach, 16 ); 1236 if (!mach) 1237 goto fail; 1238 1239 memset(mach, 0, sizeof(*mach)); 1240 1241 mach->ShaderType = shader_type; 1242 1243 if (shader_type != PIPE_SHADER_COMPUTE) { 1244 mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_INPUTS, 16); 1245 mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_OUTPUTS, 16); 1246 if (!mach->Inputs || !mach->Outputs) 1247 goto fail; 1248 } 1249 1250 if (shader_type == PIPE_SHADER_FRAGMENT) { 1251 mach->InputSampleOffsetApply = align_malloc(sizeof(apply_sample_offset_func) * PIPE_MAX_SHADER_INPUTS, 16); 1252 if (!mach->InputSampleOffsetApply) 1253 goto fail; 1254 } 1255 1256#ifdef DEBUG 1257 /* silence warnings */ 1258 (void) print_chan; 1259 (void) print_temp; 1260#endif 1261 1262 return mach; 1263 1264fail: 1265 if (mach) { 1266 align_free(mach->InputSampleOffsetApply); 1267 align_free(mach->Inputs); 1268 align_free(mach->Outputs); 1269 align_free(mach); 1270 } 1271 return NULL; 1272} 1273 1274 1275void 1276tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach) 1277{ 1278 if (mach) { 1279 FREE(mach->Instructions); 1280 FREE(mach->Declarations); 1281 FREE(mach->Imms); 1282 1283 align_free(mach->InputSampleOffsetApply); 1284 align_free(mach->Inputs); 1285 align_free(mach->Outputs); 1286 1287 align_free(mach); 1288 } 1289} 1290 1291static void 1292micro_add(union tgsi_exec_channel *dst, 1293 const union tgsi_exec_channel *src0, 1294 const union tgsi_exec_channel *src1) 1295{ 1296 dst->f[0] = src0->f[0] + src1->f[0]; 1297 dst->f[1] = src0->f[1] + src1->f[1]; 1298 dst->f[2] = src0->f[2] + src1->f[2]; 1299 dst->f[3] = src0->f[3] + src1->f[3]; 1300} 1301 1302static void 1303micro_div( 1304 union tgsi_exec_channel *dst, 1305 const union tgsi_exec_channel *src0, 1306 const union tgsi_exec_channel *src1 ) 1307{ 1308 if (src1->f[0] != 0) { 1309 dst->f[0] = src0->f[0] / src1->f[0]; 1310 } 1311 if (src1->f[1] != 0) { 1312 dst->f[1] = src0->f[1] / src1->f[1]; 1313 } 1314 if (src1->f[2] != 0) { 1315 dst->f[2] = src0->f[2] / src1->f[2]; 1316 } 1317 if (src1->f[3] != 0) { 1318 dst->f[3] = src0->f[3] / src1->f[3]; 1319 } 1320} 1321 1322static void 1323micro_lt( 1324 union tgsi_exec_channel *dst, 1325 const union tgsi_exec_channel *src0, 1326 const union tgsi_exec_channel *src1, 1327 const union tgsi_exec_channel *src2, 1328 const union tgsi_exec_channel *src3 ) 1329{ 1330 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; 1331 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; 1332 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; 1333 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; 1334} 1335 1336static void 1337micro_max(union tgsi_exec_channel *dst, 1338 const union tgsi_exec_channel *src0, 1339 const union tgsi_exec_channel *src1) 1340{ 1341 dst->f[0] = fmaxf(src0->f[0], src1->f[0]); 1342 dst->f[1] = fmaxf(src0->f[1], src1->f[1]); 1343 dst->f[2] = fmaxf(src0->f[2], src1->f[2]); 1344 dst->f[3] = fmaxf(src0->f[3], src1->f[3]); 1345} 1346 1347static void 1348micro_min(union tgsi_exec_channel *dst, 1349 const union tgsi_exec_channel *src0, 1350 const union tgsi_exec_channel *src1) 1351{ 1352 dst->f[0] = fminf(src0->f[0], src1->f[0]); 1353 dst->f[1] = fminf(src0->f[1], src1->f[1]); 1354 dst->f[2] = fminf(src0->f[2], src1->f[2]); 1355 dst->f[3] = fminf(src0->f[3], src1->f[3]); 1356} 1357 1358static void 1359micro_mul(union tgsi_exec_channel *dst, 1360 const union tgsi_exec_channel *src0, 1361 const union tgsi_exec_channel *src1) 1362{ 1363 dst->f[0] = src0->f[0] * src1->f[0]; 1364 dst->f[1] = src0->f[1] * src1->f[1]; 1365 dst->f[2] = src0->f[2] * src1->f[2]; 1366 dst->f[3] = src0->f[3] * src1->f[3]; 1367} 1368 1369static void 1370micro_neg( 1371 union tgsi_exec_channel *dst, 1372 const union tgsi_exec_channel *src ) 1373{ 1374 dst->f[0] = -src->f[0]; 1375 dst->f[1] = -src->f[1]; 1376 dst->f[2] = -src->f[2]; 1377 dst->f[3] = -src->f[3]; 1378} 1379 1380static void 1381micro_pow( 1382 union tgsi_exec_channel *dst, 1383 const union tgsi_exec_channel *src0, 1384 const union tgsi_exec_channel *src1 ) 1385{ 1386 dst->f[0] = powf( src0->f[0], src1->f[0] ); 1387 dst->f[1] = powf( src0->f[1], src1->f[1] ); 1388 dst->f[2] = powf( src0->f[2], src1->f[2] ); 1389 dst->f[3] = powf( src0->f[3], src1->f[3] ); 1390} 1391 1392static void 1393micro_ldexp(union tgsi_exec_channel *dst, 1394 const union tgsi_exec_channel *src0, 1395 const union tgsi_exec_channel *src1) 1396{ 1397 dst->f[0] = ldexpf(src0->f[0], src1->i[0]); 1398 dst->f[1] = ldexpf(src0->f[1], src1->i[1]); 1399 dst->f[2] = ldexpf(src0->f[2], src1->i[2]); 1400 dst->f[3] = ldexpf(src0->f[3], src1->i[3]); 1401} 1402 1403static void 1404micro_sub(union tgsi_exec_channel *dst, 1405 const union tgsi_exec_channel *src0, 1406 const union tgsi_exec_channel *src1) 1407{ 1408 dst->f[0] = src0->f[0] - src1->f[0]; 1409 dst->f[1] = src0->f[1] - src1->f[1]; 1410 dst->f[2] = src0->f[2] - src1->f[2]; 1411 dst->f[3] = src0->f[3] - src1->f[3]; 1412} 1413 1414static void 1415fetch_src_file_channel(const struct tgsi_exec_machine *mach, 1416 const uint file, 1417 const uint swizzle, 1418 const union tgsi_exec_channel *index, 1419 const union tgsi_exec_channel *index2D, 1420 union tgsi_exec_channel *chan) 1421{ 1422 uint i; 1423 1424 assert(swizzle < 4); 1425 1426 switch (file) { 1427 case TGSI_FILE_CONSTANT: 1428 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1429 /* NOTE: copying the const value as a uint instead of float */ 1430 const uint constbuf = index2D->i[i]; 1431 const unsigned pos = index->i[i] * 4 + swizzle; 1432 /* const buffer bounds check */ 1433 if (pos >= mach->ConstsSize[constbuf] / 4) { 1434 if (0) { 1435 /* Debug: print warning */ 1436 static int count = 0; 1437 if (count++ < 100) 1438 debug_printf("TGSI Exec: const buffer index %d" 1439 " out of bounds\n", pos); 1440 } 1441 chan->u[i] = 0; 1442 } else { 1443 const uint *buf = (const uint *)mach->Consts[constbuf]; 1444 chan->u[i] = buf[pos]; 1445 } 1446 } 1447 break; 1448 1449 case TGSI_FILE_INPUT: 1450 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1451 /* 1452 if (PIPE_SHADER_GEOMETRY == mach->ShaderType) { 1453 debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n", 1454 index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i], 1455 index2D->i[i], index->i[i]); 1456 }*/ 1457 int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i]; 1458 assert(pos >= 0); 1459 assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS); 1460 chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i]; 1461 } 1462 break; 1463 1464 case TGSI_FILE_SYSTEM_VALUE: 1465 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1466 chan->u[i] = mach->SystemValue[index->i[i]].xyzw[swizzle].u[i]; 1467 } 1468 break; 1469 1470 case TGSI_FILE_TEMPORARY: 1471 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1472 assert(index->i[i] < TGSI_EXEC_NUM_TEMPS); 1473 assert(index2D->i[i] == 0); 1474 1475 chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i]; 1476 } 1477 break; 1478 1479 case TGSI_FILE_IMMEDIATE: 1480 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1481 assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit); 1482 assert(index2D->i[i] == 0); 1483 1484 chan->f[i] = mach->Imms[index->i[i]][swizzle]; 1485 } 1486 break; 1487 1488 case TGSI_FILE_ADDRESS: 1489 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1490 assert(index->i[i] >= 0 && index->i[i] < ARRAY_SIZE(mach->Addrs)); 1491 assert(index2D->i[i] == 0); 1492 1493 chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i]; 1494 } 1495 break; 1496 1497 case TGSI_FILE_OUTPUT: 1498 /* vertex/fragment output vars can be read too */ 1499 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1500 assert(index->i[i] >= 0); 1501 assert(index2D->i[i] == 0); 1502 1503 chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i]; 1504 } 1505 break; 1506 1507 default: 1508 assert(0); 1509 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1510 chan->u[i] = 0; 1511 } 1512 } 1513} 1514 1515static void 1516get_index_registers(const struct tgsi_exec_machine *mach, 1517 const struct tgsi_full_src_register *reg, 1518 union tgsi_exec_channel *index, 1519 union tgsi_exec_channel *index2D) 1520{ 1521 uint swizzle; 1522 1523 /* We start with a direct index into a register file. 1524 * 1525 * file[1], 1526 * where: 1527 * file = Register.File 1528 * [1] = Register.Index 1529 */ 1530 index->i[0] = 1531 index->i[1] = 1532 index->i[2] = 1533 index->i[3] = reg->Register.Index; 1534 1535 /* There is an extra source register that indirectly subscripts 1536 * a register file. The direct index now becomes an offset 1537 * that is being added to the indirect register. 1538 * 1539 * file[ind[2].x+1], 1540 * where: 1541 * ind = Indirect.File 1542 * [2] = Indirect.Index 1543 * .x = Indirect.SwizzleX 1544 */ 1545 if (reg->Register.Indirect) { 1546 union tgsi_exec_channel index2; 1547 union tgsi_exec_channel indir_index; 1548 const uint execmask = mach->ExecMask; 1549 uint i; 1550 1551 /* which address register (always zero now) */ 1552 index2.i[0] = 1553 index2.i[1] = 1554 index2.i[2] = 1555 index2.i[3] = reg->Indirect.Index; 1556 /* get current value of address register[swizzle] */ 1557 swizzle = reg->Indirect.Swizzle; 1558 fetch_src_file_channel(mach, 1559 reg->Indirect.File, 1560 swizzle, 1561 &index2, 1562 &ZeroVec, 1563 &indir_index); 1564 1565 /* add value of address register to the offset */ 1566 index->i[0] += indir_index.i[0]; 1567 index->i[1] += indir_index.i[1]; 1568 index->i[2] += indir_index.i[2]; 1569 index->i[3] += indir_index.i[3]; 1570 1571 /* for disabled execution channels, zero-out the index to 1572 * avoid using a potential garbage value. 1573 */ 1574 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1575 if ((execmask & (1 << i)) == 0) 1576 index->i[i] = 0; 1577 } 1578 } 1579 1580 /* There is an extra source register that is a second 1581 * subscript to a register file. Effectively it means that 1582 * the register file is actually a 2D array of registers. 1583 * 1584 * file[3][1], 1585 * where: 1586 * [3] = Dimension.Index 1587 */ 1588 if (reg->Register.Dimension) { 1589 index2D->i[0] = 1590 index2D->i[1] = 1591 index2D->i[2] = 1592 index2D->i[3] = reg->Dimension.Index; 1593 1594 /* Again, the second subscript index can be addressed indirectly 1595 * identically to the first one. 1596 * Nothing stops us from indirectly addressing the indirect register, 1597 * but there is no need for that, so we won't exercise it. 1598 * 1599 * file[ind[4].y+3][1], 1600 * where: 1601 * ind = DimIndirect.File 1602 * [4] = DimIndirect.Index 1603 * .y = DimIndirect.SwizzleX 1604 */ 1605 if (reg->Dimension.Indirect) { 1606 union tgsi_exec_channel index2; 1607 union tgsi_exec_channel indir_index; 1608 const uint execmask = mach->ExecMask; 1609 uint i; 1610 1611 index2.i[0] = 1612 index2.i[1] = 1613 index2.i[2] = 1614 index2.i[3] = reg->DimIndirect.Index; 1615 1616 swizzle = reg->DimIndirect.Swizzle; 1617 fetch_src_file_channel(mach, 1618 reg->DimIndirect.File, 1619 swizzle, 1620 &index2, 1621 &ZeroVec, 1622 &indir_index); 1623 1624 index2D->i[0] += indir_index.i[0]; 1625 index2D->i[1] += indir_index.i[1]; 1626 index2D->i[2] += indir_index.i[2]; 1627 index2D->i[3] += indir_index.i[3]; 1628 1629 /* for disabled execution channels, zero-out the index to 1630 * avoid using a potential garbage value. 1631 */ 1632 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 1633 if ((execmask & (1 << i)) == 0) { 1634 index2D->i[i] = 0; 1635 } 1636 } 1637 } 1638 1639 /* If by any chance there was a need for a 3D array of register 1640 * files, we would have to check whether Dimension is followed 1641 * by a dimension register and continue the saga. 1642 */ 1643 } else { 1644 index2D->i[0] = 1645 index2D->i[1] = 1646 index2D->i[2] = 1647 index2D->i[3] = 0; 1648 } 1649} 1650 1651 1652static void 1653fetch_source_d(const struct tgsi_exec_machine *mach, 1654 union tgsi_exec_channel *chan, 1655 const struct tgsi_full_src_register *reg, 1656 const uint chan_index) 1657{ 1658 union tgsi_exec_channel index; 1659 union tgsi_exec_channel index2D; 1660 uint swizzle; 1661 1662 get_index_registers(mach, reg, &index, &index2D); 1663 1664 1665 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); 1666 fetch_src_file_channel(mach, 1667 reg->Register.File, 1668 swizzle, 1669 &index, 1670 &index2D, 1671 chan); 1672} 1673 1674static void 1675fetch_source(const struct tgsi_exec_machine *mach, 1676 union tgsi_exec_channel *chan, 1677 const struct tgsi_full_src_register *reg, 1678 const uint chan_index, 1679 enum tgsi_exec_datatype src_datatype) 1680{ 1681 fetch_source_d(mach, chan, reg, chan_index); 1682 1683 if (reg->Register.Absolute) { 1684 assert(src_datatype == TGSI_EXEC_DATA_FLOAT); 1685 micro_abs(chan, chan); 1686 } 1687 1688 if (reg->Register.Negate) { 1689 if (src_datatype == TGSI_EXEC_DATA_FLOAT) { 1690 micro_neg(chan, chan); 1691 } else { 1692 micro_ineg(chan, chan); 1693 } 1694 } 1695} 1696 1697static union tgsi_exec_channel * 1698store_dest_dstret(struct tgsi_exec_machine *mach, 1699 const union tgsi_exec_channel *chan, 1700 const struct tgsi_full_dst_register *reg, 1701 uint chan_index) 1702{ 1703 static union tgsi_exec_channel null; 1704 union tgsi_exec_channel *dst; 1705 int offset = 0; /* indirection offset */ 1706 int index; 1707 1708 1709 /* There is an extra source register that indirectly subscripts 1710 * a register file. The direct index now becomes an offset 1711 * that is being added to the indirect register. 1712 * 1713 * file[ind[2].x+1], 1714 * where: 1715 * ind = Indirect.File 1716 * [2] = Indirect.Index 1717 * .x = Indirect.SwizzleX 1718 */ 1719 if (reg->Register.Indirect) { 1720 union tgsi_exec_channel index; 1721 union tgsi_exec_channel indir_index; 1722 uint swizzle; 1723 1724 /* which address register (always zero for now) */ 1725 index.i[0] = 1726 index.i[1] = 1727 index.i[2] = 1728 index.i[3] = reg->Indirect.Index; 1729 1730 /* get current value of address register[swizzle] */ 1731 swizzle = reg->Indirect.Swizzle; 1732 1733 /* fetch values from the address/indirection register */ 1734 fetch_src_file_channel(mach, 1735 reg->Indirect.File, 1736 swizzle, 1737 &index, 1738 &ZeroVec, 1739 &indir_index); 1740 1741 /* save indirection offset */ 1742 offset = indir_index.i[0]; 1743 } 1744 1745 switch (reg->Register.File) { 1746 case TGSI_FILE_NULL: 1747 dst = &null; 1748 break; 1749 1750 case TGSI_FILE_OUTPUT: 1751 index = mach->OutputVertexOffset + reg->Register.Index; 1752 dst = &mach->Outputs[offset + index].xyzw[chan_index]; 1753#if 0 1754 debug_printf("NumOutputs = %d, TEMP_O_C/I = %d, redindex = %d\n", 1755 mach->NumOutputs, mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0], 1756 reg->Register.Index); 1757 if (PIPE_SHADER_GEOMETRY == mach->ShaderType) { 1758 debug_printf("STORING OUT[%d] mask(%d), = (", offset + index, execmask); 1759 for (i = 0; i < TGSI_QUAD_SIZE; i++) 1760 if (execmask & (1 << i)) 1761 debug_printf("%f, ", chan->f[i]); 1762 debug_printf(")\n"); 1763 } 1764#endif 1765 break; 1766 1767 case TGSI_FILE_TEMPORARY: 1768 index = reg->Register.Index; 1769 assert( index < TGSI_EXEC_NUM_TEMPS ); 1770 dst = &mach->Temps[offset + index].xyzw[chan_index]; 1771 break; 1772 1773 case TGSI_FILE_ADDRESS: 1774 index = reg->Register.Index; 1775 assert(index >= 0 && index < ARRAY_SIZE(mach->Addrs)); 1776 dst = &mach->Addrs[index].xyzw[chan_index]; 1777 break; 1778 1779 default: 1780 unreachable("Bad destination file"); 1781 } 1782 1783 return dst; 1784} 1785 1786static void 1787store_dest_double(struct tgsi_exec_machine *mach, 1788 const union tgsi_exec_channel *chan, 1789 const struct tgsi_full_dst_register *reg, 1790 uint chan_index) 1791{ 1792 union tgsi_exec_channel *dst; 1793 const uint execmask = mach->ExecMask; 1794 int i; 1795 1796 dst = store_dest_dstret(mach, chan, reg, chan_index); 1797 if (!dst) 1798 return; 1799 1800 /* doubles path */ 1801 for (i = 0; i < TGSI_QUAD_SIZE; i++) 1802 if (execmask & (1 << i)) 1803 dst->i[i] = chan->i[i]; 1804} 1805 1806static void 1807store_dest(struct tgsi_exec_machine *mach, 1808 const union tgsi_exec_channel *chan, 1809 const struct tgsi_full_dst_register *reg, 1810 const struct tgsi_full_instruction *inst, 1811 uint chan_index) 1812{ 1813 union tgsi_exec_channel *dst; 1814 const uint execmask = mach->ExecMask; 1815 int i; 1816 1817 dst = store_dest_dstret(mach, chan, reg, chan_index); 1818 if (!dst) 1819 return; 1820 1821 if (!inst->Instruction.Saturate) { 1822 for (i = 0; i < TGSI_QUAD_SIZE; i++) 1823 if (execmask & (1 << i)) 1824 dst->i[i] = chan->i[i]; 1825 } 1826 else { 1827 for (i = 0; i < TGSI_QUAD_SIZE; i++) 1828 if (execmask & (1 << i)) 1829 dst->f[i] = fminf(fmaxf(chan->f[i], 0.0f), 1.0f); 1830 } 1831} 1832 1833#define FETCH(VAL,INDEX,CHAN)\ 1834 fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_FLOAT) 1835 1836#define IFETCH(VAL,INDEX,CHAN)\ 1837 fetch_source(mach, VAL, &inst->Src[INDEX], CHAN, TGSI_EXEC_DATA_INT) 1838 1839 1840/** 1841 * Execute ARB-style KIL which is predicated by a src register. 1842 * Kill fragment if any of the four values is less than zero. 1843 */ 1844static void 1845exec_kill_if(struct tgsi_exec_machine *mach, 1846 const struct tgsi_full_instruction *inst) 1847{ 1848 uint uniquemask; 1849 uint chan_index; 1850 uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ 1851 union tgsi_exec_channel r[1]; 1852 1853 /* This mask stores component bits that were already tested. */ 1854 uniquemask = 0; 1855 1856 for (chan_index = 0; chan_index < 4; chan_index++) 1857 { 1858 uint swizzle; 1859 uint i; 1860 1861 /* unswizzle channel */ 1862 swizzle = tgsi_util_get_full_src_register_swizzle ( 1863 &inst->Src[0], 1864 chan_index); 1865 1866 /* check if the component has not been already tested */ 1867 if (uniquemask & (1 << swizzle)) 1868 continue; 1869 uniquemask |= 1 << swizzle; 1870 1871 FETCH(&r[0], 0, chan_index); 1872 for (i = 0; i < 4; i++) 1873 if (r[0].f[i] < 0.0f) 1874 kilmask |= 1 << i; 1875 } 1876 1877 /* restrict to fragments currently executing */ 1878 kilmask &= mach->ExecMask; 1879 1880 mach->KillMask |= kilmask; 1881} 1882 1883/** 1884 * Unconditional fragment kill/discard. 1885 */ 1886static void 1887exec_kill(struct tgsi_exec_machine *mach) 1888{ 1889 /* kill fragment for all fragments currently executing. 1890 * bit 0 = pixel 0, bit 1 = pixel 1, etc. 1891 */ 1892 mach->KillMask |= mach->ExecMask; 1893} 1894 1895static void 1896emit_vertex(struct tgsi_exec_machine *mach, 1897 const struct tgsi_full_instruction *inst) 1898{ 1899 union tgsi_exec_channel r[1]; 1900 unsigned stream_id; 1901 unsigned prim_count; 1902 /* FIXME: check for exec mask correctly 1903 unsigned i; 1904 for (i = 0; i < TGSI_QUAD_SIZE; ++i) { 1905 if ((mach->ExecMask & (1 << i))) 1906 */ 1907 IFETCH(&r[0], 0, TGSI_CHAN_X); 1908 stream_id = r[0].u[0]; 1909 prim_count = mach->OutputPrimCount[stream_id]; 1910 if (mach->ExecMask) { 1911 if (mach->Primitives[stream_id][prim_count] >= mach->MaxOutputVertices) 1912 return; 1913 1914 if (mach->Primitives[stream_id][prim_count] == 0) 1915 mach->PrimitiveOffsets[stream_id][prim_count] = mach->OutputVertexOffset; 1916 mach->OutputVertexOffset += mach->NumOutputs; 1917 mach->Primitives[stream_id][prim_count]++; 1918 } 1919} 1920 1921static void 1922emit_primitive(struct tgsi_exec_machine *mach, 1923 const struct tgsi_full_instruction *inst) 1924{ 1925 unsigned *prim_count; 1926 union tgsi_exec_channel r[1]; 1927 unsigned stream_id = 0; 1928 /* FIXME: check for exec mask correctly 1929 unsigned i; 1930 for (i = 0; i < TGSI_QUAD_SIZE; ++i) { 1931 if ((mach->ExecMask & (1 << i))) 1932 */ 1933 if (inst) { 1934 IFETCH(&r[0], 0, TGSI_CHAN_X); 1935 stream_id = r[0].u[0]; 1936 } 1937 prim_count = &mach->OutputPrimCount[stream_id]; 1938 if (mach->ExecMask) { 1939 ++(*prim_count); 1940 debug_assert((*prim_count * mach->NumOutputs) < TGSI_MAX_TOTAL_VERTICES); 1941 mach->Primitives[stream_id][*prim_count] = 0; 1942 } 1943} 1944 1945static void 1946conditional_emit_primitive(struct tgsi_exec_machine *mach) 1947{ 1948 if (PIPE_SHADER_GEOMETRY == mach->ShaderType) { 1949 int emitted_verts = mach->Primitives[0][mach->OutputPrimCount[0]]; 1950 if (emitted_verts) { 1951 emit_primitive(mach, NULL); 1952 } 1953 } 1954} 1955 1956 1957/* 1958 * Fetch four texture samples using STR texture coordinates. 1959 */ 1960static void 1961fetch_texel( struct tgsi_sampler *sampler, 1962 const unsigned sview_idx, 1963 const unsigned sampler_idx, 1964 const union tgsi_exec_channel *s, 1965 const union tgsi_exec_channel *t, 1966 const union tgsi_exec_channel *p, 1967 const union tgsi_exec_channel *c0, 1968 const union tgsi_exec_channel *c1, 1969 float derivs[3][2][TGSI_QUAD_SIZE], 1970 const int8_t offset[3], 1971 enum tgsi_sampler_control control, 1972 union tgsi_exec_channel *r, 1973 union tgsi_exec_channel *g, 1974 union tgsi_exec_channel *b, 1975 union tgsi_exec_channel *a ) 1976{ 1977 uint j; 1978 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 1979 1980 /* FIXME: handle explicit derivs, offsets */ 1981 sampler->get_samples(sampler, sview_idx, sampler_idx, 1982 s->f, t->f, p->f, c0->f, c1->f, derivs, offset, control, rgba); 1983 1984 for (j = 0; j < 4; j++) { 1985 r->f[j] = rgba[0][j]; 1986 g->f[j] = rgba[1][j]; 1987 b->f[j] = rgba[2][j]; 1988 a->f[j] = rgba[3][j]; 1989 } 1990} 1991 1992 1993#define TEX_MODIFIER_NONE 0 1994#define TEX_MODIFIER_PROJECTED 1 1995#define TEX_MODIFIER_LOD_BIAS 2 1996#define TEX_MODIFIER_EXPLICIT_LOD 3 1997#define TEX_MODIFIER_LEVEL_ZERO 4 1998#define TEX_MODIFIER_GATHER 5 1999 2000/* 2001 * Fetch all 3 (for s,t,r coords) texel offsets, put them into int array. 2002 */ 2003static void 2004fetch_texel_offsets(struct tgsi_exec_machine *mach, 2005 const struct tgsi_full_instruction *inst, 2006 int8_t offsets[3]) 2007{ 2008 if (inst->Texture.NumOffsets == 1) { 2009 union tgsi_exec_channel index; 2010 union tgsi_exec_channel offset[3]; 2011 index.i[0] = index.i[1] = index.i[2] = index.i[3] = inst->TexOffsets[0].Index; 2012 fetch_src_file_channel(mach, inst->TexOffsets[0].File, 2013 inst->TexOffsets[0].SwizzleX, &index, &ZeroVec, &offset[0]); 2014 fetch_src_file_channel(mach, inst->TexOffsets[0].File, 2015 inst->TexOffsets[0].SwizzleY, &index, &ZeroVec, &offset[1]); 2016 fetch_src_file_channel(mach, inst->TexOffsets[0].File, 2017 inst->TexOffsets[0].SwizzleZ, &index, &ZeroVec, &offset[2]); 2018 offsets[0] = offset[0].i[0]; 2019 offsets[1] = offset[1].i[0]; 2020 offsets[2] = offset[2].i[0]; 2021 } else { 2022 assert(inst->Texture.NumOffsets == 0); 2023 offsets[0] = offsets[1] = offsets[2] = 0; 2024 } 2025} 2026 2027 2028/* 2029 * Fetch dx and dy values for one channel (s, t or r). 2030 * Put dx values into one float array, dy values into another. 2031 */ 2032static void 2033fetch_assign_deriv_channel(struct tgsi_exec_machine *mach, 2034 const struct tgsi_full_instruction *inst, 2035 unsigned regdsrcx, 2036 unsigned chan, 2037 float derivs[2][TGSI_QUAD_SIZE]) 2038{ 2039 union tgsi_exec_channel d; 2040 FETCH(&d, regdsrcx, chan); 2041 derivs[0][0] = d.f[0]; 2042 derivs[0][1] = d.f[1]; 2043 derivs[0][2] = d.f[2]; 2044 derivs[0][3] = d.f[3]; 2045 FETCH(&d, regdsrcx + 1, chan); 2046 derivs[1][0] = d.f[0]; 2047 derivs[1][1] = d.f[1]; 2048 derivs[1][2] = d.f[2]; 2049 derivs[1][3] = d.f[3]; 2050} 2051 2052static uint 2053fetch_sampler_unit(struct tgsi_exec_machine *mach, 2054 const struct tgsi_full_instruction *inst, 2055 uint sampler) 2056{ 2057 uint unit = 0; 2058 int i; 2059 if (inst->Src[sampler].Register.Indirect) { 2060 const struct tgsi_full_src_register *reg = &inst->Src[sampler]; 2061 union tgsi_exec_channel indir_index, index2; 2062 const uint execmask = mach->ExecMask; 2063 index2.i[0] = 2064 index2.i[1] = 2065 index2.i[2] = 2066 index2.i[3] = reg->Indirect.Index; 2067 2068 fetch_src_file_channel(mach, 2069 reg->Indirect.File, 2070 reg->Indirect.Swizzle, 2071 &index2, 2072 &ZeroVec, 2073 &indir_index); 2074 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 2075 if (execmask & (1 << i)) { 2076 unit = inst->Src[sampler].Register.Index + indir_index.i[i]; 2077 break; 2078 } 2079 } 2080 2081 } else { 2082 unit = inst->Src[sampler].Register.Index; 2083 } 2084 return unit; 2085} 2086 2087/* 2088 * execute a texture instruction. 2089 * 2090 * modifier is used to control the channel routing for the 2091 * instruction variants like proj, lod, and texture with lod bias. 2092 * sampler indicates which src register the sampler is contained in. 2093 */ 2094static void 2095exec_tex(struct tgsi_exec_machine *mach, 2096 const struct tgsi_full_instruction *inst, 2097 uint modifier, uint sampler) 2098{ 2099 const union tgsi_exec_channel *args[5], *proj = NULL; 2100 union tgsi_exec_channel r[5]; 2101 enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE; 2102 uint chan; 2103 uint unit; 2104 int8_t offsets[3]; 2105 int dim, shadow_ref, i; 2106 2107 unit = fetch_sampler_unit(mach, inst, sampler); 2108 /* always fetch all 3 offsets, overkill but keeps code simple */ 2109 fetch_texel_offsets(mach, inst, offsets); 2110 2111 assert(modifier != TEX_MODIFIER_LEVEL_ZERO); 2112 assert(inst->Texture.Texture != TGSI_TEXTURE_BUFFER); 2113 2114 dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture); 2115 shadow_ref = tgsi_util_get_shadow_ref_src_index(inst->Texture.Texture); 2116 2117 assert(dim <= 4); 2118 if (shadow_ref >= 0) 2119 assert(shadow_ref >= dim && shadow_ref < (int)ARRAY_SIZE(args)); 2120 2121 /* fetch modifier to the last argument */ 2122 if (modifier != TEX_MODIFIER_NONE) { 2123 const int last = ARRAY_SIZE(args) - 1; 2124 2125 /* fetch modifier from src0.w or src1.x */ 2126 if (sampler == 1) { 2127 assert(dim <= TGSI_CHAN_W && shadow_ref != TGSI_CHAN_W); 2128 FETCH(&r[last], 0, TGSI_CHAN_W); 2129 } 2130 else { 2131 FETCH(&r[last], 1, TGSI_CHAN_X); 2132 } 2133 2134 if (modifier != TEX_MODIFIER_PROJECTED) { 2135 args[last] = &r[last]; 2136 } 2137 else { 2138 proj = &r[last]; 2139 args[last] = &ZeroVec; 2140 } 2141 2142 /* point unused arguments to zero vector */ 2143 for (i = dim; i < last; i++) 2144 args[i] = &ZeroVec; 2145 2146 if (modifier == TEX_MODIFIER_EXPLICIT_LOD) 2147 control = TGSI_SAMPLER_LOD_EXPLICIT; 2148 else if (modifier == TEX_MODIFIER_LOD_BIAS) 2149 control = TGSI_SAMPLER_LOD_BIAS; 2150 else if (modifier == TEX_MODIFIER_GATHER) 2151 control = TGSI_SAMPLER_GATHER; 2152 } 2153 else { 2154 for (i = dim; i < (int)ARRAY_SIZE(args); i++) 2155 args[i] = &ZeroVec; 2156 } 2157 2158 /* fetch coordinates */ 2159 for (i = 0; i < dim; i++) { 2160 FETCH(&r[i], 0, TGSI_CHAN_X + i); 2161 2162 if (proj) 2163 micro_div(&r[i], &r[i], proj); 2164 2165 args[i] = &r[i]; 2166 } 2167 2168 /* fetch reference value */ 2169 if (shadow_ref >= 0) { 2170 FETCH(&r[shadow_ref], shadow_ref / 4, TGSI_CHAN_X + (shadow_ref % 4)); 2171 2172 if (proj) 2173 micro_div(&r[shadow_ref], &r[shadow_ref], proj); 2174 2175 args[shadow_ref] = &r[shadow_ref]; 2176 } 2177 2178 fetch_texel(mach->Sampler, unit, unit, 2179 args[0], args[1], args[2], args[3], args[4], 2180 NULL, offsets, control, 2181 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2182 2183#if 0 2184 debug_printf("fetch r: %g %g %g %g\n", 2185 r[0].f[0], r[0].f[1], r[0].f[2], r[0].f[3]); 2186 debug_printf("fetch g: %g %g %g %g\n", 2187 r[1].f[0], r[1].f[1], r[1].f[2], r[1].f[3]); 2188 debug_printf("fetch b: %g %g %g %g\n", 2189 r[2].f[0], r[2].f[1], r[2].f[2], r[2].f[3]); 2190 debug_printf("fetch a: %g %g %g %g\n", 2191 r[3].f[0], r[3].f[1], r[3].f[2], r[3].f[3]); 2192#endif 2193 2194 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2195 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2196 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan); 2197 } 2198 } 2199} 2200 2201static void 2202exec_lodq(struct tgsi_exec_machine *mach, 2203 const struct tgsi_full_instruction *inst) 2204{ 2205 uint resource_unit, sampler_unit; 2206 unsigned dim; 2207 unsigned i; 2208 union tgsi_exec_channel coords[4]; 2209 const union tgsi_exec_channel *args[ARRAY_SIZE(coords)]; 2210 union tgsi_exec_channel r[2]; 2211 2212 resource_unit = fetch_sampler_unit(mach, inst, 1); 2213 if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) { 2214 uint target = mach->SamplerViews[resource_unit].Resource; 2215 dim = tgsi_util_get_texture_coord_dim(target); 2216 sampler_unit = fetch_sampler_unit(mach, inst, 2); 2217 } else { 2218 dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture); 2219 sampler_unit = resource_unit; 2220 } 2221 assert(dim <= ARRAY_SIZE(coords)); 2222 /* fetch coordinates */ 2223 for (i = 0; i < dim; i++) { 2224 FETCH(&coords[i], 0, TGSI_CHAN_X + i); 2225 args[i] = &coords[i]; 2226 } 2227 for (i = dim; i < ARRAY_SIZE(coords); i++) { 2228 args[i] = &ZeroVec; 2229 } 2230 mach->Sampler->query_lod(mach->Sampler, resource_unit, sampler_unit, 2231 args[0]->f, 2232 args[1]->f, 2233 args[2]->f, 2234 args[3]->f, 2235 TGSI_SAMPLER_LOD_NONE, 2236 r[0].f, 2237 r[1].f); 2238 2239 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 2240 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X); 2241 } 2242 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 2243 store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y); 2244 } 2245 if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) { 2246 unsigned char swizzles[4]; 2247 unsigned chan; 2248 swizzles[0] = inst->Src[1].Register.SwizzleX; 2249 swizzles[1] = inst->Src[1].Register.SwizzleY; 2250 swizzles[2] = inst->Src[1].Register.SwizzleZ; 2251 swizzles[3] = inst->Src[1].Register.SwizzleW; 2252 2253 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2254 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2255 if (swizzles[chan] >= 2) { 2256 store_dest(mach, &ZeroVec, 2257 &inst->Dst[0], inst, chan); 2258 } else { 2259 store_dest(mach, &r[swizzles[chan]], 2260 &inst->Dst[0], inst, chan); 2261 } 2262 } 2263 } 2264 } else { 2265 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 2266 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X); 2267 } 2268 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 2269 store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y); 2270 } 2271 } 2272} 2273 2274static void 2275exec_txd(struct tgsi_exec_machine *mach, 2276 const struct tgsi_full_instruction *inst) 2277{ 2278 union tgsi_exec_channel r[4]; 2279 float derivs[3][2][TGSI_QUAD_SIZE]; 2280 uint chan; 2281 uint unit; 2282 int8_t offsets[3]; 2283 2284 unit = fetch_sampler_unit(mach, inst, 3); 2285 /* always fetch all 3 offsets, overkill but keeps code simple */ 2286 fetch_texel_offsets(mach, inst, offsets); 2287 2288 switch (inst->Texture.Texture) { 2289 case TGSI_TEXTURE_1D: 2290 FETCH(&r[0], 0, TGSI_CHAN_X); 2291 2292 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 2293 2294 fetch_texel(mach->Sampler, unit, unit, 2295 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ 2296 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2297 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2298 break; 2299 2300 case TGSI_TEXTURE_SHADOW1D: 2301 case TGSI_TEXTURE_1D_ARRAY: 2302 case TGSI_TEXTURE_SHADOW1D_ARRAY: 2303 /* SHADOW1D/1D_ARRAY would not need Y/Z respectively, but don't bother */ 2304 FETCH(&r[0], 0, TGSI_CHAN_X); 2305 FETCH(&r[1], 0, TGSI_CHAN_Y); 2306 FETCH(&r[2], 0, TGSI_CHAN_Z); 2307 2308 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 2309 2310 fetch_texel(mach->Sampler, unit, unit, 2311 &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ 2312 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2313 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2314 break; 2315 2316 case TGSI_TEXTURE_2D: 2317 case TGSI_TEXTURE_RECT: 2318 FETCH(&r[0], 0, TGSI_CHAN_X); 2319 FETCH(&r[1], 0, TGSI_CHAN_Y); 2320 2321 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 2322 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]); 2323 2324 fetch_texel(mach->Sampler, unit, unit, 2325 &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ 2326 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2327 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2328 break; 2329 2330 2331 case TGSI_TEXTURE_SHADOW2D: 2332 case TGSI_TEXTURE_SHADOWRECT: 2333 case TGSI_TEXTURE_2D_ARRAY: 2334 case TGSI_TEXTURE_SHADOW2D_ARRAY: 2335 /* only SHADOW2D_ARRAY actually needs W */ 2336 FETCH(&r[0], 0, TGSI_CHAN_X); 2337 FETCH(&r[1], 0, TGSI_CHAN_Y); 2338 FETCH(&r[2], 0, TGSI_CHAN_Z); 2339 FETCH(&r[3], 0, TGSI_CHAN_W); 2340 2341 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 2342 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]); 2343 2344 fetch_texel(mach->Sampler, unit, unit, 2345 &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */ 2346 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2347 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2348 break; 2349 2350 case TGSI_TEXTURE_3D: 2351 case TGSI_TEXTURE_CUBE: 2352 case TGSI_TEXTURE_CUBE_ARRAY: 2353 case TGSI_TEXTURE_SHADOWCUBE: 2354 /* only TEXTURE_CUBE_ARRAY and TEXTURE_SHADOWCUBE actually need W */ 2355 FETCH(&r[0], 0, TGSI_CHAN_X); 2356 FETCH(&r[1], 0, TGSI_CHAN_Y); 2357 FETCH(&r[2], 0, TGSI_CHAN_Z); 2358 FETCH(&r[3], 0, TGSI_CHAN_W); 2359 2360 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]); 2361 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]); 2362 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Z, derivs[2]); 2363 2364 fetch_texel(mach->Sampler, unit, unit, 2365 &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */ 2366 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2367 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2368 break; 2369 2370 default: 2371 assert(0); 2372 } 2373 2374 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2375 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2376 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan); 2377 } 2378 } 2379} 2380 2381 2382static void 2383exec_txf(struct tgsi_exec_machine *mach, 2384 const struct tgsi_full_instruction *inst) 2385{ 2386 union tgsi_exec_channel r[4]; 2387 uint chan; 2388 uint unit; 2389 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 2390 int j; 2391 int8_t offsets[3]; 2392 unsigned target; 2393 2394 unit = fetch_sampler_unit(mach, inst, 1); 2395 /* always fetch all 3 offsets, overkill but keeps code simple */ 2396 fetch_texel_offsets(mach, inst, offsets); 2397 2398 IFETCH(&r[3], 0, TGSI_CHAN_W); 2399 2400 if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I || 2401 inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) { 2402 target = mach->SamplerViews[unit].Resource; 2403 } 2404 else { 2405 target = inst->Texture.Texture; 2406 } 2407 switch(target) { 2408 case TGSI_TEXTURE_3D: 2409 case TGSI_TEXTURE_2D_ARRAY: 2410 case TGSI_TEXTURE_SHADOW2D_ARRAY: 2411 case TGSI_TEXTURE_2D_ARRAY_MSAA: 2412 IFETCH(&r[2], 0, TGSI_CHAN_Z); 2413 FALLTHROUGH; 2414 case TGSI_TEXTURE_2D: 2415 case TGSI_TEXTURE_RECT: 2416 case TGSI_TEXTURE_SHADOW1D_ARRAY: 2417 case TGSI_TEXTURE_SHADOW2D: 2418 case TGSI_TEXTURE_SHADOWRECT: 2419 case TGSI_TEXTURE_1D_ARRAY: 2420 case TGSI_TEXTURE_2D_MSAA: 2421 IFETCH(&r[1], 0, TGSI_CHAN_Y); 2422 FALLTHROUGH; 2423 case TGSI_TEXTURE_BUFFER: 2424 case TGSI_TEXTURE_1D: 2425 case TGSI_TEXTURE_SHADOW1D: 2426 IFETCH(&r[0], 0, TGSI_CHAN_X); 2427 break; 2428 default: 2429 assert(0); 2430 break; 2431 } 2432 2433 mach->Sampler->get_texel(mach->Sampler, unit, r[0].i, r[1].i, r[2].i, r[3].i, 2434 offsets, rgba); 2435 2436 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 2437 r[0].f[j] = rgba[0][j]; 2438 r[1].f[j] = rgba[1][j]; 2439 r[2].f[j] = rgba[2][j]; 2440 r[3].f[j] = rgba[3][j]; 2441 } 2442 2443 if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I || 2444 inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) { 2445 unsigned char swizzles[4]; 2446 swizzles[0] = inst->Src[1].Register.SwizzleX; 2447 swizzles[1] = inst->Src[1].Register.SwizzleY; 2448 swizzles[2] = inst->Src[1].Register.SwizzleZ; 2449 swizzles[3] = inst->Src[1].Register.SwizzleW; 2450 2451 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2452 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2453 store_dest(mach, &r[swizzles[chan]], 2454 &inst->Dst[0], inst, chan); 2455 } 2456 } 2457 } 2458 else { 2459 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2460 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2461 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan); 2462 } 2463 } 2464 } 2465} 2466 2467static void 2468exec_txq(struct tgsi_exec_machine *mach, 2469 const struct tgsi_full_instruction *inst) 2470{ 2471 int result[4]; 2472 union tgsi_exec_channel r[4], src; 2473 uint chan; 2474 uint unit; 2475 int i,j; 2476 2477 unit = fetch_sampler_unit(mach, inst, 1); 2478 2479 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT); 2480 2481 /* XXX: This interface can't return per-pixel values */ 2482 mach->Sampler->get_dims(mach->Sampler, unit, src.i[0], result); 2483 2484 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 2485 for (j = 0; j < 4; j++) { 2486 r[j].i[i] = result[j]; 2487 } 2488 } 2489 2490 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2491 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2492 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan); 2493 } 2494 } 2495} 2496 2497static void 2498exec_sample(struct tgsi_exec_machine *mach, 2499 const struct tgsi_full_instruction *inst, 2500 uint modifier, boolean compare) 2501{ 2502 const uint resource_unit = inst->Src[1].Register.Index; 2503 const uint sampler_unit = inst->Src[2].Register.Index; 2504 union tgsi_exec_channel r[5], c1; 2505 const union tgsi_exec_channel *lod = &ZeroVec; 2506 enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE; 2507 uint chan; 2508 unsigned char swizzles[4]; 2509 int8_t offsets[3]; 2510 2511 /* always fetch all 3 offsets, overkill but keeps code simple */ 2512 fetch_texel_offsets(mach, inst, offsets); 2513 2514 assert(modifier != TEX_MODIFIER_PROJECTED); 2515 2516 if (modifier != TEX_MODIFIER_NONE) { 2517 if (modifier == TEX_MODIFIER_LOD_BIAS) { 2518 FETCH(&c1, 3, TGSI_CHAN_X); 2519 lod = &c1; 2520 control = TGSI_SAMPLER_LOD_BIAS; 2521 } 2522 else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) { 2523 FETCH(&c1, 3, TGSI_CHAN_X); 2524 lod = &c1; 2525 control = TGSI_SAMPLER_LOD_EXPLICIT; 2526 } 2527 else if (modifier == TEX_MODIFIER_GATHER) { 2528 control = TGSI_SAMPLER_GATHER; 2529 } 2530 else { 2531 assert(modifier == TEX_MODIFIER_LEVEL_ZERO); 2532 control = TGSI_SAMPLER_LOD_ZERO; 2533 } 2534 } 2535 2536 FETCH(&r[0], 0, TGSI_CHAN_X); 2537 2538 switch (mach->SamplerViews[resource_unit].Resource) { 2539 case TGSI_TEXTURE_1D: 2540 if (compare) { 2541 FETCH(&r[2], 3, TGSI_CHAN_X); 2542 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2543 &r[0], &ZeroVec, &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */ 2544 NULL, offsets, control, 2545 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2546 } 2547 else { 2548 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2549 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */ 2550 NULL, offsets, control, 2551 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2552 } 2553 break; 2554 2555 case TGSI_TEXTURE_1D_ARRAY: 2556 case TGSI_TEXTURE_2D: 2557 case TGSI_TEXTURE_RECT: 2558 FETCH(&r[1], 0, TGSI_CHAN_Y); 2559 if (compare) { 2560 FETCH(&r[2], 3, TGSI_CHAN_X); 2561 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2562 &r[0], &r[1], &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */ 2563 NULL, offsets, control, 2564 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2565 } 2566 else { 2567 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2568 &r[0], &r[1], &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */ 2569 NULL, offsets, control, 2570 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2571 } 2572 break; 2573 2574 case TGSI_TEXTURE_2D_ARRAY: 2575 case TGSI_TEXTURE_3D: 2576 case TGSI_TEXTURE_CUBE: 2577 FETCH(&r[1], 0, TGSI_CHAN_Y); 2578 FETCH(&r[2], 0, TGSI_CHAN_Z); 2579 if(compare) { 2580 FETCH(&r[3], 3, TGSI_CHAN_X); 2581 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2582 &r[0], &r[1], &r[2], &r[3], lod, 2583 NULL, offsets, control, 2584 &r[0], &r[1], &r[2], &r[3]); 2585 } 2586 else { 2587 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2588 &r[0], &r[1], &r[2], &ZeroVec, lod, 2589 NULL, offsets, control, 2590 &r[0], &r[1], &r[2], &r[3]); 2591 } 2592 break; 2593 2594 case TGSI_TEXTURE_CUBE_ARRAY: 2595 FETCH(&r[1], 0, TGSI_CHAN_Y); 2596 FETCH(&r[2], 0, TGSI_CHAN_Z); 2597 FETCH(&r[3], 0, TGSI_CHAN_W); 2598 if(compare) { 2599 FETCH(&r[4], 3, TGSI_CHAN_X); 2600 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2601 &r[0], &r[1], &r[2], &r[3], &r[4], 2602 NULL, offsets, control, 2603 &r[0], &r[1], &r[2], &r[3]); 2604 } 2605 else { 2606 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2607 &r[0], &r[1], &r[2], &r[3], lod, 2608 NULL, offsets, control, 2609 &r[0], &r[1], &r[2], &r[3]); 2610 } 2611 break; 2612 2613 2614 default: 2615 assert(0); 2616 } 2617 2618 swizzles[0] = inst->Src[1].Register.SwizzleX; 2619 swizzles[1] = inst->Src[1].Register.SwizzleY; 2620 swizzles[2] = inst->Src[1].Register.SwizzleZ; 2621 swizzles[3] = inst->Src[1].Register.SwizzleW; 2622 2623 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2624 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2625 store_dest(mach, &r[swizzles[chan]], 2626 &inst->Dst[0], inst, chan); 2627 } 2628 } 2629} 2630 2631static void 2632exec_sample_d(struct tgsi_exec_machine *mach, 2633 const struct tgsi_full_instruction *inst) 2634{ 2635 const uint resource_unit = inst->Src[1].Register.Index; 2636 const uint sampler_unit = inst->Src[2].Register.Index; 2637 union tgsi_exec_channel r[4]; 2638 float derivs[3][2][TGSI_QUAD_SIZE]; 2639 uint chan; 2640 unsigned char swizzles[4]; 2641 int8_t offsets[3]; 2642 2643 /* always fetch all 3 offsets, overkill but keeps code simple */ 2644 fetch_texel_offsets(mach, inst, offsets); 2645 2646 FETCH(&r[0], 0, TGSI_CHAN_X); 2647 2648 switch (mach->SamplerViews[resource_unit].Resource) { 2649 case TGSI_TEXTURE_1D: 2650 case TGSI_TEXTURE_1D_ARRAY: 2651 /* only 1D array actually needs Y */ 2652 FETCH(&r[1], 0, TGSI_CHAN_Y); 2653 2654 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]); 2655 2656 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2657 &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */ 2658 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2659 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ 2660 break; 2661 2662 case TGSI_TEXTURE_2D: 2663 case TGSI_TEXTURE_RECT: 2664 case TGSI_TEXTURE_2D_ARRAY: 2665 /* only 2D array actually needs Z */ 2666 FETCH(&r[1], 0, TGSI_CHAN_Y); 2667 FETCH(&r[2], 0, TGSI_CHAN_Z); 2668 2669 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]); 2670 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]); 2671 2672 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2673 &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* inputs */ 2674 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2675 &r[0], &r[1], &r[2], &r[3]); /* outputs */ 2676 break; 2677 2678 case TGSI_TEXTURE_3D: 2679 case TGSI_TEXTURE_CUBE: 2680 case TGSI_TEXTURE_CUBE_ARRAY: 2681 /* only cube array actually needs W */ 2682 FETCH(&r[1], 0, TGSI_CHAN_Y); 2683 FETCH(&r[2], 0, TGSI_CHAN_Z); 2684 FETCH(&r[3], 0, TGSI_CHAN_W); 2685 2686 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]); 2687 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]); 2688 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Z, derivs[2]); 2689 2690 fetch_texel(mach->Sampler, resource_unit, sampler_unit, 2691 &r[0], &r[1], &r[2], &r[3], &ZeroVec, 2692 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT, 2693 &r[0], &r[1], &r[2], &r[3]); 2694 break; 2695 2696 default: 2697 assert(0); 2698 } 2699 2700 swizzles[0] = inst->Src[1].Register.SwizzleX; 2701 swizzles[1] = inst->Src[1].Register.SwizzleY; 2702 swizzles[2] = inst->Src[1].Register.SwizzleZ; 2703 swizzles[3] = inst->Src[1].Register.SwizzleW; 2704 2705 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2706 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2707 store_dest(mach, &r[swizzles[chan]], 2708 &inst->Dst[0], inst, chan); 2709 } 2710 } 2711} 2712 2713 2714/** 2715 * Evaluate a constant-valued coefficient at the position of the 2716 * current quad. 2717 */ 2718static void 2719eval_constant_coef( 2720 struct tgsi_exec_machine *mach, 2721 unsigned attrib, 2722 unsigned chan ) 2723{ 2724 unsigned i; 2725 2726 for( i = 0; i < TGSI_QUAD_SIZE; i++ ) { 2727 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; 2728 } 2729} 2730 2731static void 2732interp_constant_offset( 2733 UNUSED const struct tgsi_exec_machine *mach, 2734 UNUSED unsigned attrib, 2735 UNUSED unsigned chan, 2736 UNUSED float ofs_x, 2737 UNUSED float ofs_y, 2738 UNUSED union tgsi_exec_channel *out_chan) 2739{ 2740} 2741 2742/** 2743 * Evaluate a linear-valued coefficient at the position of the 2744 * current quad. 2745 */ 2746static void 2747interp_linear_offset( 2748 const struct tgsi_exec_machine *mach, 2749 unsigned attrib, 2750 unsigned chan, 2751 float ofs_x, 2752 float ofs_y, 2753 union tgsi_exec_channel *out_chan) 2754{ 2755 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 2756 const float dady = mach->InterpCoefs[attrib].dady[chan]; 2757 const float delta = ofs_x * dadx + ofs_y * dady; 2758 out_chan->f[0] += delta; 2759 out_chan->f[1] += delta; 2760 out_chan->f[2] += delta; 2761 out_chan->f[3] += delta; 2762} 2763 2764static void 2765eval_linear_coef(struct tgsi_exec_machine *mach, 2766 unsigned attrib, 2767 unsigned chan) 2768{ 2769 const float x = mach->QuadPos.xyzw[0].f[0]; 2770 const float y = mach->QuadPos.xyzw[1].f[0]; 2771 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 2772 const float dady = mach->InterpCoefs[attrib].dady[chan]; 2773 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 2774 2775 mach->Inputs[attrib].xyzw[chan].f[0] = a0; 2776 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; 2777 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; 2778 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; 2779} 2780 2781/** 2782 * Evaluate a perspective-valued coefficient at the position of the 2783 * current quad. 2784 */ 2785 2786static void 2787interp_perspective_offset( 2788 const struct tgsi_exec_machine *mach, 2789 unsigned attrib, 2790 unsigned chan, 2791 float ofs_x, 2792 float ofs_y, 2793 union tgsi_exec_channel *out_chan) 2794{ 2795 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 2796 const float dady = mach->InterpCoefs[attrib].dady[chan]; 2797 const float *w = mach->QuadPos.xyzw[3].f; 2798 const float delta = ofs_x * dadx + ofs_y * dady; 2799 out_chan->f[0] += delta / w[0]; 2800 out_chan->f[1] += delta / w[1]; 2801 out_chan->f[2] += delta / w[2]; 2802 out_chan->f[3] += delta / w[3]; 2803} 2804 2805static void 2806eval_perspective_coef( 2807 struct tgsi_exec_machine *mach, 2808 unsigned attrib, 2809 unsigned chan ) 2810{ 2811 const float x = mach->QuadPos.xyzw[0].f[0]; 2812 const float y = mach->QuadPos.xyzw[1].f[0]; 2813 const float dadx = mach->InterpCoefs[attrib].dadx[chan]; 2814 const float dady = mach->InterpCoefs[attrib].dady[chan]; 2815 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; 2816 const float *w = mach->QuadPos.xyzw[3].f; 2817 /* divide by W here */ 2818 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; 2819 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; 2820 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; 2821 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; 2822} 2823 2824 2825typedef void (* eval_coef_func)( 2826 struct tgsi_exec_machine *mach, 2827 unsigned attrib, 2828 unsigned chan ); 2829 2830static void 2831exec_declaration(struct tgsi_exec_machine *mach, 2832 const struct tgsi_full_declaration *decl) 2833{ 2834 if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) { 2835 mach->SamplerViews[decl->Range.First] = decl->SamplerView; 2836 return; 2837 } 2838 2839 if (mach->ShaderType == PIPE_SHADER_FRAGMENT) { 2840 if (decl->Declaration.File == TGSI_FILE_INPUT) { 2841 uint first, last, mask; 2842 2843 first = decl->Range.First; 2844 last = decl->Range.Last; 2845 mask = decl->Declaration.UsageMask; 2846 2847 /* XXX we could remove this special-case code since 2848 * mach->InterpCoefs[first].a0 should already have the 2849 * front/back-face value. But we should first update the 2850 * ureg code to emit the right UsageMask value (WRITEMASK_X). 2851 * Then, we could remove the tgsi_exec_machine::Face field. 2852 */ 2853 /* XXX make FACE a system value */ 2854 if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) { 2855 uint i; 2856 2857 assert(decl->Semantic.Index == 0); 2858 assert(first == last); 2859 2860 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 2861 mach->Inputs[first].xyzw[0].f[i] = mach->Face; 2862 } 2863 } else { 2864 eval_coef_func eval; 2865 apply_sample_offset_func interp; 2866 uint i, j; 2867 2868 switch (decl->Interp.Interpolate) { 2869 case TGSI_INTERPOLATE_CONSTANT: 2870 eval = eval_constant_coef; 2871 interp = interp_constant_offset; 2872 break; 2873 2874 case TGSI_INTERPOLATE_LINEAR: 2875 eval = eval_linear_coef; 2876 interp = interp_linear_offset; 2877 break; 2878 2879 case TGSI_INTERPOLATE_PERSPECTIVE: 2880 eval = eval_perspective_coef; 2881 interp = interp_perspective_offset; 2882 break; 2883 2884 case TGSI_INTERPOLATE_COLOR: 2885 eval = mach->flatshade_color ? eval_constant_coef : eval_perspective_coef; 2886 interp = mach->flatshade_color ? interp_constant_offset : interp_perspective_offset; 2887 break; 2888 2889 default: 2890 assert(0); 2891 return; 2892 } 2893 2894 for (i = first; i <= last; i++) 2895 mach->InputSampleOffsetApply[i] = interp; 2896 2897 for (j = 0; j < TGSI_NUM_CHANNELS; j++) { 2898 if (mask & (1 << j)) { 2899 for (i = first; i <= last; i++) { 2900 eval(mach, i, j); 2901 } 2902 } 2903 } 2904 } 2905 2906 if (DEBUG_EXECUTION) { 2907 uint i, j; 2908 for (i = first; i <= last; ++i) { 2909 debug_printf("IN[%2u] = ", i); 2910 for (j = 0; j < TGSI_NUM_CHANNELS; j++) { 2911 if (j > 0) { 2912 debug_printf(" "); 2913 } 2914 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", 2915 mach->Inputs[i].xyzw[0].f[j], mach->Inputs[i].xyzw[0].u[j], 2916 mach->Inputs[i].xyzw[1].f[j], mach->Inputs[i].xyzw[1].u[j], 2917 mach->Inputs[i].xyzw[2].f[j], mach->Inputs[i].xyzw[2].u[j], 2918 mach->Inputs[i].xyzw[3].f[j], mach->Inputs[i].xyzw[3].u[j]); 2919 } 2920 } 2921 } 2922 } 2923 } 2924 2925} 2926 2927typedef void (* micro_unary_op)(union tgsi_exec_channel *dst, 2928 const union tgsi_exec_channel *src); 2929 2930static void 2931exec_scalar_unary(struct tgsi_exec_machine *mach, 2932 const struct tgsi_full_instruction *inst, 2933 micro_unary_op op, 2934 enum tgsi_exec_datatype src_datatype) 2935{ 2936 unsigned int chan; 2937 union tgsi_exec_channel src; 2938 union tgsi_exec_channel dst; 2939 2940 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype); 2941 op(&dst, &src); 2942 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2943 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2944 store_dest(mach, &dst, &inst->Dst[0], inst, chan); 2945 } 2946 } 2947} 2948 2949static void 2950exec_vector_unary(struct tgsi_exec_machine *mach, 2951 const struct tgsi_full_instruction *inst, 2952 micro_unary_op op, 2953 enum tgsi_exec_datatype src_datatype) 2954{ 2955 unsigned int chan; 2956 struct tgsi_exec_vector dst; 2957 2958 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2959 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2960 union tgsi_exec_channel src; 2961 2962 fetch_source(mach, &src, &inst->Src[0], chan, src_datatype); 2963 op(&dst.xyzw[chan], &src); 2964 } 2965 } 2966 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2967 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2968 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan); 2969 } 2970 } 2971} 2972 2973typedef void (* micro_binary_op)(union tgsi_exec_channel *dst, 2974 const union tgsi_exec_channel *src0, 2975 const union tgsi_exec_channel *src1); 2976 2977static void 2978exec_scalar_binary(struct tgsi_exec_machine *mach, 2979 const struct tgsi_full_instruction *inst, 2980 micro_binary_op op, 2981 enum tgsi_exec_datatype src_datatype) 2982{ 2983 unsigned int chan; 2984 union tgsi_exec_channel src[2]; 2985 union tgsi_exec_channel dst; 2986 2987 fetch_source(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, src_datatype); 2988 fetch_source(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, src_datatype); 2989 op(&dst, &src[0], &src[1]); 2990 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 2991 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 2992 store_dest(mach, &dst, &inst->Dst[0], inst, chan); 2993 } 2994 } 2995} 2996 2997static void 2998exec_vector_binary(struct tgsi_exec_machine *mach, 2999 const struct tgsi_full_instruction *inst, 3000 micro_binary_op op, 3001 enum tgsi_exec_datatype src_datatype) 3002{ 3003 unsigned int chan; 3004 struct tgsi_exec_vector dst; 3005 3006 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3007 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3008 union tgsi_exec_channel src[2]; 3009 3010 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); 3011 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); 3012 op(&dst.xyzw[chan], &src[0], &src[1]); 3013 } 3014 } 3015 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3016 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3017 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan); 3018 } 3019 } 3020} 3021 3022typedef void (* micro_trinary_op)(union tgsi_exec_channel *dst, 3023 const union tgsi_exec_channel *src0, 3024 const union tgsi_exec_channel *src1, 3025 const union tgsi_exec_channel *src2); 3026 3027static void 3028exec_vector_trinary(struct tgsi_exec_machine *mach, 3029 const struct tgsi_full_instruction *inst, 3030 micro_trinary_op op, 3031 enum tgsi_exec_datatype src_datatype) 3032{ 3033 unsigned int chan; 3034 struct tgsi_exec_vector dst; 3035 3036 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3037 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3038 union tgsi_exec_channel src[3]; 3039 3040 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); 3041 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); 3042 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype); 3043 op(&dst.xyzw[chan], &src[0], &src[1], &src[2]); 3044 } 3045 } 3046 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3047 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3048 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan); 3049 } 3050 } 3051} 3052 3053typedef void (* micro_quaternary_op)(union tgsi_exec_channel *dst, 3054 const union tgsi_exec_channel *src0, 3055 const union tgsi_exec_channel *src1, 3056 const union tgsi_exec_channel *src2, 3057 const union tgsi_exec_channel *src3); 3058 3059static void 3060exec_vector_quaternary(struct tgsi_exec_machine *mach, 3061 const struct tgsi_full_instruction *inst, 3062 micro_quaternary_op op, 3063 enum tgsi_exec_datatype src_datatype) 3064{ 3065 unsigned int chan; 3066 struct tgsi_exec_vector dst; 3067 3068 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3069 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3070 union tgsi_exec_channel src[4]; 3071 3072 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype); 3073 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype); 3074 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype); 3075 fetch_source(mach, &src[3], &inst->Src[3], chan, src_datatype); 3076 op(&dst.xyzw[chan], &src[0], &src[1], &src[2], &src[3]); 3077 } 3078 } 3079 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3080 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3081 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan); 3082 } 3083 } 3084} 3085 3086static void 3087exec_dp3(struct tgsi_exec_machine *mach, 3088 const struct tgsi_full_instruction *inst) 3089{ 3090 unsigned int chan; 3091 union tgsi_exec_channel arg[3]; 3092 3093 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3094 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3095 micro_mul(&arg[2], &arg[0], &arg[1]); 3096 3097 for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_Z; chan++) { 3098 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); 3099 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); 3100 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 3101 } 3102 3103 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3104 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3105 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan); 3106 } 3107 } 3108} 3109 3110static void 3111exec_dp4(struct tgsi_exec_machine *mach, 3112 const struct tgsi_full_instruction *inst) 3113{ 3114 unsigned int chan; 3115 union tgsi_exec_channel arg[3]; 3116 3117 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3118 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3119 micro_mul(&arg[2], &arg[0], &arg[1]); 3120 3121 for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_W; chan++) { 3122 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT); 3123 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT); 3124 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 3125 } 3126 3127 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3128 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3129 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan); 3130 } 3131 } 3132} 3133 3134static void 3135exec_dp2(struct tgsi_exec_machine *mach, 3136 const struct tgsi_full_instruction *inst) 3137{ 3138 unsigned int chan; 3139 union tgsi_exec_channel arg[3]; 3140 3141 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3142 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3143 micro_mul(&arg[2], &arg[0], &arg[1]); 3144 3145 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3146 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3147 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]); 3148 3149 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3150 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3151 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan); 3152 } 3153 } 3154} 3155 3156static void 3157exec_pk2h(struct tgsi_exec_machine *mach, 3158 const struct tgsi_full_instruction *inst) 3159{ 3160 unsigned chan; 3161 union tgsi_exec_channel arg[2], dst; 3162 3163 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3164 fetch_source(mach, &arg[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3165 for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) { 3166 dst.u[chan] = _mesa_float_to_half(arg[0].f[chan]) | 3167 (_mesa_float_to_half(arg[1].f[chan]) << 16); 3168 } 3169 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3170 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3171 store_dest(mach, &dst, &inst->Dst[0], inst, chan); 3172 } 3173 } 3174} 3175 3176static void 3177exec_up2h(struct tgsi_exec_machine *mach, 3178 const struct tgsi_full_instruction *inst) 3179{ 3180 unsigned chan; 3181 union tgsi_exec_channel arg, dst[2]; 3182 3183 fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT); 3184 for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) { 3185 dst[0].f[chan] = _mesa_half_to_float(arg.u[chan] & 0xffff); 3186 dst[1].f[chan] = _mesa_half_to_float(arg.u[chan] >> 16); 3187 } 3188 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3189 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3190 store_dest(mach, &dst[chan & 1], &inst->Dst[0], inst, chan); 3191 } 3192 } 3193} 3194 3195static void 3196micro_ucmp(union tgsi_exec_channel *dst, 3197 const union tgsi_exec_channel *src0, 3198 const union tgsi_exec_channel *src1, 3199 const union tgsi_exec_channel *src2) 3200{ 3201 dst->f[0] = src0->u[0] ? src1->f[0] : src2->f[0]; 3202 dst->f[1] = src0->u[1] ? src1->f[1] : src2->f[1]; 3203 dst->f[2] = src0->u[2] ? src1->f[2] : src2->f[2]; 3204 dst->f[3] = src0->u[3] ? src1->f[3] : src2->f[3]; 3205} 3206 3207static void 3208exec_ucmp(struct tgsi_exec_machine *mach, 3209 const struct tgsi_full_instruction *inst) 3210{ 3211 unsigned int chan; 3212 struct tgsi_exec_vector dst; 3213 3214 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3215 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3216 union tgsi_exec_channel src[3]; 3217 3218 fetch_source(mach, &src[0], &inst->Src[0], chan, 3219 TGSI_EXEC_DATA_UINT); 3220 fetch_source(mach, &src[1], &inst->Src[1], chan, 3221 TGSI_EXEC_DATA_FLOAT); 3222 fetch_source(mach, &src[2], &inst->Src[2], chan, 3223 TGSI_EXEC_DATA_FLOAT); 3224 micro_ucmp(&dst.xyzw[chan], &src[0], &src[1], &src[2]); 3225 } 3226 } 3227 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3228 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3229 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan); 3230 } 3231 } 3232} 3233 3234static void 3235exec_dst(struct tgsi_exec_machine *mach, 3236 const struct tgsi_full_instruction *inst) 3237{ 3238 union tgsi_exec_channel r[2]; 3239 union tgsi_exec_channel d[4]; 3240 3241 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3242 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3243 fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3244 micro_mul(&d[TGSI_CHAN_Y], &r[0], &r[1]); 3245 } 3246 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3247 fetch_source(mach, &d[TGSI_CHAN_Z], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT); 3248 } 3249 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3250 fetch_source(mach, &d[TGSI_CHAN_W], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3251 } 3252 3253 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3254 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X); 3255 } 3256 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3257 store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y); 3258 } 3259 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3260 store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z); 3261 } 3262 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3263 store_dest(mach, &d[TGSI_CHAN_W], &inst->Dst[0], inst, TGSI_CHAN_W); 3264 } 3265} 3266 3267static void 3268exec_log(struct tgsi_exec_machine *mach, 3269 const struct tgsi_full_instruction *inst) 3270{ 3271 union tgsi_exec_channel r[3]; 3272 3273 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3274 micro_abs(&r[2], &r[0]); /* r2 = abs(r0) */ 3275 micro_lg2(&r[1], &r[2]); /* r1 = lg2(r2) */ 3276 micro_flr(&r[0], &r[1]); /* r0 = floor(r1) */ 3277 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3278 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X); 3279 } 3280 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3281 micro_exp2(&r[0], &r[0]); /* r0 = 2 ^ r0 */ 3282 micro_div(&r[0], &r[2], &r[0]); /* r0 = r2 / r0 */ 3283 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_Y); 3284 } 3285 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3286 store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Z); 3287 } 3288 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3289 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W); 3290 } 3291} 3292 3293static void 3294exec_exp(struct tgsi_exec_machine *mach, 3295 const struct tgsi_full_instruction *inst) 3296{ 3297 union tgsi_exec_channel r[3]; 3298 3299 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3300 micro_flr(&r[1], &r[0]); /* r1 = floor(r0) */ 3301 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3302 micro_exp2(&r[2], &r[1]); /* r2 = 2 ^ r1 */ 3303 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_X); 3304 } 3305 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3306 micro_sub(&r[2], &r[0], &r[1]); /* r2 = r0 - r1 */ 3307 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Y); 3308 } 3309 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3310 micro_exp2(&r[2], &r[0]); /* r2 = 2 ^ r0 */ 3311 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Z); 3312 } 3313 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3314 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W); 3315 } 3316} 3317 3318static void 3319exec_lit(struct tgsi_exec_machine *mach, 3320 const struct tgsi_full_instruction *inst) 3321{ 3322 union tgsi_exec_channel r[3]; 3323 union tgsi_exec_channel d[3]; 3324 3325 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YZ) { 3326 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 3327 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 3328 fetch_source(mach, &r[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 3329 micro_max(&r[1], &r[1], &ZeroVec); 3330 3331 fetch_source(mach, &r[2], &inst->Src[0], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT); 3332 micro_min(&r[2], &r[2], &P128Vec); 3333 micro_max(&r[2], &r[2], &M128Vec); 3334 micro_pow(&r[1], &r[1], &r[2]); 3335 micro_lt(&d[TGSI_CHAN_Z], &ZeroVec, &r[0], &r[1], &ZeroVec); 3336 store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z); 3337 } 3338 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 3339 micro_max(&d[TGSI_CHAN_Y], &r[0], &ZeroVec); 3340 store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y); 3341 } 3342 } 3343 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 3344 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X); 3345 } 3346 3347 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 3348 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W); 3349 } 3350} 3351 3352static void 3353exec_break(struct tgsi_exec_machine *mach) 3354{ 3355 if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) { 3356 /* turn off loop channels for each enabled exec channel */ 3357 mach->LoopMask &= ~mach->ExecMask; 3358 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 3359 UPDATE_EXEC_MASK(mach); 3360 } else { 3361 assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH); 3362 3363 mach->Switch.mask = 0x0; 3364 3365 UPDATE_EXEC_MASK(mach); 3366 } 3367} 3368 3369static void 3370exec_switch(struct tgsi_exec_machine *mach, 3371 const struct tgsi_full_instruction *inst) 3372{ 3373 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); 3374 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); 3375 3376 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; 3377 fetch_source(mach, &mach->Switch.selector, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT); 3378 mach->Switch.mask = 0x0; 3379 mach->Switch.defaultMask = 0x0; 3380 3381 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; 3382 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH; 3383 3384 UPDATE_EXEC_MASK(mach); 3385} 3386 3387static void 3388exec_case(struct tgsi_exec_machine *mach, 3389 const struct tgsi_full_instruction *inst) 3390{ 3391 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; 3392 union tgsi_exec_channel src; 3393 uint mask = 0; 3394 3395 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT); 3396 3397 if (mach->Switch.selector.u[0] == src.u[0]) { 3398 mask |= 0x1; 3399 } 3400 if (mach->Switch.selector.u[1] == src.u[1]) { 3401 mask |= 0x2; 3402 } 3403 if (mach->Switch.selector.u[2] == src.u[2]) { 3404 mask |= 0x4; 3405 } 3406 if (mach->Switch.selector.u[3] == src.u[3]) { 3407 mask |= 0x8; 3408 } 3409 3410 mach->Switch.defaultMask |= mask; 3411 3412 mach->Switch.mask |= mask & prevMask; 3413 3414 UPDATE_EXEC_MASK(mach); 3415} 3416 3417/* FIXME: this will only work if default is last */ 3418static void 3419exec_default(struct tgsi_exec_machine *mach) 3420{ 3421 uint prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask; 3422 3423 mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask; 3424 3425 UPDATE_EXEC_MASK(mach); 3426} 3427 3428static void 3429exec_endswitch(struct tgsi_exec_machine *mach) 3430{ 3431 mach->Switch = mach->SwitchStack[--mach->SwitchStackTop]; 3432 mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; 3433 3434 UPDATE_EXEC_MASK(mach); 3435} 3436 3437typedef void (* micro_dop)(union tgsi_double_channel *dst, 3438 const union tgsi_double_channel *src); 3439 3440typedef void (* micro_dop_sop)(union tgsi_double_channel *dst, 3441 const union tgsi_double_channel *src0, 3442 union tgsi_exec_channel *src1); 3443 3444typedef void (* micro_dop_s)(union tgsi_double_channel *dst, 3445 const union tgsi_exec_channel *src); 3446 3447typedef void (* micro_sop_d)(union tgsi_exec_channel *dst, 3448 const union tgsi_double_channel *src); 3449 3450static void 3451fetch_double_channel(struct tgsi_exec_machine *mach, 3452 union tgsi_double_channel *chan, 3453 const struct tgsi_full_src_register *reg, 3454 uint chan_0, 3455 uint chan_1) 3456{ 3457 union tgsi_exec_channel src[2]; 3458 uint i; 3459 3460 fetch_source_d(mach, &src[0], reg, chan_0); 3461 fetch_source_d(mach, &src[1], reg, chan_1); 3462 3463 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 3464 chan->u[i][0] = src[0].u[i]; 3465 chan->u[i][1] = src[1].u[i]; 3466 } 3467 assert(!reg->Register.Absolute); 3468 assert(!reg->Register.Negate); 3469} 3470 3471static void 3472store_double_channel(struct tgsi_exec_machine *mach, 3473 const union tgsi_double_channel *chan, 3474 const struct tgsi_full_dst_register *reg, 3475 const struct tgsi_full_instruction *inst, 3476 uint chan_0, 3477 uint chan_1) 3478{ 3479 union tgsi_exec_channel dst[2]; 3480 uint i; 3481 union tgsi_double_channel temp; 3482 const uint execmask = mach->ExecMask; 3483 3484 if (!inst->Instruction.Saturate) { 3485 for (i = 0; i < TGSI_QUAD_SIZE; i++) 3486 if (execmask & (1 << i)) { 3487 dst[0].u[i] = chan->u[i][0]; 3488 dst[1].u[i] = chan->u[i][1]; 3489 } 3490 } 3491 else { 3492 for (i = 0; i < TGSI_QUAD_SIZE; i++) 3493 if (execmask & (1 << i)) { 3494 if (chan->d[i] < 0.0 || isnan(chan->d[i])) 3495 temp.d[i] = 0.0; 3496 else if (chan->d[i] > 1.0) 3497 temp.d[i] = 1.0; 3498 else 3499 temp.d[i] = chan->d[i]; 3500 3501 dst[0].u[i] = temp.u[i][0]; 3502 dst[1].u[i] = temp.u[i][1]; 3503 } 3504 } 3505 3506 store_dest_double(mach, &dst[0], reg, chan_0); 3507 if (chan_1 != (unsigned)-1) 3508 store_dest_double(mach, &dst[1], reg, chan_1); 3509} 3510 3511static void 3512exec_double_unary(struct tgsi_exec_machine *mach, 3513 const struct tgsi_full_instruction *inst, 3514 micro_dop op) 3515{ 3516 union tgsi_double_channel src; 3517 union tgsi_double_channel dst; 3518 3519 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) { 3520 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 3521 op(&dst, &src); 3522 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 3523 } 3524 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) { 3525 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 3526 op(&dst, &src); 3527 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 3528 } 3529} 3530 3531static void 3532exec_double_binary(struct tgsi_exec_machine *mach, 3533 const struct tgsi_full_instruction *inst, 3534 micro_dop op, 3535 enum tgsi_exec_datatype dst_datatype) 3536{ 3537 union tgsi_double_channel src[2]; 3538 union tgsi_double_channel dst; 3539 int first_dest_chan, second_dest_chan; 3540 int wmask; 3541 3542 wmask = inst->Dst[0].Register.WriteMask; 3543 /* these are & because of the way DSLT etc store their destinations */ 3544 if (wmask & TGSI_WRITEMASK_XY) { 3545 first_dest_chan = TGSI_CHAN_X; 3546 second_dest_chan = TGSI_CHAN_Y; 3547 if (dst_datatype == TGSI_EXEC_DATA_UINT) { 3548 first_dest_chan = (wmask & TGSI_WRITEMASK_X) ? TGSI_CHAN_X : TGSI_CHAN_Y; 3549 second_dest_chan = -1; 3550 } 3551 3552 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 3553 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y); 3554 op(&dst, src); 3555 store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan); 3556 } 3557 3558 if (wmask & TGSI_WRITEMASK_ZW) { 3559 first_dest_chan = TGSI_CHAN_Z; 3560 second_dest_chan = TGSI_CHAN_W; 3561 if (dst_datatype == TGSI_EXEC_DATA_UINT) { 3562 first_dest_chan = (wmask & TGSI_WRITEMASK_Z) ? TGSI_CHAN_Z : TGSI_CHAN_W; 3563 second_dest_chan = -1; 3564 } 3565 3566 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 3567 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W); 3568 op(&dst, src); 3569 store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan); 3570 } 3571} 3572 3573static void 3574exec_double_trinary(struct tgsi_exec_machine *mach, 3575 const struct tgsi_full_instruction *inst, 3576 micro_dop op) 3577{ 3578 union tgsi_double_channel src[3]; 3579 union tgsi_double_channel dst; 3580 3581 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) { 3582 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 3583 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y); 3584 fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_X, TGSI_CHAN_Y); 3585 op(&dst, src); 3586 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 3587 } 3588 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) { 3589 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 3590 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W); 3591 fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_Z, TGSI_CHAN_W); 3592 op(&dst, src); 3593 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 3594 } 3595} 3596 3597static void 3598exec_dldexp(struct tgsi_exec_machine *mach, 3599 const struct tgsi_full_instruction *inst) 3600{ 3601 union tgsi_double_channel src0; 3602 union tgsi_exec_channel src1; 3603 union tgsi_double_channel dst; 3604 int wmask; 3605 3606 wmask = inst->Dst[0].Register.WriteMask; 3607 if (wmask & TGSI_WRITEMASK_XY) { 3608 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 3609 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT); 3610 micro_dldexp(&dst, &src0, &src1); 3611 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 3612 } 3613 3614 if (wmask & TGSI_WRITEMASK_ZW) { 3615 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 3616 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT); 3617 micro_dldexp(&dst, &src0, &src1); 3618 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 3619 } 3620} 3621 3622static void 3623exec_dfracexp(struct tgsi_exec_machine *mach, 3624 const struct tgsi_full_instruction *inst) 3625{ 3626 union tgsi_double_channel src; 3627 union tgsi_double_channel dst; 3628 union tgsi_exec_channel dst_exp; 3629 3630 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 3631 micro_dfracexp(&dst, &dst_exp, &src); 3632 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) 3633 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 3634 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) 3635 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 3636 for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3637 if (inst->Dst[1].Register.WriteMask & (1 << chan)) 3638 store_dest(mach, &dst_exp, &inst->Dst[1], inst, chan); 3639 } 3640} 3641 3642static void 3643exec_arg0_64_arg1_32(struct tgsi_exec_machine *mach, 3644 const struct tgsi_full_instruction *inst, 3645 micro_dop_sop op) 3646{ 3647 union tgsi_double_channel src0; 3648 union tgsi_exec_channel src1; 3649 union tgsi_double_channel dst; 3650 int wmask; 3651 3652 wmask = inst->Dst[0].Register.WriteMask; 3653 if (wmask & TGSI_WRITEMASK_XY) { 3654 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 3655 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT); 3656 op(&dst, &src0, &src1); 3657 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 3658 } 3659 3660 if (wmask & TGSI_WRITEMASK_ZW) { 3661 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 3662 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT); 3663 op(&dst, &src0, &src1); 3664 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 3665 } 3666} 3667 3668static int 3669get_image_coord_dim(unsigned tgsi_tex) 3670{ 3671 int dim; 3672 switch (tgsi_tex) { 3673 case TGSI_TEXTURE_BUFFER: 3674 case TGSI_TEXTURE_1D: 3675 dim = 1; 3676 break; 3677 case TGSI_TEXTURE_2D: 3678 case TGSI_TEXTURE_RECT: 3679 case TGSI_TEXTURE_1D_ARRAY: 3680 case TGSI_TEXTURE_2D_MSAA: 3681 dim = 2; 3682 break; 3683 case TGSI_TEXTURE_3D: 3684 case TGSI_TEXTURE_CUBE: 3685 case TGSI_TEXTURE_2D_ARRAY: 3686 case TGSI_TEXTURE_2D_ARRAY_MSAA: 3687 case TGSI_TEXTURE_CUBE_ARRAY: 3688 dim = 3; 3689 break; 3690 default: 3691 assert(!"unknown texture target"); 3692 dim = 0; 3693 break; 3694 } 3695 3696 return dim; 3697} 3698 3699static int 3700get_image_coord_sample(unsigned tgsi_tex) 3701{ 3702 int sample = 0; 3703 switch (tgsi_tex) { 3704 case TGSI_TEXTURE_2D_MSAA: 3705 sample = 3; 3706 break; 3707 case TGSI_TEXTURE_2D_ARRAY_MSAA: 3708 sample = 4; 3709 break; 3710 default: 3711 break; 3712 } 3713 return sample; 3714} 3715 3716static void 3717exec_load_img(struct tgsi_exec_machine *mach, 3718 const struct tgsi_full_instruction *inst) 3719{ 3720 union tgsi_exec_channel r[4], sample_r; 3721 uint unit; 3722 int sample; 3723 int i, j; 3724 int dim; 3725 uint chan; 3726 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 3727 struct tgsi_image_params params; 3728 3729 unit = fetch_sampler_unit(mach, inst, 0); 3730 dim = get_image_coord_dim(inst->Memory.Texture); 3731 sample = get_image_coord_sample(inst->Memory.Texture); 3732 assert(dim <= 3); 3733 3734 params.execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask; 3735 params.unit = unit; 3736 params.tgsi_tex_instr = inst->Memory.Texture; 3737 params.format = inst->Memory.Format; 3738 3739 for (i = 0; i < dim; i++) { 3740 IFETCH(&r[i], 1, TGSI_CHAN_X + i); 3741 } 3742 3743 if (sample) 3744 IFETCH(&sample_r, 1, TGSI_CHAN_X + sample); 3745 3746 mach->Image->load(mach->Image, ¶ms, 3747 r[0].i, r[1].i, r[2].i, sample_r.i, 3748 rgba); 3749 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 3750 r[0].f[j] = rgba[0][j]; 3751 r[1].f[j] = rgba[1][j]; 3752 r[2].f[j] = rgba[2][j]; 3753 r[3].f[j] = rgba[3][j]; 3754 } 3755 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3756 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3757 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan); 3758 } 3759 } 3760} 3761 3762static void 3763exec_load_membuf(struct tgsi_exec_machine *mach, 3764 const struct tgsi_full_instruction *inst) 3765{ 3766 uint32_t unit = fetch_sampler_unit(mach, inst, 0); 3767 3768 uint32_t size; 3769 const char *ptr; 3770 switch (inst->Src[0].Register.File) { 3771 case TGSI_FILE_MEMORY: 3772 ptr = mach->LocalMem; 3773 size = mach->LocalMemSize; 3774 break; 3775 3776 case TGSI_FILE_BUFFER: 3777 ptr = mach->Buffer->lookup(mach->Buffer, unit, &size); 3778 break; 3779 3780 case TGSI_FILE_CONSTANT: 3781 if (unit < ARRAY_SIZE(mach->Consts)) { 3782 ptr = mach->Consts[unit]; 3783 size = mach->ConstsSize[unit]; 3784 } else { 3785 ptr = NULL; 3786 size = 0; 3787 } 3788 break; 3789 3790 default: 3791 unreachable("unsupported TGSI_OPCODE_LOAD file"); 3792 } 3793 3794 union tgsi_exec_channel offset; 3795 IFETCH(&offset, 1, TGSI_CHAN_X); 3796 3797 assert(inst->Dst[0].Register.WriteMask); 3798 uint32_t load_size = util_last_bit(inst->Dst[0].Register.WriteMask) * 4; 3799 3800 union tgsi_exec_channel rgba[TGSI_NUM_CHANNELS]; 3801 memset(&rgba, 0, sizeof(rgba)); 3802 for (int j = 0; j < TGSI_QUAD_SIZE; j++) { 3803 if (size >= load_size && offset.u[j] <= (size - load_size)) { 3804 for (int chan = 0; chan < load_size / 4; chan++) 3805 rgba[chan].u[j] = *(uint32_t *)(ptr + offset.u[j] + chan * 4); 3806 } 3807 } 3808 3809 for (int chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3810 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3811 store_dest(mach, &rgba[chan], &inst->Dst[0], inst, chan); 3812 } 3813 } 3814} 3815 3816static void 3817exec_load(struct tgsi_exec_machine *mach, 3818 const struct tgsi_full_instruction *inst) 3819{ 3820 if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) 3821 exec_load_img(mach, inst); 3822 else 3823 exec_load_membuf(mach, inst); 3824} 3825 3826static uint 3827fetch_store_img_unit(struct tgsi_exec_machine *mach, 3828 const struct tgsi_full_dst_register *dst) 3829{ 3830 uint unit = 0; 3831 int i; 3832 if (dst->Register.Indirect) { 3833 union tgsi_exec_channel indir_index, index2; 3834 const uint execmask = mach->ExecMask; 3835 index2.i[0] = 3836 index2.i[1] = 3837 index2.i[2] = 3838 index2.i[3] = dst->Indirect.Index; 3839 3840 fetch_src_file_channel(mach, 3841 dst->Indirect.File, 3842 dst->Indirect.Swizzle, 3843 &index2, 3844 &ZeroVec, 3845 &indir_index); 3846 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 3847 if (execmask & (1 << i)) { 3848 unit = dst->Register.Index + indir_index.i[i]; 3849 break; 3850 } 3851 } 3852 } else { 3853 unit = dst->Register.Index; 3854 } 3855 return unit; 3856} 3857 3858static void 3859exec_store_img(struct tgsi_exec_machine *mach, 3860 const struct tgsi_full_instruction *inst) 3861{ 3862 union tgsi_exec_channel r[3], sample_r; 3863 union tgsi_exec_channel value[4]; 3864 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 3865 struct tgsi_image_params params; 3866 int dim; 3867 int sample; 3868 int i, j; 3869 uint unit; 3870 unit = fetch_store_img_unit(mach, &inst->Dst[0]); 3871 dim = get_image_coord_dim(inst->Memory.Texture); 3872 sample = get_image_coord_sample(inst->Memory.Texture); 3873 assert(dim <= 3); 3874 3875 params.execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask; 3876 params.unit = unit; 3877 params.tgsi_tex_instr = inst->Memory.Texture; 3878 params.format = inst->Memory.Format; 3879 3880 for (i = 0; i < dim; i++) { 3881 IFETCH(&r[i], 0, TGSI_CHAN_X + i); 3882 } 3883 3884 for (i = 0; i < 4; i++) { 3885 FETCH(&value[i], 1, TGSI_CHAN_X + i); 3886 } 3887 if (sample) 3888 IFETCH(&sample_r, 0, TGSI_CHAN_X + sample); 3889 3890 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 3891 rgba[0][j] = value[0].f[j]; 3892 rgba[1][j] = value[1].f[j]; 3893 rgba[2][j] = value[2].f[j]; 3894 rgba[3][j] = value[3].f[j]; 3895 } 3896 3897 mach->Image->store(mach->Image, ¶ms, 3898 r[0].i, r[1].i, r[2].i, sample_r.i, 3899 rgba); 3900} 3901 3902static void 3903exec_store_buf(struct tgsi_exec_machine *mach, 3904 const struct tgsi_full_instruction *inst) 3905{ 3906 uint32_t unit = fetch_store_img_unit(mach, &inst->Dst[0]); 3907 uint32_t size; 3908 char *ptr = mach->Buffer->lookup(mach->Buffer, unit, &size); 3909 3910 int execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask; 3911 3912 union tgsi_exec_channel offset; 3913 IFETCH(&offset, 0, TGSI_CHAN_X); 3914 3915 union tgsi_exec_channel value[4]; 3916 for (int i = 0; i < 4; i++) 3917 FETCH(&value[i], 1, TGSI_CHAN_X + i); 3918 3919 for (int j = 0; j < TGSI_QUAD_SIZE; j++) { 3920 if (!(execmask & (1 << j))) 3921 continue; 3922 if (size < offset.u[j]) 3923 continue; 3924 3925 uint32_t *invocation_ptr = (uint32_t *)(ptr + offset.u[j]); 3926 uint32_t size_avail = size - offset.u[j]; 3927 3928 for (int chan = 0; chan < MIN2(4, size_avail / 4); chan++) { 3929 if (inst->Dst[0].Register.WriteMask & (1 << chan)) 3930 memcpy(&invocation_ptr[chan], &value[chan].u[j], 4); 3931 } 3932 } 3933} 3934 3935static void 3936exec_store_mem(struct tgsi_exec_machine *mach, 3937 const struct tgsi_full_instruction *inst) 3938{ 3939 union tgsi_exec_channel r[3]; 3940 union tgsi_exec_channel value[4]; 3941 uint i, chan; 3942 char *ptr = mach->LocalMem; 3943 int execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask; 3944 3945 IFETCH(&r[0], 0, TGSI_CHAN_X); 3946 3947 for (i = 0; i < 4; i++) { 3948 FETCH(&value[i], 1, TGSI_CHAN_X + i); 3949 } 3950 3951 if (r[0].u[0] >= mach->LocalMemSize) 3952 return; 3953 ptr += r[0].u[0]; 3954 3955 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 3956 if (execmask & (1 << i)) { 3957 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 3958 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 3959 memcpy(ptr + (chan * 4), &value[chan].u[0], 4); 3960 } 3961 } 3962 } 3963 } 3964} 3965 3966static void 3967exec_store(struct tgsi_exec_machine *mach, 3968 const struct tgsi_full_instruction *inst) 3969{ 3970 if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE) 3971 exec_store_img(mach, inst); 3972 else if (inst->Dst[0].Register.File == TGSI_FILE_BUFFER) 3973 exec_store_buf(mach, inst); 3974 else if (inst->Dst[0].Register.File == TGSI_FILE_MEMORY) 3975 exec_store_mem(mach, inst); 3976} 3977 3978static void 3979exec_atomop_img(struct tgsi_exec_machine *mach, 3980 const struct tgsi_full_instruction *inst) 3981{ 3982 union tgsi_exec_channel r[4], sample_r; 3983 union tgsi_exec_channel value[4], value2[4]; 3984 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 3985 float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE]; 3986 struct tgsi_image_params params; 3987 int dim; 3988 int sample; 3989 int i, j; 3990 uint unit, chan; 3991 unit = fetch_sampler_unit(mach, inst, 0); 3992 dim = get_image_coord_dim(inst->Memory.Texture); 3993 sample = get_image_coord_sample(inst->Memory.Texture); 3994 assert(dim <= 3); 3995 3996 params.execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask; 3997 params.unit = unit; 3998 params.tgsi_tex_instr = inst->Memory.Texture; 3999 params.format = inst->Memory.Format; 4000 4001 for (i = 0; i < dim; i++) { 4002 IFETCH(&r[i], 1, TGSI_CHAN_X + i); 4003 } 4004 4005 for (i = 0; i < 4; i++) { 4006 FETCH(&value[i], 2, TGSI_CHAN_X + i); 4007 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) 4008 FETCH(&value2[i], 3, TGSI_CHAN_X + i); 4009 } 4010 if (sample) 4011 IFETCH(&sample_r, 1, TGSI_CHAN_X + sample); 4012 4013 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 4014 rgba[0][j] = value[0].f[j]; 4015 rgba[1][j] = value[1].f[j]; 4016 rgba[2][j] = value[2].f[j]; 4017 rgba[3][j] = value[3].f[j]; 4018 } 4019 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) { 4020 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 4021 rgba2[0][j] = value2[0].f[j]; 4022 rgba2[1][j] = value2[1].f[j]; 4023 rgba2[2][j] = value2[2].f[j]; 4024 rgba2[3][j] = value2[3].f[j]; 4025 } 4026 } 4027 4028 mach->Image->op(mach->Image, ¶ms, inst->Instruction.Opcode, 4029 r[0].i, r[1].i, r[2].i, sample_r.i, 4030 rgba, rgba2); 4031 4032 for (j = 0; j < TGSI_QUAD_SIZE; j++) { 4033 r[0].f[j] = rgba[0][j]; 4034 r[1].f[j] = rgba[1][j]; 4035 r[2].f[j] = rgba[2][j]; 4036 r[3].f[j] = rgba[3][j]; 4037 } 4038 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4039 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 4040 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan); 4041 } 4042 } 4043} 4044 4045static void 4046exec_atomop_membuf(struct tgsi_exec_machine *mach, 4047 const struct tgsi_full_instruction *inst) 4048{ 4049 union tgsi_exec_channel offset, r0, r1; 4050 uint chan, i; 4051 int execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask; 4052 IFETCH(&offset, 1, TGSI_CHAN_X); 4053 4054 if (!(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X)) 4055 return; 4056 4057 void *ptr[TGSI_QUAD_SIZE]; 4058 if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) { 4059 uint32_t unit = fetch_sampler_unit(mach, inst, 0); 4060 uint32_t size; 4061 char *buffer = mach->Buffer->lookup(mach->Buffer, unit, &size); 4062 for (int i = 0; i < TGSI_QUAD_SIZE; i++) { 4063 if (likely(size >= 4 && offset.u[i] <= size - 4)) 4064 ptr[i] = buffer + offset.u[i]; 4065 else 4066 ptr[i] = NULL; 4067 } 4068 } else { 4069 assert(inst->Src[0].Register.File == TGSI_FILE_MEMORY); 4070 4071 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 4072 if (likely(mach->LocalMemSize >= 4 && offset.u[i] <= mach->LocalMemSize - 4)) 4073 ptr[i] = (char *)mach->LocalMem + offset.u[i]; 4074 else 4075 ptr[i] = NULL; 4076 } 4077 } 4078 4079 FETCH(&r0, 2, TGSI_CHAN_X); 4080 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) 4081 FETCH(&r1, 3, TGSI_CHAN_X); 4082 4083 /* The load/op/store sequence has to happen inside the loop since ptr 4084 * may have the same ptr in some of the invocations. 4085 */ 4086 for (int i = 0; i < TGSI_QUAD_SIZE; i++) { 4087 if (!(execmask & (1 << i))) 4088 continue; 4089 4090 uint32_t val = 0; 4091 if (ptr[i]) { 4092 memcpy(&val, ptr[i], sizeof(val)); 4093 4094 uint32_t result; 4095 switch (inst->Instruction.Opcode) { 4096 case TGSI_OPCODE_ATOMUADD: 4097 result = val + r0.u[i]; 4098 break; 4099 case TGSI_OPCODE_ATOMXOR: 4100 result = val ^ r0.u[i]; 4101 break; 4102 case TGSI_OPCODE_ATOMOR: 4103 result = val | r0.u[i]; 4104 break; 4105 case TGSI_OPCODE_ATOMAND: 4106 result = val & r0.u[i]; 4107 break; 4108 case TGSI_OPCODE_ATOMUMIN: 4109 result = MIN2(val, r0.u[i]); 4110 break; 4111 case TGSI_OPCODE_ATOMUMAX: 4112 result = MAX2(val, r0.u[i]); 4113 break; 4114 case TGSI_OPCODE_ATOMIMIN: 4115 result = MIN2((int32_t)val, r0.i[i]); 4116 break; 4117 case TGSI_OPCODE_ATOMIMAX: 4118 result = MAX2((int32_t)val, r0.i[i]); 4119 break; 4120 case TGSI_OPCODE_ATOMXCHG: 4121 result = r0.u[i]; 4122 break; 4123 case TGSI_OPCODE_ATOMCAS: 4124 if (val == r0.u[i]) 4125 result = r1.u[i]; 4126 else 4127 result = val; 4128 break; 4129 case TGSI_OPCODE_ATOMFADD: 4130 result = fui(uif(val) + r0.f[i]); 4131 break; 4132 default: 4133 unreachable("bad atomic op"); 4134 } 4135 memcpy(ptr[i], &result, sizeof(result)); 4136 } 4137 4138 r0.u[i] = val; 4139 } 4140 4141 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) 4142 store_dest(mach, &r0, &inst->Dst[0], inst, chan); 4143} 4144 4145static void 4146exec_atomop(struct tgsi_exec_machine *mach, 4147 const struct tgsi_full_instruction *inst) 4148{ 4149 if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) 4150 exec_atomop_img(mach, inst); 4151 else 4152 exec_atomop_membuf(mach, inst); 4153} 4154 4155static void 4156exec_resq_img(struct tgsi_exec_machine *mach, 4157 const struct tgsi_full_instruction *inst) 4158{ 4159 int result[4]; 4160 union tgsi_exec_channel r[4]; 4161 uint unit; 4162 int i, chan, j; 4163 struct tgsi_image_params params; 4164 4165 unit = fetch_sampler_unit(mach, inst, 0); 4166 4167 params.execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask; 4168 params.unit = unit; 4169 params.tgsi_tex_instr = inst->Memory.Texture; 4170 params.format = inst->Memory.Format; 4171 4172 mach->Image->get_dims(mach->Image, ¶ms, result); 4173 4174 for (i = 0; i < TGSI_QUAD_SIZE; i++) { 4175 for (j = 0; j < 4; j++) { 4176 r[j].i[i] = result[j]; 4177 } 4178 } 4179 4180 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4181 if (inst->Dst[0].Register.WriteMask & (1 << chan)) { 4182 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan); 4183 } 4184 } 4185} 4186 4187static void 4188exec_resq_buf(struct tgsi_exec_machine *mach, 4189 const struct tgsi_full_instruction *inst) 4190{ 4191 uint32_t unit = fetch_sampler_unit(mach, inst, 0); 4192 uint32_t size; 4193 (void)mach->Buffer->lookup(mach->Buffer, unit, &size); 4194 4195 union tgsi_exec_channel r; 4196 for (int i = 0; i < TGSI_QUAD_SIZE; i++) 4197 r.i[i] = size; 4198 4199 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 4200 for (int chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4201 store_dest(mach, &r, &inst->Dst[0], inst, TGSI_CHAN_X); 4202 } 4203 } 4204} 4205 4206static void 4207exec_resq(struct tgsi_exec_machine *mach, 4208 const struct tgsi_full_instruction *inst) 4209{ 4210 if (inst->Src[0].Register.File == TGSI_FILE_IMAGE) 4211 exec_resq_img(mach, inst); 4212 else 4213 exec_resq_buf(mach, inst); 4214} 4215 4216static void 4217micro_f2u64(union tgsi_double_channel *dst, 4218 const union tgsi_exec_channel *src) 4219{ 4220 dst->u64[0] = (uint64_t)src->f[0]; 4221 dst->u64[1] = (uint64_t)src->f[1]; 4222 dst->u64[2] = (uint64_t)src->f[2]; 4223 dst->u64[3] = (uint64_t)src->f[3]; 4224} 4225 4226static void 4227micro_f2i64(union tgsi_double_channel *dst, 4228 const union tgsi_exec_channel *src) 4229{ 4230 dst->i64[0] = (int64_t)src->f[0]; 4231 dst->i64[1] = (int64_t)src->f[1]; 4232 dst->i64[2] = (int64_t)src->f[2]; 4233 dst->i64[3] = (int64_t)src->f[3]; 4234} 4235 4236static void 4237micro_u2i64(union tgsi_double_channel *dst, 4238 const union tgsi_exec_channel *src) 4239{ 4240 dst->u64[0] = (uint64_t)src->u[0]; 4241 dst->u64[1] = (uint64_t)src->u[1]; 4242 dst->u64[2] = (uint64_t)src->u[2]; 4243 dst->u64[3] = (uint64_t)src->u[3]; 4244} 4245 4246static void 4247micro_i2i64(union tgsi_double_channel *dst, 4248 const union tgsi_exec_channel *src) 4249{ 4250 dst->i64[0] = (int64_t)src->i[0]; 4251 dst->i64[1] = (int64_t)src->i[1]; 4252 dst->i64[2] = (int64_t)src->i[2]; 4253 dst->i64[3] = (int64_t)src->i[3]; 4254} 4255 4256static void 4257micro_d2u64(union tgsi_double_channel *dst, 4258 const union tgsi_double_channel *src) 4259{ 4260 dst->u64[0] = (uint64_t)src->d[0]; 4261 dst->u64[1] = (uint64_t)src->d[1]; 4262 dst->u64[2] = (uint64_t)src->d[2]; 4263 dst->u64[3] = (uint64_t)src->d[3]; 4264} 4265 4266static void 4267micro_d2i64(union tgsi_double_channel *dst, 4268 const union tgsi_double_channel *src) 4269{ 4270 dst->i64[0] = (int64_t)src->d[0]; 4271 dst->i64[1] = (int64_t)src->d[1]; 4272 dst->i64[2] = (int64_t)src->d[2]; 4273 dst->i64[3] = (int64_t)src->d[3]; 4274} 4275 4276static void 4277micro_u642d(union tgsi_double_channel *dst, 4278 const union tgsi_double_channel *src) 4279{ 4280 dst->d[0] = (double)src->u64[0]; 4281 dst->d[1] = (double)src->u64[1]; 4282 dst->d[2] = (double)src->u64[2]; 4283 dst->d[3] = (double)src->u64[3]; 4284} 4285 4286static void 4287micro_i642d(union tgsi_double_channel *dst, 4288 const union tgsi_double_channel *src) 4289{ 4290 dst->d[0] = (double)src->i64[0]; 4291 dst->d[1] = (double)src->i64[1]; 4292 dst->d[2] = (double)src->i64[2]; 4293 dst->d[3] = (double)src->i64[3]; 4294} 4295 4296static void 4297micro_u642f(union tgsi_exec_channel *dst, 4298 const union tgsi_double_channel *src) 4299{ 4300 dst->f[0] = (float)src->u64[0]; 4301 dst->f[1] = (float)src->u64[1]; 4302 dst->f[2] = (float)src->u64[2]; 4303 dst->f[3] = (float)src->u64[3]; 4304} 4305 4306static void 4307micro_i642f(union tgsi_exec_channel *dst, 4308 const union tgsi_double_channel *src) 4309{ 4310 dst->f[0] = (float)src->i64[0]; 4311 dst->f[1] = (float)src->i64[1]; 4312 dst->f[2] = (float)src->i64[2]; 4313 dst->f[3] = (float)src->i64[3]; 4314} 4315 4316static void 4317exec_t_2_64(struct tgsi_exec_machine *mach, 4318 const struct tgsi_full_instruction *inst, 4319 micro_dop_s op, 4320 enum tgsi_exec_datatype src_datatype) 4321{ 4322 union tgsi_exec_channel src; 4323 union tgsi_double_channel dst; 4324 4325 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) { 4326 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype); 4327 op(&dst, &src); 4328 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y); 4329 } 4330 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) { 4331 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, src_datatype); 4332 op(&dst, &src); 4333 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W); 4334 } 4335} 4336 4337static void 4338exec_64_2_t(struct tgsi_exec_machine *mach, 4339 const struct tgsi_full_instruction *inst, 4340 micro_sop_d op) 4341{ 4342 union tgsi_double_channel src; 4343 union tgsi_exec_channel dst; 4344 int wm = inst->Dst[0].Register.WriteMask; 4345 int i; 4346 int bit; 4347 for (i = 0; i < 2; i++) { 4348 bit = ffs(wm); 4349 if (bit) { 4350 wm &= ~(1 << (bit - 1)); 4351 if (i == 0) 4352 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y); 4353 else 4354 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W); 4355 op(&dst, &src); 4356 store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1); 4357 } 4358 } 4359} 4360 4361static void 4362micro_i2f(union tgsi_exec_channel *dst, 4363 const union tgsi_exec_channel *src) 4364{ 4365 dst->f[0] = (float)src->i[0]; 4366 dst->f[1] = (float)src->i[1]; 4367 dst->f[2] = (float)src->i[2]; 4368 dst->f[3] = (float)src->i[3]; 4369} 4370 4371static void 4372micro_not(union tgsi_exec_channel *dst, 4373 const union tgsi_exec_channel *src) 4374{ 4375 dst->u[0] = ~src->u[0]; 4376 dst->u[1] = ~src->u[1]; 4377 dst->u[2] = ~src->u[2]; 4378 dst->u[3] = ~src->u[3]; 4379} 4380 4381static void 4382micro_shl(union tgsi_exec_channel *dst, 4383 const union tgsi_exec_channel *src0, 4384 const union tgsi_exec_channel *src1) 4385{ 4386 unsigned masked_count; 4387 masked_count = src1->u[0] & 0x1f; 4388 dst->u[0] = src0->u[0] << masked_count; 4389 masked_count = src1->u[1] & 0x1f; 4390 dst->u[1] = src0->u[1] << masked_count; 4391 masked_count = src1->u[2] & 0x1f; 4392 dst->u[2] = src0->u[2] << masked_count; 4393 masked_count = src1->u[3] & 0x1f; 4394 dst->u[3] = src0->u[3] << masked_count; 4395} 4396 4397static void 4398micro_and(union tgsi_exec_channel *dst, 4399 const union tgsi_exec_channel *src0, 4400 const union tgsi_exec_channel *src1) 4401{ 4402 dst->u[0] = src0->u[0] & src1->u[0]; 4403 dst->u[1] = src0->u[1] & src1->u[1]; 4404 dst->u[2] = src0->u[2] & src1->u[2]; 4405 dst->u[3] = src0->u[3] & src1->u[3]; 4406} 4407 4408static void 4409micro_or(union tgsi_exec_channel *dst, 4410 const union tgsi_exec_channel *src0, 4411 const union tgsi_exec_channel *src1) 4412{ 4413 dst->u[0] = src0->u[0] | src1->u[0]; 4414 dst->u[1] = src0->u[1] | src1->u[1]; 4415 dst->u[2] = src0->u[2] | src1->u[2]; 4416 dst->u[3] = src0->u[3] | src1->u[3]; 4417} 4418 4419static void 4420micro_xor(union tgsi_exec_channel *dst, 4421 const union tgsi_exec_channel *src0, 4422 const union tgsi_exec_channel *src1) 4423{ 4424 dst->u[0] = src0->u[0] ^ src1->u[0]; 4425 dst->u[1] = src0->u[1] ^ src1->u[1]; 4426 dst->u[2] = src0->u[2] ^ src1->u[2]; 4427 dst->u[3] = src0->u[3] ^ src1->u[3]; 4428} 4429 4430static void 4431micro_mod(union tgsi_exec_channel *dst, 4432 const union tgsi_exec_channel *src0, 4433 const union tgsi_exec_channel *src1) 4434{ 4435 dst->i[0] = src1->i[0] ? src0->i[0] % src1->i[0] : ~0; 4436 dst->i[1] = src1->i[1] ? src0->i[1] % src1->i[1] : ~0; 4437 dst->i[2] = src1->i[2] ? src0->i[2] % src1->i[2] : ~0; 4438 dst->i[3] = src1->i[3] ? src0->i[3] % src1->i[3] : ~0; 4439} 4440 4441static void 4442micro_f2i(union tgsi_exec_channel *dst, 4443 const union tgsi_exec_channel *src) 4444{ 4445 dst->i[0] = (int)src->f[0]; 4446 dst->i[1] = (int)src->f[1]; 4447 dst->i[2] = (int)src->f[2]; 4448 dst->i[3] = (int)src->f[3]; 4449} 4450 4451static void 4452micro_fseq(union tgsi_exec_channel *dst, 4453 const union tgsi_exec_channel *src0, 4454 const union tgsi_exec_channel *src1) 4455{ 4456 dst->u[0] = src0->f[0] == src1->f[0] ? ~0 : 0; 4457 dst->u[1] = src0->f[1] == src1->f[1] ? ~0 : 0; 4458 dst->u[2] = src0->f[2] == src1->f[2] ? ~0 : 0; 4459 dst->u[3] = src0->f[3] == src1->f[3] ? ~0 : 0; 4460} 4461 4462static void 4463micro_fsge(union tgsi_exec_channel *dst, 4464 const union tgsi_exec_channel *src0, 4465 const union tgsi_exec_channel *src1) 4466{ 4467 dst->u[0] = src0->f[0] >= src1->f[0] ? ~0 : 0; 4468 dst->u[1] = src0->f[1] >= src1->f[1] ? ~0 : 0; 4469 dst->u[2] = src0->f[2] >= src1->f[2] ? ~0 : 0; 4470 dst->u[3] = src0->f[3] >= src1->f[3] ? ~0 : 0; 4471} 4472 4473static void 4474micro_fslt(union tgsi_exec_channel *dst, 4475 const union tgsi_exec_channel *src0, 4476 const union tgsi_exec_channel *src1) 4477{ 4478 dst->u[0] = src0->f[0] < src1->f[0] ? ~0 : 0; 4479 dst->u[1] = src0->f[1] < src1->f[1] ? ~0 : 0; 4480 dst->u[2] = src0->f[2] < src1->f[2] ? ~0 : 0; 4481 dst->u[3] = src0->f[3] < src1->f[3] ? ~0 : 0; 4482} 4483 4484static void 4485micro_fsne(union tgsi_exec_channel *dst, 4486 const union tgsi_exec_channel *src0, 4487 const union tgsi_exec_channel *src1) 4488{ 4489 dst->u[0] = src0->f[0] != src1->f[0] ? ~0 : 0; 4490 dst->u[1] = src0->f[1] != src1->f[1] ? ~0 : 0; 4491 dst->u[2] = src0->f[2] != src1->f[2] ? ~0 : 0; 4492 dst->u[3] = src0->f[3] != src1->f[3] ? ~0 : 0; 4493} 4494 4495static void 4496micro_idiv(union tgsi_exec_channel *dst, 4497 const union tgsi_exec_channel *src0, 4498 const union tgsi_exec_channel *src1) 4499{ 4500 dst->i[0] = src1->i[0] ? src0->i[0] / src1->i[0] : 0; 4501 dst->i[1] = src1->i[1] ? src0->i[1] / src1->i[1] : 0; 4502 dst->i[2] = src1->i[2] ? src0->i[2] / src1->i[2] : 0; 4503 dst->i[3] = src1->i[3] ? src0->i[3] / src1->i[3] : 0; 4504} 4505 4506static void 4507micro_imax(union tgsi_exec_channel *dst, 4508 const union tgsi_exec_channel *src0, 4509 const union tgsi_exec_channel *src1) 4510{ 4511 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; 4512 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; 4513 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; 4514 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; 4515} 4516 4517static void 4518micro_imin(union tgsi_exec_channel *dst, 4519 const union tgsi_exec_channel *src0, 4520 const union tgsi_exec_channel *src1) 4521{ 4522 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; 4523 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; 4524 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; 4525 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; 4526} 4527 4528static void 4529micro_isge(union tgsi_exec_channel *dst, 4530 const union tgsi_exec_channel *src0, 4531 const union tgsi_exec_channel *src1) 4532{ 4533 dst->i[0] = src0->i[0] >= src1->i[0] ? -1 : 0; 4534 dst->i[1] = src0->i[1] >= src1->i[1] ? -1 : 0; 4535 dst->i[2] = src0->i[2] >= src1->i[2] ? -1 : 0; 4536 dst->i[3] = src0->i[3] >= src1->i[3] ? -1 : 0; 4537} 4538 4539static void 4540micro_ishr(union tgsi_exec_channel *dst, 4541 const union tgsi_exec_channel *src0, 4542 const union tgsi_exec_channel *src1) 4543{ 4544 unsigned masked_count; 4545 masked_count = src1->i[0] & 0x1f; 4546 dst->i[0] = src0->i[0] >> masked_count; 4547 masked_count = src1->i[1] & 0x1f; 4548 dst->i[1] = src0->i[1] >> masked_count; 4549 masked_count = src1->i[2] & 0x1f; 4550 dst->i[2] = src0->i[2] >> masked_count; 4551 masked_count = src1->i[3] & 0x1f; 4552 dst->i[3] = src0->i[3] >> masked_count; 4553} 4554 4555static void 4556micro_islt(union tgsi_exec_channel *dst, 4557 const union tgsi_exec_channel *src0, 4558 const union tgsi_exec_channel *src1) 4559{ 4560 dst->i[0] = src0->i[0] < src1->i[0] ? -1 : 0; 4561 dst->i[1] = src0->i[1] < src1->i[1] ? -1 : 0; 4562 dst->i[2] = src0->i[2] < src1->i[2] ? -1 : 0; 4563 dst->i[3] = src0->i[3] < src1->i[3] ? -1 : 0; 4564} 4565 4566static void 4567micro_f2u(union tgsi_exec_channel *dst, 4568 const union tgsi_exec_channel *src) 4569{ 4570 dst->u[0] = (uint)src->f[0]; 4571 dst->u[1] = (uint)src->f[1]; 4572 dst->u[2] = (uint)src->f[2]; 4573 dst->u[3] = (uint)src->f[3]; 4574} 4575 4576static void 4577micro_u2f(union tgsi_exec_channel *dst, 4578 const union tgsi_exec_channel *src) 4579{ 4580 dst->f[0] = (float)src->u[0]; 4581 dst->f[1] = (float)src->u[1]; 4582 dst->f[2] = (float)src->u[2]; 4583 dst->f[3] = (float)src->u[3]; 4584} 4585 4586static void 4587micro_uadd(union tgsi_exec_channel *dst, 4588 const union tgsi_exec_channel *src0, 4589 const union tgsi_exec_channel *src1) 4590{ 4591 dst->u[0] = src0->u[0] + src1->u[0]; 4592 dst->u[1] = src0->u[1] + src1->u[1]; 4593 dst->u[2] = src0->u[2] + src1->u[2]; 4594 dst->u[3] = src0->u[3] + src1->u[3]; 4595} 4596 4597static void 4598micro_udiv(union tgsi_exec_channel *dst, 4599 const union tgsi_exec_channel *src0, 4600 const union tgsi_exec_channel *src1) 4601{ 4602 dst->u[0] = src1->u[0] ? src0->u[0] / src1->u[0] : ~0u; 4603 dst->u[1] = src1->u[1] ? src0->u[1] / src1->u[1] : ~0u; 4604 dst->u[2] = src1->u[2] ? src0->u[2] / src1->u[2] : ~0u; 4605 dst->u[3] = src1->u[3] ? src0->u[3] / src1->u[3] : ~0u; 4606} 4607 4608static void 4609micro_umad(union tgsi_exec_channel *dst, 4610 const union tgsi_exec_channel *src0, 4611 const union tgsi_exec_channel *src1, 4612 const union tgsi_exec_channel *src2) 4613{ 4614 dst->u[0] = src0->u[0] * src1->u[0] + src2->u[0]; 4615 dst->u[1] = src0->u[1] * src1->u[1] + src2->u[1]; 4616 dst->u[2] = src0->u[2] * src1->u[2] + src2->u[2]; 4617 dst->u[3] = src0->u[3] * src1->u[3] + src2->u[3]; 4618} 4619 4620static void 4621micro_umax(union tgsi_exec_channel *dst, 4622 const union tgsi_exec_channel *src0, 4623 const union tgsi_exec_channel *src1) 4624{ 4625 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; 4626 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; 4627 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; 4628 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; 4629} 4630 4631static void 4632micro_umin(union tgsi_exec_channel *dst, 4633 const union tgsi_exec_channel *src0, 4634 const union tgsi_exec_channel *src1) 4635{ 4636 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; 4637 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; 4638 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; 4639 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; 4640} 4641 4642static void 4643micro_umod(union tgsi_exec_channel *dst, 4644 const union tgsi_exec_channel *src0, 4645 const union tgsi_exec_channel *src1) 4646{ 4647 dst->u[0] = src1->u[0] ? src0->u[0] % src1->u[0] : ~0u; 4648 dst->u[1] = src1->u[1] ? src0->u[1] % src1->u[1] : ~0u; 4649 dst->u[2] = src1->u[2] ? src0->u[2] % src1->u[2] : ~0u; 4650 dst->u[3] = src1->u[3] ? src0->u[3] % src1->u[3] : ~0u; 4651} 4652 4653static void 4654micro_umul(union tgsi_exec_channel *dst, 4655 const union tgsi_exec_channel *src0, 4656 const union tgsi_exec_channel *src1) 4657{ 4658 dst->u[0] = src0->u[0] * src1->u[0]; 4659 dst->u[1] = src0->u[1] * src1->u[1]; 4660 dst->u[2] = src0->u[2] * src1->u[2]; 4661 dst->u[3] = src0->u[3] * src1->u[3]; 4662} 4663 4664static void 4665micro_imul_hi(union tgsi_exec_channel *dst, 4666 const union tgsi_exec_channel *src0, 4667 const union tgsi_exec_channel *src1) 4668{ 4669#define I64M(x, y) ((((int64_t)x) * ((int64_t)y)) >> 32) 4670 dst->i[0] = I64M(src0->i[0], src1->i[0]); 4671 dst->i[1] = I64M(src0->i[1], src1->i[1]); 4672 dst->i[2] = I64M(src0->i[2], src1->i[2]); 4673 dst->i[3] = I64M(src0->i[3], src1->i[3]); 4674#undef I64M 4675} 4676 4677static void 4678micro_umul_hi(union tgsi_exec_channel *dst, 4679 const union tgsi_exec_channel *src0, 4680 const union tgsi_exec_channel *src1) 4681{ 4682#define U64M(x, y) ((((uint64_t)x) * ((uint64_t)y)) >> 32) 4683 dst->u[0] = U64M(src0->u[0], src1->u[0]); 4684 dst->u[1] = U64M(src0->u[1], src1->u[1]); 4685 dst->u[2] = U64M(src0->u[2], src1->u[2]); 4686 dst->u[3] = U64M(src0->u[3], src1->u[3]); 4687#undef U64M 4688} 4689 4690static void 4691micro_useq(union tgsi_exec_channel *dst, 4692 const union tgsi_exec_channel *src0, 4693 const union tgsi_exec_channel *src1) 4694{ 4695 dst->u[0] = src0->u[0] == src1->u[0] ? ~0 : 0; 4696 dst->u[1] = src0->u[1] == src1->u[1] ? ~0 : 0; 4697 dst->u[2] = src0->u[2] == src1->u[2] ? ~0 : 0; 4698 dst->u[3] = src0->u[3] == src1->u[3] ? ~0 : 0; 4699} 4700 4701static void 4702micro_usge(union tgsi_exec_channel *dst, 4703 const union tgsi_exec_channel *src0, 4704 const union tgsi_exec_channel *src1) 4705{ 4706 dst->u[0] = src0->u[0] >= src1->u[0] ? ~0 : 0; 4707 dst->u[1] = src0->u[1] >= src1->u[1] ? ~0 : 0; 4708 dst->u[2] = src0->u[2] >= src1->u[2] ? ~0 : 0; 4709 dst->u[3] = src0->u[3] >= src1->u[3] ? ~0 : 0; 4710} 4711 4712static void 4713micro_ushr(union tgsi_exec_channel *dst, 4714 const union tgsi_exec_channel *src0, 4715 const union tgsi_exec_channel *src1) 4716{ 4717 unsigned masked_count; 4718 masked_count = src1->u[0] & 0x1f; 4719 dst->u[0] = src0->u[0] >> masked_count; 4720 masked_count = src1->u[1] & 0x1f; 4721 dst->u[1] = src0->u[1] >> masked_count; 4722 masked_count = src1->u[2] & 0x1f; 4723 dst->u[2] = src0->u[2] >> masked_count; 4724 masked_count = src1->u[3] & 0x1f; 4725 dst->u[3] = src0->u[3] >> masked_count; 4726} 4727 4728static void 4729micro_uslt(union tgsi_exec_channel *dst, 4730 const union tgsi_exec_channel *src0, 4731 const union tgsi_exec_channel *src1) 4732{ 4733 dst->u[0] = src0->u[0] < src1->u[0] ? ~0 : 0; 4734 dst->u[1] = src0->u[1] < src1->u[1] ? ~0 : 0; 4735 dst->u[2] = src0->u[2] < src1->u[2] ? ~0 : 0; 4736 dst->u[3] = src0->u[3] < src1->u[3] ? ~0 : 0; 4737} 4738 4739static void 4740micro_usne(union tgsi_exec_channel *dst, 4741 const union tgsi_exec_channel *src0, 4742 const union tgsi_exec_channel *src1) 4743{ 4744 dst->u[0] = src0->u[0] != src1->u[0] ? ~0 : 0; 4745 dst->u[1] = src0->u[1] != src1->u[1] ? ~0 : 0; 4746 dst->u[2] = src0->u[2] != src1->u[2] ? ~0 : 0; 4747 dst->u[3] = src0->u[3] != src1->u[3] ? ~0 : 0; 4748} 4749 4750static void 4751micro_uarl(union tgsi_exec_channel *dst, 4752 const union tgsi_exec_channel *src) 4753{ 4754 dst->i[0] = src->u[0]; 4755 dst->i[1] = src->u[1]; 4756 dst->i[2] = src->u[2]; 4757 dst->i[3] = src->u[3]; 4758} 4759 4760/** 4761 * Signed bitfield extract (i.e. sign-extend the extracted bits) 4762 */ 4763static void 4764micro_ibfe(union tgsi_exec_channel *dst, 4765 const union tgsi_exec_channel *src0, 4766 const union tgsi_exec_channel *src1, 4767 const union tgsi_exec_channel *src2) 4768{ 4769 int i; 4770 for (i = 0; i < 4; i++) { 4771 int width = src2->i[i]; 4772 int offset = src1->i[i] & 0x1f; 4773 if (width == 32 && offset == 0) { 4774 dst->i[i] = src0->i[i]; 4775 continue; 4776 } 4777 width &= 0x1f; 4778 if (width == 0) 4779 dst->i[i] = 0; 4780 else if (width + offset < 32) 4781 dst->i[i] = (src0->i[i] << (32 - width - offset)) >> (32 - width); 4782 else 4783 dst->i[i] = src0->i[i] >> offset; 4784 } 4785} 4786 4787/** 4788 * Unsigned bitfield extract 4789 */ 4790static void 4791micro_ubfe(union tgsi_exec_channel *dst, 4792 const union tgsi_exec_channel *src0, 4793 const union tgsi_exec_channel *src1, 4794 const union tgsi_exec_channel *src2) 4795{ 4796 int i; 4797 for (i = 0; i < 4; i++) { 4798 int width = src2->u[i]; 4799 int offset = src1->u[i] & 0x1f; 4800 if (width == 32 && offset == 0) { 4801 dst->u[i] = src0->u[i]; 4802 continue; 4803 } 4804 width &= 0x1f; 4805 if (width == 0) 4806 dst->u[i] = 0; 4807 else if (width + offset < 32) 4808 dst->u[i] = (src0->u[i] << (32 - width - offset)) >> (32 - width); 4809 else 4810 dst->u[i] = src0->u[i] >> offset; 4811 } 4812} 4813 4814/** 4815 * Bitfield insert: copy low bits from src1 into a region of src0. 4816 */ 4817static void 4818micro_bfi(union tgsi_exec_channel *dst, 4819 const union tgsi_exec_channel *src0, 4820 const union tgsi_exec_channel *src1, 4821 const union tgsi_exec_channel *src2, 4822 const union tgsi_exec_channel *src3) 4823{ 4824 int i; 4825 for (i = 0; i < 4; i++) { 4826 int width = src3->u[i]; 4827 int offset = src2->u[i] & 0x1f; 4828 if (width == 32) { 4829 dst->u[i] = src1->u[i]; 4830 } else { 4831 int bitmask = ((1 << width) - 1) << offset; 4832 dst->u[i] = ((src1->u[i] << offset) & bitmask) | (src0->u[i] & ~bitmask); 4833 } 4834 } 4835} 4836 4837static void 4838micro_brev(union tgsi_exec_channel *dst, 4839 const union tgsi_exec_channel *src) 4840{ 4841 dst->u[0] = util_bitreverse(src->u[0]); 4842 dst->u[1] = util_bitreverse(src->u[1]); 4843 dst->u[2] = util_bitreverse(src->u[2]); 4844 dst->u[3] = util_bitreverse(src->u[3]); 4845} 4846 4847static void 4848micro_popc(union tgsi_exec_channel *dst, 4849 const union tgsi_exec_channel *src) 4850{ 4851 dst->u[0] = util_bitcount(src->u[0]); 4852 dst->u[1] = util_bitcount(src->u[1]); 4853 dst->u[2] = util_bitcount(src->u[2]); 4854 dst->u[3] = util_bitcount(src->u[3]); 4855} 4856 4857static void 4858micro_lsb(union tgsi_exec_channel *dst, 4859 const union tgsi_exec_channel *src) 4860{ 4861 dst->i[0] = ffs(src->u[0]) - 1; 4862 dst->i[1] = ffs(src->u[1]) - 1; 4863 dst->i[2] = ffs(src->u[2]) - 1; 4864 dst->i[3] = ffs(src->u[3]) - 1; 4865} 4866 4867static void 4868micro_imsb(union tgsi_exec_channel *dst, 4869 const union tgsi_exec_channel *src) 4870{ 4871 dst->i[0] = util_last_bit_signed(src->i[0]) - 1; 4872 dst->i[1] = util_last_bit_signed(src->i[1]) - 1; 4873 dst->i[2] = util_last_bit_signed(src->i[2]) - 1; 4874 dst->i[3] = util_last_bit_signed(src->i[3]) - 1; 4875} 4876 4877static void 4878micro_umsb(union tgsi_exec_channel *dst, 4879 const union tgsi_exec_channel *src) 4880{ 4881 dst->i[0] = util_last_bit(src->u[0]) - 1; 4882 dst->i[1] = util_last_bit(src->u[1]) - 1; 4883 dst->i[2] = util_last_bit(src->u[2]) - 1; 4884 dst->i[3] = util_last_bit(src->u[3]) - 1; 4885} 4886 4887 4888static void 4889exec_interp_at_sample(struct tgsi_exec_machine *mach, 4890 const struct tgsi_full_instruction *inst) 4891{ 4892 union tgsi_exec_channel index; 4893 union tgsi_exec_channel index2D; 4894 union tgsi_exec_channel result[TGSI_NUM_CHANNELS]; 4895 const struct tgsi_full_src_register *reg = &inst->Src[0]; 4896 4897 assert(reg->Register.File == TGSI_FILE_INPUT); 4898 assert(inst->Src[1].Register.File == TGSI_FILE_IMMEDIATE); 4899 4900 get_index_registers(mach, reg, &index, &index2D); 4901 float sample = mach->Imms[inst->Src[1].Register.Index][inst->Src[1].Register.SwizzleX]; 4902 4903 /* Short cut: sample 0 is like a normal fetch */ 4904 for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4905 if (!(inst->Dst[0].Register.WriteMask & (1 << chan))) 4906 continue; 4907 4908 fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D, 4909 &result[chan]); 4910 if (sample != 0.0f) { 4911 4912 /* TODO: define the samples > 0, but so far we only do fake MSAA */ 4913 float x = 0; 4914 float y = 0; 4915 4916 unsigned pos = index2D.i[chan] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index.i[chan]; 4917 assert(pos >= 0); 4918 assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS); 4919 mach->InputSampleOffsetApply[pos](mach, pos, chan, x, y, &result[chan]); 4920 } 4921 store_dest(mach, &result[chan], &inst->Dst[0], inst, chan); 4922 } 4923} 4924 4925 4926static void 4927exec_interp_at_offset(struct tgsi_exec_machine *mach, 4928 const struct tgsi_full_instruction *inst) 4929{ 4930 union tgsi_exec_channel index; 4931 union tgsi_exec_channel index2D; 4932 union tgsi_exec_channel ofsx; 4933 union tgsi_exec_channel ofsy; 4934 const struct tgsi_full_src_register *reg = &inst->Src[0]; 4935 4936 assert(reg->Register.File == TGSI_FILE_INPUT); 4937 4938 get_index_registers(mach, reg, &index, &index2D); 4939 unsigned pos = index2D.i[0] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index.i[0]; 4940 4941 fetch_source(mach, &ofsx, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT); 4942 fetch_source(mach, &ofsy, &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT); 4943 4944 for (int chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4945 if (!(inst->Dst[0].Register.WriteMask & (1 << chan))) 4946 continue; 4947 union tgsi_exec_channel result; 4948 fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D, &result); 4949 mach->InputSampleOffsetApply[pos](mach, pos, chan, ofsx.f[chan], ofsy.f[chan], &result); 4950 store_dest(mach, &result, &inst->Dst[0], inst, chan); 4951 } 4952} 4953 4954 4955static void 4956exec_interp_at_centroid(struct tgsi_exec_machine *mach, 4957 const struct tgsi_full_instruction *inst) 4958{ 4959 union tgsi_exec_channel index; 4960 union tgsi_exec_channel index2D; 4961 union tgsi_exec_channel result[TGSI_NUM_CHANNELS]; 4962 const struct tgsi_full_src_register *reg = &inst->Src[0]; 4963 4964 assert(reg->Register.File == TGSI_FILE_INPUT); 4965 get_index_registers(mach, reg, &index, &index2D); 4966 4967 for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) { 4968 if (!(inst->Dst[0].Register.WriteMask & (1 << chan))) 4969 continue; 4970 4971 /* Here we should add the change to use a sample that lies within the 4972 * primitive (Section 15.2): 4973 * 4974 * "When interpolating variables declared using centroid in , 4975 * the variable is sampled at a location within the pixel covered 4976 * by the primitive generating the fragment. 4977 * ... 4978 * The built-in functions interpolateAtCentroid ... will sample 4979 * variables as though they were declared with the centroid ... 4980 * qualifier[s]." 4981 * 4982 * Since we only support 1 sample currently, this is just a pass-through. 4983 */ 4984 fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D, 4985 &result[chan]); 4986 store_dest(mach, &result[chan], &inst->Dst[0], inst, chan); 4987 } 4988 4989} 4990 4991 4992/** 4993 * Execute a TGSI instruction. 4994 * Returns TRUE if a barrier instruction is hit, 4995 * otherwise FALSE. 4996 */ 4997static boolean 4998exec_instruction( 4999 struct tgsi_exec_machine *mach, 5000 const struct tgsi_full_instruction *inst, 5001 int *pc ) 5002{ 5003 union tgsi_exec_channel r[10]; 5004 5005 (*pc)++; 5006 5007 switch (inst->Instruction.Opcode) { 5008 case TGSI_OPCODE_ARL: 5009 exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_FLOAT); 5010 break; 5011 5012 case TGSI_OPCODE_MOV: 5013 exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_FLOAT); 5014 break; 5015 5016 case TGSI_OPCODE_LIT: 5017 exec_lit(mach, inst); 5018 break; 5019 5020 case TGSI_OPCODE_RCP: 5021 exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT); 5022 break; 5023 5024 case TGSI_OPCODE_RSQ: 5025 exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT); 5026 break; 5027 5028 case TGSI_OPCODE_EXP: 5029 exec_exp(mach, inst); 5030 break; 5031 5032 case TGSI_OPCODE_LOG: 5033 exec_log(mach, inst); 5034 break; 5035 5036 case TGSI_OPCODE_MUL: 5037 exec_vector_binary(mach, inst, micro_mul, TGSI_EXEC_DATA_FLOAT); 5038 break; 5039 5040 case TGSI_OPCODE_ADD: 5041 exec_vector_binary(mach, inst, micro_add, TGSI_EXEC_DATA_FLOAT); 5042 break; 5043 5044 case TGSI_OPCODE_DP3: 5045 exec_dp3(mach, inst); 5046 break; 5047 5048 case TGSI_OPCODE_DP4: 5049 exec_dp4(mach, inst); 5050 break; 5051 5052 case TGSI_OPCODE_DST: 5053 exec_dst(mach, inst); 5054 break; 5055 5056 case TGSI_OPCODE_MIN: 5057 exec_vector_binary(mach, inst, micro_min, TGSI_EXEC_DATA_FLOAT); 5058 break; 5059 5060 case TGSI_OPCODE_MAX: 5061 exec_vector_binary(mach, inst, micro_max, TGSI_EXEC_DATA_FLOAT); 5062 break; 5063 5064 case TGSI_OPCODE_SLT: 5065 exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT); 5066 break; 5067 5068 case TGSI_OPCODE_SGE: 5069 exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT); 5070 break; 5071 5072 case TGSI_OPCODE_MAD: 5073 exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT); 5074 break; 5075 5076 case TGSI_OPCODE_LRP: 5077 exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT); 5078 break; 5079 5080 case TGSI_OPCODE_SQRT: 5081 exec_scalar_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT); 5082 break; 5083 5084 case TGSI_OPCODE_FRC: 5085 exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT); 5086 break; 5087 5088 case TGSI_OPCODE_FLR: 5089 exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT); 5090 break; 5091 5092 case TGSI_OPCODE_ROUND: 5093 exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT); 5094 break; 5095 5096 case TGSI_OPCODE_EX2: 5097 exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT); 5098 break; 5099 5100 case TGSI_OPCODE_LG2: 5101 exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT); 5102 break; 5103 5104 case TGSI_OPCODE_POW: 5105 exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT); 5106 break; 5107 5108 case TGSI_OPCODE_LDEXP: 5109 exec_vector_binary(mach, inst, micro_ldexp, TGSI_EXEC_DATA_FLOAT); 5110 break; 5111 5112 case TGSI_OPCODE_COS: 5113 exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT); 5114 break; 5115 5116 case TGSI_OPCODE_DDX_FINE: 5117 exec_vector_unary(mach, inst, micro_ddx_fine, TGSI_EXEC_DATA_FLOAT); 5118 break; 5119 5120 case TGSI_OPCODE_DDX: 5121 exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT); 5122 break; 5123 5124 case TGSI_OPCODE_DDY_FINE: 5125 exec_vector_unary(mach, inst, micro_ddy_fine, TGSI_EXEC_DATA_FLOAT); 5126 break; 5127 5128 case TGSI_OPCODE_DDY: 5129 exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT); 5130 break; 5131 5132 case TGSI_OPCODE_KILL: 5133 exec_kill (mach); 5134 break; 5135 5136 case TGSI_OPCODE_KILL_IF: 5137 exec_kill_if (mach, inst); 5138 break; 5139 5140 case TGSI_OPCODE_PK2H: 5141 exec_pk2h(mach, inst); 5142 break; 5143 5144 case TGSI_OPCODE_PK2US: 5145 assert (0); 5146 break; 5147 5148 case TGSI_OPCODE_PK4B: 5149 assert (0); 5150 break; 5151 5152 case TGSI_OPCODE_PK4UB: 5153 assert (0); 5154 break; 5155 5156 case TGSI_OPCODE_SEQ: 5157 exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT); 5158 break; 5159 5160 case TGSI_OPCODE_SGT: 5161 exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT); 5162 break; 5163 5164 case TGSI_OPCODE_SIN: 5165 exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT); 5166 break; 5167 5168 case TGSI_OPCODE_SLE: 5169 exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT); 5170 break; 5171 5172 case TGSI_OPCODE_SNE: 5173 exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT); 5174 break; 5175 5176 case TGSI_OPCODE_TEX: 5177 /* simple texture lookup */ 5178 /* src[0] = texcoord */ 5179 /* src[1] = sampler unit */ 5180 exec_tex(mach, inst, TEX_MODIFIER_NONE, 1); 5181 break; 5182 5183 case TGSI_OPCODE_TXB: 5184 /* Texture lookup with lod bias */ 5185 /* src[0] = texcoord (src[0].w = LOD bias) */ 5186 /* src[1] = sampler unit */ 5187 exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 1); 5188 break; 5189 5190 case TGSI_OPCODE_TXD: 5191 /* Texture lookup with explict partial derivatives */ 5192 /* src[0] = texcoord */ 5193 /* src[1] = d[strq]/dx */ 5194 /* src[2] = d[strq]/dy */ 5195 /* src[3] = sampler unit */ 5196 exec_txd(mach, inst); 5197 break; 5198 5199 case TGSI_OPCODE_TXL: 5200 /* Texture lookup with explit LOD */ 5201 /* src[0] = texcoord (src[0].w = LOD) */ 5202 /* src[1] = sampler unit */ 5203 exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 1); 5204 break; 5205 5206 case TGSI_OPCODE_TXP: 5207 /* Texture lookup with projection */ 5208 /* src[0] = texcoord (src[0].w = projection) */ 5209 /* src[1] = sampler unit */ 5210 exec_tex(mach, inst, TEX_MODIFIER_PROJECTED, 1); 5211 break; 5212 5213 case TGSI_OPCODE_TG4: 5214 /* src[0] = texcoord */ 5215 /* src[1] = component */ 5216 /* src[2] = sampler unit */ 5217 exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2); 5218 break; 5219 5220 case TGSI_OPCODE_LODQ: 5221 /* src[0] = texcoord */ 5222 /* src[1] = sampler unit */ 5223 exec_lodq(mach, inst); 5224 break; 5225 5226 case TGSI_OPCODE_UP2H: 5227 exec_up2h(mach, inst); 5228 break; 5229 5230 case TGSI_OPCODE_UP2US: 5231 assert (0); 5232 break; 5233 5234 case TGSI_OPCODE_UP4B: 5235 assert (0); 5236 break; 5237 5238 case TGSI_OPCODE_UP4UB: 5239 assert (0); 5240 break; 5241 5242 case TGSI_OPCODE_ARR: 5243 exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_FLOAT); 5244 break; 5245 5246 case TGSI_OPCODE_CAL: 5247 /* skip the call if no execution channels are enabled */ 5248 if (mach->ExecMask) { 5249 /* do the call */ 5250 5251 /* First, record the depths of the execution stacks. 5252 * This is important for deeply nested/looped return statements. 5253 * We have to unwind the stacks by the correct amount. For a 5254 * real code generator, we could determine the number of entries 5255 * to pop off each stack with simple static analysis and avoid 5256 * implementing this data structure at run time. 5257 */ 5258 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop; 5259 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop; 5260 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop; 5261 mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop; 5262 mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop; 5263 /* note that PC was already incremented above */ 5264 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc; 5265 5266 mach->CallStackTop++; 5267 5268 /* Second, push the Cond, Loop, Cont, Func stacks */ 5269 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 5270 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 5271 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 5272 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING); 5273 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); 5274 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); 5275 5276 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 5277 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 5278 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 5279 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch; 5280 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; 5281 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; 5282 5283 /* Finally, jump to the subroutine. The label is a pointer 5284 * (an instruction number) to the BGNSUB instruction. 5285 */ 5286 *pc = inst->Label.Label; 5287 assert(mach->Instructions[*pc].Instruction.Opcode 5288 == TGSI_OPCODE_BGNSUB); 5289 } 5290 break; 5291 5292 case TGSI_OPCODE_RET: 5293 mach->FuncMask &= ~mach->ExecMask; 5294 UPDATE_EXEC_MASK(mach); 5295 5296 if (mach->FuncMask == 0x0) { 5297 /* really return now (otherwise, keep executing */ 5298 5299 if (mach->CallStackTop == 0) { 5300 /* returning from main() */ 5301 mach->CondStackTop = 0; 5302 mach->LoopStackTop = 0; 5303 mach->ContStackTop = 0; 5304 mach->LoopLabelStackTop = 0; 5305 mach->SwitchStackTop = 0; 5306 mach->BreakStackTop = 0; 5307 *pc = -1; 5308 return FALSE; 5309 } 5310 5311 assert(mach->CallStackTop > 0); 5312 mach->CallStackTop--; 5313 5314 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; 5315 mach->CondMask = mach->CondStack[mach->CondStackTop]; 5316 5317 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; 5318 mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; 5319 5320 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; 5321 mach->ContMask = mach->ContStack[mach->ContStackTop]; 5322 5323 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; 5324 mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; 5325 5326 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; 5327 mach->BreakType = mach->BreakStack[mach->BreakStackTop]; 5328 5329 assert(mach->FuncStackTop > 0); 5330 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 5331 5332 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; 5333 5334 UPDATE_EXEC_MASK(mach); 5335 } 5336 break; 5337 5338 case TGSI_OPCODE_SSG: 5339 exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT); 5340 break; 5341 5342 case TGSI_OPCODE_CMP: 5343 exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT); 5344 break; 5345 5346 case TGSI_OPCODE_DIV: 5347 exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT); 5348 break; 5349 5350 case TGSI_OPCODE_DP2: 5351 exec_dp2(mach, inst); 5352 break; 5353 5354 case TGSI_OPCODE_IF: 5355 /* push CondMask */ 5356 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 5357 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 5358 FETCH( &r[0], 0, TGSI_CHAN_X ); 5359 for (int i = 0; i < TGSI_QUAD_SIZE; i++) { 5360 if (!r[0].f[i]) 5361 mach->CondMask &= ~(1 << i); 5362 } 5363 UPDATE_EXEC_MASK(mach); 5364 /* If no channels are taking the then branch, jump to ELSE. */ 5365 if (!mach->CondMask) 5366 *pc = inst->Label.Label; 5367 break; 5368 5369 case TGSI_OPCODE_UIF: 5370 /* push CondMask */ 5371 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); 5372 mach->CondStack[mach->CondStackTop++] = mach->CondMask; 5373 IFETCH( &r[0], 0, TGSI_CHAN_X ); 5374 for (int i = 0; i < TGSI_QUAD_SIZE; i++) { 5375 if (!r[0].u[i]) 5376 mach->CondMask &= ~(1 << i); 5377 } 5378 UPDATE_EXEC_MASK(mach); 5379 /* If no channels are taking the then branch, jump to ELSE. */ 5380 if (!mach->CondMask) 5381 *pc = inst->Label.Label; 5382 break; 5383 5384 case TGSI_OPCODE_ELSE: 5385 /* invert CondMask wrt previous mask */ 5386 { 5387 uint prevMask; 5388 assert(mach->CondStackTop > 0); 5389 prevMask = mach->CondStack[mach->CondStackTop - 1]; 5390 mach->CondMask = ~mach->CondMask & prevMask; 5391 UPDATE_EXEC_MASK(mach); 5392 5393 /* If no channels are taking ELSE, jump to ENDIF */ 5394 if (!mach->CondMask) 5395 *pc = inst->Label.Label; 5396 } 5397 break; 5398 5399 case TGSI_OPCODE_ENDIF: 5400 /* pop CondMask */ 5401 assert(mach->CondStackTop > 0); 5402 mach->CondMask = mach->CondStack[--mach->CondStackTop]; 5403 UPDATE_EXEC_MASK(mach); 5404 break; 5405 5406 case TGSI_OPCODE_END: 5407 /* make sure we end primitives which haven't 5408 * been explicitly emitted */ 5409 conditional_emit_primitive(mach); 5410 /* halt execution */ 5411 *pc = -1; 5412 break; 5413 5414 case TGSI_OPCODE_CEIL: 5415 exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT); 5416 break; 5417 5418 case TGSI_OPCODE_I2F: 5419 exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_INT); 5420 break; 5421 5422 case TGSI_OPCODE_NOT: 5423 exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT); 5424 break; 5425 5426 case TGSI_OPCODE_TRUNC: 5427 exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT); 5428 break; 5429 5430 case TGSI_OPCODE_SHL: 5431 exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT); 5432 break; 5433 5434 case TGSI_OPCODE_AND: 5435 exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT); 5436 break; 5437 5438 case TGSI_OPCODE_OR: 5439 exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT); 5440 break; 5441 5442 case TGSI_OPCODE_MOD: 5443 exec_vector_binary(mach, inst, micro_mod, TGSI_EXEC_DATA_INT); 5444 break; 5445 5446 case TGSI_OPCODE_XOR: 5447 exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT); 5448 break; 5449 5450 case TGSI_OPCODE_TXF: 5451 exec_txf(mach, inst); 5452 break; 5453 5454 case TGSI_OPCODE_TXQ: 5455 exec_txq(mach, inst); 5456 break; 5457 5458 case TGSI_OPCODE_EMIT: 5459 emit_vertex(mach, inst); 5460 break; 5461 5462 case TGSI_OPCODE_ENDPRIM: 5463 emit_primitive(mach, inst); 5464 break; 5465 5466 case TGSI_OPCODE_BGNLOOP: 5467 /* push LoopMask and ContMasks */ 5468 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 5469 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 5470 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING); 5471 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK); 5472 5473 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; 5474 mach->ContStack[mach->ContStackTop++] = mach->ContMask; 5475 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1; 5476 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType; 5477 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP; 5478 break; 5479 5480 case TGSI_OPCODE_ENDLOOP: 5481 /* Restore ContMask, but don't pop */ 5482 assert(mach->ContStackTop > 0); 5483 mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; 5484 UPDATE_EXEC_MASK(mach); 5485 if (mach->ExecMask) { 5486 /* repeat loop: jump to instruction just past BGNLOOP */ 5487 assert(mach->LoopLabelStackTop > 0); 5488 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1; 5489 } 5490 else { 5491 /* exit loop: pop LoopMask */ 5492 assert(mach->LoopStackTop > 0); 5493 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; 5494 /* pop ContMask */ 5495 assert(mach->ContStackTop > 0); 5496 mach->ContMask = mach->ContStack[--mach->ContStackTop]; 5497 assert(mach->LoopLabelStackTop > 0); 5498 --mach->LoopLabelStackTop; 5499 5500 mach->BreakType = mach->BreakStack[--mach->BreakStackTop]; 5501 } 5502 UPDATE_EXEC_MASK(mach); 5503 break; 5504 5505 case TGSI_OPCODE_BRK: 5506 exec_break(mach); 5507 break; 5508 5509 case TGSI_OPCODE_CONT: 5510 /* turn off cont channels for each enabled exec channel */ 5511 mach->ContMask &= ~mach->ExecMask; 5512 /* Todo: if mach->LoopMask == 0, jump to end of loop */ 5513 UPDATE_EXEC_MASK(mach); 5514 break; 5515 5516 case TGSI_OPCODE_BGNSUB: 5517 /* no-op */ 5518 break; 5519 5520 case TGSI_OPCODE_ENDSUB: 5521 /* 5522 * XXX: This really should be a no-op. We should never reach this opcode. 5523 */ 5524 5525 assert(mach->CallStackTop > 0); 5526 mach->CallStackTop--; 5527 5528 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop; 5529 mach->CondMask = mach->CondStack[mach->CondStackTop]; 5530 5531 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop; 5532 mach->LoopMask = mach->LoopStack[mach->LoopStackTop]; 5533 5534 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop; 5535 mach->ContMask = mach->ContStack[mach->ContStackTop]; 5536 5537 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop; 5538 mach->Switch = mach->SwitchStack[mach->SwitchStackTop]; 5539 5540 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop; 5541 mach->BreakType = mach->BreakStack[mach->BreakStackTop]; 5542 5543 assert(mach->FuncStackTop > 0); 5544 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; 5545 5546 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr; 5547 5548 UPDATE_EXEC_MASK(mach); 5549 break; 5550 5551 case TGSI_OPCODE_NOP: 5552 break; 5553 5554 case TGSI_OPCODE_F2I: 5555 exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_FLOAT); 5556 break; 5557 5558 case TGSI_OPCODE_FSEQ: 5559 exec_vector_binary(mach, inst, micro_fseq, TGSI_EXEC_DATA_FLOAT); 5560 break; 5561 5562 case TGSI_OPCODE_FSGE: 5563 exec_vector_binary(mach, inst, micro_fsge, TGSI_EXEC_DATA_FLOAT); 5564 break; 5565 5566 case TGSI_OPCODE_FSLT: 5567 exec_vector_binary(mach, inst, micro_fslt, TGSI_EXEC_DATA_FLOAT); 5568 break; 5569 5570 case TGSI_OPCODE_FSNE: 5571 exec_vector_binary(mach, inst, micro_fsne, TGSI_EXEC_DATA_FLOAT); 5572 break; 5573 5574 case TGSI_OPCODE_IDIV: 5575 exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT); 5576 break; 5577 5578 case TGSI_OPCODE_IMAX: 5579 exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT); 5580 break; 5581 5582 case TGSI_OPCODE_IMIN: 5583 exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT); 5584 break; 5585 5586 case TGSI_OPCODE_INEG: 5587 exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT); 5588 break; 5589 5590 case TGSI_OPCODE_ISGE: 5591 exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT); 5592 break; 5593 5594 case TGSI_OPCODE_ISHR: 5595 exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT); 5596 break; 5597 5598 case TGSI_OPCODE_ISLT: 5599 exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT); 5600 break; 5601 5602 case TGSI_OPCODE_F2U: 5603 exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_FLOAT); 5604 break; 5605 5606 case TGSI_OPCODE_U2F: 5607 exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_UINT); 5608 break; 5609 5610 case TGSI_OPCODE_UADD: 5611 exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_INT); 5612 break; 5613 5614 case TGSI_OPCODE_UDIV: 5615 exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT); 5616 break; 5617 5618 case TGSI_OPCODE_UMAD: 5619 exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT); 5620 break; 5621 5622 case TGSI_OPCODE_UMAX: 5623 exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT); 5624 break; 5625 5626 case TGSI_OPCODE_UMIN: 5627 exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT); 5628 break; 5629 5630 case TGSI_OPCODE_UMOD: 5631 exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT); 5632 break; 5633 5634 case TGSI_OPCODE_UMUL: 5635 exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT); 5636 break; 5637 5638 case TGSI_OPCODE_IMUL_HI: 5639 exec_vector_binary(mach, inst, micro_imul_hi, TGSI_EXEC_DATA_INT); 5640 break; 5641 5642 case TGSI_OPCODE_UMUL_HI: 5643 exec_vector_binary(mach, inst, micro_umul_hi, TGSI_EXEC_DATA_UINT); 5644 break; 5645 5646 case TGSI_OPCODE_USEQ: 5647 exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT); 5648 break; 5649 5650 case TGSI_OPCODE_USGE: 5651 exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT); 5652 break; 5653 5654 case TGSI_OPCODE_USHR: 5655 exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT); 5656 break; 5657 5658 case TGSI_OPCODE_USLT: 5659 exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT); 5660 break; 5661 5662 case TGSI_OPCODE_USNE: 5663 exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT); 5664 break; 5665 5666 case TGSI_OPCODE_SWITCH: 5667 exec_switch(mach, inst); 5668 break; 5669 5670 case TGSI_OPCODE_CASE: 5671 exec_case(mach, inst); 5672 break; 5673 5674 case TGSI_OPCODE_DEFAULT: 5675 exec_default(mach); 5676 break; 5677 5678 case TGSI_OPCODE_ENDSWITCH: 5679 exec_endswitch(mach); 5680 break; 5681 5682 case TGSI_OPCODE_SAMPLE_I: 5683 exec_txf(mach, inst); 5684 break; 5685 5686 case TGSI_OPCODE_SAMPLE_I_MS: 5687 exec_txf(mach, inst); 5688 break; 5689 5690 case TGSI_OPCODE_SAMPLE: 5691 exec_sample(mach, inst, TEX_MODIFIER_NONE, FALSE); 5692 break; 5693 5694 case TGSI_OPCODE_SAMPLE_B: 5695 exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS, FALSE); 5696 break; 5697 5698 case TGSI_OPCODE_SAMPLE_C: 5699 exec_sample(mach, inst, TEX_MODIFIER_NONE, TRUE); 5700 break; 5701 5702 case TGSI_OPCODE_SAMPLE_C_LZ: 5703 exec_sample(mach, inst, TEX_MODIFIER_LEVEL_ZERO, TRUE); 5704 break; 5705 5706 case TGSI_OPCODE_SAMPLE_D: 5707 exec_sample_d(mach, inst); 5708 break; 5709 5710 case TGSI_OPCODE_SAMPLE_L: 5711 exec_sample(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, FALSE); 5712 break; 5713 5714 case TGSI_OPCODE_GATHER4: 5715 exec_sample(mach, inst, TEX_MODIFIER_GATHER, FALSE); 5716 break; 5717 5718 case TGSI_OPCODE_SVIEWINFO: 5719 exec_txq(mach, inst); 5720 break; 5721 5722 case TGSI_OPCODE_SAMPLE_POS: 5723 assert(0); 5724 break; 5725 5726 case TGSI_OPCODE_SAMPLE_INFO: 5727 assert(0); 5728 break; 5729 5730 case TGSI_OPCODE_LOD: 5731 exec_lodq(mach, inst); 5732 break; 5733 5734 case TGSI_OPCODE_UARL: 5735 exec_vector_unary(mach, inst, micro_uarl, TGSI_EXEC_DATA_UINT); 5736 break; 5737 5738 case TGSI_OPCODE_UCMP: 5739 exec_ucmp(mach, inst); 5740 break; 5741 5742 case TGSI_OPCODE_IABS: 5743 exec_vector_unary(mach, inst, micro_iabs, TGSI_EXEC_DATA_INT); 5744 break; 5745 5746 case TGSI_OPCODE_ISSG: 5747 exec_vector_unary(mach, inst, micro_isgn, TGSI_EXEC_DATA_INT); 5748 break; 5749 5750 case TGSI_OPCODE_TEX2: 5751 /* simple texture lookup */ 5752 /* src[0] = texcoord */ 5753 /* src[1] = compare */ 5754 /* src[2] = sampler unit */ 5755 exec_tex(mach, inst, TEX_MODIFIER_NONE, 2); 5756 break; 5757 case TGSI_OPCODE_TXB2: 5758 /* simple texture lookup */ 5759 /* src[0] = texcoord */ 5760 /* src[1] = bias */ 5761 /* src[2] = sampler unit */ 5762 exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 2); 5763 break; 5764 case TGSI_OPCODE_TXL2: 5765 /* simple texture lookup */ 5766 /* src[0] = texcoord */ 5767 /* src[1] = lod */ 5768 /* src[2] = sampler unit */ 5769 exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 2); 5770 break; 5771 5772 case TGSI_OPCODE_IBFE: 5773 exec_vector_trinary(mach, inst, micro_ibfe, TGSI_EXEC_DATA_INT); 5774 break; 5775 case TGSI_OPCODE_UBFE: 5776 exec_vector_trinary(mach, inst, micro_ubfe, TGSI_EXEC_DATA_UINT); 5777 break; 5778 case TGSI_OPCODE_BFI: 5779 exec_vector_quaternary(mach, inst, micro_bfi, TGSI_EXEC_DATA_UINT); 5780 break; 5781 case TGSI_OPCODE_BREV: 5782 exec_vector_unary(mach, inst, micro_brev, TGSI_EXEC_DATA_UINT); 5783 break; 5784 case TGSI_OPCODE_POPC: 5785 exec_vector_unary(mach, inst, micro_popc, TGSI_EXEC_DATA_UINT); 5786 break; 5787 case TGSI_OPCODE_LSB: 5788 exec_vector_unary(mach, inst, micro_lsb, TGSI_EXEC_DATA_UINT); 5789 break; 5790 case TGSI_OPCODE_IMSB: 5791 exec_vector_unary(mach, inst, micro_imsb, TGSI_EXEC_DATA_INT); 5792 break; 5793 case TGSI_OPCODE_UMSB: 5794 exec_vector_unary(mach, inst, micro_umsb, TGSI_EXEC_DATA_UINT); 5795 break; 5796 5797 case TGSI_OPCODE_F2D: 5798 exec_t_2_64(mach, inst, micro_f2d, TGSI_EXEC_DATA_FLOAT); 5799 break; 5800 5801 case TGSI_OPCODE_D2F: 5802 exec_64_2_t(mach, inst, micro_d2f); 5803 break; 5804 5805 case TGSI_OPCODE_DABS: 5806 exec_double_unary(mach, inst, micro_dabs); 5807 break; 5808 5809 case TGSI_OPCODE_DNEG: 5810 exec_double_unary(mach, inst, micro_dneg); 5811 break; 5812 5813 case TGSI_OPCODE_DADD: 5814 exec_double_binary(mach, inst, micro_dadd, TGSI_EXEC_DATA_DOUBLE); 5815 break; 5816 5817 case TGSI_OPCODE_DDIV: 5818 exec_double_binary(mach, inst, micro_ddiv, TGSI_EXEC_DATA_DOUBLE); 5819 break; 5820 5821 case TGSI_OPCODE_DMUL: 5822 exec_double_binary(mach, inst, micro_dmul, TGSI_EXEC_DATA_DOUBLE); 5823 break; 5824 5825 case TGSI_OPCODE_DMAX: 5826 exec_double_binary(mach, inst, micro_dmax, TGSI_EXEC_DATA_DOUBLE); 5827 break; 5828 5829 case TGSI_OPCODE_DMIN: 5830 exec_double_binary(mach, inst, micro_dmin, TGSI_EXEC_DATA_DOUBLE); 5831 break; 5832 5833 case TGSI_OPCODE_DSLT: 5834 exec_double_binary(mach, inst, micro_dslt, TGSI_EXEC_DATA_UINT); 5835 break; 5836 5837 case TGSI_OPCODE_DSGE: 5838 exec_double_binary(mach, inst, micro_dsge, TGSI_EXEC_DATA_UINT); 5839 break; 5840 5841 case TGSI_OPCODE_DSEQ: 5842 exec_double_binary(mach, inst, micro_dseq, TGSI_EXEC_DATA_UINT); 5843 break; 5844 5845 case TGSI_OPCODE_DSNE: 5846 exec_double_binary(mach, inst, micro_dsne, TGSI_EXEC_DATA_UINT); 5847 break; 5848 5849 case TGSI_OPCODE_DRCP: 5850 exec_double_unary(mach, inst, micro_drcp); 5851 break; 5852 5853 case TGSI_OPCODE_DSQRT: 5854 exec_double_unary(mach, inst, micro_dsqrt); 5855 break; 5856 5857 case TGSI_OPCODE_DRSQ: 5858 exec_double_unary(mach, inst, micro_drsq); 5859 break; 5860 5861 case TGSI_OPCODE_DMAD: 5862 exec_double_trinary(mach, inst, micro_dmad); 5863 break; 5864 5865 case TGSI_OPCODE_DFRAC: 5866 exec_double_unary(mach, inst, micro_dfrac); 5867 break; 5868 5869 case TGSI_OPCODE_DFLR: 5870 exec_double_unary(mach, inst, micro_dflr); 5871 break; 5872 5873 case TGSI_OPCODE_DLDEXP: 5874 exec_dldexp(mach, inst); 5875 break; 5876 5877 case TGSI_OPCODE_DFRACEXP: 5878 exec_dfracexp(mach, inst); 5879 break; 5880 5881 case TGSI_OPCODE_I2D: 5882 exec_t_2_64(mach, inst, micro_i2d, TGSI_EXEC_DATA_FLOAT); 5883 break; 5884 5885 case TGSI_OPCODE_D2I: 5886 exec_64_2_t(mach, inst, micro_d2i); 5887 break; 5888 5889 case TGSI_OPCODE_U2D: 5890 exec_t_2_64(mach, inst, micro_u2d, TGSI_EXEC_DATA_FLOAT); 5891 break; 5892 5893 case TGSI_OPCODE_D2U: 5894 exec_64_2_t(mach, inst, micro_d2u); 5895 break; 5896 5897 case TGSI_OPCODE_LOAD: 5898 exec_load(mach, inst); 5899 break; 5900 5901 case TGSI_OPCODE_STORE: 5902 exec_store(mach, inst); 5903 break; 5904 5905 case TGSI_OPCODE_ATOMUADD: 5906 case TGSI_OPCODE_ATOMXCHG: 5907 case TGSI_OPCODE_ATOMCAS: 5908 case TGSI_OPCODE_ATOMAND: 5909 case TGSI_OPCODE_ATOMOR: 5910 case TGSI_OPCODE_ATOMXOR: 5911 case TGSI_OPCODE_ATOMUMIN: 5912 case TGSI_OPCODE_ATOMUMAX: 5913 case TGSI_OPCODE_ATOMIMIN: 5914 case TGSI_OPCODE_ATOMIMAX: 5915 case TGSI_OPCODE_ATOMFADD: 5916 exec_atomop(mach, inst); 5917 break; 5918 5919 case TGSI_OPCODE_RESQ: 5920 exec_resq(mach, inst); 5921 break; 5922 case TGSI_OPCODE_BARRIER: 5923 case TGSI_OPCODE_MEMBAR: 5924 return TRUE; 5925 break; 5926 5927 case TGSI_OPCODE_I64ABS: 5928 exec_double_unary(mach, inst, micro_i64abs); 5929 break; 5930 5931 case TGSI_OPCODE_I64SSG: 5932 exec_double_unary(mach, inst, micro_i64sgn); 5933 break; 5934 5935 case TGSI_OPCODE_I64NEG: 5936 exec_double_unary(mach, inst, micro_i64neg); 5937 break; 5938 5939 case TGSI_OPCODE_U64SEQ: 5940 exec_double_binary(mach, inst, micro_u64seq, TGSI_EXEC_DATA_UINT); 5941 break; 5942 5943 case TGSI_OPCODE_U64SNE: 5944 exec_double_binary(mach, inst, micro_u64sne, TGSI_EXEC_DATA_UINT); 5945 break; 5946 5947 case TGSI_OPCODE_I64SLT: 5948 exec_double_binary(mach, inst, micro_i64slt, TGSI_EXEC_DATA_UINT); 5949 break; 5950 case TGSI_OPCODE_U64SLT: 5951 exec_double_binary(mach, inst, micro_u64slt, TGSI_EXEC_DATA_UINT); 5952 break; 5953 5954 case TGSI_OPCODE_I64SGE: 5955 exec_double_binary(mach, inst, micro_i64sge, TGSI_EXEC_DATA_UINT); 5956 break; 5957 case TGSI_OPCODE_U64SGE: 5958 exec_double_binary(mach, inst, micro_u64sge, TGSI_EXEC_DATA_UINT); 5959 break; 5960 5961 case TGSI_OPCODE_I64MIN: 5962 exec_double_binary(mach, inst, micro_i64min, TGSI_EXEC_DATA_INT64); 5963 break; 5964 case TGSI_OPCODE_U64MIN: 5965 exec_double_binary(mach, inst, micro_u64min, TGSI_EXEC_DATA_UINT64); 5966 break; 5967 case TGSI_OPCODE_I64MAX: 5968 exec_double_binary(mach, inst, micro_i64max, TGSI_EXEC_DATA_INT64); 5969 break; 5970 case TGSI_OPCODE_U64MAX: 5971 exec_double_binary(mach, inst, micro_u64max, TGSI_EXEC_DATA_UINT64); 5972 break; 5973 case TGSI_OPCODE_U64ADD: 5974 exec_double_binary(mach, inst, micro_u64add, TGSI_EXEC_DATA_UINT64); 5975 break; 5976 case TGSI_OPCODE_U64MUL: 5977 exec_double_binary(mach, inst, micro_u64mul, TGSI_EXEC_DATA_UINT64); 5978 break; 5979 case TGSI_OPCODE_U64SHL: 5980 exec_arg0_64_arg1_32(mach, inst, micro_u64shl); 5981 break; 5982 case TGSI_OPCODE_I64SHR: 5983 exec_arg0_64_arg1_32(mach, inst, micro_i64shr); 5984 break; 5985 case TGSI_OPCODE_U64SHR: 5986 exec_arg0_64_arg1_32(mach, inst, micro_u64shr); 5987 break; 5988 case TGSI_OPCODE_U64DIV: 5989 exec_double_binary(mach, inst, micro_u64div, TGSI_EXEC_DATA_UINT64); 5990 break; 5991 case TGSI_OPCODE_I64DIV: 5992 exec_double_binary(mach, inst, micro_i64div, TGSI_EXEC_DATA_INT64); 5993 break; 5994 case TGSI_OPCODE_U64MOD: 5995 exec_double_binary(mach, inst, micro_u64mod, TGSI_EXEC_DATA_UINT64); 5996 break; 5997 case TGSI_OPCODE_I64MOD: 5998 exec_double_binary(mach, inst, micro_i64mod, TGSI_EXEC_DATA_INT64); 5999 break; 6000 6001 case TGSI_OPCODE_F2U64: 6002 exec_t_2_64(mach, inst, micro_f2u64, TGSI_EXEC_DATA_FLOAT); 6003 break; 6004 6005 case TGSI_OPCODE_F2I64: 6006 exec_t_2_64(mach, inst, micro_f2i64, TGSI_EXEC_DATA_FLOAT); 6007 break; 6008 6009 case TGSI_OPCODE_U2I64: 6010 exec_t_2_64(mach, inst, micro_u2i64, TGSI_EXEC_DATA_INT); 6011 break; 6012 case TGSI_OPCODE_I2I64: 6013 exec_t_2_64(mach, inst, micro_i2i64, TGSI_EXEC_DATA_INT); 6014 break; 6015 6016 case TGSI_OPCODE_D2U64: 6017 exec_double_unary(mach, inst, micro_d2u64); 6018 break; 6019 6020 case TGSI_OPCODE_D2I64: 6021 exec_double_unary(mach, inst, micro_d2i64); 6022 break; 6023 6024 case TGSI_OPCODE_U642F: 6025 exec_64_2_t(mach, inst, micro_u642f); 6026 break; 6027 case TGSI_OPCODE_I642F: 6028 exec_64_2_t(mach, inst, micro_i642f); 6029 break; 6030 6031 case TGSI_OPCODE_U642D: 6032 exec_double_unary(mach, inst, micro_u642d); 6033 break; 6034 case TGSI_OPCODE_I642D: 6035 exec_double_unary(mach, inst, micro_i642d); 6036 break; 6037 case TGSI_OPCODE_INTERP_SAMPLE: 6038 exec_interp_at_sample(mach, inst); 6039 break; 6040 case TGSI_OPCODE_INTERP_OFFSET: 6041 exec_interp_at_offset(mach, inst); 6042 break; 6043 case TGSI_OPCODE_INTERP_CENTROID: 6044 exec_interp_at_centroid(mach, inst); 6045 break; 6046 default: 6047 assert( 0 ); 6048 } 6049 return FALSE; 6050} 6051 6052static void 6053tgsi_exec_machine_setup_masks(struct tgsi_exec_machine *mach) 6054{ 6055 uint default_mask = 0xf; 6056 6057 mach->KillMask = 0; 6058 mach->OutputVertexOffset = 0; 6059 6060 if (mach->ShaderType == PIPE_SHADER_GEOMETRY) { 6061 for (unsigned i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++) { 6062 mach->OutputPrimCount[i] = 0; 6063 mach->Primitives[i][0] = 0; 6064 } 6065 /* GS runs on a single primitive for now */ 6066 default_mask = 0x1; 6067 } 6068 6069 if (mach->NonHelperMask == 0) 6070 mach->NonHelperMask = default_mask; 6071 mach->CondMask = default_mask; 6072 mach->LoopMask = default_mask; 6073 mach->ContMask = default_mask; 6074 mach->FuncMask = default_mask; 6075 mach->ExecMask = default_mask; 6076 6077 mach->Switch.mask = default_mask; 6078 6079 assert(mach->CondStackTop == 0); 6080 assert(mach->LoopStackTop == 0); 6081 assert(mach->ContStackTop == 0); 6082 assert(mach->SwitchStackTop == 0); 6083 assert(mach->BreakStackTop == 0); 6084 assert(mach->CallStackTop == 0); 6085} 6086 6087/** 6088 * Run TGSI interpreter. 6089 * \return bitmask of "alive" quad components 6090 */ 6091uint 6092tgsi_exec_machine_run( struct tgsi_exec_machine *mach, int start_pc ) 6093{ 6094 uint i; 6095 6096 mach->pc = start_pc; 6097 6098 if (!start_pc) { 6099 tgsi_exec_machine_setup_masks(mach); 6100 6101 /* execute declarations (interpolants) */ 6102 for (i = 0; i < mach->NumDeclarations; i++) { 6103 exec_declaration( mach, mach->Declarations+i ); 6104 } 6105 } 6106 6107 { 6108#if DEBUG_EXECUTION 6109 struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS]; 6110 struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS]; 6111 uint inst = 1; 6112 6113 if (!start_pc) { 6114 memset(mach->Temps, 0, sizeof(temps)); 6115 if (mach->Outputs) 6116 memset(mach->Outputs, 0, sizeof(outputs)); 6117 memset(temps, 0, sizeof(temps)); 6118 memset(outputs, 0, sizeof(outputs)); 6119 } 6120#endif 6121 6122 /* execute instructions, until pc is set to -1 */ 6123 while (mach->pc != -1) { 6124 boolean barrier_hit; 6125#if DEBUG_EXECUTION 6126 uint i; 6127 6128 tgsi_dump_instruction(&mach->Instructions[mach->pc], inst++); 6129#endif 6130 6131 assert(mach->pc < (int) mach->NumInstructions); 6132 barrier_hit = exec_instruction(mach, mach->Instructions + mach->pc, &mach->pc); 6133 6134 /* for compute shaders if we hit a barrier return now for later rescheduling */ 6135 if (barrier_hit && mach->ShaderType == PIPE_SHADER_COMPUTE) 6136 return 0; 6137 6138#if DEBUG_EXECUTION 6139 for (i = 0; i < TGSI_EXEC_NUM_TEMPS; i++) { 6140 if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) { 6141 uint j; 6142 6143 memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i])); 6144 debug_printf("TEMP[%2u] = ", i); 6145 for (j = 0; j < 4; j++) { 6146 if (j > 0) { 6147 debug_printf(" "); 6148 } 6149 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", 6150 temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j], 6151 temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j], 6152 temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j], 6153 temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]); 6154 } 6155 } 6156 } 6157 if (mach->Outputs) { 6158 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { 6159 if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) { 6160 uint j; 6161 6162 memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i])); 6163 debug_printf("OUT[%2u] = ", i); 6164 for (j = 0; j < 4; j++) { 6165 if (j > 0) { 6166 debug_printf(" "); 6167 } 6168 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n", 6169 outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j], 6170 outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j], 6171 outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j], 6172 outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]); 6173 } 6174 } 6175 } 6176 } 6177#endif 6178 } 6179 } 6180 6181#if 0 6182 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ 6183 if (mach->ShaderType == PIPE_SHADER_FRAGMENT) { 6184 /* 6185 * Scale back depth component. 6186 */ 6187 for (i = 0; i < 4; i++) 6188 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; 6189 } 6190#endif 6191 6192 /* Strictly speaking, these assertions aren't really needed but they 6193 * can potentially catch some bugs in the control flow code. 6194 */ 6195 assert(mach->CondStackTop == 0); 6196 assert(mach->LoopStackTop == 0); 6197 assert(mach->ContStackTop == 0); 6198 assert(mach->SwitchStackTop == 0); 6199 assert(mach->BreakStackTop == 0); 6200 assert(mach->CallStackTop == 0); 6201 6202 return ~mach->KillMask; 6203} 6204