/**************************************************************************
 * 
 * Copyright 2007-2008 VMware, Inc.
 * All Rights Reserved.
 * 
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 * 
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 * 
 **************************************************************************/

/*
 * \author
 * Michal Krol,
 * Keith Whitwell
 */

#include "pipe/p_compiler.h"
#include "pipe/p_context.h"
#include "pipe/p_screen.h"
#include "pipe/p_shader_tokens.h"
#include "pipe/p_state.h"
#include "tgsi/tgsi_ureg.h"
#include "st_mesa_to_tgsi.h"
#include "st_context.h"
#include "program/prog_instruction.h"
#include "program/prog_parameter.h"
#include "util/u_debug.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "st_glsl_to_tgsi.h" /* for _mesa_sysval_to_semantic */


/*
 * Bitmask of the register files that live in the constant buffer.  It is
 * tested against program->arb.IndirectRegisterFiles to decide whether
 * PROGRAM_CONSTANT parameters may be emitted as immediates.
 */
#define PROGRAM_ANY_CONST ((1 << PROGRAM_STATE_VAR) |    \
                           (1 << PROGRAM_CONSTANT) |     \
                           (1 << PROGRAM_UNIFORM))

/**
 * Intermediate state used during shader translation.
 *
 * One instance lives on the stack of st_translate_mesa_program() for the
 * duration of a single translation.
 */
struct st_translate {
   struct ureg_program *ureg;   /**< TGSI program being built */

   /** Indexed by Mesa temporary index; undefined entries are declared
    *  on first use by dst_register()/src_register(). */
   struct ureg_dst temps[MAX_PROGRAM_TEMPS];
   /** One entry per program parameter; allocated and freed by
    *  st_translate_mesa_program(). */
   struct ureg_src *constants;
   /** Declared outputs, indexed by outputMapping[mesa index]. */
   struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS];
   /** Declared inputs, indexed by inputMapping[mesa index]. */
   struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS];
   /** The single address register (only declared if the program uses one). */
   struct ureg_dst address[1];
   /** Declared samplers, indexed by texture image unit. */
   struct ureg_src samplers[PIPE_MAX_SAMPLERS];
   /** Declared system values, indexed by system value number. */
   struct ureg_src systemValues[SYSTEM_VALUE_MAX];

   const ubyte *inputMapping;   /**< Mesa input index -> slot in inputs[] */
   const ubyte *outputMapping;  /**< Mesa output index -> slot in outputs[] */

   unsigned procType;  /**< PIPE_SHADER_VERTEX/GEOMETRY/FRAGMENT */
};


/**
 * Map a Mesa dst register to a TGSI ureg_dst register.
77 */ 78static struct ureg_dst 79dst_register(struct st_translate *t, gl_register_file file, GLuint index) 80{ 81 switch(file) { 82 case PROGRAM_UNDEFINED: 83 return ureg_dst_undef(); 84 85 case PROGRAM_TEMPORARY: 86 if (ureg_dst_is_undef(t->temps[index])) 87 t->temps[index] = ureg_DECL_temporary(t->ureg); 88 89 return t->temps[index]; 90 91 case PROGRAM_OUTPUT: 92 if (t->procType == PIPE_SHADER_VERTEX) 93 assert(index < VARYING_SLOT_MAX); 94 else if (t->procType == PIPE_SHADER_FRAGMENT) 95 assert(index < FRAG_RESULT_MAX); 96 else 97 assert(index < VARYING_SLOT_MAX); 98 99 assert(t->outputMapping[index] < ARRAY_SIZE(t->outputs)); 100 101 return t->outputs[t->outputMapping[index]]; 102 103 case PROGRAM_ADDRESS: 104 return t->address[index]; 105 106 default: 107 debug_assert(0); 108 return ureg_dst_undef(); 109 } 110} 111 112 113/** 114 * Map a Mesa src register to a TGSI ureg_src register. 115 */ 116static struct ureg_src 117src_register(struct st_translate *t, 118 gl_register_file file, 119 GLint index) 120{ 121 switch(file) { 122 case PROGRAM_UNDEFINED: 123 return ureg_src_undef(); 124 125 case PROGRAM_TEMPORARY: 126 assert(index >= 0); 127 assert(index < ARRAY_SIZE(t->temps)); 128 if (ureg_dst_is_undef(t->temps[index])) 129 t->temps[index] = ureg_DECL_temporary(t->ureg); 130 return ureg_src(t->temps[index]); 131 132 case PROGRAM_UNIFORM: 133 assert(index >= 0); 134 return t->constants[index]; 135 case PROGRAM_STATE_VAR: 136 case PROGRAM_CONSTANT: /* ie, immediate */ 137 if (index < 0) 138 return ureg_DECL_constant(t->ureg, 0); 139 else 140 return t->constants[index]; 141 142 case PROGRAM_INPUT: 143 assert(t->inputMapping[index] < ARRAY_SIZE(t->inputs)); 144 return t->inputs[t->inputMapping[index]]; 145 146 case PROGRAM_OUTPUT: 147 assert(t->outputMapping[index] < ARRAY_SIZE(t->outputs)); 148 return ureg_src(t->outputs[t->outputMapping[index]]); /* not needed? 
*/ 149 150 case PROGRAM_ADDRESS: 151 return ureg_src(t->address[index]); 152 153 case PROGRAM_SYSTEM_VALUE: 154 assert(index < ARRAY_SIZE(t->systemValues)); 155 return t->systemValues[index]; 156 157 default: 158 debug_assert(0); 159 return ureg_src_undef(); 160 } 161} 162 163 164/** 165 * Map mesa texture target to TGSI texture target. 166 */ 167enum tgsi_texture_type 168st_translate_texture_target(gl_texture_index textarget, GLboolean shadow) 169{ 170 if (shadow) { 171 switch (textarget) { 172 case TEXTURE_1D_INDEX: 173 return TGSI_TEXTURE_SHADOW1D; 174 case TEXTURE_2D_INDEX: 175 return TGSI_TEXTURE_SHADOW2D; 176 case TEXTURE_RECT_INDEX: 177 return TGSI_TEXTURE_SHADOWRECT; 178 case TEXTURE_1D_ARRAY_INDEX: 179 return TGSI_TEXTURE_SHADOW1D_ARRAY; 180 case TEXTURE_2D_ARRAY_INDEX: 181 return TGSI_TEXTURE_SHADOW2D_ARRAY; 182 case TEXTURE_CUBE_INDEX: 183 return TGSI_TEXTURE_SHADOWCUBE; 184 case TEXTURE_CUBE_ARRAY_INDEX: 185 return TGSI_TEXTURE_SHADOWCUBE_ARRAY; 186 default: 187 break; 188 } 189 } 190 191 switch (textarget) { 192 case TEXTURE_2D_MULTISAMPLE_INDEX: 193 return TGSI_TEXTURE_2D_MSAA; 194 case TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX: 195 return TGSI_TEXTURE_2D_ARRAY_MSAA; 196 case TEXTURE_BUFFER_INDEX: 197 return TGSI_TEXTURE_BUFFER; 198 case TEXTURE_1D_INDEX: 199 return TGSI_TEXTURE_1D; 200 case TEXTURE_2D_INDEX: 201 return TGSI_TEXTURE_2D; 202 case TEXTURE_3D_INDEX: 203 return TGSI_TEXTURE_3D; 204 case TEXTURE_CUBE_INDEX: 205 return TGSI_TEXTURE_CUBE; 206 case TEXTURE_CUBE_ARRAY_INDEX: 207 return TGSI_TEXTURE_CUBE_ARRAY; 208 case TEXTURE_RECT_INDEX: 209 return TGSI_TEXTURE_RECT; 210 case TEXTURE_1D_ARRAY_INDEX: 211 return TGSI_TEXTURE_1D_ARRAY; 212 case TEXTURE_2D_ARRAY_INDEX: 213 return TGSI_TEXTURE_2D_ARRAY; 214 case TEXTURE_EXTERNAL_INDEX: 215 return TGSI_TEXTURE_2D; 216 default: 217 debug_assert(!"unexpected texture target index"); 218 return TGSI_TEXTURE_1D; 219 } 220} 221 222 223/** 224 * Map GLSL base type to TGSI return type. 
225 */ 226enum tgsi_return_type 227st_translate_texture_type(enum glsl_base_type type) 228{ 229 switch (type) { 230 case GLSL_TYPE_INT: 231 return TGSI_RETURN_TYPE_SINT; 232 case GLSL_TYPE_UINT: 233 return TGSI_RETURN_TYPE_UINT; 234 case GLSL_TYPE_FLOAT: 235 return TGSI_RETURN_TYPE_FLOAT; 236 default: 237 assert(!"unexpected texture type"); 238 return TGSI_RETURN_TYPE_UNKNOWN; 239 } 240} 241 242 243/** 244 * Translate a (1 << TEXTURE_x_INDEX) bit into a TGSI_TEXTURE_x enum. 245 */ 246static unsigned 247translate_texture_index(GLbitfield texBit, bool shadow) 248{ 249 int index = ffs(texBit); 250 assert(index > 0); 251 assert(index - 1 < NUM_TEXTURE_TARGETS); 252 return st_translate_texture_target(index - 1, shadow); 253} 254 255 256/** 257 * Create a TGSI ureg_dst register from a Mesa dest register. 258 */ 259static struct ureg_dst 260translate_dst(struct st_translate *t, 261 const struct prog_dst_register *DstReg, 262 boolean saturate) 263{ 264 struct ureg_dst dst = dst_register(t, DstReg->File, DstReg->Index); 265 266 dst = ureg_writemask(dst, DstReg->WriteMask); 267 268 if (saturate) 269 dst = ureg_saturate(dst); 270 271 if (DstReg->RelAddr) 272 dst = ureg_dst_indirect(dst, ureg_src(t->address[0])); 273 274 return dst; 275} 276 277 278/** 279 * Create a TGSI ureg_src register from a Mesa src register. 
280 */ 281static struct ureg_src 282translate_src(struct st_translate *t, 283 const struct prog_src_register *SrcReg) 284{ 285 struct ureg_src src = src_register(t, SrcReg->File, SrcReg->Index); 286 287 src = ureg_swizzle(src, 288 GET_SWZ(SrcReg->Swizzle, 0) & 0x3, 289 GET_SWZ(SrcReg->Swizzle, 1) & 0x3, 290 GET_SWZ(SrcReg->Swizzle, 2) & 0x3, 291 GET_SWZ(SrcReg->Swizzle, 3) & 0x3); 292 293 if (SrcReg->Negate == NEGATE_XYZW) 294 src = ureg_negate(src); 295 296 if (SrcReg->RelAddr) { 297 src = ureg_src_indirect(src, ureg_src(t->address[0])); 298 if (SrcReg->File != PROGRAM_INPUT && 299 SrcReg->File != PROGRAM_OUTPUT) { 300 /* If SrcReg->Index was negative, it was set to zero in 301 * src_register(). Reassign it now. But don't do this 302 * for input/output regs since they get remapped while 303 * const buffers don't. 304 */ 305 src.Index = SrcReg->Index; 306 } 307 } 308 309 return src; 310} 311 312 313static struct ureg_src 314swizzle_4v(struct ureg_src src, const unsigned *swz) 315{ 316 return ureg_swizzle(src, swz[0], swz[1], swz[2], swz[3]); 317} 318 319 320/** 321 * Translate a SWZ instruction into a MOV, MUL or MAD instruction. 
EG: 322 * 323 * SWZ dst, src.x-y10 324 * 325 * becomes: 326 * 327 * MAD dst {1,-1,0,0}, src.xyxx, {0,0,1,0} 328 */ 329static void 330emit_swz(struct st_translate *t, 331 struct ureg_dst dst, 332 const struct prog_src_register *SrcReg) 333{ 334 struct ureg_program *ureg = t->ureg; 335 struct ureg_src src = src_register(t, SrcReg->File, SrcReg->Index); 336 337 unsigned negate_mask = SrcReg->Negate; 338 339 unsigned one_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ONE) << 0 | 340 (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ONE) << 1 | 341 (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ONE) << 2 | 342 (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ONE) << 3); 343 344 unsigned zero_mask = ((GET_SWZ(SrcReg->Swizzle, 0) == SWIZZLE_ZERO) << 0 | 345 (GET_SWZ(SrcReg->Swizzle, 1) == SWIZZLE_ZERO) << 1 | 346 (GET_SWZ(SrcReg->Swizzle, 2) == SWIZZLE_ZERO) << 2 | 347 (GET_SWZ(SrcReg->Swizzle, 3) == SWIZZLE_ZERO) << 3); 348 349 unsigned negative_one_mask = one_mask & negate_mask; 350 unsigned positive_one_mask = one_mask & ~negate_mask; 351 352 struct ureg_src imm; 353 unsigned i; 354 unsigned mul_swizzle[4] = {0,0,0,0}; 355 unsigned add_swizzle[4] = {0,0,0,0}; 356 unsigned src_swizzle[4] = {0,0,0,0}; 357 boolean need_add = FALSE; 358 boolean need_mul = FALSE; 359 360 if (dst.WriteMask == 0) 361 return; 362 363 /* Is this just a MOV? 
364 */ 365 if (zero_mask == 0 && 366 one_mask == 0 && 367 (negate_mask == 0 || negate_mask == TGSI_WRITEMASK_XYZW)) { 368 ureg_MOV(ureg, dst, translate_src(t, SrcReg)); 369 return; 370 } 371 372#define IMM_ZERO 0 373#define IMM_ONE 1 374#define IMM_NEG_ONE 2 375 376 imm = ureg_imm3f(ureg, 0, 1, -1); 377 378 for (i = 0; i < 4; i++) { 379 unsigned bit = 1 << i; 380 381 if (dst.WriteMask & bit) { 382 if (positive_one_mask & bit) { 383 mul_swizzle[i] = IMM_ZERO; 384 add_swizzle[i] = IMM_ONE; 385 need_add = TRUE; 386 } 387 else if (negative_one_mask & bit) { 388 mul_swizzle[i] = IMM_ZERO; 389 add_swizzle[i] = IMM_NEG_ONE; 390 need_add = TRUE; 391 } 392 else if (zero_mask & bit) { 393 mul_swizzle[i] = IMM_ZERO; 394 add_swizzle[i] = IMM_ZERO; 395 need_add = TRUE; 396 } 397 else { 398 add_swizzle[i] = IMM_ZERO; 399 src_swizzle[i] = GET_SWZ(SrcReg->Swizzle, i); 400 need_mul = TRUE; 401 if (negate_mask & bit) { 402 mul_swizzle[i] = IMM_NEG_ONE; 403 } 404 else { 405 mul_swizzle[i] = IMM_ONE; 406 } 407 } 408 } 409 } 410 411 if (need_mul && need_add) { 412 ureg_MAD(ureg, 413 dst, 414 swizzle_4v(src, src_swizzle), 415 swizzle_4v(imm, mul_swizzle), 416 swizzle_4v(imm, add_swizzle)); 417 } 418 else if (need_mul) { 419 ureg_MUL(ureg, 420 dst, 421 swizzle_4v(src, src_swizzle), 422 swizzle_4v(imm, mul_swizzle)); 423 } 424 else if (need_add) { 425 ureg_MOV(ureg, 426 dst, 427 swizzle_4v(imm, add_swizzle)); 428 } 429 else { 430 debug_assert(0); 431 } 432 433#undef IMM_ZERO 434#undef IMM_ONE 435#undef IMM_NEG_ONE 436} 437 438 439static unsigned 440translate_opcode(unsigned op) 441{ 442 switch(op) { 443 case OPCODE_ARL: 444 return TGSI_OPCODE_ARL; 445 case OPCODE_ADD: 446 return TGSI_OPCODE_ADD; 447 case OPCODE_CMP: 448 return TGSI_OPCODE_CMP; 449 case OPCODE_COS: 450 return TGSI_OPCODE_COS; 451 case OPCODE_DP3: 452 return TGSI_OPCODE_DP3; 453 case OPCODE_DP4: 454 return TGSI_OPCODE_DP4; 455 case OPCODE_DST: 456 return TGSI_OPCODE_DST; 457 case OPCODE_EX2: 458 return TGSI_OPCODE_EX2; 459 
case OPCODE_EXP: 460 return TGSI_OPCODE_EXP; 461 case OPCODE_FLR: 462 return TGSI_OPCODE_FLR; 463 case OPCODE_FRC: 464 return TGSI_OPCODE_FRC; 465 case OPCODE_KIL: 466 return TGSI_OPCODE_KILL_IF; 467 case OPCODE_LG2: 468 return TGSI_OPCODE_LG2; 469 case OPCODE_LOG: 470 return TGSI_OPCODE_LOG; 471 case OPCODE_LIT: 472 return TGSI_OPCODE_LIT; 473 case OPCODE_LRP: 474 return TGSI_OPCODE_LRP; 475 case OPCODE_MAD: 476 return TGSI_OPCODE_MAD; 477 case OPCODE_MAX: 478 return TGSI_OPCODE_MAX; 479 case OPCODE_MIN: 480 return TGSI_OPCODE_MIN; 481 case OPCODE_MOV: 482 return TGSI_OPCODE_MOV; 483 case OPCODE_MUL: 484 return TGSI_OPCODE_MUL; 485 case OPCODE_POW: 486 return TGSI_OPCODE_POW; 487 case OPCODE_RCP: 488 return TGSI_OPCODE_RCP; 489 case OPCODE_SGE: 490 return TGSI_OPCODE_SGE; 491 case OPCODE_SIN: 492 return TGSI_OPCODE_SIN; 493 case OPCODE_SLT: 494 return TGSI_OPCODE_SLT; 495 case OPCODE_TEX: 496 return TGSI_OPCODE_TEX; 497 case OPCODE_TXB: 498 return TGSI_OPCODE_TXB; 499 case OPCODE_TXP: 500 return TGSI_OPCODE_TXP; 501 case OPCODE_END: 502 return TGSI_OPCODE_END; 503 default: 504 debug_assert(0); 505 return TGSI_OPCODE_NOP; 506 } 507} 508 509 510static void 511compile_instruction(struct gl_context *ctx, 512 struct st_translate *t, 513 const struct prog_instruction *inst) 514{ 515 struct ureg_program *ureg = t->ureg; 516 GLuint i; 517 struct ureg_dst dst[1] = { { 0 } }; 518 struct ureg_src src[4]; 519 unsigned num_dst; 520 unsigned num_src; 521 522 num_dst = _mesa_num_inst_dst_regs(inst->Opcode); 523 num_src = _mesa_num_inst_src_regs(inst->Opcode); 524 525 if (num_dst) 526 dst[0] = translate_dst(t, &inst->DstReg, inst->Saturate); 527 528 for (i = 0; i < num_src; i++) 529 src[i] = translate_src(t, &inst->SrcReg[i]); 530 531 switch(inst->Opcode) { 532 case OPCODE_SWZ: 533 emit_swz(t, dst[0], &inst->SrcReg[0]); 534 return; 535 536 case OPCODE_TEX: 537 case OPCODE_TXB: 538 case OPCODE_TXP: 539 src[num_src++] = t->samplers[inst->TexSrcUnit]; 540 ureg_tex_insn(ureg, 541 
translate_opcode(inst->Opcode), 542 dst, num_dst, 543 st_translate_texture_target(inst->TexSrcTarget, 544 inst->TexShadow), 545 TGSI_RETURN_TYPE_FLOAT, 546 NULL, 0, 547 src, num_src); 548 return; 549 550 case OPCODE_SCS: 551 ureg_COS(ureg, ureg_writemask(dst[0], TGSI_WRITEMASK_X), 552 ureg_scalar(src[0], TGSI_SWIZZLE_X)); 553 ureg_SIN(ureg, ureg_writemask(dst[0], TGSI_WRITEMASK_Y), 554 ureg_scalar(src[0], TGSI_SWIZZLE_X)); 555 break; 556 557 case OPCODE_XPD: { 558 struct ureg_dst tmp = ureg_DECL_temporary(ureg); 559 560 ureg_MUL(ureg, ureg_writemask(tmp, TGSI_WRITEMASK_XYZ), 561 ureg_swizzle(src[0], TGSI_SWIZZLE_Y, TGSI_SWIZZLE_Z, 562 TGSI_SWIZZLE_X, 0), 563 ureg_swizzle(src[1], TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, 564 TGSI_SWIZZLE_Y, 0)); 565 ureg_MAD(ureg, ureg_writemask(dst[0], TGSI_WRITEMASK_XYZ), 566 ureg_swizzle(src[0], TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, 567 TGSI_SWIZZLE_Y, 0), 568 ureg_negate(ureg_swizzle(src[1], TGSI_SWIZZLE_Y, 569 TGSI_SWIZZLE_Z, TGSI_SWIZZLE_X, 0)), 570 ureg_src(tmp)); 571 break; 572 } 573 574 case OPCODE_RSQ: 575 ureg_RSQ(ureg, dst[0], ureg_abs(src[0])); 576 break; 577 578 case OPCODE_ABS: 579 ureg_MOV(ureg, dst[0], ureg_abs(src[0])); 580 break; 581 582 case OPCODE_SUB: 583 ureg_ADD(ureg, dst[0], src[0], ureg_negate(src[1])); 584 break; 585 586 case OPCODE_DPH: { 587 struct ureg_dst temp = ureg_DECL_temporary(ureg); 588 589 /* DPH = DP4(src0, src1) where src0.w = 1. */ 590 ureg_MOV(ureg, ureg_writemask(temp, TGSI_WRITEMASK_XYZ), src[0]); 591 ureg_MOV(ureg, ureg_writemask(temp, TGSI_WRITEMASK_W), 592 ureg_imm1f(ureg, 1)); 593 ureg_DP4(ureg, dst[0], ureg_src(temp), src[1]); 594 break; 595 } 596 597 default: 598 ureg_insn(ureg, 599 translate_opcode(inst->Opcode), 600 dst, num_dst, 601 src, num_src, 0); 602 break; 603 } 604} 605 606 607/** 608 * Emit the TGSI instructions for inverting and adjusting WPOS. 609 * This code is unavoidable because it also depends on whether 610 * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM). 
611 */ 612static void 613emit_wpos_adjustment(struct gl_context *ctx, 614 struct st_translate *t, 615 const struct gl_program *program, 616 boolean invert, 617 GLfloat adjX, GLfloat adjY[2]) 618{ 619 struct ureg_program *ureg = t->ureg; 620 621 /* Fragment program uses fragment position input. 622 * Need to replace instances of INPUT[WPOS] with temp T 623 * where T = INPUT[WPOS] by y is inverted. 624 */ 625 static const gl_state_index16 wposTransformState[STATE_LENGTH] 626 = { STATE_INTERNAL, STATE_FB_WPOS_Y_TRANSFORM, 0, 0, 0 }; 627 628 /* XXX: note we are modifying the incoming shader here! Need to 629 * do this before emitting the constant decls below, or this 630 * will be missed: 631 */ 632 unsigned wposTransConst = _mesa_add_state_reference(program->Parameters, 633 wposTransformState); 634 635 struct ureg_src wpostrans = ureg_DECL_constant(ureg, wposTransConst); 636 struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg); 637 struct ureg_src *wpos = 638 ctx->Const.GLSLFragCoordIsSysVal ? 639 &t->systemValues[SYSTEM_VALUE_FRAG_COORD] : 640 &t->inputs[t->inputMapping[VARYING_SLOT_POS]]; 641 struct ureg_src wpos_input = *wpos; 642 643 /* First, apply the coordinate shift: */ 644 if (adjX || adjY[0] || adjY[1]) { 645 if (adjY[0] != adjY[1]) { 646 /* Adjust the y coordinate by adjY[1] or adjY[0] respectively 647 * depending on whether inversion is actually going to be applied 648 * or not, which is determined by testing against the inversion 649 * state variable used below, which will be either +1 or -1. 650 */ 651 struct ureg_dst adj_temp = ureg_DECL_temporary(ureg); 652 653 ureg_CMP(ureg, adj_temp, 654 ureg_scalar(wpostrans, invert ? 
2 : 0), 655 ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f), 656 ureg_imm4f(ureg, adjX, adjY[1], 0.0f, 0.0f)); 657 ureg_ADD(ureg, wpos_temp, wpos_input, ureg_src(adj_temp)); 658 } else { 659 ureg_ADD(ureg, wpos_temp, wpos_input, 660 ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f)); 661 } 662 wpos_input = ureg_src(wpos_temp); 663 } else { 664 /* MOV wpos_temp, input[wpos] 665 */ 666 ureg_MOV(ureg, wpos_temp, wpos_input); 667 } 668 669 /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be 670 * inversion/identity, or the other way around if we're drawing to an FBO. 671 */ 672 if (invert) { 673 /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy 674 */ 675 ureg_MAD(ureg, 676 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y), 677 wpos_input, 678 ureg_scalar(wpostrans, 0), 679 ureg_scalar(wpostrans, 1)); 680 } else { 681 /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww 682 */ 683 ureg_MAD(ureg, 684 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y), 685 wpos_input, 686 ureg_scalar(wpostrans, 2), 687 ureg_scalar(wpostrans, 3)); 688 } 689 690 /* Use wpos_temp as position input from here on: 691 */ 692 *wpos = ureg_src(wpos_temp); 693} 694 695 696/** 697 * Emit fragment position/coordinate code. 698 */ 699static void 700emit_wpos(struct st_context *st, 701 struct st_translate *t, 702 const struct gl_program *program, 703 struct ureg_program *ureg) 704{ 705 struct pipe_screen *pscreen = st->pipe->screen; 706 GLfloat adjX = 0.0f; 707 GLfloat adjY[2] = { 0.0f, 0.0f }; 708 boolean invert = FALSE; 709 710 /* Query the pixel center conventions supported by the pipe driver and set 711 * adjX, adjY to help out if it cannot handle the requested one internally. 
712 * 713 * The bias of the y-coordinate depends on whether y-inversion takes place 714 * (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we are 715 * drawing to an FBO (causes additional inversion), and whether the pipe 716 * driver origin and the requested origin differ (the latter condition is 717 * stored in the 'invert' variable). 718 * 719 * For height = 100 (i = integer, h = half-integer, l = lower, u = upper): 720 * 721 * center shift only: 722 * i -> h: +0.5 723 * h -> i: -0.5 724 * 725 * inversion only: 726 * l,i -> u,i: ( 0.0 + 1.0) * -1 + 100 = 99 727 * l,h -> u,h: ( 0.5 + 0.0) * -1 + 100 = 99.5 728 * u,i -> l,i: (99.0 + 1.0) * -1 + 100 = 0 729 * u,h -> l,h: (99.5 + 0.0) * -1 + 100 = 0.5 730 * 731 * inversion and center shift: 732 * l,i -> u,h: ( 0.0 + 0.5) * -1 + 100 = 99.5 733 * l,h -> u,i: ( 0.5 + 0.5) * -1 + 100 = 99 734 * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5 735 * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0 736 */ 737 if (program->info.fs.origin_upper_left) { 738 /* Fragment shader wants origin in upper-left */ 739 if (pscreen->get_param(pscreen, 740 PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) { 741 /* the driver supports upper-left origin */ 742 } 743 else if (pscreen->get_param(pscreen, 744 PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) { 745 /* the driver supports lower-left origin, need to invert Y */ 746 ureg_property(ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, 747 TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 748 invert = TRUE; 749 } 750 else 751 assert(0); 752 } 753 else { 754 /* Fragment shader wants origin in lower-left */ 755 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) 756 /* the driver supports lower-left origin */ 757 ureg_property(ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, 758 TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 759 else if (pscreen->get_param(pscreen, 760 PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) 761 /* the driver supports upper-left origin, need to invert Y */ 762 invert = TRUE; 763 else 764 assert(0); 765 } 766 
767 if (program->info.fs.pixel_center_integer) { 768 /* Fragment shader wants pixel center integer */ 769 if (pscreen->get_param(pscreen, 770 PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { 771 /* the driver supports pixel center integer */ 772 adjY[1] = 1.0f; 773 ureg_property(ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, 774 TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 775 } 776 else if (pscreen->get_param(pscreen, 777 PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { 778 /* the driver supports pixel center half integer, need to bias X,Y */ 779 adjX = -0.5f; 780 adjY[0] = -0.5f; 781 adjY[1] = 0.5f; 782 } 783 else 784 assert(0); 785 } 786 else { 787 /* Fragment shader wants pixel center half integer */ 788 if (pscreen->get_param(pscreen, 789 PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { 790 /* the driver supports pixel center half integer */ 791 } 792 else if (pscreen->get_param(pscreen, 793 PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { 794 /* the driver supports pixel center integer, need to bias X,Y */ 795 adjX = adjY[0] = adjY[1] = 0.5f; 796 ureg_property(ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, 797 TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 798 } 799 else 800 assert(0); 801 } 802 803 /* we invert after adjustment so that we avoid the MOV to temporary, 804 * and reuse the adjustment ADD instead */ 805 emit_wpos_adjustment(st->ctx, t, program, invert, adjX, adjY); 806} 807 808 809/** 810 * Translate Mesa program to TGSI format. 
811 * \param program the program to translate 812 * \param numInputs number of input registers used 813 * \param inputMapping maps Mesa fragment program inputs to TGSI generic 814 * input indexes 815 * \param inputSemanticName the TGSI_SEMANTIC flag for each input 816 * \param inputSemanticIndex the semantic index (ex: which texcoord) for 817 * each input 818 * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input 819 * \param numOutputs number of output registers used 820 * \param outputMapping maps Mesa fragment program outputs to TGSI 821 * generic outputs 822 * \param outputSemanticName the TGSI_SEMANTIC flag for each output 823 * \param outputSemanticIndex the semantic index (ex: which texcoord) for 824 * each output 825 * 826 * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY 827 */ 828enum pipe_error 829st_translate_mesa_program(struct gl_context *ctx, 830 uint procType, 831 struct ureg_program *ureg, 832 const struct gl_program *program, 833 GLuint numInputs, 834 const ubyte inputMapping[], 835 const ubyte inputSemanticName[], 836 const ubyte inputSemanticIndex[], 837 const ubyte interpMode[], 838 GLuint numOutputs, 839 const ubyte outputMapping[], 840 const ubyte outputSemanticName[], 841 const ubyte outputSemanticIndex[]) 842{ 843 struct st_translate translate, *t; 844 unsigned i; 845 enum pipe_error ret = PIPE_OK; 846 847 assert(numInputs <= ARRAY_SIZE(t->inputs)); 848 assert(numOutputs <= ARRAY_SIZE(t->outputs)); 849 850 t = &translate; 851 memset(t, 0, sizeof *t); 852 853 t->procType = procType; 854 t->inputMapping = inputMapping; 855 t->outputMapping = outputMapping; 856 t->ureg = ureg; 857 858 /*_mesa_print_program(program);*/ 859 860 /* 861 * Declare input attributes. 
862 */ 863 if (procType == PIPE_SHADER_FRAGMENT) { 864 for (i = 0; i < numInputs; i++) { 865 t->inputs[i] = ureg_DECL_fs_input(ureg, 866 inputSemanticName[i], 867 inputSemanticIndex[i], 868 interpMode[i]); 869 } 870 871 if (program->info.inputs_read & VARYING_BIT_POS) { 872 /* Must do this after setting up t->inputs, and before 873 * emitting constant references, below: 874 */ 875 emit_wpos(st_context(ctx), t, program, ureg); 876 } 877 878 /* 879 * Declare output attributes. 880 */ 881 for (i = 0; i < numOutputs; i++) { 882 switch (outputSemanticName[i]) { 883 case TGSI_SEMANTIC_POSITION: 884 t->outputs[i] = ureg_DECL_output(ureg, 885 TGSI_SEMANTIC_POSITION, /* Z / Depth */ 886 outputSemanticIndex[i]); 887 888 t->outputs[i] = ureg_writemask(t->outputs[i], 889 TGSI_WRITEMASK_Z); 890 break; 891 case TGSI_SEMANTIC_STENCIL: 892 t->outputs[i] = ureg_DECL_output(ureg, 893 TGSI_SEMANTIC_STENCIL, /* Stencil */ 894 outputSemanticIndex[i]); 895 t->outputs[i] = ureg_writemask(t->outputs[i], 896 TGSI_WRITEMASK_Y); 897 break; 898 case TGSI_SEMANTIC_COLOR: 899 t->outputs[i] = ureg_DECL_output(ureg, 900 TGSI_SEMANTIC_COLOR, 901 outputSemanticIndex[i]); 902 break; 903 default: 904 debug_assert(0); 905 return 0; 906 } 907 } 908 } 909 else if (procType == PIPE_SHADER_GEOMETRY) { 910 for (i = 0; i < numInputs; i++) { 911 t->inputs[i] = ureg_DECL_input(ureg, 912 inputSemanticName[i], 913 inputSemanticIndex[i], 0, 1); 914 } 915 916 for (i = 0; i < numOutputs; i++) { 917 t->outputs[i] = ureg_DECL_output(ureg, 918 outputSemanticName[i], 919 outputSemanticIndex[i]); 920 } 921 } 922 else { 923 assert(procType == PIPE_SHADER_VERTEX); 924 925 for (i = 0; i < numInputs; i++) { 926 t->inputs[i] = ureg_DECL_vs_input(ureg, i); 927 } 928 929 for (i = 0; i < numOutputs; i++) { 930 t->outputs[i] = ureg_DECL_output(ureg, 931 outputSemanticName[i], 932 outputSemanticIndex[i]); 933 if (outputSemanticName[i] == TGSI_SEMANTIC_FOG) { 934 /* force register to contain a fog coordinate in the 935 * form (F, 
0, 0, 1). 936 */ 937 ureg_MOV(ureg, 938 ureg_writemask(t->outputs[i], TGSI_WRITEMASK_YZW), 939 ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f)); 940 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X); 941 } 942 } 943 } 944 945 /* Declare address register. 946 */ 947 if (program->arb.NumAddressRegs > 0) { 948 debug_assert(program->arb.NumAddressRegs == 1); 949 t->address[0] = ureg_DECL_address(ureg); 950 } 951 952 /* Declare misc input registers 953 */ 954 GLbitfield64 sysInputs = program->info.system_values_read; 955 for (i = 0; sysInputs; i++) { 956 if (sysInputs & (1ull << i)) { 957 unsigned semName = _mesa_sysval_to_semantic(i); 958 959 t->systemValues[i] = ureg_DECL_system_value(ureg, semName, 0); 960 961 if (semName == TGSI_SEMANTIC_INSTANCEID || 962 semName == TGSI_SEMANTIC_VERTEXID) { 963 /* From Gallium perspective, these system values are always 964 * integer, and require native integer support. However, if 965 * native integer is supported on the vertex stage but not the 966 * pixel stage (e.g, i915g + draw), Mesa will generate IR that 967 * assumes these system values are floats. To resolve the 968 * inconsistency, we insert a U2F. 
969 */ 970 struct st_context *st = st_context(ctx); 971 struct pipe_screen *pscreen = st->pipe->screen; 972 assert(procType == PIPE_SHADER_VERTEX); 973 assert(pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX, 974 PIPE_SHADER_CAP_INTEGERS)); 975 (void) pscreen; /* silence non-debug build warnings */ 976 if (!ctx->Const.NativeIntegers) { 977 struct ureg_dst temp = ureg_DECL_local_temporary(t->ureg); 978 ureg_U2F(t->ureg, ureg_writemask(temp, TGSI_WRITEMASK_X), 979 t->systemValues[i]); 980 t->systemValues[i] = ureg_scalar(ureg_src(temp), 0); 981 } 982 } 983 984 if (procType == PIPE_SHADER_FRAGMENT && 985 semName == TGSI_SEMANTIC_POSITION) 986 emit_wpos(st_context(ctx), t, program, ureg); 987 988 sysInputs &= ~(1ull << i); 989 } 990 } 991 992 if (program->arb.IndirectRegisterFiles & (1 << PROGRAM_TEMPORARY)) { 993 /* If temps are accessed with indirect addressing, declare temporaries 994 * in sequential order. Else, we declare them on demand elsewhere. 995 */ 996 for (i = 0; i < program->arb.NumTemporaries; i++) { 997 /* XXX use TGSI_FILE_TEMPORARY_ARRAY when it's supported by ureg */ 998 t->temps[i] = ureg_DECL_temporary(t->ureg); 999 } 1000 } 1001 1002 /* Emit constants and immediates. Mesa uses a single index space 1003 * for these, so we put all the translated regs in t->constants. 1004 */ 1005 if (program->Parameters) { 1006 t->constants = calloc(program->Parameters->NumParameters, 1007 sizeof t->constants[0]); 1008 if (t->constants == NULL) { 1009 ret = PIPE_ERROR_OUT_OF_MEMORY; 1010 goto out; 1011 } 1012 1013 for (i = 0; i < program->Parameters->NumParameters; i++) { 1014 unsigned pvo = program->Parameters->ParameterValueOffset[i]; 1015 1016 switch (program->Parameters->Parameters[i].Type) { 1017 case PROGRAM_STATE_VAR: 1018 case PROGRAM_UNIFORM: 1019 t->constants[i] = ureg_DECL_constant(ureg, i); 1020 break; 1021 1022 /* Emit immediates only when there's no indirect addressing of 1023 * the const buffer. 
1024 * FIXME: Be smarter and recognize param arrays: 1025 * indirect addressing is only valid within the referenced 1026 * array. 1027 */ 1028 case PROGRAM_CONSTANT: 1029 if (program->arb.IndirectRegisterFiles & PROGRAM_ANY_CONST) 1030 t->constants[i] = ureg_DECL_constant( ureg, i ); 1031 else 1032 t->constants[i] = 1033 ureg_DECL_immediate(ureg, 1034 (const float *) 1035 program->Parameters->ParameterValues + pvo, 1036 4); 1037 break; 1038 default: 1039 break; 1040 } 1041 } 1042 } 1043 1044 /* texture samplers */ 1045 for (i = 0; 1046 i < ctx->Const.Program[MESA_SHADER_FRAGMENT].MaxTextureImageUnits; i++) { 1047 if (program->SamplersUsed & (1u << i)) { 1048 unsigned target = 1049 translate_texture_index(program->TexturesUsed[i], 1050 !!(program->ShadowSamplers & (1 << i))); 1051 t->samplers[i] = ureg_DECL_sampler(ureg, i); 1052 ureg_DECL_sampler_view(ureg, i, target, 1053 TGSI_RETURN_TYPE_FLOAT, 1054 TGSI_RETURN_TYPE_FLOAT, 1055 TGSI_RETURN_TYPE_FLOAT, 1056 TGSI_RETURN_TYPE_FLOAT); 1057 1058 } 1059 } 1060 1061 /* Emit each instruction in turn: 1062 */ 1063 for (i = 0; i < program->arb.NumInstructions; i++) 1064 compile_instruction(ctx, t, &program->arb.Instructions[i]); 1065 1066out: 1067 free(t->constants); 1068 return ret; 1069} 1070