/**********************************************************
 * Copyright 2008-2009 VMware, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 **********************************************************/


#include "pipe/p_shader_tokens.h"
#include "tgsi/tgsi_parse.h"
#include "util/u_memory.h"

#include "svga_tgsi_emit.h"


/**
 * Translate TGSI semantic info into SVGA3d semantic info.
 * This is called for VS outputs and PS inputs only.
37 */ 38static boolean 39translate_vs_ps_semantic(struct svga_shader_emitter *emit, 40 struct tgsi_declaration_semantic semantic, 41 unsigned *usage, 42 unsigned *idx) 43{ 44 switch (semantic.Name) { 45 case TGSI_SEMANTIC_POSITION: 46 *idx = semantic.Index; 47 *usage = SVGA3D_DECLUSAGE_POSITION; 48 break; 49 case TGSI_SEMANTIC_COLOR: 50 *idx = semantic.Index; 51 *usage = SVGA3D_DECLUSAGE_COLOR; 52 break; 53 case TGSI_SEMANTIC_BCOLOR: 54 *idx = semantic.Index + 2; /* sharing with COLOR */ 55 *usage = SVGA3D_DECLUSAGE_COLOR; 56 break; 57 case TGSI_SEMANTIC_FOG: 58 *idx = 0; 59 assert(semantic.Index == 0); 60 *usage = SVGA3D_DECLUSAGE_TEXCOORD; 61 break; 62 case TGSI_SEMANTIC_PSIZE: 63 *idx = semantic.Index; 64 *usage = SVGA3D_DECLUSAGE_PSIZE; 65 break; 66 case TGSI_SEMANTIC_GENERIC: 67 *idx = svga_remap_generic_index(emit->key.generic_remap_table, 68 semantic.Index); 69 *usage = SVGA3D_DECLUSAGE_TEXCOORD; 70 break; 71 case TGSI_SEMANTIC_NORMAL: 72 *idx = semantic.Index; 73 *usage = SVGA3D_DECLUSAGE_NORMAL; 74 break; 75 case TGSI_SEMANTIC_CLIPDIST: 76 case TGSI_SEMANTIC_CLIPVERTEX: 77 /* XXX at this time we don't support clip distance or clip vertices */ 78 debug_warn_once("unsupported clip distance/vertex attribute\n"); 79 *usage = SVGA3D_DECLUSAGE_TEXCOORD; 80 *idx = 0; 81 return TRUE; 82 default: 83 assert(0); 84 *usage = SVGA3D_DECLUSAGE_TEXCOORD; 85 *idx = 0; 86 return FALSE; 87 } 88 89 return TRUE; 90} 91 92 93/** 94 * Emit a PS input (or VS depth/fog output) register declaration. 95 * For example, if usage = SVGA3D_DECLUSAGE_TEXCOORD, reg.num = 1, and 96 * index = 3, we'll emit "dcl_texcoord3 v1". 
97 */ 98static boolean 99emit_decl(struct svga_shader_emitter *emit, 100 SVGA3dShaderDestToken reg, 101 unsigned usage, 102 unsigned index) 103{ 104 SVGA3DOpDclArgs dcl; 105 SVGA3dShaderInstToken opcode; 106 107 /* check values against bitfield sizes */ 108 assert(index < 16); 109 assert(usage <= SVGA3D_DECLUSAGE_MAX); 110 111 opcode = inst_token(SVGA3DOP_DCL); 112 dcl.values[0] = 0; 113 dcl.values[1] = 0; 114 115 dcl.dst = reg; 116 dcl.usage = usage; 117 dcl.index = index; 118 dcl.values[0] |= 1<<31; 119 120 return (emit_instruction(emit, opcode) && 121 svga_shader_emit_dwords(emit, dcl.values, ARRAY_SIZE(dcl.values))); 122} 123 124 125/** 126 * Emit declaration for PS front/back-face input register. 127 */ 128static boolean 129emit_vface_decl(struct svga_shader_emitter *emit) 130{ 131 if (!emit->emitted_vface) { 132 SVGA3dShaderDestToken reg = 133 dst_register(SVGA3DREG_MISCTYPE, SVGA3DMISCREG_FACE); 134 135 if (!emit_decl(emit, reg, 0, 0)) 136 return FALSE; 137 138 emit->emitted_vface = TRUE; 139 } 140 return TRUE; 141} 142 143 144/** 145 * Emit PS input register to pass depth/fog coordinates. 146 * Note that this always goes into texcoord[0]. 147 */ 148static boolean 149ps30_input_emit_depth_fog(struct svga_shader_emitter *emit, 150 struct src_register *out) 151{ 152 struct src_register reg; 153 154 if (emit->emitted_depth_fog) { 155 *out = emit->ps_depth_fog; 156 return TRUE; 157 } 158 159 if (emit->ps30_input_count >= SVGA3D_INPUTREG_MAX) 160 return FALSE; 161 162 reg = src_register(SVGA3DREG_INPUT, 163 emit->ps30_input_count++); 164 165 *out = emit->ps_depth_fog = reg; 166 167 emit->emitted_depth_fog = TRUE; 168 169 return emit_decl(emit, dst(reg), SVGA3D_DECLUSAGE_TEXCOORD, 0); 170} 171 172 173/** 174 * Process a PS input declaration. 
175 * We'll emit a declaration like "dcl_texcoord1 v2" 176 */ 177static boolean 178ps30_input(struct svga_shader_emitter *emit, 179 struct tgsi_declaration_semantic semantic, 180 unsigned idx) 181{ 182 unsigned usage, index; 183 SVGA3dShaderDestToken reg; 184 185 if (semantic.Name == TGSI_SEMANTIC_POSITION) { 186 187 emit->ps_true_pos = src_register(SVGA3DREG_MISCTYPE, 188 SVGA3DMISCREG_POSITION); 189 emit->ps_true_pos.base.swizzle = TRANSLATE_SWIZZLE(TGSI_SWIZZLE_X, 190 TGSI_SWIZZLE_Y, 191 TGSI_SWIZZLE_Y, 192 TGSI_SWIZZLE_Y); 193 reg = writemask(dst(emit->ps_true_pos), 194 TGSI_WRITEMASK_XY); 195 emit->ps_reads_pos = TRUE; 196 197 if (emit->info.reads_z) { 198 emit->ps_temp_pos = dst_register(SVGA3DREG_TEMP, 199 emit->nr_hw_temp); 200 201 emit->input_map[idx] = src_register(SVGA3DREG_TEMP, 202 emit->nr_hw_temp); 203 emit->nr_hw_temp++; 204 205 if (!ps30_input_emit_depth_fog(emit, &emit->ps_depth_pos)) 206 return FALSE; 207 208 emit->ps_depth_pos.base.swizzle = TRANSLATE_SWIZZLE(TGSI_SWIZZLE_Z, 209 TGSI_SWIZZLE_Z, 210 TGSI_SWIZZLE_Z, 211 TGSI_SWIZZLE_W); 212 } 213 else { 214 emit->input_map[idx] = emit->ps_true_pos; 215 } 216 217 return emit_decl(emit, reg, 0, 0); 218 } 219 else if (emit->key.fs.light_twoside && 220 (semantic.Name == TGSI_SEMANTIC_COLOR)) { 221 222 if (!translate_vs_ps_semantic(emit, semantic, &usage, &index)) 223 return FALSE; 224 225 emit->internal_color_idx[emit->internal_color_count] = idx; 226 emit->input_map[idx] = 227 src_register(SVGA3DREG_INPUT, emit->ps30_input_count); 228 emit->ps30_input_count++; 229 emit->internal_color_count++; 230 231 reg = dst(emit->input_map[idx]); 232 233 if (!emit_decl(emit, reg, usage, index)) 234 return FALSE; 235 236 semantic.Name = TGSI_SEMANTIC_BCOLOR; 237 if (!translate_vs_ps_semantic(emit, semantic, &usage, &index)) 238 return FALSE; 239 240 if (emit->ps30_input_count >= SVGA3D_INPUTREG_MAX) 241 return FALSE; 242 243 reg = dst_register(SVGA3DREG_INPUT, emit->ps30_input_count++); 244 245 if 
(!emit_decl(emit, reg, usage, index)) 246 return FALSE; 247 248 if (!emit_vface_decl(emit)) 249 return FALSE; 250 251 return TRUE; 252 } 253 else if (semantic.Name == TGSI_SEMANTIC_FACE) { 254 if (!emit_vface_decl(emit)) 255 return FALSE; 256 emit->emit_frontface = TRUE; 257 emit->internal_frontface_idx = idx; 258 return TRUE; 259 } 260 else if (semantic.Name == TGSI_SEMANTIC_FOG) { 261 262 assert(semantic.Index == 0); 263 264 if (!ps30_input_emit_depth_fog(emit, &emit->input_map[idx])) 265 return FALSE; 266 267 emit->input_map[idx].base.swizzle = TRANSLATE_SWIZZLE(TGSI_SWIZZLE_X, 268 TGSI_SWIZZLE_X, 269 TGSI_SWIZZLE_X, 270 TGSI_SWIZZLE_X); 271 return TRUE; 272 } 273 else { 274 275 if (!translate_vs_ps_semantic(emit, semantic, &usage, &index)) 276 return FALSE; 277 278 if (emit->ps30_input_count >= SVGA3D_INPUTREG_MAX) 279 return FALSE; 280 281 emit->input_map[idx] = 282 src_register(SVGA3DREG_INPUT, emit->ps30_input_count++); 283 284 reg = dst(emit->input_map[idx]); 285 286 if (!emit_decl(emit, reg, usage, index)) 287 return FALSE; 288 289 if (semantic.Name == TGSI_SEMANTIC_GENERIC && 290 emit->key.sprite_origin_lower_left && 291 index >= 1 && 292 emit->key.sprite_coord_enable & (1 << semantic.Index)) { 293 /* This is a sprite texture coord with lower-left origin. 294 * We need to invert the texture T coordinate since the SVGA3D 295 * device only supports an upper-left origin. 
296 */ 297 unsigned unit = index - 1; 298 299 emit->inverted_texcoords |= (1 << unit); 300 301 /* save original texcoord reg */ 302 emit->ps_true_texcoord[unit] = emit->input_map[idx]; 303 304 /* this temp register will be the results of the MAD instruction */ 305 emit->ps_inverted_texcoord[unit] = 306 src_register(SVGA3DREG_TEMP, emit->nr_hw_temp); 307 emit->nr_hw_temp++; 308 309 emit->ps_inverted_texcoord_input[unit] = idx; 310 311 /* replace input_map entry with the temp register */ 312 emit->input_map[idx] = emit->ps_inverted_texcoord[unit]; 313 } 314 315 return TRUE; 316 } 317 318} 319 320 321/** 322 * Process a PS output declaration. 323 * Note that we don't actually emit a SVGA3DOpDcl for PS outputs. 324 * \idx register index, such as OUT[2] (not semantic index) 325 */ 326static boolean 327ps30_output(struct svga_shader_emitter *emit, 328 struct tgsi_declaration_semantic semantic, 329 unsigned idx) 330{ 331 switch (semantic.Name) { 332 case TGSI_SEMANTIC_COLOR: 333 if (emit->unit == PIPE_SHADER_FRAGMENT) { 334 if (emit->key.fs.white_fragments) { 335 /* Used for XOR logicop mode */ 336 emit->output_map[idx] = dst_register(SVGA3DREG_TEMP, 337 emit->nr_hw_temp++); 338 emit->temp_color_output[idx] = emit->output_map[idx]; 339 emit->true_color_output[idx] = dst_register(SVGA3DREG_COLOROUT, 340 semantic.Index); 341 } 342 else if (emit->key.fs.write_color0_to_n_cbufs) { 343 /* We'll write color output [0] to all render targets. 344 * Prepare all the output registers here, but only when the 345 * semantic.Index == 0 so we don't do this more than once. 
346 */ 347 if (semantic.Index == 0) { 348 unsigned i; 349 for (i = 0; i < emit->key.fs.write_color0_to_n_cbufs; i++) { 350 emit->output_map[idx+i] = dst_register(SVGA3DREG_TEMP, 351 emit->nr_hw_temp++); 352 emit->temp_color_output[i] = emit->output_map[idx+i]; 353 emit->true_color_output[i] = dst_register(SVGA3DREG_COLOROUT, 354 i); 355 } 356 } 357 } 358 else { 359 emit->output_map[idx] = 360 dst_register(SVGA3DREG_COLOROUT, semantic.Index); 361 } 362 } 363 else { 364 emit->output_map[idx] = dst_register(SVGA3DREG_COLOROUT, 365 semantic.Index); 366 } 367 break; 368 case TGSI_SEMANTIC_POSITION: 369 emit->output_map[idx] = dst_register(SVGA3DREG_TEMP, 370 emit->nr_hw_temp++); 371 emit->temp_pos = emit->output_map[idx]; 372 emit->true_pos = dst_register(SVGA3DREG_DEPTHOUT, 373 semantic.Index); 374 break; 375 default: 376 assert(0); 377 /* A wild stab in the dark. */ 378 emit->output_map[idx] = dst_register(SVGA3DREG_COLOROUT, 0); 379 break; 380 } 381 382 return TRUE; 383} 384 385 386/** 387 * Declare a VS input register. 388 * We still make up the input semantics the same as in 2.0 389 */ 390static boolean 391vs30_input(struct svga_shader_emitter *emit, 392 struct tgsi_declaration_semantic semantic, 393 unsigned idx) 394{ 395 SVGA3DOpDclArgs dcl; 396 SVGA3dShaderInstToken opcode; 397 unsigned usage, index; 398 399 opcode = inst_token(SVGA3DOP_DCL); 400 dcl.values[0] = 0; 401 dcl.values[1] = 0; 402 403 emit->input_map[idx] = src_register(SVGA3DREG_INPUT, idx); 404 dcl.dst = dst_register(SVGA3DREG_INPUT, idx); 405 406 assert(dcl.dst.reserved0); 407 408 svga_generate_vdecl_semantics(idx, &usage, &index); 409 410 dcl.usage = usage; 411 dcl.index = index; 412 dcl.values[0] |= 1<<31; 413 414 return (emit_instruction(emit, opcode) && 415 svga_shader_emit_dwords(emit, dcl.values, ARRAY_SIZE(dcl.values))); 416} 417 418 419/** 420 * Declare VS output for holding depth/fog. 
421 */ 422static boolean 423vs30_output_emit_depth_fog(struct svga_shader_emitter *emit, 424 SVGA3dShaderDestToken *out) 425{ 426 SVGA3dShaderDestToken reg; 427 428 if (emit->emitted_depth_fog) { 429 *out = emit->vs_depth_fog; 430 return TRUE; 431 } 432 433 reg = dst_register(SVGA3DREG_OUTPUT, emit->vs30_output_count++); 434 435 *out = emit->vs_depth_fog = reg; 436 437 emit->emitted_depth_fog = TRUE; 438 439 return emit_decl(emit, reg, SVGA3D_DECLUSAGE_TEXCOORD, 0); 440} 441 442 443/** 444 * Declare a VS output. 445 * VS3.0 outputs have proper declarations and semantic info for 446 * matching against PS inputs. 447 */ 448static boolean 449vs30_output(struct svga_shader_emitter *emit, 450 struct tgsi_declaration_semantic semantic, 451 unsigned idx) 452{ 453 SVGA3DOpDclArgs dcl; 454 SVGA3dShaderInstToken opcode; 455 unsigned usage, index; 456 457 opcode = inst_token(SVGA3DOP_DCL); 458 dcl.values[0] = 0; 459 dcl.values[1] = 0; 460 461 if (!translate_vs_ps_semantic(emit, semantic, &usage, &index)) 462 return FALSE; 463 464 if (emit->vs30_output_count >= SVGA3D_OUTPUTREG_MAX) 465 return FALSE; 466 467 dcl.dst = dst_register(SVGA3DREG_OUTPUT, emit->vs30_output_count++); 468 dcl.usage = usage; 469 dcl.index = index; 470 dcl.values[0] |= 1<<31; 471 472 if (semantic.Name == TGSI_SEMANTIC_POSITION) { 473 assert(idx == 0); 474 emit->output_map[idx] = dst_register(SVGA3DREG_TEMP, 475 emit->nr_hw_temp++); 476 emit->temp_pos = emit->output_map[idx]; 477 emit->true_pos = dcl.dst; 478 479 /* Grab an extra output for the depth output */ 480 if (!vs30_output_emit_depth_fog(emit, &emit->depth_pos)) 481 return FALSE; 482 483 } 484 else if (semantic.Name == TGSI_SEMANTIC_PSIZE) { 485 emit->output_map[idx] = dst_register(SVGA3DREG_TEMP, 486 emit->nr_hw_temp++); 487 emit->temp_psiz = emit->output_map[idx]; 488 489 /* This has the effect of not declaring psiz (below) and not 490 * emitting the final MOV to true_psiz in the postamble. 
491 */ 492 if (!emit->key.vs.allow_psiz) 493 return TRUE; 494 495 emit->true_psiz = dcl.dst; 496 } 497 else if (semantic.Name == TGSI_SEMANTIC_FOG) { 498 /* 499 * Fog is shared with depth. 500 * So we need to decrement out_count since emit_depth_fog will increment it. 501 */ 502 emit->vs30_output_count--; 503 504 if (!vs30_output_emit_depth_fog(emit, &emit->output_map[idx])) 505 return FALSE; 506 507 return TRUE; 508 } 509 else { 510 emit->output_map[idx] = dcl.dst; 511 } 512 513 return (emit_instruction(emit, opcode) && 514 svga_shader_emit_dwords(emit, dcl.values, ARRAY_SIZE(dcl.values))); 515} 516 517 518/** Translate PIPE_TEXTURE_x to SVGA3DSAMP_x */ 519static ubyte 520svga_tgsi_sampler_type(const struct svga_shader_emitter *emit, int idx) 521{ 522 switch (emit->sampler_target[idx]) { 523 case TGSI_TEXTURE_1D: 524 return SVGA3DSAMP_2D; 525 case TGSI_TEXTURE_2D: 526 case TGSI_TEXTURE_RECT: 527 return SVGA3DSAMP_2D; 528 case TGSI_TEXTURE_SHADOW2D: 529 return SVGA3DSAMP_2D_SHADOW; 530 case TGSI_TEXTURE_3D: 531 return SVGA3DSAMP_VOLUME; 532 case TGSI_TEXTURE_CUBE: 533 return SVGA3DSAMP_CUBE; 534 } 535 536 return SVGA3DSAMP_UNKNOWN; 537} 538 539 540static boolean 541ps30_sampler(struct svga_shader_emitter *emit, 542 unsigned idx) 543{ 544 SVGA3DOpDclArgs dcl; 545 SVGA3dShaderInstToken opcode; 546 547 opcode = inst_token(SVGA3DOP_DCL); 548 dcl.values[0] = 0; 549 dcl.values[1] = 0; 550 551 dcl.dst = dst_register(SVGA3DREG_SAMPLER, idx); 552 dcl.type = svga_tgsi_sampler_type(emit, idx); 553 dcl.values[0] |= 1<<31; 554 555 return (emit_instruction(emit, opcode) && 556 svga_shader_emit_dwords(emit, dcl.values, ARRAY_SIZE(dcl.values))); 557} 558 559 560boolean 561svga_shader_emit_samplers_decl(struct svga_shader_emitter *emit) 562{ 563 unsigned i; 564 565 for (i = 0; i < emit->num_samplers; i++) { 566 if (!ps30_sampler(emit, i)) 567 return FALSE; 568 } 569 return TRUE; 570} 571 572 573boolean 574svga_translate_decl_sm30(struct svga_shader_emitter *emit, 575 const struct 
tgsi_full_declaration *decl) 576{ 577 unsigned first = decl->Range.First; 578 unsigned last = decl->Range.Last; 579 unsigned idx; 580 581 for (idx = first; idx <= last; idx++) { 582 boolean ok = TRUE; 583 584 switch (decl->Declaration.File) { 585 case TGSI_FILE_SAMPLER: 586 assert (emit->unit == PIPE_SHADER_FRAGMENT); 587 /* just keep track of the number of samplers here. 588 * Will emit the declaration in the helpers function. 589 */ 590 emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1); 591 break; 592 593 case TGSI_FILE_INPUT: 594 if (emit->unit == PIPE_SHADER_VERTEX) 595 ok = vs30_input(emit, decl->Semantic, idx); 596 else 597 ok = ps30_input(emit, decl->Semantic, idx); 598 break; 599 600 case TGSI_FILE_OUTPUT: 601 if (emit->unit == PIPE_SHADER_VERTEX) 602 ok = vs30_output(emit, decl->Semantic, idx); 603 else 604 ok = ps30_output(emit, decl->Semantic, idx); 605 break; 606 607 case TGSI_FILE_SAMPLER_VIEW: 608 { 609 unsigned unit = decl->Range.First; 610 assert(decl->Range.First == decl->Range.Last); 611 emit->sampler_target[unit] = decl->SamplerView.Resource; 612 } 613 break; 614 615 default: 616 /* don't need to declare other vars */ 617 ok = TRUE; 618 } 619 620 if (!ok) 621 return FALSE; 622 } 623 624 return TRUE; 625} 626