/**********************************************************
 * Copyright 1998-2013 VMware, Inc. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person
 * obtaining a copy of this software and associated documentation
 * files (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy,
 * modify, merge, publish, distribute, sublicense, and/or sell copies
 * of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 **********************************************************/

/**
 * @file svga_tgsi_vgpu10.c
 *
 * TGSI -> VGPU10 shader translation.
 *
 * \author Mingcheng Chen
 * \author Brian Paul
 */

#include "pipe/p_compiler.h"
#include "pipe/p_shader_tokens.h"
#include "pipe/p_defines.h"
#include "tgsi/tgsi_build.h"
#include "tgsi/tgsi_dump.h"
#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_scan.h"
#include "tgsi/tgsi_strings.h"
#include "tgsi/tgsi_two_side.h"
#include "tgsi/tgsi_aa_point.h"
#include "tgsi/tgsi_util.h"
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_bitmask.h"
#include "util/u_debug.h"
#include "util/u_pstipple.h"

#include "svga_context.h"
#include "svga_debug.h"
#include "svga_link.h"
#include "svga_shader.h"
#include "svga_tgsi.h"

#include "VGPU10ShaderTokens.h"


#define INVALID_INDEX 99999
#define MAX_INTERNAL_TEMPS 3
#define MAX_SYSTEM_VALUES 4
#define MAX_IMMEDIATE_COUNT \
        (VGPU10_MAX_IMMEDIATE_CONSTANT_BUFFER_ELEMENT_COUNT/4)
#define MAX_TEMP_ARRAYS 64  /* Enough? */


/**
 * Clipping is complicated. There are four different cases that we
 * handle during VS/GS shader translation:
 */
enum clipping_mode
{
   CLIP_NONE,     /**< No clipping enabled */
   CLIP_LEGACY,   /**< The shader has no clipping declarations or code but
                   * one or more user-defined clip planes are enabled. We
                   * generate extra code to emit clip distances.
                   */
   CLIP_DISTANCE, /**< The shader already declares clip distance output
                   * registers and has code to write to them.
                   */
   CLIP_VERTEX    /**< The shader declares a clip vertex output register and
                   * has code that writes to the register. We convert the
                   * clipvertex position into one or more clip distances.
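                   * Each clip distance is conceptually the 4-component dot
                   * product of the written clip vertex position with one of
                   * the enabled user clip planes (e.g.
                   * dist[i] = dot(clipVertex, clipPlane[i])).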
87 */ 88}; 89 90 91/* Shader signature info */ 92struct svga_shader_signature 93{ 94 SVGA3dDXShaderSignatureHeader header; 95 SVGA3dDXShaderSignatureEntry inputs[PIPE_MAX_SHADER_INPUTS]; 96 SVGA3dDXShaderSignatureEntry outputs[PIPE_MAX_SHADER_OUTPUTS]; 97 SVGA3dDXShaderSignatureEntry patchConstants[PIPE_MAX_SHADER_OUTPUTS]; 98}; 99 100static inline void 101set_shader_signature_entry(SVGA3dDXShaderSignatureEntry *e, 102 unsigned index, 103 SVGA3dDXSignatureSemanticName sgnName, 104 unsigned mask, 105 SVGA3dDXSignatureRegisterComponentType compType, 106 SVGA3dDXSignatureMinPrecision minPrecision) 107{ 108 e->registerIndex = index; 109 e->semanticName = sgnName; 110 e->mask = mask; 111 e->componentType = compType; 112 e->minPrecision = minPrecision; 113}; 114 115static const SVGA3dDXSignatureSemanticName 116tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_COUNT] = { 117 SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION, 118 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 119 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 120 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 121 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 122 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 123 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 124 SVGADX_SIGNATURE_SEMANTIC_NAME_IS_FRONT_FACE, 125 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 126 SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID, 127 SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID, 128 SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID, 129 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 130 SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE, 131 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 132 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 133 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 134 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 135 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 136 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 137 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 138 SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX, 139 SVGADX_SIGNATURE_SEMANTIC_NAME_RENDER_TARGET_ARRAY_INDEX, 140 SVGADX_SIGNATURE_SEMANTIC_NAME_SAMPLE_INDEX, 141 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 142 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 143 SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID, 144 SVGADX_SIGNATURE_SEMANTIC_NAME_VERTEX_ID, 145 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 146 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 147 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 148 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 149 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 150 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 151 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 152 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 153 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 154 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 155 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 156 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 157 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 158 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 159 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 160 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED, 161 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED 162}; 163 164 165/** 166 * Map tgsi semantic name to SVGA signature semantic name 167 */ 168static inline SVGA3dDXSignatureSemanticName 169map_tgsi_semantic_to_sgn_name(enum tgsi_semantic name) 170{ 171 assert(name < TGSI_SEMANTIC_COUNT); 172 173 /* Do a few asserts here to spot check the mapping */ 174 assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_PRIMID] == 175 SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID); 176 assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_VIEWPORT_INDEX] == 177 SVGADX_SIGNATURE_SEMANTIC_NAME_VIEWPORT_ARRAY_INDEX); 178 
assert(tgsi_semantic_to_sgn_name[TGSI_SEMANTIC_INVOCATIONID] == 179 SVGADX_SIGNATURE_SEMANTIC_NAME_INSTANCE_ID); 180 181 return tgsi_semantic_to_sgn_name[name]; 182} 183 184 185struct svga_shader_emitter_v10 186{ 187 /* The token output buffer */ 188 unsigned size; 189 char *buf; 190 char *ptr; 191 192 /* Information about the shader and state (does not change) */ 193 struct svga_compile_key key; 194 struct tgsi_shader_info info; 195 unsigned unit; 196 unsigned version; /**< Either 40 or 41 at this time */ 197 198 unsigned cur_tgsi_token; /**< current tgsi token position */ 199 unsigned inst_start_token; 200 boolean discard_instruction; /**< throw away current instruction? */ 201 boolean reemit_instruction; /**< reemit current instruction */ 202 boolean skip_instruction; /**< skip current instruction */ 203 204 union tgsi_immediate_data immediates[MAX_IMMEDIATE_COUNT][4]; 205 double (*immediates_dbl)[2]; 206 unsigned num_immediates; /**< Number of immediates emitted */ 207 unsigned common_immediate_pos[10]; /**< literals for common immediates */ 208 unsigned num_common_immediates; 209 boolean immediates_emitted; 210 211 unsigned num_outputs; /**< include any extra outputs */ 212 /** The first extra output is reserved for 213 * non-adjusted vertex position for 214 * stream output purpose 215 */ 216 217 /* Temporary Registers */ 218 unsigned num_shader_temps; /**< num of temps used by original shader */ 219 unsigned internal_temp_count; /**< currently allocated internal temps */ 220 struct { 221 unsigned start, size; 222 } temp_arrays[MAX_TEMP_ARRAYS]; 223 unsigned num_temp_arrays; 224 225 /** Map TGSI temp registers to VGPU10 temp array IDs and indexes */ 226 struct { 227 unsigned arrayId, index; 228 boolean initialized; 229 } temp_map[VGPU10_MAX_TEMPS]; /**< arrayId, element */ 230 231 unsigned initialize_temp_index; 232 233 /** Number of constants used by original shader for each constant buffer. 234 * The size should probably always match with that of svga_state.constbufs. 
235 */ 236 unsigned num_shader_consts[SVGA_MAX_CONST_BUFS]; 237 238 /* Samplers */ 239 unsigned num_samplers; 240 boolean sampler_view[PIPE_MAX_SAMPLERS]; /**< True if sampler view exists*/ 241 ubyte sampler_target[PIPE_MAX_SAMPLERS]; /**< TGSI_TEXTURE_x */ 242 ubyte sampler_return_type[PIPE_MAX_SAMPLERS]; /**< TGSI_RETURN_TYPE_x */ 243 244 /* Index Range declaration */ 245 struct { 246 unsigned start_index; 247 unsigned count; 248 boolean required; 249 unsigned operandType; 250 unsigned size; 251 unsigned dim; 252 } index_range; 253 254 /* Address regs (really implemented with temps) */ 255 unsigned num_address_regs; 256 unsigned address_reg_index[MAX_VGPU10_ADDR_REGS]; 257 258 /* Output register usage masks */ 259 ubyte output_usage_mask[PIPE_MAX_SHADER_OUTPUTS]; 260 261 /* To map TGSI system value index to VGPU shader input indexes */ 262 ubyte system_value_indexes[MAX_SYSTEM_VALUES]; 263 264 struct { 265 /* vertex position scale/translation */ 266 unsigned out_index; /**< the real position output reg */ 267 unsigned tmp_index; /**< the fake/temp position output reg */ 268 unsigned so_index; /**< the non-adjusted position output reg */ 269 unsigned prescale_cbuf_index; /* index to the const buf for prescale */ 270 unsigned prescale_scale_index, prescale_trans_index; 271 unsigned num_prescale; /* number of prescale factor in const buf */ 272 unsigned viewport_index; 273 unsigned need_prescale:1; 274 unsigned have_prescale:1; 275 } vposition; 276 277 /* For vertex shaders only */ 278 struct { 279 /* viewport constant */ 280 unsigned viewport_index; 281 282 unsigned vertex_id_bias_index; 283 unsigned vertex_id_sys_index; 284 unsigned vertex_id_tmp_index; 285 286 /* temp index of adjusted vertex attributes */ 287 unsigned adjusted_input[PIPE_MAX_SHADER_INPUTS]; 288 } vs; 289 290 /* For fragment shaders only */ 291 struct { 292 unsigned color_out_index[PIPE_MAX_COLOR_BUFS]; /**< the real color output regs */ 293 unsigned num_color_outputs; 294 unsigned color_tmp_index; /**< fake/temp color output reg */ 295 unsigned alpha_ref_index; /**< immediate constant for alpha ref */ 296 297 /* front-face */ 298 unsigned face_input_index; /**< real fragment shader face reg (bool) */ 299 unsigned face_tmp_index; /**< temp face reg converted to -1 / +1 */ 300 301 unsigned pstipple_sampler_unit; 302 303 unsigned fragcoord_input_index; /**< real fragment position input reg */ 304 unsigned fragcoord_tmp_index; /**< 1/w modified position temp reg */ 305 306 unsigned sample_id_sys_index; /**< TGSI index of sample id sys value */ 307 308 unsigned sample_pos_sys_index; /**< TGSI index of sample pos sys value */ 309 unsigned sample_pos_tmp_index; /**< which temp reg has the sample pos */ 310 311 /** TGSI index of sample mask input sys value */ 312 unsigned sample_mask_in_sys_index; 313 314 /** Which texture units are doing shadow comparison in the FS code */ 315 unsigned shadow_compare_units; 316 317 /* layer */ 318 unsigned layer_input_index; /**< TGSI index of layer */ 319 unsigned layer_imm_index; /**< immediate for default layer 0 */ 320 } fs; 321 322 /* For geometry shaders only */ 323 struct { 324 VGPU10_PRIMITIVE prim_type;/**< VGPU10 primitive type */ 325 VGPU10_PRIMITIVE_TOPOLOGY prim_topology; /**< VGPU10 primitive topology */ 326 unsigned input_size; /**< size of input arrays */ 327 unsigned prim_id_index; /**< primitive id register index */ 328 unsigned max_out_vertices; /**< maximum number of output vertices */ 329 unsigned invocations; 330 unsigned invocation_id_sys_index; 331 332 unsigned 
viewport_index_out_index; 333 unsigned viewport_index_tmp_index; 334 } gs; 335 336 /* For tessellation control shaders only */ 337 struct { 338 unsigned vertices_per_patch_index; /**< vertices_per_patch system value index */ 339 unsigned imm_index; /**< immediate for tcs */ 340 unsigned invocation_id_sys_index; /**< invocation id */ 341 unsigned invocation_id_tmp_index; 342 unsigned instruction_token_pos; /* token pos for the first instruction */ 343 unsigned control_point_input_index; /* control point input register index */ 344 unsigned control_point_addr_index; /* control point input address register */ 345 unsigned control_point_out_index; /* control point output register index */ 346 unsigned control_point_tmp_index; /* control point temporary register */ 347 unsigned control_point_out_count; /* control point output count */ 348 boolean control_point_phase; /* true if in control point phase */ 349 boolean fork_phase_add_signature; /* true if needs to add signature in fork phase */ 350 unsigned patch_generic_out_count; /* per-patch generic output count */ 351 unsigned patch_generic_out_index; /* per-patch generic output register index*/ 352 unsigned patch_generic_tmp_index; /* per-patch generic temporary register index*/ 353 unsigned prim_id_index; /* primitive id */ 354 struct { 355 unsigned out_index; /* real tessinner output register */ 356 unsigned temp_index; /* tessinner temp register */ 357 unsigned tgsi_index; /* tgsi tessinner output register */ 358 } inner; 359 struct { 360 unsigned out_index; /* real tessouter output register */ 361 unsigned temp_index; /* tessouter temp register */ 362 unsigned tgsi_index; /* tgsi tessouter output register */ 363 } outer; 364 } tcs; 365 366 /* For tessellation evaluation shaders only */ 367 struct { 368 enum pipe_prim_type prim_mode; 369 enum pipe_tess_spacing spacing; 370 boolean vertices_order_cw; 371 boolean point_mode; 372 unsigned tesscoord_sys_index; 373 unsigned prim_id_index; /* primitive id */ 374 struct { 375 unsigned in_index; /* real tessinner input register */ 376 unsigned temp_index; /* tessinner temp register */ 377 unsigned tgsi_index; /* tgsi tessinner input register */ 378 } inner; 379 struct { 380 unsigned in_index; /* real tessouter input register */ 381 unsigned temp_index; /* tessouter temp register */ 382 unsigned tgsi_index; /* tgsi tessouter input register */ 383 } outer; 384 } tes; 385 386 /* For vertex or geometry shaders */ 387 enum clipping_mode clip_mode; 388 unsigned clip_dist_out_index; /**< clip distance output register index */ 389 unsigned clip_dist_tmp_index; /**< clip distance temporary register */ 390 unsigned clip_dist_so_index; /**< clip distance shadow copy */ 391 392 /** Index of temporary holding the clipvertex coordinate */ 393 unsigned clip_vertex_out_index; /**< clip vertex output register index */ 394 unsigned clip_vertex_tmp_index; /**< clip vertex temporary index */ 395 396 /* user clip plane constant slot indexes */ 397 unsigned clip_plane_const[PIPE_MAX_CLIP_PLANES]; 398 399 unsigned num_output_writes; 400 boolean constant_color_output; 401 402 boolean uses_flat_interp; 403 404 unsigned reserved_token; /* index to the reserved token */ 405 boolean uses_precise_qualifier; 406 407 /* For all shaders: const reg index for RECT coord scaling */ 408 unsigned texcoord_scale_index[PIPE_MAX_SAMPLERS]; 409 410 /* For all shaders: const reg index for texture buffer size */ 411 unsigned texture_buffer_size_index[PIPE_MAX_SAMPLERS]; 412 413 /* VS/TCS/TES/GS/FS Linkage info */ 414 struct shader_linkage 
linkage; 415 struct tgsi_shader_info *prevShaderInfo; 416 417 /* Shader signature */ 418 struct svga_shader_signature signature; 419 420 bool register_overflow; /**< Set if we exceed a VGPU10 register limit */ 421 422 /* For pipe_debug_message */ 423 struct pipe_debug_callback svga_debug_callback; 424 425 /* current loop depth in shader */ 426 unsigned current_loop_depth; 427}; 428 429 430static void emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit); 431static void emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit); 432static boolean emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit); 433static boolean emit_constant_declaration(struct svga_shader_emitter_v10 *emit); 434static boolean emit_sampler_declarations(struct svga_shader_emitter_v10 *emit); 435static boolean emit_resource_declarations(struct svga_shader_emitter_v10 *emit); 436static boolean emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit); 437static boolean emit_index_range_declaration(struct svga_shader_emitter_v10 *emit); 438static void emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit); 439 440static boolean 441emit_post_helpers(struct svga_shader_emitter_v10 *emit); 442 443static boolean 444emit_vertex(struct svga_shader_emitter_v10 *emit, 445 const struct tgsi_full_instruction *inst); 446 447static boolean 448emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, 449 unsigned inst_number, 450 const struct tgsi_full_instruction *inst); 451 452static void 453emit_input_declaration(struct svga_shader_emitter_v10 *emit, 454 unsigned opcodeType, unsigned operandType, 455 unsigned dim, unsigned index, unsigned size, 456 unsigned name, unsigned numComp, 457 unsigned selMode, unsigned usageMask, 458 unsigned interpMode, 459 boolean addSignature, 460 SVGA3dDXSignatureSemanticName sgnName); 461 462static void 463create_temp_array(struct svga_shader_emitter_v10 *emit, 464 unsigned arrayID, unsigned first, unsigned count, 465 unsigned startIndex); 466 467static char err_buf[128]; 468 469static boolean 470expand(struct svga_shader_emitter_v10 *emit) 471{ 472 char *new_buf; 473 unsigned newsize = emit->size * 2; 474 475 if (emit->buf != err_buf) 476 new_buf = REALLOC(emit->buf, emit->size, newsize); 477 else 478 new_buf = NULL; 479 480 if (!new_buf) { 481 emit->ptr = err_buf; 482 emit->buf = err_buf; 483 emit->size = sizeof(err_buf); 484 return FALSE; 485 } 486 487 emit->size = newsize; 488 emit->ptr = new_buf + (emit->ptr - emit->buf); 489 emit->buf = new_buf; 490 return TRUE; 491} 492 493/** 494 * Create and initialize a new svga_shader_emitter_v10 object. 495 */ 496static struct svga_shader_emitter_v10 * 497alloc_emitter(void) 498{ 499 struct svga_shader_emitter_v10 *emit = CALLOC(1, sizeof(*emit)); 500 501 if (!emit) 502 return NULL; 503 504 /* to initialize the output buffer */ 505 emit->size = 512; 506 if (!expand(emit)) { 507 FREE(emit); 508 return NULL; 509 } 510 return emit; 511} 512 513/** 514 * Free an svga_shader_emitter_v10 object. 
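 * Note: on successful translation emit->buf will already be NULL (see the
 * comment on the FREE call below), so only the failure path actually
 * releases a token buffer here.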
515 */ 516static void 517free_emitter(struct svga_shader_emitter_v10 *emit) 518{ 519 assert(emit); 520 FREE(emit->buf); /* will be NULL if translation succeeded */ 521 FREE(emit); 522} 523 524static inline boolean 525reserve(struct svga_shader_emitter_v10 *emit, 526 unsigned nr_dwords) 527{ 528 while (emit->ptr - emit->buf + nr_dwords * sizeof(uint32) >= emit->size) { 529 if (!expand(emit)) 530 return FALSE; 531 } 532 533 return TRUE; 534} 535 536static boolean 537emit_dword(struct svga_shader_emitter_v10 *emit, uint32 dword) 538{ 539 if (!reserve(emit, 1)) 540 return FALSE; 541 542 *(uint32 *)emit->ptr = dword; 543 emit->ptr += sizeof dword; 544 return TRUE; 545} 546 547static boolean 548emit_dwords(struct svga_shader_emitter_v10 *emit, 549 const uint32 *dwords, 550 unsigned nr) 551{ 552 if (!reserve(emit, nr)) 553 return FALSE; 554 555 memcpy(emit->ptr, dwords, nr * sizeof *dwords); 556 emit->ptr += nr * sizeof *dwords; 557 return TRUE; 558} 559 560/** Return the number of tokens in the emitter's buffer */ 561static unsigned 562emit_get_num_tokens(const struct svga_shader_emitter_v10 *emit) 563{ 564 return (emit->ptr - emit->buf) / sizeof(unsigned); 565} 566 567 568/** 569 * Check for register overflow. If we overflow we'll set an 570 * error flag. This function can be called for register declarations 571 * or use as src/dst instruction operands. 572 * \param type register type. One of VGPU10_OPERAND_TYPE_x 573 or VGPU10_OPCODE_DCL_x 574 * \param index the register index 575 */ 576static void 577check_register_index(struct svga_shader_emitter_v10 *emit, 578 unsigned operandType, unsigned index) 579{ 580 bool overflow_before = emit->register_overflow; 581 582 switch (operandType) { 583 case VGPU10_OPERAND_TYPE_TEMP: 584 case VGPU10_OPERAND_TYPE_INDEXABLE_TEMP: 585 case VGPU10_OPCODE_DCL_TEMPS: 586 if (index >= VGPU10_MAX_TEMPS) { 587 emit->register_overflow = TRUE; 588 } 589 break; 590 case VGPU10_OPERAND_TYPE_CONSTANT_BUFFER: 591 case VGPU10_OPCODE_DCL_CONSTANT_BUFFER: 592 if (index >= VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) { 593 emit->register_overflow = TRUE; 594 } 595 break; 596 case VGPU10_OPERAND_TYPE_INPUT: 597 case VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID: 598 case VGPU10_OPCODE_DCL_INPUT: 599 case VGPU10_OPCODE_DCL_INPUT_SGV: 600 case VGPU10_OPCODE_DCL_INPUT_SIV: 601 case VGPU10_OPCODE_DCL_INPUT_PS: 602 case VGPU10_OPCODE_DCL_INPUT_PS_SGV: 603 case VGPU10_OPCODE_DCL_INPUT_PS_SIV: 604 if ((emit->unit == PIPE_SHADER_VERTEX && 605 index >= VGPU10_MAX_VS_INPUTS) || 606 (emit->unit == PIPE_SHADER_GEOMETRY && 607 index >= VGPU10_MAX_GS_INPUTS) || 608 (emit->unit == PIPE_SHADER_FRAGMENT && 609 index >= VGPU10_MAX_FS_INPUTS) || 610 (emit->unit == PIPE_SHADER_TESS_CTRL && 611 index >= VGPU11_MAX_HS_INPUT_CONTROL_POINTS) || 612 (emit->unit == PIPE_SHADER_TESS_EVAL && 613 index >= VGPU11_MAX_DS_INPUT_CONTROL_POINTS)) { 614 emit->register_overflow = TRUE; 615 } 616 break; 617 case VGPU10_OPERAND_TYPE_OUTPUT: 618 case VGPU10_OPCODE_DCL_OUTPUT: 619 case VGPU10_OPCODE_DCL_OUTPUT_SGV: 620 case VGPU10_OPCODE_DCL_OUTPUT_SIV: 621 /* Note: we are skipping two output indices in tcs for 622 * tessinner/outer levels. Implementation will not exceed 623 * number of output count but it allows index to go beyond 624 * VGPU11_MAX_HS_OUTPUTS. 
       * The index will never be >= VGPU11_MAX_HS_OUTPUTS + 2.
       */
      if ((emit->unit == PIPE_SHADER_VERTEX &&
           index >= VGPU10_MAX_VS_OUTPUTS) ||
          (emit->unit == PIPE_SHADER_GEOMETRY &&
           index >= VGPU10_MAX_GS_OUTPUTS) ||
          (emit->unit == PIPE_SHADER_FRAGMENT &&
           index >= VGPU10_MAX_FS_OUTPUTS) ||
          (emit->unit == PIPE_SHADER_TESS_CTRL &&
           index >= VGPU11_MAX_HS_OUTPUTS + 2) ||
          (emit->unit == PIPE_SHADER_TESS_EVAL &&
           index >= VGPU11_MAX_DS_OUTPUTS)) {
         emit->register_overflow = TRUE;
      }
      break;
   case VGPU10_OPERAND_TYPE_SAMPLER:
   case VGPU10_OPCODE_DCL_SAMPLER:
      if (index >= VGPU10_MAX_SAMPLERS) {
         emit->register_overflow = TRUE;
      }
      break;
   case VGPU10_OPERAND_TYPE_RESOURCE:
   case VGPU10_OPCODE_DCL_RESOURCE:
      if (index >= VGPU10_MAX_RESOURCES) {
         emit->register_overflow = TRUE;
      }
      break;
   case VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER:
      if (index >= MAX_IMMEDIATE_COUNT) {
         emit->register_overflow = TRUE;
      }
      break;
   case VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK:
      /* nothing */
      break;
   default:
      assert(0);
      ; /* nothing */
   }

   if (emit->register_overflow && !overflow_before) {
      debug_printf("svga: vgpu10 register overflow (reg %u, index %u)\n",
                   operandType, index);
   }
}


/**
 * Examine misc state to determine the clipping mode.
 */
static void
determine_clipping_mode(struct svga_shader_emitter_v10 *emit)
{
   /* num_written_clipdistance in the shader info for a tessellation
    * control shader is always 0 because TGSI_PROPERTY_NUM_CLIPDIST_ENABLED
    * is not defined for that shader stage. So we go through all the output
    * declarations to set num_written_clipdistance. This is just to
    * determine the clipping mode.
    */
   if (emit->unit == PIPE_SHADER_TESS_CTRL) {
      unsigned i;
      for (i = 0; i < emit->info.num_outputs; i++) {
         if (emit->info.output_semantic_name[i] == TGSI_SEMANTIC_CLIPDIST) {
            emit->info.num_written_clipdistance =
               4 * (emit->info.output_semantic_index[i] + 1);
         }
      }
   }

   if (emit->info.num_written_clipdistance > 0) {
      emit->clip_mode = CLIP_DISTANCE;
   }
   else if (emit->info.writes_clipvertex) {
      emit->clip_mode = CLIP_VERTEX;
   }
   else if (emit->key.clip_plane_enable && emit->key.last_vertex_stage) {
      /*
       * Only the last shader in the vertex processing stage needs to
       * handle the legacy clip mode.
       */
      emit->clip_mode = CLIP_LEGACY;
   }
   else {
      emit->clip_mode = CLIP_NONE;
   }
}


/**
 * For clip distance register declarations and clip distance register
 * writes we need to mask the declaration usage or instruction writemask
 * (respectively) against the set of the really-enabled clipping planes.
 *
 * The piglit test spec/glsl-1.30/execution/clipping/vs-clip-distance-enables
 * has a VS that writes to all 8 clip distance registers, but the plane enable
 * flags are a subset of that.
 *
 * This function is used to apply the plane enable flags to the register
 * declaration or instruction writemask.
 *
 * \param writemask the declaration usage mask or instruction writemask
 * \param clip_reg_index which clip plane register is being declared/written.
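 *        Illustrative example: with clip_plane_enable = 0x2f and
 *        clip_reg_index = 1, the writemask is ANDed with
 *        (0x2f >> 4) & 0xf = 0x2, so only the .y component (plane 5)
 *        of the second clip-distance register remains enabled.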
727 * The legal values are 0 and 1 (two clip planes per 728 * register, for a total of 8 clip planes) 729 */ 730static unsigned 731apply_clip_plane_mask(struct svga_shader_emitter_v10 *emit, 732 unsigned writemask, unsigned clip_reg_index) 733{ 734 unsigned shift; 735 736 assert(clip_reg_index < 2); 737 738 /* four clip planes per clip register: */ 739 shift = clip_reg_index * 4; 740 writemask &= ((emit->key.clip_plane_enable >> shift) & 0xf); 741 742 return writemask; 743} 744 745 746/** 747 * Translate gallium shader type into VGPU10 type. 748 */ 749static VGPU10_PROGRAM_TYPE 750translate_shader_type(unsigned type) 751{ 752 switch (type) { 753 case PIPE_SHADER_VERTEX: 754 return VGPU10_VERTEX_SHADER; 755 case PIPE_SHADER_GEOMETRY: 756 return VGPU10_GEOMETRY_SHADER; 757 case PIPE_SHADER_FRAGMENT: 758 return VGPU10_PIXEL_SHADER; 759 case PIPE_SHADER_TESS_CTRL: 760 return VGPU10_HULL_SHADER; 761 case PIPE_SHADER_TESS_EVAL: 762 return VGPU10_DOMAIN_SHADER; 763 case PIPE_SHADER_COMPUTE: 764 return VGPU10_COMPUTE_SHADER; 765 default: 766 assert(!"Unexpected shader type"); 767 return VGPU10_VERTEX_SHADER; 768 } 769} 770 771 772/** 773 * Translate a TGSI_OPCODE_x into a VGPU10_OPCODE_x 774 * Note: we only need to translate the opcodes for "simple" instructions, 775 * as seen below. All other opcodes are handled/translated specially. 776 */ 777static VGPU10_OPCODE_TYPE 778translate_opcode(enum tgsi_opcode opcode) 779{ 780 switch (opcode) { 781 case TGSI_OPCODE_MOV: 782 return VGPU10_OPCODE_MOV; 783 case TGSI_OPCODE_MUL: 784 return VGPU10_OPCODE_MUL; 785 case TGSI_OPCODE_ADD: 786 return VGPU10_OPCODE_ADD; 787 case TGSI_OPCODE_DP3: 788 return VGPU10_OPCODE_DP3; 789 case TGSI_OPCODE_DP4: 790 return VGPU10_OPCODE_DP4; 791 case TGSI_OPCODE_MIN: 792 return VGPU10_OPCODE_MIN; 793 case TGSI_OPCODE_MAX: 794 return VGPU10_OPCODE_MAX; 795 case TGSI_OPCODE_MAD: 796 return VGPU10_OPCODE_MAD; 797 case TGSI_OPCODE_SQRT: 798 return VGPU10_OPCODE_SQRT; 799 case TGSI_OPCODE_FRC: 800 return VGPU10_OPCODE_FRC; 801 case TGSI_OPCODE_FLR: 802 return VGPU10_OPCODE_ROUND_NI; 803 case TGSI_OPCODE_FSEQ: 804 return VGPU10_OPCODE_EQ; 805 case TGSI_OPCODE_FSGE: 806 return VGPU10_OPCODE_GE; 807 case TGSI_OPCODE_FSNE: 808 return VGPU10_OPCODE_NE; 809 case TGSI_OPCODE_DDX: 810 return VGPU10_OPCODE_DERIV_RTX; 811 case TGSI_OPCODE_DDY: 812 return VGPU10_OPCODE_DERIV_RTY; 813 case TGSI_OPCODE_RET: 814 return VGPU10_OPCODE_RET; 815 case TGSI_OPCODE_DIV: 816 return VGPU10_OPCODE_DIV; 817 case TGSI_OPCODE_IDIV: 818 return VGPU10_OPCODE_VMWARE; 819 case TGSI_OPCODE_DP2: 820 return VGPU10_OPCODE_DP2; 821 case TGSI_OPCODE_BRK: 822 return VGPU10_OPCODE_BREAK; 823 case TGSI_OPCODE_IF: 824 return VGPU10_OPCODE_IF; 825 case TGSI_OPCODE_ELSE: 826 return VGPU10_OPCODE_ELSE; 827 case TGSI_OPCODE_ENDIF: 828 return VGPU10_OPCODE_ENDIF; 829 case TGSI_OPCODE_CEIL: 830 return VGPU10_OPCODE_ROUND_PI; 831 case TGSI_OPCODE_I2F: 832 return VGPU10_OPCODE_ITOF; 833 case TGSI_OPCODE_NOT: 834 return VGPU10_OPCODE_NOT; 835 case TGSI_OPCODE_TRUNC: 836 return VGPU10_OPCODE_ROUND_Z; 837 case TGSI_OPCODE_SHL: 838 return VGPU10_OPCODE_ISHL; 839 case TGSI_OPCODE_AND: 840 return VGPU10_OPCODE_AND; 841 case TGSI_OPCODE_OR: 842 return VGPU10_OPCODE_OR; 843 case TGSI_OPCODE_XOR: 844 return VGPU10_OPCODE_XOR; 845 case TGSI_OPCODE_CONT: 846 return VGPU10_OPCODE_CONTINUE; 847 case TGSI_OPCODE_EMIT: 848 return VGPU10_OPCODE_EMIT; 849 case TGSI_OPCODE_ENDPRIM: 850 return VGPU10_OPCODE_CUT; 851 case TGSI_OPCODE_BGNLOOP: 852 return VGPU10_OPCODE_LOOP; 853 case 
TGSI_OPCODE_ENDLOOP: 854 return VGPU10_OPCODE_ENDLOOP; 855 case TGSI_OPCODE_ENDSUB: 856 return VGPU10_OPCODE_RET; 857 case TGSI_OPCODE_NOP: 858 return VGPU10_OPCODE_NOP; 859 case TGSI_OPCODE_END: 860 return VGPU10_OPCODE_RET; 861 case TGSI_OPCODE_F2I: 862 return VGPU10_OPCODE_FTOI; 863 case TGSI_OPCODE_IMAX: 864 return VGPU10_OPCODE_IMAX; 865 case TGSI_OPCODE_IMIN: 866 return VGPU10_OPCODE_IMIN; 867 case TGSI_OPCODE_UDIV: 868 case TGSI_OPCODE_UMOD: 869 case TGSI_OPCODE_MOD: 870 return VGPU10_OPCODE_UDIV; 871 case TGSI_OPCODE_IMUL_HI: 872 return VGPU10_OPCODE_IMUL; 873 case TGSI_OPCODE_INEG: 874 return VGPU10_OPCODE_INEG; 875 case TGSI_OPCODE_ISHR: 876 return VGPU10_OPCODE_ISHR; 877 case TGSI_OPCODE_ISGE: 878 return VGPU10_OPCODE_IGE; 879 case TGSI_OPCODE_ISLT: 880 return VGPU10_OPCODE_ILT; 881 case TGSI_OPCODE_F2U: 882 return VGPU10_OPCODE_FTOU; 883 case TGSI_OPCODE_UADD: 884 return VGPU10_OPCODE_IADD; 885 case TGSI_OPCODE_U2F: 886 return VGPU10_OPCODE_UTOF; 887 case TGSI_OPCODE_UCMP: 888 return VGPU10_OPCODE_MOVC; 889 case TGSI_OPCODE_UMAD: 890 return VGPU10_OPCODE_UMAD; 891 case TGSI_OPCODE_UMAX: 892 return VGPU10_OPCODE_UMAX; 893 case TGSI_OPCODE_UMIN: 894 return VGPU10_OPCODE_UMIN; 895 case TGSI_OPCODE_UMUL: 896 case TGSI_OPCODE_UMUL_HI: 897 return VGPU10_OPCODE_UMUL; 898 case TGSI_OPCODE_USEQ: 899 return VGPU10_OPCODE_IEQ; 900 case TGSI_OPCODE_USGE: 901 return VGPU10_OPCODE_UGE; 902 case TGSI_OPCODE_USHR: 903 return VGPU10_OPCODE_USHR; 904 case TGSI_OPCODE_USLT: 905 return VGPU10_OPCODE_ULT; 906 case TGSI_OPCODE_USNE: 907 return VGPU10_OPCODE_INE; 908 case TGSI_OPCODE_SWITCH: 909 return VGPU10_OPCODE_SWITCH; 910 case TGSI_OPCODE_CASE: 911 return VGPU10_OPCODE_CASE; 912 case TGSI_OPCODE_DEFAULT: 913 return VGPU10_OPCODE_DEFAULT; 914 case TGSI_OPCODE_ENDSWITCH: 915 return VGPU10_OPCODE_ENDSWITCH; 916 case TGSI_OPCODE_FSLT: 917 return VGPU10_OPCODE_LT; 918 case TGSI_OPCODE_ROUND: 919 return VGPU10_OPCODE_ROUND_NE; 920 /* Begin SM5 opcodes */ 921 case TGSI_OPCODE_F2D: 922 return VGPU10_OPCODE_FTOD; 923 case TGSI_OPCODE_D2F: 924 return VGPU10_OPCODE_DTOF; 925 case TGSI_OPCODE_DMUL: 926 return VGPU10_OPCODE_DMUL; 927 case TGSI_OPCODE_DADD: 928 return VGPU10_OPCODE_DADD; 929 case TGSI_OPCODE_DMAX: 930 return VGPU10_OPCODE_DMAX; 931 case TGSI_OPCODE_DMIN: 932 return VGPU10_OPCODE_DMIN; 933 case TGSI_OPCODE_DSEQ: 934 return VGPU10_OPCODE_DEQ; 935 case TGSI_OPCODE_DSGE: 936 return VGPU10_OPCODE_DGE; 937 case TGSI_OPCODE_DSLT: 938 return VGPU10_OPCODE_DLT; 939 case TGSI_OPCODE_DSNE: 940 return VGPU10_OPCODE_DNE; 941 case TGSI_OPCODE_IBFE: 942 return VGPU10_OPCODE_IBFE; 943 case TGSI_OPCODE_UBFE: 944 return VGPU10_OPCODE_UBFE; 945 case TGSI_OPCODE_BFI: 946 return VGPU10_OPCODE_BFI; 947 case TGSI_OPCODE_BREV: 948 return VGPU10_OPCODE_BFREV; 949 case TGSI_OPCODE_POPC: 950 return VGPU10_OPCODE_COUNTBITS; 951 case TGSI_OPCODE_LSB: 952 return VGPU10_OPCODE_FIRSTBIT_LO; 953 case TGSI_OPCODE_IMSB: 954 return VGPU10_OPCODE_FIRSTBIT_SHI; 955 case TGSI_OPCODE_UMSB: 956 return VGPU10_OPCODE_FIRSTBIT_HI; 957 case TGSI_OPCODE_INTERP_CENTROID: 958 return VGPU10_OPCODE_EVAL_CENTROID; 959 case TGSI_OPCODE_INTERP_SAMPLE: 960 return VGPU10_OPCODE_EVAL_SAMPLE_INDEX; 961 case TGSI_OPCODE_BARRIER: 962 return VGPU10_OPCODE_SYNC; 963 964 /* DX11.1 Opcodes */ 965 case TGSI_OPCODE_DDIV: 966 return VGPU10_OPCODE_DDIV; 967 case TGSI_OPCODE_DRCP: 968 return VGPU10_OPCODE_DRCP; 969 case TGSI_OPCODE_D2I: 970 return VGPU10_OPCODE_DTOI; 971 case TGSI_OPCODE_D2U: 972 return VGPU10_OPCODE_DTOU; 973 case TGSI_OPCODE_I2D: 974 
return VGPU10_OPCODE_ITOD; 975 case TGSI_OPCODE_U2D: 976 return VGPU10_OPCODE_UTOD; 977 978 case TGSI_OPCODE_SAMPLE_POS: 979 /* Note: we never actually get this opcode because there's no GLSL 980 * function to query multisample resource sample positions. There's 981 * only the TGSI_SEMANTIC_SAMPLEPOS system value which contains the 982 * position of the current sample in the render target. 983 */ 984 FALLTHROUGH; 985 case TGSI_OPCODE_SAMPLE_INFO: 986 /* NOTE: we never actually get this opcode because the GLSL compiler 987 * implements the gl_NumSamples variable with a simple constant in the 988 * constant buffer. 989 */ 990 FALLTHROUGH; 991 default: 992 assert(!"Unexpected TGSI opcode in translate_opcode()"); 993 return VGPU10_OPCODE_NOP; 994 } 995} 996 997 998/** 999 * Translate a TGSI register file type into a VGPU10 operand type. 1000 * \param array is the TGSI_FILE_TEMPORARY register an array? 1001 */ 1002static VGPU10_OPERAND_TYPE 1003translate_register_file(enum tgsi_file_type file, boolean array) 1004{ 1005 switch (file) { 1006 case TGSI_FILE_CONSTANT: 1007 return VGPU10_OPERAND_TYPE_CONSTANT_BUFFER; 1008 case TGSI_FILE_INPUT: 1009 return VGPU10_OPERAND_TYPE_INPUT; 1010 case TGSI_FILE_OUTPUT: 1011 return VGPU10_OPERAND_TYPE_OUTPUT; 1012 case TGSI_FILE_TEMPORARY: 1013 return array ? VGPU10_OPERAND_TYPE_INDEXABLE_TEMP 1014 : VGPU10_OPERAND_TYPE_TEMP; 1015 case TGSI_FILE_IMMEDIATE: 1016 /* all immediates are 32-bit values at this time so 1017 * VGPU10_OPERAND_TYPE_IMMEDIATE64 is not possible at this time. 1018 */ 1019 return VGPU10_OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER; 1020 case TGSI_FILE_SAMPLER: 1021 return VGPU10_OPERAND_TYPE_SAMPLER; 1022 case TGSI_FILE_SYSTEM_VALUE: 1023 return VGPU10_OPERAND_TYPE_INPUT; 1024 1025 /* XXX TODO more cases to finish */ 1026 1027 default: 1028 assert(!"Bad tgsi register file!"); 1029 return VGPU10_OPERAND_TYPE_NULL; 1030 } 1031} 1032 1033 1034/** 1035 * Emit a null dst register 1036 */ 1037static void 1038emit_null_dst_register(struct svga_shader_emitter_v10 *emit) 1039{ 1040 VGPU10OperandToken0 operand; 1041 1042 operand.value = 0; 1043 operand.operandType = VGPU10_OPERAND_TYPE_NULL; 1044 operand.numComponents = VGPU10_OPERAND_0_COMPONENT; 1045 1046 emit_dword(emit, operand.value); 1047} 1048 1049 1050/** 1051 * If the given register is a temporary, return the array ID. 1052 * Else return zero. 1053 */ 1054static unsigned 1055get_temp_array_id(const struct svga_shader_emitter_v10 *emit, 1056 enum tgsi_file_type file, unsigned index) 1057{ 1058 if (file == TGSI_FILE_TEMPORARY) { 1059 return emit->temp_map[index].arrayId; 1060 } 1061 else { 1062 return 0; 1063 } 1064} 1065 1066 1067/** 1068 * If the given register is a temporary, convert the index from a TGSI 1069 * TEMPORARY index to a VGPU10 temp index. 1070 */ 1071static unsigned 1072remap_temp_index(const struct svga_shader_emitter_v10 *emit, 1073 enum tgsi_file_type file, unsigned index) 1074{ 1075 if (file == TGSI_FILE_TEMPORARY) { 1076 return emit->temp_map[index].index; 1077 } 1078 else { 1079 return index; 1080 } 1081} 1082 1083 1084/** 1085 * Setup the operand0 fields related to indexing (1D, 2D, relative, etc). 1086 * Note: the operandType field must already be initialized. 1087 * \param file the register file being accessed 1088 * \param indirect using indirect addressing of the register file? 
1089 * \param index2D if true, 2-D indexing is being used (const or temp registers) 1090 * \param indirect2D if true, 2-D indirect indexing being used (for const buf) 1091 */ 1092static VGPU10OperandToken0 1093setup_operand0_indexing(struct svga_shader_emitter_v10 *emit, 1094 VGPU10OperandToken0 operand0, 1095 enum tgsi_file_type file, 1096 boolean indirect, 1097 boolean index2D, bool indirect2D) 1098{ 1099 VGPU10_OPERAND_INDEX_REPRESENTATION index0Rep, index1Rep; 1100 VGPU10_OPERAND_INDEX_DIMENSION indexDim; 1101 1102 /* 1103 * Compute index dimensions 1104 */ 1105 if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32 || 1106 operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID || 1107 operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID || 1108 operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID || 1109 operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP || 1110 operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) { 1111 /* there's no swizzle for in-line immediates */ 1112 indexDim = VGPU10_OPERAND_INDEX_0D; 1113 assert(operand0.selectionMode == 0); 1114 } 1115 else if (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT) { 1116 indexDim = VGPU10_OPERAND_INDEX_0D; 1117 } 1118 else { 1119 indexDim = index2D ? VGPU10_OPERAND_INDEX_2D : VGPU10_OPERAND_INDEX_1D; 1120 } 1121 1122 /* 1123 * Compute index representation(s) (immediate vs relative). 1124 */ 1125 if (indexDim == VGPU10_OPERAND_INDEX_2D) { 1126 index0Rep = indirect2D ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE 1127 : VGPU10_OPERAND_INDEX_IMMEDIATE32; 1128 1129 index1Rep = indirect ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE 1130 : VGPU10_OPERAND_INDEX_IMMEDIATE32; 1131 } 1132 else if (indexDim == VGPU10_OPERAND_INDEX_1D) { 1133 index0Rep = indirect ? VGPU10_OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE 1134 : VGPU10_OPERAND_INDEX_IMMEDIATE32; 1135 1136 index1Rep = 0; 1137 } 1138 else { 1139 index0Rep = 0; 1140 index1Rep = 0; 1141 } 1142 1143 operand0.indexDimension = indexDim; 1144 operand0.index0Representation = index0Rep; 1145 operand0.index1Representation = index1Rep; 1146 1147 return operand0; 1148} 1149 1150 1151/** 1152 * Emit the operand for expressing an address register for indirect indexing. 1153 * Note that the address register is really just a temp register. 1154 * \param addr_reg_index which address register to use 1155 */ 1156static void 1157emit_indirect_register(struct svga_shader_emitter_v10 *emit, 1158 unsigned addr_reg_index) 1159{ 1160 unsigned tmp_reg_index; 1161 VGPU10OperandToken0 operand0; 1162 1163 assert(addr_reg_index < MAX_VGPU10_ADDR_REGS); 1164 1165 tmp_reg_index = emit->address_reg_index[addr_reg_index]; 1166 1167 /* operand0 is a simple temporary register, selecting one component */ 1168 operand0.value = 0; 1169 operand0.operandType = VGPU10_OPERAND_TYPE_TEMP; 1170 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 1171 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 1172 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 1173 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE; 1174 operand0.swizzleX = 0; 1175 operand0.swizzleY = 1; 1176 operand0.swizzleZ = 2; 1177 operand0.swizzleW = 3; 1178 1179 emit_dword(emit, operand0.value); 1180 emit_dword(emit, remap_temp_index(emit, TGSI_FILE_TEMPORARY, tmp_reg_index)); 1181} 1182 1183 1184/** 1185 * Translate the dst register of a TGSI instruction and emit VGPU10 tokens. 
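 * The tokens emitted here are: an operand0 dword describing the register
 * type, indexing and writemask, an optional temp-array ID dword, the
 * (possibly remapped) register index, and, for indirect addressing, the
 * extra address-register operand.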
1186 * \param emit the emitter context 1187 * \param reg the TGSI dst register to translate 1188 */ 1189static void 1190emit_dst_register(struct svga_shader_emitter_v10 *emit, 1191 const struct tgsi_full_dst_register *reg) 1192{ 1193 enum tgsi_file_type file = reg->Register.File; 1194 unsigned index = reg->Register.Index; 1195 const enum tgsi_semantic sem_name = emit->info.output_semantic_name[index]; 1196 const unsigned sem_index = emit->info.output_semantic_index[index]; 1197 unsigned writemask = reg->Register.WriteMask; 1198 const boolean indirect = reg->Register.Indirect; 1199 unsigned tempArrayId = get_temp_array_id(emit, file, index); 1200 boolean index2d = reg->Register.Dimension || tempArrayId > 0; 1201 VGPU10OperandToken0 operand0; 1202 1203 if (file == TGSI_FILE_TEMPORARY) { 1204 emit->temp_map[index].initialized = TRUE; 1205 } 1206 1207 if (file == TGSI_FILE_OUTPUT) { 1208 if (emit->unit == PIPE_SHADER_VERTEX || 1209 emit->unit == PIPE_SHADER_GEOMETRY || 1210 emit->unit == PIPE_SHADER_TESS_EVAL) { 1211 if (index == emit->vposition.out_index && 1212 emit->vposition.tmp_index != INVALID_INDEX) { 1213 /* replace OUTPUT[POS] with TEMP[POS]. We need to store the 1214 * vertex position result in a temporary so that we can modify 1215 * it in the post_helper() code. 1216 */ 1217 file = TGSI_FILE_TEMPORARY; 1218 index = emit->vposition.tmp_index; 1219 } 1220 else if (sem_name == TGSI_SEMANTIC_CLIPDIST && 1221 emit->clip_dist_tmp_index != INVALID_INDEX) { 1222 /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST]. 1223 * We store the clip distance in a temporary first, then 1224 * we'll copy it to the shadow copy and to CLIPDIST with the 1225 * enabled planes mask in emit_clip_distance_instructions(). 1226 */ 1227 file = TGSI_FILE_TEMPORARY; 1228 index = emit->clip_dist_tmp_index + sem_index; 1229 } 1230 else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX && 1231 emit->clip_vertex_tmp_index != INVALID_INDEX) { 1232 /* replace the CLIPVERTEX output register with a temporary */ 1233 assert(emit->clip_mode == CLIP_VERTEX); 1234 assert(sem_index == 0); 1235 file = TGSI_FILE_TEMPORARY; 1236 index = emit->clip_vertex_tmp_index; 1237 } 1238 else if (sem_name == TGSI_SEMANTIC_COLOR && 1239 emit->key.clamp_vertex_color) { 1240 1241 /* set the saturate modifier of the instruction 1242 * to clamp the vertex color. 
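             * The saturate bit is patched directly into the opcode token
             * already emitted for this instruction (at inst_start_token)
             * rather than into the operand being built here.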
1243 */ 1244 VGPU10OpcodeToken0 *token = 1245 (VGPU10OpcodeToken0 *)emit->buf + emit->inst_start_token; 1246 token->saturate = TRUE; 1247 } 1248 else if (sem_name == TGSI_SEMANTIC_VIEWPORT_INDEX && 1249 emit->gs.viewport_index_out_index != INVALID_INDEX) { 1250 file = TGSI_FILE_TEMPORARY; 1251 index = emit->gs.viewport_index_tmp_index; 1252 } 1253 } 1254 else if (emit->unit == PIPE_SHADER_FRAGMENT) { 1255 if (sem_name == TGSI_SEMANTIC_POSITION) { 1256 /* Fragment depth output register */ 1257 operand0.value = 0; 1258 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH; 1259 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; 1260 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; 1261 emit_dword(emit, operand0.value); 1262 return; 1263 } 1264 else if (sem_name == TGSI_SEMANTIC_SAMPLEMASK) { 1265 /* Fragment sample mask output */ 1266 operand0.value = 0; 1267 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK; 1268 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; 1269 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; 1270 emit_dword(emit, operand0.value); 1271 return; 1272 } 1273 else if (index == emit->fs.color_out_index[0] && 1274 emit->fs.color_tmp_index != INVALID_INDEX) { 1275 /* replace OUTPUT[COLOR] with TEMP[COLOR]. We need to store the 1276 * fragment color result in a temporary so that we can read it 1277 * it in the post_helper() code. 1278 */ 1279 file = TGSI_FILE_TEMPORARY; 1280 index = emit->fs.color_tmp_index; 1281 } 1282 else { 1283 /* Typically, for fragment shaders, the output register index 1284 * matches the color semantic index. But not when we write to 1285 * the fragment depth register. In that case, OUT[0] will be 1286 * fragdepth and OUT[1] will be the 0th color output. We need 1287 * to use the semantic index for color outputs. 1288 */ 1289 assert(sem_name == TGSI_SEMANTIC_COLOR); 1290 index = emit->info.output_semantic_index[index]; 1291 1292 emit->num_output_writes++; 1293 } 1294 } 1295 else if (emit->unit == PIPE_SHADER_TESS_CTRL) { 1296 if (index == emit->tcs.inner.tgsi_index) { 1297 /* replace OUTPUT[TESSLEVEL] with temp. We are storing it 1298 * in temporary for now so that will be store into appropriate 1299 * registers in post_helper() in patch constant phase. 1300 */ 1301 if (emit->tcs.control_point_phase) { 1302 /* Discard writing into tessfactor in control point phase */ 1303 emit->discard_instruction = TRUE; 1304 } 1305 else { 1306 file = TGSI_FILE_TEMPORARY; 1307 index = emit->tcs.inner.temp_index; 1308 } 1309 } 1310 else if (index == emit->tcs.outer.tgsi_index) { 1311 /* replace OUTPUT[TESSLEVEL] with temp. We are storing it 1312 * in temporary for now so that will be store into appropriate 1313 * registers in post_helper(). 1314 */ 1315 if (emit->tcs.control_point_phase) { 1316 /* Discard writing into tessfactor in control point phase */ 1317 emit->discard_instruction = TRUE; 1318 } 1319 else { 1320 file = TGSI_FILE_TEMPORARY; 1321 index = emit->tcs.outer.temp_index; 1322 } 1323 } 1324 else if (index >= emit->tcs.patch_generic_out_index && 1325 index < (emit->tcs.patch_generic_out_index + 1326 emit->tcs.patch_generic_out_count)) { 1327 if (emit->tcs.control_point_phase) { 1328 /* Discard writing into generic patch constant outputs in 1329 control point phase */ 1330 emit->discard_instruction = TRUE; 1331 } 1332 else { 1333 if (emit->reemit_instruction) { 1334 /* Store results of reemitted instruction in temporary register. 
                   */
                  file = TGSI_FILE_TEMPORARY;
                  index = emit->tcs.patch_generic_tmp_index +
                          (index - emit->tcs.patch_generic_out_index);
                  /**
                   * Temporaries for patch constant data can be done
                   * as indexable temporaries.
                   */
                  tempArrayId = get_temp_array_id(emit, file, index);
                  index2d = tempArrayId > 0;

                  emit->reemit_instruction = FALSE;
               }
               else {
                  /* If per-patch outputs are read in the shader, we
                   * reemit the instruction and store the results in
                   * temporaries in the patch constant phase. */
                  if (emit->info.reads_perpatch_outputs) {
                     emit->reemit_instruction = TRUE;
                  }
               }
            }
         }
         else if (reg->Register.Dimension) {
            /* Only control point outputs are declared 2D in tgsi */
            if (emit->tcs.control_point_phase) {
               if (emit->reemit_instruction) {
                  /* Store results of the reemitted instruction in a
                   * temporary register. */
                  index2d = FALSE;
                  file = TGSI_FILE_TEMPORARY;
                  index = emit->tcs.control_point_tmp_index +
                          (index - emit->tcs.control_point_out_index);
                  emit->reemit_instruction = FALSE;
               }
               else {
                  /* The mapped control point outputs are 1-D */
                  index2d = FALSE;
                  if (emit->info.reads_pervertex_outputs) {
                     /* If per-vertex outputs are read in the shader, we
                      * reemit the instruction and store the results in
                      * temporaries in the control point phase. */
                     emit->reemit_instruction = TRUE;
                  }
               }

               if (sem_name == TGSI_SEMANTIC_CLIPDIST &&
                   emit->clip_dist_tmp_index != INVALID_INDEX) {
                  /* replace OUTPUT[CLIPDIST] with TEMP[CLIPDIST].
                   * We store the clip distance in a temporary first, then
                   * we'll copy it to the shadow copy and to CLIPDIST with the
                   * enabled planes mask in emit_clip_distance_instructions().
                   */
                  file = TGSI_FILE_TEMPORARY;
                  index = emit->clip_dist_tmp_index + sem_index;
               }
               else if (sem_name == TGSI_SEMANTIC_CLIPVERTEX &&
                        emit->clip_vertex_tmp_index != INVALID_INDEX) {
                  /* replace the CLIPVERTEX output register with a temporary */
                  assert(emit->clip_mode == CLIP_VERTEX);
                  assert(sem_index == 0);
                  file = TGSI_FILE_TEMPORARY;
                  index = emit->clip_vertex_tmp_index;
               }
            }
            else {
               /* Discard writing into control point outputs in the
                * patch constant phase.
                */
               emit->discard_instruction = TRUE;
            }
         }
      }
   }

   /* init operand tokens to all zero */
   operand0.value = 0;

   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;

   /* the operand has a writemask */
   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE;

   /* Which of the four dest components to write to. Note that we can use a
    * simple assignment here since TGSI writemasks match VGPU10 writemasks.
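    * (e.g. a TGSI .xz writemask, 0x5, is used unchanged as the VGPU10
    * component mask selecting X and Z)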
1417 */ 1418 STATIC_ASSERT(TGSI_WRITEMASK_X == VGPU10_OPERAND_4_COMPONENT_MASK_X); 1419 operand0.mask = writemask; 1420 1421 /* translate TGSI register file type to VGPU10 operand type */ 1422 operand0.operandType = translate_register_file(file, tempArrayId > 0); 1423 1424 check_register_index(emit, operand0.operandType, index); 1425 1426 operand0 = setup_operand0_indexing(emit, operand0, file, indirect, 1427 index2d, FALSE); 1428 1429 /* Emit tokens */ 1430 emit_dword(emit, operand0.value); 1431 if (tempArrayId > 0) { 1432 emit_dword(emit, tempArrayId); 1433 } 1434 1435 emit_dword(emit, remap_temp_index(emit, file, index)); 1436 1437 if (indirect) { 1438 emit_indirect_register(emit, reg->Indirect.Index); 1439 } 1440} 1441 1442 1443/** 1444 * Check if temporary register needs to be initialize when 1445 * shader is not using indirect addressing for temporary and uninitialized 1446 * temporary is not used in loop. In these two scenarios, we cannot 1447 * determine if temporary is initialized or not. 1448 */ 1449static boolean 1450need_temp_reg_initialization(struct svga_shader_emitter_v10 *emit, 1451 unsigned index) 1452{ 1453 if (!(emit->info.indirect_files & (1u << TGSI_FILE_TEMPORARY)) 1454 && emit->current_loop_depth == 0) { 1455 if (!emit->temp_map[index].initialized && 1456 emit->temp_map[index].index < emit->num_shader_temps) { 1457 return TRUE; 1458 } 1459 } 1460 1461 return FALSE; 1462} 1463 1464 1465/** 1466 * Translate a src register of a TGSI instruction and emit VGPU10 tokens. 1467 * In quite a few cases, we do register substitution. For example, if 1468 * the TGSI register is the front/back-face register, we replace that with 1469 * a temp register containing a value we computed earlier. 1470 */ 1471static void 1472emit_src_register(struct svga_shader_emitter_v10 *emit, 1473 const struct tgsi_full_src_register *reg) 1474{ 1475 enum tgsi_file_type file = reg->Register.File; 1476 unsigned index = reg->Register.Index; 1477 const boolean indirect = reg->Register.Indirect; 1478 unsigned tempArrayId = get_temp_array_id(emit, file, index); 1479 boolean index2d = (reg->Register.Dimension || 1480 tempArrayId > 0 || 1481 file == TGSI_FILE_CONSTANT); 1482 unsigned index2 = tempArrayId > 0 ? tempArrayId : reg->Dimension.Index; 1483 boolean indirect2d = reg->Dimension.Indirect; 1484 unsigned swizzleX = reg->Register.SwizzleX; 1485 unsigned swizzleY = reg->Register.SwizzleY; 1486 unsigned swizzleZ = reg->Register.SwizzleZ; 1487 unsigned swizzleW = reg->Register.SwizzleW; 1488 const boolean absolute = reg->Register.Absolute; 1489 const boolean negate = reg->Register.Negate; 1490 VGPU10OperandToken0 operand0; 1491 VGPU10OperandToken1 operand1; 1492 1493 operand0.value = operand1.value = 0; 1494 1495 if (emit->unit == PIPE_SHADER_FRAGMENT){ 1496 if (file == TGSI_FILE_INPUT) { 1497 if (index == emit->fs.face_input_index) { 1498 /* Replace INPUT[FACE] with TEMP[FACE] */ 1499 file = TGSI_FILE_TEMPORARY; 1500 index = emit->fs.face_tmp_index; 1501 } 1502 else if (index == emit->fs.fragcoord_input_index) { 1503 /* Replace INPUT[POSITION] with TEMP[POSITION] */ 1504 file = TGSI_FILE_TEMPORARY; 1505 index = emit->fs.fragcoord_tmp_index; 1506 } 1507 else if (index == emit->fs.layer_input_index) { 1508 /* Replace INPUT[LAYER] with zero.x */ 1509 file = TGSI_FILE_IMMEDIATE; 1510 index = emit->fs.layer_imm_index; 1511 swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X; 1512 } 1513 else { 1514 /* We remap fragment shader inputs to that FS input indexes 1515 * match up with VS/GS output indexes. 
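            * For example (hypothetical linkage): if the VS wrote GENERIC[3]
            * to its output slot 5, a FS reading that semantic through
            * INPUT[2] gets linkage.input_map[2] == 5 here.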
1516 */ 1517 index = emit->linkage.input_map[index]; 1518 } 1519 } 1520 else if (file == TGSI_FILE_SYSTEM_VALUE) { 1521 if (index == emit->fs.sample_pos_sys_index) { 1522 assert(emit->version >= 41); 1523 /* Current sample position is in a temp register */ 1524 file = TGSI_FILE_TEMPORARY; 1525 index = emit->fs.sample_pos_tmp_index; 1526 } 1527 else if (index == emit->fs.sample_mask_in_sys_index) { 1528 /* Emitted as vCoverage0.x */ 1529 /* According to GLSL spec, the gl_SampleMaskIn array has ceil(s / 32) 1530 * elements where s is the maximum number of color samples supported 1531 * by the implementation. With current implementation, we should not 1532 * have more than one element. So assert if Index != 0 1533 */ 1534 assert((!reg->Register.Indirect && reg->Register.Index == 0) || 1535 reg->Register.Indirect); 1536 operand0.value = 0; 1537 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK; 1538 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; 1539 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 1540 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE; 1541 emit_dword(emit, operand0.value); 1542 return; 1543 } 1544 else { 1545 /* Map the TGSI system value to a VGPU10 input register */ 1546 assert(index < ARRAY_SIZE(emit->system_value_indexes)); 1547 file = TGSI_FILE_INPUT; 1548 index = emit->system_value_indexes[index]; 1549 } 1550 } 1551 } 1552 else if (emit->unit == PIPE_SHADER_GEOMETRY) { 1553 if (file == TGSI_FILE_INPUT) { 1554 if (index == emit->gs.prim_id_index) { 1555 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; 1556 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID; 1557 } 1558 index = emit->linkage.input_map[index]; 1559 } 1560 else if (file == TGSI_FILE_SYSTEM_VALUE && 1561 index == emit->gs.invocation_id_sys_index) { 1562 /* Emitted as vGSInstanceID0.x */ 1563 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 1564 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID; 1565 index = 0; 1566 } 1567 } 1568 else if (emit->unit == PIPE_SHADER_VERTEX) { 1569 if (file == TGSI_FILE_INPUT) { 1570 /* if input is adjusted... */ 1571 if ((emit->key.vs.adjust_attrib_w_1 | 1572 emit->key.vs.adjust_attrib_itof | 1573 emit->key.vs.adjust_attrib_utof | 1574 emit->key.vs.attrib_is_bgra | 1575 emit->key.vs.attrib_puint_to_snorm | 1576 emit->key.vs.attrib_puint_to_uscaled | 1577 emit->key.vs.attrib_puint_to_sscaled) & (1 << index)) { 1578 file = TGSI_FILE_TEMPORARY; 1579 index = emit->vs.adjusted_input[index]; 1580 } 1581 } 1582 else if (file == TGSI_FILE_SYSTEM_VALUE) { 1583 if (index == emit->vs.vertex_id_sys_index && 1584 emit->vs.vertex_id_tmp_index != INVALID_INDEX) { 1585 file = TGSI_FILE_TEMPORARY; 1586 index = emit->vs.vertex_id_tmp_index; 1587 swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X; 1588 } 1589 else { 1590 /* Map the TGSI system value to a VGPU10 input register */ 1591 assert(index < ARRAY_SIZE(emit->system_value_indexes)); 1592 file = TGSI_FILE_INPUT; 1593 index = emit->system_value_indexes[index]; 1594 } 1595 } 1596 } 1597 else if (emit->unit == PIPE_SHADER_TESS_CTRL) { 1598 1599 if (file == TGSI_FILE_SYSTEM_VALUE) { 1600 if (index == emit->tcs.vertices_per_patch_index) { 1601 /** 1602 * if source register is the system value for vertices_per_patch, 1603 * replace it with the immediate. 
1604 */ 1605 file = TGSI_FILE_IMMEDIATE; 1606 index = emit->tcs.imm_index; 1607 swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_X; 1608 } 1609 else if (index == emit->tcs.invocation_id_sys_index) { 1610 if (emit->tcs.control_point_phase) { 1611 /** 1612 * Emitted as vOutputControlPointID.x 1613 */ 1614 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; 1615 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID; 1616 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; 1617 operand0.mask = 0; 1618 emit_dword(emit, operand0.value); 1619 return; 1620 } 1621 else { 1622 /* There is no control point ID input declaration in 1623 * the patch constant phase in hull shader. 1624 * Since for now we are emitting all instructions in 1625 * the patch constant phase, we are replacing the 1626 * control point ID reference with the immediate 0. 1627 */ 1628 file = TGSI_FILE_IMMEDIATE; 1629 index = emit->tcs.imm_index; 1630 swizzleX = swizzleY = swizzleZ = swizzleW = TGSI_SWIZZLE_W; 1631 } 1632 } 1633 else if (index == emit->tcs.prim_id_index) { 1634 /** 1635 * Emitted as vPrim.x 1636 */ 1637 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; 1638 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID; 1639 index = 0; 1640 } 1641 } 1642 else if (file == TGSI_FILE_INPUT) { 1643 index = emit->linkage.input_map[index]; 1644 if (!emit->tcs.control_point_phase) { 1645 /* Emitted as vicp */ 1646 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 1647 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT; 1648 assert(reg->Register.Dimension); 1649 } 1650 } 1651 else if (file == TGSI_FILE_OUTPUT) { 1652 if ((index >= emit->tcs.patch_generic_out_index && 1653 index < (emit->tcs.patch_generic_out_index + 1654 emit->tcs.patch_generic_out_count)) || 1655 index == emit->tcs.inner.tgsi_index || 1656 index == emit->tcs.outer.tgsi_index) { 1657 if (emit->tcs.control_point_phase) { 1658 emit->discard_instruction = TRUE; 1659 } 1660 else { 1661 /* Device doesn't allow reading from output so 1662 * use corresponding temporary register as source */ 1663 file = TGSI_FILE_TEMPORARY; 1664 if (index == emit->tcs.inner.tgsi_index) { 1665 index = emit->tcs.inner.temp_index; 1666 } 1667 else if (index == emit->tcs.outer.tgsi_index) { 1668 index = emit->tcs.outer.temp_index; 1669 } 1670 else { 1671 index = emit->tcs.patch_generic_tmp_index + 1672 (index - emit->tcs.patch_generic_out_index); 1673 } 1674 1675 /** 1676 * Temporaries for patch constant data can be done 1677 * as indexable temporaries. 1678 */ 1679 tempArrayId = get_temp_array_id(emit, file, index); 1680 index2d = tempArrayId > 0; 1681 index2 = tempArrayId > 0 ? 
tempArrayId : reg->Dimension.Index; 1682 } 1683 } 1684 else if (index2d) { 1685 if (emit->tcs.control_point_phase) { 1686 /* Device doesn't allow reading from output so 1687 * use corresponding temporary register as source */ 1688 file = TGSI_FILE_TEMPORARY; 1689 index2d = FALSE; 1690 index = emit->tcs.control_point_tmp_index + 1691 (index - emit->tcs.control_point_out_index); 1692 } 1693 else { 1694 emit->discard_instruction = TRUE; 1695 } 1696 } 1697 } 1698 } 1699 else if (emit->unit == PIPE_SHADER_TESS_EVAL) { 1700 if (file == TGSI_FILE_SYSTEM_VALUE) { 1701 if (index == emit->tes.tesscoord_sys_index) { 1702 /** 1703 * Emitted as vDomain 1704 */ 1705 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 1706 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT; 1707 index = 0; 1708 } 1709 else if (index == emit->tes.inner.tgsi_index) { 1710 file = TGSI_FILE_TEMPORARY; 1711 index = emit->tes.inner.temp_index; 1712 } 1713 else if (index == emit->tes.outer.tgsi_index) { 1714 file = TGSI_FILE_TEMPORARY; 1715 index = emit->tes.outer.temp_index; 1716 } 1717 else if (index == emit->tes.prim_id_index) { 1718 /** 1719 * Emitted as vPrim.x 1720 */ 1721 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; 1722 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID; 1723 index = 0; 1724 } 1725 1726 } 1727 else if (file == TGSI_FILE_INPUT) { 1728 if (index2d) { 1729 /* 2D input is emitted as vcp (input control point). */ 1730 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT; 1731 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 1732 1733 /* index specifies the element index and is remapped 1734 * to align with the tcs output index. 1735 */ 1736 index = emit->linkage.input_map[index]; 1737 1738 assert(index2 < emit->key.tes.vertices_per_patch); 1739 } 1740 else { 1741 if (index < emit->key.tes.tessfactor_index) 1742 /* index specifies the generic patch index. 1743 * Remapped to match up with the tcs output index. 1744 */ 1745 index = emit->linkage.input_map[index]; 1746 1747 operand0.operandType = VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT; 1748 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 1749 } 1750 } 1751 } 1752 1753 if (file == TGSI_FILE_ADDRESS) { 1754 index = emit->address_reg_index[index]; 1755 file = TGSI_FILE_TEMPORARY; 1756 } 1757 1758 if (file == TGSI_FILE_TEMPORARY) { 1759 if (need_temp_reg_initialization(emit, index)) { 1760 emit->initialize_temp_index = index; 1761 emit->discard_instruction = TRUE; 1762 } 1763 } 1764 1765 if (operand0.value == 0) { 1766 /* if operand0 was not set above for a special case, do the general 1767 * case now. 
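       * (i.e. a plain 4-component operand whose operand type is derived
       * directly from the TGSI register file).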
1768 */ 1769 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 1770 operand0.operandType = translate_register_file(file, tempArrayId > 0); 1771 } 1772 operand0 = setup_operand0_indexing(emit, operand0, file, indirect, 1773 index2d, indirect2d); 1774 1775 if (operand0.operandType != VGPU10_OPERAND_TYPE_IMMEDIATE32 && 1776 operand0.operandType != VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) { 1777 /* there's no swizzle for in-line immediates */ 1778 if (swizzleX == swizzleY && 1779 swizzleX == swizzleZ && 1780 swizzleX == swizzleW) { 1781 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE; 1782 } 1783 else { 1784 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; 1785 } 1786 1787 operand0.swizzleX = swizzleX; 1788 operand0.swizzleY = swizzleY; 1789 operand0.swizzleZ = swizzleZ; 1790 operand0.swizzleW = swizzleW; 1791 1792 if (absolute || negate) { 1793 operand0.extended = 1; 1794 operand1.extendedOperandType = VGPU10_EXTENDED_OPERAND_MODIFIER; 1795 if (absolute && !negate) 1796 operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABS; 1797 if (!absolute && negate) 1798 operand1.operandModifier = VGPU10_OPERAND_MODIFIER_NEG; 1799 if (absolute && negate) 1800 operand1.operandModifier = VGPU10_OPERAND_MODIFIER_ABSNEG; 1801 } 1802 } 1803 1804 /* Emit the operand tokens */ 1805 emit_dword(emit, operand0.value); 1806 if (operand0.extended) 1807 emit_dword(emit, operand1.value); 1808 1809 if (operand0.operandType == VGPU10_OPERAND_TYPE_IMMEDIATE32) { 1810 /* Emit the four float/int in-line immediate values */ 1811 unsigned *c; 1812 assert(index < ARRAY_SIZE(emit->immediates)); 1813 assert(file == TGSI_FILE_IMMEDIATE); 1814 assert(swizzleX < 4); 1815 assert(swizzleY < 4); 1816 assert(swizzleZ < 4); 1817 assert(swizzleW < 4); 1818 c = (unsigned *) emit->immediates[index]; 1819 emit_dword(emit, c[swizzleX]); 1820 emit_dword(emit, c[swizzleY]); 1821 emit_dword(emit, c[swizzleZ]); 1822 emit_dword(emit, c[swizzleW]); 1823 } 1824 else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_1D) { 1825 /* Emit the register index(es) */ 1826 if (index2d) { 1827 emit_dword(emit, index2); 1828 1829 if (indirect2d) { 1830 emit_indirect_register(emit, reg->DimIndirect.Index); 1831 } 1832 } 1833 1834 emit_dword(emit, remap_temp_index(emit, file, index)); 1835 1836 if (indirect) { 1837 emit_indirect_register(emit, reg->Indirect.Index); 1838 } 1839 } 1840} 1841 1842 1843/** 1844 * Emit a resource operand (for use with a SAMPLE instruction). 1845 */ 1846static void 1847emit_resource_register(struct svga_shader_emitter_v10 *emit, 1848 unsigned resource_number) 1849{ 1850 VGPU10OperandToken0 operand0; 1851 1852 check_register_index(emit, VGPU10_OPERAND_TYPE_RESOURCE, resource_number); 1853 1854 /* init */ 1855 operand0.value = 0; 1856 1857 operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE; 1858 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 1859 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 1860 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; 1861 operand0.swizzleX = VGPU10_COMPONENT_X; 1862 operand0.swizzleY = VGPU10_COMPONENT_Y; 1863 operand0.swizzleZ = VGPU10_COMPONENT_Z; 1864 operand0.swizzleW = VGPU10_COMPONENT_W; 1865 1866 emit_dword(emit, operand0.value); 1867 emit_dword(emit, resource_number); 1868} 1869 1870 1871/** 1872 * Emit a sampler operand (for use with a SAMPLE instruction). 
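 *
 * For example (a sketch of the resulting VGPU10 assembly, with register
 * numbers chosen arbitrarily):
 *   SAMPLE r0.xyzw, v1.xyxx, t2.xyzw, s3
 * Here emit_resource_register(emit, 2) produces the t2 operand and
 * emit_sampler_register(emit, 3) produces the s3 operand.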
 */
static void
emit_sampler_register(struct svga_shader_emitter_v10 *emit,
                      unsigned sampler_number)
{
   VGPU10OperandToken0 operand0;

   check_register_index(emit, VGPU10_OPERAND_TYPE_SAMPLER, sampler_number);

   /* init */
   operand0.value = 0;

   operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER;
   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;

   emit_dword(emit, operand0.value);
   emit_dword(emit, sampler_number);
}


/**
 * Emit an operand which reads the IS_FRONT_FACING register.
 */
static void
emit_face_register(struct svga_shader_emitter_v10 *emit)
{
   VGPU10OperandToken0 operand0;
   unsigned index = emit->linkage.input_map[emit->fs.face_input_index];

   /* init */
   operand0.value = 0;

   operand0.operandType = VGPU10_OPERAND_TYPE_INPUT;
   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SELECT_1_MODE;
   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;

   operand0.swizzleX = VGPU10_COMPONENT_X;
   operand0.swizzleY = VGPU10_COMPONENT_X;
   operand0.swizzleZ = VGPU10_COMPONENT_X;
   operand0.swizzleW = VGPU10_COMPONENT_X;

   emit_dword(emit, operand0.value);
   emit_dword(emit, index);
}


/**
 * Emit tokens for the "rasterizer" register used by the SAMPLE_POS
 * instruction.
 */
static void
emit_rasterizer_register(struct svga_shader_emitter_v10 *emit)
{
   VGPU10OperandToken0 operand0;

   /* init */
   operand0.value = 0;

   /* No register index for rasterizer index (there's only one) */
   operand0.operandType = VGPU10_OPERAND_TYPE_RASTERIZER;
   operand0.indexDimension = VGPU10_OPERAND_INDEX_0D;
   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
   operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE;
   operand0.swizzleX = VGPU10_COMPONENT_X;
   operand0.swizzleY = VGPU10_COMPONENT_Y;
   operand0.swizzleZ = VGPU10_COMPONENT_Z;
   operand0.swizzleW = VGPU10_COMPONENT_W;

   emit_dword(emit, operand0.value);
}


/**
 * Emit tokens for the "stream" register used by the
 * DCL_STREAM, CUT_STREAM, EMIT_STREAM instructions.
 */
static void
emit_stream_register(struct svga_shader_emitter_v10 *emit, unsigned index)
{
   VGPU10OperandToken0 operand0;

   /* init */
   operand0.value = 0;

   /* The stream number is emitted as a 1D register index */
   operand0.operandType = VGPU10_OPERAND_TYPE_STREAM;
   operand0.indexDimension = VGPU10_OPERAND_INDEX_1D;
   operand0.numComponents = VGPU10_OPERAND_0_COMPONENT;

   emit_dword(emit, operand0.value);
   emit_dword(emit, index);
}


/**
 * Emit the token for a VGPU10 opcode, with precise parameter.
 * \param saturate clamp result to [0,1]?
 */
static void
emit_opcode_precise(struct svga_shader_emitter_v10 *emit,
                    unsigned vgpu10_opcode, boolean saturate, boolean precise)
{
   VGPU10OpcodeToken0 token0;

   token0.value = 0;             /* init all fields to zero */
   token0.opcodeType = vgpu10_opcode;
   token0.instructionLength = 0; /* Filled in by end_emit_instruction() */
   token0.saturate = saturate;

   /* Mesa's GLSL IR -> TGSI translator will set the TGSI precise flag for
    * 'invariant' declarations.  Only set preciseValues=1 if we have SM5.
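    * (emit->version holds the VGPU10 shader model times ten, e.g. 41 for
    * SM4.1 and 50 for SM5.0, so the check below effectively means "SM5+".)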
1985 */ 1986 token0.preciseValues = precise && emit->version >= 50; 1987 1988 emit_dword(emit, token0.value); 1989 1990 emit->uses_precise_qualifier |= token0.preciseValues; 1991} 1992 1993 1994/** 1995 * Emit the token for a VGPU10 opcode. 1996 * \param saturate clamp result to [0,1]? 1997 */ 1998static void 1999emit_opcode(struct svga_shader_emitter_v10 *emit, 2000 unsigned vgpu10_opcode, boolean saturate) 2001{ 2002 emit_opcode_precise(emit, vgpu10_opcode, saturate, FALSE); 2003} 2004 2005 2006/** 2007 * Emit the token for a VGPU10 resinfo instruction. 2008 * \param modifier return type modifier, _uint or _rcpFloat. 2009 * TODO: We may want to remove this parameter if it will 2010 * only ever be used as _uint. 2011 */ 2012static void 2013emit_opcode_resinfo(struct svga_shader_emitter_v10 *emit, 2014 VGPU10_RESINFO_RETURN_TYPE modifier) 2015{ 2016 VGPU10OpcodeToken0 token0; 2017 2018 token0.value = 0; /* init all fields to zero */ 2019 token0.opcodeType = VGPU10_OPCODE_RESINFO; 2020 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */ 2021 token0.resinfoReturnType = modifier; 2022 2023 emit_dword(emit, token0.value); 2024} 2025 2026 2027/** 2028 * Emit opcode tokens for a texture sample instruction. Texture instructions 2029 * can be rather complicated (texel offsets, etc) so we have this specialized 2030 * function. 2031 */ 2032static void 2033emit_sample_opcode(struct svga_shader_emitter_v10 *emit, 2034 unsigned vgpu10_opcode, boolean saturate, 2035 const int offsets[3]) 2036{ 2037 VGPU10OpcodeToken0 token0; 2038 VGPU10OpcodeToken1 token1; 2039 2040 token0.value = 0; /* init all fields to zero */ 2041 token0.opcodeType = vgpu10_opcode; 2042 token0.instructionLength = 0; /* Filled in by end_emit_instruction() */ 2043 token0.saturate = saturate; 2044 2045 if (offsets[0] || offsets[1] || offsets[2]) { 2046 assert(offsets[0] >= VGPU10_MIN_TEXEL_FETCH_OFFSET); 2047 assert(offsets[1] >= VGPU10_MIN_TEXEL_FETCH_OFFSET); 2048 assert(offsets[2] >= VGPU10_MIN_TEXEL_FETCH_OFFSET); 2049 assert(offsets[0] <= VGPU10_MAX_TEXEL_FETCH_OFFSET); 2050 assert(offsets[1] <= VGPU10_MAX_TEXEL_FETCH_OFFSET); 2051 assert(offsets[2] <= VGPU10_MAX_TEXEL_FETCH_OFFSET); 2052 2053 token0.extended = 1; 2054 token1.value = 0; 2055 token1.opcodeType = VGPU10_EXTENDED_OPCODE_SAMPLE_CONTROLS; 2056 token1.offsetU = offsets[0]; 2057 token1.offsetV = offsets[1]; 2058 token1.offsetW = offsets[2]; 2059 } 2060 2061 emit_dword(emit, token0.value); 2062 if (token0.extended) { 2063 emit_dword(emit, token1.value); 2064 } 2065} 2066 2067 2068/** 2069 * Emit a DISCARD opcode token. 2070 * If nonzero is set, we'll discard the fragment if the X component is not 0. 2071 * Otherwise, we'll discard the fragment if the X component is 0. 2072 */ 2073static void 2074emit_discard_opcode(struct svga_shader_emitter_v10 *emit, boolean nonzero) 2075{ 2076 VGPU10OpcodeToken0 opcode0; 2077 2078 opcode0.value = 0; 2079 opcode0.opcodeType = VGPU10_OPCODE_DISCARD; 2080 if (nonzero) 2081 opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO; 2082 2083 emit_dword(emit, opcode0.value); 2084} 2085 2086 2087/** 2088 * We need to call this before we begin emitting a VGPU10 instruction. 2089 */ 2090static void 2091begin_emit_instruction(struct svga_shader_emitter_v10 *emit) 2092{ 2093 assert(emit->inst_start_token == 0); 2094 /* Save location of the instruction's VGPU10OpcodeToken0 token. 2095 * Note, we can't save a pointer because it would become invalid if 2096 * we have to realloc the output buffer. 
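    *
    * A typical emit sequence looks like this (sketch):
    *   begin_emit_instruction(emit);
    *   emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE);
    *   ... emit the destination and source operand tokens ...
    *   end_emit_instruction(emit);
    * where end_emit_instruction() patches the instructionLength field of
    * the opcode token saved here.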
2097 */ 2098 emit->inst_start_token = emit_get_num_tokens(emit); 2099} 2100 2101 2102/** 2103 * We need to call this after we emit the last token of a VGPU10 instruction. 2104 * This function patches in the opcode token's instructionLength field. 2105 */ 2106static void 2107end_emit_instruction(struct svga_shader_emitter_v10 *emit) 2108{ 2109 VGPU10OpcodeToken0 *tokens = (VGPU10OpcodeToken0 *) emit->buf; 2110 unsigned inst_length; 2111 2112 assert(emit->inst_start_token > 0); 2113 2114 if (emit->discard_instruction) { 2115 /* Back up the emit->ptr to where this instruction started so 2116 * that we discard the current instruction. 2117 */ 2118 emit->ptr = (char *) (tokens + emit->inst_start_token); 2119 } 2120 else { 2121 /* Compute instruction length and patch that into the start of 2122 * the instruction. 2123 */ 2124 inst_length = emit_get_num_tokens(emit) - emit->inst_start_token; 2125 2126 assert(inst_length > 0); 2127 2128 tokens[emit->inst_start_token].instructionLength = inst_length; 2129 } 2130 2131 emit->inst_start_token = 0; /* reset to zero for error checking */ 2132 emit->discard_instruction = FALSE; 2133} 2134 2135 2136/** 2137 * Return index for a free temporary register. 2138 */ 2139static unsigned 2140get_temp_index(struct svga_shader_emitter_v10 *emit) 2141{ 2142 assert(emit->internal_temp_count < MAX_INTERNAL_TEMPS); 2143 return emit->num_shader_temps + emit->internal_temp_count++; 2144} 2145 2146 2147/** 2148 * Release the temporaries which were generated by get_temp_index(). 2149 */ 2150static void 2151free_temp_indexes(struct svga_shader_emitter_v10 *emit) 2152{ 2153 emit->internal_temp_count = 0; 2154} 2155 2156 2157/** 2158 * Create a tgsi_full_src_register. 2159 */ 2160static struct tgsi_full_src_register 2161make_src_reg(enum tgsi_file_type file, unsigned index) 2162{ 2163 struct tgsi_full_src_register reg; 2164 2165 memset(®, 0, sizeof(reg)); 2166 reg.Register.File = file; 2167 reg.Register.Index = index; 2168 reg.Register.SwizzleX = TGSI_SWIZZLE_X; 2169 reg.Register.SwizzleY = TGSI_SWIZZLE_Y; 2170 reg.Register.SwizzleZ = TGSI_SWIZZLE_Z; 2171 reg.Register.SwizzleW = TGSI_SWIZZLE_W; 2172 return reg; 2173} 2174 2175 2176/** 2177 * Create a tgsi_full_src_register with a swizzle such that all four 2178 * vector components have the same scalar value. 2179 */ 2180static struct tgsi_full_src_register 2181make_src_scalar_reg(enum tgsi_file_type file, unsigned index, unsigned component) 2182{ 2183 struct tgsi_full_src_register reg; 2184 2185 assert(component >= TGSI_SWIZZLE_X); 2186 assert(component <= TGSI_SWIZZLE_W); 2187 2188 memset(®, 0, sizeof(reg)); 2189 reg.Register.File = file; 2190 reg.Register.Index = index; 2191 reg.Register.SwizzleX = 2192 reg.Register.SwizzleY = 2193 reg.Register.SwizzleZ = 2194 reg.Register.SwizzleW = component; 2195 return reg; 2196} 2197 2198 2199/** 2200 * Create a tgsi_full_src_register for a temporary. 2201 */ 2202static struct tgsi_full_src_register 2203make_src_temp_reg(unsigned index) 2204{ 2205 return make_src_reg(TGSI_FILE_TEMPORARY, index); 2206} 2207 2208 2209/** 2210 * Create a tgsi_full_src_register for a constant. 2211 */ 2212static struct tgsi_full_src_register 2213make_src_const_reg(unsigned index) 2214{ 2215 return make_src_reg(TGSI_FILE_CONSTANT, index); 2216} 2217 2218 2219/** 2220 * Create a tgsi_full_src_register for an immediate constant. 
2221 */ 2222static struct tgsi_full_src_register 2223make_src_immediate_reg(unsigned index) 2224{ 2225 return make_src_reg(TGSI_FILE_IMMEDIATE, index); 2226} 2227 2228 2229/** 2230 * Create a tgsi_full_dst_register. 2231 */ 2232static struct tgsi_full_dst_register 2233make_dst_reg(enum tgsi_file_type file, unsigned index) 2234{ 2235 struct tgsi_full_dst_register reg; 2236 2237 memset(®, 0, sizeof(reg)); 2238 reg.Register.File = file; 2239 reg.Register.Index = index; 2240 reg.Register.WriteMask = TGSI_WRITEMASK_XYZW; 2241 return reg; 2242} 2243 2244 2245/** 2246 * Create a tgsi_full_dst_register for a temporary. 2247 */ 2248static struct tgsi_full_dst_register 2249make_dst_temp_reg(unsigned index) 2250{ 2251 return make_dst_reg(TGSI_FILE_TEMPORARY, index); 2252} 2253 2254 2255/** 2256 * Create a tgsi_full_dst_register for an output. 2257 */ 2258static struct tgsi_full_dst_register 2259make_dst_output_reg(unsigned index) 2260{ 2261 return make_dst_reg(TGSI_FILE_OUTPUT, index); 2262} 2263 2264 2265/** 2266 * Create negated tgsi_full_src_register. 2267 */ 2268static struct tgsi_full_src_register 2269negate_src(const struct tgsi_full_src_register *reg) 2270{ 2271 struct tgsi_full_src_register neg = *reg; 2272 neg.Register.Negate = !reg->Register.Negate; 2273 return neg; 2274} 2275 2276/** 2277 * Create absolute value of a tgsi_full_src_register. 2278 */ 2279static struct tgsi_full_src_register 2280absolute_src(const struct tgsi_full_src_register *reg) 2281{ 2282 struct tgsi_full_src_register absolute = *reg; 2283 absolute.Register.Absolute = 1; 2284 return absolute; 2285} 2286 2287 2288/** Return the named swizzle term from the src register */ 2289static inline unsigned 2290get_swizzle(const struct tgsi_full_src_register *reg, enum tgsi_swizzle term) 2291{ 2292 switch (term) { 2293 case TGSI_SWIZZLE_X: 2294 return reg->Register.SwizzleX; 2295 case TGSI_SWIZZLE_Y: 2296 return reg->Register.SwizzleY; 2297 case TGSI_SWIZZLE_Z: 2298 return reg->Register.SwizzleZ; 2299 case TGSI_SWIZZLE_W: 2300 return reg->Register.SwizzleW; 2301 default: 2302 assert(!"Bad swizzle"); 2303 return TGSI_SWIZZLE_X; 2304 } 2305} 2306 2307 2308/** 2309 * Create swizzled tgsi_full_src_register. 2310 */ 2311static struct tgsi_full_src_register 2312swizzle_src(const struct tgsi_full_src_register *reg, 2313 enum tgsi_swizzle swizzleX, enum tgsi_swizzle swizzleY, 2314 enum tgsi_swizzle swizzleZ, enum tgsi_swizzle swizzleW) 2315{ 2316 struct tgsi_full_src_register swizzled = *reg; 2317 /* Note: we swizzle the current swizzle */ 2318 swizzled.Register.SwizzleX = get_swizzle(reg, swizzleX); 2319 swizzled.Register.SwizzleY = get_swizzle(reg, swizzleY); 2320 swizzled.Register.SwizzleZ = get_swizzle(reg, swizzleZ); 2321 swizzled.Register.SwizzleW = get_swizzle(reg, swizzleW); 2322 return swizzled; 2323} 2324 2325 2326/** 2327 * Create swizzled tgsi_full_src_register where all the swizzle 2328 * terms are the same. 2329 */ 2330static struct tgsi_full_src_register 2331scalar_src(const struct tgsi_full_src_register *reg, enum tgsi_swizzle swizzle) 2332{ 2333 struct tgsi_full_src_register swizzled = *reg; 2334 /* Note: we swizzle the current swizzle */ 2335 swizzled.Register.SwizzleX = 2336 swizzled.Register.SwizzleY = 2337 swizzled.Register.SwizzleZ = 2338 swizzled.Register.SwizzleW = get_swizzle(reg, swizzle); 2339 return swizzled; 2340} 2341 2342 2343/** 2344 * Create new tgsi_full_dst_register with writemask. 
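 * For example, writemask_dst(&dst, TGSI_WRITEMASK_XY) returns a copy of
 * dst that only writes the .xy components.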
2345 * \param mask bitmask of TGSI_WRITEMASK_[XYZW] 2346 */ 2347static struct tgsi_full_dst_register 2348writemask_dst(const struct tgsi_full_dst_register *reg, unsigned mask) 2349{ 2350 struct tgsi_full_dst_register masked = *reg; 2351 masked.Register.WriteMask = mask; 2352 return masked; 2353} 2354 2355 2356/** 2357 * Check if the register's swizzle is XXXX, YYYY, ZZZZ, or WWWW. 2358 */ 2359static boolean 2360same_swizzle_terms(const struct tgsi_full_src_register *reg) 2361{ 2362 return (reg->Register.SwizzleX == reg->Register.SwizzleY && 2363 reg->Register.SwizzleY == reg->Register.SwizzleZ && 2364 reg->Register.SwizzleZ == reg->Register.SwizzleW); 2365} 2366 2367 2368/** 2369 * Search the vector for the value 'x' and return its position. 2370 */ 2371static int 2372find_imm_in_vec4(const union tgsi_immediate_data vec[4], 2373 union tgsi_immediate_data x) 2374{ 2375 unsigned i; 2376 for (i = 0; i < 4; i++) { 2377 if (vec[i].Int == x.Int) 2378 return i; 2379 } 2380 return -1; 2381} 2382 2383 2384/** 2385 * Helper used by make_immediate_reg(), make_immediate_reg_4(). 2386 */ 2387static int 2388find_immediate(struct svga_shader_emitter_v10 *emit, 2389 union tgsi_immediate_data x, unsigned startIndex) 2390{ 2391 const unsigned endIndex = emit->num_immediates; 2392 unsigned i; 2393 2394 assert(emit->immediates_emitted); 2395 2396 /* Search immediates for x, y, z, w */ 2397 for (i = startIndex; i < endIndex; i++) { 2398 if (x.Int == emit->immediates[i][0].Int || 2399 x.Int == emit->immediates[i][1].Int || 2400 x.Int == emit->immediates[i][2].Int || 2401 x.Int == emit->immediates[i][3].Int) { 2402 return i; 2403 } 2404 } 2405 /* Should never try to use an immediate value that wasn't pre-declared */ 2406 assert(!"find_immediate() failed!"); 2407 return -1; 2408} 2409 2410 2411/** 2412 * As above, but search for a double[2] pair. 2413 */ 2414static int 2415find_immediate_dbl(struct svga_shader_emitter_v10 *emit, 2416 double x, double y) 2417{ 2418 const unsigned endIndex = emit->num_immediates; 2419 unsigned i; 2420 2421 assert(emit->immediates_emitted); 2422 2423 /* Search immediates for x, y, z, w */ 2424 for (i = 0; i < endIndex; i++) { 2425 if (x == emit->immediates_dbl[i][0] && 2426 y == emit->immediates_dbl[i][1]) { 2427 return i; 2428 } 2429 } 2430 /* Should never try to use an immediate value that wasn't pre-declared */ 2431 assert(!"find_immediate_dbl() failed!"); 2432 return -1; 2433} 2434 2435 2436 2437/** 2438 * Return a tgsi_full_src_register for an immediate/literal 2439 * union tgsi_immediate_data[4] value. 2440 * Note: the values must have been previously declared/allocated in 2441 * emit_pre_helpers(). And, all of x,y,z,w must be located in the same 2442 * vec4 immediate. 
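 *
 * For example, since alloc_common_immediates() allocates the int4
 * {0, 1, 0, -1}, a request for {1, 1, 0, 0} can be satisfied by that
 * vec4 with a .yyxx swizzle.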
2443 */ 2444static struct tgsi_full_src_register 2445make_immediate_reg_4(struct svga_shader_emitter_v10 *emit, 2446 const union tgsi_immediate_data imm[4]) 2447{ 2448 struct tgsi_full_src_register reg; 2449 unsigned i; 2450 2451 for (i = 0; i < emit->num_common_immediates; i++) { 2452 /* search for first component value */ 2453 int immpos = find_immediate(emit, imm[0], i); 2454 int x, y, z, w; 2455 2456 assert(immpos >= 0); 2457 2458 /* find remaining components within the immediate vector */ 2459 x = find_imm_in_vec4(emit->immediates[immpos], imm[0]); 2460 y = find_imm_in_vec4(emit->immediates[immpos], imm[1]); 2461 z = find_imm_in_vec4(emit->immediates[immpos], imm[2]); 2462 w = find_imm_in_vec4(emit->immediates[immpos], imm[3]); 2463 2464 if (x >=0 && y >= 0 && z >= 0 && w >= 0) { 2465 /* found them all */ 2466 memset(®, 0, sizeof(reg)); 2467 reg.Register.File = TGSI_FILE_IMMEDIATE; 2468 reg.Register.Index = immpos; 2469 reg.Register.SwizzleX = x; 2470 reg.Register.SwizzleY = y; 2471 reg.Register.SwizzleZ = z; 2472 reg.Register.SwizzleW = w; 2473 return reg; 2474 } 2475 /* else, keep searching */ 2476 } 2477 2478 assert(!"Failed to find immediate register!"); 2479 2480 /* Just return IMM[0].xxxx */ 2481 memset(®, 0, sizeof(reg)); 2482 reg.Register.File = TGSI_FILE_IMMEDIATE; 2483 return reg; 2484} 2485 2486 2487/** 2488 * Return a tgsi_full_src_register for an immediate/literal 2489 * union tgsi_immediate_data value of the form {value, value, value, value}. 2490 * \sa make_immediate_reg_4() regarding allowed values. 2491 */ 2492static struct tgsi_full_src_register 2493make_immediate_reg(struct svga_shader_emitter_v10 *emit, 2494 union tgsi_immediate_data value) 2495{ 2496 struct tgsi_full_src_register reg; 2497 int immpos = find_immediate(emit, value, 0); 2498 2499 assert(immpos >= 0); 2500 2501 memset(®, 0, sizeof(reg)); 2502 reg.Register.File = TGSI_FILE_IMMEDIATE; 2503 reg.Register.Index = immpos; 2504 reg.Register.SwizzleX = 2505 reg.Register.SwizzleY = 2506 reg.Register.SwizzleZ = 2507 reg.Register.SwizzleW = find_imm_in_vec4(emit->immediates[immpos], value); 2508 2509 return reg; 2510} 2511 2512 2513/** 2514 * Return a tgsi_full_src_register for an immediate/literal float[4] value. 2515 * \sa make_immediate_reg_4() regarding allowed values. 2516 */ 2517static struct tgsi_full_src_register 2518make_immediate_reg_float4(struct svga_shader_emitter_v10 *emit, 2519 float x, float y, float z, float w) 2520{ 2521 union tgsi_immediate_data imm[4]; 2522 imm[0].Float = x; 2523 imm[1].Float = y; 2524 imm[2].Float = z; 2525 imm[3].Float = w; 2526 return make_immediate_reg_4(emit, imm); 2527} 2528 2529 2530/** 2531 * Return a tgsi_full_src_register for an immediate/literal float value 2532 * of the form {value, value, value, value}. 2533 * \sa make_immediate_reg_4() regarding allowed values. 2534 */ 2535static struct tgsi_full_src_register 2536make_immediate_reg_float(struct svga_shader_emitter_v10 *emit, float value) 2537{ 2538 union tgsi_immediate_data imm; 2539 imm.Float = value; 2540 return make_immediate_reg(emit, imm); 2541} 2542 2543 2544/** 2545 * Return a tgsi_full_src_register for an immediate/literal int[4] vector. 
2546 */ 2547static struct tgsi_full_src_register 2548make_immediate_reg_int4(struct svga_shader_emitter_v10 *emit, 2549 int x, int y, int z, int w) 2550{ 2551 union tgsi_immediate_data imm[4]; 2552 imm[0].Int = x; 2553 imm[1].Int = y; 2554 imm[2].Int = z; 2555 imm[3].Int = w; 2556 return make_immediate_reg_4(emit, imm); 2557} 2558 2559 2560/** 2561 * Return a tgsi_full_src_register for an immediate/literal int value 2562 * of the form {value, value, value, value}. 2563 * \sa make_immediate_reg_4() regarding allowed values. 2564 */ 2565static struct tgsi_full_src_register 2566make_immediate_reg_int(struct svga_shader_emitter_v10 *emit, int value) 2567{ 2568 union tgsi_immediate_data imm; 2569 imm.Int = value; 2570 return make_immediate_reg(emit, imm); 2571} 2572 2573 2574static struct tgsi_full_src_register 2575make_immediate_reg_double(struct svga_shader_emitter_v10 *emit, double value) 2576{ 2577 struct tgsi_full_src_register reg; 2578 int immpos = find_immediate_dbl(emit, value, value); 2579 2580 assert(immpos >= 0); 2581 2582 memset(®, 0, sizeof(reg)); 2583 reg.Register.File = TGSI_FILE_IMMEDIATE; 2584 reg.Register.Index = immpos; 2585 reg.Register.SwizzleX = TGSI_SWIZZLE_X; 2586 reg.Register.SwizzleY = TGSI_SWIZZLE_Y; 2587 reg.Register.SwizzleZ = TGSI_SWIZZLE_Z; 2588 reg.Register.SwizzleW = TGSI_SWIZZLE_W; 2589 2590 return reg; 2591} 2592 2593 2594/** 2595 * Allocate space for a union tgsi_immediate_data[4] immediate. 2596 * \return the index/position of the immediate. 2597 */ 2598static unsigned 2599alloc_immediate_4(struct svga_shader_emitter_v10 *emit, 2600 const union tgsi_immediate_data imm[4]) 2601{ 2602 unsigned n = emit->num_immediates++; 2603 assert(!emit->immediates_emitted); 2604 assert(n < ARRAY_SIZE(emit->immediates)); 2605 emit->immediates[n][0] = imm[0]; 2606 emit->immediates[n][1] = imm[1]; 2607 emit->immediates[n][2] = imm[2]; 2608 emit->immediates[n][3] = imm[3]; 2609 return n; 2610} 2611 2612 2613/** 2614 * Allocate space for a float[4] immediate. 2615 * \return the index/position of the immediate. 2616 */ 2617static unsigned 2618alloc_immediate_float4(struct svga_shader_emitter_v10 *emit, 2619 float x, float y, float z, float w) 2620{ 2621 union tgsi_immediate_data imm[4]; 2622 imm[0].Float = x; 2623 imm[1].Float = y; 2624 imm[2].Float = z; 2625 imm[3].Float = w; 2626 return alloc_immediate_4(emit, imm); 2627} 2628 2629 2630/** 2631 * Allocate space for an int[4] immediate. 2632 * \return the index/position of the immediate. 2633 */ 2634static unsigned 2635alloc_immediate_int4(struct svga_shader_emitter_v10 *emit, 2636 int x, int y, int z, int w) 2637{ 2638 union tgsi_immediate_data imm[4]; 2639 imm[0].Int = x; 2640 imm[1].Int = y; 2641 imm[2].Int = z; 2642 imm[3].Int = w; 2643 return alloc_immediate_4(emit, imm); 2644} 2645 2646 2647static unsigned 2648alloc_immediate_double2(struct svga_shader_emitter_v10 *emit, 2649 double x, double y) 2650{ 2651 unsigned n = emit->num_immediates++; 2652 assert(!emit->immediates_emitted); 2653 assert(n < ARRAY_SIZE(emit->immediates)); 2654 emit->immediates_dbl[n][0] = x; 2655 emit->immediates_dbl[n][1] = y; 2656 return n; 2657 2658} 2659 2660 2661/** 2662 * Allocate a shader input to store a system value. 
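 * The system value is assigned the input register slot just past the last
 * linked shader input (linkage.input_map_max), so it cannot collide with
 * a regular input register.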
2663 */ 2664static unsigned 2665alloc_system_value_index(struct svga_shader_emitter_v10 *emit, unsigned index) 2666{ 2667 const unsigned n = emit->linkage.input_map_max + 1 + index; 2668 assert(index < ARRAY_SIZE(emit->system_value_indexes)); 2669 emit->system_value_indexes[index] = n; 2670 return n; 2671} 2672 2673 2674/** 2675 * Translate a TGSI immediate value (union tgsi_immediate_data[4]) to VGPU10. 2676 */ 2677static boolean 2678emit_vgpu10_immediate(struct svga_shader_emitter_v10 *emit, 2679 const struct tgsi_full_immediate *imm) 2680{ 2681 /* We don't actually emit any code here. We just save the 2682 * immediate values and emit them later. 2683 */ 2684 alloc_immediate_4(emit, imm->u); 2685 return TRUE; 2686} 2687 2688 2689/** 2690 * Emit a VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER block 2691 * containing all the immediate values previously allocated 2692 * with alloc_immediate_4(). 2693 */ 2694static boolean 2695emit_vgpu10_immediates_block(struct svga_shader_emitter_v10 *emit) 2696{ 2697 VGPU10OpcodeToken0 token; 2698 2699 assert(!emit->immediates_emitted); 2700 2701 token.value = 0; 2702 token.opcodeType = VGPU10_OPCODE_CUSTOMDATA; 2703 token.customDataClass = VGPU10_CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER; 2704 2705 /* Note: no begin/end_emit_instruction() calls */ 2706 emit_dword(emit, token.value); 2707 emit_dword(emit, 2 + 4 * emit->num_immediates); 2708 emit_dwords(emit, (unsigned *) emit->immediates, 4 * emit->num_immediates); 2709 2710 emit->immediates_emitted = TRUE; 2711 2712 return TRUE; 2713} 2714 2715 2716/** 2717 * Translate a fragment shader's TGSI_INTERPOLATE_x mode to a vgpu10 2718 * interpolation mode. 2719 * \return a VGPU10_INTERPOLATION_x value 2720 */ 2721static unsigned 2722translate_interpolation(const struct svga_shader_emitter_v10 *emit, 2723 enum tgsi_interpolate_mode interp, 2724 enum tgsi_interpolate_loc interpolate_loc) 2725{ 2726 if (interp == TGSI_INTERPOLATE_COLOR) { 2727 interp = emit->key.fs.flatshade ? 2728 TGSI_INTERPOLATE_CONSTANT : TGSI_INTERPOLATE_PERSPECTIVE; 2729 } 2730 2731 switch (interp) { 2732 case TGSI_INTERPOLATE_CONSTANT: 2733 return VGPU10_INTERPOLATION_CONSTANT; 2734 case TGSI_INTERPOLATE_LINEAR: 2735 if (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID) { 2736 return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID; 2737 } else if (interpolate_loc == TGSI_INTERPOLATE_LOC_SAMPLE && 2738 emit->version >= 41) { 2739 return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE; 2740 } else { 2741 return VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE; 2742 } 2743 break; 2744 case TGSI_INTERPOLATE_PERSPECTIVE: 2745 if (interpolate_loc == TGSI_INTERPOLATE_LOC_CENTROID) { 2746 return VGPU10_INTERPOLATION_LINEAR_CENTROID; 2747 } else if (interpolate_loc == TGSI_INTERPOLATE_LOC_SAMPLE && 2748 emit->version >= 41) { 2749 return VGPU10_INTERPOLATION_LINEAR_SAMPLE; 2750 } else { 2751 return VGPU10_INTERPOLATION_LINEAR; 2752 } 2753 break; 2754 default: 2755 assert(!"Unexpected interpolation mode"); 2756 return VGPU10_INTERPOLATION_CONSTANT; 2757 } 2758} 2759 2760 2761/** 2762 * Translate a TGSI property to VGPU10. 2763 * Don't emit any instructions yet, only need to gather the primitive property 2764 * information. The output primitive topology might be changed later. The 2765 * final property instructions will be emitted as part of the pre-helper code. 
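 *
 * For example, a geometry shader declaring
 * TGSI_PROPERTY_GS_INPUT_PRIM = PIPE_PRIM_TRIANGLES is recorded here as
 * VGPU10_PRIMITIVE_TRIANGLE with an input array size of 3 vertices.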
2766 */ 2767static boolean 2768emit_vgpu10_property(struct svga_shader_emitter_v10 *emit, 2769 const struct tgsi_full_property *prop) 2770{ 2771 static const VGPU10_PRIMITIVE primType[] = { 2772 VGPU10_PRIMITIVE_POINT, /* PIPE_PRIM_POINTS */ 2773 VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINES */ 2774 VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINE_LOOP */ 2775 VGPU10_PRIMITIVE_LINE, /* PIPE_PRIM_LINE_STRIP */ 2776 VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLES */ 2777 VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLE_STRIP */ 2778 VGPU10_PRIMITIVE_TRIANGLE, /* PIPE_PRIM_TRIANGLE_FAN */ 2779 VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_QUADS */ 2780 VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_QUAD_STRIP */ 2781 VGPU10_PRIMITIVE_UNDEFINED, /* PIPE_PRIM_POLYGON */ 2782 VGPU10_PRIMITIVE_LINE_ADJ, /* PIPE_PRIM_LINES_ADJACENCY */ 2783 VGPU10_PRIMITIVE_LINE_ADJ, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */ 2784 VGPU10_PRIMITIVE_TRIANGLE_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */ 2785 VGPU10_PRIMITIVE_TRIANGLE_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */ 2786 }; 2787 2788 static const VGPU10_PRIMITIVE_TOPOLOGY primTopology[] = { 2789 VGPU10_PRIMITIVE_TOPOLOGY_POINTLIST, /* PIPE_PRIM_POINTS */ 2790 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* PIPE_PRIM_LINES */ 2791 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST, /* PIPE_PRIM_LINE_LOOP */ 2792 VGPU10_PRIMITIVE_TOPOLOGY_LINESTRIP, /* PIPE_PRIM_LINE_STRIP */ 2793 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST, /* PIPE_PRIM_TRIANGLES */ 2794 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_STRIP */ 2795 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP, /* PIPE_PRIM_TRIANGLE_FAN */ 2796 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_QUADS */ 2797 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_QUAD_STRIP */ 2798 VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED, /* PIPE_PRIM_POLYGON */ 2799 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* PIPE_PRIM_LINES_ADJACENCY */ 2800 VGPU10_PRIMITIVE_TOPOLOGY_LINELIST_ADJ, /* PIPE_PRIM_LINE_STRIP_ADJACENCY */ 2801 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ, /* PIPE_PRIM_TRIANGLES_ADJACENCY */ 2802 VGPU10_PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ /* PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY */ 2803 }; 2804 2805 static const unsigned inputArraySize[] = { 2806 0, /* VGPU10_PRIMITIVE_UNDEFINED */ 2807 1, /* VGPU10_PRIMITIVE_POINT */ 2808 2, /* VGPU10_PRIMITIVE_LINE */ 2809 3, /* VGPU10_PRIMITIVE_TRIANGLE */ 2810 0, 2811 0, 2812 4, /* VGPU10_PRIMITIVE_LINE_ADJ */ 2813 6 /* VGPU10_PRIMITIVE_TRIANGLE_ADJ */ 2814 }; 2815 2816 switch (prop->Property.PropertyName) { 2817 case TGSI_PROPERTY_GS_INPUT_PRIM: 2818 assert(prop->u[0].Data < ARRAY_SIZE(primType)); 2819 emit->gs.prim_type = primType[prop->u[0].Data]; 2820 assert(emit->gs.prim_type != VGPU10_PRIMITIVE_UNDEFINED); 2821 emit->gs.input_size = inputArraySize[emit->gs.prim_type]; 2822 break; 2823 2824 case TGSI_PROPERTY_GS_OUTPUT_PRIM: 2825 assert(prop->u[0].Data < ARRAY_SIZE(primTopology)); 2826 emit->gs.prim_topology = primTopology[prop->u[0].Data]; 2827 assert(emit->gs.prim_topology != VGPU10_PRIMITIVE_TOPOLOGY_UNDEFINED); 2828 break; 2829 2830 case TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES: 2831 emit->gs.max_out_vertices = prop->u[0].Data; 2832 break; 2833 2834 case TGSI_PROPERTY_GS_INVOCATIONS: 2835 emit->gs.invocations = prop->u[0].Data; 2836 break; 2837 2838 case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS: 2839 case TGSI_PROPERTY_NEXT_SHADER: 2840 case TGSI_PROPERTY_NUM_CLIPDIST_ENABLED: 2841 /* no-op */ 2842 break; 2843 2844 case TGSI_PROPERTY_TCS_VERTICES_OUT: 2845 /* This info is already captured in the shader key */ 2846 break; 2847 2848 
case TGSI_PROPERTY_TES_PRIM_MODE: 2849 emit->tes.prim_mode = prop->u[0].Data; 2850 break; 2851 2852 case TGSI_PROPERTY_TES_SPACING: 2853 emit->tes.spacing = prop->u[0].Data; 2854 break; 2855 2856 case TGSI_PROPERTY_TES_VERTEX_ORDER_CW: 2857 emit->tes.vertices_order_cw = prop->u[0].Data; 2858 break; 2859 2860 case TGSI_PROPERTY_TES_POINT_MODE: 2861 emit->tes.point_mode = prop->u[0].Data; 2862 break; 2863 2864 default: 2865 debug_printf("Unexpected TGSI property %s\n", 2866 tgsi_property_names[prop->Property.PropertyName]); 2867 } 2868 2869 return TRUE; 2870} 2871 2872 2873static void 2874emit_property_instruction(struct svga_shader_emitter_v10 *emit, 2875 VGPU10OpcodeToken0 opcode0, unsigned nData, 2876 unsigned data) 2877{ 2878 begin_emit_instruction(emit); 2879 emit_dword(emit, opcode0.value); 2880 if (nData) 2881 emit_dword(emit, data); 2882 end_emit_instruction(emit); 2883} 2884 2885 2886/** 2887 * Emit property instructions 2888 */ 2889static void 2890emit_property_instructions(struct svga_shader_emitter_v10 *emit) 2891{ 2892 VGPU10OpcodeToken0 opcode0; 2893 2894 assert(emit->unit == PIPE_SHADER_GEOMETRY); 2895 2896 /* emit input primitive type declaration */ 2897 opcode0.value = 0; 2898 opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INPUT_PRIMITIVE; 2899 opcode0.primitive = emit->gs.prim_type; 2900 emit_property_instruction(emit, opcode0, 0, 0); 2901 2902 /* emit max output vertices */ 2903 opcode0.value = 0; 2904 opcode0.opcodeType = VGPU10_OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT; 2905 emit_property_instruction(emit, opcode0, 1, emit->gs.max_out_vertices); 2906 2907 if (emit->version >= 50 && emit->gs.invocations > 0) { 2908 opcode0.value = 0; 2909 opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_INSTANCE_COUNT; 2910 emit_property_instruction(emit, opcode0, 1, emit->gs.invocations); 2911 } 2912} 2913 2914 2915/** 2916 * A helper function to declare tessellator domain in a hull shader or 2917 * in the domain shader. 2918 */ 2919static void 2920emit_tessellator_domain(struct svga_shader_emitter_v10 *emit, 2921 enum pipe_prim_type prim_mode) 2922{ 2923 VGPU10OpcodeToken0 opcode0; 2924 2925 opcode0.value = 0; 2926 opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_DOMAIN; 2927 switch (prim_mode) { 2928 case PIPE_PRIM_QUADS: 2929 case PIPE_PRIM_LINES: 2930 opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_QUAD; 2931 break; 2932 case PIPE_PRIM_TRIANGLES: 2933 opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_TRI; 2934 break; 2935 default: 2936 debug_printf("Invalid tessellator prim mode %d\n", prim_mode); 2937 opcode0.tessDomain = VGPU10_TESSELLATOR_DOMAIN_UNDEFINED; 2938 } 2939 begin_emit_instruction(emit); 2940 emit_dword(emit, opcode0.value); 2941 end_emit_instruction(emit); 2942} 2943 2944 2945/** 2946 * Emit domain shader declarations. 2947 */ 2948static void 2949emit_domain_shader_declarations(struct svga_shader_emitter_v10 *emit) 2950{ 2951 VGPU10OpcodeToken0 opcode0; 2952 2953 assert(emit->unit == PIPE_SHADER_TESS_EVAL); 2954 2955 /* Emit the input control point count */ 2956 assert(emit->key.tes.vertices_per_patch >= 0 && 2957 emit->key.tes.vertices_per_patch <= 32); 2958 2959 opcode0.value = 0; 2960 opcode0.opcodeType = VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT; 2961 opcode0.controlPointCount = emit->key.tes.vertices_per_patch; 2962 begin_emit_instruction(emit); 2963 emit_dword(emit, opcode0.value); 2964 end_emit_instruction(emit); 2965 2966 emit_tessellator_domain(emit, emit->tes.prim_mode); 2967} 2968 2969 2970/** 2971 * Some common values like 0.0, 1.0, 0.5, etc. 
are frequently needed 2972 * to implement some instructions. We pre-allocate those values here 2973 * in the immediate constant buffer. 2974 */ 2975static void 2976alloc_common_immediates(struct svga_shader_emitter_v10 *emit) 2977{ 2978 unsigned n = 0; 2979 2980 emit->common_immediate_pos[n++] = 2981 alloc_immediate_float4(emit, 0.0f, 1.0f, 0.5f, -1.0f); 2982 2983 if (emit->info.opcode_count[TGSI_OPCODE_LIT] > 0) { 2984 emit->common_immediate_pos[n++] = 2985 alloc_immediate_float4(emit, 128.0f, -128.0f, 0.0f, 0.0f); 2986 } 2987 2988 emit->common_immediate_pos[n++] = 2989 alloc_immediate_int4(emit, 0, 1, 0, -1); 2990 2991 if (emit->info.opcode_count[TGSI_OPCODE_IMSB] > 0 || 2992 emit->info.opcode_count[TGSI_OPCODE_UMSB] > 0) { 2993 emit->common_immediate_pos[n++] = 2994 alloc_immediate_int4(emit, 31, 0, 0, 0); 2995 } 2996 2997 if (emit->info.opcode_count[TGSI_OPCODE_UBFE] > 0 || 2998 emit->info.opcode_count[TGSI_OPCODE_IBFE] > 0 || 2999 emit->info.opcode_count[TGSI_OPCODE_BFI] > 0) { 3000 emit->common_immediate_pos[n++] = 3001 alloc_immediate_int4(emit, 32, 0, 0, 0); 3002 } 3003 3004 if (emit->key.vs.attrib_puint_to_snorm) { 3005 emit->common_immediate_pos[n++] = 3006 alloc_immediate_float4(emit, -2.0f, 2.0f, 3.0f, -1.66666f); 3007 } 3008 3009 if (emit->key.vs.attrib_puint_to_uscaled) { 3010 emit->common_immediate_pos[n++] = 3011 alloc_immediate_float4(emit, 1023.0f, 3.0f, 0.0f, 0.0f); 3012 } 3013 3014 if (emit->key.vs.attrib_puint_to_sscaled) { 3015 emit->common_immediate_pos[n++] = 3016 alloc_immediate_int4(emit, 22, 12, 2, 0); 3017 3018 emit->common_immediate_pos[n++] = 3019 alloc_immediate_int4(emit, 22, 30, 0, 0); 3020 } 3021 3022 if (emit->vposition.num_prescale > 1) { 3023 unsigned i; 3024 for (i = 0; i < emit->vposition.num_prescale; i+=4) { 3025 emit->common_immediate_pos[n++] = 3026 alloc_immediate_int4(emit, i, i+1, i+2, i+3); 3027 } 3028 } 3029 3030 emit->immediates_dbl = (double (*)[2]) emit->immediates; 3031 3032 if (emit->info.opcode_count[TGSI_OPCODE_DNEG] > 0) { 3033 emit->common_immediate_pos[n++] = 3034 alloc_immediate_double2(emit, -1.0, -1.0); 3035 } 3036 3037 if (emit->info.opcode_count[TGSI_OPCODE_DSQRT] > 0) { 3038 emit->common_immediate_pos[n++] = 3039 alloc_immediate_double2(emit, 0.0, 0.0); 3040 emit->common_immediate_pos[n++] = 3041 alloc_immediate_double2(emit, 1.0, 1.0); 3042 } 3043 3044 if (emit->info.opcode_count[TGSI_OPCODE_INTERP_OFFSET] > 0) { 3045 emit->common_immediate_pos[n++] = 3046 alloc_immediate_float4(emit, 16.0f, -16.0f, 0.0, 0.0); 3047 } 3048 3049 assert(n <= ARRAY_SIZE(emit->common_immediate_pos)); 3050 3051 unsigned i; 3052 3053 for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { 3054 if (emit->key.tex[i].texel_bias) { 3055 /* Replace 0.0f if more immediate float value is needed */ 3056 emit->common_immediate_pos[n++] = 3057 alloc_immediate_float4(emit, 0.0001f, 0.0f, 0.0f, 0.0f); 3058 break; 3059 } 3060 } 3061 3062 assert(n <= ARRAY_SIZE(emit->common_immediate_pos)); 3063 emit->num_common_immediates = n; 3064} 3065 3066 3067/** 3068 * Emit hull shader declarations. 
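 * This covers the input/output control point counts, the tessellator
 * domain, output primitive and partitioning, the constant, sampler and
 * resource declarations, and the immediate constant buffer (which includes
 * the vertices-per-patch immediate used later by the patch constant phase).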
3069*/ 3070static void 3071emit_hull_shader_declarations(struct svga_shader_emitter_v10 *emit) 3072{ 3073 VGPU10OpcodeToken0 opcode0; 3074 3075 /* Emit the input control point count */ 3076 assert(emit->key.tcs.vertices_per_patch > 0 && 3077 emit->key.tcs.vertices_per_patch <= 32); 3078 3079 opcode0.value = 0; 3080 opcode0.opcodeType = VGPU10_OPCODE_DCL_INPUT_CONTROL_POINT_COUNT; 3081 opcode0.controlPointCount = emit->key.tcs.vertices_per_patch; 3082 begin_emit_instruction(emit); 3083 emit_dword(emit, opcode0.value); 3084 end_emit_instruction(emit); 3085 3086 /* Emit the output control point count */ 3087 assert(emit->key.tcs.vertices_out >= 0 && emit->key.tcs.vertices_out <= 32); 3088 3089 opcode0.value = 0; 3090 opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT; 3091 opcode0.controlPointCount = emit->key.tcs.vertices_out; 3092 begin_emit_instruction(emit); 3093 emit_dword(emit, opcode0.value); 3094 end_emit_instruction(emit); 3095 3096 /* Emit tessellator domain */ 3097 emit_tessellator_domain(emit, emit->key.tcs.prim_mode); 3098 3099 /* Emit tessellator output primitive */ 3100 opcode0.value = 0; 3101 opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_OUTPUT_PRIMITIVE; 3102 if (emit->key.tcs.point_mode) { 3103 opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_POINT; 3104 } 3105 else if (emit->key.tcs.prim_mode == PIPE_PRIM_LINES) { 3106 opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_LINE; 3107 } 3108 else { 3109 assert(emit->key.tcs.prim_mode == PIPE_PRIM_QUADS || 3110 emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES); 3111 3112 if (emit->key.tcs.vertices_order_cw) 3113 opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CCW; 3114 else 3115 opcode0.tessOutputPrimitive = VGPU10_TESSELLATOR_OUTPUT_TRIANGLE_CW; 3116 } 3117 begin_emit_instruction(emit); 3118 emit_dword(emit, opcode0.value); 3119 end_emit_instruction(emit); 3120 3121 /* Emit tessellator partitioning */ 3122 opcode0.value = 0; 3123 opcode0.opcodeType = VGPU10_OPCODE_DCL_TESS_PARTITIONING; 3124 switch (emit->key.tcs.spacing) { 3125 case PIPE_TESS_SPACING_FRACTIONAL_ODD: 3126 opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_FRACTIONAL_ODD; 3127 break; 3128 case PIPE_TESS_SPACING_FRACTIONAL_EVEN: 3129 opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN; 3130 break; 3131 case PIPE_TESS_SPACING_EQUAL: 3132 opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_INTEGER; 3133 break; 3134 default: 3135 debug_printf("invalid tessellator spacing %d\n", emit->key.tcs.spacing); 3136 opcode0.tessPartitioning = VGPU10_TESSELLATOR_PARTITIONING_UNDEFINED; 3137 } 3138 begin_emit_instruction(emit); 3139 emit_dword(emit, opcode0.value); 3140 end_emit_instruction(emit); 3141 3142 /* Declare constant registers */ 3143 emit_constant_declaration(emit); 3144 3145 /* Declare samplers and resources */ 3146 emit_sampler_declarations(emit); 3147 emit_resource_declarations(emit); 3148 3149 alloc_common_immediates(emit); 3150 3151 int nVertices = emit->key.tcs.vertices_per_patch; 3152 emit->tcs.imm_index = 3153 alloc_immediate_int4(emit, nVertices, nVertices, nVertices, 0); 3154 3155 /* Now, emit the constant block containing all the immediates 3156 * declared by shader, as well as the extra ones seen above. 3157 */ 3158 emit_vgpu10_immediates_block(emit); 3159 3160} 3161 3162 3163/** 3164 * A helper function to determine if control point phase is needed. 3165 * Returns TRUE if there is control point output. 
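 * Patch-constant-only outputs (TGSI_SEMANTIC_PATCH, TESSINNER, TESSOUTER)
 * do not by themselves require a control point phase.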
3166 */ 3167static boolean 3168needs_control_point_phase(struct svga_shader_emitter_v10 *emit) 3169{ 3170 unsigned i; 3171 3172 assert(emit->unit == PIPE_SHADER_TESS_CTRL); 3173 3174 /* If output control point count does not match the input count, 3175 * we need a control point phase to explicitly set the output control 3176 * points. 3177 */ 3178 if ((emit->key.tcs.vertices_per_patch != emit->key.tcs.vertices_out) && 3179 emit->key.tcs.vertices_out) 3180 return TRUE; 3181 3182 for (i = 0; i < emit->info.num_outputs; i++) { 3183 switch (emit->info.output_semantic_name[i]) { 3184 case TGSI_SEMANTIC_PATCH: 3185 case TGSI_SEMANTIC_TESSOUTER: 3186 case TGSI_SEMANTIC_TESSINNER: 3187 break; 3188 default: 3189 return TRUE; 3190 } 3191 } 3192 return FALSE; 3193} 3194 3195 3196/** 3197 * A helper function to add shader signature for passthrough control point 3198 * phase. This signature is also generated for passthrough control point 3199 * phase from HLSL compiler and is needed by Metal Renderer. 3200 */ 3201static void 3202emit_passthrough_control_point_signature(struct svga_shader_emitter_v10 *emit) 3203{ 3204 struct svga_shader_signature *sgn = &emit->signature; 3205 SVGA3dDXShaderSignatureEntry *sgnEntry; 3206 unsigned i; 3207 3208 for (i = 0; i < emit->info.num_inputs; i++) { 3209 unsigned index = emit->linkage.input_map[i]; 3210 enum tgsi_semantic sem_name = emit->info.input_semantic_name[i]; 3211 3212 sgnEntry = &sgn->inputs[sgn->header.numInputSignatures++]; 3213 3214 set_shader_signature_entry(sgnEntry, index, 3215 tgsi_semantic_to_sgn_name[sem_name], 3216 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 3217 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN, 3218 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT); 3219 3220 sgnEntry = &sgn->outputs[sgn->header.numOutputSignatures++]; 3221 3222 set_shader_signature_entry(sgnEntry, i, 3223 tgsi_semantic_to_sgn_name[sem_name], 3224 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 3225 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN, 3226 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT); 3227 } 3228} 3229 3230 3231/** 3232 * A helper function to emit an instruction to start the control point phase 3233 * in the hull shader. 3234 */ 3235static void 3236emit_control_point_phase_instruction(struct svga_shader_emitter_v10 *emit) 3237{ 3238 VGPU10OpcodeToken0 opcode0; 3239 3240 opcode0.value = 0; 3241 opcode0.opcodeType = VGPU10_OPCODE_HS_CONTROL_POINT_PHASE; 3242 begin_emit_instruction(emit); 3243 emit_dword(emit, opcode0.value); 3244 end_emit_instruction(emit); 3245} 3246 3247 3248/** 3249 * Start the hull shader control point phase 3250 */ 3251static boolean 3252emit_hull_shader_control_point_phase(struct svga_shader_emitter_v10 *emit) 3253{ 3254 /* If there is no control point output, skip the control point phase. */ 3255 if (!needs_control_point_phase(emit)) { 3256 if (!emit->key.tcs.vertices_out) { 3257 /** 3258 * If the tcs does not explicitly generate any control point output 3259 * and the tes does not use any input control point, then 3260 * emit an empty control point phase with zero output control 3261 * point count. 3262 */ 3263 emit_control_point_phase_instruction(emit); 3264 3265 /** 3266 * Since this is an empty control point phase, we will need to 3267 * add input signatures when we parse the tcs again in the 3268 * patch constant phase. 3269 */ 3270 emit->tcs.fork_phase_add_signature = TRUE; 3271 } 3272 else { 3273 /** 3274 * Before skipping the control point phase, add the signature for 3275 * the passthrough control point. 
3276 */ 3277 emit_passthrough_control_point_signature(emit); 3278 } 3279 return FALSE; 3280 } 3281 3282 /* Start the control point phase in the hull shader */ 3283 emit_control_point_phase_instruction(emit); 3284 3285 /* Declare the output control point ID */ 3286 if (emit->tcs.invocation_id_sys_index == INVALID_INDEX) { 3287 /* Add invocation id declaration if it does not exist */ 3288 emit->tcs.invocation_id_sys_index = emit->info.num_system_values + 1; 3289 } 3290 3291 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, 3292 VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID, 3293 VGPU10_OPERAND_INDEX_0D, 3294 0, 1, 3295 VGPU10_NAME_UNDEFINED, 3296 VGPU10_OPERAND_0_COMPONENT, 0, 3297 0, 3298 VGPU10_INTERPOLATION_CONSTANT, TRUE, 3299 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); 3300 3301 if (emit->tcs.prim_id_index != INVALID_INDEX) { 3302 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, 3303 VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID, 3304 VGPU10_OPERAND_INDEX_0D, 3305 0, 1, 3306 VGPU10_NAME_UNDEFINED, 3307 VGPU10_OPERAND_0_COMPONENT, 3308 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 3309 0, 3310 VGPU10_INTERPOLATION_UNDEFINED, TRUE, 3311 SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID); 3312 } 3313 3314 return TRUE; 3315} 3316 3317 3318/** 3319 * Start the hull shader patch constant phase and 3320 * do the second pass of the tcs translation and emit 3321 * the relevant declarations and instructions for this phase. 3322 */ 3323static boolean 3324emit_hull_shader_patch_constant_phase(struct svga_shader_emitter_v10 *emit, 3325 struct tgsi_parse_context *parse) 3326{ 3327 unsigned inst_number = 0; 3328 boolean ret = TRUE; 3329 VGPU10OpcodeToken0 opcode0; 3330 3331 emit->skip_instruction = FALSE; 3332 3333 /* Start the patch constant phase */ 3334 opcode0.value = 0; 3335 opcode0.opcodeType = VGPU10_OPCODE_HS_FORK_PHASE; 3336 begin_emit_instruction(emit); 3337 emit_dword(emit, opcode0.value); 3338 end_emit_instruction(emit); 3339 3340 /* Set the current phase to patch constant phase */ 3341 emit->tcs.control_point_phase = FALSE; 3342 3343 if (emit->tcs.prim_id_index != INVALID_INDEX) { 3344 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, 3345 VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID, 3346 VGPU10_OPERAND_INDEX_0D, 3347 0, 1, 3348 VGPU10_NAME_UNDEFINED, 3349 VGPU10_OPERAND_0_COMPONENT, 3350 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 3351 0, 3352 VGPU10_INTERPOLATION_UNDEFINED, TRUE, 3353 SVGADX_SIGNATURE_SEMANTIC_NAME_PRIMITIVE_ID); 3354 } 3355 3356 /* Emit declarations for this phase */ 3357 emit->index_range.required = 3358 emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? TRUE : FALSE; 3359 emit_tcs_input_declarations(emit); 3360 3361 if (emit->index_range.start_index != INVALID_INDEX) { 3362 emit_index_range_declaration(emit); 3363 } 3364 3365 emit->index_range.required = 3366 emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? 
                                                                TRUE : FALSE;
   emit_tcs_output_declarations(emit);

   if (emit->index_range.start_index != INVALID_INDEX) {
      emit_index_range_declaration(emit);
   }
   emit->index_range.required = FALSE;

   emit_temporaries_declaration(emit);

   /* Reset the token position to the first instruction token
    * in preparation for the second pass of the shader
    */
   parse->Position = emit->tcs.instruction_token_pos;

   while (!tgsi_parse_end_of_tokens(parse)) {
      tgsi_parse_token(parse);

      assert(parse->FullToken.Token.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
      ret = emit_vgpu10_instruction(emit, inst_number++,
                                    &parse->FullToken.FullInstruction);

      /* This applies to the TCS only.  If the shader reads a patch constant
       * output in the fork phase, we re-emit every instruction that writes
       * to a patch constant output in the fork phase so that the result is
       * also stored in a temporary.
       */
      if (emit->reemit_instruction) {
         assert(emit->unit == PIPE_SHADER_TESS_CTRL);
         ret = emit_vgpu10_instruction(emit, inst_number,
                                       &parse->FullToken.FullInstruction);
      }

      if (!ret)
         return FALSE;
   }

   return TRUE;
}


/**
 * Emit index range declaration.
 */
static boolean
emit_index_range_declaration(struct svga_shader_emitter_v10 *emit)
{
   if (emit->version < 50)
      return TRUE;

   assert(emit->index_range.start_index != INVALID_INDEX);
   assert(emit->index_range.count != 0);
   assert(emit->index_range.required);
   assert(emit->index_range.operandType != VGPU10_NUM_OPERANDS);
   assert(emit->index_range.dim != 0);
   assert(emit->index_range.size != 0);

   VGPU10OpcodeToken0 opcode0;
   VGPU10OperandToken0 operand0;

   opcode0.value = 0;
   opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEX_RANGE;

   operand0.value = 0;
   operand0.numComponents = VGPU10_OPERAND_4_COMPONENT;
   operand0.indexDimension = emit->index_range.dim;
   operand0.operandType = emit->index_range.operandType;
   operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL;
   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;

   if (emit->index_range.dim == VGPU10_OPERAND_INDEX_2D)
      operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;

   begin_emit_instruction(emit);
   emit_dword(emit, opcode0.value);
   emit_dword(emit, operand0.value);

   if (emit->index_range.dim == VGPU10_OPERAND_INDEX_2D) {
      emit_dword(emit, emit->index_range.size);
      emit_dword(emit, emit->index_range.start_index);
      emit_dword(emit, emit->index_range.count);
   }
   else {
      emit_dword(emit, emit->index_range.start_index);
      emit_dword(emit, emit->index_range.count);
   }

   end_emit_instruction(emit);

   /* Reset fields in emit->index_range struct except
    * emit->index_range.required which will be reset afterwards
    */
   emit->index_range.count = 0;
   emit->index_range.operandType = VGPU10_NUM_OPERANDS;
   emit->index_range.start_index = INVALID_INDEX;
   emit->index_range.size = 0;
   emit->index_range.dim = 0;

   return TRUE;
}


/**
 * Emit a vgpu10 declaration "instruction".
 * \param index the register index
 * \param size array size of the operand. In most cases, it is 1,
 *             but for inputs to geometry shader, the array size varies
 *             depending on the primitive type.
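 *             (e.g. 3 for triangle input or 6 for triangles with adjacency;
 *             see the inputArraySize[] table in emit_vgpu10_property()).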
3474 */ 3475static void 3476emit_decl_instruction(struct svga_shader_emitter_v10 *emit, 3477 VGPU10OpcodeToken0 opcode0, 3478 VGPU10OperandToken0 operand0, 3479 VGPU10NameToken name_token, 3480 unsigned index, unsigned size) 3481{ 3482 assert(opcode0.opcodeType); 3483 assert(operand0.mask || 3484 (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT) || 3485 (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_DEPTH) || 3486 (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK) || 3487 (operand0.operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) || 3488 (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID) || 3489 (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID) || 3490 (operand0.operandType == VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK) || 3491 (operand0.operandType == VGPU10_OPERAND_TYPE_STREAM)); 3492 3493 begin_emit_instruction(emit); 3494 emit_dword(emit, opcode0.value); 3495 3496 emit_dword(emit, operand0.value); 3497 3498 if (operand0.indexDimension == VGPU10_OPERAND_INDEX_1D) { 3499 /* Next token is the index of the register to declare */ 3500 emit_dword(emit, index); 3501 } 3502 else if (operand0.indexDimension >= VGPU10_OPERAND_INDEX_2D) { 3503 /* Next token is the size of the register */ 3504 emit_dword(emit, size); 3505 3506 /* Followed by the index of the register */ 3507 emit_dword(emit, index); 3508 } 3509 3510 if (name_token.value) { 3511 emit_dword(emit, name_token.value); 3512 } 3513 3514 end_emit_instruction(emit); 3515} 3516 3517 3518/** 3519 * Emit the declaration for a shader input. 3520 * \param opcodeType opcode type, one of VGPU10_OPCODE_DCL_INPUTx 3521 * \param operandType operand type, one of VGPU10_OPERAND_TYPE_INPUT_x 3522 * \param dim index dimension 3523 * \param index the input register index 3524 * \param size array size of the operand. In most cases, it is 1, 3525 * but for inputs to geometry shader, the array size varies 3526 * depending on the primitive type. For tessellation control 3527 * shader, the array size is the vertex count per patch. 
 * \param name  one of VGPU10_NAME_x
 * \param numComp  number of components
 * \param selMode  component selection mode
 * \param usageMask  bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values
 * \param interpMode  interpolation mode
 */
static void
emit_input_declaration(struct svga_shader_emitter_v10 *emit,
                       VGPU10_OPCODE_TYPE opcodeType,
                       VGPU10_OPERAND_TYPE operandType,
                       VGPU10_OPERAND_INDEX_DIMENSION dim,
                       unsigned index, unsigned size,
                       VGPU10_SYSTEM_NAME name,
                       VGPU10_OPERAND_NUM_COMPONENTS numComp,
                       VGPU10_OPERAND_4_COMPONENT_SELECTION_MODE selMode,
                       unsigned usageMask,
                       VGPU10_INTERPOLATION_MODE interpMode,
                       boolean addSignature,
                       SVGA3dDXSignatureSemanticName sgnName)
{
   VGPU10OpcodeToken0 opcode0;
   VGPU10OperandToken0 operand0;
   VGPU10NameToken name_token;

   assert(usageMask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL);
   assert(opcodeType == VGPU10_OPCODE_DCL_INPUT ||
          opcodeType == VGPU10_OPCODE_DCL_INPUT_SIV ||
          opcodeType == VGPU10_OPCODE_DCL_INPUT_SGV ||
          opcodeType == VGPU10_OPCODE_DCL_INPUT_PS ||
          opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SIV ||
          opcodeType == VGPU10_OPCODE_DCL_INPUT_PS_SGV);
   assert(operandType == VGPU10_OPERAND_TYPE_INPUT ||
          operandType == VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID ||
          operandType == VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK ||
          operandType == VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID ||
          operandType == VGPU10_OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID ||
          operandType == VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT ||
          operandType == VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT ||
          operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT ||
          operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID ||
          operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_GROUP_ID ||
          operandType == VGPU10_OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP);

   assert(numComp <= VGPU10_OPERAND_4_COMPONENT);
   assert(selMode <= VGPU10_OPERAND_4_COMPONENT_MASK_MODE);
   assert(dim <= VGPU10_OPERAND_INDEX_3D);
   assert(name == VGPU10_NAME_UNDEFINED ||
          name == VGPU10_NAME_POSITION ||
          name == VGPU10_NAME_INSTANCE_ID ||
          name == VGPU10_NAME_VERTEX_ID ||
          name == VGPU10_NAME_PRIMITIVE_ID ||
          name == VGPU10_NAME_IS_FRONT_FACE ||
          name == VGPU10_NAME_SAMPLE_INDEX ||
          name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX ||
          name == VGPU10_NAME_VIEWPORT_ARRAY_INDEX);

   assert(interpMode == VGPU10_INTERPOLATION_UNDEFINED ||
          interpMode == VGPU10_INTERPOLATION_CONSTANT ||
          interpMode == VGPU10_INTERPOLATION_LINEAR ||
          interpMode == VGPU10_INTERPOLATION_LINEAR_CENTROID ||
          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE ||
          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID ||
          interpMode == VGPU10_INTERPOLATION_LINEAR_SAMPLE ||
          interpMode == VGPU10_INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE);

   check_register_index(emit, opcodeType, index);

   opcode0.value = operand0.value = name_token.value = 0;

   opcode0.opcodeType = opcodeType;
   opcode0.interpolationMode = interpMode;

   operand0.operandType = operandType;
   operand0.numComponents = numComp;
   operand0.selectionMode = selMode;
   operand0.mask = usageMask;
   operand0.indexDimension = dim;
   operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32;
   if (dim == VGPU10_OPERAND_INDEX_2D)
      operand0.index1Representation =
VGPU10_OPERAND_INDEX_IMMEDIATE32; 3608 3609 name_token.name = name; 3610 3611 emit_decl_instruction(emit, opcode0, operand0, name_token, index, size); 3612 3613 if (addSignature) { 3614 struct svga_shader_signature *sgn = &emit->signature; 3615 if (operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT) { 3616 /* Set patch constant signature */ 3617 SVGA3dDXShaderSignatureEntry *sgnEntry = 3618 &sgn->patchConstants[sgn->header.numPatchConstantSignatures++]; 3619 set_shader_signature_entry(sgnEntry, index, 3620 sgnName, usageMask, 3621 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN, 3622 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT); 3623 3624 } else if (operandType == VGPU10_OPERAND_TYPE_INPUT || 3625 operandType == VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT) { 3626 /* Set input signature */ 3627 SVGA3dDXShaderSignatureEntry *sgnEntry = 3628 &sgn->inputs[sgn->header.numInputSignatures++]; 3629 set_shader_signature_entry(sgnEntry, index, 3630 sgnName, usageMask, 3631 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN, 3632 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT); 3633 } 3634 } 3635 3636 if (emit->index_range.required) { 3637 /* Here, index_range declaration is only applicable for opcodeType 3638 * VGPU10_OPCODE_DCL_INPUT and VGPU10_OPCODE_DCL_INPUT_PS and 3639 * for operandType VGPU10_OPERAND_TYPE_INPUT, 3640 * VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT and 3641 * VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT. 3642 */ 3643 if ((opcodeType != VGPU10_OPCODE_DCL_INPUT && 3644 opcodeType != VGPU10_OPCODE_DCL_INPUT_PS) || 3645 (operandType != VGPU10_OPERAND_TYPE_INPUT && 3646 operandType != VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT && 3647 operandType != VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT)) { 3648 if (emit->index_range.start_index != INVALID_INDEX) { 3649 emit_index_range_declaration(emit); 3650 } 3651 return; 3652 } 3653 3654 if (emit->index_range.operandType == VGPU10_NUM_OPERANDS) { 3655 /* Need record new index_range */ 3656 emit->index_range.count = 1; 3657 emit->index_range.operandType = operandType; 3658 emit->index_range.start_index = index; 3659 emit->index_range.size = size; 3660 emit->index_range.dim = dim; 3661 } 3662 else if (index != 3663 (emit->index_range.start_index + emit->index_range.count) || 3664 emit->index_range.operandType != operandType) { 3665 /* Input index is not contiguous with index range or operandType is 3666 * different from index range's operandType. We need to emit current 3667 * index_range first and then start recording next index range. 3668 */ 3669 emit_index_range_declaration(emit); 3670 3671 emit->index_range.count = 1; 3672 emit->index_range.operandType = operandType; 3673 emit->index_range.start_index = index; 3674 emit->index_range.size = size; 3675 emit->index_range.dim = dim; 3676 } 3677 else if (emit->index_range.operandType == operandType) { 3678 /* Since input index is contiguous with index range and operandType 3679 * is same as index range's operandType, increment index range count. 3680 */ 3681 emit->index_range.count++; 3682 } 3683 } 3684} 3685 3686 3687/** 3688 * Emit the declaration for a shader output. 
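 * This also tracks runs of contiguous output registers so that a
 * VGPU10_OPCODE_DCL_INDEX_RANGE declaration can be emitted for them
 * afterwards when the outputs are accessed with indirect addressing
 * (see emit->index_range).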
3689 * \param type one of VGPU10_OPCODE_DCL_OUTPUTx 3690 * \param index the output register index 3691 * \param name one of VGPU10_NAME_x 3692 * \param usageMask bitfield of VGPU10_OPERAND_4_COMPONENT_MASK_x values 3693 */ 3694static void 3695emit_output_declaration(struct svga_shader_emitter_v10 *emit, 3696 VGPU10_OPCODE_TYPE type, unsigned index, 3697 VGPU10_SYSTEM_NAME name, 3698 unsigned writemask, 3699 boolean addSignature, 3700 SVGA3dDXSignatureSemanticName sgnName) 3701{ 3702 VGPU10OpcodeToken0 opcode0; 3703 VGPU10OperandToken0 operand0; 3704 VGPU10NameToken name_token; 3705 3706 assert(writemask <= VGPU10_OPERAND_4_COMPONENT_MASK_ALL); 3707 assert(type == VGPU10_OPCODE_DCL_OUTPUT || 3708 type == VGPU10_OPCODE_DCL_OUTPUT_SGV || 3709 type == VGPU10_OPCODE_DCL_OUTPUT_SIV); 3710 assert(name == VGPU10_NAME_UNDEFINED || 3711 name == VGPU10_NAME_POSITION || 3712 name == VGPU10_NAME_PRIMITIVE_ID || 3713 name == VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX || 3714 name == VGPU10_NAME_VIEWPORT_ARRAY_INDEX || 3715 name == VGPU10_NAME_CLIP_DISTANCE); 3716 3717 check_register_index(emit, type, index); 3718 3719 opcode0.value = operand0.value = name_token.value = 0; 3720 3721 opcode0.opcodeType = type; 3722 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT; 3723 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 3724 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; 3725 operand0.mask = writemask; 3726 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 3727 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 3728 3729 name_token.name = name; 3730 3731 emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1); 3732 3733 /* Capture output signature */ 3734 if (addSignature) { 3735 struct svga_shader_signature *sgn = &emit->signature; 3736 SVGA3dDXShaderSignatureEntry *sgnEntry = 3737 &sgn->outputs[sgn->header.numOutputSignatures++]; 3738 set_shader_signature_entry(sgnEntry, index, 3739 sgnName, writemask, 3740 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN, 3741 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT); 3742 } 3743 3744 if (emit->index_range.required) { 3745 /* Here, index_range declaration is only applicable for opcodeType 3746 * VGPU10_OPCODE_DCL_OUTPUT and for operandType 3747 * VGPU10_OPERAND_TYPE_OUTPUT. 3748 */ 3749 if (type != VGPU10_OPCODE_DCL_OUTPUT) { 3750 if (emit->index_range.start_index != INVALID_INDEX) { 3751 emit_index_range_declaration(emit); 3752 } 3753 return; 3754 } 3755 3756 if (emit->index_range.operandType == VGPU10_NUM_OPERANDS) { 3757 /* Need record new index_range */ 3758 emit->index_range.count = 1; 3759 emit->index_range.operandType = VGPU10_OPERAND_TYPE_OUTPUT; 3760 emit->index_range.start_index = index; 3761 emit->index_range.size = 1; 3762 emit->index_range.dim = VGPU10_OPERAND_INDEX_1D; 3763 } 3764 else if (index != 3765 (emit->index_range.start_index + emit->index_range.count)) { 3766 /* Output index is not contiguous with index range. We need to 3767 * emit current index_range first and then start recording next 3768 * index range. 3769 */ 3770 emit_index_range_declaration(emit); 3771 3772 emit->index_range.count = 1; 3773 emit->index_range.operandType = VGPU10_OPERAND_TYPE_OUTPUT; 3774 emit->index_range.start_index = index; 3775 emit->index_range.size = 1; 3776 emit->index_range.dim = VGPU10_OPERAND_INDEX_1D; 3777 } 3778 else { 3779 /* Since output index is contiguous with index range, increment 3780 * index range count. 
3781 */ 3782 emit->index_range.count++; 3783 } 3784 } 3785} 3786 3787 3788/** 3789 * Emit the declaration for the fragment depth output. 3790 */ 3791static void 3792emit_fragdepth_output_declaration(struct svga_shader_emitter_v10 *emit) 3793{ 3794 VGPU10OpcodeToken0 opcode0; 3795 VGPU10OperandToken0 operand0; 3796 VGPU10NameToken name_token; 3797 3798 assert(emit->unit == PIPE_SHADER_FRAGMENT); 3799 3800 opcode0.value = operand0.value = name_token.value = 0; 3801 3802 opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT; 3803 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_DEPTH; 3804 operand0.numComponents = VGPU10_OPERAND_1_COMPONENT; 3805 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; 3806 operand0.mask = 0; 3807 3808 emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1); 3809} 3810 3811 3812/** 3813 * Emit the declaration for the fragment sample mask/coverage output. 3814 */ 3815static void 3816emit_samplemask_output_declaration(struct svga_shader_emitter_v10 *emit) 3817{ 3818 VGPU10OpcodeToken0 opcode0; 3819 VGPU10OperandToken0 operand0; 3820 VGPU10NameToken name_token; 3821 3822 assert(emit->unit == PIPE_SHADER_FRAGMENT); 3823 assert(emit->version >= 41); 3824 3825 opcode0.value = operand0.value = name_token.value = 0; 3826 3827 opcode0.opcodeType = VGPU10_OPCODE_DCL_OUTPUT; 3828 operand0.operandType = VGPU10_OPERAND_TYPE_OUTPUT_COVERAGE_MASK; 3829 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; 3830 operand0.indexDimension = VGPU10_OPERAND_INDEX_0D; 3831 operand0.mask = 0; 3832 3833 emit_decl_instruction(emit, opcode0, operand0, name_token, 0, 1); 3834} 3835 3836 3837/** 3838 * Emit output declarations for fragment shader. 3839 */ 3840static void 3841emit_fs_output_declarations(struct svga_shader_emitter_v10 *emit) 3842{ 3843 unsigned int i; 3844 3845 for (i = 0; i < emit->info.num_outputs; i++) { 3846 /*const unsigned usage_mask = emit->info.output_usage_mask[i];*/ 3847 const enum tgsi_semantic semantic_name = 3848 emit->info.output_semantic_name[i]; 3849 const unsigned semantic_index = emit->info.output_semantic_index[i]; 3850 unsigned index = i; 3851 3852 if (semantic_name == TGSI_SEMANTIC_COLOR) { 3853 assert(semantic_index < ARRAY_SIZE(emit->fs.color_out_index)); 3854 3855 emit->fs.color_out_index[semantic_index] = index; 3856 3857 emit->fs.num_color_outputs = MAX2(emit->fs.num_color_outputs, 3858 index + 1); 3859 3860 /* The semantic index is the shader's color output/buffer index */ 3861 emit_output_declaration(emit, 3862 VGPU10_OPCODE_DCL_OUTPUT, semantic_index, 3863 VGPU10_NAME_UNDEFINED, 3864 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 3865 TRUE, 3866 map_tgsi_semantic_to_sgn_name(semantic_name)); 3867 3868 if (semantic_index == 0) { 3869 if (emit->key.fs.write_color0_to_n_cbufs > 1) { 3870 /* Emit declarations for the additional color outputs 3871 * for broadcasting. 
3872 */ 3873 unsigned j; 3874 for (j = 1; j < emit->key.fs.write_color0_to_n_cbufs; j++) { 3875 /* Allocate a new output index */ 3876 unsigned idx = emit->info.num_outputs + j - 1; 3877 emit->fs.color_out_index[j] = idx; 3878 emit_output_declaration(emit, 3879 VGPU10_OPCODE_DCL_OUTPUT, idx, 3880 VGPU10_NAME_UNDEFINED, 3881 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 3882 TRUE, 3883 map_tgsi_semantic_to_sgn_name(semantic_name)); 3884 emit->info.output_semantic_index[idx] = j; 3885 } 3886 3887 emit->fs.num_color_outputs = 3888 emit->key.fs.write_color0_to_n_cbufs; 3889 } 3890 } 3891 } 3892 else if (semantic_name == TGSI_SEMANTIC_POSITION) { 3893 /* Fragment depth output */ 3894 emit_fragdepth_output_declaration(emit); 3895 } 3896 else if (semantic_name == TGSI_SEMANTIC_SAMPLEMASK) { 3897 /* Sample mask output */ 3898 emit_samplemask_output_declaration(emit); 3899 } 3900 else { 3901 assert(!"Bad output semantic name"); 3902 } 3903 } 3904} 3905 3906 3907/** 3908 * Emit common output declaration for vertex processing. 3909 */ 3910static void 3911emit_vertex_output_declaration(struct svga_shader_emitter_v10 *emit, 3912 unsigned index, unsigned writemask, 3913 boolean addSignature) 3914{ 3915 const enum tgsi_semantic semantic_name = 3916 emit->info.output_semantic_name[index]; 3917 const unsigned semantic_index = emit->info.output_semantic_index[index]; 3918 unsigned name, type; 3919 unsigned final_mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL; 3920 3921 assert(emit->unit != PIPE_SHADER_FRAGMENT && 3922 emit->unit != PIPE_SHADER_COMPUTE); 3923 3924 switch (semantic_name) { 3925 case TGSI_SEMANTIC_POSITION: 3926 if (emit->unit == PIPE_SHADER_TESS_CTRL) { 3927 /* position will be declared in control point only */ 3928 assert(emit->tcs.control_point_phase); 3929 type = VGPU10_OPCODE_DCL_OUTPUT; 3930 name = VGPU10_NAME_UNDEFINED; 3931 emit_output_declaration(emit, type, index, name, final_mask, TRUE, 3932 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); 3933 return; 3934 } 3935 else { 3936 type = VGPU10_OPCODE_DCL_OUTPUT_SIV; 3937 name = VGPU10_NAME_POSITION; 3938 } 3939 /* Save the index of the vertex position output register */ 3940 emit->vposition.out_index = index; 3941 break; 3942 case TGSI_SEMANTIC_CLIPDIST: 3943 type = VGPU10_OPCODE_DCL_OUTPUT_SIV; 3944 name = VGPU10_NAME_CLIP_DISTANCE; 3945 /* save the starting index of the clip distance output register */ 3946 if (semantic_index == 0) 3947 emit->clip_dist_out_index = index; 3948 final_mask = apply_clip_plane_mask(emit, writemask, semantic_index); 3949 if (final_mask == 0x0) 3950 return; /* discard this do-nothing declaration */ 3951 break; 3952 case TGSI_SEMANTIC_CLIPVERTEX: 3953 type = VGPU10_OPCODE_DCL_OUTPUT; 3954 name = VGPU10_NAME_UNDEFINED; 3955 emit->clip_vertex_out_index = index; 3956 break; 3957 default: 3958 /* generic output */ 3959 type = VGPU10_OPCODE_DCL_OUTPUT; 3960 name = VGPU10_NAME_UNDEFINED; 3961 } 3962 3963 emit_output_declaration(emit, type, index, name, final_mask, addSignature, 3964 map_tgsi_semantic_to_sgn_name(semantic_name)); 3965} 3966 3967 3968/** 3969 * Emit declaration for outputs in vertex shader. 3970 */ 3971static void 3972emit_vs_output_declarations(struct svga_shader_emitter_v10 *emit) 3973{ 3974 unsigned i; 3975 for (i = 0; i < emit->info.num_outputs; i++) { 3976 emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], TRUE); 3977 } 3978} 3979 3980 3981/** 3982 * A helper function to determine the writemask for an output 3983 * for the specified stream. 
3984 */ 3985static unsigned 3986output_writemask_for_stream(unsigned stream, ubyte output_streams, 3987 ubyte output_usagemask) 3988{ 3989 unsigned i; 3990 unsigned writemask = 0; 3991 3992 for (i = 0; i < 4; i++) { 3993 if ((output_streams & 0x3) == stream) 3994 writemask |= (VGPU10_OPERAND_4_COMPONENT_MASK_X << i); 3995 output_streams >>= 2; 3996 } 3997 return writemask & output_usagemask; 3998} 3999 4000 4001/** 4002 * Emit declaration for outputs in geometry shader. 4003 */ 4004static void 4005emit_gs_output_declarations(struct svga_shader_emitter_v10 *emit) 4006{ 4007 unsigned i; 4008 VGPU10OpcodeToken0 opcode0; 4009 unsigned numStreamsSupported = 1; 4010 int s; 4011 4012 if (emit->version >= 50) { 4013 numStreamsSupported = ARRAY_SIZE(emit->info.num_stream_output_components); 4014 } 4015 4016 /** 4017 * Start emitting from the last stream first, so we end with 4018 * stream 0, so any of the auxiliary output declarations will 4019 * go to stream 0. 4020 */ 4021 for (s = numStreamsSupported-1; s >= 0; s--) { 4022 4023 if (emit->info.num_stream_output_components[s] == 0) 4024 continue; 4025 4026 if (emit->version >= 50) { 4027 /* DCL_STREAM stream */ 4028 begin_emit_instruction(emit); 4029 emit_opcode(emit, VGPU10_OPCODE_DCL_STREAM, FALSE); 4030 emit_stream_register(emit, s); 4031 end_emit_instruction(emit); 4032 } 4033 4034 /* emit output primitive topology declaration */ 4035 opcode0.value = 0; 4036 opcode0.opcodeType = VGPU10_OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY; 4037 opcode0.primitiveTopology = emit->gs.prim_topology; 4038 emit_property_instruction(emit, opcode0, 0, 0); 4039 4040 for (i = 0; i < emit->info.num_outputs; i++) { 4041 unsigned writemask; 4042 4043 /* find out the writemask for this stream */ 4044 writemask = output_writemask_for_stream(s, emit->info.output_streams[i], 4045 emit->output_usage_mask[i]); 4046 4047 if (writemask) { 4048 enum tgsi_semantic semantic_name = 4049 emit->info.output_semantic_name[i]; 4050 4051 /* TODO: Still need to take care of a special case where a 4052 * single varying spans across multiple output registers. 4053 */ 4054 switch(semantic_name) { 4055 case TGSI_SEMANTIC_PRIMID: 4056 emit_output_declaration(emit, 4057 VGPU10_OPCODE_DCL_OUTPUT_SGV, i, 4058 VGPU10_NAME_PRIMITIVE_ID, 4059 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 4060 FALSE, 4061 map_tgsi_semantic_to_sgn_name(semantic_name)); 4062 break; 4063 case TGSI_SEMANTIC_LAYER: 4064 emit_output_declaration(emit, 4065 VGPU10_OPCODE_DCL_OUTPUT_SIV, i, 4066 VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX, 4067 VGPU10_OPERAND_4_COMPONENT_MASK_X, 4068 FALSE, 4069 map_tgsi_semantic_to_sgn_name(semantic_name)); 4070 break; 4071 case TGSI_SEMANTIC_VIEWPORT_INDEX: 4072 emit_output_declaration(emit, 4073 VGPU10_OPCODE_DCL_OUTPUT_SIV, i, 4074 VGPU10_NAME_VIEWPORT_ARRAY_INDEX, 4075 VGPU10_OPERAND_4_COMPONENT_MASK_X, 4076 FALSE, 4077 map_tgsi_semantic_to_sgn_name(semantic_name)); 4078 emit->gs.viewport_index_out_index = i; 4079 break; 4080 default: 4081 emit_vertex_output_declaration(emit, i, writemask, FALSE); 4082 } 4083 } 4084 } 4085 } 4086 4087 /* For geometry shader outputs, it is possible the same register is 4088 * declared multiple times for different streams. So to avoid 4089 * redundant signature entries, geometry shader output signature is done 4090 * outside of the declaration. 
4091 */ 4092 struct svga_shader_signature *sgn = &emit->signature; 4093 SVGA3dDXShaderSignatureEntry *sgnEntry; 4094 4095 for (i = 0; i < emit->info.num_outputs; i++) { 4096 if (emit->output_usage_mask[i]) { 4097 enum tgsi_semantic sem_name = emit->info.output_semantic_name[i]; 4098 4099 sgnEntry = &sgn->outputs[sgn->header.numOutputSignatures++]; 4100 set_shader_signature_entry(sgnEntry, i, 4101 map_tgsi_semantic_to_sgn_name(sem_name), 4102 emit->output_usage_mask[i], 4103 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN, 4104 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT); 4105 } 4106 } 4107} 4108 4109 4110/** 4111 * Emit the declaration for the tess inner/outer output. 4112 * \param opcodeType either VGPU10_OPCODE_DCL_OUTPUT_SIV or _INPUT_SIV 4113 * \param operandType either VGPU10_OPERAND_TYPE_OUTPUT or _INPUT 4114 * \param name VGPU10_NAME_FINAL_*_TESSFACTOR value 4115 */ 4116static void 4117emit_tesslevel_declaration(struct svga_shader_emitter_v10 *emit, 4118 unsigned index, unsigned opcodeType, 4119 unsigned operandType, VGPU10_SYSTEM_NAME name, 4120 SVGA3dDXSignatureSemanticName sgnName) 4121{ 4122 VGPU10OpcodeToken0 opcode0; 4123 VGPU10OperandToken0 operand0; 4124 VGPU10NameToken name_token; 4125 4126 assert(emit->version >= 50); 4127 assert(name >= VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR || 4128 (emit->key.tcs.prim_mode == PIPE_PRIM_LINES && 4129 name == VGPU10_NAME_UNDEFINED)); 4130 assert(name <= VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR); 4131 4132 assert(operandType == VGPU10_OPERAND_TYPE_OUTPUT || 4133 operandType == VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT); 4134 4135 opcode0.value = operand0.value = name_token.value = 0; 4136 4137 opcode0.opcodeType = opcodeType; 4138 operand0.operandType = operandType; 4139 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 4140 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 4141 operand0.mask = VGPU10_OPERAND_4_COMPONENT_MASK_X; 4142 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; 4143 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 4144 4145 name_token.name = name; 4146 emit_decl_instruction(emit, opcode0, operand0, name_token, index, 1); 4147 4148 /* Capture patch constant signature */ 4149 struct svga_shader_signature *sgn = &emit->signature; 4150 SVGA3dDXShaderSignatureEntry *sgnEntry = 4151 &sgn->patchConstants[sgn->header.numPatchConstantSignatures++]; 4152 set_shader_signature_entry(sgnEntry, index, 4153 sgnName, VGPU10_OPERAND_4_COMPONENT_MASK_X, 4154 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN, 4155 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT); 4156} 4157 4158 4159/** 4160 * Emit output declarations for tessellation control shader. 4161 */ 4162static void 4163emit_tcs_output_declarations(struct svga_shader_emitter_v10 *emit) 4164{ 4165 unsigned int i; 4166 unsigned outputIndex = emit->num_outputs; 4167 struct svga_shader_signature *sgn = &emit->signature; 4168 4169 /** 4170 * Initialize patch_generic_out_count so it won't be counted twice 4171 * since this function is called twice, one for control point phase 4172 * and another time for patch constant phase. 
4173 */ 4174 emit->tcs.patch_generic_out_count = 0; 4175 4176 for (i = 0; i < emit->info.num_outputs; i++) { 4177 unsigned index = i; 4178 const enum tgsi_semantic semantic_name = 4179 emit->info.output_semantic_name[i]; 4180 4181 switch (semantic_name) { 4182 case TGSI_SEMANTIC_TESSINNER: 4183 emit->tcs.inner.tgsi_index = i; 4184 4185 /* skip per-patch output declarations in control point phase */ 4186 if (emit->tcs.control_point_phase) 4187 break; 4188 4189 emit->tcs.inner.out_index = outputIndex; 4190 switch (emit->key.tcs.prim_mode) { 4191 case PIPE_PRIM_QUADS: 4192 emit_tesslevel_declaration(emit, outputIndex++, 4193 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, 4194 VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR, 4195 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR); 4196 4197 emit_tesslevel_declaration(emit, outputIndex++, 4198 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, 4199 VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR, 4200 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR); 4201 break; 4202 case PIPE_PRIM_TRIANGLES: 4203 emit_tesslevel_declaration(emit, outputIndex++, 4204 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, 4205 VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR, 4206 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR); 4207 break; 4208 case PIPE_PRIM_LINES: 4209 break; 4210 default: 4211 debug_printf("Unsupported primitive type"); 4212 } 4213 break; 4214 4215 case TGSI_SEMANTIC_TESSOUTER: 4216 emit->tcs.outer.tgsi_index = i; 4217 4218 /* skip per-patch output declarations in control point phase */ 4219 if (emit->tcs.control_point_phase) 4220 break; 4221 4222 emit->tcs.outer.out_index = outputIndex; 4223 switch (emit->key.tcs.prim_mode) { 4224 case PIPE_PRIM_QUADS: 4225 for (int j = 0; j < 4; j++) { 4226 emit_tesslevel_declaration(emit, outputIndex++, 4227 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, 4228 VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + j, 4229 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + j); 4230 } 4231 break; 4232 case PIPE_PRIM_TRIANGLES: 4233 for (int j = 0; j < 3; j++) { 4234 emit_tesslevel_declaration(emit, outputIndex++, 4235 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, 4236 VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + j, 4237 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + j); 4238 } 4239 break; 4240 case PIPE_PRIM_LINES: 4241 for (int j = 0; j < 2; j++) { 4242 emit_tesslevel_declaration(emit, outputIndex++, 4243 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, 4244 VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR + j, 4245 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR + j); 4246 } 4247 break; 4248 default: 4249 debug_printf("Unsupported primitive type"); 4250 } 4251 break; 4252 4253 case TGSI_SEMANTIC_PATCH: 4254 if (emit->tcs.patch_generic_out_index == INVALID_INDEX) 4255 emit->tcs.patch_generic_out_index= i; 4256 emit->tcs.patch_generic_out_count++; 4257 4258 /* skip per-patch output declarations in control point phase */ 4259 if (emit->tcs.control_point_phase) 4260 break; 4261 4262 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, index, 4263 VGPU10_NAME_UNDEFINED, 4264 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 4265 FALSE, 4266 map_tgsi_semantic_to_sgn_name(semantic_name)); 4267 4268 SVGA3dDXShaderSignatureEntry *sgnEntry = 4269 &sgn->patchConstants[sgn->header.numPatchConstantSignatures++]; 4270 set_shader_signature_entry(sgnEntry, index, 4271 map_tgsi_semantic_to_sgn_name(semantic_name), 4272 
VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 4273 SVGADX_SIGNATURE_REGISTER_COMPONENT_UNKNOWN, 4274 SVGADX_SIGNATURE_MIN_PRECISION_DEFAULT); 4275 4276 break; 4277 4278 default: 4279 /* save the starting index of control point outputs */ 4280 if (emit->tcs.control_point_out_index == INVALID_INDEX) 4281 emit->tcs.control_point_out_index = i; 4282 emit->tcs.control_point_out_count++; 4283 4284 /* skip control point output declarations in patch constant phase */ 4285 if (!emit->tcs.control_point_phase) 4286 break; 4287 4288 emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], 4289 TRUE); 4290 4291 } 4292 } 4293 4294 if (emit->tcs.control_point_phase) { 4295 /** 4296 * Add missing control point output in control point phase. 4297 */ 4298 if (emit->tcs.control_point_out_index == INVALID_INDEX) { 4299 /* use register index after tessellation factors */ 4300 switch (emit->key.tcs.prim_mode) { 4301 case PIPE_PRIM_QUADS: 4302 emit->tcs.control_point_out_index = outputIndex + 6; 4303 break; 4304 case PIPE_PRIM_TRIANGLES: 4305 emit->tcs.control_point_out_index = outputIndex + 4; 4306 break; 4307 default: 4308 emit->tcs.control_point_out_index = outputIndex + 2; 4309 break; 4310 } 4311 emit->tcs.control_point_out_count++; 4312 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, 4313 emit->tcs.control_point_out_index, 4314 VGPU10_NAME_POSITION, 4315 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 4316 TRUE, 4317 SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION); 4318 4319 /* If tcs does not output any control point output, 4320 * we can end the hull shader control point phase here 4321 * after emitting the default control point output. 4322 */ 4323 emit->skip_instruction = TRUE; 4324 } 4325 } 4326 else { 4327 if (emit->tcs.outer.out_index == INVALID_INDEX) { 4328 /* since the TCS did not declare out outer tess level output register, 4329 * we declare it here for patch constant phase only. 
4330 */ 4331 emit->tcs.outer.out_index = outputIndex; 4332 if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) { 4333 for (int i = 0; i < 4; i++) { 4334 emit_tesslevel_declaration(emit, outputIndex++, 4335 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, 4336 VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i, 4337 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i); 4338 } 4339 } 4340 else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) { 4341 for (int i = 0; i < 3; i++) { 4342 emit_tesslevel_declaration(emit, outputIndex++, 4343 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, 4344 VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i, 4345 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i); 4346 } 4347 } 4348 } 4349 4350 if (emit->tcs.inner.out_index == INVALID_INDEX) { 4351 /* since the TCS did not declare out inner tess level output register, 4352 * we declare it here 4353 */ 4354 emit->tcs.inner.out_index = outputIndex; 4355 if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) { 4356 emit_tesslevel_declaration(emit, outputIndex++, 4357 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, 4358 VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR, 4359 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR); 4360 emit_tesslevel_declaration(emit, outputIndex++, 4361 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, 4362 VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR, 4363 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR); 4364 } 4365 else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) { 4366 emit_tesslevel_declaration(emit, outputIndex++, 4367 VGPU10_OPCODE_DCL_OUTPUT_SIV, VGPU10_OPERAND_TYPE_OUTPUT, 4368 VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR, 4369 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR); 4370 } 4371 } 4372 } 4373 emit->num_outputs = outputIndex; 4374} 4375 4376 4377/** 4378 * Emit output declarations for tessellation evaluation shader. 4379 */ 4380static void 4381emit_tes_output_declarations(struct svga_shader_emitter_v10 *emit) 4382{ 4383 unsigned int i; 4384 4385 for (i = 0; i < emit->info.num_outputs; i++) { 4386 emit_vertex_output_declaration(emit, i, emit->output_usage_mask[i], TRUE); 4387 } 4388} 4389 4390 4391/** 4392 * Emit the declaration for a system value input/output. 
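 * System values that map to dedicated VGPU10 registers (e.g. the GS
 * instance id or the PS coverage mask) are declared here; others are only
 * recorded so that later instruction emission can substitute them
 * (e.g. sample position, tess levels, vertices-per-patch).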
4393 */ 4394static void 4395emit_system_value_declaration(struct svga_shader_emitter_v10 *emit, 4396 enum tgsi_semantic semantic_name, unsigned index) 4397{ 4398 switch (semantic_name) { 4399 case TGSI_SEMANTIC_INSTANCEID: 4400 index = alloc_system_value_index(emit, index); 4401 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV, 4402 VGPU10_OPERAND_TYPE_INPUT, 4403 VGPU10_OPERAND_INDEX_1D, 4404 index, 1, 4405 VGPU10_NAME_INSTANCE_ID, 4406 VGPU10_OPERAND_4_COMPONENT, 4407 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 4408 VGPU10_OPERAND_4_COMPONENT_MASK_X, 4409 VGPU10_INTERPOLATION_UNDEFINED, TRUE, 4410 map_tgsi_semantic_to_sgn_name(semantic_name)); 4411 break; 4412 case TGSI_SEMANTIC_VERTEXID: 4413 emit->vs.vertex_id_sys_index = index; 4414 index = alloc_system_value_index(emit, index); 4415 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_SIV, 4416 VGPU10_OPERAND_TYPE_INPUT, 4417 VGPU10_OPERAND_INDEX_1D, 4418 index, 1, 4419 VGPU10_NAME_VERTEX_ID, 4420 VGPU10_OPERAND_4_COMPONENT, 4421 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 4422 VGPU10_OPERAND_4_COMPONENT_MASK_X, 4423 VGPU10_INTERPOLATION_UNDEFINED, TRUE, 4424 map_tgsi_semantic_to_sgn_name(semantic_name)); 4425 break; 4426 case TGSI_SEMANTIC_SAMPLEID: 4427 assert(emit->unit == PIPE_SHADER_FRAGMENT); 4428 emit->fs.sample_id_sys_index = index; 4429 index = alloc_system_value_index(emit, index); 4430 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT_PS_SIV, 4431 VGPU10_OPERAND_TYPE_INPUT, 4432 VGPU10_OPERAND_INDEX_1D, 4433 index, 1, 4434 VGPU10_NAME_SAMPLE_INDEX, 4435 VGPU10_OPERAND_4_COMPONENT, 4436 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 4437 VGPU10_OPERAND_4_COMPONENT_MASK_X, 4438 VGPU10_INTERPOLATION_CONSTANT, TRUE, 4439 map_tgsi_semantic_to_sgn_name(semantic_name)); 4440 break; 4441 case TGSI_SEMANTIC_SAMPLEPOS: 4442 /* This system value contains the position of the current sample 4443 * when using per-sample shading. We implement this by calling 4444 * the VGPU10_OPCODE_SAMPLE_POS instruction with the current sample 4445 * index as the argument. See emit_sample_position_instructions(). 4446 */ 4447 assert(emit->version >= 41); 4448 emit->fs.sample_pos_sys_index = index; 4449 index = alloc_system_value_index(emit, index); 4450 break; 4451 case TGSI_SEMANTIC_INVOCATIONID: 4452 /* Note: invocation id input is mapped to different register depending 4453 * on the shader type. In GS, it will be mapped to vGSInstanceID#. 4454 * In TCS, it will be mapped to vOutputControlPointID#. 4455 * Since in both cases, the mapped name is unique rather than 4456 * just a generic input name ("v#"), so there is no need to remap 4457 * the index value. 4458 */ 4459 assert(emit->unit == PIPE_SHADER_GEOMETRY || 4460 emit->unit == PIPE_SHADER_TESS_CTRL); 4461 assert(emit->version >= 50); 4462 4463 if (emit->unit == PIPE_SHADER_GEOMETRY) { 4464 emit->gs.invocation_id_sys_index = index; 4465 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, 4466 VGPU10_OPERAND_TYPE_INPUT_GS_INSTANCE_ID, 4467 VGPU10_OPERAND_INDEX_0D, 4468 index, 1, 4469 VGPU10_NAME_UNDEFINED, 4470 VGPU10_OPERAND_0_COMPONENT, 4471 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 4472 0, 4473 VGPU10_INTERPOLATION_UNDEFINED, TRUE, 4474 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); 4475 } else if (emit->unit == PIPE_SHADER_TESS_CTRL) { 4476 /* The emission of the control point id will be done 4477 * in the control point phase in emit_hull_shader_control_point_phase(). 
4478 */ 4479 emit->tcs.invocation_id_sys_index = index; 4480 } 4481 break; 4482 case TGSI_SEMANTIC_SAMPLEMASK: 4483 /* Note: the PS sample mask input has a unique name ("vCoverage#") 4484 * rather than just a generic input name ("v#") so no need to remap the 4485 * index value. 4486 */ 4487 assert(emit->unit == PIPE_SHADER_FRAGMENT); 4488 assert(emit->version >= 50); 4489 emit->fs.sample_mask_in_sys_index = index; 4490 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, 4491 VGPU10_OPERAND_TYPE_INPUT_COVERAGE_MASK, 4492 VGPU10_OPERAND_INDEX_0D, 4493 index, 1, 4494 VGPU10_NAME_UNDEFINED, 4495 VGPU10_OPERAND_1_COMPONENT, 4496 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 4497 0, 4498 VGPU10_INTERPOLATION_CONSTANT, TRUE, 4499 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); 4500 break; 4501 case TGSI_SEMANTIC_TESSCOORD: 4502 assert(emit->version >= 50); 4503 4504 unsigned usageMask = 0; 4505 4506 if (emit->tes.prim_mode == PIPE_PRIM_TRIANGLES) { 4507 usageMask = VGPU10_OPERAND_4_COMPONENT_MASK_XYZ; 4508 } 4509 else if (emit->tes.prim_mode == PIPE_PRIM_LINES || 4510 emit->tes.prim_mode == PIPE_PRIM_QUADS) { 4511 usageMask = VGPU10_OPERAND_4_COMPONENT_MASK_XY; 4512 } 4513 4514 emit->tes.tesscoord_sys_index = index; 4515 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, 4516 VGPU10_OPERAND_TYPE_INPUT_DOMAIN_POINT, 4517 VGPU10_OPERAND_INDEX_0D, 4518 index, 1, 4519 VGPU10_NAME_UNDEFINED, 4520 VGPU10_OPERAND_4_COMPONENT, 4521 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 4522 usageMask, 4523 VGPU10_INTERPOLATION_UNDEFINED, TRUE, 4524 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); 4525 break; 4526 case TGSI_SEMANTIC_TESSINNER: 4527 assert(emit->version >= 50); 4528 emit->tes.inner.tgsi_index = index; 4529 break; 4530 case TGSI_SEMANTIC_TESSOUTER: 4531 assert(emit->version >= 50); 4532 emit->tes.outer.tgsi_index = index; 4533 break; 4534 case TGSI_SEMANTIC_VERTICESIN: 4535 assert(emit->unit == PIPE_SHADER_TESS_CTRL); 4536 assert(emit->version >= 50); 4537 4538 /* save the system value index */ 4539 emit->tcs.vertices_per_patch_index = index; 4540 break; 4541 case TGSI_SEMANTIC_PRIMID: 4542 assert(emit->version >= 50); 4543 if (emit->unit == PIPE_SHADER_TESS_CTRL) { 4544 emit->tcs.prim_id_index = index; 4545 } 4546 else if (emit->unit == PIPE_SHADER_TESS_EVAL) { 4547 emit->tes.prim_id_index = index; 4548 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, 4549 VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID, 4550 VGPU10_OPERAND_INDEX_0D, 4551 index, 1, 4552 VGPU10_NAME_UNDEFINED, 4553 VGPU10_OPERAND_0_COMPONENT, 4554 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 4555 0, 4556 VGPU10_INTERPOLATION_UNDEFINED, TRUE, 4557 map_tgsi_semantic_to_sgn_name(semantic_name)); 4558 } 4559 break; 4560 default: 4561 debug_printf("unexpected system value semantic index %u / %s\n", 4562 semantic_name, tgsi_semantic_names[semantic_name]); 4563 } 4564} 4565 4566/** 4567 * Translate a TGSI declaration to VGPU10. 4568 */ 4569static boolean 4570emit_vgpu10_declaration(struct svga_shader_emitter_v10 *emit, 4571 const struct tgsi_full_declaration *decl) 4572{ 4573 switch (decl->Declaration.File) { 4574 case TGSI_FILE_INPUT: 4575 /* do nothing - see emit_input_declarations() */ 4576 return TRUE; 4577 4578 case TGSI_FILE_OUTPUT: 4579 assert(decl->Range.First == decl->Range.Last); 4580 emit->output_usage_mask[decl->Range.First] = decl->Declaration.UsageMask; 4581 return TRUE; 4582 4583 case TGSI_FILE_TEMPORARY: 4584 /* Don't declare the temps here. Just keep track of how many 4585 * and emit the declaration later. 
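       * (see emit_temporaries_declaration())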
4586 */ 4587 if (decl->Declaration.Array) { 4588 /* Indexed temporary array. Save the start index of the array 4589 * and the size of the array. 4590 */ 4591 const unsigned arrayID = MIN2(decl->Array.ArrayID, MAX_TEMP_ARRAYS); 4592 assert(arrayID < ARRAY_SIZE(emit->temp_arrays)); 4593 4594 /* Save this array so we can emit the declaration for it later */ 4595 create_temp_array(emit, arrayID, decl->Range.First, 4596 decl->Range.Last - decl->Range.First + 1, 4597 decl->Range.First); 4598 } 4599 4600 /* for all temps, indexed or not, keep track of highest index */ 4601 emit->num_shader_temps = MAX2(emit->num_shader_temps, 4602 decl->Range.Last + 1); 4603 return TRUE; 4604 4605 case TGSI_FILE_CONSTANT: 4606 /* Don't declare constants here. Just keep track and emit later. */ 4607 { 4608 unsigned constbuf = 0, num_consts; 4609 if (decl->Declaration.Dimension) { 4610 constbuf = decl->Dim.Index2D; 4611 } 4612 /* We throw an assertion here when, in fact, the shader should never 4613 * have linked due to constbuf index out of bounds, so we shouldn't 4614 * have reached here. 4615 */ 4616 assert(constbuf < ARRAY_SIZE(emit->num_shader_consts)); 4617 4618 num_consts = MAX2(emit->num_shader_consts[constbuf], 4619 decl->Range.Last + 1); 4620 4621 if (num_consts > VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT) { 4622 debug_printf("Warning: constant buffer is declared to size [%u]" 4623 " but [%u] is the limit.\n", 4624 num_consts, 4625 VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT); 4626 } 4627 /* The linker doesn't enforce the max UBO size so we clamp here */ 4628 emit->num_shader_consts[constbuf] = 4629 MIN2(num_consts, VGPU10_MAX_CONSTANT_BUFFER_ELEMENT_COUNT); 4630 } 4631 return TRUE; 4632 4633 case TGSI_FILE_IMMEDIATE: 4634 assert(!"TGSI_FILE_IMMEDIATE not handled yet!"); 4635 return FALSE; 4636 4637 case TGSI_FILE_SYSTEM_VALUE: 4638 emit_system_value_declaration(emit, decl->Semantic.Name, 4639 decl->Range.First); 4640 return TRUE; 4641 4642 case TGSI_FILE_SAMPLER: 4643 /* Don't declare samplers here. Just keep track and emit later. */ 4644 emit->num_samplers = MAX2(emit->num_samplers, decl->Range.Last + 1); 4645 return TRUE; 4646 4647#if 0 4648 case TGSI_FILE_RESOURCE: 4649 /*opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE;*/ 4650 /* XXX more, VGPU10_RETURN_TYPE_FLOAT */ 4651 assert(!"TGSI_FILE_RESOURCE not handled yet"); 4652 return FALSE; 4653#endif 4654 4655 case TGSI_FILE_ADDRESS: 4656 emit->num_address_regs = MAX2(emit->num_address_regs, 4657 decl->Range.Last + 1); 4658 return TRUE; 4659 4660 case TGSI_FILE_SAMPLER_VIEW: 4661 { 4662 unsigned unit = decl->Range.First; 4663 assert(decl->Range.First == decl->Range.Last); 4664 emit->sampler_target[unit] = decl->SamplerView.Resource; 4665 4666 /* Note: we can ignore YZW return types for now */ 4667 emit->sampler_return_type[unit] = decl->SamplerView.ReturnTypeX; 4668 emit->sampler_view[unit] = TRUE; 4669 } 4670 return TRUE; 4671 4672 default: 4673 assert(!"Unexpected type of declaration"); 4674 return FALSE; 4675 } 4676} 4677 4678 4679 4680/** 4681 * Emit input declarations for fragment shader. 
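 * Position, front-facing, primitive id, sample id, layer and viewport
 * index inputs are declared with VGPU10_OPCODE_DCL_INPUT_PS_SGV (the
 * layer input is skipped entirely when key.fs.layer_to_zero is set);
 * all other inputs use VGPU10_OPCODE_DCL_INPUT_PS with the translated
 * interpolation mode.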
4682 */ 4683static void 4684emit_fs_input_declarations(struct svga_shader_emitter_v10 *emit) 4685{ 4686 unsigned i; 4687 4688 for (i = 0; i < emit->linkage.num_inputs; i++) { 4689 enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i]; 4690 unsigned usage_mask = emit->info.input_usage_mask[i]; 4691 unsigned index = emit->linkage.input_map[i]; 4692 unsigned type, interpolationMode, name; 4693 unsigned mask = VGPU10_OPERAND_4_COMPONENT_MASK_ALL; 4694 4695 if (usage_mask == 0) 4696 continue; /* register is not actually used */ 4697 4698 if (semantic_name == TGSI_SEMANTIC_POSITION) { 4699 /* fragment position input */ 4700 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; 4701 interpolationMode = VGPU10_INTERPOLATION_LINEAR; 4702 name = VGPU10_NAME_POSITION; 4703 if (usage_mask & TGSI_WRITEMASK_W) { 4704 /* we need to replace use of 'w' with '1/w' */ 4705 emit->fs.fragcoord_input_index = i; 4706 } 4707 } 4708 else if (semantic_name == TGSI_SEMANTIC_FACE) { 4709 /* fragment front-facing input */ 4710 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; 4711 interpolationMode = VGPU10_INTERPOLATION_CONSTANT; 4712 name = VGPU10_NAME_IS_FRONT_FACE; 4713 emit->fs.face_input_index = i; 4714 } 4715 else if (semantic_name == TGSI_SEMANTIC_PRIMID) { 4716 /* primitive ID */ 4717 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; 4718 interpolationMode = VGPU10_INTERPOLATION_CONSTANT; 4719 name = VGPU10_NAME_PRIMITIVE_ID; 4720 } 4721 else if (semantic_name == TGSI_SEMANTIC_SAMPLEID) { 4722 /* sample index / ID */ 4723 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; 4724 interpolationMode = VGPU10_INTERPOLATION_CONSTANT; 4725 name = VGPU10_NAME_SAMPLE_INDEX; 4726 } 4727 else if (semantic_name == TGSI_SEMANTIC_LAYER) { 4728 /* render target array index */ 4729 if (emit->key.fs.layer_to_zero) { 4730 /** 4731 * The shader from the previous stage does not write to layer, 4732 * so reading the layer index in fragment shader should return 0. 4733 */ 4734 emit->fs.layer_input_index = i; 4735 continue; 4736 } else { 4737 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; 4738 interpolationMode = VGPU10_INTERPOLATION_CONSTANT; 4739 name = VGPU10_NAME_RENDER_TARGET_ARRAY_INDEX; 4740 mask = VGPU10_OPERAND_4_COMPONENT_MASK_X; 4741 } 4742 } 4743 else if (semantic_name == TGSI_SEMANTIC_VIEWPORT_INDEX) { 4744 /* viewport index */ 4745 type = VGPU10_OPCODE_DCL_INPUT_PS_SGV; 4746 interpolationMode = VGPU10_INTERPOLATION_CONSTANT; 4747 name = VGPU10_NAME_VIEWPORT_ARRAY_INDEX; 4748 mask = VGPU10_OPERAND_4_COMPONENT_MASK_X; 4749 } 4750 else { 4751 /* general fragment input */ 4752 type = VGPU10_OPCODE_DCL_INPUT_PS; 4753 interpolationMode = 4754 translate_interpolation(emit, 4755 emit->info.input_interpolate[i], 4756 emit->info.input_interpolate_loc[i]); 4757 4758 /* keeps track if flat interpolation mode is being used */ 4759 emit->uses_flat_interp = emit->uses_flat_interp || 4760 (interpolationMode == VGPU10_INTERPOLATION_CONSTANT); 4761 4762 name = VGPU10_NAME_UNDEFINED; 4763 } 4764 4765 emit_input_declaration(emit, type, 4766 VGPU10_OPERAND_TYPE_INPUT, 4767 VGPU10_OPERAND_INDEX_1D, index, 1, 4768 name, 4769 VGPU10_OPERAND_4_COMPONENT, 4770 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 4771 mask, 4772 interpolationMode, TRUE, 4773 map_tgsi_semantic_to_sgn_name(semantic_name)); 4774 } 4775} 4776 4777 4778/** 4779 * Emit input declarations for vertex shader. 
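 * Each vertex input register that is actually used is declared as a plain
 * v# input (VGPU10_OPCODE_DCL_INPUT) with no system-interpreted name.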
4780 */ 4781static void 4782emit_vs_input_declarations(struct svga_shader_emitter_v10 *emit) 4783{ 4784 unsigned i; 4785 4786 for (i = 0; i < emit->info.file_max[TGSI_FILE_INPUT] + 1; i++) { 4787 unsigned usage_mask = emit->info.input_usage_mask[i]; 4788 unsigned index = i; 4789 4790 if (usage_mask == 0) 4791 continue; /* register is not actually used */ 4792 4793 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, 4794 VGPU10_OPERAND_TYPE_INPUT, 4795 VGPU10_OPERAND_INDEX_1D, index, 1, 4796 VGPU10_NAME_UNDEFINED, 4797 VGPU10_OPERAND_4_COMPONENT, 4798 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 4799 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 4800 VGPU10_INTERPOLATION_UNDEFINED, TRUE, 4801 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); 4802 } 4803} 4804 4805 4806/** 4807 * Emit input declarations for geometry shader. 4808 */ 4809static void 4810emit_gs_input_declarations(struct svga_shader_emitter_v10 *emit) 4811{ 4812 unsigned i; 4813 4814 for (i = 0; i < emit->info.num_inputs; i++) { 4815 enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i]; 4816 unsigned usage_mask = emit->info.input_usage_mask[i]; 4817 unsigned index = emit->linkage.input_map[i]; 4818 unsigned opcodeType, operandType; 4819 unsigned numComp, selMode; 4820 unsigned name; 4821 unsigned dim; 4822 4823 if (usage_mask == 0) 4824 continue; /* register is not actually used */ 4825 4826 opcodeType = VGPU10_OPCODE_DCL_INPUT; 4827 operandType = VGPU10_OPERAND_TYPE_INPUT; 4828 numComp = VGPU10_OPERAND_4_COMPONENT; 4829 selMode = VGPU10_OPERAND_4_COMPONENT_MASK_MODE; 4830 name = VGPU10_NAME_UNDEFINED; 4831 4832 /* all geometry shader inputs are two dimensional except 4833 * gl_PrimitiveID 4834 */ 4835 dim = VGPU10_OPERAND_INDEX_2D; 4836 4837 if (semantic_name == TGSI_SEMANTIC_PRIMID) { 4838 /* Primitive ID */ 4839 operandType = VGPU10_OPERAND_TYPE_INPUT_PRIMITIVEID; 4840 dim = VGPU10_OPERAND_INDEX_0D; 4841 numComp = VGPU10_OPERAND_0_COMPONENT; 4842 selMode = 0; 4843 4844 /* also save the register index so we can check for 4845 * primitive id when emit src register. We need to modify the 4846 * operand type, index dimension when emit primitive id src reg. 4847 */ 4848 emit->gs.prim_id_index = i; 4849 } 4850 else if (semantic_name == TGSI_SEMANTIC_POSITION) { 4851 /* vertex position input */ 4852 opcodeType = VGPU10_OPCODE_DCL_INPUT_SIV; 4853 name = VGPU10_NAME_POSITION; 4854 } 4855 4856 emit_input_declaration(emit, opcodeType, operandType, 4857 dim, index, 4858 emit->gs.input_size, 4859 name, 4860 numComp, selMode, 4861 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 4862 VGPU10_INTERPOLATION_UNDEFINED, TRUE, 4863 map_tgsi_semantic_to_sgn_name(semantic_name)); 4864 } 4865} 4866 4867 4868/** 4869 * Emit input declarations for tessellation control shader. 
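 * TCS inputs are declared as two-dimensional arrays whose outer dimension
 * is the patch vertex count; in the patch constant phase the input control
 * points are referenced through the input control point (vicp) register
 * file rather than v#.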
4870 */ 4871static void 4872emit_tcs_input_declarations(struct svga_shader_emitter_v10 *emit) 4873{ 4874 unsigned i; 4875 unsigned size = emit->key.tcs.vertices_per_patch; 4876 unsigned indicesMask = 0; 4877 boolean addSignature = TRUE; 4878 4879 if (!emit->tcs.control_point_phase) 4880 addSignature = emit->tcs.fork_phase_add_signature; 4881 4882 for (i = 0; i < emit->info.num_inputs; i++) { 4883 unsigned usage_mask = emit->info.input_usage_mask[i]; 4884 unsigned index = emit->linkage.input_map[i]; 4885 enum tgsi_semantic semantic_name = emit->info.input_semantic_name[i]; 4886 VGPU10_SYSTEM_NAME name = VGPU10_NAME_UNDEFINED; 4887 VGPU10_OPERAND_TYPE operandType = VGPU10_OPERAND_TYPE_INPUT; 4888 SVGA3dDXSignatureSemanticName sgn_name = 4889 map_tgsi_semantic_to_sgn_name(semantic_name); 4890 4891 /* indices that are declared */ 4892 indicesMask |= 1 << index; 4893 4894 if (semantic_name == TGSI_SEMANTIC_POSITION || 4895 index == emit->linkage.position_index) { 4896 /* save the input control point index for later use */ 4897 emit->tcs.control_point_input_index = i; 4898 } 4899 else if (usage_mask == 0) { 4900 continue; /* register is not actually used */ 4901 } 4902 else if (semantic_name == TGSI_SEMANTIC_CLIPDIST) { 4903 /* The shadow copy is being used here. So set the signature name 4904 * to UNDEFINED. 4905 */ 4906 sgn_name = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED; 4907 } 4908 4909 /* input control points in the patch constant phase are emitted in the 4910 * vicp register rather than the v register. 4911 */ 4912 if (!emit->tcs.control_point_phase) { 4913 operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT; 4914 } 4915 4916 /* Tessellation control shader inputs are two dimensional. 4917 * The array size is determined by the patch vertex count. 4918 */ 4919 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, 4920 operandType, 4921 VGPU10_OPERAND_INDEX_2D, 4922 index, size, name, 4923 VGPU10_OPERAND_4_COMPONENT, 4924 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 4925 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 4926 VGPU10_INTERPOLATION_UNDEFINED, 4927 addSignature, sgn_name); 4928 } 4929 4930 if (emit->tcs.control_point_phase) { 4931 if (emit->tcs.control_point_input_index == INVALID_INDEX) { 4932 4933 /* Add input control point declaration if it does not exist */ 4934 if ((indicesMask & (1 << emit->linkage.position_index)) == 0) { 4935 emit->linkage.input_map[emit->linkage.num_inputs] = 4936 emit->linkage.position_index; 4937 emit->tcs.control_point_input_index = emit->linkage.num_inputs++; 4938 4939 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, 4940 VGPU10_OPERAND_TYPE_INPUT, 4941 VGPU10_OPERAND_INDEX_2D, 4942 emit->linkage.position_index, 4943 emit->key.tcs.vertices_per_patch, 4944 VGPU10_NAME_UNDEFINED, 4945 VGPU10_OPERAND_4_COMPONENT, 4946 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 4947 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 4948 VGPU10_INTERPOLATION_UNDEFINED, TRUE, 4949 SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION); 4950 } 4951 } 4952 4953 /* Also add an address register for the indirection to the 4954 * input control points 4955 */ 4956 emit->tcs.control_point_addr_index = emit->num_address_regs++; 4957 } 4958} 4959 4960 4961static void 4962emit_tessfactor_input_declarations(struct svga_shader_emitter_v10 *emit) 4963{ 4964 4965 /* In tcs, tess factors are emitted as extra outputs. 4966 * The starting register index for the tess factors is captured 4967 * in the compile key. 
4968 */ 4969 unsigned inputIndex = emit->key.tes.tessfactor_index; 4970 4971 if (emit->tes.prim_mode == PIPE_PRIM_QUADS) { 4972 if (emit->key.tes.need_tessouter) { 4973 emit->tes.outer.in_index = inputIndex; 4974 for (int i = 0; i < 4; i++) { 4975 emit_tesslevel_declaration(emit, inputIndex++, 4976 VGPU10_OPCODE_DCL_INPUT_SIV, 4977 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT, 4978 VGPU10_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i, 4979 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR + i); 4980 } 4981 } 4982 4983 if (emit->key.tes.need_tessinner) { 4984 emit->tes.inner.in_index = inputIndex; 4985 emit_tesslevel_declaration(emit, inputIndex++, 4986 VGPU10_OPCODE_DCL_INPUT_SIV, 4987 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT, 4988 VGPU10_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR, 4989 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR); 4990 4991 emit_tesslevel_declaration(emit, inputIndex++, 4992 VGPU10_OPCODE_DCL_INPUT_SIV, 4993 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT, 4994 VGPU10_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR, 4995 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR); 4996 } 4997 } 4998 else if (emit->tes.prim_mode == PIPE_PRIM_TRIANGLES) { 4999 if (emit->key.tes.need_tessouter) { 5000 emit->tes.outer.in_index = inputIndex; 5001 for (int i = 0; i < 3; i++) { 5002 emit_tesslevel_declaration(emit, inputIndex++, 5003 VGPU10_OPCODE_DCL_INPUT_SIV, 5004 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT, 5005 VGPU10_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i, 5006 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR + i); 5007 } 5008 } 5009 5010 if (emit->key.tes.need_tessinner) { 5011 emit->tes.inner.in_index = inputIndex; 5012 emit_tesslevel_declaration(emit, inputIndex++, 5013 VGPU10_OPCODE_DCL_INPUT_SIV, 5014 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT, 5015 VGPU10_NAME_FINAL_TRI_INSIDE_TESSFACTOR, 5016 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_TRI_INSIDE_TESSFACTOR); 5017 } 5018 } 5019 else if (emit->tes.prim_mode == PIPE_PRIM_LINES) { 5020 if (emit->key.tes.need_tessouter) { 5021 emit->tes.outer.in_index = inputIndex; 5022 emit_tesslevel_declaration(emit, inputIndex++, 5023 VGPU10_OPCODE_DCL_INPUT_SIV, 5024 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT, 5025 VGPU10_NAME_FINAL_LINE_DETAIL_TESSFACTOR, 5026 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DETAIL_TESSFACTOR); 5027 5028 emit_tesslevel_declaration(emit, inputIndex++, 5029 VGPU10_OPCODE_DCL_INPUT_SIV, 5030 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT, 5031 VGPU10_NAME_FINAL_LINE_DENSITY_TESSFACTOR, 5032 SVGADX_SIGNATURE_SEMANTIC_NAME_FINAL_LINE_DENSITY_TESSFACTOR); 5033 } 5034 } 5035} 5036 5037 5038/** 5039 * Emit input declarations for tessellation evaluation shader. 
5040 */ 5041static void 5042emit_tes_input_declarations(struct svga_shader_emitter_v10 *emit) 5043{ 5044 unsigned i; 5045 5046 for (i = 0; i < emit->info.num_inputs; i++) { 5047 unsigned usage_mask = emit->info.input_usage_mask[i]; 5048 unsigned index = emit->linkage.input_map[i]; 5049 unsigned size; 5050 const enum tgsi_semantic semantic_name = 5051 emit->info.input_semantic_name[i]; 5052 SVGA3dDXSignatureSemanticName sgn_name; 5053 VGPU10_OPERAND_TYPE operandType; 5054 VGPU10_OPERAND_INDEX_DIMENSION dim; 5055 5056 if (usage_mask == 0) 5057 usage_mask = 1; /* at least set usage mask to one */ 5058 5059 if (semantic_name == TGSI_SEMANTIC_PATCH) { 5060 operandType = VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT; 5061 dim = VGPU10_OPERAND_INDEX_1D; 5062 size = 1; 5063 sgn_name = map_tgsi_semantic_to_sgn_name(semantic_name); 5064 } 5065 else { 5066 operandType = VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT; 5067 dim = VGPU10_OPERAND_INDEX_2D; 5068 size = emit->key.tes.vertices_per_patch; 5069 sgn_name = SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED; 5070 } 5071 5072 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, operandType, 5073 dim, index, size, VGPU10_NAME_UNDEFINED, 5074 VGPU10_OPERAND_4_COMPONENT, 5075 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 5076 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 5077 VGPU10_INTERPOLATION_UNDEFINED, 5078 TRUE, sgn_name); 5079 } 5080 5081 emit_tessfactor_input_declarations(emit); 5082 5083 /* DX spec requires DS input controlpoint/patch-constant signatures to match 5084 * the HS output controlpoint/patch-constant signatures exactly. 5085 * Add missing input declarations even if they are not used in the shader. 5086 */ 5087 if (emit->linkage.num_inputs < emit->linkage.prevShader.num_outputs) { 5088 struct tgsi_shader_info *prevInfo = emit->prevShaderInfo; 5089 for (i = 0; i < emit->linkage.prevShader.num_outputs; i++) { 5090 5091 /* If a tcs output does not have a corresponding input register in 5092 * tes, add one. 5093 */ 5094 if (emit->linkage.prevShader.output_map[i] > 5095 emit->linkage.input_map_max) { 5096 const enum tgsi_semantic sem_name = prevInfo->output_semantic_name[i]; 5097 5098 if (sem_name == TGSI_SEMANTIC_PATCH) { 5099 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, 5100 VGPU10_OPERAND_TYPE_INPUT_PATCH_CONSTANT, 5101 VGPU10_OPERAND_INDEX_1D, 5102 i, 1, VGPU10_NAME_UNDEFINED, 5103 VGPU10_OPERAND_4_COMPONENT, 5104 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 5105 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 5106 VGPU10_INTERPOLATION_UNDEFINED, 5107 TRUE, 5108 map_tgsi_semantic_to_sgn_name(sem_name)); 5109 5110 } else if (sem_name != TGSI_SEMANTIC_TESSINNER && 5111 sem_name != TGSI_SEMANTIC_TESSOUTER) { 5112 emit_input_declaration(emit, VGPU10_OPCODE_DCL_INPUT, 5113 VGPU10_OPERAND_TYPE_INPUT_CONTROL_POINT, 5114 VGPU10_OPERAND_INDEX_2D, 5115 i, emit->key.tes.vertices_per_patch, 5116 VGPU10_NAME_UNDEFINED, 5117 VGPU10_OPERAND_4_COMPONENT, 5118 VGPU10_OPERAND_4_COMPONENT_MASK_MODE, 5119 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 5120 VGPU10_INTERPOLATION_UNDEFINED, 5121 TRUE, 5122 map_tgsi_semantic_to_sgn_name(sem_name)); 5123 } 5124 /* tessellation factors are taken care of in 5125 * emit_tessfactor_input_declarations(). 5126 */ 5127 } 5128 } 5129 } 5130} 5131 5132 5133/** 5134 * Emit all input declarations. 5135 */ 5136static boolean 5137emit_input_declarations(struct svga_shader_emitter_v10 *emit) 5138{ 5139 emit->index_range.required = 5140 emit->info.indirect_files & (1 << TGSI_FILE_INPUT) ? 
TRUE : FALSE; 5141 5142 switch (emit->unit) { 5143 case PIPE_SHADER_FRAGMENT: 5144 emit_fs_input_declarations(emit); 5145 break; 5146 case PIPE_SHADER_GEOMETRY: 5147 emit_gs_input_declarations(emit); 5148 break; 5149 case PIPE_SHADER_VERTEX: 5150 emit_vs_input_declarations(emit); 5151 break; 5152 case PIPE_SHADER_TESS_CTRL: 5153 emit_tcs_input_declarations(emit); 5154 break; 5155 case PIPE_SHADER_TESS_EVAL: 5156 emit_tes_input_declarations(emit); 5157 break; 5158 case PIPE_SHADER_COMPUTE: 5159 //XXX emit_cs_input_declarations(emit); 5160 break; 5161 default: 5162 assert(0); 5163 } 5164 5165 if (emit->index_range.start_index != INVALID_INDEX) { 5166 emit_index_range_declaration(emit); 5167 } 5168 emit->index_range.required = FALSE; 5169 return TRUE; 5170} 5171 5172 5173/** 5174 * Emit all output declarations. 5175 */ 5176static boolean 5177emit_output_declarations(struct svga_shader_emitter_v10 *emit) 5178{ 5179 emit->index_range.required = 5180 emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT) ? TRUE : FALSE; 5181 5182 switch (emit->unit) { 5183 case PIPE_SHADER_FRAGMENT: 5184 emit_fs_output_declarations(emit); 5185 break; 5186 case PIPE_SHADER_GEOMETRY: 5187 emit_gs_output_declarations(emit); 5188 break; 5189 case PIPE_SHADER_VERTEX: 5190 emit_vs_output_declarations(emit); 5191 break; 5192 case PIPE_SHADER_TESS_CTRL: 5193 emit_tcs_output_declarations(emit); 5194 break; 5195 case PIPE_SHADER_TESS_EVAL: 5196 emit_tes_output_declarations(emit); 5197 break; 5198 case PIPE_SHADER_COMPUTE: 5199 //XXX emit_cs_output_declarations(emit); 5200 break; 5201 default: 5202 assert(0); 5203 } 5204 5205 if (emit->vposition.so_index != INVALID_INDEX && 5206 emit->vposition.out_index != INVALID_INDEX) { 5207 5208 assert(emit->unit != PIPE_SHADER_FRAGMENT); 5209 5210 /* Emit the declaration for the non-adjusted vertex position 5211 * for stream output purpose 5212 */ 5213 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, 5214 emit->vposition.so_index, 5215 VGPU10_NAME_UNDEFINED, 5216 VGPU10_OPERAND_4_COMPONENT_MASK_ALL, 5217 TRUE, 5218 SVGADX_SIGNATURE_SEMANTIC_NAME_POSITION); 5219 } 5220 5221 if (emit->clip_dist_so_index != INVALID_INDEX && 5222 emit->clip_dist_out_index != INVALID_INDEX) { 5223 5224 assert(emit->unit != PIPE_SHADER_FRAGMENT); 5225 5226 /* Emit the declaration for the clip distance shadow copy which 5227 * will be used for stream output purpose and for clip distance 5228 * varying variable 5229 */ 5230 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, 5231 emit->clip_dist_so_index, 5232 VGPU10_NAME_UNDEFINED, 5233 emit->output_usage_mask[emit->clip_dist_out_index], 5234 TRUE, 5235 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); 5236 5237 if (emit->info.num_written_clipdistance > 4) { 5238 /* for the second clip distance register, each handles 4 planes */ 5239 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT, 5240 emit->clip_dist_so_index + 1, 5241 VGPU10_NAME_UNDEFINED, 5242 emit->output_usage_mask[emit->clip_dist_out_index+1], 5243 TRUE, 5244 SVGADX_SIGNATURE_SEMANTIC_NAME_UNDEFINED); 5245 } 5246 } 5247 5248 if (emit->index_range.start_index != INVALID_INDEX) { 5249 emit_index_range_declaration(emit); 5250 } 5251 emit->index_range.required = FALSE; 5252 return TRUE; 5253} 5254 5255 5256/** 5257 * A helper function to create a temporary indexable array 5258 * and initialize the corresponding entries in the temp_map array. 
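 * For example, create_temp_array(emit, 1, 8, 4, 8) records indexable
 * array 1 with size 4 and maps TEMP[8]..TEMP[11] to elements 0..3 of that
 * array (roughly x1[0]..x1[3] in the VGPU10 output).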
5259 */ 5260static void 5261create_temp_array(struct svga_shader_emitter_v10 *emit, 5262 unsigned arrayID, unsigned first, unsigned count, 5263 unsigned startIndex) 5264{ 5265 unsigned i, tempIndex = startIndex; 5266 5267 emit->num_temp_arrays = MAX2(emit->num_temp_arrays, arrayID + 1); 5268 assert(emit->num_temp_arrays <= MAX_TEMP_ARRAYS); 5269 emit->num_temp_arrays = MIN2(emit->num_temp_arrays, MAX_TEMP_ARRAYS); 5270 5271 emit->temp_arrays[arrayID].start = first; 5272 emit->temp_arrays[arrayID].size = count; 5273 5274 /* Fill in the temp_map entries for this temp array */ 5275 for (i = 0; i < count; i++, tempIndex++) { 5276 emit->temp_map[tempIndex].arrayId = arrayID; 5277 emit->temp_map[tempIndex].index = i; 5278 } 5279} 5280 5281 5282/** 5283 * Emit the declaration for the temporary registers. 5284 */ 5285static boolean 5286emit_temporaries_declaration(struct svga_shader_emitter_v10 *emit) 5287{ 5288 unsigned total_temps, reg, i; 5289 5290 total_temps = emit->num_shader_temps; 5291 5292 /* If there is indirect access to non-indexable temps in the shader, 5293 * convert those temps to indexable temps. This works around a bug 5294 * in the GLSL->TGSI translator exposed in piglit test 5295 * glsl-1.20/execution/fs-const-array-of-struct-of-array.shader_test. 5296 * Internal temps added by the driver remain as non-indexable temps. 5297 */ 5298 if ((emit->info.indirect_files & (1 << TGSI_FILE_TEMPORARY)) && 5299 emit->num_temp_arrays == 0) { 5300 create_temp_array(emit, 1, 0, total_temps, 0); 5301 } 5302 5303 /* Allocate extra temps for specially-implemented instructions, 5304 * such as LIT. 5305 */ 5306 total_temps += MAX_INTERNAL_TEMPS; 5307 5308 /* Allocate extra temps for clip distance or clip vertex. 5309 */ 5310 if (emit->clip_mode == CLIP_DISTANCE) { 5311 /* We need to write the clip distance to a temporary register 5312 * first. Then it will be copied to the shadow copy for 5313 * the clip distance varying variable and stream output purpose. 5314 * It will also be copied to the actual CLIPDIST register 5315 * according to the enabled clip planes 5316 */ 5317 emit->clip_dist_tmp_index = total_temps++; 5318 if (emit->info.num_written_clipdistance > 4) 5319 total_temps++; /* second clip register */ 5320 } 5321 else if (emit->clip_mode == CLIP_VERTEX && emit->key.last_vertex_stage) { 5322 /* If the current shader is in the last vertex processing stage, 5323 * We need to convert the TGSI CLIPVERTEX output to one or more 5324 * clip distances. Allocate a temp reg for the clipvertex here. 
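 * The conversion itself is emitted later by emit_clip_vertex_instructions(),
 * which computes one DP4 against each enabled clip plane from this temp.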
5325 */ 5326 assert(emit->info.writes_clipvertex > 0); 5327 emit->clip_vertex_tmp_index = total_temps; 5328 total_temps++; 5329 } 5330 5331 if (emit->info.uses_vertexid) { 5332 assert(emit->unit == PIPE_SHADER_VERTEX); 5333 emit->vs.vertex_id_tmp_index = total_temps++; 5334 } 5335 5336 if (emit->unit == PIPE_SHADER_VERTEX || emit->unit == PIPE_SHADER_GEOMETRY) { 5337 if (emit->vposition.need_prescale || emit->key.vs.undo_viewport || 5338 emit->key.clip_plane_enable || 5339 emit->vposition.so_index != INVALID_INDEX) { 5340 emit->vposition.tmp_index = total_temps; 5341 total_temps += 1; 5342 } 5343 5344 if (emit->vposition.need_prescale) { 5345 emit->vposition.prescale_scale_index = total_temps++; 5346 emit->vposition.prescale_trans_index = total_temps++; 5347 } 5348 5349 if (emit->unit == PIPE_SHADER_VERTEX) { 5350 unsigned attrib_mask = (emit->key.vs.adjust_attrib_w_1 | 5351 emit->key.vs.adjust_attrib_itof | 5352 emit->key.vs.adjust_attrib_utof | 5353 emit->key.vs.attrib_is_bgra | 5354 emit->key.vs.attrib_puint_to_snorm | 5355 emit->key.vs.attrib_puint_to_uscaled | 5356 emit->key.vs.attrib_puint_to_sscaled); 5357 while (attrib_mask) { 5358 unsigned index = u_bit_scan(&attrib_mask); 5359 emit->vs.adjusted_input[index] = total_temps++; 5360 } 5361 } 5362 else if (emit->unit == PIPE_SHADER_GEOMETRY) { 5363 if (emit->key.gs.writes_viewport_index) 5364 emit->gs.viewport_index_tmp_index = total_temps++; 5365 } 5366 } 5367 else if (emit->unit == PIPE_SHADER_FRAGMENT) { 5368 if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS || 5369 emit->key.fs.write_color0_to_n_cbufs > 1) { 5370 /* Allocate a temp to hold the output color */ 5371 emit->fs.color_tmp_index = total_temps; 5372 total_temps += 1; 5373 } 5374 5375 if (emit->fs.face_input_index != INVALID_INDEX) { 5376 /* Allocate a temp for the +/-1 face register */ 5377 emit->fs.face_tmp_index = total_temps; 5378 total_temps += 1; 5379 } 5380 5381 if (emit->fs.fragcoord_input_index != INVALID_INDEX) { 5382 /* Allocate a temp for modified fragment position register */ 5383 emit->fs.fragcoord_tmp_index = total_temps; 5384 total_temps += 1; 5385 } 5386 5387 if (emit->fs.sample_pos_sys_index != INVALID_INDEX) { 5388 /* Allocate a temp for the sample position */ 5389 emit->fs.sample_pos_tmp_index = total_temps++; 5390 } 5391 } 5392 else if (emit->unit == PIPE_SHADER_TESS_EVAL) { 5393 if (emit->vposition.need_prescale) { 5394 emit->vposition.tmp_index = total_temps++; 5395 emit->vposition.prescale_scale_index = total_temps++; 5396 emit->vposition.prescale_trans_index = total_temps++; 5397 } 5398 5399 if (emit->tes.inner.tgsi_index) { 5400 emit->tes.inner.temp_index = total_temps; 5401 total_temps += 1; 5402 } 5403 5404 if (emit->tes.outer.tgsi_index) { 5405 emit->tes.outer.temp_index = total_temps; 5406 total_temps += 1; 5407 } 5408 } 5409 else if (emit->unit == PIPE_SHADER_TESS_CTRL) { 5410 if (emit->tcs.inner.tgsi_index != INVALID_INDEX) { 5411 if (!emit->tcs.control_point_phase) { 5412 emit->tcs.inner.temp_index = total_temps; 5413 total_temps += 1; 5414 } 5415 } 5416 if (emit->tcs.outer.tgsi_index != INVALID_INDEX) { 5417 if (!emit->tcs.control_point_phase) { 5418 emit->tcs.outer.temp_index = total_temps; 5419 total_temps += 1; 5420 } 5421 } 5422 5423 if (emit->tcs.control_point_phase && 5424 emit->info.reads_pervertex_outputs) { 5425 emit->tcs.control_point_tmp_index = total_temps; 5426 total_temps += emit->tcs.control_point_out_count; 5427 } 5428 else if (!emit->tcs.control_point_phase && 5429 emit->info.reads_perpatch_outputs) { 5430 5431 /* If there 
is indirect access to the patch constant outputs 5432 * in the control point phase, then an indexable temporary array 5433 * will be created for these patch constant outputs. 5434 * Note, indirect access can only be applicable to 5435 * patch constant outputs in the control point phase. 5436 */ 5437 if (emit->info.indirect_files & (1 << TGSI_FILE_OUTPUT)) { 5438 unsigned arrayID = 5439 emit->num_temp_arrays ? emit->num_temp_arrays : 1; 5440 create_temp_array(emit, arrayID, 0, 5441 emit->tcs.patch_generic_out_count, total_temps); 5442 } 5443 emit->tcs.patch_generic_tmp_index = total_temps; 5444 total_temps += emit->tcs.patch_generic_out_count; 5445 } 5446 5447 emit->tcs.invocation_id_tmp_index = total_temps++; 5448 } 5449 5450 for (i = 0; i < emit->num_address_regs; i++) { 5451 emit->address_reg_index[i] = total_temps++; 5452 } 5453 5454 /* Initialize the temp_map array which maps TGSI temp indexes to VGPU10 5455 * temp indexes. Basically, we compact all the non-array temp register 5456 * indexes into a consecutive series. 5457 * 5458 * Before, we may have some TGSI declarations like: 5459 * DCL TEMP[0..1], LOCAL 5460 * DCL TEMP[2..4], ARRAY(1), LOCAL 5461 * DCL TEMP[5..7], ARRAY(2), LOCAL 5462 * plus, some extra temps, like TEMP[8], TEMP[9] for misc things 5463 * 5464 * After, we'll have a map like this: 5465 * temp_map[0] = { array 0, index 0 } 5466 * temp_map[1] = { array 0, index 1 } 5467 * temp_map[2] = { array 1, index 0 } 5468 * temp_map[3] = { array 1, index 1 } 5469 * temp_map[4] = { array 1, index 2 } 5470 * temp_map[5] = { array 2, index 0 } 5471 * temp_map[6] = { array 2, index 1 } 5472 * temp_map[7] = { array 2, index 2 } 5473 * temp_map[8] = { array 0, index 2 } 5474 * temp_map[9] = { array 0, index 3 } 5475 * 5476 * We'll declare two arrays of 3 elements, plus a set of four non-indexed 5477 * temps numbered 0..3 5478 * 5479 * Any time we emit a temporary register index, we'll have to use the 5480 * temp_map[] table to convert the TGSI index to the VGPU10 index. 5481 * 5482 * Finally, we recompute the total_temps value here. 5483 */ 5484 reg = 0; 5485 for (i = 0; i < total_temps; i++) { 5486 if (emit->temp_map[i].arrayId == 0) { 5487 emit->temp_map[i].index = reg++; 5488 } 5489 } 5490 5491 if (0) { 5492 debug_printf("total_temps %u\n", total_temps); 5493 for (i = 0; i < total_temps; i++) { 5494 debug_printf("temp %u -> array %u index %u\n", 5495 i, emit->temp_map[i].arrayId, emit->temp_map[i].index); 5496 } 5497 } 5498 5499 total_temps = reg; 5500 5501 /* Emit declaration of ordinary temp registers */ 5502 if (total_temps > 0) { 5503 VGPU10OpcodeToken0 opcode0; 5504 5505 opcode0.value = 0; 5506 opcode0.opcodeType = VGPU10_OPCODE_DCL_TEMPS; 5507 5508 begin_emit_instruction(emit); 5509 emit_dword(emit, opcode0.value); 5510 emit_dword(emit, total_temps); 5511 end_emit_instruction(emit); 5512 } 5513 5514 /* Emit declarations for indexable temp arrays. Skip 0th entry since 5515 * it's unused. 
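 * Each such declaration is four dwords: the DCL_INDEXABLE_TEMP opcode
 * token, the array index, the number of registers in the array, and the
 * component count (always 4). For instance, a 3-register array 1 would
 * roughly disassemble as: dcl_indexableTemp x1[3], 4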
5516 */ 5517 for (i = 1; i < emit->num_temp_arrays; i++) { 5518 unsigned num_temps = emit->temp_arrays[i].size; 5519 5520 if (num_temps > 0) { 5521 VGPU10OpcodeToken0 opcode0; 5522 5523 opcode0.value = 0; 5524 opcode0.opcodeType = VGPU10_OPCODE_DCL_INDEXABLE_TEMP; 5525 5526 begin_emit_instruction(emit); 5527 emit_dword(emit, opcode0.value); 5528 emit_dword(emit, i); /* which array */ 5529 emit_dword(emit, num_temps); 5530 emit_dword(emit, 4); /* num components */ 5531 end_emit_instruction(emit); 5532 5533 total_temps += num_temps; 5534 } 5535 } 5536 5537 /* Check that the grand total of all regular and indexed temps is 5538 * under the limit. 5539 */ 5540 check_register_index(emit, VGPU10_OPCODE_DCL_TEMPS, total_temps - 1); 5541 5542 return TRUE; 5543} 5544 5545 5546static boolean 5547emit_constant_declaration(struct svga_shader_emitter_v10 *emit) 5548{ 5549 VGPU10OpcodeToken0 opcode0; 5550 VGPU10OperandToken0 operand0; 5551 unsigned total_consts, i; 5552 5553 opcode0.value = 0; 5554 opcode0.opcodeType = VGPU10_OPCODE_DCL_CONSTANT_BUFFER; 5555 opcode0.accessPattern = VGPU10_CB_IMMEDIATE_INDEXED; 5556 /* XXX or, access pattern = VGPU10_CB_DYNAMIC_INDEXED */ 5557 5558 operand0.value = 0; 5559 operand0.numComponents = VGPU10_OPERAND_4_COMPONENT; 5560 operand0.indexDimension = VGPU10_OPERAND_INDEX_2D; 5561 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 5562 operand0.index1Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 5563 operand0.operandType = VGPU10_OPERAND_TYPE_CONSTANT_BUFFER; 5564 operand0.selectionMode = VGPU10_OPERAND_4_COMPONENT_SWIZZLE_MODE; 5565 operand0.swizzleX = 0; 5566 operand0.swizzleY = 1; 5567 operand0.swizzleZ = 2; 5568 operand0.swizzleW = 3; 5569 5570 /** 5571 * Emit declaration for constant buffer [0]. We also allocate 5572 * room for the extra constants here. 5573 */ 5574 total_consts = emit->num_shader_consts[0]; 5575 5576 /* Now, allocate constant slots for the "extra" constants. 
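 * These are the vertex position prescale scale/translation factors, the
 * viewport-undo and vertex-id-bias constants, the user-defined clip
 * planes, and the per-sampler texcoord-scale / texture-buffer-size
 * constants allocated below.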
5577 * Note: it's critical that these extra constant locations 5578 * exactly match what's emitted by the "extra" constants code 5579 * in svga_state_constants.c 5580 */ 5581 5582 /* Vertex position scale/translation */ 5583 if (emit->vposition.need_prescale) { 5584 emit->vposition.prescale_cbuf_index = total_consts; 5585 total_consts += (2 * emit->vposition.num_prescale); 5586 } 5587 5588 if (emit->unit == PIPE_SHADER_VERTEX) { 5589 if (emit->key.vs.undo_viewport) { 5590 emit->vs.viewport_index = total_consts++; 5591 } 5592 if (emit->key.vs.need_vertex_id_bias) { 5593 emit->vs.vertex_id_bias_index = total_consts++; 5594 } 5595 } 5596 5597 /* user-defined clip planes */ 5598 if (emit->key.clip_plane_enable) { 5599 unsigned n = util_bitcount(emit->key.clip_plane_enable); 5600 assert(emit->unit != PIPE_SHADER_FRAGMENT && 5601 emit->unit != PIPE_SHADER_COMPUTE); 5602 for (i = 0; i < n; i++) { 5603 emit->clip_plane_const[i] = total_consts++; 5604 } 5605 } 5606 5607 for (i = 0; i < emit->num_samplers; i++) { 5608 5609 if (emit->key.tex[i].sampler_view) { 5610 /* Texcoord scale factors for RECT textures */ 5611 if (emit->key.tex[i].unnormalized) { 5612 emit->texcoord_scale_index[i] = total_consts++; 5613 } 5614 5615 /* Texture buffer sizes */ 5616 if (emit->key.tex[i].target == PIPE_BUFFER) { 5617 emit->texture_buffer_size_index[i] = total_consts++; 5618 } 5619 } 5620 } 5621 5622 if (total_consts > 0) { 5623 begin_emit_instruction(emit); 5624 emit_dword(emit, opcode0.value); 5625 emit_dword(emit, operand0.value); 5626 emit_dword(emit, 0); /* which const buffer slot */ 5627 emit_dword(emit, total_consts); 5628 end_emit_instruction(emit); 5629 } 5630 5631 /* Declare remaining constant buffers (UBOs) */ 5632 for (i = 1; i < ARRAY_SIZE(emit->num_shader_consts); i++) { 5633 if (emit->num_shader_consts[i] > 0) { 5634 begin_emit_instruction(emit); 5635 emit_dword(emit, opcode0.value); 5636 emit_dword(emit, operand0.value); 5637 emit_dword(emit, i); /* which const buffer slot */ 5638 emit_dword(emit, emit->num_shader_consts[i]); 5639 end_emit_instruction(emit); 5640 } 5641 } 5642 5643 return TRUE; 5644} 5645 5646 5647/** 5648 * Emit declarations for samplers. 5649 */ 5650static boolean 5651emit_sampler_declarations(struct svga_shader_emitter_v10 *emit) 5652{ 5653 unsigned i; 5654 5655 for (i = 0; i < emit->num_samplers; i++) { 5656 VGPU10OpcodeToken0 opcode0; 5657 VGPU10OperandToken0 operand0; 5658 5659 opcode0.value = 0; 5660 opcode0.opcodeType = VGPU10_OPCODE_DCL_SAMPLER; 5661 opcode0.samplerMode = VGPU10_SAMPLER_MODE_DEFAULT; 5662 5663 operand0.value = 0; 5664 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; 5665 operand0.operandType = VGPU10_OPERAND_TYPE_SAMPLER; 5666 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 5667 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 5668 5669 begin_emit_instruction(emit); 5670 emit_dword(emit, opcode0.value); 5671 emit_dword(emit, operand0.value); 5672 emit_dword(emit, i); 5673 end_emit_instruction(emit); 5674 } 5675 5676 return TRUE; 5677} 5678 5679 5680/** 5681 * Translate PIPE_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x. 5682 */ 5683static unsigned 5684pipe_texture_to_resource_dimension(enum tgsi_texture_type target, 5685 unsigned num_samples, 5686 boolean is_array) 5687{ 5688 switch (target) { 5689 case PIPE_BUFFER: 5690 return VGPU10_RESOURCE_DIMENSION_BUFFER; 5691 case PIPE_TEXTURE_1D: 5692 return VGPU10_RESOURCE_DIMENSION_TEXTURE1D; 5693 case PIPE_TEXTURE_2D: 5694 return num_samples > 2 ? 
VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS : 5695 VGPU10_RESOURCE_DIMENSION_TEXTURE2D; 5696 case PIPE_TEXTURE_RECT: 5697 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D; 5698 case PIPE_TEXTURE_3D: 5699 return VGPU10_RESOURCE_DIMENSION_TEXTURE3D; 5700 case PIPE_TEXTURE_CUBE: 5701 return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE; 5702 case PIPE_TEXTURE_1D_ARRAY: 5703 return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY 5704 : VGPU10_RESOURCE_DIMENSION_TEXTURE1D; 5705 case PIPE_TEXTURE_2D_ARRAY: 5706 if (num_samples > 2 && is_array) 5707 return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY; 5708 else if (is_array) 5709 return VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY; 5710 else 5711 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D; 5712 case PIPE_TEXTURE_CUBE_ARRAY: 5713 return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY : 5714 VGPU10_RESOURCE_DIMENSION_TEXTURECUBE; 5715 default: 5716 assert(!"Unexpected resource type"); 5717 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D; 5718 } 5719} 5720 5721 5722/** 5723 * Translate TGSI_TEXTURE_x to VGPU10_RESOURCE_DIMENSION_x. 5724 */ 5725static unsigned 5726tgsi_texture_to_resource_dimension(enum tgsi_texture_type target, 5727 unsigned num_samples, 5728 boolean is_array) 5729{ 5730 if (target == TGSI_TEXTURE_2D_MSAA && num_samples < 2) { 5731 target = TGSI_TEXTURE_2D; 5732 } 5733 else if (target == TGSI_TEXTURE_2D_ARRAY_MSAA && num_samples < 2) { 5734 target = TGSI_TEXTURE_2D_ARRAY; 5735 } 5736 5737 switch (target) { 5738 case TGSI_TEXTURE_BUFFER: 5739 return VGPU10_RESOURCE_DIMENSION_BUFFER; 5740 case TGSI_TEXTURE_1D: 5741 return VGPU10_RESOURCE_DIMENSION_TEXTURE1D; 5742 case TGSI_TEXTURE_2D: 5743 case TGSI_TEXTURE_RECT: 5744 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D; 5745 case TGSI_TEXTURE_3D: 5746 return VGPU10_RESOURCE_DIMENSION_TEXTURE3D; 5747 case TGSI_TEXTURE_CUBE: 5748 case TGSI_TEXTURE_SHADOWCUBE: 5749 return VGPU10_RESOURCE_DIMENSION_TEXTURECUBE; 5750 case TGSI_TEXTURE_SHADOW1D: 5751 return VGPU10_RESOURCE_DIMENSION_TEXTURE1D; 5752 case TGSI_TEXTURE_SHADOW2D: 5753 case TGSI_TEXTURE_SHADOWRECT: 5754 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D; 5755 case TGSI_TEXTURE_1D_ARRAY: 5756 case TGSI_TEXTURE_SHADOW1D_ARRAY: 5757 return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE1DARRAY 5758 : VGPU10_RESOURCE_DIMENSION_TEXTURE1D; 5759 case TGSI_TEXTURE_2D_ARRAY: 5760 case TGSI_TEXTURE_SHADOW2D_ARRAY: 5761 return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DARRAY 5762 : VGPU10_RESOURCE_DIMENSION_TEXTURE2D; 5763 case TGSI_TEXTURE_2D_MSAA: 5764 return VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS; 5765 case TGSI_TEXTURE_2D_ARRAY_MSAA: 5766 return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURE2DMSARRAY 5767 : VGPU10_RESOURCE_DIMENSION_TEXTURE2DMS; 5768 case TGSI_TEXTURE_CUBE_ARRAY: 5769 case TGSI_TEXTURE_SHADOWCUBE_ARRAY: 5770 return is_array ? VGPU10_RESOURCE_DIMENSION_TEXTURECUBEARRAY 5771 : VGPU10_RESOURCE_DIMENSION_TEXTURECUBE; 5772 default: 5773 assert(!"Unexpected resource type"); 5774 return VGPU10_RESOURCE_DIMENSION_TEXTURE2D; 5775 } 5776} 5777 5778 5779/** 5780 * Given a tgsi_return_type, return true iff it is an integer type. 
5781 */ 5782static boolean 5783is_integer_type(enum tgsi_return_type type) 5784{ 5785 switch (type) { 5786 case TGSI_RETURN_TYPE_SINT: 5787 case TGSI_RETURN_TYPE_UINT: 5788 return TRUE; 5789 case TGSI_RETURN_TYPE_FLOAT: 5790 case TGSI_RETURN_TYPE_UNORM: 5791 case TGSI_RETURN_TYPE_SNORM: 5792 return FALSE; 5793 case TGSI_RETURN_TYPE_COUNT: 5794 default: 5795 assert(!"is_integer_type: Unknown tgsi_return_type"); 5796 return FALSE; 5797 } 5798} 5799 5800 5801/** 5802 * Emit declarations for resources. 5803 * XXX When we're sure that all TGSI shaders will be generated with 5804 * sampler view declarations (Ex: DCL SVIEW[n], 2D, UINT) we may 5805 * rework this code. 5806 */ 5807static boolean 5808emit_resource_declarations(struct svga_shader_emitter_v10 *emit) 5809{ 5810 unsigned i; 5811 5812 /* Emit resource decl for each sampler */ 5813 for (i = 0; i < emit->num_samplers; i++) { 5814 VGPU10OpcodeToken0 opcode0; 5815 VGPU10OperandToken0 operand0; 5816 VGPU10ResourceReturnTypeToken return_type; 5817 VGPU10_RESOURCE_RETURN_TYPE rt; 5818 5819 opcode0.value = 0; 5820 opcode0.opcodeType = VGPU10_OPCODE_DCL_RESOURCE; 5821 if (emit->sampler_view[i] || !emit->key.tex[i].sampler_view) { 5822 opcode0.resourceDimension = 5823 tgsi_texture_to_resource_dimension(emit->sampler_target[i], 5824 emit->key.tex[i].num_samples, 5825 emit->key.tex[i].is_array); 5826 } 5827 else { 5828 opcode0.resourceDimension = 5829 pipe_texture_to_resource_dimension(emit->key.tex[i].target, 5830 emit->key.tex[i].num_samples, 5831 emit->key.tex[i].is_array); 5832 } 5833 opcode0.sampleCount = emit->key.tex[i].num_samples; 5834 operand0.value = 0; 5835 operand0.numComponents = VGPU10_OPERAND_0_COMPONENT; 5836 operand0.operandType = VGPU10_OPERAND_TYPE_RESOURCE; 5837 operand0.indexDimension = VGPU10_OPERAND_INDEX_1D; 5838 operand0.index0Representation = VGPU10_OPERAND_INDEX_IMMEDIATE32; 5839 5840#if 1 5841 /* convert TGSI_RETURN_TYPE_x to VGPU10_RETURN_TYPE_x */ 5842 STATIC_ASSERT(VGPU10_RETURN_TYPE_UNORM == TGSI_RETURN_TYPE_UNORM + 1); 5843 STATIC_ASSERT(VGPU10_RETURN_TYPE_SNORM == TGSI_RETURN_TYPE_SNORM + 1); 5844 STATIC_ASSERT(VGPU10_RETURN_TYPE_SINT == TGSI_RETURN_TYPE_SINT + 1); 5845 STATIC_ASSERT(VGPU10_RETURN_TYPE_UINT == TGSI_RETURN_TYPE_UINT + 1); 5846 STATIC_ASSERT(VGPU10_RETURN_TYPE_FLOAT == TGSI_RETURN_TYPE_FLOAT + 1); 5847 assert(emit->sampler_return_type[i] <= TGSI_RETURN_TYPE_FLOAT); 5848 if (emit->sampler_view[i] || !emit->key.tex[i].sampler_view) { 5849 rt = emit->sampler_return_type[i] + 1; 5850 } 5851 else { 5852 rt = emit->key.tex[i].sampler_return_type; 5853 } 5854#else 5855 switch (emit->sampler_return_type[i]) { 5856 case TGSI_RETURN_TYPE_UNORM: rt = VGPU10_RETURN_TYPE_UNORM; break; 5857 case TGSI_RETURN_TYPE_SNORM: rt = VGPU10_RETURN_TYPE_SNORM; break; 5858 case TGSI_RETURN_TYPE_SINT: rt = VGPU10_RETURN_TYPE_SINT; break; 5859 case TGSI_RETURN_TYPE_UINT: rt = VGPU10_RETURN_TYPE_UINT; break; 5860 case TGSI_RETURN_TYPE_FLOAT: rt = VGPU10_RETURN_TYPE_FLOAT; break; 5861 case TGSI_RETURN_TYPE_COUNT: 5862 default: 5863 rt = VGPU10_RETURN_TYPE_FLOAT; 5864 assert(!"emit_resource_declarations: Unknown tgsi_return_type"); 5865 } 5866#endif 5867 5868 return_type.value = 0; 5869 return_type.component0 = rt; 5870 return_type.component1 = rt; 5871 return_type.component2 = rt; 5872 return_type.component3 = rt; 5873 5874 begin_emit_instruction(emit); 5875 emit_dword(emit, opcode0.value); 5876 emit_dword(emit, operand0.value); 5877 emit_dword(emit, i); 5878 emit_dword(emit, return_type.value); 5879 end_emit_instruction(emit); 
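      /* Altogether four dwords were emitted for this view: the
       * DCL_RESOURCE opcode token, the resource operand token, the
       * resource index, and the packed per-component return type.
       */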
5880 } 5881 5882 return TRUE; 5883} 5884 5885/** 5886 * Emit an instruction with 1, 2 or 3 source registers. 5887 */ 5888static void 5889emit_instruction_opn(struct svga_shader_emitter_v10 *emit, 5890 unsigned opcode, 5891 const struct tgsi_full_dst_register *dst, 5892 const struct tgsi_full_src_register *src1, 5893 const struct tgsi_full_src_register *src2, 5894 const struct tgsi_full_src_register *src3, 5895 boolean saturate, bool precise) 5896{ 5897 begin_emit_instruction(emit); 5898 emit_opcode_precise(emit, opcode, saturate, precise); 5899 emit_dst_register(emit, dst); 5900 emit_src_register(emit, src1); 5901 if (src2) { 5902 emit_src_register(emit, src2); 5903 } 5904 if (src3) { 5905 emit_src_register(emit, src3); 5906 } 5907 end_emit_instruction(emit); 5908} 5909 5910static void 5911emit_instruction_op1(struct svga_shader_emitter_v10 *emit, 5912 unsigned opcode, 5913 const struct tgsi_full_dst_register *dst, 5914 const struct tgsi_full_src_register *src) 5915{ 5916 emit_instruction_opn(emit, opcode, dst, src, NULL, NULL, FALSE, FALSE); 5917} 5918 5919static void 5920emit_instruction_op2(struct svga_shader_emitter_v10 *emit, 5921 VGPU10_OPCODE_TYPE opcode, 5922 const struct tgsi_full_dst_register *dst, 5923 const struct tgsi_full_src_register *src1, 5924 const struct tgsi_full_src_register *src2) 5925{ 5926 emit_instruction_opn(emit, opcode, dst, src1, src2, NULL, FALSE, FALSE); 5927} 5928 5929static void 5930emit_instruction_op3(struct svga_shader_emitter_v10 *emit, 5931 VGPU10_OPCODE_TYPE opcode, 5932 const struct tgsi_full_dst_register *dst, 5933 const struct tgsi_full_src_register *src1, 5934 const struct tgsi_full_src_register *src2, 5935 const struct tgsi_full_src_register *src3) 5936{ 5937 emit_instruction_opn(emit, opcode, dst, src1, src2, src3, FALSE, FALSE); 5938} 5939 5940static void 5941emit_instruction_op0(struct svga_shader_emitter_v10 *emit, 5942 VGPU10_OPCODE_TYPE opcode) 5943{ 5944 begin_emit_instruction(emit); 5945 emit_opcode(emit, opcode, FALSE); 5946 end_emit_instruction(emit); 5947} 5948 5949/** 5950 * Tessellation inner/outer levels need to be stored into their 5951 * appropriate registers depending on prim_mode. 5952 */ 5953static void 5954store_tesslevels(struct svga_shader_emitter_v10 *emit) 5955{ 5956 int i; 5957 5958 /* Tessellation levels are required input/output in the hull shader. 
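 * (For quads the branches below write two inner and four outer factors,
 * for triangles one inner and three outer, and for lines two outer.)
 * Here we are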
5959 * emitting the inner/outer tessellation levels, either from 5960 * values provided in tcs or fallback default values which is 1.0 5961 */ 5962 if (emit->key.tcs.prim_mode == PIPE_PRIM_QUADS) { 5963 struct tgsi_full_src_register temp_src; 5964 5965 if (emit->tcs.inner.tgsi_index != INVALID_INDEX) 5966 temp_src = make_src_temp_reg(emit->tcs.inner.temp_index); 5967 else 5968 temp_src = make_immediate_reg_float(emit, 1.0f); 5969 5970 for (i = 0; i < 2; i++) { 5971 struct tgsi_full_src_register src = 5972 scalar_src(&temp_src, TGSI_SWIZZLE_X + i); 5973 struct tgsi_full_dst_register dst = 5974 make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.inner.out_index + i); 5975 dst = writemask_dst(&dst, TGSI_WRITEMASK_X); 5976 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); 5977 } 5978 5979 if (emit->tcs.outer.tgsi_index != INVALID_INDEX) 5980 temp_src = make_src_temp_reg(emit->tcs.outer.temp_index); 5981 else 5982 temp_src = make_immediate_reg_float(emit, 1.0f); 5983 5984 for (i = 0; i < 4; i++) { 5985 struct tgsi_full_src_register src = 5986 scalar_src(&temp_src, TGSI_SWIZZLE_X + i); 5987 struct tgsi_full_dst_register dst = 5988 make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.outer.out_index + i); 5989 dst = writemask_dst(&dst, TGSI_WRITEMASK_X); 5990 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); 5991 } 5992 } 5993 else if (emit->key.tcs.prim_mode == PIPE_PRIM_TRIANGLES) { 5994 struct tgsi_full_src_register temp_src; 5995 5996 if (emit->tcs.inner.tgsi_index != INVALID_INDEX) 5997 temp_src = make_src_temp_reg(emit->tcs.inner.temp_index); 5998 else 5999 temp_src = make_immediate_reg_float(emit, 1.0f); 6000 6001 struct tgsi_full_src_register src = 6002 scalar_src(&temp_src, TGSI_SWIZZLE_X); 6003 struct tgsi_full_dst_register dst = 6004 make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.inner.out_index); 6005 dst = writemask_dst(&dst, TGSI_WRITEMASK_X); 6006 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); 6007 6008 if (emit->tcs.outer.tgsi_index != INVALID_INDEX) 6009 temp_src = make_src_temp_reg(emit->tcs.outer.temp_index); 6010 else 6011 temp_src = make_immediate_reg_float(emit, 1.0f); 6012 6013 for (i = 0; i < 3; i++) { 6014 struct tgsi_full_src_register src = 6015 scalar_src(&temp_src, TGSI_SWIZZLE_X + i); 6016 struct tgsi_full_dst_register dst = 6017 make_dst_reg(TGSI_FILE_OUTPUT, emit->tcs.outer.out_index + i); 6018 dst = writemask_dst(&dst, TGSI_WRITEMASK_X); 6019 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); 6020 } 6021 } 6022 else if (emit->key.tcs.prim_mode == PIPE_PRIM_LINES) { 6023 if (emit->tcs.outer.tgsi_index != INVALID_INDEX) { 6024 struct tgsi_full_src_register temp_src = 6025 make_src_temp_reg(emit->tcs.outer.temp_index); 6026 for (i = 0; i < 2; i++) { 6027 struct tgsi_full_src_register src = 6028 scalar_src(&temp_src, TGSI_SWIZZLE_X + i); 6029 struct tgsi_full_dst_register dst = 6030 make_dst_reg(TGSI_FILE_OUTPUT, 6031 emit->tcs.outer.out_index + i); 6032 dst = writemask_dst(&dst, TGSI_WRITEMASK_X); 6033 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); 6034 } 6035 } 6036 } 6037 else { 6038 debug_printf("Unsupported primitive type"); 6039 } 6040} 6041 6042 6043/** 6044 * Emit the actual clip distance instructions to be used for clipping 6045 * by copying the clip distance from the temporary registers to the 6046 * CLIPDIST registers written with the enabled planes mask. 
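 * (At most two CLIPDIST registers are written, four distances each; the
 * write mask of each comes from successive 4-bit groups of
 * key.clip_plane_enable.)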
6047 * Also copy the clip distance from the temporary to the clip distance 6048 * shadow copy register which will be referenced by the input shader 6049 */ 6050static void 6051emit_clip_distance_instructions(struct svga_shader_emitter_v10 *emit) 6052{ 6053 struct tgsi_full_src_register tmp_clip_dist_src; 6054 struct tgsi_full_dst_register clip_dist_dst; 6055 6056 unsigned i; 6057 unsigned clip_plane_enable = emit->key.clip_plane_enable; 6058 unsigned clip_dist_tmp_index = emit->clip_dist_tmp_index; 6059 int num_written_clipdist = emit->info.num_written_clipdistance; 6060 6061 assert(emit->clip_dist_out_index != INVALID_INDEX); 6062 assert(emit->clip_dist_tmp_index != INVALID_INDEX); 6063 6064 /** 6065 * Temporary reset the temporary clip dist register index so 6066 * that the copy to the real clip dist register will not 6067 * attempt to copy to the temporary register again 6068 */ 6069 emit->clip_dist_tmp_index = INVALID_INDEX; 6070 6071 for (i = 0; i < 2 && num_written_clipdist > 0; i++, num_written_clipdist-=4) { 6072 6073 tmp_clip_dist_src = make_src_temp_reg(clip_dist_tmp_index + i); 6074 6075 /** 6076 * copy to the shadow copy for use by varying variable and 6077 * stream output. All clip distances 6078 * will be written regardless of the enabled clipping planes. 6079 */ 6080 clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT, 6081 emit->clip_dist_so_index + i); 6082 6083 /* MOV clip_dist_so, tmp_clip_dist */ 6084 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst, 6085 &tmp_clip_dist_src); 6086 6087 /** 6088 * copy those clip distances to enabled clipping planes 6089 * to CLIPDIST registers for clipping 6090 */ 6091 if (clip_plane_enable & 0xf) { 6092 clip_dist_dst = make_dst_reg(TGSI_FILE_OUTPUT, 6093 emit->clip_dist_out_index + i); 6094 clip_dist_dst = writemask_dst(&clip_dist_dst, clip_plane_enable & 0xf); 6095 6096 /* MOV CLIPDIST, tmp_clip_dist */ 6097 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &clip_dist_dst, 6098 &tmp_clip_dist_src); 6099 } 6100 /* four clip planes per clip register */ 6101 clip_plane_enable >>= 4; 6102 } 6103 /** 6104 * set the temporary clip dist register index back to the 6105 * temporary index for the next vertex 6106 */ 6107 emit->clip_dist_tmp_index = clip_dist_tmp_index; 6108} 6109 6110/* Declare clip distance output registers for user-defined clip planes 6111 * or the TGSI_CLIPVERTEX output. 6112 */ 6113static void 6114emit_clip_distance_declarations(struct svga_shader_emitter_v10 *emit) 6115{ 6116 unsigned num_clip_planes = util_bitcount(emit->key.clip_plane_enable); 6117 unsigned index = emit->num_outputs; 6118 unsigned plane_mask; 6119 6120 assert(emit->unit != PIPE_SHADER_FRAGMENT); 6121 assert(num_clip_planes <= 8); 6122 6123 if (emit->clip_mode != CLIP_LEGACY && 6124 emit->clip_mode != CLIP_VERTEX) { 6125 return; 6126 } 6127 6128 if (num_clip_planes == 0) 6129 return; 6130 6131 /* Convert clip vertex to clip distances only in the last vertex stage */ 6132 if (!emit->key.last_vertex_stage) 6133 return; 6134 6135 /* Declare one or two clip output registers. The number of components 6136 * in the mask reflects the number of clip planes. 
For example, if 5 6137 * clip planes are needed, we'll declare outputs similar to: 6138 * dcl_output_siv o2.xyzw, clip_distance 6139 * dcl_output_siv o3.x, clip_distance 6140 */ 6141 emit->clip_dist_out_index = index; /* save the starting clip dist reg index */ 6142 6143 plane_mask = (1 << num_clip_planes) - 1; 6144 if (plane_mask & 0xf) { 6145 unsigned cmask = plane_mask & VGPU10_OPERAND_4_COMPONENT_MASK_ALL; 6146 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index, 6147 VGPU10_NAME_CLIP_DISTANCE, cmask, TRUE, 6148 SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE); 6149 emit->num_outputs++; 6150 } 6151 if (plane_mask & 0xf0) { 6152 unsigned cmask = (plane_mask >> 4) & VGPU10_OPERAND_4_COMPONENT_MASK_ALL; 6153 emit_output_declaration(emit, VGPU10_OPCODE_DCL_OUTPUT_SIV, index + 1, 6154 VGPU10_NAME_CLIP_DISTANCE, cmask, TRUE, 6155 SVGADX_SIGNATURE_SEMANTIC_NAME_CLIP_DISTANCE); 6156 emit->num_outputs++; 6157 } 6158} 6159 6160 6161/** 6162 * Emit the instructions for writing to the clip distance registers 6163 * to handle legacy/automatic clip planes. 6164 * For each clip plane, the distance is the dot product of the vertex 6165 * position (found in TEMP[vpos_tmp_index]) and the clip plane coefficients. 6166 * This is not used when the shader has an explicit CLIPVERTEX or CLIPDISTANCE 6167 * output registers already declared. 6168 */ 6169static void 6170emit_clip_distance_from_vpos(struct svga_shader_emitter_v10 *emit, 6171 unsigned vpos_tmp_index) 6172{ 6173 unsigned i, num_clip_planes = util_bitcount(emit->key.clip_plane_enable); 6174 6175 assert(emit->clip_mode == CLIP_LEGACY); 6176 assert(num_clip_planes <= 8); 6177 6178 assert(emit->unit == PIPE_SHADER_VERTEX || 6179 emit->unit == PIPE_SHADER_GEOMETRY || 6180 emit->unit == PIPE_SHADER_TESS_EVAL); 6181 6182 for (i = 0; i < num_clip_planes; i++) { 6183 struct tgsi_full_dst_register dst; 6184 struct tgsi_full_src_register plane_src, vpos_src; 6185 unsigned reg_index = emit->clip_dist_out_index + i / 4; 6186 unsigned comp = i % 4; 6187 unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp; 6188 6189 /* create dst, src regs */ 6190 dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index); 6191 dst = writemask_dst(&dst, writemask); 6192 6193 plane_src = make_src_const_reg(emit->clip_plane_const[i]); 6194 vpos_src = make_src_temp_reg(vpos_tmp_index); 6195 6196 /* DP4 clip_dist, plane, vpos */ 6197 emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst, 6198 &plane_src, &vpos_src); 6199 } 6200} 6201 6202 6203/** 6204 * Emit the instructions for computing the clip distance results from 6205 * the clip vertex temporary. 6206 * For each clip plane, the distance is the dot product of the clip vertex 6207 * position (found in a temp reg) and the clip plane coefficients. 
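 * In other words, for each enabled plane i, a DP4 of the plane constant
 * and the clip vertex is written to component (i % 4) of CLIPDIST
 * register (i / 4).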
6208 */ 6209static void 6210emit_clip_vertex_instructions(struct svga_shader_emitter_v10 *emit) 6211{ 6212 const unsigned num_clip = util_bitcount(emit->key.clip_plane_enable); 6213 unsigned i; 6214 struct tgsi_full_dst_register dst; 6215 struct tgsi_full_src_register clipvert_src; 6216 const unsigned clip_vertex_tmp = emit->clip_vertex_tmp_index; 6217 6218 assert(emit->unit == PIPE_SHADER_VERTEX || 6219 emit->unit == PIPE_SHADER_GEOMETRY || 6220 emit->unit == PIPE_SHADER_TESS_EVAL); 6221 6222 assert(emit->clip_mode == CLIP_VERTEX); 6223 6224 clipvert_src = make_src_temp_reg(clip_vertex_tmp); 6225 6226 for (i = 0; i < num_clip; i++) { 6227 struct tgsi_full_src_register plane_src; 6228 unsigned reg_index = emit->clip_dist_out_index + i / 4; 6229 unsigned comp = i % 4; 6230 unsigned writemask = VGPU10_OPERAND_4_COMPONENT_MASK_X << comp; 6231 6232 /* create dst, src regs */ 6233 dst = make_dst_reg(TGSI_FILE_OUTPUT, reg_index); 6234 dst = writemask_dst(&dst, writemask); 6235 6236 plane_src = make_src_const_reg(emit->clip_plane_const[i]); 6237 6238 /* DP4 clip_dist, plane, vpos */ 6239 emit_instruction_op2(emit, VGPU10_OPCODE_DP4, &dst, 6240 &plane_src, &clipvert_src); 6241 } 6242 6243 /* copy temporary clip vertex register to the clip vertex register */ 6244 6245 assert(emit->clip_vertex_out_index != INVALID_INDEX); 6246 6247 /** 6248 * temporary reset the temporary clip vertex register index so 6249 * that copy to the clip vertex register will not attempt 6250 * to copy to the temporary register again 6251 */ 6252 emit->clip_vertex_tmp_index = INVALID_INDEX; 6253 6254 /* MOV clip_vertex, clip_vertex_tmp */ 6255 dst = make_dst_reg(TGSI_FILE_OUTPUT, emit->clip_vertex_out_index); 6256 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, 6257 &dst, &clipvert_src); 6258 6259 /** 6260 * set the temporary clip vertex register index back to the 6261 * temporary index for the next vertex 6262 */ 6263 emit->clip_vertex_tmp_index = clip_vertex_tmp; 6264} 6265 6266/** 6267 * Emit code to convert RGBA to BGRA 6268 */ 6269static void 6270emit_swap_r_b(struct svga_shader_emitter_v10 *emit, 6271 const struct tgsi_full_dst_register *dst, 6272 const struct tgsi_full_src_register *src) 6273{ 6274 struct tgsi_full_src_register bgra_src = 6275 swizzle_src(src, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_Y, TGSI_SWIZZLE_X, TGSI_SWIZZLE_W); 6276 6277 begin_emit_instruction(emit); 6278 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); 6279 emit_dst_register(emit, dst); 6280 emit_src_register(emit, &bgra_src); 6281 end_emit_instruction(emit); 6282} 6283 6284 6285/** Convert from 10_10_10_2 normalized to 10_10_10_2_snorm */ 6286static void 6287emit_puint_to_snorm(struct svga_shader_emitter_v10 *emit, 6288 const struct tgsi_full_dst_register *dst, 6289 const struct tgsi_full_src_register *src) 6290{ 6291 struct tgsi_full_src_register half = make_immediate_reg_float(emit, 0.5f); 6292 struct tgsi_full_src_register two = 6293 make_immediate_reg_float4(emit, 2.0f, 2.0f, 2.0f, 3.0f); 6294 struct tgsi_full_src_register neg_two = 6295 make_immediate_reg_float4(emit, -2.0f, -2.0f, -2.0f, -1.66666f); 6296 6297 unsigned val_tmp = get_temp_index(emit); 6298 struct tgsi_full_dst_register val_dst = make_dst_temp_reg(val_tmp); 6299 struct tgsi_full_src_register val_src = make_src_temp_reg(val_tmp); 6300 6301 unsigned bias_tmp = get_temp_index(emit); 6302 struct tgsi_full_dst_register bias_dst = make_dst_temp_reg(bias_tmp); 6303 struct tgsi_full_src_register bias_src = make_src_temp_reg(bias_tmp); 6304 6305 /* val = src * 2.0 */ 6306 emit_instruction_op2(emit, 
VGPU10_OPCODE_MUL, &val_dst, src, &two); 6307 6308 /* bias = src > 0.5 */ 6309 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &bias_dst, src, &half); 6310 6311 /* bias = bias & -2.0 */ 6312 emit_instruction_op2(emit, VGPU10_OPCODE_AND, &bias_dst, 6313 &bias_src, &neg_two); 6314 6315 /* dst = val + bias */ 6316 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, dst, 6317 &val_src, &bias_src); 6318 6319 free_temp_indexes(emit); 6320} 6321 6322 6323/** Convert from 10_10_10_2_unorm to 10_10_10_2_uscaled */ 6324static void 6325emit_puint_to_uscaled(struct svga_shader_emitter_v10 *emit, 6326 const struct tgsi_full_dst_register *dst, 6327 const struct tgsi_full_src_register *src) 6328{ 6329 struct tgsi_full_src_register scale = 6330 make_immediate_reg_float4(emit, 1023.0f, 1023.0f, 1023.0f, 3.0f); 6331 6332 /* dst = src * scale */ 6333 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, dst, src, &scale); 6334} 6335 6336 6337/** Convert from R32_UINT to 10_10_10_2_sscaled */ 6338static void 6339emit_puint_to_sscaled(struct svga_shader_emitter_v10 *emit, 6340 const struct tgsi_full_dst_register *dst, 6341 const struct tgsi_full_src_register *src) 6342{ 6343 struct tgsi_full_src_register lshift = 6344 make_immediate_reg_int4(emit, 22, 12, 2, 0); 6345 struct tgsi_full_src_register rshift = 6346 make_immediate_reg_int4(emit, 22, 22, 22, 30); 6347 6348 struct tgsi_full_src_register src_xxxx = scalar_src(src, TGSI_SWIZZLE_X); 6349 6350 unsigned tmp = get_temp_index(emit); 6351 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 6352 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 6353 6354 /* 6355 * r = (pixel << 22) >> 22; # signed int in [511, -512] 6356 * g = (pixel << 12) >> 22; # signed int in [511, -512] 6357 * b = (pixel << 2) >> 22; # signed int in [511, -512] 6358 * a = (pixel << 0) >> 30; # signed int in [1, -2] 6359 * dst = i_to_f(r,g,b,a); # convert to float 6360 */ 6361 emit_instruction_op2(emit, VGPU10_OPCODE_ISHL, &tmp_dst, 6362 &src_xxxx, &lshift); 6363 emit_instruction_op2(emit, VGPU10_OPCODE_ISHR, &tmp_dst, 6364 &tmp_src, &rshift); 6365 emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, dst, &tmp_src); 6366 6367 free_temp_indexes(emit); 6368} 6369 6370 6371/** 6372 * Emit code for TGSI_OPCODE_ARL or TGSI_OPCODE_UARL instruction. 6373 */ 6374static boolean 6375emit_arl_uarl(struct svga_shader_emitter_v10 *emit, 6376 const struct tgsi_full_instruction *inst) 6377{ 6378 unsigned index = inst->Dst[0].Register.Index; 6379 struct tgsi_full_dst_register dst; 6380 VGPU10_OPCODE_TYPE opcode; 6381 6382 assert(index < MAX_VGPU10_ADDR_REGS); 6383 dst = make_dst_temp_reg(emit->address_reg_index[index]); 6384 dst = writemask_dst(&dst, inst->Dst[0].Register.WriteMask); 6385 6386 /* ARL dst, s0 6387 * Translates into: 6388 * FTOI address_tmp, s0 6389 * 6390 * UARL dst, s0 6391 * Translates into: 6392 * MOV address_tmp, s0 6393 */ 6394 if (inst->Instruction.Opcode == TGSI_OPCODE_ARL) 6395 opcode = VGPU10_OPCODE_FTOI; 6396 else 6397 opcode = VGPU10_OPCODE_MOV; 6398 6399 emit_instruction_op1(emit, opcode, &dst, &inst->Src[0]); 6400 6401 return TRUE; 6402} 6403 6404 6405/** 6406 * Emit code for TGSI_OPCODE_CAL instruction. 
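 * The call target is encoded as a LABEL operand token followed by the
 * TGSI label index.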
6407 */ 6408static boolean 6409emit_cal(struct svga_shader_emitter_v10 *emit, 6410 const struct tgsi_full_instruction *inst) 6411{ 6412 unsigned label = inst->Label.Label; 6413 VGPU10OperandToken0 operand; 6414 operand.value = 0; 6415 operand.operandType = VGPU10_OPERAND_TYPE_LABEL; 6416 6417 begin_emit_instruction(emit); 6418 emit_dword(emit, operand.value); 6419 emit_dword(emit, label); 6420 end_emit_instruction(emit); 6421 6422 return TRUE; 6423} 6424 6425 6426/** 6427 * Emit code for TGSI_OPCODE_IABS instruction. 6428 */ 6429static boolean 6430emit_iabs(struct svga_shader_emitter_v10 *emit, 6431 const struct tgsi_full_instruction *inst) 6432{ 6433 /* dst.x = (src0.x < 0) ? -src0.x : src0.x 6434 * dst.y = (src0.y < 0) ? -src0.y : src0.y 6435 * dst.z = (src0.z < 0) ? -src0.z : src0.z 6436 * dst.w = (src0.w < 0) ? -src0.w : src0.w 6437 * 6438 * Translates into 6439 * IMAX dst, src, neg(src) 6440 */ 6441 struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]); 6442 emit_instruction_op2(emit, VGPU10_OPCODE_IMAX, &inst->Dst[0], 6443 &inst->Src[0], &neg_src); 6444 6445 return TRUE; 6446} 6447 6448 6449/** 6450 * Emit code for TGSI_OPCODE_CMP instruction. 6451 */ 6452static boolean 6453emit_cmp(struct svga_shader_emitter_v10 *emit, 6454 const struct tgsi_full_instruction *inst) 6455{ 6456 /* dst.x = (src0.x < 0) ? src1.x : src2.x 6457 * dst.y = (src0.y < 0) ? src1.y : src2.y 6458 * dst.z = (src0.z < 0) ? src1.z : src2.z 6459 * dst.w = (src0.w < 0) ? src1.w : src2.w 6460 * 6461 * Translates into 6462 * LT tmp, src0, 0.0 6463 * MOVC dst, tmp, src1, src2 6464 */ 6465 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 6466 unsigned tmp = get_temp_index(emit); 6467 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 6468 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 6469 6470 emit_instruction_opn(emit, VGPU10_OPCODE_LT, &tmp_dst, 6471 &inst->Src[0], &zero, NULL, FALSE, 6472 inst->Instruction.Precise); 6473 emit_instruction_opn(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], 6474 &tmp_src, &inst->Src[1], &inst->Src[2], 6475 inst->Instruction.Saturate, FALSE); 6476 6477 free_temp_indexes(emit); 6478 6479 return TRUE; 6480} 6481 6482 6483/** 6484 * Emit code for TGSI_OPCODE_DST instruction. 6485 */ 6486static boolean 6487emit_dst(struct svga_shader_emitter_v10 *emit, 6488 const struct tgsi_full_instruction *inst) 6489{ 6490 /* 6491 * dst.x = 1 6492 * dst.y = src0.y * src1.y 6493 * dst.z = src0.z 6494 * dst.w = src1.w 6495 */ 6496 6497 struct tgsi_full_src_register s0_yyyy = 6498 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y); 6499 struct tgsi_full_src_register s0_zzzz = 6500 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Z); 6501 struct tgsi_full_src_register s1_yyyy = 6502 scalar_src(&inst->Src[1], TGSI_SWIZZLE_Y); 6503 struct tgsi_full_src_register s1_wwww = 6504 scalar_src(&inst->Src[1], TGSI_SWIZZLE_W); 6505 6506 /* 6507 * If dst and either src0 and src1 are the same we need 6508 * to create a temporary for it and insert a extra move. 
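 * (That is why the per-component writes below go to move_dst, and a final
 * MOV copies the result to the real destination register.)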
6509 */ 6510 unsigned tmp_move = get_temp_index(emit); 6511 struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move); 6512 struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move); 6513 6514 /* MOV dst.x, 1.0 */ 6515 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 6516 struct tgsi_full_dst_register dst_x = 6517 writemask_dst(&move_dst, TGSI_WRITEMASK_X); 6518 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 6519 6520 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one); 6521 } 6522 6523 /* MUL dst.y, s0.y, s1.y */ 6524 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 6525 struct tgsi_full_dst_register dst_y = 6526 writemask_dst(&move_dst, TGSI_WRITEMASK_Y); 6527 6528 emit_instruction_opn(emit, VGPU10_OPCODE_MUL, &dst_y, &s0_yyyy, 6529 &s1_yyyy, NULL, inst->Instruction.Saturate, 6530 inst->Instruction.Precise); 6531 } 6532 6533 /* MOV dst.z, s0.z */ 6534 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 6535 struct tgsi_full_dst_register dst_z = 6536 writemask_dst(&move_dst, TGSI_WRITEMASK_Z); 6537 6538 emit_instruction_opn(emit, VGPU10_OPCODE_MOV, 6539 &dst_z, &s0_zzzz, NULL, NULL, 6540 inst->Instruction.Saturate, 6541 inst->Instruction.Precise); 6542 } 6543 6544 /* MOV dst.w, s1.w */ 6545 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 6546 struct tgsi_full_dst_register dst_w = 6547 writemask_dst(&move_dst, TGSI_WRITEMASK_W); 6548 6549 emit_instruction_opn(emit, VGPU10_OPCODE_MOV, 6550 &dst_w, &s1_wwww, NULL, NULL, 6551 inst->Instruction.Saturate, 6552 inst->Instruction.Precise); 6553 } 6554 6555 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src); 6556 free_temp_indexes(emit); 6557 6558 return TRUE; 6559} 6560 6561 6562/** 6563 * A helper function to return the stream index as specified in 6564 * the immediate register 6565 */ 6566static inline unsigned 6567find_stream_index(struct svga_shader_emitter_v10 *emit, 6568 const struct tgsi_full_src_register *src) 6569{ 6570 return emit->immediates[src->Register.Index][src->Register.SwizzleX].Int; 6571} 6572 6573 6574/** 6575 * Emit code for TGSI_OPCODE_ENDPRIM (GS only) 6576 */ 6577static boolean 6578emit_endprim(struct svga_shader_emitter_v10 *emit, 6579 const struct tgsi_full_instruction *inst) 6580{ 6581 assert(emit->unit == PIPE_SHADER_GEOMETRY); 6582 6583 begin_emit_instruction(emit); 6584 if (emit->version >= 50) { 6585 unsigned streamIndex = find_stream_index(emit, &inst->Src[0]); 6586 6587 if (emit->info.num_stream_output_components[streamIndex] == 0) { 6588 /** 6589 * If there is no output for this stream, discard this instruction. 6590 */ 6591 emit->discard_instruction = TRUE; 6592 } 6593 else { 6594 emit_opcode(emit, VGPU10_OPCODE_CUT_STREAM, FALSE); 6595 assert(inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE); 6596 emit_stream_register(emit, streamIndex); 6597 } 6598 } 6599 else { 6600 emit_opcode(emit, VGPU10_OPCODE_CUT, FALSE); 6601 } 6602 end_emit_instruction(emit); 6603 return TRUE; 6604} 6605 6606 6607/** 6608 * Emit code for TGSI_OPCODE_EX2 (2^x) instruction. 6609 */ 6610static boolean 6611emit_ex2(struct svga_shader_emitter_v10 *emit, 6612 const struct tgsi_full_instruction *inst) 6613{ 6614 /* Note that TGSI_OPCODE_EX2 computes only one value from src.x 6615 * while VGPU10 computes four values. 
6616 * 6617 * dst = EX2(src): 6618 * dst.xyzw = 2.0 ^ src.x 6619 */ 6620 6621 struct tgsi_full_src_register src_xxxx = 6622 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, 6623 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); 6624 6625 /* EXP tmp, s0.xxxx */ 6626 emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &inst->Dst[0], &src_xxxx, 6627 NULL, NULL, 6628 inst->Instruction.Saturate, 6629 inst->Instruction.Precise); 6630 6631 return TRUE; 6632} 6633 6634 6635/** 6636 * Emit code for TGSI_OPCODE_EXP instruction. 6637 */ 6638static boolean 6639emit_exp(struct svga_shader_emitter_v10 *emit, 6640 const struct tgsi_full_instruction *inst) 6641{ 6642 /* 6643 * dst.x = 2 ^ floor(s0.x) 6644 * dst.y = s0.x - floor(s0.x) 6645 * dst.z = 2 ^ s0.x 6646 * dst.w = 1.0 6647 */ 6648 6649 struct tgsi_full_src_register src_xxxx = 6650 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); 6651 unsigned tmp = get_temp_index(emit); 6652 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 6653 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 6654 6655 /* 6656 * If dst and src are the same we need to create 6657 * a temporary for it and insert a extra move. 6658 */ 6659 unsigned tmp_move = get_temp_index(emit); 6660 struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move); 6661 struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move); 6662 6663 /* only use X component of temp reg */ 6664 tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 6665 tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X); 6666 6667 /* ROUND_NI tmp.x, s0.x */ 6668 emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst, 6669 &src_xxxx); /* round to -infinity */ 6670 6671 /* EXP dst.x, tmp.x */ 6672 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 6673 struct tgsi_full_dst_register dst_x = 6674 writemask_dst(&move_dst, TGSI_WRITEMASK_X); 6675 6676 emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &dst_x, &tmp_src, 6677 NULL, NULL, 6678 inst->Instruction.Saturate, 6679 inst->Instruction.Precise); 6680 } 6681 6682 /* ADD dst.y, s0.x, -tmp */ 6683 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 6684 struct tgsi_full_dst_register dst_y = 6685 writemask_dst(&move_dst, TGSI_WRITEMASK_Y); 6686 struct tgsi_full_src_register neg_tmp_src = negate_src(&tmp_src); 6687 6688 emit_instruction_opn(emit, VGPU10_OPCODE_ADD, &dst_y, &src_xxxx, 6689 &neg_tmp_src, NULL, 6690 inst->Instruction.Saturate, 6691 inst->Instruction.Precise); 6692 } 6693 6694 /* EXP dst.z, s0.x */ 6695 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 6696 struct tgsi_full_dst_register dst_z = 6697 writemask_dst(&move_dst, TGSI_WRITEMASK_Z); 6698 6699 emit_instruction_opn(emit, VGPU10_OPCODE_EXP, &dst_z, &src_xxxx, 6700 NULL, NULL, 6701 inst->Instruction.Saturate, 6702 inst->Instruction.Precise); 6703 } 6704 6705 /* MOV dst.w, 1.0 */ 6706 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 6707 struct tgsi_full_dst_register dst_w = 6708 writemask_dst(&move_dst, TGSI_WRITEMASK_W); 6709 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 6710 6711 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one); 6712 } 6713 6714 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src); 6715 6716 free_temp_indexes(emit); 6717 6718 return TRUE; 6719} 6720 6721 6722/** 6723 * Emit code for TGSI_OPCODE_IF instruction. 
6724 */ 6725static boolean 6726emit_if(struct svga_shader_emitter_v10 *emit, 6727 const struct tgsi_full_src_register *src) 6728{ 6729 VGPU10OpcodeToken0 opcode0; 6730 6731 /* The src register should be a scalar */ 6732 assert(src->Register.SwizzleX == src->Register.SwizzleY && 6733 src->Register.SwizzleX == src->Register.SwizzleZ && 6734 src->Register.SwizzleX == src->Register.SwizzleW); 6735 6736 /* The only special thing here is that we need to set the 6737 * VGPU10_INSTRUCTION_TEST_NONZERO flag since we want to test if 6738 * src.x is non-zero. 6739 */ 6740 opcode0.value = 0; 6741 opcode0.opcodeType = VGPU10_OPCODE_IF; 6742 opcode0.testBoolean = VGPU10_INSTRUCTION_TEST_NONZERO; 6743 6744 begin_emit_instruction(emit); 6745 emit_dword(emit, opcode0.value); 6746 emit_src_register(emit, src); 6747 end_emit_instruction(emit); 6748 6749 return TRUE; 6750} 6751 6752 6753/** 6754 * Emit code for TGSI_OPCODE_KILL_IF instruction (kill fragment if any of 6755 * the register components are negative). 6756 */ 6757static boolean 6758emit_kill_if(struct svga_shader_emitter_v10 *emit, 6759 const struct tgsi_full_instruction *inst) 6760{ 6761 unsigned tmp = get_temp_index(emit); 6762 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 6763 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 6764 6765 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 6766 6767 struct tgsi_full_dst_register tmp_dst_x = 6768 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 6769 struct tgsi_full_src_register tmp_src_xxxx = 6770 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 6771 6772 /* tmp = src[0] < 0.0 */ 6773 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0], &zero); 6774 6775 if (!same_swizzle_terms(&inst->Src[0])) { 6776 /* If the swizzle is not XXXX, YYYY, ZZZZ or WWWW we need to 6777 * logically OR the swizzle terms. Most uses of KILL_IF only 6778 * test one channel so it's good to avoid these extra steps. 6779 */ 6780 struct tgsi_full_src_register tmp_src_yyyy = 6781 scalar_src(&tmp_src, TGSI_SWIZZLE_Y); 6782 struct tgsi_full_src_register tmp_src_zzzz = 6783 scalar_src(&tmp_src, TGSI_SWIZZLE_Z); 6784 struct tgsi_full_src_register tmp_src_wwww = 6785 scalar_src(&tmp_src, TGSI_SWIZZLE_W); 6786 6787 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx, 6788 &tmp_src_yyyy); 6789 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx, 6790 &tmp_src_zzzz); 6791 emit_instruction_op2(emit, VGPU10_OPCODE_OR, &tmp_dst_x, &tmp_src_xxxx, 6792 &tmp_src_wwww); 6793 } 6794 6795 begin_emit_instruction(emit); 6796 emit_discard_opcode(emit, TRUE); /* discard if src0.x is non-zero */ 6797 emit_src_register(emit, &tmp_src_xxxx); 6798 end_emit_instruction(emit); 6799 6800 free_temp_indexes(emit); 6801 6802 return TRUE; 6803} 6804 6805 6806/** 6807 * Emit code for TGSI_OPCODE_KILL instruction (unconditional discard). 6808 */ 6809static boolean 6810emit_kill(struct svga_shader_emitter_v10 *emit, 6811 const struct tgsi_full_instruction *inst) 6812{ 6813 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 6814 6815 /* DISCARD if 0.0 is zero */ 6816 begin_emit_instruction(emit); 6817 emit_discard_opcode(emit, FALSE); 6818 emit_src_register(emit, &zero); 6819 end_emit_instruction(emit); 6820 6821 return TRUE; 6822} 6823 6824 6825/** 6826 * Emit code for TGSI_OPCODE_LG2 instruction. 
6827 */ 6828static boolean 6829emit_lg2(struct svga_shader_emitter_v10 *emit, 6830 const struct tgsi_full_instruction *inst) 6831{ 6832 /* Note that TGSI_OPCODE_LG2 computes only one value from src.x 6833 * while VGPU10 computes four values. 6834 * 6835 * dst = LG2(src): 6836 * dst.xyzw = log2(src.x) 6837 */ 6838 6839 struct tgsi_full_src_register src_xxxx = 6840 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, 6841 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); 6842 6843 /* LOG tmp, s0.xxxx */ 6844 emit_instruction_opn(emit, VGPU10_OPCODE_LOG, 6845 &inst->Dst[0], &src_xxxx, NULL, NULL, 6846 inst->Instruction.Saturate, 6847 inst->Instruction.Precise); 6848 6849 return TRUE; 6850} 6851 6852 6853/** 6854 * Emit code for TGSI_OPCODE_LIT instruction. 6855 */ 6856static boolean 6857emit_lit(struct svga_shader_emitter_v10 *emit, 6858 const struct tgsi_full_instruction *inst) 6859{ 6860 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 6861 6862 /* 6863 * If dst and src are the same we need to create 6864 * a temporary for it and insert a extra move. 6865 */ 6866 unsigned tmp_move = get_temp_index(emit); 6867 struct tgsi_full_src_register move_src = make_src_temp_reg(tmp_move); 6868 struct tgsi_full_dst_register move_dst = make_dst_temp_reg(tmp_move); 6869 6870 /* 6871 * dst.x = 1 6872 * dst.y = max(src.x, 0) 6873 * dst.z = (src.x > 0) ? max(src.y, 0)^{clamp(src.w, -128, 128))} : 0 6874 * dst.w = 1 6875 */ 6876 6877 /* MOV dst.x, 1.0 */ 6878 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 6879 struct tgsi_full_dst_register dst_x = 6880 writemask_dst(&move_dst, TGSI_WRITEMASK_X); 6881 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_x, &one); 6882 } 6883 6884 /* MOV dst.w, 1.0 */ 6885 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 6886 struct tgsi_full_dst_register dst_w = 6887 writemask_dst(&move_dst, TGSI_WRITEMASK_W); 6888 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one); 6889 } 6890 6891 /* MAX dst.y, src.x, 0.0 */ 6892 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 6893 struct tgsi_full_dst_register dst_y = 6894 writemask_dst(&move_dst, TGSI_WRITEMASK_Y); 6895 struct tgsi_full_src_register zero = 6896 make_immediate_reg_float(emit, 0.0f); 6897 struct tgsi_full_src_register src_xxxx = 6898 swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X, 6899 TGSI_SWIZZLE_X, TGSI_SWIZZLE_X); 6900 6901 emit_instruction_opn(emit, VGPU10_OPCODE_MAX, &dst_y, &src_xxxx, 6902 &zero, NULL, inst->Instruction.Saturate, FALSE); 6903 } 6904 6905 /* 6906 * tmp1 = clamp(src.w, -128, 128); 6907 * MAX tmp1, src.w, -128 6908 * MIN tmp1, tmp1, 128 6909 * 6910 * tmp2 = max(tmp2, 0); 6911 * MAX tmp2, src.y, 0 6912 * 6913 * tmp1 = pow(tmp2, tmp1); 6914 * LOG tmp2, tmp2 6915 * MUL tmp1, tmp2, tmp1 6916 * EXP tmp1, tmp1 6917 * 6918 * tmp1 = (src.w == 0) ? 1 : tmp1; 6919 * EQ tmp2, 0, src.w 6920 * MOVC tmp1, tmp2, 1.0, tmp1 6921 * 6922 * dst.z = (0 < src.x) ? 
tmp1 : 0; 6923 * LT tmp2, 0, src.x 6924 * MOVC dst.z, tmp2, tmp1, 0.0 6925 */ 6926 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 6927 struct tgsi_full_dst_register dst_z = 6928 writemask_dst(&move_dst, TGSI_WRITEMASK_Z); 6929 6930 unsigned tmp1 = get_temp_index(emit); 6931 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); 6932 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); 6933 unsigned tmp2 = get_temp_index(emit); 6934 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); 6935 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); 6936 6937 struct tgsi_full_src_register src_xxxx = 6938 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); 6939 struct tgsi_full_src_register src_yyyy = 6940 scalar_src(&inst->Src[0], TGSI_SWIZZLE_Y); 6941 struct tgsi_full_src_register src_wwww = 6942 scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); 6943 6944 struct tgsi_full_src_register zero = 6945 make_immediate_reg_float(emit, 0.0f); 6946 struct tgsi_full_src_register lowerbound = 6947 make_immediate_reg_float(emit, -128.0f); 6948 struct tgsi_full_src_register upperbound = 6949 make_immediate_reg_float(emit, 128.0f); 6950 6951 emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp1_dst, &src_wwww, 6952 &lowerbound); 6953 emit_instruction_op2(emit, VGPU10_OPCODE_MIN, &tmp1_dst, &tmp1_src, 6954 &upperbound); 6955 emit_instruction_op2(emit, VGPU10_OPCODE_MAX, &tmp2_dst, &src_yyyy, 6956 &zero); 6957 6958 /* POW tmp1, tmp2, tmp1 */ 6959 /* LOG tmp2, tmp2 */ 6960 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp2_dst, &tmp2_src); 6961 6962 /* MUL tmp1, tmp2, tmp1 */ 6963 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp1_dst, &tmp2_src, 6964 &tmp1_src); 6965 6966 /* EXP tmp1, tmp1 */ 6967 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp1_dst, &tmp1_src); 6968 6969 /* EQ tmp2, 0, src.w */ 6970 emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp2_dst, &zero, &src_wwww); 6971 /* MOVC tmp1.z, tmp2, tmp1, 1.0 */ 6972 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp1_dst, 6973 &tmp2_src, &one, &tmp1_src); 6974 6975 /* LT tmp2, 0, src.x */ 6976 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp2_dst, &zero, &src_xxxx); 6977 /* MOVC dst.z, tmp2, tmp1, 0.0 */ 6978 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &dst_z, 6979 &tmp2_src, &tmp1_src, &zero); 6980 } 6981 6982 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &move_src); 6983 free_temp_indexes(emit); 6984 6985 return TRUE; 6986} 6987 6988 6989/** 6990 * Emit Level Of Detail Query (LODQ) instruction. 6991 */ 6992static boolean 6993emit_lodq(struct svga_shader_emitter_v10 *emit, 6994 const struct tgsi_full_instruction *inst) 6995{ 6996 const uint unit = inst->Src[1].Register.Index; 6997 6998 assert(emit->version >= 41); 6999 7000 /* LOD dst, coord, resource, sampler */ 7001 begin_emit_instruction(emit); 7002 emit_opcode(emit, VGPU10_OPCODE_LOD, FALSE); 7003 emit_dst_register(emit, &inst->Dst[0]); 7004 emit_src_register(emit, &inst->Src[0]); /* coord */ 7005 emit_resource_register(emit, unit); 7006 emit_sampler_register(emit, unit); 7007 end_emit_instruction(emit); 7008 7009 return TRUE; 7010} 7011 7012 7013/** 7014 * Emit code for TGSI_OPCODE_LOG instruction. 
7015 */ 7016static boolean 7017emit_log(struct svga_shader_emitter_v10 *emit, 7018 const struct tgsi_full_instruction *inst) 7019{ 7020 /* 7021 * dst.x = floor(lg2(abs(s0.x))) 7022 * dst.y = abs(s0.x) / (2 ^ floor(lg2(abs(s0.x)))) 7023 * dst.z = lg2(abs(s0.x)) 7024 * dst.w = 1.0 7025 */ 7026 7027 struct tgsi_full_src_register src_xxxx = 7028 scalar_src(&inst->Src[0], TGSI_SWIZZLE_X); 7029 unsigned tmp = get_temp_index(emit); 7030 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 7031 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 7032 struct tgsi_full_src_register abs_src_xxxx = absolute_src(&src_xxxx); 7033 7034 /* only use X component of temp reg */ 7035 tmp_dst = writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 7036 tmp_src = scalar_src(&tmp_src, TGSI_SWIZZLE_X); 7037 7038 /* LOG tmp.x, abs(s0.x) */ 7039 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XYZ) { 7040 emit_instruction_op1(emit, VGPU10_OPCODE_LOG, &tmp_dst, &abs_src_xxxx); 7041 } 7042 7043 /* MOV dst.z, tmp.x */ 7044 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) { 7045 struct tgsi_full_dst_register dst_z = 7046 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Z); 7047 7048 emit_instruction_opn(emit, VGPU10_OPCODE_MOV, 7049 &dst_z, &tmp_src, NULL, NULL, 7050 inst->Instruction.Saturate, FALSE); 7051 } 7052 7053 /* FLR tmp.x, tmp.x */ 7054 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) { 7055 emit_instruction_op1(emit, VGPU10_OPCODE_ROUND_NI, &tmp_dst, &tmp_src); 7056 } 7057 7058 /* MOV dst.x, tmp.x */ 7059 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) { 7060 struct tgsi_full_dst_register dst_x = 7061 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_X); 7062 7063 emit_instruction_opn(emit, VGPU10_OPCODE_MOV, 7064 &dst_x, &tmp_src, NULL, NULL, 7065 inst->Instruction.Saturate, FALSE); 7066 } 7067 7068 /* EXP tmp.x, tmp.x */ 7069 /* DIV dst.y, abs(s0.x), tmp.x */ 7070 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) { 7071 struct tgsi_full_dst_register dst_y = 7072 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_Y); 7073 7074 emit_instruction_op1(emit, VGPU10_OPCODE_EXP, &tmp_dst, &tmp_src); 7075 emit_instruction_opn(emit, VGPU10_OPCODE_DIV, &dst_y, &abs_src_xxxx, 7076 &tmp_src, NULL, inst->Instruction.Saturate, FALSE); 7077 } 7078 7079 /* MOV dst.w, 1.0 */ 7080 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) { 7081 struct tgsi_full_dst_register dst_w = 7082 writemask_dst(&inst->Dst[0], TGSI_WRITEMASK_W); 7083 struct tgsi_full_src_register one = 7084 make_immediate_reg_float(emit, 1.0f); 7085 7086 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst_w, &one); 7087 } 7088 7089 free_temp_indexes(emit); 7090 7091 return TRUE; 7092} 7093 7094 7095/** 7096 * Emit code for TGSI_OPCODE_LRP instruction. 
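 *
 * The translation below uses the algebraic identity
 *   lrp(s0, s1, s2) = s0 * s1 + (1 - s0) * s2 = s0 * (s1 - s2) + s2
 * which is why one ADD (with s2 negated) plus one MAD is sufficient.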
7097 */
7098static boolean
7099emit_lrp(struct svga_shader_emitter_v10 *emit,
7100         const struct tgsi_full_instruction *inst)
7101{
7102   /* dst = LRP(s0, s1, s2):
7103    *   dst = s0 * (s1 - s2) + s2
7104    * Translates into:
7105    *   ADD tmp, s1, -s2;        tmp = s1 - s2
7106    *   MAD dst, s0, tmp, s2;    dst = s0 * tmp + s2
7107    */
7108   unsigned tmp = get_temp_index(emit);
7109   struct tgsi_full_src_register src_tmp = make_src_temp_reg(tmp);
7110   struct tgsi_full_dst_register dst_tmp = make_dst_temp_reg(tmp);
7111   struct tgsi_full_src_register neg_src2 = negate_src(&inst->Src[2]);
7112
7113   /* ADD tmp, s1, -s2 */
7114   emit_instruction_opn(emit, VGPU10_OPCODE_ADD, &dst_tmp,
7115                        &inst->Src[1], &neg_src2, NULL, FALSE,
7116                        inst->Instruction.Precise);
7117
7118   /* MAD dst, s0, tmp, s2 */
7119   emit_instruction_opn(emit, VGPU10_OPCODE_MAD, &inst->Dst[0],
7120                        &inst->Src[0], &src_tmp, &inst->Src[2],
7121                        inst->Instruction.Saturate,
7122                        inst->Instruction.Precise);
7123
7124   free_temp_indexes(emit);
7125
7126   return TRUE;
7127}
7128
7129
7130/**
7131 * Emit code for TGSI_OPCODE_POW instruction.
7132 */
7133static boolean
7134emit_pow(struct svga_shader_emitter_v10 *emit,
7135         const struct tgsi_full_instruction *inst)
7136{
7137   /* Note that TGSI_OPCODE_POW computes only one value from src0.x and
7138    * src1.x while VGPU10 computes four values.
7139    *
7140    * dst = POW(src0, src1):
7141    *   dst.xyzw = src0.x ^ src1.x
7142    */
7143   unsigned tmp = get_temp_index(emit);
7144   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7145   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7146   struct tgsi_full_src_register src0_xxxx =
7147      swizzle_src(&inst->Src[0], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7148                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7149   struct tgsi_full_src_register src1_xxxx =
7150      swizzle_src(&inst->Src[1], TGSI_SWIZZLE_X, TGSI_SWIZZLE_X,
7151                  TGSI_SWIZZLE_X, TGSI_SWIZZLE_X);
7152
7153   /* LOG tmp, s0.xxxx */
7154   emit_instruction_opn(emit, VGPU10_OPCODE_LOG,
7155                        &tmp_dst, &src0_xxxx, NULL, NULL,
7156                        FALSE, inst->Instruction.Precise);
7157
7158   /* MUL tmp, tmp, s1.xxxx */
7159   emit_instruction_opn(emit, VGPU10_OPCODE_MUL,
7160                        &tmp_dst, &tmp_src, &src1_xxxx, NULL,
7161                        FALSE, inst->Instruction.Precise);
7162
7163   /* EXP dst, tmp */
7164   emit_instruction_opn(emit, VGPU10_OPCODE_EXP,
7165                        &inst->Dst[0], &tmp_src, NULL, NULL,
7166                        inst->Instruction.Saturate,
7167                        inst->Instruction.Precise);
7168
7169   /* free tmp */
7170   free_temp_indexes(emit);
7171
7172   return TRUE;
7173}
7174
7175
7176/**
7177 * Emit code for TGSI_OPCODE_RCP (reciprocal) instruction.
7178 */
7179static boolean
7180emit_rcp(struct svga_shader_emitter_v10 *emit,
7181         const struct tgsi_full_instruction *inst)
7182{
7183   if (emit->version >= 50) {
7184      /* Use the new RCP instruction. VGPU10_OPCODE_RCP is component-wise
7185       * while TGSI_OPCODE_RCP computes dst.xyzw = 1.0 / src.xxxx, so we need
7186       * to manipulate the src register's swizzle.
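       *
       * Illustrative example: if the incoming swizzle selects .y in its
       * X slot, replicating that slot below yields .yyyy, so every written
       * channel of dst receives 1.0 / src.y.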
7187 */ 7188 struct tgsi_full_src_register src = inst->Src[0]; 7189 src.Register.SwizzleY = 7190 src.Register.SwizzleZ = 7191 src.Register.SwizzleW = src.Register.SwizzleX; 7192 7193 begin_emit_instruction(emit); 7194 emit_opcode_precise(emit, VGPU10_OPCODE_RCP, 7195 inst->Instruction.Saturate, 7196 inst->Instruction.Precise); 7197 emit_dst_register(emit, &inst->Dst[0]); 7198 emit_src_register(emit, &src); 7199 end_emit_instruction(emit); 7200 } 7201 else { 7202 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 7203 7204 unsigned tmp = get_temp_index(emit); 7205 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 7206 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 7207 7208 struct tgsi_full_dst_register tmp_dst_x = 7209 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 7210 struct tgsi_full_src_register tmp_src_xxxx = 7211 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 7212 7213 /* DIV tmp.x, 1.0, s0 */ 7214 emit_instruction_opn(emit, VGPU10_OPCODE_DIV, 7215 &tmp_dst_x, &one, &inst->Src[0], NULL, 7216 FALSE, inst->Instruction.Precise); 7217 7218 /* MOV dst, tmp.xxxx */ 7219 emit_instruction_opn(emit, VGPU10_OPCODE_MOV, 7220 &inst->Dst[0], &tmp_src_xxxx, NULL, NULL, 7221 inst->Instruction.Saturate, 7222 inst->Instruction.Precise); 7223 7224 free_temp_indexes(emit); 7225 } 7226 7227 return TRUE; 7228} 7229 7230 7231/** 7232 * Emit code for TGSI_OPCODE_RSQ instruction. 7233 */ 7234static boolean 7235emit_rsq(struct svga_shader_emitter_v10 *emit, 7236 const struct tgsi_full_instruction *inst) 7237{ 7238 /* dst = RSQ(src): 7239 * dst.xyzw = 1 / sqrt(src.x) 7240 * Translates into: 7241 * RSQ tmp, src.x 7242 * MOV dst, tmp.xxxx 7243 */ 7244 7245 unsigned tmp = get_temp_index(emit); 7246 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 7247 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 7248 7249 struct tgsi_full_dst_register tmp_dst_x = 7250 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 7251 struct tgsi_full_src_register tmp_src_xxxx = 7252 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 7253 7254 /* RSQ tmp, src.x */ 7255 emit_instruction_opn(emit, VGPU10_OPCODE_RSQ, 7256 &tmp_dst_x, &inst->Src[0], NULL, NULL, 7257 FALSE, inst->Instruction.Precise); 7258 7259 /* MOV dst, tmp.xxxx */ 7260 emit_instruction_opn(emit, VGPU10_OPCODE_MOV, 7261 &inst->Dst[0], &tmp_src_xxxx, NULL, NULL, 7262 inst->Instruction.Saturate, 7263 inst->Instruction.Precise); 7264 7265 /* free tmp */ 7266 free_temp_indexes(emit); 7267 7268 return TRUE; 7269} 7270 7271 7272/** 7273 * Emit code for TGSI_OPCODE_SEQ (Set Equal) instruction. 7274 */ 7275static boolean 7276emit_seq(struct svga_shader_emitter_v10 *emit, 7277 const struct tgsi_full_instruction *inst) 7278{ 7279 /* dst = SEQ(s0, s1): 7280 * dst = s0 == s1 ? 1.0 : 0.0 (per component) 7281 * Translates into: 7282 * EQ tmp, s0, s1; tmp = s0 == s1 : 0xffffffff : 0 (per comp) 7283 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 
1.0 : 0.0 (per component) 7284 */ 7285 unsigned tmp = get_temp_index(emit); 7286 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 7287 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 7288 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 7289 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 7290 7291 /* EQ tmp, s0, s1 */ 7292 emit_instruction_op2(emit, VGPU10_OPCODE_EQ, &tmp_dst, &inst->Src[0], 7293 &inst->Src[1]); 7294 7295 /* MOVC dst, tmp, one, zero */ 7296 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 7297 &one, &zero); 7298 7299 free_temp_indexes(emit); 7300 7301 return TRUE; 7302} 7303 7304 7305/** 7306 * Emit code for TGSI_OPCODE_SGE (Set Greater than or Equal) instruction. 7307 */ 7308static boolean 7309emit_sge(struct svga_shader_emitter_v10 *emit, 7310 const struct tgsi_full_instruction *inst) 7311{ 7312 /* dst = SGE(s0, s1): 7313 * dst = s0 >= s1 ? 1.0 : 0.0 (per component) 7314 * Translates into: 7315 * GE tmp, s0, s1; tmp = s0 >= s1 : 0xffffffff : 0 (per comp) 7316 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) 7317 */ 7318 unsigned tmp = get_temp_index(emit); 7319 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 7320 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 7321 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 7322 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 7323 7324 /* GE tmp, s0, s1 */ 7325 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[0], 7326 &inst->Src[1]); 7327 7328 /* MOVC dst, tmp, one, zero */ 7329 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 7330 &one, &zero); 7331 7332 free_temp_indexes(emit); 7333 7334 return TRUE; 7335} 7336 7337 7338/** 7339 * Emit code for TGSI_OPCODE_SGT (Set Greater than) instruction. 7340 */ 7341static boolean 7342emit_sgt(struct svga_shader_emitter_v10 *emit, 7343 const struct tgsi_full_instruction *inst) 7344{ 7345 /* dst = SGT(s0, s1): 7346 * dst = s0 > s1 ? 1.0 : 0.0 (per component) 7347 * Translates into: 7348 * LT tmp, s1, s0; tmp = s1 < s0 ? 0xffffffff : 0 (per comp) 7349 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) 7350 */ 7351 unsigned tmp = get_temp_index(emit); 7352 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 7353 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 7354 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 7355 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 7356 7357 /* LT tmp, s1, s0 */ 7358 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[1], 7359 &inst->Src[0]); 7360 7361 /* MOVC dst, tmp, one, zero */ 7362 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 7363 &one, &zero); 7364 7365 free_temp_indexes(emit); 7366 7367 return TRUE; 7368} 7369 7370 7371/** 7372 * Emit code for TGSI_OPCODE_SIN and TGSI_OPCODE_COS instructions. 
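 *
 * VGPU10_OPCODE_SINCOS writes sine to its first destination register and
 * cosine to its second, so the code below routes the wanted result into a
 * temp, passes a null register for the other destination, and then
 * broadcasts temp.x to the instruction's dst.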
7373 */ 7374static boolean 7375emit_sincos(struct svga_shader_emitter_v10 *emit, 7376 const struct tgsi_full_instruction *inst) 7377{ 7378 unsigned tmp = get_temp_index(emit); 7379 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 7380 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 7381 7382 struct tgsi_full_src_register tmp_src_xxxx = 7383 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 7384 struct tgsi_full_dst_register tmp_dst_x = 7385 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 7386 7387 begin_emit_instruction(emit); 7388 emit_opcode(emit, VGPU10_OPCODE_SINCOS, FALSE); 7389 7390 if(inst->Instruction.Opcode == TGSI_OPCODE_SIN) 7391 { 7392 emit_dst_register(emit, &tmp_dst_x); /* first destination register */ 7393 emit_null_dst_register(emit); /* second destination register */ 7394 } 7395 else { 7396 emit_null_dst_register(emit); 7397 emit_dst_register(emit, &tmp_dst_x); 7398 } 7399 7400 emit_src_register(emit, &inst->Src[0]); 7401 end_emit_instruction(emit); 7402 7403 emit_instruction_opn(emit, VGPU10_OPCODE_MOV, 7404 &inst->Dst[0], &tmp_src_xxxx, NULL, NULL, 7405 inst->Instruction.Saturate, 7406 inst->Instruction.Precise); 7407 7408 free_temp_indexes(emit); 7409 7410 return TRUE; 7411} 7412 7413 7414/** 7415 * Emit code for TGSI_OPCODE_SLE (Set Less than or Equal) instruction. 7416 */ 7417static boolean 7418emit_sle(struct svga_shader_emitter_v10 *emit, 7419 const struct tgsi_full_instruction *inst) 7420{ 7421 /* dst = SLE(s0, s1): 7422 * dst = s0 <= s1 ? 1.0 : 0.0 (per component) 7423 * Translates into: 7424 * GE tmp, s1, s0; tmp = s1 >= s0 : 0xffffffff : 0 (per comp) 7425 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 1.0 : 0.0 (per component) 7426 */ 7427 unsigned tmp = get_temp_index(emit); 7428 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 7429 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 7430 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 7431 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 7432 7433 /* GE tmp, s1, s0 */ 7434 emit_instruction_op2(emit, VGPU10_OPCODE_GE, &tmp_dst, &inst->Src[1], 7435 &inst->Src[0]); 7436 7437 /* MOVC dst, tmp, one, zero */ 7438 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src, 7439 &one, &zero); 7440 7441 free_temp_indexes(emit); 7442 7443 return TRUE; 7444} 7445 7446 7447/** 7448 * Emit code for TGSI_OPCODE_SLT (Set Less than) instruction. 7449 */ 7450static boolean 7451emit_slt(struct svga_shader_emitter_v10 *emit, 7452 const struct tgsi_full_instruction *inst) 7453{ 7454 /* dst = SLT(s0, s1): 7455 * dst = s0 < s1 ? 1.0 : 0.0 (per component) 7456 * Translates into: 7457 * LT tmp, s0, s1; tmp = s0 < s1 ? 0xffffffff : 0 (per comp) 7458 * MOVC dst, tmp, 1.0, 0.0; dst = tmp ? 
1.0 : 0.0 (per component)
7459    */
7460   unsigned tmp = get_temp_index(emit);
7461   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7462   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7463   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7464   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7465
7466   /* LT tmp, s0, s1 */
7467   emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp_dst, &inst->Src[0],
7468                        &inst->Src[1]);
7469
7470   /* MOVC dst, tmp, one, zero */
7471   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7472                        &one, &zero);
7473
7474   free_temp_indexes(emit);
7475
7476   return TRUE;
7477}
7478
7479
7480/**
7481 * Emit code for TGSI_OPCODE_SNE (Set Not Equal) instruction.
7482 */
7483static boolean
7484emit_sne(struct svga_shader_emitter_v10 *emit,
7485         const struct tgsi_full_instruction *inst)
7486{
7487   /* dst = SNE(s0, s1):
7488    *   dst = s0 != s1 ? 1.0 : 0.0  (per component)
7489    * Translates into:
7490    *   NE tmp, s0, s1;           tmp = s0 != s1 ? 0xffffffff : 0 (per comp)
7491    *   MOVC dst, tmp, 1.0, 0.0;  dst = tmp ? 1.0 : 0.0 (per component)
7492    */
7493   unsigned tmp = get_temp_index(emit);
7494   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7495   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7496   struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f);
7497   struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f);
7498
7499   /* NE tmp, s0, s1 */
7500   emit_instruction_op2(emit, VGPU10_OPCODE_NE, &tmp_dst, &inst->Src[0],
7501                        &inst->Src[1]);
7502
7503   /* MOVC dst, tmp, one, zero */
7504   emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp_src,
7505                        &one, &zero);
7506
7507   free_temp_indexes(emit);
7508
7509   return TRUE;
7510}
7511
7512
7513/**
7514 * Emit code for TGSI_OPCODE_SSG (Set Sign) instruction.
7515 */
7516static boolean
7517emit_ssg(struct svga_shader_emitter_v10 *emit,
7518         const struct tgsi_full_instruction *inst)
7519{
7520   /* dst.x = (src.x > 0.0) ? 1.0 : (src.x < 0.0) ? -1.0 : 0.0
7521    * dst.y = (src.y > 0.0) ? 1.0 : (src.y < 0.0) ? -1.0 : 0.0
7522    * dst.z = (src.z > 0.0) ? 1.0 : (src.z < 0.0) ? -1.0 : 0.0
7523    * dst.w = (src.w > 0.0) ? 1.0 : (src.w < 0.0) ? -1.0 : 0.0
7524    * Translates into:
7525    *   LT tmp1, src, zero;          tmp1 = src < zero ? 0xffffffff : 0 (per comp)
7526    *   MOVC tmp2, tmp1, -1.0, 0.0;  tmp2 = tmp1 ? -1.0 : 0.0 (per component)
7527    *   LT tmp1, zero, src;          tmp1 = zero < src ? 0xffffffff : 0 (per comp)
7528    *   MOVC dst, tmp1, 1.0, tmp2;   dst = tmp1 ?
1.0 : tmp2 (per component) 7529 */ 7530 struct tgsi_full_src_register zero = 7531 make_immediate_reg_float(emit, 0.0f); 7532 struct tgsi_full_src_register one = 7533 make_immediate_reg_float(emit, 1.0f); 7534 struct tgsi_full_src_register neg_one = 7535 make_immediate_reg_float(emit, -1.0f); 7536 7537 unsigned tmp1 = get_temp_index(emit); 7538 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); 7539 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); 7540 7541 unsigned tmp2 = get_temp_index(emit); 7542 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); 7543 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); 7544 7545 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &inst->Src[0], 7546 &zero); 7547 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &tmp2_dst, &tmp1_src, 7548 &neg_one, &zero); 7549 emit_instruction_op2(emit, VGPU10_OPCODE_LT, &tmp1_dst, &zero, 7550 &inst->Src[0]); 7551 emit_instruction_op3(emit, VGPU10_OPCODE_MOVC, &inst->Dst[0], &tmp1_src, 7552 &one, &tmp2_src); 7553 7554 free_temp_indexes(emit); 7555 7556 return TRUE; 7557} 7558 7559 7560/** 7561 * Emit code for TGSI_OPCODE_ISSG (Integer Set Sign) instruction. 7562 */ 7563static boolean 7564emit_issg(struct svga_shader_emitter_v10 *emit, 7565 const struct tgsi_full_instruction *inst) 7566{ 7567 /* dst.x = (src.x > 0) ? 1 : (src.x < 0) ? -1 : 0 7568 * dst.y = (src.y > 0) ? 1 : (src.y < 0) ? -1 : 0 7569 * dst.z = (src.z > 0) ? 1 : (src.z < 0) ? -1 : 0 7570 * dst.w = (src.w > 0) ? 1 : (src.w < 0) ? -1 : 0 7571 * Translates into: 7572 * ILT tmp1, src, 0 tmp1 = src < 0 ? -1 : 0 (per component) 7573 * ILT tmp2, 0, src tmp2 = 0 < src ? -1 : 0 (per component) 7574 * IADD dst, tmp1, neg(tmp2) dst = tmp1 - tmp2 (per component) 7575 */ 7576 struct tgsi_full_src_register zero = make_immediate_reg_float(emit, 0.0f); 7577 7578 unsigned tmp1 = get_temp_index(emit); 7579 struct tgsi_full_src_register tmp1_src = make_src_temp_reg(tmp1); 7580 struct tgsi_full_dst_register tmp1_dst = make_dst_temp_reg(tmp1); 7581 7582 unsigned tmp2 = get_temp_index(emit); 7583 struct tgsi_full_src_register tmp2_src = make_src_temp_reg(tmp2); 7584 struct tgsi_full_dst_register tmp2_dst = make_dst_temp_reg(tmp2); 7585 7586 struct tgsi_full_src_register neg_tmp2 = negate_src(&tmp2_src); 7587 7588 emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp1_dst, 7589 &inst->Src[0], &zero); 7590 emit_instruction_op2(emit, VGPU10_OPCODE_ILT, &tmp2_dst, 7591 &zero, &inst->Src[0]); 7592 emit_instruction_op2(emit, VGPU10_OPCODE_IADD, &inst->Dst[0], 7593 &tmp1_src, &neg_tmp2); 7594 7595 free_temp_indexes(emit); 7596 7597 return TRUE; 7598} 7599 7600 7601/** 7602 * Emit a comparison instruction. The dest register will get 7603 * 0 or ~0 values depending on the outcome of comparing src0 to src1. 7604 */ 7605static void 7606emit_comparison(struct svga_shader_emitter_v10 *emit, 7607 SVGA3dCmpFunc func, 7608 const struct tgsi_full_dst_register *dst, 7609 const struct tgsi_full_src_register *src0, 7610 const struct tgsi_full_src_register *src1) 7611{ 7612 struct tgsi_full_src_register immediate; 7613 VGPU10OpcodeToken0 opcode0; 7614 boolean swapSrc = FALSE; 7615 7616 /* Sanity checks for svga vs. 
gallium enums */ 7617 STATIC_ASSERT(SVGA3D_CMP_LESS == (PIPE_FUNC_LESS + 1)); 7618 STATIC_ASSERT(SVGA3D_CMP_GREATEREQUAL == (PIPE_FUNC_GEQUAL + 1)); 7619 7620 opcode0.value = 0; 7621 7622 switch (func) { 7623 case SVGA3D_CMP_NEVER: 7624 immediate = make_immediate_reg_int(emit, 0); 7625 /* MOV dst, {0} */ 7626 begin_emit_instruction(emit); 7627 emit_dword(emit, VGPU10_OPCODE_MOV); 7628 emit_dst_register(emit, dst); 7629 emit_src_register(emit, &immediate); 7630 end_emit_instruction(emit); 7631 return; 7632 case SVGA3D_CMP_ALWAYS: 7633 immediate = make_immediate_reg_int(emit, -1); 7634 /* MOV dst, {-1} */ 7635 begin_emit_instruction(emit); 7636 emit_dword(emit, VGPU10_OPCODE_MOV); 7637 emit_dst_register(emit, dst); 7638 emit_src_register(emit, &immediate); 7639 end_emit_instruction(emit); 7640 return; 7641 case SVGA3D_CMP_LESS: 7642 opcode0.opcodeType = VGPU10_OPCODE_LT; 7643 break; 7644 case SVGA3D_CMP_EQUAL: 7645 opcode0.opcodeType = VGPU10_OPCODE_EQ; 7646 break; 7647 case SVGA3D_CMP_LESSEQUAL: 7648 opcode0.opcodeType = VGPU10_OPCODE_GE; 7649 swapSrc = TRUE; 7650 break; 7651 case SVGA3D_CMP_GREATER: 7652 opcode0.opcodeType = VGPU10_OPCODE_LT; 7653 swapSrc = TRUE; 7654 break; 7655 case SVGA3D_CMP_NOTEQUAL: 7656 opcode0.opcodeType = VGPU10_OPCODE_NE; 7657 break; 7658 case SVGA3D_CMP_GREATEREQUAL: 7659 opcode0.opcodeType = VGPU10_OPCODE_GE; 7660 break; 7661 default: 7662 assert(!"Unexpected comparison mode"); 7663 opcode0.opcodeType = VGPU10_OPCODE_EQ; 7664 } 7665 7666 begin_emit_instruction(emit); 7667 emit_dword(emit, opcode0.value); 7668 emit_dst_register(emit, dst); 7669 if (swapSrc) { 7670 emit_src_register(emit, src1); 7671 emit_src_register(emit, src0); 7672 } 7673 else { 7674 emit_src_register(emit, src0); 7675 emit_src_register(emit, src1); 7676 } 7677 end_emit_instruction(emit); 7678} 7679 7680 7681/** 7682 * Get texel/address offsets for a texture instruction. 7683 */ 7684static void 7685get_texel_offsets(const struct svga_shader_emitter_v10 *emit, 7686 const struct tgsi_full_instruction *inst, int offsets[3]) 7687{ 7688 if (inst->Texture.NumOffsets == 1) { 7689 /* According to OpenGL Shader Language spec the offsets are only 7690 * fetched from a previously-declared immediate/literal. 7691 */ 7692 const struct tgsi_texture_offset *off = inst->TexOffsets; 7693 const unsigned index = off[0].Index; 7694 const unsigned swizzleX = off[0].SwizzleX; 7695 const unsigned swizzleY = off[0].SwizzleY; 7696 const unsigned swizzleZ = off[0].SwizzleZ; 7697 const union tgsi_immediate_data *imm = emit->immediates[index]; 7698 7699 assert(inst->TexOffsets[0].File == TGSI_FILE_IMMEDIATE); 7700 7701 offsets[0] = imm[swizzleX].Int; 7702 offsets[1] = imm[swizzleY].Int; 7703 offsets[2] = imm[swizzleZ].Int; 7704 } 7705 else { 7706 offsets[0] = offsets[1] = offsets[2] = 0; 7707 } 7708} 7709 7710 7711/** 7712 * Set up the coordinate register for texture sampling. 7713 * When we're sampling from a RECT texture we have to scale the 7714 * unnormalized coordinate to a normalized coordinate. 7715 * We do that by multiplying the coordinate by an "extra" constant. 7716 * An alternative would be to use the RESINFO instruction to query the 7717 * texture's size. 
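 *
 * Illustrative example (assuming the scale constant holds the reciprocal
 * texture dimensions): for a 256x128 RECT texture the constant would be
 * roughly (1/256, 1/128, ...), so the MUL below maps texel-space
 * coordinates into the normalized [0, 1] range.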
7718 */
7719static struct tgsi_full_src_register
7720setup_texcoord(struct svga_shader_emitter_v10 *emit,
7721               unsigned unit,
7722               const struct tgsi_full_src_register *coord)
7723{
7724   if (emit->key.tex[unit].sampler_view && emit->key.tex[unit].unnormalized) {
7725      unsigned scale_index = emit->texcoord_scale_index[unit];
7726      unsigned tmp = get_temp_index(emit);
7727      struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
7728      struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
7729      struct tgsi_full_src_register scale_src = make_src_const_reg(scale_index);
7730
7731      if (emit->key.tex[unit].texel_bias) {
7732         /* To fix a texture coordinate rounding issue, a 0.0001 offset is
7733          * added. This fixes the piglit test fbo-blit-scaled-linear. */
7734         struct tgsi_full_src_register offset =
7735            make_immediate_reg_float(emit, 0.0001f);
7736
7737         /* ADD tmp, coord, offset */
7738         emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_dst,
7739                              coord, &offset);
7740         /* MUL tmp, tmp, scale */
7741         emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
7742                              &tmp_src, &scale_src);
7743      }
7744      else {
7745         /* MUL tmp, coord, const[] */
7746         emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_dst,
7747                              coord, &scale_src);
7748      }
7749      return tmp_src;
7750   }
7751   else {
7752      /* use texcoord as-is */
7753      return *coord;
7754   }
7755}
7756
7757
7758/**
7759 * For SAMPLE_C instructions, emit the extra src register which indicates
7760 * the reference/comparison value.
7761 */
7762static void
7763emit_tex_compare_refcoord(struct svga_shader_emitter_v10 *emit,
7764                          enum tgsi_texture_type target,
7765                          const struct tgsi_full_src_register *coord)
7766{
7767   struct tgsi_full_src_register coord_src_ref;
7768   int component;
7769
7770   assert(tgsi_is_shadow_target(target));
7771
7772   component = tgsi_util_get_shadow_ref_src_index(target) % 4;
7773   assert(component >= 0);
7774
7775   coord_src_ref = scalar_src(coord, component);
7776
7777   emit_src_register(emit, &coord_src_ref);
7778}
7779
7780
7781/**
7782 * Info for implementing texture swizzles.
7783 * The begin_tex_swizzle(), get_tex_swizzle_dst() and end_tex_swizzle()
7784 * functions use this to encapsulate the extra steps needed to perform
7785 * a texture swizzle, or shadow/depth comparisons.
7786 * The shadow/depth comparison is only done here for the cases where
7787 * there's no VGPU10 opcode (like texture bias lookup w/ shadow compare).
7788 */
7789struct tex_swizzle_info
7790{
7791   boolean swizzled;
7792   boolean shadow_compare;
7793   unsigned unit;
7794   enum tgsi_texture_type texture_target;  /**< TGSI_TEXTURE_x */
7795   struct tgsi_full_src_register tmp_src;
7796   struct tgsi_full_dst_register tmp_dst;
7797   const struct tgsi_full_dst_register *inst_dst;
7798   const struct tgsi_full_src_register *coord_src;
7799};
7800
7801
7802/**
7803 * Do setup for handling texture swizzles or shadow compares.
7804 * \param unit  the texture unit
7805 * \param inst  the TGSI texture instruction
7806 * \param shadow_compare  do shadow/depth comparison?
7807 * \param swz returns the swizzle info 7808 */ 7809static void 7810begin_tex_swizzle(struct svga_shader_emitter_v10 *emit, 7811 unsigned unit, 7812 const struct tgsi_full_instruction *inst, 7813 boolean shadow_compare, 7814 struct tex_swizzle_info *swz) 7815{ 7816 swz->swizzled = (emit->key.tex[unit].swizzle_r != TGSI_SWIZZLE_X || 7817 emit->key.tex[unit].swizzle_g != TGSI_SWIZZLE_Y || 7818 emit->key.tex[unit].swizzle_b != TGSI_SWIZZLE_Z || 7819 emit->key.tex[unit].swizzle_a != TGSI_SWIZZLE_W); 7820 7821 swz->shadow_compare = shadow_compare; 7822 swz->texture_target = inst->Texture.Texture; 7823 7824 if (swz->swizzled || shadow_compare) { 7825 /* Allocate temp register for the result of the SAMPLE instruction 7826 * and the source of the MOV/compare/swizzle instructions. 7827 */ 7828 unsigned tmp = get_temp_index(emit); 7829 swz->tmp_src = make_src_temp_reg(tmp); 7830 swz->tmp_dst = make_dst_temp_reg(tmp); 7831 7832 swz->unit = unit; 7833 } 7834 swz->inst_dst = &inst->Dst[0]; 7835 swz->coord_src = &inst->Src[0]; 7836 7837 emit->fs.shadow_compare_units |= shadow_compare << unit; 7838} 7839 7840 7841/** 7842 * Returns the register to put the SAMPLE instruction results into. 7843 * This will either be the original instruction dst reg (if no swizzle 7844 * and no shadow comparison) or a temporary reg if there is a swizzle. 7845 */ 7846static const struct tgsi_full_dst_register * 7847get_tex_swizzle_dst(const struct tex_swizzle_info *swz) 7848{ 7849 return (swz->swizzled || swz->shadow_compare) 7850 ? &swz->tmp_dst : swz->inst_dst; 7851} 7852 7853 7854/** 7855 * This emits the MOV instruction that actually implements a texture swizzle 7856 * and/or shadow comparison. 7857 */ 7858static void 7859end_tex_swizzle(struct svga_shader_emitter_v10 *emit, 7860 const struct tex_swizzle_info *swz) 7861{ 7862 if (swz->shadow_compare) { 7863 /* Emit extra instructions to compare the fetched texel value against 7864 * a texture coordinate component. The result of the comparison 7865 * is 0.0 or 1.0. 
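       *
       * The AND with the immediate {1.0} below relies on the comparison
       * writing either all-zero or all-one bits per component: ANDing
       * all-ones with the IEEE-754 bit pattern of 1.0f gives exactly 1.0f,
       * while ANDing zero gives 0.0f.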
7866 */ 7867 struct tgsi_full_src_register coord_src; 7868 struct tgsi_full_src_register texel_src = 7869 scalar_src(&swz->tmp_src, TGSI_SWIZZLE_X); 7870 struct tgsi_full_src_register one = 7871 make_immediate_reg_float(emit, 1.0f); 7872 /* convert gallium comparison func to SVGA comparison func */ 7873 SVGA3dCmpFunc compare_func = emit->key.tex[swz->unit].compare_func + 1; 7874 7875 int component = 7876 tgsi_util_get_shadow_ref_src_index(swz->texture_target) % 4; 7877 assert(component >= 0); 7878 coord_src = scalar_src(swz->coord_src, component); 7879 7880 /* COMPARE tmp, coord, texel */ 7881 emit_comparison(emit, compare_func, 7882 &swz->tmp_dst, &coord_src, &texel_src); 7883 7884 /* AND dest, tmp, {1.0} */ 7885 begin_emit_instruction(emit); 7886 emit_opcode(emit, VGPU10_OPCODE_AND, FALSE); 7887 if (swz->swizzled) { 7888 emit_dst_register(emit, &swz->tmp_dst); 7889 } 7890 else { 7891 emit_dst_register(emit, swz->inst_dst); 7892 } 7893 emit_src_register(emit, &swz->tmp_src); 7894 emit_src_register(emit, &one); 7895 end_emit_instruction(emit); 7896 } 7897 7898 if (swz->swizzled) { 7899 unsigned swz_r = emit->key.tex[swz->unit].swizzle_r; 7900 unsigned swz_g = emit->key.tex[swz->unit].swizzle_g; 7901 unsigned swz_b = emit->key.tex[swz->unit].swizzle_b; 7902 unsigned swz_a = emit->key.tex[swz->unit].swizzle_a; 7903 unsigned writemask_0 = 0, writemask_1 = 0; 7904 boolean int_tex = is_integer_type(emit->sampler_return_type[swz->unit]); 7905 7906 /* Swizzle w/out zero/one terms */ 7907 struct tgsi_full_src_register src_swizzled = 7908 swizzle_src(&swz->tmp_src, 7909 swz_r < PIPE_SWIZZLE_0 ? swz_r : PIPE_SWIZZLE_X, 7910 swz_g < PIPE_SWIZZLE_0 ? swz_g : PIPE_SWIZZLE_Y, 7911 swz_b < PIPE_SWIZZLE_0 ? swz_b : PIPE_SWIZZLE_Z, 7912 swz_a < PIPE_SWIZZLE_0 ? swz_a : PIPE_SWIZZLE_W); 7913 7914 /* MOV dst, color(tmp).<swizzle> */ 7915 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, 7916 swz->inst_dst, &src_swizzled); 7917 7918 /* handle swizzle zero terms */ 7919 writemask_0 = (((swz_r == PIPE_SWIZZLE_0) << 0) | 7920 ((swz_g == PIPE_SWIZZLE_0) << 1) | 7921 ((swz_b == PIPE_SWIZZLE_0) << 2) | 7922 ((swz_a == PIPE_SWIZZLE_0) << 3)); 7923 writemask_0 &= swz->inst_dst->Register.WriteMask; 7924 7925 if (writemask_0) { 7926 struct tgsi_full_src_register zero = int_tex ? 7927 make_immediate_reg_int(emit, 0) : 7928 make_immediate_reg_float(emit, 0.0f); 7929 struct tgsi_full_dst_register dst = 7930 writemask_dst(swz->inst_dst, writemask_0); 7931 7932 /* MOV dst.writemask_0, {0,0,0,0} */ 7933 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &zero); 7934 } 7935 7936 /* handle swizzle one terms */ 7937 writemask_1 = (((swz_r == PIPE_SWIZZLE_1) << 0) | 7938 ((swz_g == PIPE_SWIZZLE_1) << 1) | 7939 ((swz_b == PIPE_SWIZZLE_1) << 2) | 7940 ((swz_a == PIPE_SWIZZLE_1) << 3)); 7941 writemask_1 &= swz->inst_dst->Register.WriteMask; 7942 7943 if (writemask_1) { 7944 struct tgsi_full_src_register one = int_tex ? 7945 make_immediate_reg_int(emit, 1) : 7946 make_immediate_reg_float(emit, 1.0f); 7947 struct tgsi_full_dst_register dst = 7948 writemask_dst(swz->inst_dst, writemask_1); 7949 7950 /* MOV dst.writemask_1, {1,1,1,1} */ 7951 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &one); 7952 } 7953 } 7954} 7955 7956 7957/** 7958 * Emit code for TGSI_OPCODE_SAMPLE instruction. 
7959 */ 7960static boolean 7961emit_sample(struct svga_shader_emitter_v10 *emit, 7962 const struct tgsi_full_instruction *inst) 7963{ 7964 const unsigned resource_unit = inst->Src[1].Register.Index; 7965 const unsigned sampler_unit = inst->Src[2].Register.Index; 7966 struct tgsi_full_src_register coord; 7967 int offsets[3]; 7968 struct tex_swizzle_info swz_info; 7969 7970 begin_tex_swizzle(emit, sampler_unit, inst, FALSE, &swz_info); 7971 7972 get_texel_offsets(emit, inst, offsets); 7973 7974 coord = setup_texcoord(emit, resource_unit, &inst->Src[0]); 7975 7976 /* SAMPLE dst, coord(s0), resource, sampler */ 7977 begin_emit_instruction(emit); 7978 7979 /* NOTE: for non-fragment shaders, we should use VGPU10_OPCODE_SAMPLE_L 7980 * with LOD=0. But our virtual GPU accepts this as-is. 7981 */ 7982 emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE, 7983 inst->Instruction.Saturate, offsets); 7984 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 7985 emit_src_register(emit, &coord); 7986 emit_resource_register(emit, resource_unit); 7987 emit_sampler_register(emit, sampler_unit); 7988 end_emit_instruction(emit); 7989 7990 end_tex_swizzle(emit, &swz_info); 7991 7992 free_temp_indexes(emit); 7993 7994 return TRUE; 7995} 7996 7997 7998/** 7999 * Check if a texture instruction is valid. 8000 * An example of an invalid texture instruction is doing shadow comparison 8001 * with an integer-valued texture. 8002 * If we detect an invalid texture instruction, we replace it with: 8003 * MOV dst, {1,1,1,1}; 8004 * \return TRUE if valid, FALSE if invalid. 8005 */ 8006static boolean 8007is_valid_tex_instruction(struct svga_shader_emitter_v10 *emit, 8008 const struct tgsi_full_instruction *inst) 8009{ 8010 const unsigned unit = inst->Src[1].Register.Index; 8011 const enum tgsi_texture_type target = inst->Texture.Texture; 8012 boolean valid = TRUE; 8013 8014 if (tgsi_is_shadow_target(target) && 8015 is_integer_type(emit->sampler_return_type[unit])) { 8016 debug_printf("Invalid SAMPLE_C with an integer texture!\n"); 8017 valid = FALSE; 8018 } 8019 /* XXX might check for other conditions in the future here */ 8020 8021 if (!valid) { 8022 /* emit a MOV dst, {1,1,1,1} instruction. 
*/ 8023 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 8024 begin_emit_instruction(emit); 8025 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); 8026 emit_dst_register(emit, &inst->Dst[0]); 8027 emit_src_register(emit, &one); 8028 end_emit_instruction(emit); 8029 } 8030 8031 return valid; 8032} 8033 8034 8035/** 8036 * Emit code for TGSI_OPCODE_TEX (simple texture lookup) 8037 */ 8038static boolean 8039emit_tex(struct svga_shader_emitter_v10 *emit, 8040 const struct tgsi_full_instruction *inst) 8041{ 8042 const uint unit = inst->Src[1].Register.Index; 8043 const enum tgsi_texture_type target = inst->Texture.Texture; 8044 VGPU10_OPCODE_TYPE opcode; 8045 struct tgsi_full_src_register coord; 8046 int offsets[3]; 8047 struct tex_swizzle_info swz_info; 8048 8049 /* check that the sampler returns a float */ 8050 if (!is_valid_tex_instruction(emit, inst)) 8051 return TRUE; 8052 8053 begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); 8054 8055 get_texel_offsets(emit, inst, offsets); 8056 8057 coord = setup_texcoord(emit, unit, &inst->Src[0]); 8058 8059 /* SAMPLE dst, coord(s0), resource, sampler */ 8060 begin_emit_instruction(emit); 8061 8062 if (tgsi_is_shadow_target(target)) 8063 opcode = VGPU10_OPCODE_SAMPLE_C; 8064 else 8065 opcode = VGPU10_OPCODE_SAMPLE; 8066 8067 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); 8068 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 8069 emit_src_register(emit, &coord); 8070 emit_resource_register(emit, unit); 8071 emit_sampler_register(emit, unit); 8072 if (opcode == VGPU10_OPCODE_SAMPLE_C) { 8073 emit_tex_compare_refcoord(emit, target, &coord); 8074 } 8075 end_emit_instruction(emit); 8076 8077 end_tex_swizzle(emit, &swz_info); 8078 8079 free_temp_indexes(emit); 8080 8081 return TRUE; 8082} 8083 8084/** 8085 * Emit code for TGSI_OPCODE_TG4 (texture lookup for texture gather) 8086 */ 8087static boolean 8088emit_tg4(struct svga_shader_emitter_v10 *emit, 8089 const struct tgsi_full_instruction *inst) 8090{ 8091 const uint unit = inst->Src[2].Register.Index; 8092 struct tgsi_full_src_register src; 8093 struct tgsi_full_src_register offset_src, sampler, ref; 8094 int offsets[3]; 8095 8096 /* check that the sampler returns a float */ 8097 if (!is_valid_tex_instruction(emit, inst)) 8098 return TRUE; 8099 8100 if (emit->version >= 50) { 8101 unsigned target = inst->Texture.Texture; 8102 int index = inst->Src[1].Register.Index; 8103 const union tgsi_immediate_data *imm = emit->immediates[index]; 8104 int select_comp = imm[inst->Src[1].Register.SwizzleX].Int; 8105 unsigned select_swizzle = PIPE_SWIZZLE_X; 8106 8107 if (!tgsi_is_shadow_target(target)) { 8108 switch (select_comp) { 8109 case 0: 8110 select_swizzle = emit->key.tex[unit].swizzle_r; 8111 break; 8112 case 1: 8113 select_swizzle = emit->key.tex[unit].swizzle_g; 8114 break; 8115 case 2: 8116 select_swizzle = emit->key.tex[unit].swizzle_b; 8117 break; 8118 case 3: 8119 select_swizzle = emit->key.tex[unit].swizzle_a; 8120 break; 8121 default: 8122 assert(!"Unexpected component in texture gather swizzle"); 8123 } 8124 } 8125 else { 8126 select_swizzle = emit->key.tex[unit].swizzle_r; 8127 } 8128 8129 if (select_swizzle == PIPE_SWIZZLE_1) { 8130 src = make_immediate_reg_float(emit, 1.0); 8131 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src); 8132 return TRUE; 8133 } 8134 else if (select_swizzle == PIPE_SWIZZLE_0) { 8135 src = make_immediate_reg_float(emit, 0.0); 8136 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src); 8137 
return TRUE; 8138 } 8139 8140 src = setup_texcoord(emit, unit, &inst->Src[0]); 8141 8142 /* GATHER4 dst, coord, resource, sampler */ 8143 /* GATHER4_C dst, coord, resource, sampler ref */ 8144 /* GATHER4_PO dst, coord, offset resource, sampler */ 8145 /* GATHER4_PO_C dst, coord, offset resource, sampler, ref */ 8146 begin_emit_instruction(emit); 8147 if (inst->Texture.NumOffsets == 1) { 8148 if (tgsi_is_shadow_target(target)) { 8149 emit_opcode(emit, VGPU10_OPCODE_GATHER4_PO_C, 8150 inst->Instruction.Saturate); 8151 } 8152 else { 8153 emit_opcode(emit, VGPU10_OPCODE_GATHER4_PO, 8154 inst->Instruction.Saturate); 8155 } 8156 } 8157 else { 8158 if (tgsi_is_shadow_target(target)) { 8159 emit_opcode(emit, VGPU10_OPCODE_GATHER4_C, 8160 inst->Instruction.Saturate); 8161 } 8162 else { 8163 emit_opcode(emit, VGPU10_OPCODE_GATHER4, 8164 inst->Instruction.Saturate); 8165 } 8166 } 8167 8168 emit_dst_register(emit, &inst->Dst[0]); 8169 emit_src_register(emit, &src); 8170 if (inst->Texture.NumOffsets == 1) { 8171 /* offset */ 8172 offset_src = make_src_reg(inst->TexOffsets[0].File, 8173 inst->TexOffsets[0].Index); 8174 offset_src = swizzle_src(&offset_src, inst->TexOffsets[0].SwizzleX, 8175 inst->TexOffsets[0].SwizzleY, 8176 inst->TexOffsets[0].SwizzleZ, 8177 TGSI_SWIZZLE_W); 8178 emit_src_register(emit, &offset_src); 8179 } 8180 8181 /* resource */ 8182 emit_resource_register(emit, unit); 8183 8184 /* sampler */ 8185 sampler = make_src_reg(TGSI_FILE_SAMPLER, unit); 8186 sampler.Register.SwizzleX = 8187 sampler.Register.SwizzleY = 8188 sampler.Register.SwizzleZ = 8189 sampler.Register.SwizzleW = select_swizzle; 8190 emit_src_register(emit, &sampler); 8191 8192 if (tgsi_is_shadow_target(target)) { 8193 /* ref */ 8194 if (target == TGSI_TEXTURE_SHADOWCUBE_ARRAY) { 8195 ref = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); 8196 emit_tex_compare_refcoord(emit, target, &ref); 8197 } 8198 else { 8199 emit_tex_compare_refcoord(emit, target, &src); 8200 } 8201 } 8202 8203 end_emit_instruction(emit); 8204 free_temp_indexes(emit); 8205 } 8206 else { 8207 /* Only a single channel is supported in SM4_1 and we report 8208 * PIPE_CAP_MAX_TEXTURE_GATHER_COMPONENTS = 1. 8209 * Only the 0th component will be gathered. 
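       *
       * The switch below folds the unit's swizzle_r into the result:
       * X emits a real GATHER4, W and 1 return the constant 1.0, and
       * Y, Z, 0 (or anything unexpected) return 0.0.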
8210 */ 8211 switch (emit->key.tex[unit].swizzle_r) { 8212 case PIPE_SWIZZLE_X: 8213 get_texel_offsets(emit, inst, offsets); 8214 src = setup_texcoord(emit, unit, &inst->Src[0]); 8215 8216 /* Gather dst, coord, resource, sampler */ 8217 begin_emit_instruction(emit); 8218 emit_sample_opcode(emit, VGPU10_OPCODE_GATHER4, 8219 inst->Instruction.Saturate, offsets); 8220 emit_dst_register(emit, &inst->Dst[0]); 8221 emit_src_register(emit, &src); 8222 emit_resource_register(emit, unit); 8223 8224 /* sampler */ 8225 sampler = make_src_reg(TGSI_FILE_SAMPLER, unit); 8226 sampler.Register.SwizzleX = 8227 sampler.Register.SwizzleY = 8228 sampler.Register.SwizzleZ = 8229 sampler.Register.SwizzleW = PIPE_SWIZZLE_X; 8230 emit_src_register(emit, &sampler); 8231 8232 end_emit_instruction(emit); 8233 break; 8234 case PIPE_SWIZZLE_W: 8235 case PIPE_SWIZZLE_1: 8236 src = make_immediate_reg_float(emit, 1.0); 8237 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src); 8238 break; 8239 case PIPE_SWIZZLE_Y: 8240 case PIPE_SWIZZLE_Z: 8241 case PIPE_SWIZZLE_0: 8242 default: 8243 src = make_immediate_reg_float(emit, 0.0); 8244 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &src); 8245 break; 8246 } 8247 } 8248 8249 return TRUE; 8250} 8251 8252 8253 8254/** 8255 * Emit code for TGSI_OPCODE_TEX2 (texture lookup for shadow cube map arrays) 8256 */ 8257static boolean 8258emit_tex2(struct svga_shader_emitter_v10 *emit, 8259 const struct tgsi_full_instruction *inst) 8260{ 8261 const uint unit = inst->Src[2].Register.Index; 8262 unsigned target = inst->Texture.Texture; 8263 struct tgsi_full_src_register coord, ref; 8264 int offsets[3]; 8265 struct tex_swizzle_info swz_info; 8266 8267 /* check that the sampler returns a float */ 8268 if (!is_valid_tex_instruction(emit, inst)) 8269 return TRUE; 8270 8271 begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); 8272 8273 get_texel_offsets(emit, inst, offsets); 8274 8275 coord = setup_texcoord(emit, unit, &inst->Src[0]); 8276 ref = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); 8277 8278 /* SAMPLE_C dst, coord, resource, sampler, ref */ 8279 begin_emit_instruction(emit); 8280 emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_C, 8281 inst->Instruction.Saturate, offsets); 8282 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 8283 emit_src_register(emit, &coord); 8284 emit_resource_register(emit, unit); 8285 emit_sampler_register(emit, unit); 8286 emit_tex_compare_refcoord(emit, target, &ref); 8287 end_emit_instruction(emit); 8288 8289 end_tex_swizzle(emit, &swz_info); 8290 8291 free_temp_indexes(emit); 8292 8293 return TRUE; 8294} 8295 8296 8297/** 8298 * Emit code for TGSI_OPCODE_TXP (projective texture) 8299 */ 8300static boolean 8301emit_txp(struct svga_shader_emitter_v10 *emit, 8302 const struct tgsi_full_instruction *inst) 8303{ 8304 const uint unit = inst->Src[1].Register.Index; 8305 const enum tgsi_texture_type target = inst->Texture.Texture; 8306 VGPU10_OPCODE_TYPE opcode; 8307 int offsets[3]; 8308 unsigned tmp = get_temp_index(emit); 8309 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 8310 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 8311 struct tgsi_full_src_register src0_wwww = 8312 scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); 8313 struct tgsi_full_src_register coord; 8314 struct tex_swizzle_info swz_info; 8315 8316 /* check that the sampler returns a float */ 8317 if (!is_valid_tex_instruction(emit, inst)) 8318 return TRUE; 8319 8320 begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); 8321 8322 
get_texel_offsets(emit, inst, offsets); 8323 8324 coord = setup_texcoord(emit, unit, &inst->Src[0]); 8325 8326 /* DIV tmp, coord, coord.wwww */ 8327 emit_instruction_op2(emit, VGPU10_OPCODE_DIV, &tmp_dst, 8328 &coord, &src0_wwww); 8329 8330 /* SAMPLE dst, coord(tmp), resource, sampler */ 8331 begin_emit_instruction(emit); 8332 8333 if (tgsi_is_shadow_target(target)) 8334 /* NOTE: for non-fragment shaders, we should use 8335 * VGPU10_OPCODE_SAMPLE_C_LZ, but our virtual GPU accepts this as-is. 8336 */ 8337 opcode = VGPU10_OPCODE_SAMPLE_C; 8338 else 8339 opcode = VGPU10_OPCODE_SAMPLE; 8340 8341 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); 8342 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 8343 emit_src_register(emit, &tmp_src); /* projected coord */ 8344 emit_resource_register(emit, unit); 8345 emit_sampler_register(emit, unit); 8346 if (opcode == VGPU10_OPCODE_SAMPLE_C) { 8347 emit_tex_compare_refcoord(emit, target, &tmp_src); 8348 } 8349 end_emit_instruction(emit); 8350 8351 end_tex_swizzle(emit, &swz_info); 8352 8353 free_temp_indexes(emit); 8354 8355 return TRUE; 8356} 8357 8358 8359/** 8360 * Emit code for TGSI_OPCODE_TXD (explicit derivatives) 8361 */ 8362static boolean 8363emit_txd(struct svga_shader_emitter_v10 *emit, 8364 const struct tgsi_full_instruction *inst) 8365{ 8366 const uint unit = inst->Src[3].Register.Index; 8367 const enum tgsi_texture_type target = inst->Texture.Texture; 8368 int offsets[3]; 8369 struct tgsi_full_src_register coord; 8370 struct tex_swizzle_info swz_info; 8371 8372 begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target), 8373 &swz_info); 8374 8375 get_texel_offsets(emit, inst, offsets); 8376 8377 coord = setup_texcoord(emit, unit, &inst->Src[0]); 8378 8379 /* SAMPLE_D dst, coord(s0), resource, sampler, Xderiv(s1), Yderiv(s2) */ 8380 begin_emit_instruction(emit); 8381 emit_sample_opcode(emit, VGPU10_OPCODE_SAMPLE_D, 8382 inst->Instruction.Saturate, offsets); 8383 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 8384 emit_src_register(emit, &coord); 8385 emit_resource_register(emit, unit); 8386 emit_sampler_register(emit, unit); 8387 emit_src_register(emit, &inst->Src[1]); /* Xderiv */ 8388 emit_src_register(emit, &inst->Src[2]); /* Yderiv */ 8389 end_emit_instruction(emit); 8390 8391 end_tex_swizzle(emit, &swz_info); 8392 8393 free_temp_indexes(emit); 8394 8395 return TRUE; 8396} 8397 8398 8399/** 8400 * Emit code for TGSI_OPCODE_TXF (texel fetch) 8401 */ 8402static boolean 8403emit_txf(struct svga_shader_emitter_v10 *emit, 8404 const struct tgsi_full_instruction *inst) 8405{ 8406 const uint unit = inst->Src[1].Register.Index; 8407 const boolean msaa = tgsi_is_msaa_target(inst->Texture.Texture) 8408 && emit->key.tex[unit].num_samples > 1; 8409 int offsets[3]; 8410 struct tex_swizzle_info swz_info; 8411 8412 begin_tex_swizzle(emit, unit, inst, FALSE, &swz_info); 8413 8414 get_texel_offsets(emit, inst, offsets); 8415 8416 if (msaa) { 8417 assert(emit->key.tex[unit].num_samples > 1); 8418 8419 /* Fetch one sample from an MSAA texture */ 8420 struct tgsi_full_src_register sampleIndex = 8421 scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); 8422 /* LD_MS dst, coord(s0), resource, sampleIndex */ 8423 begin_emit_instruction(emit); 8424 emit_sample_opcode(emit, VGPU10_OPCODE_LD_MS, 8425 inst->Instruction.Saturate, offsets); 8426 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 8427 emit_src_register(emit, &inst->Src[0]); 8428 emit_resource_register(emit, unit); 8429 emit_src_register(emit, &sampleIndex); 
8430 end_emit_instruction(emit); 8431 } 8432 else { 8433 /* Fetch one texel specified by integer coordinate */ 8434 /* LD dst, coord(s0), resource */ 8435 begin_emit_instruction(emit); 8436 emit_sample_opcode(emit, VGPU10_OPCODE_LD, 8437 inst->Instruction.Saturate, offsets); 8438 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 8439 emit_src_register(emit, &inst->Src[0]); 8440 emit_resource_register(emit, unit); 8441 end_emit_instruction(emit); 8442 } 8443 8444 end_tex_swizzle(emit, &swz_info); 8445 8446 free_temp_indexes(emit); 8447 8448 return TRUE; 8449} 8450 8451 8452/** 8453 * Emit code for TGSI_OPCODE_TXL (explicit LOD) or TGSI_OPCODE_TXB (LOD bias) 8454 * or TGSI_OPCODE_TXB2 (for cube shadow maps). 8455 */ 8456static boolean 8457emit_txl_txb(struct svga_shader_emitter_v10 *emit, 8458 const struct tgsi_full_instruction *inst) 8459{ 8460 const enum tgsi_texture_type target = inst->Texture.Texture; 8461 VGPU10_OPCODE_TYPE opcode; 8462 unsigned unit; 8463 int offsets[3]; 8464 struct tgsi_full_src_register coord, lod_bias; 8465 struct tex_swizzle_info swz_info; 8466 8467 assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL || 8468 inst->Instruction.Opcode == TGSI_OPCODE_TXB || 8469 inst->Instruction.Opcode == TGSI_OPCODE_TXB2); 8470 8471 if (inst->Instruction.Opcode == TGSI_OPCODE_TXB2) { 8472 lod_bias = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); 8473 unit = inst->Src[2].Register.Index; 8474 } 8475 else { 8476 lod_bias = scalar_src(&inst->Src[0], TGSI_SWIZZLE_W); 8477 unit = inst->Src[1].Register.Index; 8478 } 8479 8480 begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target), 8481 &swz_info); 8482 8483 get_texel_offsets(emit, inst, offsets); 8484 8485 coord = setup_texcoord(emit, unit, &inst->Src[0]); 8486 8487 /* SAMPLE_L/B dst, coord(s0), resource, sampler, lod(s3) */ 8488 begin_emit_instruction(emit); 8489 if (inst->Instruction.Opcode == TGSI_OPCODE_TXL) { 8490 opcode = VGPU10_OPCODE_SAMPLE_L; 8491 } 8492 else { 8493 opcode = VGPU10_OPCODE_SAMPLE_B; 8494 } 8495 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); 8496 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 8497 emit_src_register(emit, &coord); 8498 emit_resource_register(emit, unit); 8499 emit_sampler_register(emit, unit); 8500 emit_src_register(emit, &lod_bias); 8501 end_emit_instruction(emit); 8502 8503 end_tex_swizzle(emit, &swz_info); 8504 8505 free_temp_indexes(emit); 8506 8507 return TRUE; 8508} 8509 8510 8511/** 8512 * Emit code for TGSI_OPCODE_TXL2 (explicit LOD) for cubemap array. 
8513 */ 8514static boolean 8515emit_txl2(struct svga_shader_emitter_v10 *emit, 8516 const struct tgsi_full_instruction *inst) 8517{ 8518 unsigned target = inst->Texture.Texture; 8519 unsigned opcode, unit; 8520 int offsets[3]; 8521 struct tgsi_full_src_register coord, lod; 8522 struct tex_swizzle_info swz_info; 8523 8524 assert(inst->Instruction.Opcode == TGSI_OPCODE_TXL2); 8525 8526 lod = scalar_src(&inst->Src[1], TGSI_SWIZZLE_X); 8527 unit = inst->Src[2].Register.Index; 8528 8529 begin_tex_swizzle(emit, unit, inst, tgsi_is_shadow_target(target), 8530 &swz_info); 8531 8532 get_texel_offsets(emit, inst, offsets); 8533 8534 coord = setup_texcoord(emit, unit, &inst->Src[0]); 8535 8536 /* SAMPLE_L dst, coord(s0), resource, sampler, lod(s3) */ 8537 begin_emit_instruction(emit); 8538 opcode = VGPU10_OPCODE_SAMPLE_L; 8539 emit_sample_opcode(emit, opcode, inst->Instruction.Saturate, offsets); 8540 emit_dst_register(emit, get_tex_swizzle_dst(&swz_info)); 8541 emit_src_register(emit, &coord); 8542 emit_resource_register(emit, unit); 8543 emit_sampler_register(emit, unit); 8544 emit_src_register(emit, &lod); 8545 end_emit_instruction(emit); 8546 8547 end_tex_swizzle(emit, &swz_info); 8548 8549 free_temp_indexes(emit); 8550 8551 return TRUE; 8552} 8553 8554 8555/** 8556 * Emit code for TGSI_OPCODE_TXQ (texture query) instruction. 8557 */ 8558static boolean 8559emit_txq(struct svga_shader_emitter_v10 *emit, 8560 const struct tgsi_full_instruction *inst) 8561{ 8562 const uint unit = inst->Src[1].Register.Index; 8563 8564 if (emit->key.tex[unit].target == PIPE_BUFFER) { 8565 /* RESINFO does not support querying texture buffers, so we instead 8566 * store texture buffer sizes in shader constants, then copy them to 8567 * implement TXQ instead of emitting RESINFO. 8568 * MOV dst, const[texture_buffer_size_index[unit]] 8569 */ 8570 struct tgsi_full_src_register size_src = 8571 make_src_const_reg(emit->texture_buffer_size_index[unit]); 8572 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], &size_src); 8573 } else { 8574 /* RESINFO dst, srcMipLevel, resource */ 8575 begin_emit_instruction(emit); 8576 emit_opcode_resinfo(emit, VGPU10_RESINFO_RETURN_UINT); 8577 emit_dst_register(emit, &inst->Dst[0]); 8578 emit_src_register(emit, &inst->Src[0]); 8579 emit_resource_register(emit, unit); 8580 end_emit_instruction(emit); 8581 } 8582 8583 free_temp_indexes(emit); 8584 8585 return TRUE; 8586} 8587 8588 8589/** 8590 * Does this opcode produce a double-precision result? 8591 * XXX perhaps move this to a TGSI utility. 8592 */ 8593static bool 8594opcode_has_dbl_dst(unsigned opcode) 8595{ 8596 switch (opcode) { 8597 case TGSI_OPCODE_F2D: 8598 case TGSI_OPCODE_DABS: 8599 case TGSI_OPCODE_DADD: 8600 case TGSI_OPCODE_DFRAC: 8601 case TGSI_OPCODE_DMAX: 8602 case TGSI_OPCODE_DMIN: 8603 case TGSI_OPCODE_DMUL: 8604 case TGSI_OPCODE_DNEG: 8605 case TGSI_OPCODE_I2D: 8606 case TGSI_OPCODE_U2D: 8607 // XXX more TBD 8608 return true; 8609 default: 8610 return false; 8611 } 8612} 8613 8614 8615/** 8616 * Does this opcode use double-precision source registers? 
8617 */ 8618static bool 8619opcode_has_dbl_src(unsigned opcode) 8620{ 8621 switch (opcode) { 8622 case TGSI_OPCODE_D2F: 8623 case TGSI_OPCODE_DABS: 8624 case TGSI_OPCODE_DADD: 8625 case TGSI_OPCODE_DFRAC: 8626 case TGSI_OPCODE_DMAX: 8627 case TGSI_OPCODE_DMIN: 8628 case TGSI_OPCODE_DMUL: 8629 case TGSI_OPCODE_DNEG: 8630 case TGSI_OPCODE_D2I: 8631 case TGSI_OPCODE_D2U: 8632 // XXX more TBD 8633 return true; 8634 default: 8635 return false; 8636 } 8637} 8638 8639 8640/** 8641 * Check that the swizzle for reading from a double-precision register 8642 * is valid. 8643 */ 8644static void 8645check_double_src_swizzle(const struct tgsi_full_src_register *reg) 8646{ 8647 assert((reg->Register.SwizzleX == PIPE_SWIZZLE_X && 8648 reg->Register.SwizzleY == PIPE_SWIZZLE_Y) || 8649 (reg->Register.SwizzleX == PIPE_SWIZZLE_Z && 8650 reg->Register.SwizzleY == PIPE_SWIZZLE_W)); 8651 8652 assert((reg->Register.SwizzleZ == PIPE_SWIZZLE_X && 8653 reg->Register.SwizzleW == PIPE_SWIZZLE_Y) || 8654 (reg->Register.SwizzleZ == PIPE_SWIZZLE_Z && 8655 reg->Register.SwizzleW == PIPE_SWIZZLE_W)); 8656} 8657 8658 8659/** 8660 * Check that the writemask for a double-precision instruction is valid. 8661 */ 8662static void 8663check_double_dst_writemask(const struct tgsi_full_instruction *inst) 8664{ 8665 ASSERTED unsigned writemask = inst->Dst[0].Register.WriteMask; 8666 8667 switch (inst->Instruction.Opcode) { 8668 case TGSI_OPCODE_DABS: 8669 case TGSI_OPCODE_DADD: 8670 case TGSI_OPCODE_DFRAC: 8671 case TGSI_OPCODE_DNEG: 8672 case TGSI_OPCODE_DMAD: 8673 case TGSI_OPCODE_DMAX: 8674 case TGSI_OPCODE_DMIN: 8675 case TGSI_OPCODE_DMUL: 8676 case TGSI_OPCODE_DRCP: 8677 case TGSI_OPCODE_DSQRT: 8678 case TGSI_OPCODE_F2D: 8679 assert(writemask == TGSI_WRITEMASK_XYZW || 8680 writemask == TGSI_WRITEMASK_XY || 8681 writemask == TGSI_WRITEMASK_ZW); 8682 break; 8683 case TGSI_OPCODE_DSEQ: 8684 case TGSI_OPCODE_DSGE: 8685 case TGSI_OPCODE_DSNE: 8686 case TGSI_OPCODE_DSLT: 8687 case TGSI_OPCODE_D2I: 8688 case TGSI_OPCODE_D2U: 8689 /* Write to 1 or 2 components only */ 8690 assert(util_bitcount(writemask) <= 2); 8691 break; 8692 default: 8693 /* XXX this list may be incomplete */ 8694 ; 8695 } 8696} 8697 8698 8699/** 8700 * Double-precision absolute value. 8701 */ 8702static boolean 8703emit_dabs(struct svga_shader_emitter_v10 *emit, 8704 const struct tgsi_full_instruction *inst) 8705{ 8706 assert(emit->version >= 50); 8707 check_double_src_swizzle(&inst->Src[0]); 8708 check_double_dst_writemask(inst); 8709 8710 struct tgsi_full_src_register abs_src = absolute_src(&inst->Src[0]); 8711 8712 /* DMOV dst, |src| */ 8713 emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &abs_src); 8714 8715 return TRUE; 8716} 8717 8718 8719/** 8720 * Double-precision negation 8721 */ 8722static boolean 8723emit_dneg(struct svga_shader_emitter_v10 *emit, 8724 const struct tgsi_full_instruction *inst) 8725{ 8726 assert(emit->version >= 50); 8727 check_double_src_swizzle(&inst->Src[0]); 8728 check_double_dst_writemask(inst); 8729 8730 struct tgsi_full_src_register neg_src = negate_src(&inst->Src[0]); 8731 8732 /* DMOV dst, -src */ 8733 emit_instruction_op1(emit, VGPU10_OPCODE_DMOV, &inst->Dst[0], &neg_src); 8734 8735 return TRUE; 8736} 8737 8738 8739/** 8740 * SM5 has no DMAD opcode. Implement negation with DMUL/DADD. 
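 *
 * That is, the multiply-add dst = src0 * src1 + src2 is expanded into:
 *   DMUL tmp, src0, src1
 *   DADD dst, tmp, src2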
8741 */ 8742static boolean 8743emit_dmad(struct svga_shader_emitter_v10 *emit, 8744 const struct tgsi_full_instruction *inst) 8745{ 8746 assert(emit->version >= 50); 8747 check_double_src_swizzle(&inst->Src[0]); 8748 check_double_src_swizzle(&inst->Src[1]); 8749 check_double_src_swizzle(&inst->Src[2]); 8750 check_double_dst_writemask(inst); 8751 8752 unsigned tmp = get_temp_index(emit); 8753 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 8754 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 8755 8756 /* DMUL tmp, src[0], src[1] */ 8757 emit_instruction_opn(emit, VGPU10_OPCODE_DMUL, 8758 &tmp_dst, &inst->Src[0], &inst->Src[1], NULL, 8759 FALSE, inst->Instruction.Precise); 8760 8761 /* DADD dst, tmp, src[2] */ 8762 emit_instruction_opn(emit, VGPU10_OPCODE_DADD, 8763 &inst->Dst[0], &tmp_src, &inst->Src[2], NULL, 8764 inst->Instruction.Saturate, inst->Instruction.Precise); 8765 free_temp_indexes(emit); 8766 8767 return TRUE; 8768} 8769 8770 8771/** 8772 * Double precision reciprocal square root 8773 */ 8774static boolean 8775emit_drsq(struct svga_shader_emitter_v10 *emit, 8776 const struct tgsi_full_dst_register *dst, 8777 const struct tgsi_full_src_register *src) 8778{ 8779 assert(emit->version >= 50); 8780 8781 VGPU10OpcodeToken0 token0; 8782 begin_emit_instruction(emit); 8783 8784 token0.value = 0; 8785 token0.opcodeType = VGPU10_OPCODE_VMWARE; 8786 token0.vmwareOpcodeType = VGPU10_VMWARE_OPCODE_DRSQ; 8787 emit_dword(emit, token0.value); 8788 8789 emit_dst_register(emit, dst); 8790 8791 check_double_src_swizzle(src); 8792 emit_src_register(emit, src); 8793 8794 end_emit_instruction(emit); 8795 8796 return TRUE; 8797} 8798 8799 8800/** 8801 * There is no SM5 opcode for double precision square root. 8802 * It will be implemented with DRSQ. 8803 * dst = src * DRSQ(src) 8804 */ 8805static boolean 8806emit_dsqrt(struct svga_shader_emitter_v10 *emit, 8807 const struct tgsi_full_instruction *inst) 8808{ 8809 assert(emit->version >= 50); 8810 8811 check_double_src_swizzle(&inst->Src[0]); 8812 8813 /* temporary register to hold the source */ 8814 unsigned tmp = get_temp_index(emit); 8815 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 8816 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 8817 8818 /* temporary register to hold the DEQ result */ 8819 unsigned tmp_cond = get_temp_index(emit); 8820 struct tgsi_full_dst_register tmp_cond_dst = make_dst_temp_reg(tmp_cond); 8821 struct tgsi_full_dst_register tmp_cond_dst_xy = 8822 writemask_dst(&tmp_cond_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y); 8823 struct tgsi_full_src_register tmp_cond_src = make_src_temp_reg(tmp_cond); 8824 struct tgsi_full_src_register tmp_cond_src_xy = 8825 swizzle_src(&tmp_cond_src, 8826 PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, 8827 PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y); 8828 8829 /* The reciprocal square root of zero yields INF. 8830 * So if the source is 0, we replace it with 1 in the tmp register. 8831 * The later multiplication of zero in the original source will yield 0 8832 * in the result. 8833 */ 8834 8835 /* tmp1 = (src == 0) ? 
1 : src; 8836 * EQ tmp1, 0, src 8837 * MOVC tmp, tmp1, 1.0, src 8838 */ 8839 struct tgsi_full_src_register zero = 8840 make_immediate_reg_double(emit, 0); 8841 8842 struct tgsi_full_src_register one = 8843 make_immediate_reg_double(emit, 1.0); 8844 8845 emit_instruction_op2(emit, VGPU10_OPCODE_DEQ, &tmp_cond_dst_xy, 8846 &zero, &inst->Src[0]); 8847 emit_instruction_op3(emit, VGPU10_OPCODE_DMOVC, &tmp_dst, 8848 &tmp_cond_src_xy, &one, &inst->Src[0]); 8849 8850 struct tgsi_full_dst_register tmp_rsq_dst = make_dst_temp_reg(tmp); 8851 struct tgsi_full_src_register tmp_rsq_src = make_src_temp_reg(tmp); 8852 8853 /* DRSQ tmp_rsq, tmp */ 8854 emit_drsq(emit, &tmp_rsq_dst, &tmp_src); 8855 8856 /* DMUL dst, tmp_rsq, src[0] */ 8857 emit_instruction_op2(emit, VGPU10_OPCODE_DMUL, &inst->Dst[0], 8858 &tmp_rsq_src, &inst->Src[0]); 8859 8860 free_temp_indexes(emit); 8861 8862 return TRUE; 8863} 8864 8865 8866static boolean 8867emit_interp_offset(struct svga_shader_emitter_v10 *emit, 8868 const struct tgsi_full_instruction *inst) 8869{ 8870 assert(emit->version >= 50); 8871 8872 /* The src1.xy offset is a float with values in the range [-0.5, 0.5] 8873 * where (0,0) is the center of the pixel. We need to translate that 8874 * into an integer offset on a 16x16 grid in the range [-8/16, 7/16]. 8875 * Also need to flip the Y axis (I think). 8876 */ 8877 unsigned tmp = get_temp_index(emit); 8878 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 8879 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 8880 struct tgsi_full_dst_register tmp_dst_xy = 8881 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X | TGSI_WRITEMASK_Y); 8882 struct tgsi_full_src_register const16 = 8883 make_immediate_reg_float4(emit, 16.0f, -16.0, 0, 0); 8884 8885 /* MUL tmp.xy, src1, {16, -16, 0, 0} */ 8886 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, 8887 &tmp_dst_xy, &inst->Src[1], &const16); 8888 8889 /* FTOI tmp.xy, tmp */ 8890 emit_instruction_op1(emit, VGPU10_OPCODE_FTOI, &tmp_dst_xy, &tmp_src); 8891 8892 /* EVAL_SNAPPED dst, src0, tmp */ 8893 emit_instruction_op2(emit, VGPU10_OPCODE_EVAL_SNAPPED, 8894 &inst->Dst[0], &inst->Src[0], &tmp_src); 8895 8896 free_temp_indexes(emit); 8897 8898 return TRUE; 8899} 8900 8901 8902/** 8903 * Emit a simple instruction (like ADD, MUL, MIN, etc). 
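 * These TGSI opcodes map essentially 1:1 onto a VGPU10 opcode via
 * translate_opcode(); we just re-emit the destination and source operands,
 * with extra validation for the double-precision opcodes.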
8904 */ 8905static boolean 8906emit_simple(struct svga_shader_emitter_v10 *emit, 8907 const struct tgsi_full_instruction *inst) 8908{ 8909 const enum tgsi_opcode opcode = inst->Instruction.Opcode; 8910 const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode); 8911 const bool dbl_dst = opcode_has_dbl_dst(inst->Instruction.Opcode); 8912 const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode); 8913 unsigned i; 8914 8915 if (inst->Instruction.Opcode == TGSI_OPCODE_BGNLOOP) { 8916 emit->current_loop_depth++; 8917 } 8918 else if (inst->Instruction.Opcode == TGSI_OPCODE_ENDLOOP) { 8919 emit->current_loop_depth--; 8920 } 8921 8922 begin_emit_instruction(emit); 8923 emit_opcode_precise(emit, translate_opcode(inst->Instruction.Opcode), 8924 inst->Instruction.Saturate, 8925 inst->Instruction.Precise); 8926 for (i = 0; i < op->num_dst; i++) { 8927 if (dbl_dst) { 8928 check_double_dst_writemask(inst); 8929 } 8930 emit_dst_register(emit, &inst->Dst[i]); 8931 } 8932 for (i = 0; i < op->num_src; i++) { 8933 if (dbl_src) { 8934 check_double_src_swizzle(&inst->Src[i]); 8935 } 8936 emit_src_register(emit, &inst->Src[i]); 8937 } 8938 end_emit_instruction(emit); 8939 8940 return TRUE; 8941} 8942 8943 8944/** 8945 * Emit MSB instruction (like IMSB, UMSB). 8946 * 8947 * GLSL returns the index starting from the LSB; 8948 * whereas in SM5, firstbit_hi/shi returns the index starting from the MSB. 8949 * To get correct location as per glsl from SM5 device, we should 8950 * return (31 - index) if returned index is not -1. 8951 */ 8952static boolean 8953emit_msb(struct svga_shader_emitter_v10 *emit, 8954 const struct tgsi_full_instruction *inst) 8955{ 8956 const struct tgsi_full_dst_register *index_dst = &inst->Dst[0]; 8957 8958 assert(index_dst->Register.File != TGSI_FILE_OUTPUT); 8959 8960 struct tgsi_full_src_register index_src = 8961 make_src_reg(index_dst->Register.File, index_dst->Register.Index); 8962 struct tgsi_full_src_register imm31 = 8963 make_immediate_reg_int(emit, 31); 8964 imm31 = scalar_src(&imm31, TGSI_SWIZZLE_X); 8965 struct tgsi_full_src_register neg_one = 8966 make_immediate_reg_int(emit, -1); 8967 neg_one = scalar_src(&neg_one, TGSI_SWIZZLE_X); 8968 unsigned tmp = get_temp_index(emit); 8969 const struct tgsi_full_dst_register tmp_dst = 8970 make_dst_temp_reg(tmp); 8971 const struct tgsi_full_dst_register tmp_dst_x = 8972 writemask_dst(&tmp_dst, TGSI_WRITEMASK_X); 8973 const struct tgsi_full_src_register tmp_src_x = 8974 make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp, TGSI_SWIZZLE_X); 8975 int writemask = TGSI_WRITEMASK_X; 8976 int src_swizzle = TGSI_SWIZZLE_X; 8977 int dst_writemask = index_dst->Register.WriteMask; 8978 8979 emit_simple(emit, inst); 8980 8981 /* index conversion from SM5 to GLSL */ 8982 while (writemask & dst_writemask) { 8983 struct tgsi_full_src_register index_src_comp = 8984 scalar_src(&index_src, src_swizzle); 8985 struct tgsi_full_dst_register index_dst_comp = 8986 writemask_dst(index_dst, writemask); 8987 8988 /* check if index_src_comp != -1 */ 8989 emit_instruction_op2(emit, VGPU10_OPCODE_INE, 8990 &tmp_dst_x, &index_src_comp, &neg_one); 8991 8992 /* if */ 8993 emit_if(emit, &tmp_src_x); 8994 8995 index_src_comp = negate_src(&index_src_comp); 8996 /* SUB DST, IMM{31}, DST */ 8997 emit_instruction_op2(emit, VGPU10_OPCODE_IADD, 8998 &index_dst_comp, &imm31, &index_src_comp); 8999 9000 /* endif */ 9001 emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF); 9002 9003 writemask = writemask << 1; 9004 src_swizzle = src_swizzle + 1; 9005 } 9006 free_temp_indexes(emit); 
   return TRUE;
}


/**
 * Emit a BFE instruction (like UBFE, IBFE).
 * TGSI representation:
 *    U/IBFE dst, value, offset, width
 * SM5 representation:
 *    U/IBFE dst, width, offset, value
 * Note: SM5 has a width & offset range of (0-31), whereas GLSL has a
 * width & offset range of (0-32).
 */
static boolean
emit_bfe(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   const enum tgsi_opcode opcode = inst->Instruction.Opcode;
   struct tgsi_full_src_register imm32 = make_immediate_reg_int(emit, 32);
   imm32 = scalar_src(&imm32, TGSI_SWIZZLE_X);
   struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0);
   zero = scalar_src(&zero, TGSI_SWIZZLE_X);

   unsigned tmp1 = get_temp_index(emit);
   const struct tgsi_full_dst_register cond1_dst = make_dst_temp_reg(tmp1);
   const struct tgsi_full_dst_register cond1_dst_x =
      writemask_dst(&cond1_dst, TGSI_WRITEMASK_X);
   const struct tgsi_full_src_register cond1_src_x =
      make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp1, TGSI_SWIZZLE_X);

   unsigned tmp2 = get_temp_index(emit);
   const struct tgsi_full_dst_register cond2_dst = make_dst_temp_reg(tmp2);
   const struct tgsi_full_dst_register cond2_dst_x =
      writemask_dst(&cond2_dst, TGSI_WRITEMASK_X);
   const struct tgsi_full_src_register cond2_src_x =
      make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp2, TGSI_SWIZZLE_X);

   /**
    * In SM5, when width = 32 and offset = 0, the result is 0.
    * GLSL, on the other hand, expects the value to be copied as-is to dst.
    */

   /* cond1 = (width == 32) */
   emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
                        &cond1_dst_x, &inst->Src[2], &imm32);

   /* cond2 = (offset == 0) */
   emit_instruction_op2(emit, VGPU10_OPCODE_IEQ,
                        &cond2_dst_x, &inst->Src[1], &zero);

   /* cond2 = cond1 & cond2 */
   emit_instruction_op2(emit, VGPU10_OPCODE_AND, &cond2_dst_x,
                        &cond2_src_x,
                        &cond1_src_x);
   /* IF */
   emit_if(emit, &cond2_src_x);

   emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0],
                        &inst->Src[0]);

   /* ELSE */
   emit_instruction_op0(emit, VGPU10_OPCODE_ELSE);

   /* U/IBFE dst, width, offset, value */
   emit_instruction_op3(emit, translate_opcode(opcode), &inst->Dst[0],
                        &inst->Src[2], &inst->Src[1], &inst->Src[0]);

   /* ENDIF */
   emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF);

   free_temp_indexes(emit);
   return TRUE;
}


/**
 * Emit a BFI instruction.
 * TGSI representation:
 *    BFI dst, base, insert, offset, width
 * SM5 representation:
 *    BFI dst, width, offset, insert, base
 * Note: SM5 has a width & offset range of (0-31), whereas GLSL has a
 * width & offset range of (0-32).
 */
static boolean
emit_bfi(struct svga_shader_emitter_v10 *emit,
         const struct tgsi_full_instruction *inst)
{
   const enum tgsi_opcode opcode = inst->Instruction.Opcode;
   struct tgsi_full_src_register imm32 = make_immediate_reg_int(emit, 32);
   imm32 = scalar_src(&imm32, TGSI_SWIZZLE_X);

   struct tgsi_full_src_register zero = make_immediate_reg_int(emit, 0);
   zero = scalar_src(&zero, TGSI_SWIZZLE_X);

   unsigned tmp1 = get_temp_index(emit);
   const struct tgsi_full_dst_register cond1_dst = make_dst_temp_reg(tmp1);
   const struct tgsi_full_dst_register cond1_dst_x =
      writemask_dst(&cond1_dst, TGSI_WRITEMASK_X);
   const struct tgsi_full_src_register cond1_src_x =
      make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp1, TGSI_SWIZZLE_X);

   unsigned tmp2 = get_temp_index(emit);
   const struct tgsi_full_dst_register cond2_dst = make_dst_temp_reg(tmp2);
   const struct tgsi_full_dst_register cond2_dst_x =
      writemask_dst(&cond2_dst, TGSI_WRITEMASK_X);
   const struct tgsi_full_src_register cond2_src_x =
      make_src_scalar_reg(TGSI_FILE_TEMPORARY, tmp2, TGSI_SWIZZLE_X);

   /**
    * In SM5, when width = 32 and offset = 0, the result is 0.
    * GLSL, on the other hand, expects the insert value to be copied as-is
    * to dst.
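    * (With the full 32-bit width and a zero offset the insert value simply
    * replaces all of base, which is why a plain MOV suffices in that case.)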
9119 */ 9120 9121 /* cond1 = width == 32 */ 9122 emit_instruction_op2(emit, VGPU10_OPCODE_IEQ, 9123 &cond1_dst_x, &inst->Src[3], &imm32); 9124 9125 /* cond1 = offset == 0 */ 9126 emit_instruction_op2(emit, VGPU10_OPCODE_IEQ, 9127 &cond2_dst_x, &inst->Src[2], &zero); 9128 9129 /* cond2 = cond1 & cond2 */ 9130 emit_instruction_op2(emit, VGPU10_OPCODE_AND, 9131 &cond2_dst_x, &cond2_src_x, &cond1_src_x); 9132 9133 /* if */ 9134 emit_if(emit, &cond2_src_x); 9135 9136 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &inst->Dst[0], 9137 &inst->Src[1]); 9138 9139 /* else */ 9140 emit_instruction_op0(emit, VGPU10_OPCODE_ELSE); 9141 9142 /* BFI dst, width, offset, insert, base */ 9143 begin_emit_instruction(emit); 9144 emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate); 9145 emit_dst_register(emit, &inst->Dst[0]); 9146 emit_src_register(emit, &inst->Src[3]); 9147 emit_src_register(emit, &inst->Src[2]); 9148 emit_src_register(emit, &inst->Src[1]); 9149 emit_src_register(emit, &inst->Src[0]); 9150 end_emit_instruction(emit); 9151 9152 /* endif */ 9153 emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF); 9154 9155 free_temp_indexes(emit); 9156 return TRUE; 9157} 9158 9159 9160/** 9161 * We only special case the MOV instruction to try to detect constant 9162 * color writes in the fragment shader. 9163 */ 9164static boolean 9165emit_mov(struct svga_shader_emitter_v10 *emit, 9166 const struct tgsi_full_instruction *inst) 9167{ 9168 const struct tgsi_full_src_register *src = &inst->Src[0]; 9169 const struct tgsi_full_dst_register *dst = &inst->Dst[0]; 9170 9171 if (emit->unit == PIPE_SHADER_FRAGMENT && 9172 dst->Register.File == TGSI_FILE_OUTPUT && 9173 dst->Register.Index == 0 && 9174 src->Register.File == TGSI_FILE_CONSTANT && 9175 !src->Register.Indirect) { 9176 emit->constant_color_output = TRUE; 9177 } 9178 9179 return emit_simple(emit, inst); 9180} 9181 9182 9183/** 9184 * Emit a simple VGPU10 instruction which writes to multiple dest registers, 9185 * where TGSI only uses one dest register. 9186 */ 9187static boolean 9188emit_simple_1dst(struct svga_shader_emitter_v10 *emit, 9189 const struct tgsi_full_instruction *inst, 9190 unsigned dst_count, 9191 unsigned dst_index) 9192{ 9193 const enum tgsi_opcode opcode = inst->Instruction.Opcode; 9194 const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode); 9195 unsigned i; 9196 9197 begin_emit_instruction(emit); 9198 emit_opcode(emit, translate_opcode(opcode), inst->Instruction.Saturate); 9199 9200 for (i = 0; i < dst_count; i++) { 9201 if (i == dst_index) { 9202 emit_dst_register(emit, &inst->Dst[0]); 9203 } else { 9204 emit_null_dst_register(emit); 9205 } 9206 } 9207 9208 for (i = 0; i < op->num_src; i++) { 9209 emit_src_register(emit, &inst->Src[i]); 9210 } 9211 end_emit_instruction(emit); 9212 9213 return TRUE; 9214} 9215 9216 9217/** 9218 * Emit a vmware specific VGPU10 instruction. 
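 * These are encoded with VGPU10_OPCODE_VMWARE as the opcode token and the
 * actual operation (e.g. IDIV, DFRC, DRSQ) in its vmwareOpcodeType field.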
9219 */ 9220static boolean 9221emit_vmware(struct svga_shader_emitter_v10 *emit, 9222 const struct tgsi_full_instruction *inst, 9223 VGPU10_VMWARE_OPCODE_TYPE subopcode) 9224{ 9225 VGPU10OpcodeToken0 token0; 9226 const enum tgsi_opcode opcode = inst->Instruction.Opcode; 9227 const struct tgsi_opcode_info *op = tgsi_get_opcode_info(opcode); 9228 const bool dbl_dst = opcode_has_dbl_dst(inst->Instruction.Opcode); 9229 const bool dbl_src = opcode_has_dbl_src(inst->Instruction.Opcode); 9230 9231 unsigned i; 9232 9233 begin_emit_instruction(emit); 9234 9235 assert((subopcode > 0 && emit->version >= 50) || subopcode == 0); 9236 9237 token0.value = 0; 9238 token0.opcodeType = VGPU10_OPCODE_VMWARE; 9239 token0.vmwareOpcodeType = subopcode; 9240 emit_dword(emit, token0.value); 9241 9242 if (subopcode == VGPU10_VMWARE_OPCODE_IDIV) { 9243 /* IDIV only uses the first dest register. */ 9244 emit_dst_register(emit, &inst->Dst[0]); 9245 emit_null_dst_register(emit); 9246 } else { 9247 for (i = 0; i < op->num_dst; i++) { 9248 if (dbl_dst) { 9249 check_double_dst_writemask(inst); 9250 } 9251 emit_dst_register(emit, &inst->Dst[i]); 9252 } 9253 } 9254 9255 for (i = 0; i < op->num_src; i++) { 9256 if (dbl_src) { 9257 check_double_src_swizzle(&inst->Src[i]); 9258 } 9259 emit_src_register(emit, &inst->Src[i]); 9260 } 9261 end_emit_instruction(emit); 9262 9263 return TRUE; 9264} 9265 9266 9267/** 9268 * Translate a single TGSI instruction to VGPU10. 9269 */ 9270static boolean 9271emit_vgpu10_instruction(struct svga_shader_emitter_v10 *emit, 9272 unsigned inst_number, 9273 const struct tgsi_full_instruction *inst) 9274{ 9275 const enum tgsi_opcode opcode = inst->Instruction.Opcode; 9276 9277 if (emit->skip_instruction) 9278 return TRUE; 9279 9280 switch (opcode) { 9281 case TGSI_OPCODE_ADD: 9282 case TGSI_OPCODE_AND: 9283 case TGSI_OPCODE_BGNLOOP: 9284 case TGSI_OPCODE_BRK: 9285 case TGSI_OPCODE_CEIL: 9286 case TGSI_OPCODE_CONT: 9287 case TGSI_OPCODE_DDX: 9288 case TGSI_OPCODE_DDY: 9289 case TGSI_OPCODE_DIV: 9290 case TGSI_OPCODE_DP2: 9291 case TGSI_OPCODE_DP3: 9292 case TGSI_OPCODE_DP4: 9293 case TGSI_OPCODE_ELSE: 9294 case TGSI_OPCODE_ENDIF: 9295 case TGSI_OPCODE_ENDLOOP: 9296 case TGSI_OPCODE_ENDSUB: 9297 case TGSI_OPCODE_F2I: 9298 case TGSI_OPCODE_F2U: 9299 case TGSI_OPCODE_FLR: 9300 case TGSI_OPCODE_FRC: 9301 case TGSI_OPCODE_FSEQ: 9302 case TGSI_OPCODE_FSGE: 9303 case TGSI_OPCODE_FSLT: 9304 case TGSI_OPCODE_FSNE: 9305 case TGSI_OPCODE_I2F: 9306 case TGSI_OPCODE_IMAX: 9307 case TGSI_OPCODE_IMIN: 9308 case TGSI_OPCODE_INEG: 9309 case TGSI_OPCODE_ISGE: 9310 case TGSI_OPCODE_ISHR: 9311 case TGSI_OPCODE_ISLT: 9312 case TGSI_OPCODE_MAD: 9313 case TGSI_OPCODE_MAX: 9314 case TGSI_OPCODE_MIN: 9315 case TGSI_OPCODE_MUL: 9316 case TGSI_OPCODE_NOP: 9317 case TGSI_OPCODE_NOT: 9318 case TGSI_OPCODE_OR: 9319 case TGSI_OPCODE_UADD: 9320 case TGSI_OPCODE_USEQ: 9321 case TGSI_OPCODE_USGE: 9322 case TGSI_OPCODE_USLT: 9323 case TGSI_OPCODE_UMIN: 9324 case TGSI_OPCODE_UMAD: 9325 case TGSI_OPCODE_UMAX: 9326 case TGSI_OPCODE_ROUND: 9327 case TGSI_OPCODE_SQRT: 9328 case TGSI_OPCODE_SHL: 9329 case TGSI_OPCODE_TRUNC: 9330 case TGSI_OPCODE_U2F: 9331 case TGSI_OPCODE_UCMP: 9332 case TGSI_OPCODE_USHR: 9333 case TGSI_OPCODE_USNE: 9334 case TGSI_OPCODE_XOR: 9335 /* Begin SM5 opcodes */ 9336 case TGSI_OPCODE_F2D: 9337 case TGSI_OPCODE_D2F: 9338 case TGSI_OPCODE_DADD: 9339 case TGSI_OPCODE_DMUL: 9340 case TGSI_OPCODE_DMAX: 9341 case TGSI_OPCODE_DMIN: 9342 case TGSI_OPCODE_DSGE: 9343 case TGSI_OPCODE_DSLT: 9344 case TGSI_OPCODE_DSEQ: 9345 
case TGSI_OPCODE_DSNE: 9346 case TGSI_OPCODE_BREV: 9347 case TGSI_OPCODE_POPC: 9348 case TGSI_OPCODE_LSB: 9349 case TGSI_OPCODE_INTERP_CENTROID: 9350 case TGSI_OPCODE_INTERP_SAMPLE: 9351 /* simple instructions */ 9352 return emit_simple(emit, inst); 9353 case TGSI_OPCODE_RET: 9354 if (emit->unit == PIPE_SHADER_TESS_CTRL && 9355 !emit->tcs.control_point_phase) { 9356 9357 /* store the tessellation levels in the patch constant phase only */ 9358 store_tesslevels(emit); 9359 } 9360 return emit_simple(emit, inst); 9361 9362 case TGSI_OPCODE_IMSB: 9363 case TGSI_OPCODE_UMSB: 9364 return emit_msb(emit, inst); 9365 case TGSI_OPCODE_IBFE: 9366 case TGSI_OPCODE_UBFE: 9367 return emit_bfe(emit, inst); 9368 case TGSI_OPCODE_BFI: 9369 return emit_bfi(emit, inst); 9370 case TGSI_OPCODE_MOV: 9371 return emit_mov(emit, inst); 9372 case TGSI_OPCODE_EMIT: 9373 return emit_vertex(emit, inst); 9374 case TGSI_OPCODE_ENDPRIM: 9375 return emit_endprim(emit, inst); 9376 case TGSI_OPCODE_IABS: 9377 return emit_iabs(emit, inst); 9378 case TGSI_OPCODE_ARL: 9379 FALLTHROUGH; 9380 case TGSI_OPCODE_UARL: 9381 return emit_arl_uarl(emit, inst); 9382 case TGSI_OPCODE_BGNSUB: 9383 /* no-op */ 9384 return TRUE; 9385 case TGSI_OPCODE_CAL: 9386 return emit_cal(emit, inst); 9387 case TGSI_OPCODE_CMP: 9388 return emit_cmp(emit, inst); 9389 case TGSI_OPCODE_COS: 9390 return emit_sincos(emit, inst); 9391 case TGSI_OPCODE_DST: 9392 return emit_dst(emit, inst); 9393 case TGSI_OPCODE_EX2: 9394 return emit_ex2(emit, inst); 9395 case TGSI_OPCODE_EXP: 9396 return emit_exp(emit, inst); 9397 case TGSI_OPCODE_IF: 9398 return emit_if(emit, &inst->Src[0]); 9399 case TGSI_OPCODE_KILL: 9400 return emit_kill(emit, inst); 9401 case TGSI_OPCODE_KILL_IF: 9402 return emit_kill_if(emit, inst); 9403 case TGSI_OPCODE_LG2: 9404 return emit_lg2(emit, inst); 9405 case TGSI_OPCODE_LIT: 9406 return emit_lit(emit, inst); 9407 case TGSI_OPCODE_LODQ: 9408 return emit_lodq(emit, inst); 9409 case TGSI_OPCODE_LOG: 9410 return emit_log(emit, inst); 9411 case TGSI_OPCODE_LRP: 9412 return emit_lrp(emit, inst); 9413 case TGSI_OPCODE_POW: 9414 return emit_pow(emit, inst); 9415 case TGSI_OPCODE_RCP: 9416 return emit_rcp(emit, inst); 9417 case TGSI_OPCODE_RSQ: 9418 return emit_rsq(emit, inst); 9419 case TGSI_OPCODE_SAMPLE: 9420 return emit_sample(emit, inst); 9421 case TGSI_OPCODE_SEQ: 9422 return emit_seq(emit, inst); 9423 case TGSI_OPCODE_SGE: 9424 return emit_sge(emit, inst); 9425 case TGSI_OPCODE_SGT: 9426 return emit_sgt(emit, inst); 9427 case TGSI_OPCODE_SIN: 9428 return emit_sincos(emit, inst); 9429 case TGSI_OPCODE_SLE: 9430 return emit_sle(emit, inst); 9431 case TGSI_OPCODE_SLT: 9432 return emit_slt(emit, inst); 9433 case TGSI_OPCODE_SNE: 9434 return emit_sne(emit, inst); 9435 case TGSI_OPCODE_SSG: 9436 return emit_ssg(emit, inst); 9437 case TGSI_OPCODE_ISSG: 9438 return emit_issg(emit, inst); 9439 case TGSI_OPCODE_TEX: 9440 return emit_tex(emit, inst); 9441 case TGSI_OPCODE_TG4: 9442 return emit_tg4(emit, inst); 9443 case TGSI_OPCODE_TEX2: 9444 return emit_tex2(emit, inst); 9445 case TGSI_OPCODE_TXP: 9446 return emit_txp(emit, inst); 9447 case TGSI_OPCODE_TXB: 9448 case TGSI_OPCODE_TXB2: 9449 case TGSI_OPCODE_TXL: 9450 return emit_txl_txb(emit, inst); 9451 case TGSI_OPCODE_TXD: 9452 return emit_txd(emit, inst); 9453 case TGSI_OPCODE_TXF: 9454 return emit_txf(emit, inst); 9455 case TGSI_OPCODE_TXL2: 9456 return emit_txl2(emit, inst); 9457 case TGSI_OPCODE_TXQ: 9458 return emit_txq(emit, inst); 9459 case TGSI_OPCODE_UIF: 9460 return emit_if(emit, 
&inst->Src[0]); 9461 case TGSI_OPCODE_UMUL_HI: 9462 case TGSI_OPCODE_IMUL_HI: 9463 case TGSI_OPCODE_UDIV: 9464 /* These cases use only the FIRST of two destination registers */ 9465 return emit_simple_1dst(emit, inst, 2, 0); 9466 case TGSI_OPCODE_IDIV: 9467 return emit_vmware(emit, inst, VGPU10_VMWARE_OPCODE_IDIV); 9468 case TGSI_OPCODE_UMUL: 9469 case TGSI_OPCODE_UMOD: 9470 case TGSI_OPCODE_MOD: 9471 /* These cases use only the SECOND of two destination registers */ 9472 return emit_simple_1dst(emit, inst, 2, 1); 9473 9474 /* Begin SM5 opcodes */ 9475 case TGSI_OPCODE_DABS: 9476 return emit_dabs(emit, inst); 9477 case TGSI_OPCODE_DNEG: 9478 return emit_dneg(emit, inst); 9479 case TGSI_OPCODE_DRCP: 9480 return emit_simple(emit, inst); 9481 case TGSI_OPCODE_DSQRT: 9482 return emit_dsqrt(emit, inst); 9483 case TGSI_OPCODE_DMAD: 9484 return emit_dmad(emit, inst); 9485 case TGSI_OPCODE_DFRAC: 9486 return emit_vmware(emit, inst, VGPU10_VMWARE_OPCODE_DFRC); 9487 case TGSI_OPCODE_D2I: 9488 case TGSI_OPCODE_D2U: 9489 return emit_simple(emit, inst); 9490 case TGSI_OPCODE_I2D: 9491 case TGSI_OPCODE_U2D: 9492 return emit_simple(emit, inst); 9493 case TGSI_OPCODE_DRSQ: 9494 return emit_drsq(emit, &inst->Dst[0], &inst->Src[0]); 9495 case TGSI_OPCODE_DDIV: 9496 return emit_simple(emit, inst); 9497 case TGSI_OPCODE_INTERP_OFFSET: 9498 return emit_interp_offset(emit, inst); 9499 9500 /* The following opcodes should never be seen here. We return zero 9501 * for all the PIPE_CAP_TGSI_DROUND_SUPPORTED, DFRACEXP_DLDEXP_SUPPORTED, 9502 * FMA_SUPPORTED, LDEXP_SUPPORTED queries. 9503 */ 9504 case TGSI_OPCODE_FMA: 9505 case TGSI_OPCODE_LDEXP: 9506 case TGSI_OPCODE_DSSG: 9507 case TGSI_OPCODE_DFRACEXP: 9508 case TGSI_OPCODE_DLDEXP: 9509 case TGSI_OPCODE_DTRUNC: 9510 case TGSI_OPCODE_DCEIL: 9511 case TGSI_OPCODE_DFLR: 9512 debug_printf("Unexpected TGSI opcode %s. " 9513 "Should have been translated away by the GLSL compiler.\n", 9514 tgsi_get_opcode_name(opcode)); 9515 return FALSE; 9516 9517 case TGSI_OPCODE_LOAD: 9518 case TGSI_OPCODE_STORE: 9519 case TGSI_OPCODE_ATOMAND: 9520 case TGSI_OPCODE_ATOMCAS: 9521 case TGSI_OPCODE_ATOMIMAX: 9522 case TGSI_OPCODE_ATOMIMIN: 9523 case TGSI_OPCODE_ATOMOR: 9524 case TGSI_OPCODE_ATOMUADD: 9525 case TGSI_OPCODE_ATOMUMAX: 9526 case TGSI_OPCODE_ATOMUMIN: 9527 case TGSI_OPCODE_ATOMXCHG: 9528 case TGSI_OPCODE_ATOMXOR: 9529 return FALSE; 9530 case TGSI_OPCODE_BARRIER: 9531 if (emit->unit == PIPE_SHADER_TESS_CTRL) { 9532 /* SM5 device doesn't support BARRIER in tcs . If barrier is used 9533 * in shader, don't do anything for this opcode and continue rest 9534 * of shader translation 9535 */ 9536 pipe_debug_message(&emit->svga_debug_callback, INFO, 9537 "barrier instruction is not supported in tessellation control shader\n"); 9538 return TRUE; 9539 } 9540 else { 9541 return emit_simple(emit, inst); 9542 } 9543 9544 case TGSI_OPCODE_END: 9545 if (!emit_post_helpers(emit)) 9546 return FALSE; 9547 return emit_simple(emit, inst); 9548 9549 default: 9550 debug_printf("Unimplemented tgsi instruction %s\n", 9551 tgsi_get_opcode_name(opcode)); 9552 return FALSE; 9553 } 9554 9555 return TRUE; 9556} 9557 9558 9559/** 9560 * Emit the extra instructions to adjust the vertex position. 9561 * There are two possible adjustments: 9562 * 1. Converting from Gallium to VGPU10 coordinate space by applying the 9563 * "prescale" and "pretranslate" values. 9564 * 2. Undoing the viewport transformation when we use the swtnl/draw path. 
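 * Only one of the two adjustments applies to a given shader; the exact
 * arithmetic for each case is spelled out in the comments in the function
 * body below.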
9565 * \param vs_pos_tmp_index which temporary register contains the vertex pos. 9566 */ 9567static void 9568emit_vpos_instructions(struct svga_shader_emitter_v10 *emit) 9569{ 9570 struct tgsi_full_src_register tmp_pos_src; 9571 struct tgsi_full_dst_register pos_dst; 9572 const unsigned vs_pos_tmp_index = emit->vposition.tmp_index; 9573 9574 /* Don't bother to emit any extra vertex instructions if vertex position is 9575 * not written out 9576 */ 9577 if (emit->vposition.out_index == INVALID_INDEX) 9578 return; 9579 9580 /** 9581 * Reset the temporary vertex position register index 9582 * so that emit_dst_register() will use the real vertex position output 9583 */ 9584 emit->vposition.tmp_index = INVALID_INDEX; 9585 9586 tmp_pos_src = make_src_temp_reg(vs_pos_tmp_index); 9587 pos_dst = make_dst_output_reg(emit->vposition.out_index); 9588 9589 /* If non-adjusted vertex position register index 9590 * is valid, copy the vertex position from the temporary 9591 * vertex position register before it is modified by the 9592 * prescale computation. 9593 */ 9594 if (emit->vposition.so_index != INVALID_INDEX) { 9595 struct tgsi_full_dst_register pos_so_dst = 9596 make_dst_output_reg(emit->vposition.so_index); 9597 9598 /* MOV pos_so, tmp_pos */ 9599 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_so_dst, &tmp_pos_src); 9600 } 9601 9602 if (emit->vposition.need_prescale) { 9603 /* This code adjusts the vertex position to match the VGPU10 convention. 9604 * If p is the position computed by the shader (usually by applying the 9605 * modelview and projection matrices), the new position q is computed by: 9606 * 9607 * q.x = p.w * trans.x + p.x * scale.x 9608 * q.y = p.w * trans.y + p.y * scale.y 9609 * q.z = p.w * trans.z + p.z * scale.z; 9610 * q.w = p.w * trans.w + p.w; 9611 */ 9612 struct tgsi_full_src_register tmp_pos_src_w = 9613 scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W); 9614 struct tgsi_full_dst_register tmp_pos_dst = 9615 make_dst_temp_reg(vs_pos_tmp_index); 9616 struct tgsi_full_dst_register tmp_pos_dst_xyz = 9617 writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XYZ); 9618 9619 struct tgsi_full_src_register prescale_scale = 9620 make_src_temp_reg(emit->vposition.prescale_scale_index); 9621 struct tgsi_full_src_register prescale_trans = 9622 make_src_temp_reg(emit->vposition.prescale_trans_index); 9623 9624 /* MUL tmp_pos.xyz, tmp_pos, prescale.scale */ 9625 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xyz, 9626 &tmp_pos_src, &prescale_scale); 9627 9628 /* MAD pos, tmp_pos.wwww, prescale.trans, tmp_pos */ 9629 emit_instruction_op3(emit, VGPU10_OPCODE_MAD, &pos_dst, &tmp_pos_src_w, 9630 &prescale_trans, &tmp_pos_src); 9631 } 9632 else if (emit->key.vs.undo_viewport) { 9633 /* This code computes the final vertex position from the temporary 9634 * vertex position by undoing the viewport transformation and the 9635 * divide-by-W operation (we convert window coords back to clip coords). 9636 * This is needed when we use the 'draw' module for fallbacks. 
9637 * If p is the temp pos in window coords, then the NDC coord q is: 9638 * q.x = (p.x - vp.x_trans) / vp.x_scale * p.w 9639 * q.y = (p.y - vp.y_trans) / vp.y_scale * p.w 9640 * q.z = p.z * p.w 9641 * q.w = p.w 9642 * CONST[vs_viewport_index] contains: 9643 * { 1/vp.x_scale, 1/vp.y_scale, -vp.x_trans, -vp.y_trans } 9644 */ 9645 struct tgsi_full_dst_register tmp_pos_dst = 9646 make_dst_temp_reg(vs_pos_tmp_index); 9647 struct tgsi_full_dst_register tmp_pos_dst_xy = 9648 writemask_dst(&tmp_pos_dst, TGSI_WRITEMASK_XY); 9649 struct tgsi_full_src_register tmp_pos_src_wwww = 9650 scalar_src(&tmp_pos_src, TGSI_SWIZZLE_W); 9651 9652 struct tgsi_full_dst_register pos_dst_xyz = 9653 writemask_dst(&pos_dst, TGSI_WRITEMASK_XYZ); 9654 struct tgsi_full_dst_register pos_dst_w = 9655 writemask_dst(&pos_dst, TGSI_WRITEMASK_W); 9656 9657 struct tgsi_full_src_register vp_xyzw = 9658 make_src_const_reg(emit->vs.viewport_index); 9659 struct tgsi_full_src_register vp_zwww = 9660 swizzle_src(&vp_xyzw, TGSI_SWIZZLE_Z, TGSI_SWIZZLE_W, 9661 TGSI_SWIZZLE_W, TGSI_SWIZZLE_W); 9662 9663 /* ADD tmp_pos.xy, tmp_pos.xy, viewport.zwww */ 9664 emit_instruction_op2(emit, VGPU10_OPCODE_ADD, &tmp_pos_dst_xy, 9665 &tmp_pos_src, &vp_zwww); 9666 9667 /* MUL tmp_pos.xy, tmp_pos.xyzw, viewport.xyzy */ 9668 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &tmp_pos_dst_xy, 9669 &tmp_pos_src, &vp_xyzw); 9670 9671 /* MUL pos.xyz, tmp_pos.xyz, tmp_pos.www */ 9672 emit_instruction_op2(emit, VGPU10_OPCODE_MUL, &pos_dst_xyz, 9673 &tmp_pos_src, &tmp_pos_src_wwww); 9674 9675 /* MOV pos.w, tmp_pos.w */ 9676 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &pos_dst_w, &tmp_pos_src); 9677 } 9678 else if (vs_pos_tmp_index != INVALID_INDEX) { 9679 /* This code is to handle the case where the temporary vertex 9680 * position register is created when the vertex shader has stream 9681 * output and prescale is disabled because rasterization is to be 9682 * discarded. 9683 */ 9684 struct tgsi_full_dst_register pos_dst = 9685 make_dst_output_reg(emit->vposition.out_index); 9686 9687 /* MOV pos, tmp_pos */ 9688 begin_emit_instruction(emit); 9689 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); 9690 emit_dst_register(emit, &pos_dst); 9691 emit_src_register(emit, &tmp_pos_src); 9692 end_emit_instruction(emit); 9693 } 9694 9695 /* Restore original vposition.tmp_index value for the next GS vertex. 9696 * It doesn't matter for VS. 9697 */ 9698 emit->vposition.tmp_index = vs_pos_tmp_index; 9699} 9700 9701static void 9702emit_clipping_instructions(struct svga_shader_emitter_v10 *emit) 9703{ 9704 if (emit->clip_mode == CLIP_DISTANCE) { 9705 /* Copy from copy distance temporary to CLIPDIST & the shadow copy */ 9706 emit_clip_distance_instructions(emit); 9707 9708 } else if (emit->clip_mode == CLIP_VERTEX && 9709 emit->key.last_vertex_stage) { 9710 /* Convert TGSI CLIPVERTEX to CLIPDIST */ 9711 emit_clip_vertex_instructions(emit); 9712 } 9713 9714 /** 9715 * Emit vertex position and take care of legacy user planes only if 9716 * there is a valid vertex position register index. 9717 * This is to take care of the case 9718 * where the shader doesn't output vertex position. Then in 9719 * this case, don't bother to emit more vertex instructions. 9720 */ 9721 if (emit->vposition.out_index == INVALID_INDEX) 9722 return; 9723 9724 /** 9725 * Emit per-vertex clipping instructions for legacy user defined clip planes. 
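 * The distances are computed from the (not yet adjusted) vertex position in
 * TEMP[vposition.tmp_index] by emit_clip_distance_from_vpos() below.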
9726 * NOTE: we must emit the clip distance instructions before the 9727 * emit_vpos_instructions() call since the later function will change 9728 * the TEMP[vs_pos_tmp_index] value. 9729 */ 9730 if (emit->clip_mode == CLIP_LEGACY && emit->key.last_vertex_stage) { 9731 /* Emit CLIPDIST for legacy user defined clip planes */ 9732 emit_clip_distance_from_vpos(emit, emit->vposition.tmp_index); 9733 } 9734} 9735 9736 9737/** 9738 * Emit extra per-vertex instructions. This includes clip-coordinate 9739 * space conversion and computing clip distances. This is called for 9740 * each GS emit-vertex instruction and at the end of VS translation. 9741 */ 9742static void 9743emit_vertex_instructions(struct svga_shader_emitter_v10 *emit) 9744{ 9745 /* Emit clipping instructions based on clipping mode */ 9746 emit_clipping_instructions(emit); 9747 9748 /* Emit vertex position instructions */ 9749 emit_vpos_instructions(emit); 9750} 9751 9752 9753/** 9754 * Translate the TGSI_OPCODE_EMIT GS instruction. 9755 */ 9756static boolean 9757emit_vertex(struct svga_shader_emitter_v10 *emit, 9758 const struct tgsi_full_instruction *inst) 9759{ 9760 unsigned ret = TRUE; 9761 9762 assert(emit->unit == PIPE_SHADER_GEOMETRY); 9763 9764 /** 9765 * Emit the viewport array index for the first vertex. 9766 */ 9767 if (emit->gs.viewport_index_out_index != INVALID_INDEX) { 9768 struct tgsi_full_dst_register viewport_index_out = 9769 make_dst_output_reg(emit->gs.viewport_index_out_index); 9770 struct tgsi_full_dst_register viewport_index_out_x = 9771 writemask_dst(&viewport_index_out, TGSI_WRITEMASK_X); 9772 struct tgsi_full_src_register viewport_index_tmp = 9773 make_src_temp_reg(emit->gs.viewport_index_tmp_index); 9774 9775 /* Set the out index to INVALID_INDEX, so it will not 9776 * be assigned to a temp again in emit_dst_register, and 9777 * the viewport index will not be assigned again in the 9778 * subsequent vertices. 9779 */ 9780 emit->gs.viewport_index_out_index = INVALID_INDEX; 9781 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, 9782 &viewport_index_out_x, &viewport_index_tmp); 9783 } 9784 9785 /** 9786 * Find the stream index associated with this emit vertex instruction. 9787 */ 9788 assert(inst->Src[0].Register.File == TGSI_FILE_IMMEDIATE); 9789 unsigned streamIndex = find_stream_index(emit, &inst->Src[0]); 9790 9791 /** 9792 * According to the ARB_gpu_shader5 spec, the built-in geometry shader 9793 * outputs are always associated with vertex stream zero. 9794 * So emit the extra vertex instructions for position or clip distance 9795 * for stream zero only. 9796 */ 9797 if (streamIndex == 0) { 9798 /** 9799 * Before emitting vertex instructions, emit the temporaries for 9800 * the prescale constants based on the viewport index if needed. 9801 */ 9802 if (emit->vposition.need_prescale && !emit->vposition.have_prescale) 9803 emit_temp_prescale_instructions(emit); 9804 9805 emit_vertex_instructions(emit); 9806 } 9807 9808 begin_emit_instruction(emit); 9809 if (emit->version >= 50) { 9810 if (emit->info.num_stream_output_components[streamIndex] == 0) { 9811 /** 9812 * If there is no output for this stream, discard this instruction. 
9813 */ 9814 emit->discard_instruction = TRUE; 9815 } 9816 else { 9817 emit_opcode(emit, VGPU10_OPCODE_EMIT_STREAM, FALSE); 9818 emit_stream_register(emit, streamIndex); 9819 } 9820 } 9821 else { 9822 emit_opcode(emit, VGPU10_OPCODE_EMIT, FALSE); 9823 } 9824 end_emit_instruction(emit); 9825 9826 return ret; 9827} 9828 9829 9830/** 9831 * Emit the extra code to convert from VGPU10's boolean front-face 9832 * register to TGSI's signed front-face register. 9833 * 9834 * TODO: Make temporary front-face register a scalar. 9835 */ 9836static void 9837emit_frontface_instructions(struct svga_shader_emitter_v10 *emit) 9838{ 9839 assert(emit->unit == PIPE_SHADER_FRAGMENT); 9840 9841 if (emit->fs.face_input_index != INVALID_INDEX) { 9842 /* convert vgpu10 boolean face register to gallium +/-1 value */ 9843 struct tgsi_full_dst_register tmp_dst = 9844 make_dst_temp_reg(emit->fs.face_tmp_index); 9845 struct tgsi_full_src_register one = 9846 make_immediate_reg_float(emit, 1.0f); 9847 struct tgsi_full_src_register neg_one = 9848 make_immediate_reg_float(emit, -1.0f); 9849 9850 /* MOVC face_tmp, IS_FRONT_FACE.x, 1.0, -1.0 */ 9851 begin_emit_instruction(emit); 9852 emit_opcode(emit, VGPU10_OPCODE_MOVC, FALSE); 9853 emit_dst_register(emit, &tmp_dst); 9854 emit_face_register(emit); 9855 emit_src_register(emit, &one); 9856 emit_src_register(emit, &neg_one); 9857 end_emit_instruction(emit); 9858 } 9859} 9860 9861 9862/** 9863 * Emit the extra code to convert from VGPU10's fragcoord.w value to 1/w. 9864 */ 9865static void 9866emit_fragcoord_instructions(struct svga_shader_emitter_v10 *emit) 9867{ 9868 assert(emit->unit == PIPE_SHADER_FRAGMENT); 9869 9870 if (emit->fs.fragcoord_input_index != INVALID_INDEX) { 9871 struct tgsi_full_dst_register tmp_dst = 9872 make_dst_temp_reg(emit->fs.fragcoord_tmp_index); 9873 struct tgsi_full_dst_register tmp_dst_xyz = 9874 writemask_dst(&tmp_dst, TGSI_WRITEMASK_XYZ); 9875 struct tgsi_full_dst_register tmp_dst_w = 9876 writemask_dst(&tmp_dst, TGSI_WRITEMASK_W); 9877 struct tgsi_full_src_register one = 9878 make_immediate_reg_float(emit, 1.0f); 9879 struct tgsi_full_src_register fragcoord = 9880 make_src_reg(TGSI_FILE_INPUT, emit->fs.fragcoord_input_index); 9881 9882 /* save the input index */ 9883 unsigned fragcoord_input_index = emit->fs.fragcoord_input_index; 9884 /* set to invalid to prevent substitution in emit_src_register() */ 9885 emit->fs.fragcoord_input_index = INVALID_INDEX; 9886 9887 /* MOV fragcoord_tmp.xyz, fragcoord.xyz */ 9888 begin_emit_instruction(emit); 9889 emit_opcode(emit, VGPU10_OPCODE_MOV, FALSE); 9890 emit_dst_register(emit, &tmp_dst_xyz); 9891 emit_src_register(emit, &fragcoord); 9892 end_emit_instruction(emit); 9893 9894 /* DIV fragcoord_tmp.w, 1.0, fragcoord.w */ 9895 begin_emit_instruction(emit); 9896 emit_opcode(emit, VGPU10_OPCODE_DIV, FALSE); 9897 emit_dst_register(emit, &tmp_dst_w); 9898 emit_src_register(emit, &one); 9899 emit_src_register(emit, &fragcoord); 9900 end_emit_instruction(emit); 9901 9902 /* restore saved value */ 9903 emit->fs.fragcoord_input_index = fragcoord_input_index; 9904 } 9905} 9906 9907 9908/** 9909 * Emit the extra code to get the current sample position value and 9910 * put it into a temp register. 
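 * SAMPLE_POS returns the position in D3D's [-0.5, 0.5] pixel-relative space,
 * so a 0.5 bias is added afterwards to match GL's [0, 1] convention.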
9911 */ 9912static void 9913emit_sample_position_instructions(struct svga_shader_emitter_v10 *emit) 9914{ 9915 assert(emit->unit == PIPE_SHADER_FRAGMENT); 9916 9917 if (emit->fs.sample_pos_sys_index != INVALID_INDEX) { 9918 assert(emit->version >= 41); 9919 9920 struct tgsi_full_dst_register tmp_dst = 9921 make_dst_temp_reg(emit->fs.sample_pos_tmp_index); 9922 struct tgsi_full_src_register half = 9923 make_immediate_reg_float4(emit, 0.5, 0.5, 0.0, 0.0); 9924 9925 struct tgsi_full_src_register tmp_src = 9926 make_src_temp_reg(emit->fs.sample_pos_tmp_index); 9927 struct tgsi_full_src_register sample_index_reg = 9928 make_src_scalar_reg(TGSI_FILE_SYSTEM_VALUE, 9929 emit->fs.sample_id_sys_index, TGSI_SWIZZLE_X); 9930 9931 /* The first src register is a shader resource (if we want a 9932 * multisampled resource sample position) or the rasterizer register 9933 * (if we want the current sample position in the color buffer). We 9934 * want the later. 9935 */ 9936 9937 /* SAMPLE_POS dst, RASTERIZER, sampleIndex */ 9938 begin_emit_instruction(emit); 9939 emit_opcode(emit, VGPU10_OPCODE_SAMPLE_POS, FALSE); 9940 emit_dst_register(emit, &tmp_dst); 9941 emit_rasterizer_register(emit); 9942 emit_src_register(emit, &sample_index_reg); 9943 end_emit_instruction(emit); 9944 9945 /* Convert from D3D coords to GL coords by adding 0.5 bias */ 9946 /* ADD dst, dst, half */ 9947 begin_emit_instruction(emit); 9948 emit_opcode(emit, VGPU10_OPCODE_ADD, FALSE); 9949 emit_dst_register(emit, &tmp_dst); 9950 emit_src_register(emit, &tmp_src); 9951 emit_src_register(emit, &half); 9952 end_emit_instruction(emit); 9953 } 9954} 9955 9956 9957/** 9958 * Emit extra instructions to adjust VS inputs/attributes. This can 9959 * mean casting a vertex attribute from int to float or setting the 9960 * W component to 1, or both. 9961 */ 9962static void 9963emit_vertex_attrib_instructions(struct svga_shader_emitter_v10 *emit) 9964{ 9965 const unsigned save_w_1_mask = emit->key.vs.adjust_attrib_w_1; 9966 const unsigned save_itof_mask = emit->key.vs.adjust_attrib_itof; 9967 const unsigned save_utof_mask = emit->key.vs.adjust_attrib_utof; 9968 const unsigned save_is_bgra_mask = emit->key.vs.attrib_is_bgra; 9969 const unsigned save_puint_to_snorm_mask = emit->key.vs.attrib_puint_to_snorm; 9970 const unsigned save_puint_to_uscaled_mask = emit->key.vs.attrib_puint_to_uscaled; 9971 const unsigned save_puint_to_sscaled_mask = emit->key.vs.attrib_puint_to_sscaled; 9972 9973 unsigned adjust_mask = (save_w_1_mask | 9974 save_itof_mask | 9975 save_utof_mask | 9976 save_is_bgra_mask | 9977 save_puint_to_snorm_mask | 9978 save_puint_to_uscaled_mask | 9979 save_puint_to_sscaled_mask); 9980 9981 assert(emit->unit == PIPE_SHADER_VERTEX); 9982 9983 if (adjust_mask) { 9984 struct tgsi_full_src_register one = 9985 make_immediate_reg_float(emit, 1.0f); 9986 9987 struct tgsi_full_src_register one_int = 9988 make_immediate_reg_int(emit, 1); 9989 9990 /* We need to turn off these bitmasks while emitting the 9991 * instructions below, then restore them afterward. 
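       * (While these masks are set, reads of the affected inputs elsewhere
       * in the emitter get redirected to the adjusted temporaries, so they
       * must be cleared here in order to read the raw attribute values.)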
9992 */ 9993 emit->key.vs.adjust_attrib_w_1 = 0; 9994 emit->key.vs.adjust_attrib_itof = 0; 9995 emit->key.vs.adjust_attrib_utof = 0; 9996 emit->key.vs.attrib_is_bgra = 0; 9997 emit->key.vs.attrib_puint_to_snorm = 0; 9998 emit->key.vs.attrib_puint_to_uscaled = 0; 9999 emit->key.vs.attrib_puint_to_sscaled = 0; 10000 10001 while (adjust_mask) { 10002 unsigned index = u_bit_scan(&adjust_mask); 10003 10004 /* skip the instruction if this vertex attribute is not being used */ 10005 if (emit->info.input_usage_mask[index] == 0) 10006 continue; 10007 10008 unsigned tmp = emit->vs.adjusted_input[index]; 10009 struct tgsi_full_src_register input_src = 10010 make_src_reg(TGSI_FILE_INPUT, index); 10011 10012 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 10013 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 10014 struct tgsi_full_dst_register tmp_dst_w = 10015 writemask_dst(&tmp_dst, TGSI_WRITEMASK_W); 10016 10017 /* ITOF/UTOF/MOV tmp, input[index] */ 10018 if (save_itof_mask & (1 << index)) { 10019 emit_instruction_op1(emit, VGPU10_OPCODE_ITOF, 10020 &tmp_dst, &input_src); 10021 } 10022 else if (save_utof_mask & (1 << index)) { 10023 emit_instruction_op1(emit, VGPU10_OPCODE_UTOF, 10024 &tmp_dst, &input_src); 10025 } 10026 else if (save_puint_to_snorm_mask & (1 << index)) { 10027 emit_puint_to_snorm(emit, &tmp_dst, &input_src); 10028 } 10029 else if (save_puint_to_uscaled_mask & (1 << index)) { 10030 emit_puint_to_uscaled(emit, &tmp_dst, &input_src); 10031 } 10032 else if (save_puint_to_sscaled_mask & (1 << index)) { 10033 emit_puint_to_sscaled(emit, &tmp_dst, &input_src); 10034 } 10035 else { 10036 assert((save_w_1_mask | save_is_bgra_mask) & (1 << index)); 10037 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, 10038 &tmp_dst, &input_src); 10039 } 10040 10041 if (save_is_bgra_mask & (1 << index)) { 10042 emit_swap_r_b(emit, &tmp_dst, &tmp_src); 10043 } 10044 10045 if (save_w_1_mask & (1 << index)) { 10046 /* MOV tmp.w, 1.0 */ 10047 if (emit->key.vs.attrib_is_pure_int & (1 << index)) { 10048 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, 10049 &tmp_dst_w, &one_int); 10050 } 10051 else { 10052 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, 10053 &tmp_dst_w, &one); 10054 } 10055 } 10056 } 10057 10058 emit->key.vs.adjust_attrib_w_1 = save_w_1_mask; 10059 emit->key.vs.adjust_attrib_itof = save_itof_mask; 10060 emit->key.vs.adjust_attrib_utof = save_utof_mask; 10061 emit->key.vs.attrib_is_bgra = save_is_bgra_mask; 10062 emit->key.vs.attrib_puint_to_snorm = save_puint_to_snorm_mask; 10063 emit->key.vs.attrib_puint_to_uscaled = save_puint_to_uscaled_mask; 10064 emit->key.vs.attrib_puint_to_sscaled = save_puint_to_sscaled_mask; 10065 } 10066} 10067 10068 10069/* Find zero-value immedate for default layer index */ 10070static void 10071emit_default_layer_instructions(struct svga_shader_emitter_v10 *emit) 10072{ 10073 assert(emit->unit == PIPE_SHADER_FRAGMENT); 10074 10075 /* immediate for default layer index 0 */ 10076 if (emit->fs.layer_input_index != INVALID_INDEX) { 10077 union tgsi_immediate_data imm; 10078 imm.Int = 0; 10079 emit->fs.layer_imm_index = find_immediate(emit, imm, 0); 10080 } 10081} 10082 10083 10084static void 10085emit_temp_prescale_from_cbuf(struct svga_shader_emitter_v10 *emit, 10086 unsigned cbuf_index, 10087 struct tgsi_full_dst_register *scale, 10088 struct tgsi_full_dst_register *translate) 10089{ 10090 struct tgsi_full_src_register scale_cbuf = make_src_const_reg(cbuf_index); 10091 struct tgsi_full_src_register trans_cbuf = make_src_const_reg(cbuf_index+1); 
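   /* MOV scale,     CONST[cbuf_index]
    * MOV translate, CONST[cbuf_index+1]
    */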
10092 10093 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, scale, &scale_cbuf); 10094 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, translate, &trans_cbuf); 10095} 10096 10097 10098/** 10099 * A recursive helper function to find the prescale from the constant buffer 10100 */ 10101static void 10102find_prescale_from_cbuf(struct svga_shader_emitter_v10 *emit, 10103 unsigned index, unsigned num_prescale, 10104 struct tgsi_full_src_register *vp_index, 10105 struct tgsi_full_dst_register *scale, 10106 struct tgsi_full_dst_register *translate, 10107 struct tgsi_full_src_register *tmp_src, 10108 struct tgsi_full_dst_register *tmp_dst) 10109{ 10110 if (num_prescale == 0) 10111 return; 10112 10113 if (index > 0) { 10114 /* ELSE */ 10115 emit_instruction_op0(emit, VGPU10_OPCODE_ELSE); 10116 } 10117 10118 struct tgsi_full_src_register index_src = 10119 make_immediate_reg_int(emit, index); 10120 10121 if (index == 0) { 10122 /* GE tmp, vp_index, index */ 10123 emit_instruction_op2(emit, VGPU10_OPCODE_GE, tmp_dst, 10124 vp_index, &index_src); 10125 } else { 10126 /* EQ tmp, vp_index, index */ 10127 emit_instruction_op2(emit, VGPU10_OPCODE_EQ, tmp_dst, 10128 vp_index, &index_src); 10129 } 10130 10131 /* IF tmp */ 10132 emit_if(emit, tmp_src); 10133 emit_temp_prescale_from_cbuf(emit, 10134 emit->vposition.prescale_cbuf_index + 2 * index, 10135 scale, translate); 10136 10137 find_prescale_from_cbuf(emit, index+1, num_prescale-1, 10138 vp_index, scale, translate, 10139 tmp_src, tmp_dst); 10140 10141 /* ENDIF */ 10142 emit_instruction_op0(emit, VGPU10_OPCODE_ENDIF); 10143} 10144 10145 10146/** 10147 * This helper function emits instructions to set the prescale 10148 * and translate temporaries to the correct constants from the 10149 * constant buffer according to the designated viewport. 10150 */ 10151static void 10152emit_temp_prescale_instructions(struct svga_shader_emitter_v10 *emit) 10153{ 10154 struct tgsi_full_dst_register prescale_scale = 10155 make_dst_temp_reg(emit->vposition.prescale_scale_index); 10156 struct tgsi_full_dst_register prescale_translate = 10157 make_dst_temp_reg(emit->vposition.prescale_trans_index); 10158 10159 unsigned prescale_cbuf_index = emit->vposition.prescale_cbuf_index; 10160 10161 if (emit->vposition.num_prescale == 1) { 10162 emit_temp_prescale_from_cbuf(emit, 10163 prescale_cbuf_index, 10164 &prescale_scale, &prescale_translate); 10165 } else { 10166 /** 10167 * Since SM5 device does not support dynamic indexing, we need 10168 * to do the if-else to find the prescale constants for the 10169 * specified viewport. 10170 */ 10171 struct tgsi_full_src_register vp_index_src = 10172 make_src_temp_reg(emit->gs.viewport_index_tmp_index); 10173 10174 struct tgsi_full_src_register vp_index_src_x = 10175 scalar_src(&vp_index_src, TGSI_SWIZZLE_X); 10176 10177 unsigned tmp = get_temp_index(emit); 10178 struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp); 10179 struct tgsi_full_src_register tmp_src_x = 10180 scalar_src(&tmp_src, TGSI_SWIZZLE_X); 10181 struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp); 10182 10183 find_prescale_from_cbuf(emit, 0, emit->vposition.num_prescale, 10184 &vp_index_src_x, 10185 &prescale_scale, &prescale_translate, 10186 &tmp_src_x, &tmp_dst); 10187 } 10188 10189 /* Mark prescale temporaries are emitted */ 10190 emit->vposition.have_prescale = 1; 10191} 10192 10193 10194/** 10195 * A helper function to emit an instruction in a vertex shader to add a bias 10196 * to the VertexID system value. 
This patches the VertexID in the SVGA vertex 10197 * shader to include the base vertex of an indexed primitive or the start index 10198 * of a non-indexed primitive. 10199 */ 10200static void 10201emit_vertex_id_nobase_instruction(struct svga_shader_emitter_v10 *emit) 10202{ 10203 struct tgsi_full_src_register vertex_id_bias_index = 10204 make_src_const_reg(emit->vs.vertex_id_bias_index); 10205 struct tgsi_full_src_register vertex_id_sys_src = 10206 make_src_reg(TGSI_FILE_SYSTEM_VALUE, emit->vs.vertex_id_sys_index); 10207 struct tgsi_full_src_register vertex_id_sys_src_x = 10208 scalar_src(&vertex_id_sys_src, TGSI_SWIZZLE_X); 10209 struct tgsi_full_dst_register vertex_id_tmp_dst = 10210 make_dst_temp_reg(emit->vs.vertex_id_tmp_index); 10211 10212 /* IADD vertex_id_tmp, vertex_id_sys, vertex_id_bias */ 10213 unsigned vertex_id_tmp_index = emit->vs.vertex_id_tmp_index; 10214 emit->vs.vertex_id_tmp_index = INVALID_INDEX; 10215 emit_instruction_opn(emit, VGPU10_OPCODE_IADD, &vertex_id_tmp_dst, 10216 &vertex_id_sys_src_x, &vertex_id_bias_index, NULL, FALSE, 10217 FALSE); 10218 emit->vs.vertex_id_tmp_index = vertex_id_tmp_index; 10219} 10220 10221/** 10222 * Hull Shader must have control point outputs. But tessellation 10223 * control shader can return without writing to control point output. 10224 * In this case, the control point output is assumed to be passthrough 10225 * from the control point input. 10226 * This helper function is to write out a control point output first in case 10227 * the tessellation control shader returns before writing a 10228 * control point output. 10229 */ 10230static void 10231emit_tcs_default_control_point_output(struct svga_shader_emitter_v10 *emit) 10232{ 10233 assert(emit->unit == PIPE_SHADER_TESS_CTRL); 10234 assert(emit->tcs.control_point_phase); 10235 assert(emit->tcs.control_point_input_index != INVALID_INDEX); 10236 assert(emit->tcs.control_point_out_index != INVALID_INDEX); 10237 assert(emit->tcs.invocation_id_sys_index != INVALID_INDEX); 10238 10239 /* UARL ADDR[INDEX].x INVOCATION.xxxx */ 10240 10241 struct tgsi_full_src_register invocation_src; 10242 struct tgsi_full_dst_register addr_dst; 10243 struct tgsi_full_dst_register addr_dst_x; 10244 unsigned addr_tmp; 10245 10246 addr_tmp = emit->address_reg_index[emit->tcs.control_point_addr_index]; 10247 addr_dst = make_dst_temp_reg(addr_tmp); 10248 addr_dst_x = writemask_dst(&addr_dst, TGSI_WRITEMASK_X); 10249 10250 invocation_src = make_src_reg(TGSI_FILE_SYSTEM_VALUE, 10251 emit->tcs.invocation_id_sys_index); 10252 10253 begin_emit_instruction(emit); 10254 emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE); 10255 emit_dst_register(emit, &addr_dst_x); 10256 emit_src_register(emit, &invocation_src); 10257 end_emit_instruction(emit); 10258 10259 10260 /* MOV OUTPUT INPUT[ADDR[INDEX].x][POSITION] */ 10261 10262 struct tgsi_full_src_register input_control_point; 10263 struct tgsi_full_dst_register output_control_point; 10264 10265 input_control_point = make_src_reg(TGSI_FILE_INPUT, 10266 emit->tcs.control_point_input_index); 10267 input_control_point.Register.Dimension = 1; 10268 input_control_point.Dimension.Indirect = 1; 10269 input_control_point.DimIndirect.File = TGSI_FILE_ADDRESS; 10270 input_control_point.DimIndirect.Index = emit->tcs.control_point_addr_index; 10271 output_control_point = 10272 make_dst_output_reg(emit->tcs.control_point_out_index); 10273 10274 begin_emit_instruction(emit); 10275 emit_opcode_precise(emit, VGPU10_OPCODE_MOV, FALSE, FALSE); 10276 emit_dst_register(emit, 
                     &output_control_point);
   emit_src_register(emit, &input_control_point);
   end_emit_instruction(emit);
}

/**
 * This function constructs a temporary tessfactor vector from the
 * VGPU10*_TESSFACTOR values in the domain shader.  SM5 provides the
 * tessfactors as individual floating point values, whereas TGSI emits them
 * as a vector.  Here we build a temporary vector, laid out like
 * TGSI_SEMANTIC_TESSINNER/OUTER, and fill it with the VGPU10*_TESSFACTOR
 * values.  This constructed vector is then used wherever
 * TGSI_SEMANTIC_TESSINNER/OUTER is referenced in the shader.
 */
static void
emit_temp_tessfactor_instructions(struct svga_shader_emitter_v10 *emit)
{
   struct tgsi_full_src_register src;
   struct tgsi_full_dst_register dst;

   if (emit->tes.inner.tgsi_index != INVALID_INDEX) {
      dst = make_dst_temp_reg(emit->tes.inner.temp_index);

      switch (emit->tes.prim_mode) {
      case PIPE_PRIM_QUADS:
         src = make_src_scalar_reg(TGSI_FILE_INPUT,
                  emit->tes.inner.in_index + 1, TGSI_SWIZZLE_X);
         dst = writemask_dst(&dst, TGSI_WRITEMASK_Y);
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
         FALLTHROUGH;
      case PIPE_PRIM_TRIANGLES:
         src = make_src_scalar_reg(TGSI_FILE_INPUT,
                  emit->tes.inner.in_index, TGSI_SWIZZLE_X);
         dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
         break;
      case PIPE_PRIM_LINES:
         /**
          * Per the SM5 spec, the InsideTessFactor is unused for isolines.
          * The GLSL tessInnerLevel likewise has no meaning for isolines,
          * but if an application reads tessInnerLevel in the TES while the
          * primitive type is isolines, return vec(1.0f) rather than letting
          * the driver fault on the access.
          */
         src = make_immediate_reg_float(emit, 1.0f);
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
         break;
      default:
         break;
      }
   }

   if (emit->tes.outer.tgsi_index != INVALID_INDEX) {
      dst = make_dst_temp_reg(emit->tes.outer.temp_index);

      switch (emit->tes.prim_mode) {
      case PIPE_PRIM_QUADS:
         src = make_src_scalar_reg(TGSI_FILE_INPUT,
                  emit->tes.outer.in_index + 3, TGSI_SWIZZLE_X);
         dst = writemask_dst(&dst, TGSI_WRITEMASK_W);
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
         FALLTHROUGH;
      case PIPE_PRIM_TRIANGLES:
         src = make_src_scalar_reg(TGSI_FILE_INPUT,
                  emit->tes.outer.in_index + 2, TGSI_SWIZZLE_X);
         dst = writemask_dst(&dst, TGSI_WRITEMASK_Z);
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);
         FALLTHROUGH;
      case PIPE_PRIM_LINES:
         src = make_src_scalar_reg(TGSI_FILE_INPUT,
                  emit->tes.outer.in_index + 1, TGSI_SWIZZLE_X);
         dst = writemask_dst(&dst, TGSI_WRITEMASK_Y);
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);

         src = make_src_scalar_reg(TGSI_FILE_INPUT,
                  emit->tes.outer.in_index, TGSI_SWIZZLE_X);
         dst = writemask_dst(&dst, TGSI_WRITEMASK_X);
         emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src);

         break;
      default:
         break;
      }
   }
}


static void
emit_initialize_temp_instruction(struct svga_shader_emitter_v10 *emit)
{
   struct tgsi_full_src_register src;
   struct tgsi_full_dst_register dst;
   unsigned vgpu10_temp_index = remap_temp_index(emit, TGSI_FILE_TEMPORARY,
emit->initialize_temp_index); 10369 src = make_immediate_reg_float(emit, 0.0f); 10370 dst = make_dst_temp_reg(vgpu10_temp_index); 10371 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &dst, &src); 10372 emit->temp_map[emit->initialize_temp_index].initialized = TRUE; 10373 emit->initialize_temp_index = INVALID_INDEX; 10374} 10375 10376 10377/** 10378 * Emit any extra/helper declarations/code that we might need between 10379 * the declaration section and code section. 10380 */ 10381static boolean 10382emit_pre_helpers(struct svga_shader_emitter_v10 *emit) 10383{ 10384 /* Properties */ 10385 if (emit->unit == PIPE_SHADER_GEOMETRY) 10386 emit_property_instructions(emit); 10387 else if (emit->unit == PIPE_SHADER_TESS_CTRL) { 10388 emit_hull_shader_declarations(emit); 10389 10390 /* Save the position of the first instruction token so that we can 10391 * do a second pass of the instructions for the patch constant phase. 10392 */ 10393 emit->tcs.instruction_token_pos = emit->cur_tgsi_token; 10394 emit->tcs.fork_phase_add_signature = FALSE; 10395 10396 if (!emit_hull_shader_control_point_phase(emit)) { 10397 emit->skip_instruction = TRUE; 10398 return TRUE; 10399 } 10400 10401 /* Set the current tcs phase to control point phase */ 10402 emit->tcs.control_point_phase = TRUE; 10403 } 10404 else if (emit->unit == PIPE_SHADER_TESS_EVAL) { 10405 emit_domain_shader_declarations(emit); 10406 } 10407 10408 /* Declare inputs */ 10409 if (!emit_input_declarations(emit)) 10410 return FALSE; 10411 10412 /* Declare outputs */ 10413 if (!emit_output_declarations(emit)) 10414 return FALSE; 10415 10416 /* Declare temporary registers */ 10417 emit_temporaries_declaration(emit); 10418 10419 /* For PIPE_SHADER_TESS_CTRL, constants, samplers, resources and immediates 10420 * will already be declared in hs_decls (emit_hull_shader_declarations) 10421 */ 10422 if (emit->unit != PIPE_SHADER_TESS_CTRL) { 10423 /* Declare constant registers */ 10424 emit_constant_declaration(emit); 10425 10426 /* Declare samplers and resources */ 10427 emit_sampler_declarations(emit); 10428 emit_resource_declarations(emit); 10429 10430 alloc_common_immediates(emit); 10431 /* Now, emit the constant block containing all the immediates 10432 * declared by shader, as well as the extra ones seen above. 
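       * (The block itself is written out a bit further down by
       * emit_vgpu10_immediates_block(), once the remaining immediates have
       * been allocated.)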
10433 */ 10434 } 10435 10436 if (emit->unit != PIPE_SHADER_FRAGMENT) { 10437 /* 10438 * Declare clip distance output registers for ClipVertex or 10439 * user defined planes 10440 */ 10441 emit_clip_distance_declarations(emit); 10442 } 10443 10444 if (emit->unit == PIPE_SHADER_FRAGMENT && 10445 emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) { 10446 float alpha = emit->key.fs.alpha_ref; 10447 emit->fs.alpha_ref_index = 10448 alloc_immediate_float4(emit, alpha, alpha, alpha, alpha); 10449 } 10450 10451 if (emit->unit != PIPE_SHADER_TESS_CTRL) { 10452 /** 10453 * For PIPE_SHADER_TESS_CTRL, immediates are already declared in 10454 * hs_decls 10455 */ 10456 emit_vgpu10_immediates_block(emit); 10457 } 10458 else { 10459 emit_tcs_default_control_point_output(emit); 10460 } 10461 10462 if (emit->unit == PIPE_SHADER_FRAGMENT) { 10463 emit_frontface_instructions(emit); 10464 emit_fragcoord_instructions(emit); 10465 emit_sample_position_instructions(emit); 10466 emit_default_layer_instructions(emit); 10467 } 10468 else if (emit->unit == PIPE_SHADER_VERTEX) { 10469 emit_vertex_attrib_instructions(emit); 10470 10471 if (emit->info.uses_vertexid) 10472 emit_vertex_id_nobase_instruction(emit); 10473 } 10474 else if (emit->unit == PIPE_SHADER_TESS_EVAL) { 10475 emit_temp_tessfactor_instructions(emit); 10476 } 10477 10478 /** 10479 * For geometry shader that writes to viewport index, the prescale 10480 * temporaries will be done at the first vertex emission. 10481 */ 10482 if (emit->vposition.need_prescale && emit->vposition.num_prescale == 1) 10483 emit_temp_prescale_instructions(emit); 10484 10485 return TRUE; 10486} 10487 10488 10489/** 10490 * The device has no direct support for the pipe_blend_state::alpha_to_one 10491 * option so we implement it here with shader code. 10492 * 10493 * Note that this is kind of pointless, actually. Here we're clobbering 10494 * the alpha value with 1.0. So if alpha-to-coverage is enabled, we'll wind 10495 * up with 100% coverage. That's almost certainly not what the user wants. 10496 * The work-around is to add extra shader code to compute coverage from alpha 10497 * and write it to the coverage output register (if the user's shader doesn't 10498 * do so already). We'll probably do that in the future. 10499 */ 10500static void 10501emit_alpha_to_one_instructions(struct svga_shader_emitter_v10 *emit, 10502 unsigned fs_color_tmp_index) 10503{ 10504 struct tgsi_full_src_register one = make_immediate_reg_float(emit, 1.0f); 10505 unsigned i; 10506 10507 /* Note: it's not 100% clear from the spec if we're supposed to clobber 10508 * the alpha for all render targets. But that's what NVIDIA does and 10509 * that's what Piglit tests. 10510 */ 10511 for (i = 0; i < emit->fs.num_color_outputs; i++) { 10512 struct tgsi_full_dst_register color_dst; 10513 10514 if (fs_color_tmp_index != INVALID_INDEX && i == 0) { 10515 /* write to the temp color register */ 10516 color_dst = make_dst_temp_reg(fs_color_tmp_index); 10517 } 10518 else { 10519 /* write directly to the color[i] output */ 10520 color_dst = make_dst_output_reg(emit->fs.color_out_index[i]); 10521 } 10522 10523 color_dst = writemask_dst(&color_dst, TGSI_WRITEMASK_W); 10524 10525 emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &one); 10526 } 10527} 10528 10529 10530/** 10531 * Emit alpha test code. This compares TEMP[fs_color_tmp_index].w 10532 * against the alpha reference value and discards the fragment if the 10533 * comparison fails. 
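 * Roughly, the emitted sequence is:
 *   tmp.x = (color.w <alpha_func> alpha_ref)
 *   DISCARD if tmp.x == 0
 *   MOV output.color, color   (unless the color still needs broadcasting)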
 */
static void
emit_alpha_test_instructions(struct svga_shader_emitter_v10 *emit,
                             unsigned fs_color_tmp_index)
{
   /* compare output color's alpha to alpha ref and kill */
   unsigned tmp = get_temp_index(emit);
   struct tgsi_full_src_register tmp_src = make_src_temp_reg(tmp);
   struct tgsi_full_src_register tmp_src_x =
      scalar_src(&tmp_src, TGSI_SWIZZLE_X);
   struct tgsi_full_dst_register tmp_dst = make_dst_temp_reg(tmp);
   struct tgsi_full_src_register color_src =
      make_src_temp_reg(fs_color_tmp_index);
   struct tgsi_full_src_register color_src_w =
      scalar_src(&color_src, TGSI_SWIZZLE_W);
   struct tgsi_full_src_register ref_src =
      make_src_immediate_reg(emit->fs.alpha_ref_index);
   struct tgsi_full_dst_register color_dst =
      make_dst_output_reg(emit->fs.color_out_index[0]);

   assert(emit->unit == PIPE_SHADER_FRAGMENT);

   /* dst = src0 'alpha_func' src1 */
   emit_comparison(emit, emit->key.fs.alpha_func, &tmp_dst,
                   &color_src_w, &ref_src);

   /* DISCARD if dst.x == 0 */
   begin_emit_instruction(emit);
   emit_discard_opcode(emit, FALSE);   /* discard if src0.x is zero */
   emit_src_register(emit, &tmp_src_x);
   end_emit_instruction(emit);

   /* If we don't need to broadcast the color below, emit the final color here.
    */
   if (emit->key.fs.write_color0_to_n_cbufs <= 1) {
      /* MOV output.color, tempcolor */
      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &color_src);
   }

   free_temp_indexes(emit);
}


/**
 * Emit instructions for writing a single color output to multiple
 * color buffers.
 * This is used when the TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS property
 * is set (or when key.fs.white_fragments is true) and the number of
 * render targets is greater than one.
 * \param fs_color_tmp_index  index of the temp register that holds the
 *                            color to broadcast.
 */
static void
emit_broadcast_color_instructions(struct svga_shader_emitter_v10 *emit,
                                  unsigned fs_color_tmp_index)
{
   const unsigned n = emit->key.fs.write_color0_to_n_cbufs;
   unsigned i;
   struct tgsi_full_src_register color_src;

   if (emit->key.fs.white_fragments) {
      /* set all color outputs to white */
      color_src = make_immediate_reg_float(emit, 1.0f);
   }
   else {
      /* set all color outputs to TEMP[fs_color_tmp_index] */
      assert(fs_color_tmp_index != INVALID_INDEX);
      color_src = make_src_temp_reg(fs_color_tmp_index);
   }

   assert(emit->unit == PIPE_SHADER_FRAGMENT);

   for (i = 0; i < n; i++) {
      unsigned output_reg = emit->fs.color_out_index[i];
      struct tgsi_full_dst_register color_dst =
         make_dst_output_reg(output_reg);

      /* Fill in this semantic here since we'll use it later in
       * emit_dst_register().
       */
      emit->info.output_semantic_name[output_reg] = TGSI_SEMANTIC_COLOR;

      /* MOV output.color[i], tempcolor */
      emit_instruction_op1(emit, VGPU10_OPCODE_MOV, &color_dst, &color_src);
   }
}
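

/*
 * A rough sketch (not literal VGPU10 disassembly) of the fragment shader
 * epilogue produced by the two helpers above for a shader compiled with
 * alpha test enabled and key.fs.write_color0_to_n_cbufs == 2, where
 * TEMP[c] is the temporary holding the shader's color result and IMM[r]
 * holds the replicated alpha reference value:
 *
 *    <comparison per alpha_func>  TEMP[t].x, TEMP[c].wwww, IMM[r]
 *    DISCARD (if zero)            TEMP[t].x
 *    MOV                          output.color[0], TEMP[c]
 *    MOV                          output.color[1], TEMP[c]
 *
 * The exact comparison opcode depends on key.fs.alpha_func and is chosen
 * by emit_comparison().
 */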


/**
 * Emit extra helper code after the original shader code, but before the
 * last END/RET instruction.
 * For vertex shaders this means emitting the extra code to apply the
 * prescale scale/translation.
 */
static boolean
emit_post_helpers(struct svga_shader_emitter_v10 *emit)
{
   if (emit->unit == PIPE_SHADER_VERTEX) {
      emit_vertex_instructions(emit);
   }
   else if (emit->unit == PIPE_SHADER_FRAGMENT) {
      const unsigned fs_color_tmp_index = emit->fs.color_tmp_index;

      assert(!(emit->key.fs.white_fragments &&
               emit->key.fs.write_color0_to_n_cbufs == 0));

      /* We no longer want emit_dst_register() to substitute the
       * temporary fragment color register for the real color output.
       */
      emit->fs.color_tmp_index = INVALID_INDEX;

      if (emit->key.fs.alpha_to_one) {
         emit_alpha_to_one_instructions(emit, fs_color_tmp_index);
      }
      if (emit->key.fs.alpha_func != SVGA3D_CMP_ALWAYS) {
         emit_alpha_test_instructions(emit, fs_color_tmp_index);
      }
      if (emit->key.fs.write_color0_to_n_cbufs > 1 ||
          emit->key.fs.white_fragments) {
         emit_broadcast_color_instructions(emit, fs_color_tmp_index);
      }
   }
   else if (emit->unit == PIPE_SHADER_TESS_CTRL) {
      if (!emit->tcs.control_point_phase) {
         /* store the tessellation levels in the patch constant phase only */
         store_tesslevels(emit);
      }
      else {
         emit_clipping_instructions(emit);
      }
   }
   else if (emit->unit == PIPE_SHADER_TESS_EVAL) {
      emit_vertex_instructions(emit);
   }

   return TRUE;
}


/**
 * Translate the TGSI tokens into VGPU10 tokens.
 */
static boolean
emit_vgpu10_instructions(struct svga_shader_emitter_v10 *emit,
                         const struct tgsi_token *tokens)
{
   struct tgsi_parse_context parse;
   boolean ret = TRUE;
   boolean pre_helpers_emitted = FALSE;
   unsigned inst_number = 0;

   tgsi_parse_init(&parse, tokens);

   while (!tgsi_parse_end_of_tokens(&parse)) {

      /* Save the current tgsi token starting position */
      emit->cur_tgsi_token = parse.Position;

      tgsi_parse_token(&parse);

      switch (parse.FullToken.Token.Type) {
      case TGSI_TOKEN_TYPE_IMMEDIATE:
         ret = emit_vgpu10_immediate(emit, &parse.FullToken.FullImmediate);
         if (!ret)
            goto done;
         break;

      case TGSI_TOKEN_TYPE_DECLARATION:
         ret = emit_vgpu10_declaration(emit, &parse.FullToken.FullDeclaration);
         if (!ret)
            goto done;
         break;

      case TGSI_TOKEN_TYPE_INSTRUCTION:
         if (!pre_helpers_emitted) {
            ret = emit_pre_helpers(emit);
            if (!ret)
               goto done;
            pre_helpers_emitted = TRUE;
         }
         ret = emit_vgpu10_instruction(emit, inst_number++,
                                       &parse.FullToken.FullInstruction);

         /* Usually this applies to TCS only.  If the shader is reading
          * control point outputs in the control point phase, we should
          * re-emit all instructions which write to control point outputs
          * in the control point phase so their results are also stored
          * into temporaries.
          */
         if (emit->reemit_instruction) {
            assert(emit->unit == PIPE_SHADER_TESS_CTRL);
            ret = emit_vgpu10_instruction(emit, inst_number,
                                          &parse.FullToken.FullInstruction);
         }
         else if (emit->initialize_temp_index != INVALID_INDEX) {
            emit_initialize_temp_instruction(emit);
            emit->initialize_temp_index = INVALID_INDEX;
            ret = emit_vgpu10_instruction(emit, inst_number - 1,
                                          &parse.FullToken.FullInstruction);
         }

         if (!ret)
            goto done;
         break;

      case TGSI_TOKEN_TYPE_PROPERTY:
         ret = emit_vgpu10_property(emit, &parse.FullToken.FullProperty);
         if (!ret)
            goto done;
         break;

      default:
         break;
      }
   }

   if (emit->unit == PIPE_SHADER_TESS_CTRL) {
      ret = emit_hull_shader_patch_constant_phase(emit, &parse);
   }

done:
   tgsi_parse_free(&parse);
   return ret;
}


/**
 * Emit the first VGPU10 shader tokens.
 */
static boolean
emit_vgpu10_header(struct svga_shader_emitter_v10 *emit)
{
   VGPU10ProgramToken ptoken;

   /* First token: VGPU10ProgramToken (version info, program type (VS,GS,PS)) */
   ptoken.value = 0;            /* init whole token to zero */
   ptoken.majorVersion = emit->version / 10;
   ptoken.minorVersion = emit->version % 10;
   ptoken.programType = translate_shader_type(emit->unit);
   if (!emit_dword(emit, ptoken.value))
      return FALSE;

   /* Second token: total length of shader, in tokens.  We can't fill this
    * in until we're all done.  Emit zero for now.
    */
   if (!emit_dword(emit, 0))
      return FALSE;

   if (emit->version >= 50) {
      VGPU10OpcodeToken0 token;

      if (emit->unit == PIPE_SHADER_TESS_CTRL) {
         /* For the hull shader, we need to start the declarations phase
          * before emitting any declarations, including the global flags.
          */
         token.value = 0;
         token.opcodeType = VGPU10_OPCODE_HS_DECLS;
         begin_emit_instruction(emit);
         emit_dword(emit, token.value);
         end_emit_instruction(emit);
      }

      /* Emit global flags */
      token.value = 0;    /* init whole token to zero */
      token.opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS;
      token.enableDoublePrecisionFloatOps = 1;  /* set bit */
      token.instructionLength = 1;
      if (!emit_dword(emit, token.value))
         return FALSE;
   }

   if (emit->version >= 40) {
      VGPU10OpcodeToken0 token;

      /* Reserved for a global flag such as refactoringAllowed.
       * If the shader does not use the precise qualifier, we will set the
       * refactoringAllowed global flag; otherwise, we will leave the
       * reserved token as a NOP.
       */
      emit->reserved_token = (emit->ptr - emit->buf) / sizeof(VGPU10OpcodeToken0);
      token.value = 0;
      token.opcodeType = VGPU10_OPCODE_NOP;
      token.instructionLength = 1;
      if (!emit_dword(emit, token.value))
         return FALSE;
   }

   return TRUE;
}
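

/*
 * For illustration (a sketch, not exact bit encodings): a SM4.0 fragment
 * shader begins with
 *
 *    dword 0:  VGPU10ProgramToken  { majorVersion=4, minorVersion=0,
 *                                    programType = pixel shader }
 *    dword 1:  0   (total shader length; patched by emit_vgpu10_tail())
 *    dword 2:  reserved NOP, later rewritten to DCL_GLOBAL_FLAGS with
 *              refactoringAllowed=1 if the shader has no 'precise' uses
 *
 * SM5.0 shaders additionally emit a DCL_GLOBAL_FLAGS token enabling double
 * precision ops before the reserved token, and hull shaders start with an
 * HS_DECLS phase token.
 */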


static boolean
emit_vgpu10_tail(struct svga_shader_emitter_v10 *emit)
{
   VGPU10ProgramToken *tokens;

   /* Replace the second token with total shader length */
   tokens = (VGPU10ProgramToken *) emit->buf;
   tokens[1].value = emit_get_num_tokens(emit);

   if (emit->version >= 40 && !emit->uses_precise_qualifier) {
      /* Replace the reserved token with the RefactoringAllowed global flag */
      VGPU10OpcodeToken0 *ptoken;

      ptoken = (VGPU10OpcodeToken0 *)&tokens[emit->reserved_token];
      assert(ptoken->opcodeType == VGPU10_OPCODE_NOP);
      ptoken->opcodeType = VGPU10_OPCODE_DCL_GLOBAL_FLAGS;
      ptoken->refactoringAllowed = 1;
   }

   return TRUE;
}


/**
 * Modify the FS to read the BCOLORs and use the FACE register
 * to choose between the front/back colors.
 */
static const struct tgsi_token *
transform_fs_twoside(const struct tgsi_token *tokens)
{
   if (0) {
      debug_printf("Before tgsi_add_two_side ------------------\n");
      tgsi_dump(tokens, 0);
   }
   tokens = tgsi_add_two_side(tokens);
   if (0) {
      debug_printf("After tgsi_add_two_side ------------------\n");
      tgsi_dump(tokens, 0);
   }
   return tokens;
}


/**
 * Modify the FS to do polygon stipple.
 */
static const struct tgsi_token *
transform_fs_pstipple(struct svga_shader_emitter_v10 *emit,
                      const struct tgsi_token *tokens)
{
   const struct tgsi_token *new_tokens;
   unsigned unit;

   if (0) {
      debug_printf("Before pstipple ------------------\n");
      tgsi_dump(tokens, 0);
   }

   new_tokens = util_pstipple_create_fragment_shader(tokens, &unit, 0,
                                                     TGSI_FILE_INPUT);

   emit->fs.pstipple_sampler_unit = unit;

   /* Setup texture state for stipple */
   emit->sampler_target[unit] = TGSI_TEXTURE_2D;
   emit->key.tex[unit].swizzle_r = TGSI_SWIZZLE_X;
   emit->key.tex[unit].swizzle_g = TGSI_SWIZZLE_Y;
   emit->key.tex[unit].swizzle_b = TGSI_SWIZZLE_Z;
   emit->key.tex[unit].swizzle_a = TGSI_SWIZZLE_W;
   emit->key.tex[unit].target = PIPE_TEXTURE_2D;

   if (0) {
      debug_printf("After pstipple ------------------\n");
      tgsi_dump(new_tokens, 0);
   }

   return new_tokens;
}

/**
 * Modify the FS to support anti-aliased points.
 */
static const struct tgsi_token *
transform_fs_aapoint(const struct tgsi_token *tokens,
                     int aa_coord_index)
{
   if (0) {
      debug_printf("Before tgsi_add_aa_point ------------------\n");
      tgsi_dump(tokens, 0);
   }
   tokens = tgsi_add_aa_point(tokens, aa_coord_index);
   if (0) {
      debug_printf("After tgsi_add_aa_point ------------------\n");
      tgsi_dump(tokens, 0);
   }
   return tokens;
}
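

/*
 * Note on inter-stage linking (illustrative, simplified): if, say, the
 * previous stage writes TGSI_SEMANTIC_GENERIC[1] to its output register 2,
 * svga_link_shaders() records that mapping in emit->linkage so this
 * shader's GENERIC[1] input can be declared at input register 2, keeping
 * the register indices of the two stages consistent.  The helper below
 * only determines which shader the previous stage actually is.
 */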


/**
 * A helper function to determine the shader in the previous stage and
 * then call the linker function to determine the input mapping for this
 * shader to match the output indices from the shader in the previous stage.
 */
static void
compute_input_mapping(struct svga_context *svga,
                      struct svga_shader_emitter_v10 *emit,
                      enum pipe_shader_type unit)
{
   struct svga_shader *prevShader = NULL;   /* shader in the previous stage */

   if (unit == PIPE_SHADER_FRAGMENT) {
      prevShader = svga->curr.gs ?
         &svga->curr.gs->base : (svga->curr.tes ?
         &svga->curr.tes->base : &svga->curr.vs->base);
   } else if (unit == PIPE_SHADER_GEOMETRY) {
      prevShader = svga->curr.tes ? &svga->curr.tes->base : &svga->curr.vs->base;
   } else if (unit == PIPE_SHADER_TESS_EVAL) {
      assert(svga->curr.tcs);
      prevShader = &svga->curr.tcs->base;
   } else if (unit == PIPE_SHADER_TESS_CTRL) {
      assert(svga->curr.vs);
      prevShader = &svga->curr.vs->base;
   }

   if (prevShader != NULL) {
      svga_link_shaders(&prevShader->info, &emit->info, &emit->linkage);
      emit->prevShaderInfo = &prevShader->info;
   }
   else {
      /**
       * Since the vertex shader does not need to go through the linker to
       * establish the input map, we need to make sure the highest index
       * of input registers is set properly here.
       */
      emit->linkage.input_map_max = MAX2((int)emit->linkage.input_map_max,
                                         emit->info.file_max[TGSI_FILE_INPUT]);
   }
}


/**
 * Copies the shader signature info to the shader variant
 */
static void
copy_shader_signature(struct svga_shader_signature *sgn,
                      struct svga_shader_variant *variant)
{
   SVGA3dDXShaderSignatureHeader *header = &sgn->header;

   /* Calculate the signature length */
   variant->signatureLen = sizeof(SVGA3dDXShaderSignatureHeader) +
                           (header->numInputSignatures +
                            header->numOutputSignatures +
                            header->numPatchConstantSignatures) *
                           sizeof(SVGA3dDXShaderSignatureEntry);

   /* Allocate buffer for the signature info */
   variant->signature =
      (SVGA3dDXShaderSignatureHeader *)CALLOC(1, variant->signatureLen);

   char *sgnBuf = (char *)variant->signature;
   unsigned sgnLen;

   /* Copy the signature info to the shader variant structure */
   memcpy(sgnBuf, &sgn->header, sizeof(SVGA3dDXShaderSignatureHeader));
   sgnBuf += sizeof(SVGA3dDXShaderSignatureHeader);

   if (header->numInputSignatures) {
      sgnLen =
         header->numInputSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
      memcpy(sgnBuf, &sgn->inputs[0], sgnLen);
      sgnBuf += sgnLen;
   }

   if (header->numOutputSignatures) {
      sgnLen =
         header->numOutputSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
      memcpy(sgnBuf, &sgn->outputs[0], sgnLen);
      sgnBuf += sgnLen;
   }

   if (header->numPatchConstantSignatures) {
      sgnLen =
         header->numPatchConstantSignatures * sizeof(SVGA3dDXShaderSignatureEntry);
      memcpy(sgnBuf, &sgn->patchConstants[0], sgnLen);
   }
}
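

/*
 * Layout of the packed blob built by copy_shader_signature() above
 * (descriptive sketch; sizes follow directly from the structs involved):
 *
 *    SVGA3dDXShaderSignatureHeader
 *    SVGA3dDXShaderSignatureEntry  inputs[numInputSignatures]
 *    SVGA3dDXShaderSignatureEntry  outputs[numOutputSignatures]
 *    SVGA3dDXShaderSignatureEntry  patchConstants[numPatchConstantSignatures]
 *
 * variant->signatureLen is the total size in bytes of this buffer.
 */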


/**
 * This is the main entrypoint for the TGSI -> VGPU10 translator.
 */
struct svga_shader_variant *
svga_tgsi_vgpu10_translate(struct svga_context *svga,
                           const struct svga_shader *shader,
                           const struct svga_compile_key *key,
                           enum pipe_shader_type unit)
{
   struct svga_shader_variant *variant = NULL;
   struct svga_shader_emitter_v10 *emit;
   const struct tgsi_token *tokens = shader->tokens;

   (void) make_immediate_reg_double;   /* unused at this time */

   assert(unit == PIPE_SHADER_VERTEX ||
          unit == PIPE_SHADER_GEOMETRY ||
          unit == PIPE_SHADER_FRAGMENT ||
          unit == PIPE_SHADER_TESS_CTRL ||
          unit == PIPE_SHADER_TESS_EVAL ||
          unit == PIPE_SHADER_COMPUTE);

   /* These two flags cannot be used together */
   assert(key->vs.need_prescale + key->vs.undo_viewport <= 1);

   SVGA_STATS_TIME_PUSH(svga_sws(svga), SVGA_STATS_TIME_TGSIVGPU10TRANSLATE);
   /*
    * Setup the code emitter
    */
   emit = alloc_emitter();
   if (!emit)
      goto done;

   emit->unit = unit;
   if (svga_have_sm5(svga)) {
      emit->version = 50;
   } else if (svga_have_sm4_1(svga)) {
      emit->version = 41;
   } else {
      emit->version = 40;
   }

   emit->signature.header.headerVersion = SVGADX_SIGNATURE_HEADER_VERSION_0;

   emit->key = *key;

   emit->vposition.need_prescale = (emit->key.vs.need_prescale ||
                                    emit->key.gs.need_prescale ||
                                    emit->key.tes.need_prescale);

   /* Determine how many prescale factors in the constant buffer */
   emit->vposition.num_prescale = 1;
   if (emit->vposition.need_prescale && emit->key.gs.writes_viewport_index) {
      assert(emit->unit == PIPE_SHADER_GEOMETRY);
      emit->vposition.num_prescale = emit->key.gs.num_prescale;
   }

   emit->vposition.tmp_index = INVALID_INDEX;
   emit->vposition.so_index = INVALID_INDEX;
   emit->vposition.out_index = INVALID_INDEX;

   emit->vs.vertex_id_sys_index = INVALID_INDEX;
   emit->vs.vertex_id_tmp_index = INVALID_INDEX;
   emit->vs.vertex_id_bias_index = INVALID_INDEX;

   emit->fs.color_tmp_index = INVALID_INDEX;
   emit->fs.face_input_index = INVALID_INDEX;
   emit->fs.fragcoord_input_index = INVALID_INDEX;
   emit->fs.sample_id_sys_index = INVALID_INDEX;
   emit->fs.sample_pos_sys_index = INVALID_INDEX;
   emit->fs.sample_mask_in_sys_index = INVALID_INDEX;
   emit->fs.layer_input_index = INVALID_INDEX;
   emit->fs.layer_imm_index = INVALID_INDEX;

   emit->gs.prim_id_index = INVALID_INDEX;
   emit->gs.invocation_id_sys_index = INVALID_INDEX;
   emit->gs.viewport_index_out_index = INVALID_INDEX;
   emit->gs.viewport_index_tmp_index = INVALID_INDEX;

   emit->tcs.vertices_per_patch_index = INVALID_INDEX;
   emit->tcs.invocation_id_sys_index = INVALID_INDEX;
   emit->tcs.control_point_input_index = INVALID_INDEX;
   emit->tcs.control_point_addr_index = INVALID_INDEX;
   emit->tcs.control_point_out_index = INVALID_INDEX;
   emit->tcs.control_point_tmp_index = INVALID_INDEX;
   emit->tcs.control_point_out_count = 0;
   emit->tcs.inner.out_index = INVALID_INDEX;
   emit->tcs.inner.temp_index = INVALID_INDEX;
   emit->tcs.inner.tgsi_index = INVALID_INDEX;
   emit->tcs.outer.out_index = INVALID_INDEX;
   emit->tcs.outer.temp_index = INVALID_INDEX;
   emit->tcs.outer.tgsi_index = INVALID_INDEX;
   emit->tcs.patch_generic_out_count = 0;
   emit->tcs.patch_generic_out_index = INVALID_INDEX;
   emit->tcs.patch_generic_tmp_index = INVALID_INDEX;
   emit->tcs.prim_id_index = INVALID_INDEX;

   emit->tes.tesscoord_sys_index = INVALID_INDEX;
   emit->tes.inner.in_index = INVALID_INDEX;
   emit->tes.inner.temp_index = INVALID_INDEX;
   emit->tes.inner.tgsi_index = INVALID_INDEX;
   emit->tes.outer.in_index = INVALID_INDEX;
   emit->tes.outer.temp_index = INVALID_INDEX;
   emit->tes.outer.tgsi_index = INVALID_INDEX;
   emit->tes.prim_id_index = INVALID_INDEX;

   emit->clip_dist_out_index = INVALID_INDEX;
   emit->clip_dist_tmp_index = INVALID_INDEX;
   emit->clip_dist_so_index = INVALID_INDEX;
   emit->clip_vertex_out_index = INVALID_INDEX;
   emit->clip_vertex_tmp_index = INVALID_INDEX;
   emit->svga_debug_callback = svga->debug.callback;

   emit->index_range.start_index = INVALID_INDEX;
   emit->index_range.count = 0;
   emit->index_range.required = FALSE;
   emit->index_range.operandType = VGPU10_NUM_OPERANDS;
   emit->index_range.dim = 0;
   emit->index_range.size = 0;

   emit->current_loop_depth = 0;

   emit->initialize_temp_index = INVALID_INDEX;

   if (emit->key.fs.alpha_func == SVGA3D_CMP_INVALID) {
      emit->key.fs.alpha_func = SVGA3D_CMP_ALWAYS;
   }

   if (unit == PIPE_SHADER_FRAGMENT) {
      if (key->fs.light_twoside) {
         tokens = transform_fs_twoside(tokens);
      }
      if (key->fs.pstipple) {
         const struct tgsi_token *new_tokens =
            transform_fs_pstipple(emit, tokens);
         if (tokens != shader->tokens) {
            /* free the two-sided shader tokens */
            tgsi_free_tokens(tokens);
         }
         tokens = new_tokens;
      }
      if (key->fs.aa_point) {
         tokens = transform_fs_aapoint(tokens, key->fs.aa_point_coord_index);
      }
   }

   if (SVGA_DEBUG & DEBUG_TGSI) {
      debug_printf("#####################################\n");
      debug_printf("### TGSI Shader %u\n", shader->id);
      tgsi_dump(tokens, 0);
   }

   /**
    * Rescan the header if the token string is different from the one
    * included in the shader; otherwise, the header info is already up-to-date
    */
   if (tokens != shader->tokens) {
      tgsi_scan_shader(tokens, &emit->info);
   } else {
      emit->info = shader->info;
   }

   emit->num_outputs = emit->info.num_outputs;

   /**
    * Compute input mapping to match the outputs from the shader
    * in the previous stage
    */
   compute_input_mapping(svga, emit, unit);

   determine_clipping_mode(emit);

   if (unit == PIPE_SHADER_GEOMETRY || unit == PIPE_SHADER_VERTEX ||
       unit == PIPE_SHADER_TESS_CTRL || unit == PIPE_SHADER_TESS_EVAL) {
      if (shader->stream_output != NULL || emit->clip_mode == CLIP_DISTANCE) {
         /* If there are stream output declarations associated
          * with this shader or the shader writes to ClipDistance,
          * then reserve extra registers for the non-adjusted vertex position
          * and the ClipDistance shadow copy.
          */
         emit->vposition.so_index = emit->num_outputs++;

         if (emit->clip_mode == CLIP_DISTANCE) {
            emit->clip_dist_so_index = emit->num_outputs++;
            if (emit->info.num_written_clipdistance > 4)
               emit->num_outputs++;
         }
      }
   }

   /*
    * Do actual shader translation.
    */
   if (!emit_vgpu10_header(emit)) {
      debug_printf("svga: emit VGPU10 header failed\n");
      goto cleanup;
   }

   if (!emit_vgpu10_instructions(emit, tokens)) {
      debug_printf("svga: emit VGPU10 instructions failed\n");
      goto cleanup;
   }

   if (!emit_vgpu10_tail(emit)) {
      debug_printf("svga: emit VGPU10 tail failed\n");
      goto cleanup;
   }

   if (emit->register_overflow) {
      goto cleanup;
   }

   /*
    * Create, initialize the 'variant' object.
    */
   variant = svga_new_shader_variant(svga, unit);
   if (!variant)
      goto cleanup;

   variant->shader = shader;
   variant->nr_tokens = emit_get_num_tokens(emit);
   variant->tokens = (const unsigned *)emit->buf;

   /* Copy shader signature info to the shader variant */
   if (svga_have_sm5(svga)) {
      copy_shader_signature(&emit->signature, variant);
   }

   emit->buf = NULL;   /* buffer is no longer owned by the emitter context */
   memcpy(&variant->key, key, sizeof(*key));
   variant->id = UTIL_BITMASK_INVALID_INDEX;

   /* The extra constant starting offset starts with the number of
    * shader constants declared in the shader.
    */
   variant->extra_const_start = emit->num_shader_consts[0];
   if (key->gs.wide_point) {
      /**
       * The extra constant added in the transformed shader for the
       * inverse viewport scale is to be supplied by the driver,
       * so the extra constant starting offset needs to be reduced by 1.
       */
      assert(variant->extra_const_start > 0);
      variant->extra_const_start--;
   }

   if (unit == PIPE_SHADER_FRAGMENT) {
      struct svga_fs_variant *fs_variant = svga_fs_variant(variant);

      fs_variant->pstipple_sampler_unit = emit->fs.pstipple_sampler_unit;

      /* If there was exactly one write to a fragment shader output register
       * and it came from a constant buffer, we know all fragments will have
       * the same color (except for blending).
       */
      fs_variant->constant_color_output =
         emit->constant_color_output && emit->num_output_writes == 1;

      /** Keep track in the variant whether flat interpolation is used
       * for any of the varyings.
       */
      fs_variant->uses_flat_interp = emit->uses_flat_interp;

      fs_variant->fs_shadow_compare_units = emit->fs.shadow_compare_units;
   }
   else if (unit == PIPE_SHADER_TESS_EVAL) {
      struct svga_tes_variant *tes_variant = svga_tes_variant(variant);

      /* Keep track in the tes variant some of the layout parameters.
       * These parameters will be referenced by the tcs to emit
       * the necessary declarations for the hull shader.
       */
      tes_variant->prim_mode = emit->tes.prim_mode;
      tes_variant->spacing = emit->tes.spacing;
      tes_variant->vertices_order_cw = emit->tes.vertices_order_cw;
      tes_variant->point_mode = emit->tes.point_mode;
   }


   if (tokens != shader->tokens) {
      tgsi_free_tokens(tokens);
   }

cleanup:
   free_emitter(emit);

done:
   SVGA_STATS_TIME_POP(svga_sws(svga));
   return variant;
}
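

/*
 * Usage sketch (hypothetical caller, for illustration only): another part
 * of the driver would typically invoke the translator along these lines
 * when it needs a new shader variant:
 *
 *    struct svga_shader_variant *variant =
 *       svga_tgsi_vgpu10_translate(svga, shader, key, PIPE_SHADER_FRAGMENT);
 *    if (!variant) {
 *       // translation failed; the caller handles the error / falls back
 *    }
 *
 * On success the variant owns the VGPU10 token buffer (variant->tokens /
 * variant->nr_tokens, handed over from emit->buf above) and carries the
 * copied compile key and, on SM5 devices, the shader signature blob.
 */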