1/* 2 * Copyright (C) 2005-2007 Brian Paul All Rights Reserved. 3 * Copyright (C) 2008 VMware, Inc. All Rights Reserved. 4 * Copyright © 2010 Intel Corporation 5 * Copyright © 2011 Bryan Cain 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a 8 * copy of this software and associated documentation files (the "Software"), 9 * to deal in the Software without restriction, including without limitation 10 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 11 * and/or sell copies of the Software, and to permit persons to whom the 12 * Software is furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the next 15 * paragraph) shall be included in all copies or substantial portions of the 16 * Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 20 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 21 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 22 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 23 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 24 * DEALINGS IN THE SOFTWARE. 25 */ 26 27/** 28 * \file glsl_to_tgsi.cpp 29 * 30 * Translate GLSL IR to TGSI. 31 */ 32 33#include "st_glsl_to_tgsi.h" 34 35#include "compiler/glsl/glsl_parser_extras.h" 36#include "compiler/glsl/ir_optimization.h" 37#include "compiler/glsl/program.h" 38 39#include "main/errors.h" 40#include "main/shaderobj.h" 41#include "main/uniforms.h" 42#include "main/shaderapi.h" 43#include "main/shaderimage.h" 44#include "program/prog_instruction.h" 45 46#include "pipe/p_context.h" 47#include "pipe/p_screen.h" 48#include "tgsi/tgsi_ureg.h" 49#include "tgsi/tgsi_info.h" 50#include "util/u_math.h" 51#include "util/u_memory.h" 52#include "st_program.h" 53#include "st_format.h" 54#include "st_glsl_to_tgsi_temprename.h" 55 56#include "util/hash_table.h" 57#include <algorithm> 58 59#define PROGRAM_ANY_CONST ((1 << PROGRAM_STATE_VAR) | \ 60 (1 << PROGRAM_CONSTANT) | \ 61 (1 << PROGRAM_UNIFORM)) 62 63#define MAX_GLSL_TEXTURE_OFFSET 4 64 65#ifndef NDEBUG 66#include "util/u_atomic.h" 67#include "util/simple_mtx.h" 68#include <fstream> 69#include <ios> 70 71/* Prepare to make it possible to specify log file */ 72static std::ofstream stats_log; 73 74/* Helper function to check whether we want to write some statistics 75 * of the shader conversion. 76 */ 77 78static simple_mtx_t print_stats_mutex = _SIMPLE_MTX_INITIALIZER_NP; 79 80static inline bool print_stats_enabled () 81{ 82 static int stats_enabled = 0; 83 84 if (!stats_enabled) { 85 simple_mtx_lock(&print_stats_mutex); 86 if (!stats_enabled) { 87 const char *stats_filename = getenv("GLSL_TO_TGSI_PRINT_STATS"); 88 if (stats_filename) { 89 bool write_header = std::ifstream(stats_filename).fail(); 90 stats_log.open(stats_filename, std::ios_base::out | std::ios_base::app); 91 stats_enabled = stats_log.good() ? 1 : -1; 92 if (write_header) 93 stats_log << "arrays,temps,temps in arrays,total,instructions\n"; 94 } else { 95 stats_enabled = -1; 96 } 97 } 98 simple_mtx_unlock(&print_stats_mutex); 99 } 100 return stats_enabled > 0; 101} 102#define PRINT_STATS(X) if (print_stats_enabled()) do { X; } while (false); 103#else 104#define PRINT_STATS(X) 105#endif 106 107 108static unsigned is_precise(const ir_variable *ir) 109{ 110 if (!ir) 111 return 0; 112 return ir->data.precise || ir->data.invariant; 113} 114 115class variable_storage { 116 DECLARE_RZALLOC_CXX_OPERATORS(variable_storage) 117 118public: 119 variable_storage(ir_variable *var, gl_register_file file, int index, 120 unsigned array_id = 0) 121 : file(file), index(index), component(0), var(var), array_id(array_id) 122 { 123 assert(file != PROGRAM_ARRAY || array_id != 0); 124 } 125 126 gl_register_file file; 127 int index; 128 129 /* Explicit component location. This is given in terms of the GLSL-style 130 * swizzles where each double is a single component, i.e. for 64-bit types 131 * it can only be 0 or 1. 132 */ 133 int component; 134 ir_variable *var; /* variable that maps to this, if any */ 135 unsigned array_id; 136}; 137 138class immediate_storage : public exec_node { 139public: 140 immediate_storage(gl_constant_value *values, int size32, GLenum type) 141 { 142 memcpy(this->values, values, size32 * sizeof(gl_constant_value)); 143 this->size32 = size32; 144 this->type = type; 145 } 146 147 /* doubles are stored across 2 gl_constant_values */ 148 gl_constant_value values[4]; 149 int size32; /**< Number of 32-bit components (1-4) */ 150 GLenum type; /**< GL_DOUBLE, GL_FLOAT, GL_INT, GL_BOOL, or GL_UNSIGNED_INT */ 151}; 152 153static const st_src_reg undef_src = st_src_reg(PROGRAM_UNDEFINED, 0, GLSL_TYPE_ERROR); 154static const st_dst_reg undef_dst = st_dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP, GLSL_TYPE_ERROR); 155 156struct inout_decl { 157 unsigned mesa_index; 158 unsigned array_id; /* TGSI ArrayID; 1-based: 0 means not an array */ 159 unsigned size; 160 unsigned interp_loc; 161 unsigned gs_out_streams; 162 enum glsl_interp_mode interp; 163 enum glsl_base_type base_type; 164 ubyte usage_mask; /* GLSL-style usage-mask, i.e. single bit per double */ 165 bool invariant; 166}; 167 168static struct inout_decl * 169find_inout_array(struct inout_decl *decls, unsigned count, unsigned array_id) 170{ 171 assert(array_id != 0); 172 173 for (unsigned i = 0; i < count; i++) { 174 struct inout_decl *decl = &decls[i]; 175 176 if (array_id == decl->array_id) { 177 return decl; 178 } 179 } 180 181 return NULL; 182} 183 184static enum glsl_base_type 185find_array_type(struct inout_decl *decls, unsigned count, unsigned array_id) 186{ 187 if (!array_id) 188 return GLSL_TYPE_ERROR; 189 struct inout_decl *decl = find_inout_array(decls, count, array_id); 190 if (decl) 191 return decl->base_type; 192 return GLSL_TYPE_ERROR; 193} 194 195struct hwatomic_decl { 196 unsigned location; 197 unsigned binding; 198 unsigned size; 199 unsigned array_id; 200}; 201 202struct glsl_to_tgsi_visitor : public ir_visitor { 203public: 204 glsl_to_tgsi_visitor(); 205 ~glsl_to_tgsi_visitor(); 206 207 struct gl_context *ctx; 208 struct gl_program *prog; 209 struct gl_shader_program *shader_program; 210 struct gl_linked_shader *shader; 211 struct gl_shader_compiler_options *options; 212 213 int next_temp; 214 215 unsigned *array_sizes; 216 unsigned max_num_arrays; 217 unsigned next_array; 218 219 struct inout_decl inputs[4 * PIPE_MAX_SHADER_INPUTS]; 220 unsigned num_inputs; 221 unsigned num_input_arrays; 222 struct inout_decl outputs[4 * PIPE_MAX_SHADER_OUTPUTS]; 223 unsigned num_outputs; 224 unsigned num_output_arrays; 225 226 struct hwatomic_decl atomic_info[PIPE_MAX_HW_ATOMIC_BUFFERS]; 227 unsigned num_atomics; 228 unsigned num_atomic_arrays; 229 int num_address_regs; 230 uint32_t samplers_used; 231 glsl_base_type sampler_types[PIPE_MAX_SAMPLERS]; 232 enum tgsi_texture_type sampler_targets[PIPE_MAX_SAMPLERS]; 233 int images_used; 234 enum tgsi_texture_type image_targets[PIPE_MAX_SHADER_IMAGES]; 235 enum pipe_format image_formats[PIPE_MAX_SHADER_IMAGES]; 236 bool image_wr[PIPE_MAX_SHADER_IMAGES]; 237 bool indirect_addr_consts; 238 int wpos_transform_const; 239 240 bool native_integers; 241 bool have_sqrt; 242 bool have_fma; 243 bool use_shared_memory; 244 bool has_tex_txf_lz; 245 bool precise; 246 bool need_uarl; 247 bool tg4_component_in_swizzle; 248 249 variable_storage *find_variable_storage(ir_variable *var); 250 251 int add_constant(gl_register_file file, gl_constant_value values[8], 252 int size, GLenum datatype, uint16_t *swizzle_out); 253 254 st_src_reg get_temp(const glsl_type *type); 255 void reladdr_to_temp(ir_instruction *ir, st_src_reg *reg, int *num_reladdr); 256 257 st_src_reg st_src_reg_for_double(double val); 258 st_src_reg st_src_reg_for_float(float val); 259 st_src_reg st_src_reg_for_int(int val); 260 st_src_reg st_src_reg_for_int64(int64_t val); 261 st_src_reg st_src_reg_for_type(enum glsl_base_type type, int val); 262 263 /** 264 * \name Visit methods 265 * 266 * As typical for the visitor pattern, there must be one \c visit method for 267 * each concrete subclass of \c ir_instruction. Virtual base classes within 268 * the hierarchy should not have \c visit methods. 269 */ 270 /*@{*/ 271 virtual void visit(ir_variable *); 272 virtual void visit(ir_loop *); 273 virtual void visit(ir_loop_jump *); 274 virtual void visit(ir_function_signature *); 275 virtual void visit(ir_function *); 276 virtual void visit(ir_expression *); 277 virtual void visit(ir_swizzle *); 278 virtual void visit(ir_dereference_variable *); 279 virtual void visit(ir_dereference_array *); 280 virtual void visit(ir_dereference_record *); 281 virtual void visit(ir_assignment *); 282 virtual void visit(ir_constant *); 283 virtual void visit(ir_call *); 284 virtual void visit(ir_return *); 285 virtual void visit(ir_discard *); 286 virtual void visit(ir_demote *); 287 virtual void visit(ir_texture *); 288 virtual void visit(ir_if *); 289 virtual void visit(ir_emit_vertex *); 290 virtual void visit(ir_end_primitive *); 291 virtual void visit(ir_barrier *); 292 /*@}*/ 293 294 void ATTRIBUTE_NOINLINE visit_expression(ir_expression *, st_src_reg *); 295 296 void visit_atomic_counter_intrinsic(ir_call *); 297 void visit_ssbo_intrinsic(ir_call *); 298 void visit_membar_intrinsic(ir_call *); 299 void visit_shared_intrinsic(ir_call *); 300 void visit_image_intrinsic(ir_call *); 301 void visit_generic_intrinsic(ir_call *, enum tgsi_opcode op); 302 303 st_src_reg result; 304 305 /** List of variable_storage */ 306 struct hash_table *variables; 307 308 /** List of immediate_storage */ 309 exec_list immediates; 310 unsigned num_immediates; 311 312 /** List of glsl_to_tgsi_instruction */ 313 exec_list instructions; 314 315 glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, enum tgsi_opcode op, 316 st_dst_reg dst = undef_dst, 317 st_src_reg src0 = undef_src, 318 st_src_reg src1 = undef_src, 319 st_src_reg src2 = undef_src, 320 st_src_reg src3 = undef_src); 321 322 glsl_to_tgsi_instruction *emit_asm(ir_instruction *ir, enum tgsi_opcode op, 323 st_dst_reg dst, st_dst_reg dst1, 324 st_src_reg src0 = undef_src, 325 st_src_reg src1 = undef_src, 326 st_src_reg src2 = undef_src, 327 st_src_reg src3 = undef_src); 328 329 enum tgsi_opcode get_opcode(enum tgsi_opcode op, 330 st_dst_reg dst, 331 st_src_reg src0, st_src_reg src1); 332 333 /** 334 * Emit the correct dot-product instruction for the type of arguments 335 */ 336 glsl_to_tgsi_instruction *emit_dp(ir_instruction *ir, 337 st_dst_reg dst, 338 st_src_reg src0, 339 st_src_reg src1, 340 unsigned elements); 341 342 void emit_scalar(ir_instruction *ir, enum tgsi_opcode op, 343 st_dst_reg dst, st_src_reg src0); 344 345 void emit_scalar(ir_instruction *ir, enum tgsi_opcode op, 346 st_dst_reg dst, st_src_reg src0, st_src_reg src1); 347 348 void emit_arl(ir_instruction *ir, st_dst_reg dst, st_src_reg src0); 349 350 void get_deref_offsets(ir_dereference *ir, 351 unsigned *array_size, 352 unsigned *base, 353 uint16_t *index, 354 st_src_reg *reladdr, 355 bool opaque); 356 void calc_deref_offsets(ir_dereference *tail, 357 unsigned *array_elements, 358 uint16_t *index, 359 st_src_reg *indirect, 360 unsigned *location); 361 st_src_reg canonicalize_gather_offset(st_src_reg offset); 362 bool handle_bound_deref(ir_dereference *ir); 363 364 bool try_emit_mad(ir_expression *ir, 365 int mul_operand); 366 bool try_emit_mad_for_and_not(ir_expression *ir, 367 int mul_operand); 368 369 void emit_swz(ir_expression *ir); 370 371 bool process_move_condition(ir_rvalue *ir); 372 373 void simplify_cmp(void); 374 375 void rename_temp_registers(struct rename_reg_pair *renames); 376 void get_first_temp_read(int *first_reads); 377 void get_first_temp_write(int *first_writes); 378 void get_last_temp_read_first_temp_write(int *last_reads, int *first_writes); 379 void get_last_temp_write(int *last_writes); 380 381 void copy_propagate(void); 382 int eliminate_dead_code(void); 383 384 void split_arrays(void); 385 void merge_two_dsts(void); 386 void merge_registers(void); 387 void renumber_registers(void); 388 389 void emit_block_mov(ir_assignment *ir, const struct glsl_type *type, 390 st_dst_reg *l, st_src_reg *r, 391 st_src_reg *cond, bool cond_swap); 392 393 void print_stats(); 394 395 void *mem_ctx; 396}; 397 398static st_dst_reg address_reg = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, 399 GLSL_TYPE_FLOAT, 0); 400static st_dst_reg address_reg2 = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, 401 GLSL_TYPE_FLOAT, 1); 402static st_dst_reg sampler_reladdr = st_dst_reg(PROGRAM_ADDRESS, WRITEMASK_X, 403 GLSL_TYPE_FLOAT, 2); 404 405static void 406fail_link(struct gl_shader_program *prog, const char *fmt, ...) 407 PRINTFLIKE(2, 3); 408 409static void 410fail_link(struct gl_shader_program *prog, const char *fmt, ...) 411{ 412 va_list args; 413 va_start(args, fmt); 414 ralloc_vasprintf_append(&prog->data->InfoLog, fmt, args); 415 va_end(args); 416 417 prog->data->LinkStatus = LINKING_FAILURE; 418} 419 420int 421swizzle_for_size(int size) 422{ 423 static const int size_swizzles[4] = { 424 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), 425 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), 426 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z), 427 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W), 428 }; 429 430 assert((size >= 1) && (size <= 4)); 431 return size_swizzles[size - 1]; 432} 433 434 435/** 436 * Map mesa texture target to TGSI texture target. 437 */ 438static enum tgsi_texture_type 439st_translate_texture_target(gl_texture_index textarget, GLboolean shadow) 440{ 441 if (shadow) { 442 switch (textarget) { 443 case TEXTURE_1D_INDEX: 444 return TGSI_TEXTURE_SHADOW1D; 445 case TEXTURE_2D_INDEX: 446 return TGSI_TEXTURE_SHADOW2D; 447 case TEXTURE_RECT_INDEX: 448 return TGSI_TEXTURE_SHADOWRECT; 449 case TEXTURE_1D_ARRAY_INDEX: 450 return TGSI_TEXTURE_SHADOW1D_ARRAY; 451 case TEXTURE_2D_ARRAY_INDEX: 452 return TGSI_TEXTURE_SHADOW2D_ARRAY; 453 case TEXTURE_CUBE_INDEX: 454 return TGSI_TEXTURE_SHADOWCUBE; 455 case TEXTURE_CUBE_ARRAY_INDEX: 456 return TGSI_TEXTURE_SHADOWCUBE_ARRAY; 457 default: 458 break; 459 } 460 } 461 462 switch (textarget) { 463 case TEXTURE_2D_MULTISAMPLE_INDEX: 464 return TGSI_TEXTURE_2D_MSAA; 465 case TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX: 466 return TGSI_TEXTURE_2D_ARRAY_MSAA; 467 case TEXTURE_BUFFER_INDEX: 468 return TGSI_TEXTURE_BUFFER; 469 case TEXTURE_1D_INDEX: 470 return TGSI_TEXTURE_1D; 471 case TEXTURE_2D_INDEX: 472 return TGSI_TEXTURE_2D; 473 case TEXTURE_3D_INDEX: 474 return TGSI_TEXTURE_3D; 475 case TEXTURE_CUBE_INDEX: 476 return TGSI_TEXTURE_CUBE; 477 case TEXTURE_CUBE_ARRAY_INDEX: 478 return TGSI_TEXTURE_CUBE_ARRAY; 479 case TEXTURE_RECT_INDEX: 480 return TGSI_TEXTURE_RECT; 481 case TEXTURE_1D_ARRAY_INDEX: 482 return TGSI_TEXTURE_1D_ARRAY; 483 case TEXTURE_2D_ARRAY_INDEX: 484 return TGSI_TEXTURE_2D_ARRAY; 485 case TEXTURE_EXTERNAL_INDEX: 486 return TGSI_TEXTURE_2D; 487 default: 488 debug_assert(!"unexpected texture target index"); 489 return TGSI_TEXTURE_1D; 490 } 491} 492 493 494/** 495 * Map GLSL base type to TGSI return type. 496 */ 497static enum tgsi_return_type 498st_translate_texture_type(enum glsl_base_type type) 499{ 500 switch (type) { 501 case GLSL_TYPE_INT: 502 return TGSI_RETURN_TYPE_SINT; 503 case GLSL_TYPE_UINT: 504 return TGSI_RETURN_TYPE_UINT; 505 case GLSL_TYPE_FLOAT: 506 return TGSI_RETURN_TYPE_FLOAT; 507 default: 508 assert(!"unexpected texture type"); 509 return TGSI_RETURN_TYPE_UNKNOWN; 510 } 511} 512 513 514glsl_to_tgsi_instruction * 515glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, enum tgsi_opcode op, 516 st_dst_reg dst, st_dst_reg dst1, 517 st_src_reg src0, st_src_reg src1, 518 st_src_reg src2, st_src_reg src3) 519{ 520 glsl_to_tgsi_instruction *inst = new(mem_ctx) glsl_to_tgsi_instruction(); 521 int num_reladdr = 0, i, j; 522 bool dst_is_64bit[2]; 523 524 op = get_opcode(op, dst, src0, src1); 525 526 /* If we have to do relative addressing, we want to load the ARL 527 * reg directly for one of the regs, and preload the other reladdr 528 * sources into temps. 529 */ 530 num_reladdr += dst.reladdr != NULL || dst.reladdr2; 531 assert(!dst1.reladdr); /* should be lowered in earlier passes */ 532 num_reladdr += src0.reladdr != NULL || src0.reladdr2 != NULL; 533 num_reladdr += src1.reladdr != NULL || src1.reladdr2 != NULL; 534 num_reladdr += src2.reladdr != NULL || src2.reladdr2 != NULL; 535 num_reladdr += src3.reladdr != NULL || src3.reladdr2 != NULL; 536 537 reladdr_to_temp(ir, &src3, &num_reladdr); 538 reladdr_to_temp(ir, &src2, &num_reladdr); 539 reladdr_to_temp(ir, &src1, &num_reladdr); 540 reladdr_to_temp(ir, &src0, &num_reladdr); 541 542 if (dst.reladdr || dst.reladdr2) { 543 if (dst.reladdr) 544 emit_arl(ir, address_reg, *dst.reladdr); 545 if (dst.reladdr2) 546 emit_arl(ir, address_reg2, *dst.reladdr2); 547 num_reladdr--; 548 } 549 550 assert(num_reladdr == 0); 551 552 /* inst->op has only 8 bits. */ 553 STATIC_ASSERT(TGSI_OPCODE_LAST <= 255); 554 555 inst->op = op; 556 inst->precise = this->precise; 557 inst->info = tgsi_get_opcode_info(op); 558 inst->dst[0] = dst; 559 inst->dst[1] = dst1; 560 inst->src[0] = src0; 561 inst->src[1] = src1; 562 inst->src[2] = src2; 563 inst->src[3] = src3; 564 inst->is_64bit_expanded = false; 565 inst->ir = ir; 566 inst->dead_mask = 0; 567 inst->tex_offsets = NULL; 568 inst->tex_offset_num_offset = 0; 569 inst->saturate = 0; 570 inst->tex_shadow = 0; 571 /* default to float, for paths where this is not initialized 572 * (since 0==UINT which is likely wrong): 573 */ 574 inst->tex_type = GLSL_TYPE_FLOAT; 575 576 /* Update indirect addressing status used by TGSI */ 577 if (dst.reladdr || dst.reladdr2) { 578 switch (dst.file) { 579 case PROGRAM_STATE_VAR: 580 case PROGRAM_CONSTANT: 581 case PROGRAM_UNIFORM: 582 this->indirect_addr_consts = true; 583 break; 584 case PROGRAM_IMMEDIATE: 585 assert(!"immediates should not have indirect addressing"); 586 break; 587 default: 588 break; 589 } 590 } 591 else { 592 for (i = 0; i < 4; i++) { 593 if (inst->src[i].reladdr) { 594 switch (inst->src[i].file) { 595 case PROGRAM_STATE_VAR: 596 case PROGRAM_CONSTANT: 597 case PROGRAM_UNIFORM: 598 this->indirect_addr_consts = true; 599 break; 600 case PROGRAM_IMMEDIATE: 601 assert(!"immediates should not have indirect addressing"); 602 break; 603 default: 604 break; 605 } 606 } 607 } 608 } 609 610 /* 611 * This section contains the double processing. 612 * GLSL just represents doubles as single channel values, 613 * however most HW and TGSI represent doubles as pairs of register channels. 614 * 615 * so we have to fixup destination writemask/index and src swizzle/indexes. 616 * dest writemasks need to translate from single channel write mask 617 * to a dual-channel writemask, but also need to modify the index, 618 * if we are touching the Z,W fields in the pre-translated writemask. 619 * 620 * src channels have similiar index modifications along with swizzle 621 * changes to we pick the XY, ZW pairs from the correct index. 622 * 623 * GLSL [0].x -> TGSI [0].xy 624 * GLSL [0].y -> TGSI [0].zw 625 * GLSL [0].z -> TGSI [1].xy 626 * GLSL [0].w -> TGSI [1].zw 627 */ 628 for (j = 0; j < 2; j++) { 629 dst_is_64bit[j] = glsl_base_type_is_64bit(inst->dst[j].type); 630 if (!dst_is_64bit[j] && inst->dst[j].file == PROGRAM_OUTPUT && 631 inst->dst[j].type == GLSL_TYPE_ARRAY) { 632 enum glsl_base_type type = find_array_type(this->outputs, 633 this->num_outputs, 634 inst->dst[j].array_id); 635 if (glsl_base_type_is_64bit(type)) 636 dst_is_64bit[j] = true; 637 } 638 } 639 640 if (dst_is_64bit[0] || dst_is_64bit[1] || 641 glsl_base_type_is_64bit(inst->src[0].type)) { 642 glsl_to_tgsi_instruction *dinst = NULL; 643 int initial_src_swz[4], initial_src_idx[4]; 644 int initial_dst_idx[2], initial_dst_writemask[2]; 645 /* select the writemask for dst0 or dst1 */ 646 unsigned writemask = inst->dst[1].file == PROGRAM_UNDEFINED 647 ? inst->dst[0].writemask : inst->dst[1].writemask; 648 649 /* copy out the writemask, index and swizzles for all src/dsts. */ 650 for (j = 0; j < 2; j++) { 651 initial_dst_writemask[j] = inst->dst[j].writemask; 652 initial_dst_idx[j] = inst->dst[j].index; 653 } 654 655 for (j = 0; j < 4; j++) { 656 initial_src_swz[j] = inst->src[j].swizzle; 657 initial_src_idx[j] = inst->src[j].index; 658 } 659 660 /* 661 * scan all the components in the dst writemask 662 * generate an instruction for each of them if required. 663 */ 664 st_src_reg addr; 665 while (writemask) { 666 667 int i = u_bit_scan(&writemask); 668 669 /* before emitting the instruction, see if we have to adjust 670 * load / store address */ 671 if (i > 1 && (inst->op == TGSI_OPCODE_LOAD || 672 inst->op == TGSI_OPCODE_STORE) && 673 addr.file == PROGRAM_UNDEFINED) { 674 /* We have to advance the buffer address by 16 */ 675 addr = get_temp(glsl_type::uint_type); 676 emit_asm(ir, TGSI_OPCODE_UADD, st_dst_reg(addr), 677 inst->src[0], st_src_reg_for_int(16)); 678 } 679 680 /* first time use previous instruction */ 681 if (dinst == NULL) { 682 dinst = inst; 683 } else { 684 /* create a new instructions for subsequent attempts */ 685 dinst = new(mem_ctx) glsl_to_tgsi_instruction(); 686 *dinst = *inst; 687 dinst->next = NULL; 688 dinst->prev = NULL; 689 } 690 this->instructions.push_tail(dinst); 691 dinst->is_64bit_expanded = true; 692 693 /* modify the destination if we are splitting */ 694 for (j = 0; j < 2; j++) { 695 if (dst_is_64bit[j]) { 696 dinst->dst[j].writemask = (i & 1) ? WRITEMASK_ZW : WRITEMASK_XY; 697 dinst->dst[j].index = initial_dst_idx[j]; 698 if (i > 1) { 699 if (dinst->op == TGSI_OPCODE_LOAD || 700 dinst->op == TGSI_OPCODE_STORE) 701 dinst->src[0] = addr; 702 if (dinst->op != TGSI_OPCODE_STORE) 703 dinst->dst[j].index++; 704 } 705 } else { 706 /* if we aren't writing to a double, just get the bit of the 707 * initial writemask for this channel 708 */ 709 dinst->dst[j].writemask = initial_dst_writemask[j] & (1 << i); 710 } 711 } 712 713 /* modify the src registers */ 714 for (j = 0; j < 4; j++) { 715 int swz = GET_SWZ(initial_src_swz[j], i); 716 717 if (glsl_base_type_is_64bit(dinst->src[j].type)) { 718 dinst->src[j].index = initial_src_idx[j]; 719 if (swz > 1) { 720 dinst->src[j].double_reg2 = true; 721 dinst->src[j].index++; 722 } 723 724 if (swz & 1) 725 dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_W, 726 SWIZZLE_Z, SWIZZLE_W); 727 else 728 dinst->src[j].swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, 729 SWIZZLE_X, SWIZZLE_Y); 730 731 } else { 732 /* some opcodes are special case in what they use as sources 733 * - [FUI]2D/[UI]2I64 is a float/[u]int src0, (D)LDEXP is 734 * integer src1 735 */ 736 if (op == TGSI_OPCODE_F2D || op == TGSI_OPCODE_U2D || 737 op == TGSI_OPCODE_I2D || 738 op == TGSI_OPCODE_I2I64 || op == TGSI_OPCODE_U2I64 || 739 op == TGSI_OPCODE_DLDEXP || op == TGSI_OPCODE_LDEXP || 740 (op == TGSI_OPCODE_UCMP && dst_is_64bit[0])) { 741 dinst->src[j].swizzle = MAKE_SWIZZLE4(swz, swz, swz, swz); 742 } 743 } 744 } 745 } 746 inst = dinst; 747 } else { 748 this->instructions.push_tail(inst); 749 } 750 751 752 return inst; 753} 754 755glsl_to_tgsi_instruction * 756glsl_to_tgsi_visitor::emit_asm(ir_instruction *ir, enum tgsi_opcode op, 757 st_dst_reg dst, 758 st_src_reg src0, st_src_reg src1, 759 st_src_reg src2, st_src_reg src3) 760{ 761 return emit_asm(ir, op, dst, undef_dst, src0, src1, src2, src3); 762} 763 764/** 765 * Determines whether to use an integer, unsigned integer, or float opcode 766 * based on the operands and input opcode, then emits the result. 767 */ 768enum tgsi_opcode 769glsl_to_tgsi_visitor::get_opcode(enum tgsi_opcode op, 770 st_dst_reg dst, 771 st_src_reg src0, st_src_reg src1) 772{ 773 enum glsl_base_type type = GLSL_TYPE_FLOAT; 774 775 if (op == TGSI_OPCODE_MOV) 776 return op; 777 778 assert(src0.type != GLSL_TYPE_ARRAY); 779 assert(src0.type != GLSL_TYPE_STRUCT); 780 assert(src1.type != GLSL_TYPE_ARRAY); 781 assert(src1.type != GLSL_TYPE_STRUCT); 782 783 if (is_resource_instruction(op)) 784 type = src1.type; 785 else if (src0.type == GLSL_TYPE_INT64 || src1.type == GLSL_TYPE_INT64) 786 type = GLSL_TYPE_INT64; 787 else if (src0.type == GLSL_TYPE_UINT64 || src1.type == GLSL_TYPE_UINT64) 788 type = GLSL_TYPE_UINT64; 789 else if (src0.type == GLSL_TYPE_DOUBLE || src1.type == GLSL_TYPE_DOUBLE) 790 type = GLSL_TYPE_DOUBLE; 791 else if (src0.type == GLSL_TYPE_FLOAT || src1.type == GLSL_TYPE_FLOAT) 792 type = GLSL_TYPE_FLOAT; 793 else if (native_integers) 794 type = src0.type == GLSL_TYPE_BOOL ? GLSL_TYPE_INT : src0.type; 795 796#define case7(c, f, i, u, d, i64, ui64) \ 797 case TGSI_OPCODE_##c: \ 798 if (type == GLSL_TYPE_UINT64) \ 799 op = TGSI_OPCODE_##ui64; \ 800 else if (type == GLSL_TYPE_INT64) \ 801 op = TGSI_OPCODE_##i64; \ 802 else if (type == GLSL_TYPE_DOUBLE) \ 803 op = TGSI_OPCODE_##d; \ 804 else if (type == GLSL_TYPE_INT) \ 805 op = TGSI_OPCODE_##i; \ 806 else if (type == GLSL_TYPE_UINT) \ 807 op = TGSI_OPCODE_##u; \ 808 else \ 809 op = TGSI_OPCODE_##f; \ 810 break; 811 812#define casecomp(c, f, i, u, d, i64, ui64) \ 813 case TGSI_OPCODE_##c: \ 814 if (type == GLSL_TYPE_INT64) \ 815 op = TGSI_OPCODE_##i64; \ 816 else if (type == GLSL_TYPE_UINT64) \ 817 op = TGSI_OPCODE_##ui64; \ 818 else if (type == GLSL_TYPE_DOUBLE) \ 819 op = TGSI_OPCODE_##d; \ 820 else if (type == GLSL_TYPE_INT || type == GLSL_TYPE_SUBROUTINE) \ 821 op = TGSI_OPCODE_##i; \ 822 else if (type == GLSL_TYPE_UINT) \ 823 op = TGSI_OPCODE_##u; \ 824 else if (native_integers) \ 825 op = TGSI_OPCODE_##f; \ 826 else \ 827 op = TGSI_OPCODE_##c; \ 828 break; 829 830 switch (op) { 831 /* Some instructions are initially selected without considering the type. 832 * This fixes the type: 833 * 834 * INIT FLOAT SINT UINT DOUBLE SINT64 UINT64 835 */ 836 case7(ADD, ADD, UADD, UADD, DADD, U64ADD, U64ADD); 837 case7(CEIL, CEIL, LAST, LAST, DCEIL, LAST, LAST); 838 case7(DIV, DIV, IDIV, UDIV, DDIV, I64DIV, U64DIV); 839 case7(FMA, FMA, UMAD, UMAD, DFMA, LAST, LAST); 840 case7(FLR, FLR, LAST, LAST, DFLR, LAST, LAST); 841 case7(FRC, FRC, LAST, LAST, DFRAC, LAST, LAST); 842 case7(MUL, MUL, UMUL, UMUL, DMUL, U64MUL, U64MUL); 843 case7(MAD, MAD, UMAD, UMAD, DMAD, LAST, LAST); 844 case7(MAX, MAX, IMAX, UMAX, DMAX, I64MAX, U64MAX); 845 case7(MIN, MIN, IMIN, UMIN, DMIN, I64MIN, U64MIN); 846 case7(RCP, RCP, LAST, LAST, DRCP, LAST, LAST); 847 case7(ROUND, ROUND,LAST, LAST, DROUND, LAST, LAST); 848 case7(RSQ, RSQ, LAST, LAST, DRSQ, LAST, LAST); 849 case7(SQRT, SQRT, LAST, LAST, DSQRT, LAST, LAST); 850 case7(SSG, SSG, ISSG, ISSG, DSSG, I64SSG, I64SSG); 851 case7(TRUNC, TRUNC,LAST, LAST, DTRUNC, LAST, LAST); 852 853 case7(MOD, LAST, MOD, UMOD, LAST, I64MOD, U64MOD); 854 case7(SHL, LAST, SHL, SHL, LAST, U64SHL, U64SHL); 855 case7(IBFE, LAST, IBFE, UBFE, LAST, LAST, LAST); 856 case7(IMSB, LAST, IMSB, UMSB, LAST, LAST, LAST); 857 case7(IMUL_HI, LAST, IMUL_HI, UMUL_HI, LAST, LAST, LAST); 858 case7(ISHR, LAST, ISHR, USHR, LAST, I64SHR, U64SHR); 859 case7(ATOMIMAX,LAST, ATOMIMAX,ATOMUMAX,LAST, LAST, LAST); 860 case7(ATOMIMIN,LAST, ATOMIMIN,ATOMUMIN,LAST, LAST, LAST); 861 case7(ATOMUADD,ATOMFADD,ATOMUADD,ATOMUADD,LAST, LAST, LAST); 862 863 casecomp(SEQ, FSEQ, USEQ, USEQ, DSEQ, U64SEQ, U64SEQ); 864 casecomp(SNE, FSNE, USNE, USNE, DSNE, U64SNE, U64SNE); 865 casecomp(SGE, FSGE, ISGE, USGE, DSGE, I64SGE, U64SGE); 866 casecomp(SLT, FSLT, ISLT, USLT, DSLT, I64SLT, U64SLT); 867 868 default: 869 break; 870 } 871 872 assert(op != TGSI_OPCODE_LAST); 873 return op; 874} 875 876glsl_to_tgsi_instruction * 877glsl_to_tgsi_visitor::emit_dp(ir_instruction *ir, 878 st_dst_reg dst, st_src_reg src0, st_src_reg src1, 879 unsigned elements) 880{ 881 static const enum tgsi_opcode dot_opcodes[] = { 882 TGSI_OPCODE_DP2, TGSI_OPCODE_DP3, TGSI_OPCODE_DP4 883 }; 884 885 return emit_asm(ir, dot_opcodes[elements - 2], dst, src0, src1); 886} 887 888/** 889 * Emits TGSI scalar opcodes to produce unique answers across channels. 890 * 891 * Some TGSI opcodes are scalar-only, like ARB_fp/vp. The src X 892 * channel determines the result across all channels. So to do a vec4 893 * of this operation, we want to emit a scalar per source channel used 894 * to produce dest channels. 895 */ 896void 897glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum tgsi_opcode op, 898 st_dst_reg dst, 899 st_src_reg orig_src0, st_src_reg orig_src1) 900{ 901 int i, j; 902 int done_mask = ~dst.writemask; 903 904 /* TGSI RCP is a scalar operation splatting results to all channels, 905 * like ARB_fp/vp. So emit as many RCPs as necessary to cover our 906 * dst channels. 907 */ 908 for (i = 0; i < 4; i++) { 909 GLuint this_mask = (1 << i); 910 st_src_reg src0 = orig_src0; 911 st_src_reg src1 = orig_src1; 912 913 if (done_mask & this_mask) 914 continue; 915 916 GLuint src0_swiz = GET_SWZ(src0.swizzle, i); 917 GLuint src1_swiz = GET_SWZ(src1.swizzle, i); 918 for (j = i + 1; j < 4; j++) { 919 /* If there is another enabled component in the destination that is 920 * derived from the same inputs, generate its value on this pass as 921 * well. 922 */ 923 if (!(done_mask & (1 << j)) && 924 GET_SWZ(src0.swizzle, j) == src0_swiz && 925 GET_SWZ(src1.swizzle, j) == src1_swiz) { 926 this_mask |= (1 << j); 927 } 928 } 929 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, 930 src0_swiz, src0_swiz); 931 src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz, 932 src1_swiz, src1_swiz); 933 934 dst.writemask = this_mask; 935 emit_asm(ir, op, dst, src0, src1); 936 done_mask |= this_mask; 937 } 938} 939 940void 941glsl_to_tgsi_visitor::emit_scalar(ir_instruction *ir, enum tgsi_opcode op, 942 st_dst_reg dst, st_src_reg src0) 943{ 944 st_src_reg undef = undef_src; 945 946 undef.swizzle = SWIZZLE_XXXX; 947 948 emit_scalar(ir, op, dst, src0, undef); 949} 950 951void 952glsl_to_tgsi_visitor::emit_arl(ir_instruction *ir, 953 st_dst_reg dst, st_src_reg src0) 954{ 955 enum tgsi_opcode op = TGSI_OPCODE_ARL; 956 957 if (src0.type == GLSL_TYPE_INT || src0.type == GLSL_TYPE_UINT) { 958 if (!this->need_uarl && src0.is_legal_tgsi_address_operand()) 959 return; 960 961 op = TGSI_OPCODE_UARL; 962 } 963 964 assert(dst.file == PROGRAM_ADDRESS); 965 if (dst.index >= this->num_address_regs) 966 this->num_address_regs = dst.index + 1; 967 968 emit_asm(NULL, op, dst, src0); 969} 970 971int 972glsl_to_tgsi_visitor::add_constant(gl_register_file file, 973 gl_constant_value values[8], int size, 974 GLenum datatype, 975 uint16_t *swizzle_out) 976{ 977 if (file == PROGRAM_CONSTANT) { 978 GLuint swizzle = swizzle_out ? *swizzle_out : 0; 979 int result = _mesa_add_typed_unnamed_constant(this->prog->Parameters, 980 values, size, datatype, 981 &swizzle); 982 if (swizzle_out) 983 *swizzle_out = swizzle; 984 return result; 985 } 986 987 assert(file == PROGRAM_IMMEDIATE); 988 989 int index = 0; 990 immediate_storage *entry; 991 int size32 = size * ((datatype == GL_DOUBLE || 992 datatype == GL_INT64_ARB || 993 datatype == GL_UNSIGNED_INT64_ARB) ? 2 : 1); 994 int i; 995 996 /* Search immediate storage to see if we already have an identical 997 * immediate that we can use instead of adding a duplicate entry. 998 */ 999 foreach_in_list(immediate_storage, entry, &this->immediates) { 1000 immediate_storage *tmp = entry; 1001 1002 for (i = 0; i * 4 < size32; i++) { 1003 int slot_size = MIN2(size32 - (i * 4), 4); 1004 if (tmp->type != datatype || tmp->size32 != slot_size) 1005 break; 1006 if (memcmp(tmp->values, &values[i * 4], 1007 slot_size * sizeof(gl_constant_value))) 1008 break; 1009 1010 /* Everything matches, keep going until the full size is matched */ 1011 tmp = (immediate_storage *)tmp->next; 1012 } 1013 1014 /* The full value matched */ 1015 if (i * 4 >= size32) 1016 return index; 1017 1018 index++; 1019 } 1020 1021 for (i = 0; i * 4 < size32; i++) { 1022 int slot_size = MIN2(size32 - (i * 4), 4); 1023 /* Add this immediate to the list. */ 1024 entry = new(mem_ctx) immediate_storage(&values[i * 4], 1025 slot_size, datatype); 1026 this->immediates.push_tail(entry); 1027 this->num_immediates++; 1028 } 1029 return index; 1030} 1031 1032st_src_reg 1033glsl_to_tgsi_visitor::st_src_reg_for_float(float val) 1034{ 1035 st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_FLOAT); 1036 union gl_constant_value uval; 1037 1038 uval.f = val; 1039 src.index = add_constant(src.file, &uval, 1, GL_FLOAT, &src.swizzle); 1040 1041 return src; 1042} 1043 1044st_src_reg 1045glsl_to_tgsi_visitor::st_src_reg_for_double(double val) 1046{ 1047 st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_DOUBLE); 1048 union gl_constant_value uval[2]; 1049 1050 memcpy(uval, &val, sizeof(uval)); 1051 src.index = add_constant(src.file, uval, 1, GL_DOUBLE, &src.swizzle); 1052 src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y); 1053 return src; 1054} 1055 1056st_src_reg 1057glsl_to_tgsi_visitor::st_src_reg_for_int(int val) 1058{ 1059 st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT); 1060 union gl_constant_value uval; 1061 1062 assert(native_integers); 1063 1064 uval.i = val; 1065 src.index = add_constant(src.file, &uval, 1, GL_INT, &src.swizzle); 1066 1067 return src; 1068} 1069 1070st_src_reg 1071glsl_to_tgsi_visitor::st_src_reg_for_int64(int64_t val) 1072{ 1073 st_src_reg src(PROGRAM_IMMEDIATE, -1, GLSL_TYPE_INT64); 1074 union gl_constant_value uval[2]; 1075 1076 memcpy(uval, &val, sizeof(uval)); 1077 src.index = add_constant(src.file, uval, 1, GL_DOUBLE, &src.swizzle); 1078 src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_X, SWIZZLE_Y); 1079 1080 return src; 1081} 1082 1083st_src_reg 1084glsl_to_tgsi_visitor::st_src_reg_for_type(enum glsl_base_type type, int val) 1085{ 1086 if (native_integers) 1087 return type == GLSL_TYPE_FLOAT ? st_src_reg_for_float(val) : 1088 st_src_reg_for_int(val); 1089 else 1090 return st_src_reg_for_float(val); 1091} 1092 1093static int 1094attrib_type_size(const struct glsl_type *type, bool is_vs_input) 1095{ 1096 return type->count_attribute_slots(is_vs_input); 1097} 1098 1099static int 1100type_size(const struct glsl_type *type) 1101{ 1102 return type->count_attribute_slots(false); 1103} 1104 1105static void 1106add_buffer_to_load_and_stores(glsl_to_tgsi_instruction *inst, st_src_reg *buf, 1107 exec_list *instructions, ir_constant *access) 1108{ 1109 /** 1110 * emit_asm() might have actually split the op into pieces, e.g. for 1111 * double stores. We have to go back and fix up all the generated ops. 1112 */ 1113 enum tgsi_opcode op = inst->op; 1114 do { 1115 inst->resource = *buf; 1116 if (access) 1117 inst->buffer_access = access->value.u[0]; 1118 1119 if (inst == instructions->get_head_raw()) 1120 break; 1121 inst = (glsl_to_tgsi_instruction *)inst->get_prev(); 1122 1123 if (inst->op == TGSI_OPCODE_UADD) { 1124 if (inst == instructions->get_head_raw()) 1125 break; 1126 inst = (glsl_to_tgsi_instruction *)inst->get_prev(); 1127 } 1128 } while (inst->op == op && inst->resource.file == PROGRAM_UNDEFINED); 1129} 1130 1131/** 1132 * If the given GLSL type is an array or matrix or a structure containing 1133 * an array/matrix member, return true. Else return false. 1134 * 1135 * This is used to determine which kind of temp storage (PROGRAM_TEMPORARY 1136 * or PROGRAM_ARRAY) should be used for variables of this type. Anytime 1137 * we have an array that might be indexed with a variable, we need to use 1138 * the later storage type. 1139 */ 1140static bool 1141type_has_array_or_matrix(const glsl_type *type) 1142{ 1143 if (type->is_array() || type->is_matrix()) 1144 return true; 1145 1146 if (type->is_struct()) { 1147 for (unsigned i = 0; i < type->length; i++) { 1148 if (type_has_array_or_matrix(type->fields.structure[i].type)) { 1149 return true; 1150 } 1151 } 1152 } 1153 1154 return false; 1155} 1156 1157 1158/** 1159 * In the initial pass of codegen, we assign temporary numbers to 1160 * intermediate results. (not SSA -- variable assignments will reuse 1161 * storage). 1162 */ 1163st_src_reg 1164glsl_to_tgsi_visitor::get_temp(const glsl_type *type) 1165{ 1166 st_src_reg src; 1167 1168 src.type = native_integers ? type->base_type : GLSL_TYPE_FLOAT; 1169 src.reladdr = NULL; 1170 src.negate = 0; 1171 src.abs = 0; 1172 1173 if (!options->EmitNoIndirectTemp && type_has_array_or_matrix(type)) { 1174 if (next_array >= max_num_arrays) { 1175 max_num_arrays += 32; 1176 array_sizes = (unsigned*) 1177 realloc(array_sizes, sizeof(array_sizes[0]) * max_num_arrays); 1178 } 1179 1180 src.file = PROGRAM_ARRAY; 1181 src.index = 0; 1182 src.array_id = next_array + 1; 1183 array_sizes[next_array] = type_size(type); 1184 ++next_array; 1185 1186 } else { 1187 src.file = PROGRAM_TEMPORARY; 1188 src.index = next_temp; 1189 next_temp += type_size(type); 1190 } 1191 1192 if (type->is_array() || type->is_struct()) { 1193 src.swizzle = SWIZZLE_NOOP; 1194 } else { 1195 src.swizzle = swizzle_for_size(type->vector_elements); 1196 } 1197 1198 return src; 1199} 1200 1201variable_storage * 1202glsl_to_tgsi_visitor::find_variable_storage(ir_variable *var) 1203{ 1204 struct hash_entry *entry; 1205 1206 entry = _mesa_hash_table_search(this->variables, var); 1207 if (!entry) 1208 return NULL; 1209 1210 return (variable_storage *)entry->data; 1211} 1212 1213void 1214glsl_to_tgsi_visitor::visit(ir_variable *ir) 1215{ 1216 if (ir->data.mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) { 1217 unsigned int i; 1218 const ir_state_slot *const slots = ir->get_state_slots(); 1219 assert(slots != NULL); 1220 1221 /* Check if this statevar's setup in the STATE file exactly 1222 * matches how we'll want to reference it as a 1223 * struct/array/whatever. If not, then we need to move it into 1224 * temporary storage and hope that it'll get copy-propagated 1225 * out. 1226 */ 1227 for (i = 0; i < ir->get_num_state_slots(); i++) { 1228 if (slots[i].swizzle != SWIZZLE_XYZW) { 1229 break; 1230 } 1231 } 1232 1233 variable_storage *storage; 1234 st_dst_reg dst; 1235 if (i == ir->get_num_state_slots()) { 1236 /* We'll set the index later. */ 1237 storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1); 1238 1239 _mesa_hash_table_insert(this->variables, ir, storage); 1240 1241 dst = undef_dst; 1242 } else { 1243 /* The variable_storage constructor allocates slots based on the size 1244 * of the type. However, this had better match the number of state 1245 * elements that we're going to copy into the new temporary. 1246 */ 1247 assert((int) ir->get_num_state_slots() == type_size(ir->type)); 1248 1249 dst = st_dst_reg(get_temp(ir->type)); 1250 1251 storage = new(mem_ctx) variable_storage(ir, dst.file, dst.index, 1252 dst.array_id); 1253 1254 _mesa_hash_table_insert(this->variables, ir, storage); 1255 } 1256 1257 1258 for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) { 1259 int index = _mesa_add_state_reference(this->prog->Parameters, 1260 slots[i].tokens); 1261 1262 if (storage->file == PROGRAM_STATE_VAR) { 1263 if (storage->index == -1) { 1264 storage->index = index; 1265 } else { 1266 assert(index == storage->index + (int)i); 1267 } 1268 } else { 1269 /* We use GLSL_TYPE_FLOAT here regardless of the actual type of 1270 * the data being moved since MOV does not care about the type of 1271 * data it is moving, and we don't want to declare registers with 1272 * array or struct types. 1273 */ 1274 st_src_reg src(PROGRAM_STATE_VAR, index, GLSL_TYPE_FLOAT); 1275 src.swizzle = slots[i].swizzle; 1276 emit_asm(ir, TGSI_OPCODE_MOV, dst, src); 1277 /* even a float takes up a whole vec4 reg in a struct/array. */ 1278 dst.index++; 1279 } 1280 } 1281 1282 if (storage->file == PROGRAM_TEMPORARY && 1283 dst.index != storage->index + (int) ir->get_num_state_slots()) { 1284 fail_link(this->shader_program, 1285 "failed to load builtin uniform `%s' (%d/%d regs loaded)\n", 1286 ir->name, dst.index - storage->index, 1287 type_size(ir->type)); 1288 } 1289 } 1290} 1291 1292void 1293glsl_to_tgsi_visitor::visit(ir_loop *ir) 1294{ 1295 emit_asm(NULL, TGSI_OPCODE_BGNLOOP); 1296 1297 visit_exec_list(&ir->body_instructions, this); 1298 1299 emit_asm(NULL, TGSI_OPCODE_ENDLOOP); 1300} 1301 1302void 1303glsl_to_tgsi_visitor::visit(ir_loop_jump *ir) 1304{ 1305 switch (ir->mode) { 1306 case ir_loop_jump::jump_break: 1307 emit_asm(NULL, TGSI_OPCODE_BRK); 1308 break; 1309 case ir_loop_jump::jump_continue: 1310 emit_asm(NULL, TGSI_OPCODE_CONT); 1311 break; 1312 } 1313} 1314 1315 1316void 1317glsl_to_tgsi_visitor::visit(ir_function_signature *ir) 1318{ 1319 assert(0); 1320 (void)ir; 1321} 1322 1323void 1324glsl_to_tgsi_visitor::visit(ir_function *ir) 1325{ 1326 /* Ignore function bodies other than main() -- we shouldn't see calls to 1327 * them since they should all be inlined before we get to glsl_to_tgsi. 1328 */ 1329 if (strcmp(ir->name, "main") == 0) { 1330 const ir_function_signature *sig; 1331 exec_list empty; 1332 1333 sig = ir->matching_signature(NULL, &empty, false); 1334 1335 assert(sig); 1336 1337 foreach_in_list(ir_instruction, ir, &sig->body) { 1338 ir->accept(this); 1339 } 1340 } 1341} 1342 1343bool 1344glsl_to_tgsi_visitor::try_emit_mad(ir_expression *ir, int mul_operand) 1345{ 1346 int nonmul_operand = 1 - mul_operand; 1347 st_src_reg a, b, c; 1348 st_dst_reg result_dst; 1349 1350 // there is no TGSI opcode for this 1351 if (ir->type->is_integer_64()) 1352 return false; 1353 1354 ir_expression *expr = ir->operands[mul_operand]->as_expression(); 1355 if (!expr || expr->operation != ir_binop_mul) 1356 return false; 1357 1358 expr->operands[0]->accept(this); 1359 a = this->result; 1360 expr->operands[1]->accept(this); 1361 b = this->result; 1362 ir->operands[nonmul_operand]->accept(this); 1363 c = this->result; 1364 1365 this->result = get_temp(ir->type); 1366 result_dst = st_dst_reg(this->result); 1367 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 1368 emit_asm(ir, TGSI_OPCODE_MAD, result_dst, a, b, c); 1369 1370 return true; 1371} 1372 1373/** 1374 * Emit MAD(a, -b, a) instead of AND(a, NOT(b)) 1375 * 1376 * The logic values are 1.0 for true and 0.0 for false. Logical-and is 1377 * implemented using multiplication, and logical-or is implemented using 1378 * addition. Logical-not can be implemented as (true - x), or (1.0 - x). 1379 * As result, the logical expression (a & !b) can be rewritten as: 1380 * 1381 * - a * !b 1382 * - a * (1 - b) 1383 * - (a * 1) - (a * b) 1384 * - a + -(a * b) 1385 * - a + (a * -b) 1386 * 1387 * This final expression can be implemented as a single MAD(a, -b, a) 1388 * instruction. 1389 */ 1390bool 1391glsl_to_tgsi_visitor::try_emit_mad_for_and_not(ir_expression *ir, 1392 int try_operand) 1393{ 1394 const int other_operand = 1 - try_operand; 1395 st_src_reg a, b; 1396 1397 ir_expression *expr = ir->operands[try_operand]->as_expression(); 1398 if (!expr || expr->operation != ir_unop_logic_not) 1399 return false; 1400 1401 ir->operands[other_operand]->accept(this); 1402 a = this->result; 1403 expr->operands[0]->accept(this); 1404 b = this->result; 1405 1406 b.negate = ~b.negate; 1407 1408 this->result = get_temp(ir->type); 1409 emit_asm(ir, TGSI_OPCODE_MAD, st_dst_reg(this->result), a, b, a); 1410 1411 return true; 1412} 1413 1414void 1415glsl_to_tgsi_visitor::reladdr_to_temp(ir_instruction *ir, 1416 st_src_reg *reg, int *num_reladdr) 1417{ 1418 if (!reg->reladdr && !reg->reladdr2) 1419 return; 1420 1421 if (reg->reladdr) 1422 emit_arl(ir, address_reg, *reg->reladdr); 1423 if (reg->reladdr2) 1424 emit_arl(ir, address_reg2, *reg->reladdr2); 1425 1426 if (*num_reladdr != 1) { 1427 st_src_reg temp = get_temp(glsl_type::get_instance(reg->type, 4, 1)); 1428 1429 emit_asm(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), *reg); 1430 *reg = temp; 1431 } 1432 1433 (*num_reladdr)--; 1434} 1435 1436void 1437glsl_to_tgsi_visitor::visit(ir_expression *ir) 1438{ 1439 st_src_reg op[ARRAY_SIZE(ir->operands)]; 1440 1441 /* Quick peephole: Emit MAD(a, b, c) instead of ADD(MUL(a, b), c) 1442 */ 1443 if (!this->precise && ir->operation == ir_binop_add) { 1444 if (try_emit_mad(ir, 1)) 1445 return; 1446 if (try_emit_mad(ir, 0)) 1447 return; 1448 } 1449 1450 /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b)) 1451 */ 1452 if (!native_integers && ir->operation == ir_binop_logic_and) { 1453 if (try_emit_mad_for_and_not(ir, 1)) 1454 return; 1455 if (try_emit_mad_for_and_not(ir, 0)) 1456 return; 1457 } 1458 1459 if (ir->operation == ir_quadop_vector) 1460 assert(!"ir_quadop_vector should have been lowered"); 1461 1462 for (unsigned int operand = 0; operand < ir->num_operands; operand++) { 1463 this->result.file = PROGRAM_UNDEFINED; 1464 ir->operands[operand]->accept(this); 1465 if (this->result.file == PROGRAM_UNDEFINED) { 1466 printf("Failed to get tree for expression operand:\n"); 1467 ir->operands[operand]->print(); 1468 printf("\n"); 1469 exit(1); 1470 } 1471 op[operand] = this->result; 1472 1473 /* Matrix expression operands should have been broken down to vector 1474 * operations already. 1475 */ 1476 assert(!ir->operands[operand]->type->is_matrix()); 1477 } 1478 1479 visit_expression(ir, op); 1480} 1481 1482/* The non-recursive part of the expression visitor lives in a separate 1483 * function and should be prevented from being inlined, to avoid a stack 1484 * explosion when deeply nested expressions are visited. 1485 */ 1486void 1487glsl_to_tgsi_visitor::visit_expression(ir_expression* ir, st_src_reg *op) 1488{ 1489 st_src_reg result_src; 1490 st_dst_reg result_dst; 1491 1492 int vector_elements = ir->operands[0]->type->vector_elements; 1493 if (ir->operands[1] && 1494 ir->operation != ir_binop_interpolate_at_offset && 1495 ir->operation != ir_binop_interpolate_at_sample) { 1496 st_src_reg *swz_op = NULL; 1497 if (vector_elements > ir->operands[1]->type->vector_elements) { 1498 assert(ir->operands[1]->type->vector_elements == 1); 1499 swz_op = &op[1]; 1500 } else if (vector_elements < ir->operands[1]->type->vector_elements) { 1501 assert(ir->operands[0]->type->vector_elements == 1); 1502 swz_op = &op[0]; 1503 } 1504 if (swz_op) { 1505 uint16_t swizzle_x = GET_SWZ(swz_op->swizzle, 0); 1506 swz_op->swizzle = MAKE_SWIZZLE4(swizzle_x, swizzle_x, 1507 swizzle_x, swizzle_x); 1508 } 1509 vector_elements = MAX2(vector_elements, 1510 ir->operands[1]->type->vector_elements); 1511 } 1512 if (ir->operands[2] && 1513 ir->operands[2]->type->vector_elements != vector_elements) { 1514 /* This can happen with ir_triop_lrp, i.e. glsl mix */ 1515 assert(ir->operands[2]->type->vector_elements == 1); 1516 uint16_t swizzle_x = GET_SWZ(op[2].swizzle, 0); 1517 op[2].swizzle = MAKE_SWIZZLE4(swizzle_x, swizzle_x, 1518 swizzle_x, swizzle_x); 1519 } 1520 1521 this->result.file = PROGRAM_UNDEFINED; 1522 1523 /* Storage for our result. Ideally for an assignment we'd be using 1524 * the actual storage for the result here, instead. 1525 */ 1526 result_src = get_temp(ir->type); 1527 /* convenience for the emit functions below. */ 1528 result_dst = st_dst_reg(result_src); 1529 /* Limit writes to the channels that will be used by result_src later. 1530 * This does limit this temp's use as a temporary for multi-instruction 1531 * sequences. 1532 */ 1533 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 1534 1535 switch (ir->operation) { 1536 case ir_unop_logic_not: 1537 if (result_dst.type != GLSL_TYPE_FLOAT) 1538 emit_asm(ir, TGSI_OPCODE_NOT, result_dst, op[0]); 1539 else { 1540 /* Previously 'SEQ dst, src, 0.0' was used for this. However, many 1541 * older GPUs implement SEQ using multiple instructions (i915 uses two 1542 * SGE instructions and a MUL instruction). Since our logic values are 1543 * 0.0 and 1.0, 1-x also implements !x. 1544 */ 1545 op[0].negate = ~op[0].negate; 1546 emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], 1547 st_src_reg_for_float(1.0)); 1548 } 1549 break; 1550 case ir_unop_neg: 1551 if (result_dst.type == GLSL_TYPE_INT64 || 1552 result_dst.type == GLSL_TYPE_UINT64) 1553 emit_asm(ir, TGSI_OPCODE_I64NEG, result_dst, op[0]); 1554 else if (result_dst.type == GLSL_TYPE_INT || 1555 result_dst.type == GLSL_TYPE_UINT) 1556 emit_asm(ir, TGSI_OPCODE_INEG, result_dst, op[0]); 1557 else if (result_dst.type == GLSL_TYPE_DOUBLE) 1558 emit_asm(ir, TGSI_OPCODE_DNEG, result_dst, op[0]); 1559 else { 1560 op[0].negate = ~op[0].negate; 1561 result_src = op[0]; 1562 } 1563 break; 1564 case ir_unop_subroutine_to_int: 1565 emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]); 1566 break; 1567 case ir_unop_abs: 1568 if (result_dst.type == GLSL_TYPE_FLOAT) 1569 emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0].get_abs()); 1570 else if (result_dst.type == GLSL_TYPE_DOUBLE) 1571 emit_asm(ir, TGSI_OPCODE_DABS, result_dst, op[0]); 1572 else if (result_dst.type == GLSL_TYPE_INT64 || 1573 result_dst.type == GLSL_TYPE_UINT64) 1574 emit_asm(ir, TGSI_OPCODE_I64ABS, result_dst, op[0]); 1575 else 1576 emit_asm(ir, TGSI_OPCODE_IABS, result_dst, op[0]); 1577 break; 1578 case ir_unop_sign: 1579 emit_asm(ir, TGSI_OPCODE_SSG, result_dst, op[0]); 1580 break; 1581 case ir_unop_rcp: 1582 emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, op[0]); 1583 break; 1584 1585 case ir_unop_exp2: 1586 emit_scalar(ir, TGSI_OPCODE_EX2, result_dst, op[0]); 1587 break; 1588 case ir_unop_exp: 1589 assert(!"not reached: should be handled by exp_to_exp2"); 1590 break; 1591 case ir_unop_log: 1592 assert(!"not reached: should be handled by log_to_log2"); 1593 break; 1594 case ir_unop_log2: 1595 emit_scalar(ir, TGSI_OPCODE_LG2, result_dst, op[0]); 1596 break; 1597 case ir_unop_sin: 1598 emit_scalar(ir, TGSI_OPCODE_SIN, result_dst, op[0]); 1599 break; 1600 case ir_unop_cos: 1601 emit_scalar(ir, TGSI_OPCODE_COS, result_dst, op[0]); 1602 break; 1603 case ir_unop_saturate: { 1604 glsl_to_tgsi_instruction *inst; 1605 inst = emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]); 1606 inst->saturate = true; 1607 break; 1608 } 1609 1610 case ir_unop_dFdx: 1611 case ir_unop_dFdx_coarse: 1612 emit_asm(ir, TGSI_OPCODE_DDX, result_dst, op[0]); 1613 break; 1614 case ir_unop_dFdx_fine: 1615 emit_asm(ir, TGSI_OPCODE_DDX_FINE, result_dst, op[0]); 1616 break; 1617 case ir_unop_dFdy: 1618 case ir_unop_dFdy_coarse: 1619 case ir_unop_dFdy_fine: 1620 { 1621 /* The X component contains 1 or -1 depending on whether the framebuffer 1622 * is a FBO or the window system buffer, respectively. 1623 * It is then multiplied with the source operand of DDY. 1624 */ 1625 static const gl_state_index16 transform_y_state[STATE_LENGTH] 1626 = { STATE_FB_WPOS_Y_TRANSFORM }; 1627 1628 unsigned transform_y_index = 1629 _mesa_add_state_reference(this->prog->Parameters, 1630 transform_y_state); 1631 1632 st_src_reg transform_y = st_src_reg(PROGRAM_STATE_VAR, 1633 transform_y_index, 1634 glsl_type::vec4_type); 1635 transform_y.swizzle = SWIZZLE_XXXX; 1636 1637 st_src_reg temp = get_temp(glsl_type::vec4_type); 1638 1639 emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(temp), transform_y, op[0]); 1640 emit_asm(ir, ir->operation == ir_unop_dFdy_fine ? 1641 TGSI_OPCODE_DDY_FINE : TGSI_OPCODE_DDY, result_dst, temp); 1642 break; 1643 } 1644 1645 case ir_unop_frexp_sig: 1646 emit_asm(ir, TGSI_OPCODE_DFRACEXP, result_dst, undef_dst, op[0]); 1647 break; 1648 1649 case ir_unop_frexp_exp: 1650 emit_asm(ir, TGSI_OPCODE_DFRACEXP, undef_dst, result_dst, op[0]); 1651 break; 1652 1653 case ir_binop_add: 1654 emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); 1655 break; 1656 case ir_binop_sub: 1657 op[1].negate = ~op[1].negate; 1658 emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); 1659 break; 1660 1661 case ir_binop_mul: 1662 emit_asm(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); 1663 break; 1664 case ir_binop_div: 1665 emit_asm(ir, TGSI_OPCODE_DIV, result_dst, op[0], op[1]); 1666 break; 1667 case ir_binop_mod: 1668 if (result_dst.type == GLSL_TYPE_FLOAT) 1669 assert(!"ir_binop_mod should have been converted to b * fract(a/b)"); 1670 else 1671 emit_asm(ir, TGSI_OPCODE_MOD, result_dst, op[0], op[1]); 1672 break; 1673 1674 case ir_binop_less: 1675 emit_asm(ir, TGSI_OPCODE_SLT, result_dst, op[0], op[1]); 1676 break; 1677 case ir_binop_gequal: 1678 emit_asm(ir, TGSI_OPCODE_SGE, result_dst, op[0], op[1]); 1679 break; 1680 case ir_binop_equal: 1681 emit_asm(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); 1682 break; 1683 case ir_binop_nequal: 1684 emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); 1685 break; 1686 case ir_binop_all_equal: 1687 /* "==" operator producing a scalar boolean. */ 1688 if (ir->operands[0]->type->is_vector() || 1689 ir->operands[1]->type->is_vector()) { 1690 st_src_reg temp = get_temp(native_integers ? 1691 glsl_type::uvec4_type : 1692 glsl_type::vec4_type); 1693 1694 if (native_integers) { 1695 st_dst_reg temp_dst = st_dst_reg(temp); 1696 st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp); 1697 1698 if (ir->operands[0]->type->is_boolean() && 1699 ir->operands[1]->as_constant() && 1700 ir->operands[1]->as_constant()->is_one()) { 1701 emit_asm(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), op[0]); 1702 } else { 1703 emit_asm(ir, TGSI_OPCODE_SEQ, st_dst_reg(temp), op[0], op[1]); 1704 } 1705 1706 /* Emit 1-3 AND operations to combine the SEQ results. */ 1707 switch (ir->operands[0]->type->vector_elements) { 1708 case 2: 1709 break; 1710 case 3: 1711 temp_dst.writemask = WRITEMASK_Y; 1712 temp1.swizzle = SWIZZLE_YYYY; 1713 temp2.swizzle = SWIZZLE_ZZZZ; 1714 emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); 1715 break; 1716 case 4: 1717 temp_dst.writemask = WRITEMASK_X; 1718 temp1.swizzle = SWIZZLE_XXXX; 1719 temp2.swizzle = SWIZZLE_YYYY; 1720 emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); 1721 temp_dst.writemask = WRITEMASK_Y; 1722 temp1.swizzle = SWIZZLE_ZZZZ; 1723 temp2.swizzle = SWIZZLE_WWWW; 1724 emit_asm(ir, TGSI_OPCODE_AND, temp_dst, temp1, temp2); 1725 } 1726 1727 temp1.swizzle = SWIZZLE_XXXX; 1728 temp2.swizzle = SWIZZLE_YYYY; 1729 emit_asm(ir, TGSI_OPCODE_AND, result_dst, temp1, temp2); 1730 } else { 1731 emit_asm(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); 1732 1733 /* After the dot-product, the value will be an integer on the 1734 * range [0,4]. Zero becomes 1.0, and positive values become zero. 1735 */ 1736 emit_dp(ir, result_dst, temp, temp, vector_elements); 1737 1738 /* Negating the result of the dot-product gives values on the range 1739 * [-4, 0]. Zero becomes 1.0, and negative values become zero. 1740 * This is achieved using SGE. 1741 */ 1742 st_src_reg sge_src = result_src; 1743 sge_src.negate = ~sge_src.negate; 1744 emit_asm(ir, TGSI_OPCODE_SGE, result_dst, sge_src, 1745 st_src_reg_for_float(0.0)); 1746 } 1747 } else { 1748 emit_asm(ir, TGSI_OPCODE_SEQ, result_dst, op[0], op[1]); 1749 } 1750 break; 1751 case ir_binop_any_nequal: 1752 /* "!=" operator producing a scalar boolean. */ 1753 if (ir->operands[0]->type->is_vector() || 1754 ir->operands[1]->type->is_vector()) { 1755 st_src_reg temp = get_temp(native_integers ? 1756 glsl_type::uvec4_type : 1757 glsl_type::vec4_type); 1758 if (ir->operands[0]->type->is_boolean() && 1759 ir->operands[1]->as_constant() && 1760 ir->operands[1]->as_constant()->is_zero()) { 1761 emit_asm(ir, TGSI_OPCODE_MOV, st_dst_reg(temp), op[0]); 1762 } else { 1763 emit_asm(ir, TGSI_OPCODE_SNE, st_dst_reg(temp), op[0], op[1]); 1764 } 1765 1766 if (native_integers) { 1767 st_dst_reg temp_dst = st_dst_reg(temp); 1768 st_src_reg temp1 = st_src_reg(temp), temp2 = st_src_reg(temp); 1769 1770 /* Emit 1-3 OR operations to combine the SNE results. */ 1771 switch (ir->operands[0]->type->vector_elements) { 1772 case 2: 1773 break; 1774 case 3: 1775 temp_dst.writemask = WRITEMASK_Y; 1776 temp1.swizzle = SWIZZLE_YYYY; 1777 temp2.swizzle = SWIZZLE_ZZZZ; 1778 emit_asm(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); 1779 break; 1780 case 4: 1781 temp_dst.writemask = WRITEMASK_X; 1782 temp1.swizzle = SWIZZLE_XXXX; 1783 temp2.swizzle = SWIZZLE_YYYY; 1784 emit_asm(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); 1785 temp_dst.writemask = WRITEMASK_Y; 1786 temp1.swizzle = SWIZZLE_ZZZZ; 1787 temp2.swizzle = SWIZZLE_WWWW; 1788 emit_asm(ir, TGSI_OPCODE_OR, temp_dst, temp1, temp2); 1789 } 1790 1791 temp1.swizzle = SWIZZLE_XXXX; 1792 temp2.swizzle = SWIZZLE_YYYY; 1793 emit_asm(ir, TGSI_OPCODE_OR, result_dst, temp1, temp2); 1794 } else { 1795 /* After the dot-product, the value will be an integer on the 1796 * range [0,4]. Zero stays zero, and positive values become 1.0. 1797 */ 1798 glsl_to_tgsi_instruction *const dp = 1799 emit_dp(ir, result_dst, temp, temp, vector_elements); 1800 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { 1801 /* The clamping to [0,1] can be done for free in the fragment 1802 * shader with a saturate. 1803 */ 1804 dp->saturate = true; 1805 } else { 1806 /* Negating the result of the dot-product gives values on the 1807 * range [-4, 0]. Zero stays zero, and negative values become 1808 * 1.0. This achieved using SLT. 1809 */ 1810 st_src_reg slt_src = result_src; 1811 slt_src.negate = ~slt_src.negate; 1812 emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, 1813 st_src_reg_for_float(0.0)); 1814 } 1815 } 1816 } else { 1817 emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); 1818 } 1819 break; 1820 1821 case ir_binop_logic_xor: 1822 if (native_integers) 1823 emit_asm(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]); 1824 else 1825 emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], op[1]); 1826 break; 1827 1828 case ir_binop_logic_or: { 1829 if (native_integers) { 1830 /* If integers are used as booleans, we can use an actual "or" 1831 * instruction. 1832 */ 1833 assert(native_integers); 1834 emit_asm(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]); 1835 } else { 1836 /* After the addition, the value will be an integer on the 1837 * range [0,2]. Zero stays zero, and positive values become 1.0. 1838 */ 1839 glsl_to_tgsi_instruction *add = 1840 emit_asm(ir, TGSI_OPCODE_ADD, result_dst, op[0], op[1]); 1841 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { 1842 /* The clamping to [0,1] can be done for free in the fragment 1843 * shader with a saturate if floats are being used as boolean 1844 * values. 1845 */ 1846 add->saturate = true; 1847 } else { 1848 /* Negating the result of the addition gives values on the range 1849 * [-2, 0]. Zero stays zero, and negative values become 1.0 1850 * This is achieved using SLT. 1851 */ 1852 st_src_reg slt_src = result_src; 1853 slt_src.negate = ~slt_src.negate; 1854 emit_asm(ir, TGSI_OPCODE_SLT, result_dst, slt_src, 1855 st_src_reg_for_float(0.0)); 1856 } 1857 } 1858 break; 1859 } 1860 1861 case ir_binop_logic_and: 1862 /* If native integers are disabled, the bool args are stored as float 0.0 1863 * or 1.0, so "mul" gives us "and". If they're enabled, just use the 1864 * actual AND opcode. 1865 */ 1866 if (native_integers) 1867 emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]); 1868 else 1869 emit_asm(ir, TGSI_OPCODE_MUL, result_dst, op[0], op[1]); 1870 break; 1871 1872 case ir_binop_dot: 1873 assert(ir->operands[0]->type->is_vector()); 1874 assert(ir->operands[0]->type == ir->operands[1]->type); 1875 emit_dp(ir, result_dst, op[0], op[1], 1876 ir->operands[0]->type->vector_elements); 1877 break; 1878 1879 case ir_unop_sqrt: 1880 if (have_sqrt) { 1881 emit_scalar(ir, TGSI_OPCODE_SQRT, result_dst, op[0]); 1882 } else { 1883 /* This is the only instruction sequence that makes the game "Risen" 1884 * render correctly. ABS is not required for the game, but since GLSL 1885 * declares negative values as "undefined", allowing us to do whatever 1886 * we want, I choose to use ABS to match DX9 and pre-GLSL RSQ 1887 * behavior. 1888 */ 1889 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0].get_abs()); 1890 emit_scalar(ir, TGSI_OPCODE_RCP, result_dst, result_src); 1891 } 1892 break; 1893 case ir_unop_rsq: 1894 emit_scalar(ir, TGSI_OPCODE_RSQ, result_dst, op[0]); 1895 break; 1896 case ir_unop_i2f: 1897 if (native_integers) { 1898 emit_asm(ir, TGSI_OPCODE_I2F, result_dst, op[0]); 1899 break; 1900 } 1901 FALLTHROUGH; 1902 case ir_unop_b2f: 1903 if (native_integers) { 1904 emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], 1905 st_src_reg_for_float(1.0)); 1906 break; 1907 } 1908 FALLTHROUGH; 1909 case ir_unop_i2u: 1910 case ir_unop_u2i: 1911 case ir_unop_i642u64: 1912 case ir_unop_u642i64: 1913 /* Converting between signed and unsigned integers is a no-op. */ 1914 result_src = op[0]; 1915 result_src.type = result_dst.type; 1916 break; 1917 case ir_unop_b2i: 1918 if (native_integers) { 1919 /* Booleans are stored as integers using ~0 for true and 0 for false. 1920 * GLSL requires that int(bool) return 1 for true and 0 for false. 1921 * This conversion is done with AND, but it could be done with NEG. 1922 */ 1923 emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], 1924 st_src_reg_for_int(1)); 1925 } else { 1926 /* Booleans and integers are both stored as floats when native 1927 * integers are disabled. 1928 */ 1929 result_src = op[0]; 1930 } 1931 break; 1932 case ir_unop_f2i: 1933 if (native_integers) 1934 emit_asm(ir, TGSI_OPCODE_F2I, result_dst, op[0]); 1935 else 1936 emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); 1937 break; 1938 case ir_unop_f2u: 1939 if (native_integers) 1940 emit_asm(ir, TGSI_OPCODE_F2U, result_dst, op[0]); 1941 else 1942 emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); 1943 break; 1944 case ir_unop_bitcast_f2i: 1945 case ir_unop_bitcast_f2u: 1946 /* Make sure we don't propagate the negate modifier to integer opcodes. */ 1947 if (op[0].negate || op[0].abs) 1948 emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]); 1949 else 1950 result_src = op[0]; 1951 result_src.type = ir->operation == ir_unop_bitcast_f2i ? GLSL_TYPE_INT : 1952 GLSL_TYPE_UINT; 1953 break; 1954 case ir_unop_bitcast_i2f: 1955 case ir_unop_bitcast_u2f: 1956 result_src = op[0]; 1957 result_src.type = GLSL_TYPE_FLOAT; 1958 break; 1959 case ir_unop_f2b: 1960 emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], 1961 st_src_reg_for_float(0.0)); 1962 break; 1963 case ir_unop_d2b: 1964 emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], 1965 st_src_reg_for_double(0.0)); 1966 break; 1967 case ir_unop_i2b: 1968 if (native_integers) 1969 emit_asm(ir, TGSI_OPCODE_USNE, result_dst, op[0], 1970 st_src_reg_for_int(0)); 1971 else 1972 emit_asm(ir, TGSI_OPCODE_SNE, result_dst, op[0], 1973 st_src_reg_for_float(0.0)); 1974 break; 1975 case ir_unop_bitcast_u642d: 1976 case ir_unop_bitcast_i642d: 1977 result_src = op[0]; 1978 result_src.type = GLSL_TYPE_DOUBLE; 1979 break; 1980 case ir_unop_bitcast_d2i64: 1981 result_src = op[0]; 1982 result_src.type = GLSL_TYPE_INT64; 1983 break; 1984 case ir_unop_bitcast_d2u64: 1985 result_src = op[0]; 1986 result_src.type = GLSL_TYPE_UINT64; 1987 break; 1988 case ir_unop_trunc: 1989 emit_asm(ir, TGSI_OPCODE_TRUNC, result_dst, op[0]); 1990 break; 1991 case ir_unop_ceil: 1992 emit_asm(ir, TGSI_OPCODE_CEIL, result_dst, op[0]); 1993 break; 1994 case ir_unop_floor: 1995 emit_asm(ir, TGSI_OPCODE_FLR, result_dst, op[0]); 1996 break; 1997 case ir_unop_round_even: 1998 emit_asm(ir, TGSI_OPCODE_ROUND, result_dst, op[0]); 1999 break; 2000 case ir_unop_fract: 2001 emit_asm(ir, TGSI_OPCODE_FRC, result_dst, op[0]); 2002 break; 2003 2004 case ir_binop_min: 2005 emit_asm(ir, TGSI_OPCODE_MIN, result_dst, op[0], op[1]); 2006 break; 2007 case ir_binop_max: 2008 emit_asm(ir, TGSI_OPCODE_MAX, result_dst, op[0], op[1]); 2009 break; 2010 case ir_binop_pow: 2011 emit_scalar(ir, TGSI_OPCODE_POW, result_dst, op[0], op[1]); 2012 break; 2013 2014 case ir_unop_bit_not: 2015 if (native_integers) { 2016 emit_asm(ir, TGSI_OPCODE_NOT, result_dst, op[0]); 2017 break; 2018 } 2019 FALLTHROUGH; 2020 case ir_unop_u2f: 2021 if (native_integers) { 2022 emit_asm(ir, TGSI_OPCODE_U2F, result_dst, op[0]); 2023 break; 2024 } 2025 FALLTHROUGH; 2026 case ir_binop_lshift: 2027 case ir_binop_rshift: 2028 if (native_integers) { 2029 enum tgsi_opcode opcode = ir->operation == ir_binop_lshift 2030 ? TGSI_OPCODE_SHL : TGSI_OPCODE_ISHR; 2031 st_src_reg count; 2032 2033 if (glsl_base_type_is_64bit(op[0].type)) { 2034 /* GLSL shift operations have 32-bit shift counts, but TGSI uses 2035 * 64 bits. 2036 */ 2037 count = get_temp(glsl_type::u64vec(ir->operands[1] 2038 ->type->components())); 2039 emit_asm(ir, TGSI_OPCODE_U2I64, st_dst_reg(count), op[1]); 2040 } else { 2041 count = op[1]; 2042 } 2043 2044 emit_asm(ir, opcode, result_dst, op[0], count); 2045 break; 2046 } 2047 FALLTHROUGH; 2048 case ir_binop_bit_and: 2049 if (native_integers) { 2050 emit_asm(ir, TGSI_OPCODE_AND, result_dst, op[0], op[1]); 2051 break; 2052 } 2053 FALLTHROUGH; 2054 case ir_binop_bit_xor: 2055 if (native_integers) { 2056 emit_asm(ir, TGSI_OPCODE_XOR, result_dst, op[0], op[1]); 2057 break; 2058 } 2059 FALLTHROUGH; 2060 case ir_binop_bit_or: 2061 if (native_integers) { 2062 emit_asm(ir, TGSI_OPCODE_OR, result_dst, op[0], op[1]); 2063 break; 2064 } 2065 2066 assert(!"GLSL 1.30 features unsupported"); 2067 break; 2068 2069 case ir_binop_ubo_load: { 2070 if (ctx->Const.UseSTD430AsDefaultPacking) { 2071 ir_rvalue *block = ir->operands[0]; 2072 ir_rvalue *offset = ir->operands[1]; 2073 ir_constant *const_block = block->as_constant(); 2074 2075 st_src_reg cbuf(PROGRAM_CONSTANT, 2076 (const_block ? const_block->value.u[0] + 1 : 1), 2077 ir->type->base_type); 2078 2079 cbuf.has_index2 = true; 2080 2081 if (!const_block) { 2082 block->accept(this); 2083 cbuf.reladdr = ralloc(mem_ctx, st_src_reg); 2084 *cbuf.reladdr = this->result; 2085 emit_arl(ir, sampler_reladdr, this->result); 2086 } 2087 2088 /* Calculate the surface offset */ 2089 offset->accept(this); 2090 st_src_reg off = this->result; 2091 2092 glsl_to_tgsi_instruction *inst = 2093 emit_asm(ir, TGSI_OPCODE_LOAD, result_dst, off); 2094 2095 if (result_dst.type == GLSL_TYPE_BOOL) 2096 emit_asm(ir, TGSI_OPCODE_USNE, result_dst, st_src_reg(result_dst), 2097 st_src_reg_for_int(0)); 2098 2099 add_buffer_to_load_and_stores(inst, &cbuf, &this->instructions, 2100 NULL); 2101 } else { 2102 ir_constant *const_uniform_block = ir->operands[0]->as_constant(); 2103 ir_constant *const_offset_ir = ir->operands[1]->as_constant(); 2104 unsigned const_offset = const_offset_ir ? 2105 const_offset_ir->value.u[0] : 0; 2106 unsigned const_block = const_uniform_block ? 2107 const_uniform_block->value.u[0] + 1 : 1; 2108 st_src_reg index_reg = get_temp(glsl_type::uint_type); 2109 st_src_reg cbuf; 2110 2111 cbuf.type = ir->type->base_type; 2112 cbuf.file = PROGRAM_CONSTANT; 2113 cbuf.index = 0; 2114 cbuf.reladdr = NULL; 2115 cbuf.negate = 0; 2116 cbuf.abs = 0; 2117 cbuf.index2D = const_block; 2118 2119 assert(ir->type->is_vector() || ir->type->is_scalar()); 2120 2121 if (const_offset_ir) { 2122 /* Constant index into constant buffer */ 2123 cbuf.reladdr = NULL; 2124 cbuf.index = const_offset / 16; 2125 } else { 2126 ir_expression *offset_expr = ir->operands[1]->as_expression(); 2127 st_src_reg offset = op[1]; 2128 2129 /* The OpenGL spec is written in such a way that accesses with 2130 * non-constant offset are almost always vec4-aligned. The only 2131 * exception to this are members of structs in arrays of structs: 2132 * each struct in an array of structs is at least vec4-aligned, 2133 * but single-element and [ui]vec2 members of the struct may be at 2134 * an offset that is not a multiple of 16 bytes. 2135 * 2136 * Here, we extract that offset, relying on previous passes to 2137 * always generate offset expressions of the form 2138 * (+ expr constant_offset). 2139 * 2140 * Note that the std430 layout, which allows more cases of 2141 * alignment less than vec4 in arrays, is not supported for 2142 * uniform blocks, so we do not have to deal with it here. 2143 */ 2144 if (offset_expr && offset_expr->operation == ir_binop_add) { 2145 const_offset_ir = offset_expr->operands[1]->as_constant(); 2146 if (const_offset_ir) { 2147 const_offset = const_offset_ir->value.u[0]; 2148 cbuf.index = const_offset / 16; 2149 offset_expr->operands[0]->accept(this); 2150 offset = this->result; 2151 } 2152 } 2153 2154 /* Relative/variable index into constant buffer */ 2155 emit_asm(ir, TGSI_OPCODE_USHR, st_dst_reg(index_reg), offset, 2156 st_src_reg_for_int(4)); 2157 cbuf.reladdr = ralloc(mem_ctx, st_src_reg); 2158 *cbuf.reladdr = index_reg; 2159 } 2160 2161 if (const_uniform_block) { 2162 /* Constant constant buffer */ 2163 cbuf.reladdr2 = NULL; 2164 } else { 2165 /* Relative/variable constant buffer */ 2166 cbuf.reladdr2 = ralloc(mem_ctx, st_src_reg); 2167 *cbuf.reladdr2 = op[0]; 2168 } 2169 cbuf.has_index2 = true; 2170 2171 cbuf.swizzle = swizzle_for_size(ir->type->vector_elements); 2172 if (glsl_base_type_is_64bit(cbuf.type)) 2173 cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 8, 2174 const_offset % 16 / 8, 2175 const_offset % 16 / 8, 2176 const_offset % 16 / 8); 2177 else 2178 cbuf.swizzle += MAKE_SWIZZLE4(const_offset % 16 / 4, 2179 const_offset % 16 / 4, 2180 const_offset % 16 / 4, 2181 const_offset % 16 / 4); 2182 2183 if (ir->type->is_boolean()) { 2184 emit_asm(ir, TGSI_OPCODE_USNE, result_dst, cbuf, 2185 st_src_reg_for_int(0)); 2186 } else { 2187 emit_asm(ir, TGSI_OPCODE_MOV, result_dst, cbuf); 2188 } 2189 } 2190 break; 2191 } 2192 case ir_triop_lrp: 2193 /* note: we have to reorder the three args here */ 2194 emit_asm(ir, TGSI_OPCODE_LRP, result_dst, op[2], op[1], op[0]); 2195 break; 2196 case ir_triop_csel: 2197 if (this->ctx->Const.NativeIntegers) 2198 emit_asm(ir, TGSI_OPCODE_UCMP, result_dst, op[0], op[1], op[2]); 2199 else { 2200 op[0].negate = ~op[0].negate; 2201 emit_asm(ir, TGSI_OPCODE_CMP, result_dst, op[0], op[1], op[2]); 2202 } 2203 break; 2204 case ir_triop_bitfield_extract: 2205 emit_asm(ir, TGSI_OPCODE_IBFE, result_dst, op[0], op[1], op[2]); 2206 break; 2207 case ir_quadop_bitfield_insert: 2208 emit_asm(ir, TGSI_OPCODE_BFI, result_dst, op[0], op[1], op[2], op[3]); 2209 break; 2210 case ir_unop_bitfield_reverse: 2211 emit_asm(ir, TGSI_OPCODE_BREV, result_dst, op[0]); 2212 break; 2213 case ir_unop_bit_count: 2214 emit_asm(ir, TGSI_OPCODE_POPC, result_dst, op[0]); 2215 break; 2216 case ir_unop_find_msb: 2217 emit_asm(ir, TGSI_OPCODE_IMSB, result_dst, op[0]); 2218 break; 2219 case ir_unop_find_lsb: 2220 emit_asm(ir, TGSI_OPCODE_LSB, result_dst, op[0]); 2221 break; 2222 case ir_binop_imul_high: 2223 emit_asm(ir, TGSI_OPCODE_IMUL_HI, result_dst, op[0], op[1]); 2224 break; 2225 case ir_triop_fma: 2226 /* In theory, MAD is incorrect here. */ 2227 if (have_fma) 2228 emit_asm(ir, TGSI_OPCODE_FMA, result_dst, op[0], op[1], op[2]); 2229 else 2230 emit_asm(ir, TGSI_OPCODE_MAD, result_dst, op[0], op[1], op[2]); 2231 break; 2232 case ir_unop_interpolate_at_centroid: 2233 emit_asm(ir, TGSI_OPCODE_INTERP_CENTROID, result_dst, op[0]); 2234 break; 2235 case ir_binop_interpolate_at_offset: { 2236 /* The y coordinate needs to be flipped for the default fb */ 2237 static const gl_state_index16 transform_y_state[STATE_LENGTH] 2238 = { STATE_FB_WPOS_Y_TRANSFORM }; 2239 2240 unsigned transform_y_index = 2241 _mesa_add_state_reference(this->prog->Parameters, 2242 transform_y_state); 2243 2244 st_src_reg transform_y = st_src_reg(PROGRAM_STATE_VAR, 2245 transform_y_index, 2246 glsl_type::vec4_type); 2247 transform_y.swizzle = SWIZZLE_XXXX; 2248 2249 st_src_reg temp = get_temp(glsl_type::vec2_type); 2250 st_dst_reg temp_dst = st_dst_reg(temp); 2251 2252 emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[1]); 2253 temp_dst.writemask = WRITEMASK_Y; 2254 emit_asm(ir, TGSI_OPCODE_MUL, temp_dst, transform_y, op[1]); 2255 emit_asm(ir, TGSI_OPCODE_INTERP_OFFSET, result_dst, op[0], temp); 2256 break; 2257 } 2258 case ir_binop_interpolate_at_sample: 2259 emit_asm(ir, TGSI_OPCODE_INTERP_SAMPLE, result_dst, op[0], op[1]); 2260 break; 2261 2262 case ir_unop_d2f: 2263 emit_asm(ir, TGSI_OPCODE_D2F, result_dst, op[0]); 2264 break; 2265 case ir_unop_f2d: 2266 emit_asm(ir, TGSI_OPCODE_F2D, result_dst, op[0]); 2267 break; 2268 case ir_unop_d2i: 2269 emit_asm(ir, TGSI_OPCODE_D2I, result_dst, op[0]); 2270 break; 2271 case ir_unop_i2d: 2272 emit_asm(ir, TGSI_OPCODE_I2D, result_dst, op[0]); 2273 break; 2274 case ir_unop_d2u: 2275 emit_asm(ir, TGSI_OPCODE_D2U, result_dst, op[0]); 2276 break; 2277 case ir_unop_u2d: 2278 emit_asm(ir, TGSI_OPCODE_U2D, result_dst, op[0]); 2279 break; 2280 case ir_unop_unpack_double_2x32: 2281 case ir_unop_pack_double_2x32: 2282 case ir_unop_unpack_int_2x32: 2283 case ir_unop_pack_int_2x32: 2284 case ir_unop_unpack_uint_2x32: 2285 case ir_unop_pack_uint_2x32: 2286 case ir_unop_unpack_sampler_2x32: 2287 case ir_unop_pack_sampler_2x32: 2288 case ir_unop_unpack_image_2x32: 2289 case ir_unop_pack_image_2x32: 2290 emit_asm(ir, TGSI_OPCODE_MOV, result_dst, op[0]); 2291 break; 2292 2293 case ir_binop_ldexp: 2294 if (ir->operands[0]->type->is_double()) { 2295 emit_asm(ir, TGSI_OPCODE_DLDEXP, result_dst, op[0], op[1]); 2296 } else if (ir->operands[0]->type->is_float()) { 2297 emit_asm(ir, TGSI_OPCODE_LDEXP, result_dst, op[0], op[1]); 2298 } else { 2299 assert(!"Invalid ldexp for non-double opcode in glsl_to_tgsi_visitor::visit()"); 2300 } 2301 break; 2302 2303 case ir_unop_pack_half_2x16: 2304 emit_asm(ir, TGSI_OPCODE_PK2H, result_dst, op[0]); 2305 break; 2306 case ir_unop_unpack_half_2x16: 2307 emit_asm(ir, TGSI_OPCODE_UP2H, result_dst, op[0]); 2308 break; 2309 2310 case ir_unop_get_buffer_size: { 2311 ir_constant *const_offset = ir->operands[0]->as_constant(); 2312 st_src_reg buffer( 2313 PROGRAM_BUFFER, 2314 const_offset ? const_offset->value.u[0] : 0, 2315 GLSL_TYPE_UINT); 2316 if (!const_offset) { 2317 buffer.reladdr = ralloc(mem_ctx, st_src_reg); 2318 *buffer.reladdr = op[0]; 2319 emit_arl(ir, sampler_reladdr, op[0]); 2320 } 2321 emit_asm(ir, TGSI_OPCODE_RESQ, result_dst)->resource = buffer; 2322 break; 2323 } 2324 2325 case ir_unop_u2i64: 2326 case ir_unop_u2u64: 2327 case ir_unop_b2i64: { 2328 st_src_reg temp = get_temp(glsl_type::uvec4_type); 2329 st_dst_reg temp_dst = st_dst_reg(temp); 2330 unsigned orig_swz = op[0].swizzle; 2331 /* 2332 * To convert unsigned to 64-bit: 2333 * zero Y channel, copy X channel. 2334 */ 2335 temp_dst.writemask = WRITEMASK_Y; 2336 if (vector_elements > 1) 2337 temp_dst.writemask |= WRITEMASK_W; 2338 emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, st_src_reg_for_int(0)); 2339 temp_dst.writemask = WRITEMASK_X; 2340 if (vector_elements > 1) 2341 temp_dst.writemask |= WRITEMASK_Z; 2342 op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(orig_swz, 0), GET_SWZ(orig_swz, 0), 2343 GET_SWZ(orig_swz, 1), GET_SWZ(orig_swz, 1)); 2344 if (ir->operation == ir_unop_u2i64 || ir->operation == ir_unop_u2u64) 2345 emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[0]); 2346 else 2347 emit_asm(ir, TGSI_OPCODE_AND, temp_dst, op[0], st_src_reg_for_int(1)); 2348 result_src = temp; 2349 result_src.type = GLSL_TYPE_UINT64; 2350 if (vector_elements > 2) { 2351 /* Subtle: We rely on the fact that get_temp here returns the next 2352 * TGSI temporary register directly after the temp register used for 2353 * the first two components, so that the result gets picked up 2354 * automatically. 2355 */ 2356 st_src_reg temp = get_temp(glsl_type::uvec4_type); 2357 st_dst_reg temp_dst = st_dst_reg(temp); 2358 temp_dst.writemask = WRITEMASK_Y; 2359 if (vector_elements > 3) 2360 temp_dst.writemask |= WRITEMASK_W; 2361 emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, st_src_reg_for_int(0)); 2362 2363 temp_dst.writemask = WRITEMASK_X; 2364 if (vector_elements > 3) 2365 temp_dst.writemask |= WRITEMASK_Z; 2366 op[0].swizzle = MAKE_SWIZZLE4(GET_SWZ(orig_swz, 2), 2367 GET_SWZ(orig_swz, 2), 2368 GET_SWZ(orig_swz, 3), 2369 GET_SWZ(orig_swz, 3)); 2370 if (ir->operation == ir_unop_u2i64 || ir->operation == ir_unop_u2u64) 2371 emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[0]); 2372 else 2373 emit_asm(ir, TGSI_OPCODE_AND, temp_dst, op[0], 2374 st_src_reg_for_int(1)); 2375 } 2376 break; 2377 } 2378 case ir_unop_i642i: 2379 case ir_unop_u642i: 2380 case ir_unop_u642u: 2381 case ir_unop_i642u: { 2382 st_src_reg temp = get_temp(glsl_type::uvec4_type); 2383 st_dst_reg temp_dst = st_dst_reg(temp); 2384 unsigned orig_swz = op[0].swizzle; 2385 unsigned orig_idx = op[0].index; 2386 int el; 2387 temp_dst.writemask = WRITEMASK_X; 2388 2389 for (el = 0; el < vector_elements; el++) { 2390 unsigned swz = GET_SWZ(orig_swz, el); 2391 if (swz & 1) 2392 op[0].swizzle = MAKE_SWIZZLE4(SWIZZLE_Z, SWIZZLE_Z, 2393 SWIZZLE_Z, SWIZZLE_Z); 2394 else 2395 op[0].swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, 2396 SWIZZLE_X, SWIZZLE_X); 2397 if (swz > 2) 2398 op[0].index = orig_idx + 1; 2399 op[0].type = GLSL_TYPE_UINT; 2400 temp_dst.writemask = WRITEMASK_X << el; 2401 emit_asm(ir, TGSI_OPCODE_MOV, temp_dst, op[0]); 2402 } 2403 result_src = temp; 2404 if (ir->operation == ir_unop_u642u || ir->operation == ir_unop_i642u) 2405 result_src.type = GLSL_TYPE_UINT; 2406 else 2407 result_src.type = GLSL_TYPE_INT; 2408 break; 2409 } 2410 case ir_unop_i642b: 2411 emit_asm(ir, TGSI_OPCODE_U64SNE, result_dst, op[0], 2412 st_src_reg_for_int64(0)); 2413 break; 2414 case ir_unop_i642f: 2415 emit_asm(ir, TGSI_OPCODE_I642F, result_dst, op[0]); 2416 break; 2417 case ir_unop_u642f: 2418 emit_asm(ir, TGSI_OPCODE_U642F, result_dst, op[0]); 2419 break; 2420 case ir_unop_i642d: 2421 emit_asm(ir, TGSI_OPCODE_I642D, result_dst, op[0]); 2422 break; 2423 case ir_unop_u642d: 2424 emit_asm(ir, TGSI_OPCODE_U642D, result_dst, op[0]); 2425 break; 2426 case ir_unop_i2i64: 2427 emit_asm(ir, TGSI_OPCODE_I2I64, result_dst, op[0]); 2428 break; 2429 case ir_unop_f2i64: 2430 emit_asm(ir, TGSI_OPCODE_F2I64, result_dst, op[0]); 2431 break; 2432 case ir_unop_d2i64: 2433 emit_asm(ir, TGSI_OPCODE_D2I64, result_dst, op[0]); 2434 break; 2435 case ir_unop_i2u64: 2436 emit_asm(ir, TGSI_OPCODE_I2I64, result_dst, op[0]); 2437 break; 2438 case ir_unop_f2u64: 2439 emit_asm(ir, TGSI_OPCODE_F2U64, result_dst, op[0]); 2440 break; 2441 case ir_unop_d2u64: 2442 emit_asm(ir, TGSI_OPCODE_D2U64, result_dst, op[0]); 2443 break; 2444 /* these might be needed */ 2445 case ir_unop_pack_snorm_2x16: 2446 case ir_unop_pack_unorm_2x16: 2447 case ir_unop_pack_snorm_4x8: 2448 case ir_unop_pack_unorm_4x8: 2449 2450 case ir_unop_unpack_snorm_2x16: 2451 case ir_unop_unpack_unorm_2x16: 2452 case ir_unop_unpack_snorm_4x8: 2453 case ir_unop_unpack_unorm_4x8: 2454 2455 case ir_quadop_vector: 2456 case ir_binop_vector_extract: 2457 case ir_triop_vector_insert: 2458 case ir_binop_carry: 2459 case ir_binop_borrow: 2460 case ir_unop_ssbo_unsized_array_length: 2461 case ir_unop_implicitly_sized_array_length: 2462 case ir_unop_atan: 2463 case ir_binop_atan2: 2464 case ir_unop_clz: 2465 case ir_binop_add_sat: 2466 case ir_binop_sub_sat: 2467 case ir_binop_abs_sub: 2468 case ir_binop_avg: 2469 case ir_binop_avg_round: 2470 case ir_binop_mul_32x16: 2471 case ir_unop_f162f: 2472 case ir_unop_f2f16: 2473 case ir_unop_f2fmp: 2474 case ir_unop_f162b: 2475 case ir_unop_b2f16: 2476 case ir_unop_i2i: 2477 case ir_unop_i2imp: 2478 case ir_unop_u2u: 2479 case ir_unop_u2ump: 2480 /* This operation is not supported, or should have already been handled. 2481 */ 2482 assert(!"Invalid ir opcode in glsl_to_tgsi_visitor::visit()"); 2483 break; 2484 } 2485 2486 this->result = result_src; 2487} 2488 2489 2490void 2491glsl_to_tgsi_visitor::visit(ir_swizzle *ir) 2492{ 2493 st_src_reg src; 2494 int i; 2495 int swizzle[4] = {0}; 2496 2497 /* Note that this is only swizzles in expressions, not those on the left 2498 * hand side of an assignment, which do write masking. See ir_assignment 2499 * for that. 2500 */ 2501 2502 ir->val->accept(this); 2503 src = this->result; 2504 assert(src.file != PROGRAM_UNDEFINED); 2505 assert(ir->type->vector_elements > 0); 2506 2507 for (i = 0; i < 4; i++) { 2508 if (i < ir->type->vector_elements) { 2509 switch (i) { 2510 case 0: 2511 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x); 2512 break; 2513 case 1: 2514 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y); 2515 break; 2516 case 2: 2517 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z); 2518 break; 2519 case 3: 2520 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w); 2521 break; 2522 } 2523 } else { 2524 /* If the type is smaller than a vec4, replicate the last 2525 * channel out. 2526 */ 2527 swizzle[i] = swizzle[ir->type->vector_elements - 1]; 2528 } 2529 } 2530 2531 src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); 2532 2533 this->result = src; 2534} 2535 2536/* Test if the variable is an array. Note that geometry and 2537 * tessellation shader inputs are outputs are always arrays (except 2538 * for patch inputs), so only the array element type is considered. 2539 */ 2540static bool 2541is_inout_array(unsigned stage, ir_variable *var, bool *remove_array) 2542{ 2543 const glsl_type *type = var->type; 2544 2545 *remove_array = false; 2546 2547 if ((stage == MESA_SHADER_VERTEX && var->data.mode == ir_var_shader_in) || 2548 (stage == MESA_SHADER_FRAGMENT && var->data.mode == ir_var_shader_out)) 2549 return false; 2550 2551 if (((stage == MESA_SHADER_GEOMETRY && var->data.mode == ir_var_shader_in) || 2552 (stage == MESA_SHADER_TESS_EVAL && var->data.mode == ir_var_shader_in) || 2553 stage == MESA_SHADER_TESS_CTRL) && 2554 !var->data.patch) { 2555 if (!var->type->is_array()) 2556 return false; /* a system value probably */ 2557 2558 type = var->type->fields.array; 2559 *remove_array = true; 2560 } 2561 2562 return type->is_array() || type->is_matrix(); 2563} 2564 2565static unsigned 2566st_translate_interp_loc(ir_variable *var) 2567{ 2568 if (var->data.centroid) 2569 return TGSI_INTERPOLATE_LOC_CENTROID; 2570 else if (var->data.sample) 2571 return TGSI_INTERPOLATE_LOC_SAMPLE; 2572 else 2573 return TGSI_INTERPOLATE_LOC_CENTER; 2574} 2575 2576void 2577glsl_to_tgsi_visitor::visit(ir_dereference_variable *ir) 2578{ 2579 variable_storage *entry; 2580 ir_variable *var = ir->var; 2581 bool remove_array; 2582 2583 if (handle_bound_deref(ir->as_dereference())) 2584 return; 2585 2586 entry = find_variable_storage(ir->var); 2587 2588 if (!entry) { 2589 switch (var->data.mode) { 2590 case ir_var_uniform: 2591 entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM, 2592 var->data.param_index); 2593 _mesa_hash_table_insert(this->variables, var, entry); 2594 break; 2595 case ir_var_shader_in: { 2596 /* The linker assigns locations for varyings and attributes, 2597 * including deprecated builtins (like gl_Color), user-assign 2598 * generic attributes (glBindVertexLocation), and 2599 * user-defined varyings. 2600 */ 2601 assert(var->data.location != -1); 2602 2603 const glsl_type *type_without_array = var->type->without_array(); 2604 struct inout_decl *decl = &inputs[num_inputs]; 2605 unsigned component = var->data.location_frac; 2606 unsigned num_components; 2607 num_inputs++; 2608 2609 if (type_without_array->is_64bit()) 2610 component = component / 2; 2611 if (type_without_array->vector_elements) 2612 num_components = type_without_array->vector_elements; 2613 else 2614 num_components = 4; 2615 2616 decl->mesa_index = var->data.location; 2617 decl->interp = (glsl_interp_mode) var->data.interpolation; 2618 decl->interp_loc = st_translate_interp_loc(var); 2619 decl->base_type = type_without_array->base_type; 2620 decl->usage_mask = u_bit_consecutive(component, num_components); 2621 2622 if (is_inout_array(shader->Stage, var, &remove_array)) { 2623 decl->array_id = num_input_arrays + 1; 2624 num_input_arrays++; 2625 } else { 2626 decl->array_id = 0; 2627 } 2628 2629 if (remove_array) 2630 decl->size = type_size(var->type->fields.array); 2631 else 2632 decl->size = type_size(var->type); 2633 2634 entry = new(mem_ctx) variable_storage(var, 2635 PROGRAM_INPUT, 2636 decl->mesa_index, 2637 decl->array_id); 2638 entry->component = component; 2639 2640 _mesa_hash_table_insert(this->variables, var, entry); 2641 2642 break; 2643 } 2644 case ir_var_shader_out: { 2645 assert(var->data.location != -1); 2646 2647 const glsl_type *type_without_array = var->type->without_array(); 2648 struct inout_decl *decl = &outputs[num_outputs]; 2649 unsigned component = var->data.location_frac; 2650 unsigned num_components; 2651 num_outputs++; 2652 2653 decl->invariant = var->data.invariant; 2654 2655 if (type_without_array->is_64bit()) 2656 component = component / 2; 2657 if (type_without_array->vector_elements) 2658 num_components = type_without_array->vector_elements; 2659 else 2660 num_components = 4; 2661 2662 decl->mesa_index = var->data.location + FRAG_RESULT_MAX * var->data.index; 2663 decl->base_type = type_without_array->base_type; 2664 decl->usage_mask = u_bit_consecutive(component, num_components); 2665 if (var->data.stream & (1u << 31)) { 2666 decl->gs_out_streams = var->data.stream & ~(1u << 31); 2667 } else { 2668 assert(var->data.stream < 4); 2669 decl->gs_out_streams = 0; 2670 for (unsigned i = 0; i < num_components; ++i) 2671 decl->gs_out_streams |= var->data.stream << (2 * (component + i)); 2672 } 2673 2674 if (is_inout_array(shader->Stage, var, &remove_array)) { 2675 decl->array_id = num_output_arrays + 1; 2676 num_output_arrays++; 2677 } else { 2678 decl->array_id = 0; 2679 } 2680 2681 if (remove_array) 2682 decl->size = type_size(var->type->fields.array); 2683 else 2684 decl->size = type_size(var->type); 2685 2686 if (var->data.fb_fetch_output) { 2687 st_dst_reg dst = st_dst_reg(get_temp(var->type)); 2688 st_src_reg src = st_src_reg(PROGRAM_OUTPUT, decl->mesa_index, 2689 var->type, component, decl->array_id); 2690 emit_asm(NULL, TGSI_OPCODE_FBFETCH, dst, src); 2691 entry = new(mem_ctx) variable_storage(var, dst.file, dst.index, 2692 dst.array_id); 2693 } else { 2694 entry = new(mem_ctx) variable_storage(var, 2695 PROGRAM_OUTPUT, 2696 decl->mesa_index, 2697 decl->array_id); 2698 } 2699 entry->component = component; 2700 2701 _mesa_hash_table_insert(this->variables, var, entry); 2702 2703 break; 2704 } 2705 case ir_var_system_value: 2706 entry = new(mem_ctx) variable_storage(var, 2707 PROGRAM_SYSTEM_VALUE, 2708 var->data.location); 2709 break; 2710 case ir_var_auto: 2711 case ir_var_temporary: 2712 st_src_reg src = get_temp(var->type); 2713 2714 entry = new(mem_ctx) variable_storage(var, src.file, src.index, 2715 src.array_id); 2716 _mesa_hash_table_insert(this->variables, var, entry); 2717 2718 break; 2719 } 2720 2721 if (!entry) { 2722 printf("Failed to make storage for %s\n", var->name); 2723 exit(1); 2724 } 2725 } 2726 2727 this->result = st_src_reg(entry->file, entry->index, var->type, 2728 entry->component, entry->array_id); 2729 if (this->shader->Stage == MESA_SHADER_VERTEX && 2730 var->data.mode == ir_var_shader_in && 2731 var->type->without_array()->is_double()) 2732 this->result.is_double_vertex_input = true; 2733 if (!native_integers) 2734 this->result.type = GLSL_TYPE_FLOAT; 2735} 2736 2737static void 2738shrink_array_declarations(struct inout_decl *decls, unsigned count, 2739 GLbitfield64* usage_mask, 2740 GLbitfield64 double_usage_mask, 2741 GLbitfield* patch_usage_mask) 2742{ 2743 unsigned i; 2744 int j; 2745 2746 /* Fix array declarations by removing unused array elements at both ends 2747 * of the arrays. For example, mat4[3] where only mat[1] is used. 2748 */ 2749 for (i = 0; i < count; i++) { 2750 struct inout_decl *decl = &decls[i]; 2751 if (!decl->array_id) 2752 continue; 2753 2754 /* Shrink the beginning. */ 2755 for (j = 0; j < (int)decl->size; j++) { 2756 if (decl->mesa_index >= VARYING_SLOT_PATCH0) { 2757 if (*patch_usage_mask & 2758 BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j)) 2759 break; 2760 } 2761 else { 2762 if (*usage_mask & BITFIELD64_BIT(decl->mesa_index+j)) 2763 break; 2764 if (double_usage_mask & BITFIELD64_BIT(decl->mesa_index+j-1)) 2765 break; 2766 } 2767 2768 decl->mesa_index++; 2769 decl->size--; 2770 j--; 2771 } 2772 2773 /* Shrink the end. */ 2774 for (j = decl->size-1; j >= 0; j--) { 2775 if (decl->mesa_index >= VARYING_SLOT_PATCH0) { 2776 if (*patch_usage_mask & 2777 BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j)) 2778 break; 2779 } 2780 else { 2781 if (*usage_mask & BITFIELD64_BIT(decl->mesa_index+j)) 2782 break; 2783 if (double_usage_mask & BITFIELD64_BIT(decl->mesa_index+j-1)) 2784 break; 2785 } 2786 2787 decl->size--; 2788 } 2789 2790 /* When not all entries of an array are accessed, we mark them as used 2791 * here anyway, to ensure that the input/output mapping logic doesn't get 2792 * confused. 2793 * 2794 * TODO This happens when an array isn't used via indirect access, which 2795 * some game ports do (at least eON-based). There is an optimization 2796 * opportunity here by replacing the array declaration with non-array 2797 * declarations of those slots that are actually used. 2798 */ 2799 for (j = 1; j < (int)decl->size; ++j) { 2800 if (decl->mesa_index >= VARYING_SLOT_PATCH0) 2801 *patch_usage_mask |= BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j); 2802 else 2803 *usage_mask |= BITFIELD64_BIT(decl->mesa_index + j); 2804 } 2805 } 2806} 2807 2808 2809static void 2810mark_array_io(struct inout_decl *decls, unsigned count, 2811 GLbitfield64* usage_mask, 2812 GLbitfield64 double_usage_mask, 2813 GLbitfield* patch_usage_mask) 2814{ 2815 unsigned i; 2816 int j; 2817 2818 /* Fix array declarations by removing unused array elements at both ends 2819 * of the arrays. For example, mat4[3] where only mat[1] is used. 2820 */ 2821 for (i = 0; i < count; i++) { 2822 struct inout_decl *decl = &decls[i]; 2823 if (!decl->array_id) 2824 continue; 2825 2826 /* When not all entries of an array are accessed, we mark them as used 2827 * here anyway, to ensure that the input/output mapping logic doesn't get 2828 * confused. 2829 * 2830 * TODO This happens when an array isn't used via indirect access, which 2831 * some game ports do (at least eON-based). There is an optimization 2832 * opportunity here by replacing the array declaration with non-array 2833 * declarations of those slots that are actually used. 2834 */ 2835 for (j = 0; j < (int)decl->size; ++j) { 2836 if (decl->mesa_index >= VARYING_SLOT_PATCH0) 2837 *patch_usage_mask |= BITFIELD64_BIT(decl->mesa_index - VARYING_SLOT_PATCH0 + j); 2838 else 2839 *usage_mask |= BITFIELD64_BIT(decl->mesa_index + j); 2840 } 2841 } 2842} 2843 2844void 2845glsl_to_tgsi_visitor::visit(ir_dereference_array *ir) 2846{ 2847 ir_constant *index; 2848 st_src_reg src; 2849 bool is_2D = false; 2850 ir_variable *var = ir->variable_referenced(); 2851 2852 if (handle_bound_deref(ir->as_dereference())) 2853 return; 2854 2855 /* We only need the logic provided by count_vec4_slots() 2856 * for arrays of structs. Indirect sampler and image indexing is handled 2857 * elsewhere. 2858 */ 2859 int element_size = ir->type->without_array()->is_struct() ? 2860 ir->type->count_vec4_slots(false, var->data.bindless) : 2861 type_size(ir->type); 2862 2863 index = ir->array_index->constant_expression_value(ralloc_parent(ir)); 2864 2865 ir->array->accept(this); 2866 src = this->result; 2867 2868 if (!src.has_index2) { 2869 switch (this->prog->Target) { 2870 case GL_TESS_CONTROL_PROGRAM_NV: 2871 is_2D = (src.file == PROGRAM_INPUT || src.file == PROGRAM_OUTPUT) && 2872 !ir->variable_referenced()->data.patch; 2873 break; 2874 case GL_TESS_EVALUATION_PROGRAM_NV: 2875 is_2D = src.file == PROGRAM_INPUT && 2876 !ir->variable_referenced()->data.patch; 2877 break; 2878 case GL_GEOMETRY_PROGRAM_NV: 2879 is_2D = src.file == PROGRAM_INPUT; 2880 break; 2881 } 2882 } 2883 2884 if (is_2D) 2885 element_size = 1; 2886 2887 if (index) { 2888 2889 if (this->prog->Target == GL_VERTEX_PROGRAM_ARB && 2890 src.file == PROGRAM_INPUT) 2891 element_size = attrib_type_size(ir->type, true); 2892 if (is_2D) { 2893 src.index2D = index->value.i[0]; 2894 src.has_index2 = true; 2895 } else 2896 src.index += index->value.i[0] * element_size; 2897 } else { 2898 /* Variable index array dereference. It eats the "vec4" of the 2899 * base of the array and an index that offsets the TGSI register 2900 * index. 2901 */ 2902 ir->array_index->accept(this); 2903 2904 st_src_reg index_reg; 2905 2906 if (element_size == 1) { 2907 index_reg = this->result; 2908 } else { 2909 index_reg = get_temp(native_integers ? 2910 glsl_type::int_type : glsl_type::float_type); 2911 2912 emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(index_reg), 2913 this->result, st_src_reg_for_type(index_reg.type, element_size)); 2914 } 2915 2916 /* If there was already a relative address register involved, add the 2917 * new and the old together to get the new offset. 2918 */ 2919 if (!is_2D && src.reladdr != NULL) { 2920 st_src_reg accum_reg = get_temp(native_integers ? 2921 glsl_type::int_type : glsl_type::float_type); 2922 2923 emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(accum_reg), 2924 index_reg, *src.reladdr); 2925 2926 index_reg = accum_reg; 2927 } 2928 2929 if (is_2D) { 2930 src.reladdr2 = ralloc(mem_ctx, st_src_reg); 2931 *src.reladdr2 = index_reg; 2932 src.index2D = 0; 2933 src.has_index2 = true; 2934 } else { 2935 src.reladdr = ralloc(mem_ctx, st_src_reg); 2936 *src.reladdr = index_reg; 2937 } 2938 } 2939 2940 /* Change the register type to the element type of the array. */ 2941 src.type = ir->type->base_type; 2942 2943 this->result = src; 2944} 2945 2946void 2947glsl_to_tgsi_visitor::visit(ir_dereference_record *ir) 2948{ 2949 unsigned int i; 2950 const glsl_type *struct_type = ir->record->type; 2951 ir_variable *var = ir->record->variable_referenced(); 2952 int offset = 0; 2953 2954 if (handle_bound_deref(ir->as_dereference())) 2955 return; 2956 2957 ir->record->accept(this); 2958 2959 assert(ir->field_idx >= 0); 2960 assert(var); 2961 for (i = 0; i < struct_type->length; i++) { 2962 if (i == (unsigned) ir->field_idx) 2963 break; 2964 const glsl_type *member_type = struct_type->fields.structure[i].type; 2965 offset += member_type->count_vec4_slots(false, var->data.bindless); 2966 } 2967 2968 /* If the type is smaller than a vec4, replicate the last channel out. */ 2969 if (ir->type->is_scalar() || ir->type->is_vector()) 2970 this->result.swizzle = swizzle_for_size(ir->type->vector_elements); 2971 else 2972 this->result.swizzle = SWIZZLE_NOOP; 2973 2974 this->result.index += offset; 2975 this->result.type = ir->type->base_type; 2976} 2977 2978/** 2979 * We want to be careful in assignment setup to hit the actual storage 2980 * instead of potentially using a temporary like we might with the 2981 * ir_dereference handler. 2982 */ 2983static st_dst_reg 2984get_assignment_lhs(ir_dereference *ir, glsl_to_tgsi_visitor *v, int *component) 2985{ 2986 /* The LHS must be a dereference. If the LHS is a variable indexed array 2987 * access of a vector, it must be separated into a series conditional moves 2988 * before reaching this point (see ir_vec_index_to_cond_assign). 2989 */ 2990 assert(ir->as_dereference()); 2991 ir_dereference_array *deref_array = ir->as_dereference_array(); 2992 if (deref_array) { 2993 assert(!deref_array->array->type->is_vector()); 2994 } 2995 2996 /* Use the rvalue deref handler for the most part. We write swizzles using 2997 * the writemask, but we do extract the base component for enhanced layouts 2998 * from the source swizzle. 2999 */ 3000 ir->accept(v); 3001 *component = GET_SWZ(v->result.swizzle, 0); 3002 return st_dst_reg(v->result); 3003} 3004 3005/** 3006 * Process the condition of a conditional assignment 3007 * 3008 * Examines the condition of a conditional assignment to generate the optimal 3009 * first operand of a \c CMP instruction. If the condition is a relational 3010 * operator with 0 (e.g., \c ir_binop_less), the value being compared will be 3011 * used as the source for the \c CMP instruction. Otherwise the comparison 3012 * is processed to a boolean result, and the boolean result is used as the 3013 * operand to the CMP instruction. 3014 */ 3015bool 3016glsl_to_tgsi_visitor::process_move_condition(ir_rvalue *ir) 3017{ 3018 ir_rvalue *src_ir = ir; 3019 bool negate = true; 3020 bool switch_order = false; 3021 3022 ir_expression *const expr = ir->as_expression(); 3023 3024 if (native_integers) { 3025 if ((expr != NULL) && (expr->num_operands == 2)) { 3026 enum glsl_base_type type = expr->operands[0]->type->base_type; 3027 if (type == GLSL_TYPE_INT || type == GLSL_TYPE_UINT || 3028 type == GLSL_TYPE_BOOL) { 3029 if (expr->operation == ir_binop_equal) { 3030 if (expr->operands[0]->is_zero()) { 3031 src_ir = expr->operands[1]; 3032 switch_order = true; 3033 } 3034 else if (expr->operands[1]->is_zero()) { 3035 src_ir = expr->operands[0]; 3036 switch_order = true; 3037 } 3038 } 3039 else if (expr->operation == ir_binop_nequal) { 3040 if (expr->operands[0]->is_zero()) { 3041 src_ir = expr->operands[1]; 3042 } 3043 else if (expr->operands[1]->is_zero()) { 3044 src_ir = expr->operands[0]; 3045 } 3046 } 3047 } 3048 } 3049 3050 src_ir->accept(this); 3051 return switch_order; 3052 } 3053 3054 if ((expr != NULL) && (expr->num_operands == 2)) { 3055 bool zero_on_left = false; 3056 3057 if (expr->operands[0]->is_zero()) { 3058 src_ir = expr->operands[1]; 3059 zero_on_left = true; 3060 } else if (expr->operands[1]->is_zero()) { 3061 src_ir = expr->operands[0]; 3062 zero_on_left = false; 3063 } 3064 3065 /* a is - 0 + - 0 + 3066 * (a < 0) T F F ( a < 0) T F F 3067 * (0 < a) F F T (-a < 0) F F T 3068 * (a >= 0) F T T ( a < 0) T F F (swap order of other operands) 3069 * (0 >= a) T T F (-a < 0) F F T (swap order of other operands) 3070 * 3071 * Note that exchanging the order of 0 and 'a' in the comparison simply 3072 * means that the value of 'a' should be negated. 3073 */ 3074 if (src_ir != ir) { 3075 switch (expr->operation) { 3076 case ir_binop_less: 3077 switch_order = false; 3078 negate = zero_on_left; 3079 break; 3080 3081 case ir_binop_gequal: 3082 switch_order = true; 3083 negate = zero_on_left; 3084 break; 3085 3086 default: 3087 /* This isn't the right kind of comparison afterall, so make sure 3088 * the whole condition is visited. 3089 */ 3090 src_ir = ir; 3091 break; 3092 } 3093 } 3094 } 3095 3096 src_ir->accept(this); 3097 3098 /* We use the TGSI_OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the 3099 * condition we produced is 0.0 or 1.0. By flipping the sign, we can 3100 * choose which value TGSI_OPCODE_CMP produces without an extra instruction 3101 * computing the condition. 3102 */ 3103 if (negate) 3104 this->result.negate = ~this->result.negate; 3105 3106 return switch_order; 3107} 3108 3109void 3110glsl_to_tgsi_visitor::emit_block_mov(ir_assignment *ir, const struct glsl_type *type, 3111 st_dst_reg *l, st_src_reg *r, 3112 st_src_reg *cond, bool cond_swap) 3113{ 3114 if (type->is_struct()) { 3115 for (unsigned int i = 0; i < type->length; i++) { 3116 emit_block_mov(ir, type->fields.structure[i].type, l, r, 3117 cond, cond_swap); 3118 } 3119 return; 3120 } 3121 3122 if (type->is_array()) { 3123 for (unsigned int i = 0; i < type->length; i++) { 3124 emit_block_mov(ir, type->fields.array, l, r, cond, cond_swap); 3125 } 3126 return; 3127 } 3128 3129 if (type->is_matrix()) { 3130 const struct glsl_type *vec_type; 3131 3132 vec_type = glsl_type::get_instance(type->is_double() 3133 ? GLSL_TYPE_DOUBLE : GLSL_TYPE_FLOAT, 3134 type->vector_elements, 1); 3135 3136 for (int i = 0; i < type->matrix_columns; i++) { 3137 emit_block_mov(ir, vec_type, l, r, cond, cond_swap); 3138 } 3139 return; 3140 } 3141 3142 assert(type->is_scalar() || type->is_vector()); 3143 3144 l->type = type->base_type; 3145 r->type = type->base_type; 3146 if (cond) { 3147 st_src_reg l_src = st_src_reg(*l); 3148 3149 if (l_src.file == PROGRAM_OUTPUT && 3150 this->prog->Target == GL_FRAGMENT_PROGRAM_ARB && 3151 (l_src.index == FRAG_RESULT_DEPTH || 3152 l_src.index == FRAG_RESULT_STENCIL)) { 3153 /* This is a special case because the source swizzles will be shifted 3154 * later to account for the difference between GLSL (where they're 3155 * plain floats) and TGSI (where they're Z and Y components). */ 3156 l_src.swizzle = SWIZZLE_XXXX; 3157 } 3158 3159 if (native_integers) { 3160 emit_asm(ir, TGSI_OPCODE_UCMP, *l, *cond, 3161 cond_swap ? l_src : *r, 3162 cond_swap ? *r : l_src); 3163 } else { 3164 emit_asm(ir, TGSI_OPCODE_CMP, *l, *cond, 3165 cond_swap ? l_src : *r, 3166 cond_swap ? *r : l_src); 3167 } 3168 } else { 3169 emit_asm(ir, TGSI_OPCODE_MOV, *l, *r); 3170 } 3171 l->index++; 3172 r->index++; 3173 if (type->is_dual_slot()) { 3174 l->index++; 3175 if (r->is_double_vertex_input == false) 3176 r->index++; 3177 } 3178} 3179 3180void 3181glsl_to_tgsi_visitor::visit(ir_assignment *ir) 3182{ 3183 int dst_component; 3184 st_dst_reg l; 3185 st_src_reg r; 3186 3187 /* all generated instructions need to be flaged as precise */ 3188 this->precise = is_precise(ir->lhs->variable_referenced()); 3189 ir->rhs->accept(this); 3190 r = this->result; 3191 3192 l = get_assignment_lhs(ir->lhs, this, &dst_component); 3193 3194 { 3195 int swizzles[4]; 3196 int first_enabled_chan = 0; 3197 int rhs_chan = 0; 3198 ir_variable *variable = ir->lhs->variable_referenced(); 3199 3200 if (shader->Stage == MESA_SHADER_FRAGMENT && 3201 variable->data.mode == ir_var_shader_out && 3202 (variable->data.location == FRAG_RESULT_DEPTH || 3203 variable->data.location == FRAG_RESULT_STENCIL)) { 3204 assert(ir->lhs->type->is_scalar()); 3205 assert(ir->write_mask == WRITEMASK_X); 3206 3207 if (variable->data.location == FRAG_RESULT_DEPTH) 3208 l.writemask = WRITEMASK_Z; 3209 else { 3210 assert(variable->data.location == FRAG_RESULT_STENCIL); 3211 l.writemask = WRITEMASK_Y; 3212 } 3213 } else if (ir->write_mask == 0) { 3214 assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); 3215 3216 unsigned num_elements = 3217 ir->lhs->type->without_array()->vector_elements; 3218 3219 if (num_elements) { 3220 l.writemask = u_bit_consecutive(0, num_elements); 3221 } else { 3222 /* The type is a struct or an array of (array of) structs. */ 3223 l.writemask = WRITEMASK_XYZW; 3224 } 3225 } else { 3226 l.writemask = ir->write_mask; 3227 } 3228 3229 for (int i = 0; i < 4; i++) { 3230 if (l.writemask & (1 << i)) { 3231 first_enabled_chan = GET_SWZ(r.swizzle, i); 3232 break; 3233 } 3234 } 3235 3236 l.writemask = l.writemask << dst_component; 3237 3238 /* Swizzle a small RHS vector into the channels being written. 3239 * 3240 * glsl ir treats write_mask as dictating how many channels are 3241 * present on the RHS while TGSI treats write_mask as just 3242 * showing which channels of the vec4 RHS get written. 3243 */ 3244 for (int i = 0; i < 4; i++) { 3245 if (l.writemask & (1 << i)) 3246 swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++); 3247 else 3248 swizzles[i] = first_enabled_chan; 3249 } 3250 r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1], 3251 swizzles[2], swizzles[3]); 3252 } 3253 3254 assert(l.file != PROGRAM_UNDEFINED); 3255 assert(r.file != PROGRAM_UNDEFINED); 3256 3257 if (ir->condition) { 3258 const bool switch_order = this->process_move_condition(ir->condition); 3259 st_src_reg condition = this->result; 3260 3261 emit_block_mov(ir, ir->lhs->type, &l, &r, &condition, switch_order); 3262 } else if (ir->rhs->as_expression() && 3263 this->instructions.get_tail() && 3264 ir->rhs == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->ir && 3265 !((glsl_to_tgsi_instruction *)this->instructions.get_tail())->is_64bit_expanded && 3266 type_size(ir->lhs->type) == 1 && 3267 !ir->lhs->type->is_64bit() && 3268 l.writemask == ((glsl_to_tgsi_instruction *)this->instructions.get_tail())->dst[0].writemask) { 3269 /* To avoid emitting an extra MOV when assigning an expression to a 3270 * variable, emit the last instruction of the expression again, but 3271 * replace the destination register with the target of the assignment. 3272 * Dead code elimination will remove the original instruction. 3273 */ 3274 glsl_to_tgsi_instruction *inst, *new_inst; 3275 inst = (glsl_to_tgsi_instruction *)this->instructions.get_tail(); 3276 new_inst = emit_asm(ir, inst->op, l, inst->src[0], inst->src[1], inst->src[2], inst->src[3]); 3277 new_inst->saturate = inst->saturate; 3278 new_inst->resource = inst->resource; 3279 inst->dead_mask = inst->dst[0].writemask; 3280 } else { 3281 emit_block_mov(ir, ir->rhs->type, &l, &r, NULL, false); 3282 } 3283 this->precise = 0; 3284} 3285 3286 3287void 3288glsl_to_tgsi_visitor::visit(ir_constant *ir) 3289{ 3290 st_src_reg src; 3291 GLdouble stack_vals[4] = { 0 }; 3292 gl_constant_value *values = (gl_constant_value *) stack_vals; 3293 GLenum gl_type = GL_NONE; 3294 unsigned int i, elements; 3295 static int in_array = 0; 3296 gl_register_file file = in_array ? PROGRAM_CONSTANT : PROGRAM_IMMEDIATE; 3297 3298 /* Unfortunately, 4 floats is all we can get into 3299 * _mesa_add_typed_unnamed_constant. So, make a temp to store an 3300 * aggregate constant and move each constant value into it. If we 3301 * get lucky, copy propagation will eliminate the extra moves. 3302 */ 3303 if (ir->type->is_struct()) { 3304 st_src_reg temp_base = get_temp(ir->type); 3305 st_dst_reg temp = st_dst_reg(temp_base); 3306 3307 for (i = 0; i < ir->type->length; i++) { 3308 ir_constant *const field_value = ir->get_record_field(i); 3309 int size = type_size(field_value->type); 3310 3311 assert(size > 0); 3312 3313 field_value->accept(this); 3314 src = this->result; 3315 3316 for (unsigned j = 0; j < (unsigned int)size; j++) { 3317 emit_asm(ir, TGSI_OPCODE_MOV, temp, src); 3318 3319 src.index++; 3320 temp.index++; 3321 } 3322 } 3323 this->result = temp_base; 3324 return; 3325 } 3326 3327 if (ir->type->is_array()) { 3328 st_src_reg temp_base = get_temp(ir->type); 3329 st_dst_reg temp = st_dst_reg(temp_base); 3330 int size = type_size(ir->type->fields.array); 3331 3332 assert(size > 0); 3333 in_array++; 3334 3335 for (i = 0; i < ir->type->length; i++) { 3336 ir->const_elements[i]->accept(this); 3337 src = this->result; 3338 for (int j = 0; j < size; j++) { 3339 emit_asm(ir, TGSI_OPCODE_MOV, temp, src); 3340 3341 src.index++; 3342 temp.index++; 3343 } 3344 } 3345 this->result = temp_base; 3346 in_array--; 3347 return; 3348 } 3349 3350 if (ir->type->is_matrix()) { 3351 st_src_reg mat = get_temp(ir->type); 3352 st_dst_reg mat_column = st_dst_reg(mat); 3353 3354 for (i = 0; i < ir->type->matrix_columns; i++) { 3355 switch (ir->type->base_type) { 3356 case GLSL_TYPE_FLOAT: 3357 values = (gl_constant_value *) 3358 &ir->value.f[i * ir->type->vector_elements]; 3359 3360 src = st_src_reg(file, -1, ir->type->base_type); 3361 src.index = add_constant(file, 3362 values, 3363 ir->type->vector_elements, 3364 GL_FLOAT, 3365 &src.swizzle); 3366 emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src); 3367 break; 3368 case GLSL_TYPE_DOUBLE: 3369 values = (gl_constant_value *) 3370 &ir->value.d[i * ir->type->vector_elements]; 3371 src = st_src_reg(file, -1, ir->type->base_type); 3372 src.index = add_constant(file, 3373 values, 3374 ir->type->vector_elements, 3375 GL_DOUBLE, 3376 &src.swizzle); 3377 if (ir->type->vector_elements >= 2) { 3378 mat_column.writemask = WRITEMASK_XY; 3379 src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, 3380 SWIZZLE_X, SWIZZLE_Y); 3381 emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src); 3382 } else { 3383 mat_column.writemask = WRITEMASK_X; 3384 src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, 3385 SWIZZLE_X, SWIZZLE_X); 3386 emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src); 3387 } 3388 src.index++; 3389 if (ir->type->vector_elements > 2) { 3390 if (ir->type->vector_elements == 4) { 3391 mat_column.writemask = WRITEMASK_ZW; 3392 src.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, 3393 SWIZZLE_X, SWIZZLE_Y); 3394 emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src); 3395 } else { 3396 mat_column.writemask = WRITEMASK_Z; 3397 src.swizzle = MAKE_SWIZZLE4(SWIZZLE_Y, SWIZZLE_Y, 3398 SWIZZLE_Y, SWIZZLE_Y); 3399 emit_asm(ir, TGSI_OPCODE_MOV, mat_column, src); 3400 mat_column.writemask = WRITEMASK_XYZW; 3401 src.swizzle = SWIZZLE_XYZW; 3402 } 3403 mat_column.index++; 3404 } 3405 break; 3406 default: 3407 unreachable("Illegal matrix constant type.\n"); 3408 break; 3409 } 3410 mat_column.index++; 3411 } 3412 this->result = mat; 3413 return; 3414 } 3415 3416 elements = ir->type->vector_elements; 3417 switch (ir->type->base_type) { 3418 case GLSL_TYPE_FLOAT: 3419 gl_type = GL_FLOAT; 3420 for (i = 0; i < ir->type->vector_elements; i++) { 3421 values[i].f = ir->value.f[i]; 3422 } 3423 break; 3424 case GLSL_TYPE_DOUBLE: 3425 gl_type = GL_DOUBLE; 3426 for (i = 0; i < ir->type->vector_elements; i++) { 3427 memcpy(&values[i * 2], &ir->value.d[i], sizeof(double)); 3428 } 3429 break; 3430 case GLSL_TYPE_INT64: 3431 gl_type = GL_INT64_ARB; 3432 for (i = 0; i < ir->type->vector_elements; i++) { 3433 memcpy(&values[i * 2], &ir->value.d[i], sizeof(int64_t)); 3434 } 3435 break; 3436 case GLSL_TYPE_UINT64: 3437 gl_type = GL_UNSIGNED_INT64_ARB; 3438 for (i = 0; i < ir->type->vector_elements; i++) { 3439 memcpy(&values[i * 2], &ir->value.d[i], sizeof(uint64_t)); 3440 } 3441 break; 3442 case GLSL_TYPE_UINT: 3443 gl_type = native_integers ? GL_UNSIGNED_INT : GL_FLOAT; 3444 for (i = 0; i < ir->type->vector_elements; i++) { 3445 if (native_integers) 3446 values[i].u = ir->value.u[i]; 3447 else 3448 values[i].f = ir->value.u[i]; 3449 } 3450 break; 3451 case GLSL_TYPE_INT: 3452 gl_type = native_integers ? GL_INT : GL_FLOAT; 3453 for (i = 0; i < ir->type->vector_elements; i++) { 3454 if (native_integers) 3455 values[i].i = ir->value.i[i]; 3456 else 3457 values[i].f = ir->value.i[i]; 3458 } 3459 break; 3460 case GLSL_TYPE_BOOL: 3461 gl_type = native_integers ? GL_BOOL : GL_FLOAT; 3462 for (i = 0; i < ir->type->vector_elements; i++) { 3463 values[i].u = ir->value.b[i] ? ctx->Const.UniformBooleanTrue : 0; 3464 } 3465 break; 3466 case GLSL_TYPE_SAMPLER: 3467 case GLSL_TYPE_IMAGE: 3468 gl_type = GL_UNSIGNED_INT; 3469 elements = 2; 3470 values[0].u = ir->value.u64[0] & 0xffffffff; 3471 values[1].u = ir->value.u64[0] >> 32; 3472 break; 3473 default: 3474 assert(!"Non-float/uint/int/bool/sampler/image constant"); 3475 } 3476 3477 this->result = st_src_reg(file, -1, ir->type); 3478 this->result.index = add_constant(file, 3479 values, 3480 elements, 3481 gl_type, 3482 &this->result.swizzle); 3483} 3484 3485void 3486glsl_to_tgsi_visitor::visit_atomic_counter_intrinsic(ir_call *ir) 3487{ 3488 exec_node *param = ir->actual_parameters.get_head(); 3489 ir_dereference *deref = static_cast<ir_dereference *>(param); 3490 ir_variable *location = deref->variable_referenced(); 3491 bool has_hw_atomics = st_context(ctx)->has_hw_atomics; 3492 /* Calculate the surface offset */ 3493 st_src_reg offset; 3494 unsigned array_size = 0, base = 0; 3495 uint16_t index = 0; 3496 st_src_reg resource; 3497 3498 get_deref_offsets(deref, &array_size, &base, &index, &offset, false); 3499 3500 if (has_hw_atomics) { 3501 variable_storage *entry = find_variable_storage(location); 3502 st_src_reg buffer(PROGRAM_HW_ATOMIC, 0, GLSL_TYPE_ATOMIC_UINT, 3503 location->data.binding); 3504 3505 if (!entry) { 3506 entry = new(mem_ctx) variable_storage(location, PROGRAM_HW_ATOMIC, 3507 num_atomics); 3508 _mesa_hash_table_insert(this->variables, location, entry); 3509 3510 atomic_info[num_atomics].location = location->data.location; 3511 atomic_info[num_atomics].binding = location->data.binding; 3512 atomic_info[num_atomics].size = location->type->arrays_of_arrays_size(); 3513 if (atomic_info[num_atomics].size == 0) 3514 atomic_info[num_atomics].size = 1; 3515 atomic_info[num_atomics].array_id = 0; 3516 num_atomics++; 3517 } 3518 3519 if (offset.file != PROGRAM_UNDEFINED) { 3520 if (atomic_info[entry->index].array_id == 0) { 3521 num_atomic_arrays++; 3522 atomic_info[entry->index].array_id = num_atomic_arrays; 3523 } 3524 buffer.array_id = atomic_info[entry->index].array_id; 3525 } 3526 3527 buffer.index = index; 3528 buffer.index += location->data.offset / ATOMIC_COUNTER_SIZE; 3529 buffer.has_index2 = true; 3530 3531 if (offset.file != PROGRAM_UNDEFINED) { 3532 buffer.reladdr = ralloc(mem_ctx, st_src_reg); 3533 *buffer.reladdr = offset; 3534 emit_arl(ir, sampler_reladdr, offset); 3535 } 3536 offset = st_src_reg_for_int(0); 3537 3538 resource = buffer; 3539 } else { 3540 st_src_reg buffer(PROGRAM_BUFFER, 3541 prog->info.num_ssbos + 3542 location->data.binding, 3543 GLSL_TYPE_ATOMIC_UINT); 3544 3545 if (offset.file != PROGRAM_UNDEFINED) { 3546 emit_asm(ir, TGSI_OPCODE_MUL, st_dst_reg(offset), 3547 offset, st_src_reg_for_int(ATOMIC_COUNTER_SIZE)); 3548 emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(offset), 3549 offset, st_src_reg_for_int(location->data.offset + index * ATOMIC_COUNTER_SIZE)); 3550 } else { 3551 offset = st_src_reg_for_int(location->data.offset + index * ATOMIC_COUNTER_SIZE); 3552 } 3553 resource = buffer; 3554 } 3555 3556 ir->return_deref->accept(this); 3557 st_dst_reg dst(this->result); 3558 dst.writemask = WRITEMASK_X; 3559 3560 glsl_to_tgsi_instruction *inst; 3561 3562 if (ir->callee->intrinsic_id == ir_intrinsic_atomic_counter_read) { 3563 inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, offset); 3564 } else if (ir->callee->intrinsic_id == ir_intrinsic_atomic_counter_increment) { 3565 inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset, 3566 st_src_reg_for_int(1)); 3567 } else if (ir->callee->intrinsic_id == ir_intrinsic_atomic_counter_predecrement) { 3568 inst = emit_asm(ir, TGSI_OPCODE_ATOMUADD, dst, offset, 3569 st_src_reg_for_int(-1)); 3570 emit_asm(ir, TGSI_OPCODE_ADD, dst, this->result, st_src_reg_for_int(-1)); 3571 } else { 3572 param = param->get_next(); 3573 ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); 3574 val->accept(this); 3575 3576 st_src_reg data = this->result, data2 = undef_src; 3577 enum tgsi_opcode opcode; 3578 switch (ir->callee->intrinsic_id) { 3579 case ir_intrinsic_atomic_counter_add: 3580 opcode = TGSI_OPCODE_ATOMUADD; 3581 break; 3582 case ir_intrinsic_atomic_counter_min: 3583 opcode = TGSI_OPCODE_ATOMIMIN; 3584 break; 3585 case ir_intrinsic_atomic_counter_max: 3586 opcode = TGSI_OPCODE_ATOMIMAX; 3587 break; 3588 case ir_intrinsic_atomic_counter_and: 3589 opcode = TGSI_OPCODE_ATOMAND; 3590 break; 3591 case ir_intrinsic_atomic_counter_or: 3592 opcode = TGSI_OPCODE_ATOMOR; 3593 break; 3594 case ir_intrinsic_atomic_counter_xor: 3595 opcode = TGSI_OPCODE_ATOMXOR; 3596 break; 3597 case ir_intrinsic_atomic_counter_exchange: 3598 opcode = TGSI_OPCODE_ATOMXCHG; 3599 break; 3600 case ir_intrinsic_atomic_counter_comp_swap: { 3601 opcode = TGSI_OPCODE_ATOMCAS; 3602 param = param->get_next(); 3603 val = ((ir_instruction *)param)->as_rvalue(); 3604 val->accept(this); 3605 data2 = this->result; 3606 break; 3607 } 3608 default: 3609 assert(!"Unexpected intrinsic"); 3610 return; 3611 } 3612 3613 inst = emit_asm(ir, opcode, dst, offset, data, data2); 3614 } 3615 3616 inst->resource = resource; 3617} 3618 3619void 3620glsl_to_tgsi_visitor::visit_ssbo_intrinsic(ir_call *ir) 3621{ 3622 exec_node *param = ir->actual_parameters.get_head(); 3623 3624 ir_rvalue *block = ((ir_instruction *)param)->as_rvalue(); 3625 3626 param = param->get_next(); 3627 ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); 3628 3629 ir_constant *const_block = block->as_constant(); 3630 st_src_reg buffer( 3631 PROGRAM_BUFFER, 3632 const_block ? const_block->value.u[0] : 0, 3633 GLSL_TYPE_UINT); 3634 3635 if (!const_block) { 3636 block->accept(this); 3637 buffer.reladdr = ralloc(mem_ctx, st_src_reg); 3638 *buffer.reladdr = this->result; 3639 emit_arl(ir, sampler_reladdr, this->result); 3640 } 3641 3642 /* Calculate the surface offset */ 3643 offset->accept(this); 3644 st_src_reg off = this->result; 3645 3646 st_dst_reg dst = undef_dst; 3647 if (ir->return_deref) { 3648 ir->return_deref->accept(this); 3649 dst = st_dst_reg(this->result); 3650 dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1; 3651 } 3652 3653 glsl_to_tgsi_instruction *inst; 3654 3655 if (ir->callee->intrinsic_id == ir_intrinsic_ssbo_load) { 3656 inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, off); 3657 if (dst.type == GLSL_TYPE_BOOL) 3658 emit_asm(ir, TGSI_OPCODE_USNE, dst, st_src_reg(dst), 3659 st_src_reg_for_int(0)); 3660 } else if (ir->callee->intrinsic_id == ir_intrinsic_ssbo_store) { 3661 param = param->get_next(); 3662 ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); 3663 val->accept(this); 3664 3665 param = param->get_next(); 3666 ir_constant *write_mask = ((ir_instruction *)param)->as_constant(); 3667 assert(write_mask); 3668 dst.writemask = write_mask->value.u[0]; 3669 3670 dst.type = this->result.type; 3671 inst = emit_asm(ir, TGSI_OPCODE_STORE, dst, off, this->result); 3672 } else { 3673 param = param->get_next(); 3674 ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); 3675 val->accept(this); 3676 3677 st_src_reg data = this->result, data2 = undef_src; 3678 enum tgsi_opcode opcode; 3679 switch (ir->callee->intrinsic_id) { 3680 case ir_intrinsic_ssbo_atomic_add: 3681 opcode = TGSI_OPCODE_ATOMUADD; 3682 break; 3683 case ir_intrinsic_ssbo_atomic_min: 3684 opcode = TGSI_OPCODE_ATOMIMIN; 3685 break; 3686 case ir_intrinsic_ssbo_atomic_max: 3687 opcode = TGSI_OPCODE_ATOMIMAX; 3688 break; 3689 case ir_intrinsic_ssbo_atomic_and: 3690 opcode = TGSI_OPCODE_ATOMAND; 3691 break; 3692 case ir_intrinsic_ssbo_atomic_or: 3693 opcode = TGSI_OPCODE_ATOMOR; 3694 break; 3695 case ir_intrinsic_ssbo_atomic_xor: 3696 opcode = TGSI_OPCODE_ATOMXOR; 3697 break; 3698 case ir_intrinsic_ssbo_atomic_exchange: 3699 opcode = TGSI_OPCODE_ATOMXCHG; 3700 break; 3701 case ir_intrinsic_ssbo_atomic_comp_swap: 3702 opcode = TGSI_OPCODE_ATOMCAS; 3703 param = param->get_next(); 3704 val = ((ir_instruction *)param)->as_rvalue(); 3705 val->accept(this); 3706 data2 = this->result; 3707 break; 3708 default: 3709 assert(!"Unexpected intrinsic"); 3710 return; 3711 } 3712 3713 inst = emit_asm(ir, opcode, dst, off, data, data2); 3714 } 3715 3716 param = param->get_next(); 3717 ir_constant *access = NULL; 3718 if (!param->is_tail_sentinel()) { 3719 access = ((ir_instruction *)param)->as_constant(); 3720 assert(access); 3721 } 3722 3723 add_buffer_to_load_and_stores(inst, &buffer, &this->instructions, access); 3724} 3725 3726void 3727glsl_to_tgsi_visitor::visit_membar_intrinsic(ir_call *ir) 3728{ 3729 switch (ir->callee->intrinsic_id) { 3730 case ir_intrinsic_memory_barrier: 3731 emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, 3732 st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER | 3733 TGSI_MEMBAR_ATOMIC_BUFFER | 3734 TGSI_MEMBAR_SHADER_IMAGE | 3735 TGSI_MEMBAR_SHARED)); 3736 break; 3737 case ir_intrinsic_memory_barrier_atomic_counter: 3738 emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, 3739 st_src_reg_for_int(TGSI_MEMBAR_ATOMIC_BUFFER)); 3740 break; 3741 case ir_intrinsic_memory_barrier_buffer: 3742 emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, 3743 st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER)); 3744 break; 3745 case ir_intrinsic_memory_barrier_image: 3746 emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, 3747 st_src_reg_for_int(TGSI_MEMBAR_SHADER_IMAGE)); 3748 break; 3749 case ir_intrinsic_memory_barrier_shared: 3750 emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, 3751 st_src_reg_for_int(TGSI_MEMBAR_SHARED)); 3752 break; 3753 case ir_intrinsic_group_memory_barrier: 3754 emit_asm(ir, TGSI_OPCODE_MEMBAR, undef_dst, 3755 st_src_reg_for_int(TGSI_MEMBAR_SHADER_BUFFER | 3756 TGSI_MEMBAR_ATOMIC_BUFFER | 3757 TGSI_MEMBAR_SHADER_IMAGE | 3758 TGSI_MEMBAR_SHARED | 3759 TGSI_MEMBAR_THREAD_GROUP)); 3760 break; 3761 default: 3762 assert(!"Unexpected memory barrier intrinsic"); 3763 } 3764} 3765 3766void 3767glsl_to_tgsi_visitor::visit_shared_intrinsic(ir_call *ir) 3768{ 3769 exec_node *param = ir->actual_parameters.get_head(); 3770 3771 ir_rvalue *offset = ((ir_instruction *)param)->as_rvalue(); 3772 3773 st_src_reg buffer(PROGRAM_MEMORY, 0, GLSL_TYPE_UINT); 3774 3775 /* Calculate the surface offset */ 3776 offset->accept(this); 3777 st_src_reg off = this->result; 3778 3779 st_dst_reg dst = undef_dst; 3780 if (ir->return_deref) { 3781 ir->return_deref->accept(this); 3782 dst = st_dst_reg(this->result); 3783 dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1; 3784 } 3785 3786 glsl_to_tgsi_instruction *inst; 3787 3788 if (ir->callee->intrinsic_id == ir_intrinsic_shared_load) { 3789 inst = emit_asm(ir, TGSI_OPCODE_LOAD, dst, off); 3790 inst->resource = buffer; 3791 } else if (ir->callee->intrinsic_id == ir_intrinsic_shared_store) { 3792 param = param->get_next(); 3793 ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); 3794 val->accept(this); 3795 3796 param = param->get_next(); 3797 ir_constant *write_mask = ((ir_instruction *)param)->as_constant(); 3798 assert(write_mask); 3799 dst.writemask = write_mask->value.u[0]; 3800 3801 dst.type = this->result.type; 3802 inst = emit_asm(ir, TGSI_OPCODE_STORE, dst, off, this->result); 3803 inst->resource = buffer; 3804 } else { 3805 param = param->get_next(); 3806 ir_rvalue *val = ((ir_instruction *)param)->as_rvalue(); 3807 val->accept(this); 3808 3809 st_src_reg data = this->result, data2 = undef_src; 3810 enum tgsi_opcode opcode; 3811 switch (ir->callee->intrinsic_id) { 3812 case ir_intrinsic_shared_atomic_add: 3813 opcode = TGSI_OPCODE_ATOMUADD; 3814 break; 3815 case ir_intrinsic_shared_atomic_min: 3816 opcode = TGSI_OPCODE_ATOMIMIN; 3817 break; 3818 case ir_intrinsic_shared_atomic_max: 3819 opcode = TGSI_OPCODE_ATOMIMAX; 3820 break; 3821 case ir_intrinsic_shared_atomic_and: 3822 opcode = TGSI_OPCODE_ATOMAND; 3823 break; 3824 case ir_intrinsic_shared_atomic_or: 3825 opcode = TGSI_OPCODE_ATOMOR; 3826 break; 3827 case ir_intrinsic_shared_atomic_xor: 3828 opcode = TGSI_OPCODE_ATOMXOR; 3829 break; 3830 case ir_intrinsic_shared_atomic_exchange: 3831 opcode = TGSI_OPCODE_ATOMXCHG; 3832 break; 3833 case ir_intrinsic_shared_atomic_comp_swap: 3834 opcode = TGSI_OPCODE_ATOMCAS; 3835 param = param->get_next(); 3836 val = ((ir_instruction *)param)->as_rvalue(); 3837 val->accept(this); 3838 data2 = this->result; 3839 break; 3840 default: 3841 assert(!"Unexpected intrinsic"); 3842 return; 3843 } 3844 3845 inst = emit_asm(ir, opcode, dst, off, data, data2); 3846 inst->resource = buffer; 3847 } 3848} 3849 3850static void 3851get_image_qualifiers(ir_dereference *ir, const glsl_type **type, 3852 bool *memory_coherent, bool *memory_volatile, 3853 bool *memory_restrict, bool *memory_read_only, 3854 enum pipe_format *image_format) 3855{ 3856 3857 switch (ir->ir_type) { 3858 case ir_type_dereference_record: { 3859 ir_dereference_record *deref_record = ir->as_dereference_record(); 3860 const glsl_type *struct_type = deref_record->record->type; 3861 int fild_idx = deref_record->field_idx; 3862 3863 *type = struct_type->fields.structure[fild_idx].type->without_array(); 3864 *memory_coherent = 3865 struct_type->fields.structure[fild_idx].memory_coherent; 3866 *memory_volatile = 3867 struct_type->fields.structure[fild_idx].memory_volatile; 3868 *memory_restrict = 3869 struct_type->fields.structure[fild_idx].memory_restrict; 3870 *memory_read_only = 3871 struct_type->fields.structure[fild_idx].memory_read_only; 3872 *image_format = 3873 struct_type->fields.structure[fild_idx].image_format; 3874 break; 3875 } 3876 3877 case ir_type_dereference_array: { 3878 ir_dereference_array *deref_arr = ir->as_dereference_array(); 3879 get_image_qualifiers((ir_dereference *)deref_arr->array, type, 3880 memory_coherent, memory_volatile, memory_restrict, 3881 memory_read_only, image_format); 3882 break; 3883 } 3884 3885 case ir_type_dereference_variable: { 3886 ir_variable *var = ir->variable_referenced(); 3887 3888 *type = var->type->without_array(); 3889 *memory_coherent = var->data.memory_coherent; 3890 *memory_volatile = var->data.memory_volatile; 3891 *memory_restrict = var->data.memory_restrict; 3892 *memory_read_only = var->data.memory_read_only; 3893 *image_format = var->data.image_format; 3894 break; 3895 } 3896 3897 default: 3898 break; 3899 } 3900} 3901 3902void 3903glsl_to_tgsi_visitor::visit_image_intrinsic(ir_call *ir) 3904{ 3905 exec_node *param = ir->actual_parameters.get_head(); 3906 3907 ir_dereference *img = (ir_dereference *)param; 3908 const ir_variable *imgvar = img->variable_referenced(); 3909 unsigned sampler_array_size = 1, sampler_base = 0; 3910 bool memory_coherent = false, memory_volatile = false, 3911 memory_restrict = false, memory_read_only = false; 3912 enum pipe_format image_format = PIPE_FORMAT_NONE; 3913 const glsl_type *type = NULL; 3914 3915 get_image_qualifiers(img, &type, &memory_coherent, &memory_volatile, 3916 &memory_restrict, &memory_read_only, &image_format); 3917 3918 st_src_reg reladdr; 3919 st_src_reg image(PROGRAM_IMAGE, 0, GLSL_TYPE_UINT); 3920 uint16_t index = 0; 3921 get_deref_offsets(img, &sampler_array_size, &sampler_base, 3922 &index, &reladdr, !imgvar->contains_bindless()); 3923 3924 image.index = index; 3925 if (reladdr.file != PROGRAM_UNDEFINED) { 3926 image.reladdr = ralloc(mem_ctx, st_src_reg); 3927 *image.reladdr = reladdr; 3928 emit_arl(ir, sampler_reladdr, reladdr); 3929 } 3930 3931 st_dst_reg dst = undef_dst; 3932 if (ir->return_deref) { 3933 ir->return_deref->accept(this); 3934 dst = st_dst_reg(this->result); 3935 dst.writemask = (1 << ir->return_deref->type->vector_elements) - 1; 3936 } 3937 3938 glsl_to_tgsi_instruction *inst; 3939 3940 st_src_reg bindless; 3941 if (imgvar->contains_bindless()) { 3942 img->accept(this); 3943 bindless = this->result; 3944 } 3945 3946 if (ir->callee->intrinsic_id == ir_intrinsic_image_size) { 3947 dst.writemask = WRITEMASK_XYZ; 3948 inst = emit_asm(ir, TGSI_OPCODE_RESQ, dst); 3949 } else if (ir->callee->intrinsic_id == ir_intrinsic_image_samples) { 3950 st_src_reg res = get_temp(glsl_type::ivec4_type); 3951 st_dst_reg dstres = st_dst_reg(res); 3952 dstres.writemask = WRITEMASK_W; 3953 inst = emit_asm(ir, TGSI_OPCODE_RESQ, dstres); 3954 res.swizzle = SWIZZLE_WWWW; 3955 emit_asm(ir, TGSI_OPCODE_MOV, dst, res); 3956 } else { 3957 st_src_reg arg1 = undef_src, arg2 = undef_src; 3958 st_src_reg coord; 3959 st_dst_reg coord_dst; 3960 coord = get_temp(glsl_type::ivec4_type); 3961 coord_dst = st_dst_reg(coord); 3962 coord_dst.writemask = (1 << type->coordinate_components()) - 1; 3963 param = param->get_next(); 3964 ((ir_dereference *)param)->accept(this); 3965 emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result); 3966 coord.swizzle = SWIZZLE_XXXX; 3967 switch (type->coordinate_components()) { 3968 case 4: assert(!"unexpected coord count"); 3969 FALLTHROUGH; 3970 case 3: coord.swizzle |= SWIZZLE_Z << 6; 3971 FALLTHROUGH; 3972 case 2: coord.swizzle |= SWIZZLE_Y << 3; 3973 } 3974 3975 if (type->sampler_dimensionality == GLSL_SAMPLER_DIM_MS) { 3976 param = param->get_next(); 3977 ((ir_dereference *)param)->accept(this); 3978 st_src_reg sample = this->result; 3979 sample.swizzle = SWIZZLE_XXXX; 3980 coord_dst.writemask = WRITEMASK_W; 3981 emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, sample); 3982 coord.swizzle |= SWIZZLE_W << 9; 3983 } 3984 3985 param = param->get_next(); 3986 if (!param->is_tail_sentinel()) { 3987 ((ir_dereference *)param)->accept(this); 3988 arg1 = this->result; 3989 param = param->get_next(); 3990 } 3991 3992 if (!param->is_tail_sentinel()) { 3993 ((ir_dereference *)param)->accept(this); 3994 arg2 = this->result; 3995 param = param->get_next(); 3996 } 3997 3998 assert(param->is_tail_sentinel()); 3999 4000 enum tgsi_opcode opcode; 4001 switch (ir->callee->intrinsic_id) { 4002 case ir_intrinsic_image_load: 4003 opcode = TGSI_OPCODE_LOAD; 4004 break; 4005 case ir_intrinsic_image_store: 4006 opcode = TGSI_OPCODE_STORE; 4007 break; 4008 case ir_intrinsic_image_atomic_add: 4009 opcode = TGSI_OPCODE_ATOMUADD; 4010 break; 4011 case ir_intrinsic_image_atomic_min: 4012 opcode = TGSI_OPCODE_ATOMIMIN; 4013 break; 4014 case ir_intrinsic_image_atomic_max: 4015 opcode = TGSI_OPCODE_ATOMIMAX; 4016 break; 4017 case ir_intrinsic_image_atomic_and: 4018 opcode = TGSI_OPCODE_ATOMAND; 4019 break; 4020 case ir_intrinsic_image_atomic_or: 4021 opcode = TGSI_OPCODE_ATOMOR; 4022 break; 4023 case ir_intrinsic_image_atomic_xor: 4024 opcode = TGSI_OPCODE_ATOMXOR; 4025 break; 4026 case ir_intrinsic_image_atomic_exchange: 4027 opcode = TGSI_OPCODE_ATOMXCHG; 4028 break; 4029 case ir_intrinsic_image_atomic_comp_swap: 4030 opcode = TGSI_OPCODE_ATOMCAS; 4031 break; 4032 case ir_intrinsic_image_atomic_inc_wrap: { 4033 /* There's a bit of disagreement between GLSL and the hardware. The 4034 * hardware wants to wrap after the given wrap value, while GLSL 4035 * wants to wrap at the value. Subtract 1 to make up the difference. 4036 */ 4037 st_src_reg wrap = get_temp(glsl_type::uint_type); 4038 emit_asm(ir, TGSI_OPCODE_ADD, st_dst_reg(wrap), 4039 arg1, st_src_reg_for_int(-1)); 4040 arg1 = wrap; 4041 opcode = TGSI_OPCODE_ATOMINC_WRAP; 4042 break; 4043 } 4044 case ir_intrinsic_image_atomic_dec_wrap: 4045 opcode = TGSI_OPCODE_ATOMDEC_WRAP; 4046 break; 4047 default: 4048 assert(!"Unexpected intrinsic"); 4049 return; 4050 } 4051 4052 inst = emit_asm(ir, opcode, dst, coord, arg1, arg2); 4053 if (opcode == TGSI_OPCODE_STORE) 4054 inst->dst[0].writemask = WRITEMASK_XYZW; 4055 } 4056 4057 if (imgvar->contains_bindless()) { 4058 inst->resource = bindless; 4059 inst->resource.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, 4060 SWIZZLE_X, SWIZZLE_Y); 4061 } else { 4062 inst->resource = image; 4063 inst->sampler_array_size = sampler_array_size; 4064 inst->sampler_base = sampler_base; 4065 } 4066 4067 inst->tex_target = type->sampler_index(); 4068 inst->image_format = image_format; 4069 inst->read_only = memory_read_only; 4070 4071 if (memory_coherent) 4072 inst->buffer_access |= TGSI_MEMORY_COHERENT; 4073 if (memory_restrict) 4074 inst->buffer_access |= TGSI_MEMORY_RESTRICT; 4075 if (memory_volatile) 4076 inst->buffer_access |= TGSI_MEMORY_VOLATILE; 4077} 4078 4079void 4080glsl_to_tgsi_visitor::visit_generic_intrinsic(ir_call *ir, enum tgsi_opcode op) 4081{ 4082 ir->return_deref->accept(this); 4083 st_dst_reg dst = st_dst_reg(this->result); 4084 4085 dst.writemask = u_bit_consecutive(0, ir->return_deref->var->type->vector_elements); 4086 4087 st_src_reg src[4] = { undef_src, undef_src, undef_src, undef_src }; 4088 unsigned num_src = 0; 4089 foreach_in_list(ir_rvalue, param, &ir->actual_parameters) { 4090 assert(num_src < ARRAY_SIZE(src)); 4091 4092 this->result.file = PROGRAM_UNDEFINED; 4093 param->accept(this); 4094 assert(this->result.file != PROGRAM_UNDEFINED); 4095 4096 src[num_src] = this->result; 4097 num_src++; 4098 } 4099 4100 emit_asm(ir, op, dst, src[0], src[1], src[2], src[3]); 4101} 4102 4103void 4104glsl_to_tgsi_visitor::visit(ir_call *ir) 4105{ 4106 ir_function_signature *sig = ir->callee; 4107 4108 /* Filter out intrinsics */ 4109 switch (sig->intrinsic_id) { 4110 case ir_intrinsic_atomic_counter_read: 4111 case ir_intrinsic_atomic_counter_increment: 4112 case ir_intrinsic_atomic_counter_predecrement: 4113 case ir_intrinsic_atomic_counter_add: 4114 case ir_intrinsic_atomic_counter_min: 4115 case ir_intrinsic_atomic_counter_max: 4116 case ir_intrinsic_atomic_counter_and: 4117 case ir_intrinsic_atomic_counter_or: 4118 case ir_intrinsic_atomic_counter_xor: 4119 case ir_intrinsic_atomic_counter_exchange: 4120 case ir_intrinsic_atomic_counter_comp_swap: 4121 visit_atomic_counter_intrinsic(ir); 4122 return; 4123 4124 case ir_intrinsic_ssbo_load: 4125 case ir_intrinsic_ssbo_store: 4126 case ir_intrinsic_ssbo_atomic_add: 4127 case ir_intrinsic_ssbo_atomic_min: 4128 case ir_intrinsic_ssbo_atomic_max: 4129 case ir_intrinsic_ssbo_atomic_and: 4130 case ir_intrinsic_ssbo_atomic_or: 4131 case ir_intrinsic_ssbo_atomic_xor: 4132 case ir_intrinsic_ssbo_atomic_exchange: 4133 case ir_intrinsic_ssbo_atomic_comp_swap: 4134 visit_ssbo_intrinsic(ir); 4135 return; 4136 4137 case ir_intrinsic_memory_barrier: 4138 case ir_intrinsic_memory_barrier_atomic_counter: 4139 case ir_intrinsic_memory_barrier_buffer: 4140 case ir_intrinsic_memory_barrier_image: 4141 case ir_intrinsic_memory_barrier_shared: 4142 case ir_intrinsic_group_memory_barrier: 4143 visit_membar_intrinsic(ir); 4144 return; 4145 4146 case ir_intrinsic_shared_load: 4147 case ir_intrinsic_shared_store: 4148 case ir_intrinsic_shared_atomic_add: 4149 case ir_intrinsic_shared_atomic_min: 4150 case ir_intrinsic_shared_atomic_max: 4151 case ir_intrinsic_shared_atomic_and: 4152 case ir_intrinsic_shared_atomic_or: 4153 case ir_intrinsic_shared_atomic_xor: 4154 case ir_intrinsic_shared_atomic_exchange: 4155 case ir_intrinsic_shared_atomic_comp_swap: 4156 visit_shared_intrinsic(ir); 4157 return; 4158 4159 case ir_intrinsic_image_load: 4160 case ir_intrinsic_image_store: 4161 case ir_intrinsic_image_atomic_add: 4162 case ir_intrinsic_image_atomic_min: 4163 case ir_intrinsic_image_atomic_max: 4164 case ir_intrinsic_image_atomic_and: 4165 case ir_intrinsic_image_atomic_or: 4166 case ir_intrinsic_image_atomic_xor: 4167 case ir_intrinsic_image_atomic_exchange: 4168 case ir_intrinsic_image_atomic_comp_swap: 4169 case ir_intrinsic_image_size: 4170 case ir_intrinsic_image_samples: 4171 case ir_intrinsic_image_atomic_inc_wrap: 4172 case ir_intrinsic_image_atomic_dec_wrap: 4173 visit_image_intrinsic(ir); 4174 return; 4175 4176 case ir_intrinsic_shader_clock: 4177 visit_generic_intrinsic(ir, TGSI_OPCODE_CLOCK); 4178 return; 4179 4180 case ir_intrinsic_vote_all: 4181 visit_generic_intrinsic(ir, TGSI_OPCODE_VOTE_ALL); 4182 return; 4183 case ir_intrinsic_vote_any: 4184 visit_generic_intrinsic(ir, TGSI_OPCODE_VOTE_ANY); 4185 return; 4186 case ir_intrinsic_vote_eq: 4187 visit_generic_intrinsic(ir, TGSI_OPCODE_VOTE_EQ); 4188 return; 4189 case ir_intrinsic_ballot: 4190 visit_generic_intrinsic(ir, TGSI_OPCODE_BALLOT); 4191 return; 4192 case ir_intrinsic_read_first_invocation: 4193 visit_generic_intrinsic(ir, TGSI_OPCODE_READ_FIRST); 4194 return; 4195 case ir_intrinsic_read_invocation: 4196 visit_generic_intrinsic(ir, TGSI_OPCODE_READ_INVOC); 4197 return; 4198 4199 case ir_intrinsic_helper_invocation: 4200 visit_generic_intrinsic(ir, TGSI_OPCODE_READ_HELPER); 4201 return; 4202 4203 case ir_intrinsic_invalid: 4204 case ir_intrinsic_generic_load: 4205 case ir_intrinsic_generic_store: 4206 case ir_intrinsic_generic_atomic_add: 4207 case ir_intrinsic_generic_atomic_and: 4208 case ir_intrinsic_generic_atomic_or: 4209 case ir_intrinsic_generic_atomic_xor: 4210 case ir_intrinsic_generic_atomic_min: 4211 case ir_intrinsic_generic_atomic_max: 4212 case ir_intrinsic_generic_atomic_exchange: 4213 case ir_intrinsic_generic_atomic_comp_swap: 4214 case ir_intrinsic_begin_invocation_interlock: 4215 case ir_intrinsic_end_invocation_interlock: 4216 unreachable("Invalid intrinsic"); 4217 } 4218} 4219 4220void 4221glsl_to_tgsi_visitor::calc_deref_offsets(ir_dereference *tail, 4222 unsigned *array_elements, 4223 uint16_t *index, 4224 st_src_reg *indirect, 4225 unsigned *location) 4226{ 4227 switch (tail->ir_type) { 4228 case ir_type_dereference_record: { 4229 ir_dereference_record *deref_record = tail->as_dereference_record(); 4230 const glsl_type *struct_type = deref_record->record->type; 4231 int field_index = deref_record->field_idx; 4232 4233 calc_deref_offsets(deref_record->record->as_dereference(), array_elements, index, indirect, location); 4234 4235 assert(field_index >= 0); 4236 *location += struct_type->struct_location_offset(field_index); 4237 break; 4238 } 4239 4240 case ir_type_dereference_array: { 4241 ir_dereference_array *deref_arr = tail->as_dereference_array(); 4242 4243 void *mem_ctx = ralloc_parent(deref_arr); 4244 ir_constant *array_index = 4245 deref_arr->array_index->constant_expression_value(mem_ctx); 4246 4247 if (!array_index) { 4248 st_src_reg temp_reg; 4249 st_dst_reg temp_dst; 4250 4251 temp_reg = get_temp(glsl_type::uint_type); 4252 temp_dst = st_dst_reg(temp_reg); 4253 temp_dst.writemask = 1; 4254 4255 deref_arr->array_index->accept(this); 4256 if (*array_elements != 1) 4257 emit_asm(NULL, TGSI_OPCODE_MUL, temp_dst, this->result, st_src_reg_for_int(*array_elements)); 4258 else 4259 emit_asm(NULL, TGSI_OPCODE_MOV, temp_dst, this->result); 4260 4261 if (indirect->file == PROGRAM_UNDEFINED) 4262 *indirect = temp_reg; 4263 else { 4264 temp_dst = st_dst_reg(*indirect); 4265 temp_dst.writemask = 1; 4266 emit_asm(NULL, TGSI_OPCODE_ADD, temp_dst, *indirect, temp_reg); 4267 } 4268 } else 4269 *index += array_index->value.u[0] * *array_elements; 4270 4271 *array_elements *= deref_arr->array->type->length; 4272 4273 calc_deref_offsets(deref_arr->array->as_dereference(), array_elements, index, indirect, location); 4274 break; 4275 } 4276 default: 4277 break; 4278 } 4279} 4280 4281void 4282glsl_to_tgsi_visitor::get_deref_offsets(ir_dereference *ir, 4283 unsigned *array_size, 4284 unsigned *base, 4285 uint16_t *index, 4286 st_src_reg *reladdr, 4287 bool opaque) 4288{ 4289 GLuint shader = _mesa_program_enum_to_shader_stage(this->prog->Target); 4290 unsigned location = 0; 4291 ir_variable *var = ir->variable_referenced(); 4292 4293 reladdr->reset(); 4294 4295 *base = 0; 4296 *array_size = 1; 4297 4298 assert(var); 4299 location = var->data.location; 4300 calc_deref_offsets(ir, array_size, index, reladdr, &location); 4301 4302 /* 4303 * If we end up with no indirect then adjust the base to the index, 4304 * and set the array size to 1. 4305 */ 4306 if (reladdr->file == PROGRAM_UNDEFINED) { 4307 *base = *index; 4308 *array_size = 1; 4309 } 4310 4311 if (opaque) { 4312 assert(location != 0xffffffff); 4313 *base += this->shader_program->data->UniformStorage[location].opaque[shader].index; 4314 *index += this->shader_program->data->UniformStorage[location].opaque[shader].index; 4315 } 4316} 4317 4318st_src_reg 4319glsl_to_tgsi_visitor::canonicalize_gather_offset(st_src_reg offset) 4320{ 4321 if (offset.reladdr || offset.reladdr2 || 4322 offset.has_index2 || 4323 offset.file == PROGRAM_UNIFORM || 4324 offset.file == PROGRAM_CONSTANT || 4325 offset.file == PROGRAM_STATE_VAR) { 4326 st_src_reg tmp = get_temp(glsl_type::ivec2_type); 4327 st_dst_reg tmp_dst = st_dst_reg(tmp); 4328 tmp_dst.writemask = WRITEMASK_XY; 4329 emit_asm(NULL, TGSI_OPCODE_MOV, tmp_dst, offset); 4330 return tmp; 4331 } 4332 4333 return offset; 4334} 4335 4336bool 4337glsl_to_tgsi_visitor::handle_bound_deref(ir_dereference *ir) 4338{ 4339 ir_variable *var = ir->variable_referenced(); 4340 4341 if (!var || var->data.mode != ir_var_uniform || var->data.bindless || 4342 !(ir->type->is_image() || ir->type->is_sampler())) 4343 return false; 4344 4345 /* Convert from bound sampler/image to bindless handle. */ 4346 bool is_image = ir->type->is_image(); 4347 st_src_reg resource(is_image ? PROGRAM_IMAGE : PROGRAM_SAMPLER, 0, GLSL_TYPE_UINT); 4348 uint16_t index = 0; 4349 unsigned array_size = 1, base = 0; 4350 st_src_reg reladdr; 4351 get_deref_offsets(ir, &array_size, &base, &index, &reladdr, true); 4352 4353 resource.index = index; 4354 if (reladdr.file != PROGRAM_UNDEFINED) { 4355 resource.reladdr = ralloc(mem_ctx, st_src_reg); 4356 *resource.reladdr = reladdr; 4357 emit_arl(ir, sampler_reladdr, reladdr); 4358 } 4359 4360 this->result = get_temp(glsl_type::uvec2_type); 4361 st_dst_reg dst(this->result); 4362 dst.writemask = WRITEMASK_XY; 4363 4364 glsl_to_tgsi_instruction *inst = emit_asm( 4365 ir, is_image ? TGSI_OPCODE_IMG2HND : TGSI_OPCODE_SAMP2HND, dst); 4366 4367 inst->tex_target = ir->type->sampler_index(); 4368 inst->resource = resource; 4369 inst->sampler_array_size = array_size; 4370 inst->sampler_base = base; 4371 4372 return true; 4373} 4374 4375void 4376glsl_to_tgsi_visitor::visit(ir_texture *ir) 4377{ 4378 st_src_reg result_src, coord, cube_sc, lod_info, projector, dx, dy; 4379 st_src_reg offset[MAX_GLSL_TEXTURE_OFFSET], sample_index, component; 4380 st_src_reg levels_src, reladdr; 4381 st_dst_reg result_dst, coord_dst, cube_sc_dst; 4382 glsl_to_tgsi_instruction *inst = NULL; 4383 enum tgsi_opcode opcode = TGSI_OPCODE_NOP; 4384 const glsl_type *sampler_type = ir->sampler->type; 4385 unsigned sampler_array_size = 1, sampler_base = 0; 4386 bool is_cube_array = false; 4387 ir_variable *var = ir->sampler->variable_referenced(); 4388 unsigned i; 4389 4390 /* if we are a cube array sampler or a cube shadow */ 4391 if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) { 4392 is_cube_array = sampler_type->sampler_array; 4393 } 4394 4395 if (ir->coordinate) { 4396 ir->coordinate->accept(this); 4397 4398 /* Put our coords in a temp. We'll need to modify them for shadow, 4399 * projection, or LOD, so the only case we'd use it as-is is if 4400 * we're doing plain old texturing. The optimization passes on 4401 * glsl_to_tgsi_visitor should handle cleaning up our mess in that case. 4402 */ 4403 coord = get_temp(glsl_type::vec4_type); 4404 coord_dst = st_dst_reg(coord); 4405 coord_dst.writemask = (1 << ir->coordinate->type->vector_elements) - 1; 4406 emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result); 4407 } 4408 4409 if (ir->projector) { 4410 ir->projector->accept(this); 4411 projector = this->result; 4412 } 4413 4414 /* Storage for our result. Ideally for an assignment we'd be using 4415 * the actual storage for the result here, instead. 4416 */ 4417 result_src = get_temp(ir->type); 4418 result_dst = st_dst_reg(result_src); 4419 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 4420 4421 switch (ir->op) { 4422 case ir_tex: 4423 opcode = (is_cube_array && ir->shadow_comparator) ? TGSI_OPCODE_TEX2 : TGSI_OPCODE_TEX; 4424 if (ir->offset) { 4425 ir->offset->accept(this); 4426 offset[0] = this->result; 4427 } 4428 break; 4429 case ir_txb: 4430 if (is_cube_array || 4431 (sampler_type->sampler_shadow && sampler_type->coordinate_components() >= 3)) { 4432 opcode = TGSI_OPCODE_TXB2; 4433 } 4434 else { 4435 opcode = TGSI_OPCODE_TXB; 4436 } 4437 ir->lod_info.bias->accept(this); 4438 lod_info = this->result; 4439 if (ir->offset) { 4440 ir->offset->accept(this); 4441 offset[0] = this->result; 4442 } 4443 break; 4444 case ir_txl: 4445 if (this->has_tex_txf_lz && ir->lod_info.lod->is_zero()) { 4446 opcode = TGSI_OPCODE_TEX_LZ; 4447 } else { 4448 opcode = (is_cube_array || (sampler_type->sampler_shadow && sampler_type->coordinate_components() >= 3)) ? TGSI_OPCODE_TXL2 : TGSI_OPCODE_TXL; 4449 ir->lod_info.lod->accept(this); 4450 lod_info = this->result; 4451 } 4452 if (ir->offset) { 4453 ir->offset->accept(this); 4454 offset[0] = this->result; 4455 } 4456 break; 4457 case ir_txd: 4458 opcode = TGSI_OPCODE_TXD; 4459 ir->lod_info.grad.dPdx->accept(this); 4460 dx = this->result; 4461 ir->lod_info.grad.dPdy->accept(this); 4462 dy = this->result; 4463 if (ir->offset) { 4464 ir->offset->accept(this); 4465 offset[0] = this->result; 4466 } 4467 break; 4468 case ir_txs: 4469 opcode = TGSI_OPCODE_TXQ; 4470 ir->lod_info.lod->accept(this); 4471 lod_info = this->result; 4472 break; 4473 case ir_query_levels: 4474 opcode = TGSI_OPCODE_TXQ; 4475 lod_info = undef_src; 4476 levels_src = get_temp(ir->type); 4477 break; 4478 case ir_txf: 4479 if (this->has_tex_txf_lz && ir->lod_info.lod->is_zero()) { 4480 opcode = TGSI_OPCODE_TXF_LZ; 4481 } else { 4482 opcode = TGSI_OPCODE_TXF; 4483 ir->lod_info.lod->accept(this); 4484 lod_info = this->result; 4485 } 4486 if (ir->offset) { 4487 ir->offset->accept(this); 4488 offset[0] = this->result; 4489 } 4490 break; 4491 case ir_txf_ms: 4492 opcode = TGSI_OPCODE_TXF; 4493 ir->lod_info.sample_index->accept(this); 4494 sample_index = this->result; 4495 break; 4496 case ir_tg4: 4497 opcode = TGSI_OPCODE_TG4; 4498 ir->lod_info.component->accept(this); 4499 component = this->result; 4500 if (ir->offset) { 4501 ir->offset->accept(this); 4502 if (ir->offset->type->is_array()) { 4503 const glsl_type *elt_type = ir->offset->type->fields.array; 4504 for (i = 0; i < ir->offset->type->length; i++) { 4505 offset[i] = this->result; 4506 offset[i].index += i * type_size(elt_type); 4507 offset[i].type = elt_type->base_type; 4508 offset[i].swizzle = swizzle_for_size(elt_type->vector_elements); 4509 offset[i] = canonicalize_gather_offset(offset[i]); 4510 } 4511 } else { 4512 offset[0] = canonicalize_gather_offset(this->result); 4513 } 4514 } 4515 break; 4516 case ir_lod: 4517 opcode = TGSI_OPCODE_LODQ; 4518 break; 4519 case ir_texture_samples: 4520 opcode = TGSI_OPCODE_TXQS; 4521 break; 4522 case ir_samples_identical: 4523 unreachable("Unexpected ir_samples_identical opcode"); 4524 } 4525 4526 if (ir->projector) { 4527 if (opcode == TGSI_OPCODE_TEX) { 4528 /* Slot the projector in as the last component of the coord. */ 4529 coord_dst.writemask = WRITEMASK_W; 4530 emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, projector); 4531 coord_dst.writemask = WRITEMASK_XYZW; 4532 opcode = TGSI_OPCODE_TXP; 4533 } else { 4534 st_src_reg coord_w = coord; 4535 coord_w.swizzle = SWIZZLE_WWWW; 4536 4537 /* For the other TEX opcodes there's no projective version 4538 * since the last slot is taken up by LOD info. Do the 4539 * projective divide now. 4540 */ 4541 coord_dst.writemask = WRITEMASK_W; 4542 emit_asm(ir, TGSI_OPCODE_RCP, coord_dst, projector); 4543 4544 /* In the case where we have to project the coordinates "by hand," 4545 * the shadow comparator value must also be projected. 4546 */ 4547 st_src_reg tmp_src = coord; 4548 if (ir->shadow_comparator) { 4549 /* Slot the shadow value in as the second to last component of the 4550 * coord. 4551 */ 4552 ir->shadow_comparator->accept(this); 4553 4554 tmp_src = get_temp(glsl_type::vec4_type); 4555 st_dst_reg tmp_dst = st_dst_reg(tmp_src); 4556 4557 /* Projective division not allowed for array samplers. */ 4558 assert(!sampler_type->sampler_array); 4559 4560 tmp_dst.writemask = WRITEMASK_Z; 4561 emit_asm(ir, TGSI_OPCODE_MOV, tmp_dst, this->result); 4562 4563 tmp_dst.writemask = WRITEMASK_XY; 4564 emit_asm(ir, TGSI_OPCODE_MOV, tmp_dst, coord); 4565 } 4566 4567 coord_dst.writemask = WRITEMASK_XYZ; 4568 emit_asm(ir, TGSI_OPCODE_MUL, coord_dst, tmp_src, coord_w); 4569 4570 coord_dst.writemask = WRITEMASK_XYZW; 4571 coord.swizzle = SWIZZLE_XYZW; 4572 } 4573 } 4574 4575 /* If projection is done and the opcode is not TGSI_OPCODE_TXP, then the 4576 * shadow comparator was put in the correct place (and projected) by the 4577 * code, above, that handles by-hand projection. 4578 */ 4579 if (ir->shadow_comparator && (!ir->projector || opcode == TGSI_OPCODE_TXP)) { 4580 /* Slot the shadow value in as the second to last component of the 4581 * coord. 4582 */ 4583 ir->shadow_comparator->accept(this); 4584 4585 if (is_cube_array) { 4586 if (lod_info.file != PROGRAM_UNDEFINED) { 4587 // If we have both a cube array *and* a bias/lod, stick the 4588 // comparator into the .Y of the second argument. 4589 st_src_reg tmp = get_temp(glsl_type::vec2_type); 4590 cube_sc_dst = st_dst_reg(tmp); 4591 cube_sc_dst.writemask = WRITEMASK_X; 4592 emit_asm(ir, TGSI_OPCODE_MOV, cube_sc_dst, lod_info); 4593 lod_info = tmp; 4594 cube_sc_dst.writemask = WRITEMASK_Y; 4595 } else { 4596 cube_sc = get_temp(glsl_type::float_type); 4597 cube_sc_dst = st_dst_reg(cube_sc); 4598 cube_sc_dst.writemask = WRITEMASK_X; 4599 } 4600 emit_asm(ir, TGSI_OPCODE_MOV, cube_sc_dst, this->result); 4601 } 4602 else { 4603 if ((sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D && 4604 sampler_type->sampler_array) || 4605 sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_CUBE) { 4606 coord_dst.writemask = WRITEMASK_W; 4607 } else { 4608 coord_dst.writemask = WRITEMASK_Z; 4609 } 4610 emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, this->result); 4611 coord_dst.writemask = WRITEMASK_XYZW; 4612 } 4613 } 4614 4615 if (ir->op == ir_txf_ms) { 4616 coord_dst.writemask = WRITEMASK_W; 4617 emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, sample_index); 4618 coord_dst.writemask = WRITEMASK_XYZW; 4619 } else if (opcode == TGSI_OPCODE_TXL || opcode == TGSI_OPCODE_TXB || 4620 opcode == TGSI_OPCODE_TXF) { 4621 /* TGSI stores LOD or LOD bias in the last channel of the coords. */ 4622 coord_dst.writemask = WRITEMASK_W; 4623 emit_asm(ir, TGSI_OPCODE_MOV, coord_dst, lod_info); 4624 coord_dst.writemask = WRITEMASK_XYZW; 4625 } 4626 4627 st_src_reg sampler(PROGRAM_SAMPLER, 0, GLSL_TYPE_UINT); 4628 4629 uint16_t index = 0; 4630 get_deref_offsets(ir->sampler, &sampler_array_size, &sampler_base, 4631 &index, &reladdr, !var->contains_bindless()); 4632 4633 sampler.index = index; 4634 if (reladdr.file != PROGRAM_UNDEFINED) { 4635 sampler.reladdr = ralloc(mem_ctx, st_src_reg); 4636 *sampler.reladdr = reladdr; 4637 emit_arl(ir, sampler_reladdr, reladdr); 4638 } 4639 4640 st_src_reg bindless; 4641 if (var->contains_bindless()) { 4642 ir->sampler->accept(this); 4643 bindless = this->result; 4644 } 4645 4646 if (opcode == TGSI_OPCODE_TXD) 4647 inst = emit_asm(ir, opcode, result_dst, coord, dx, dy); 4648 else if (opcode == TGSI_OPCODE_TXQ) { 4649 if (ir->op == ir_query_levels) { 4650 /* the level is stored in W */ 4651 inst = emit_asm(ir, opcode, st_dst_reg(levels_src), lod_info); 4652 result_dst.writemask = WRITEMASK_X; 4653 levels_src.swizzle = SWIZZLE_WWWW; 4654 emit_asm(ir, TGSI_OPCODE_MOV, result_dst, levels_src); 4655 } else 4656 inst = emit_asm(ir, opcode, result_dst, lod_info); 4657 } else if (opcode == TGSI_OPCODE_TXQS) { 4658 inst = emit_asm(ir, opcode, result_dst); 4659 } else if (opcode == TGSI_OPCODE_TXL2 || opcode == TGSI_OPCODE_TXB2) { 4660 inst = emit_asm(ir, opcode, result_dst, coord, lod_info); 4661 } else if (opcode == TGSI_OPCODE_TEX2) { 4662 inst = emit_asm(ir, opcode, result_dst, coord, cube_sc); 4663 } else if (opcode == TGSI_OPCODE_TG4) { 4664 if (is_cube_array && ir->shadow_comparator) { 4665 inst = emit_asm(ir, opcode, result_dst, coord, cube_sc); 4666 } else { 4667 if (this->tg4_component_in_swizzle) { 4668 inst = emit_asm(ir, opcode, result_dst, coord); 4669 int idx = 0; 4670 foreach_in_list(immediate_storage, entry, &this->immediates) { 4671 if (component.index == idx) { 4672 gl_constant_value value = entry->values[component.swizzle]; 4673 inst->gather_component = value.i; 4674 break; 4675 } 4676 idx++; 4677 } 4678 } else { 4679 inst = emit_asm(ir, opcode, result_dst, coord, component); 4680 } 4681 } 4682 } else 4683 inst = emit_asm(ir, opcode, result_dst, coord); 4684 4685 if (ir->shadow_comparator) 4686 inst->tex_shadow = GL_TRUE; 4687 4688 if (var->contains_bindless()) { 4689 inst->resource = bindless; 4690 inst->resource.swizzle = MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, 4691 SWIZZLE_X, SWIZZLE_Y); 4692 } else { 4693 inst->resource = sampler; 4694 inst->sampler_array_size = sampler_array_size; 4695 inst->sampler_base = sampler_base; 4696 } 4697 4698 if (ir->offset) { 4699 if (!inst->tex_offsets) 4700 inst->tex_offsets = rzalloc_array(inst, st_src_reg, 4701 MAX_GLSL_TEXTURE_OFFSET); 4702 4703 for (i = 0; i < MAX_GLSL_TEXTURE_OFFSET && 4704 offset[i].file != PROGRAM_UNDEFINED; i++) 4705 inst->tex_offsets[i] = offset[i]; 4706 inst->tex_offset_num_offset = i; 4707 } 4708 4709 inst->tex_target = sampler_type->sampler_index(); 4710 inst->tex_type = ir->type->base_type; 4711 4712 this->result = result_src; 4713} 4714 4715void 4716glsl_to_tgsi_visitor::visit(ir_return *ir) 4717{ 4718 assert(!ir->get_value()); 4719 4720 emit_asm(ir, TGSI_OPCODE_RET); 4721} 4722 4723void 4724glsl_to_tgsi_visitor::visit(ir_discard *ir) 4725{ 4726 if (ir->condition) { 4727 ir->condition->accept(this); 4728 st_src_reg condition = this->result; 4729 4730 /* Convert the bool condition to a float so we can negate. */ 4731 if (native_integers) { 4732 st_src_reg temp = get_temp(ir->condition->type); 4733 emit_asm(ir, TGSI_OPCODE_AND, st_dst_reg(temp), 4734 condition, st_src_reg_for_float(1.0)); 4735 condition = temp; 4736 } 4737 4738 condition.negate = ~condition.negate; 4739 emit_asm(ir, TGSI_OPCODE_KILL_IF, undef_dst, condition); 4740 } else { 4741 /* unconditional kil */ 4742 emit_asm(ir, TGSI_OPCODE_KILL); 4743 } 4744} 4745 4746void 4747glsl_to_tgsi_visitor::visit(ir_demote *ir) 4748{ 4749 emit_asm(ir, TGSI_OPCODE_DEMOTE); 4750} 4751 4752void 4753glsl_to_tgsi_visitor::visit(ir_if *ir) 4754{ 4755 enum tgsi_opcode if_opcode; 4756 glsl_to_tgsi_instruction *if_inst; 4757 4758 ir->condition->accept(this); 4759 assert(this->result.file != PROGRAM_UNDEFINED); 4760 4761 if_opcode = native_integers ? TGSI_OPCODE_UIF : TGSI_OPCODE_IF; 4762 4763 if_inst = emit_asm(ir->condition, if_opcode, undef_dst, this->result); 4764 4765 this->instructions.push_tail(if_inst); 4766 4767 visit_exec_list(&ir->then_instructions, this); 4768 4769 if (!ir->else_instructions.is_empty()) { 4770 emit_asm(ir->condition, TGSI_OPCODE_ELSE); 4771 visit_exec_list(&ir->else_instructions, this); 4772 } 4773 4774 if_inst = emit_asm(ir->condition, TGSI_OPCODE_ENDIF); 4775} 4776 4777 4778void 4779glsl_to_tgsi_visitor::visit(ir_emit_vertex *ir) 4780{ 4781 assert(this->prog->Target == GL_GEOMETRY_PROGRAM_NV); 4782 4783 ir->stream->accept(this); 4784 emit_asm(ir, TGSI_OPCODE_EMIT, undef_dst, this->result); 4785} 4786 4787void 4788glsl_to_tgsi_visitor::visit(ir_end_primitive *ir) 4789{ 4790 assert(this->prog->Target == GL_GEOMETRY_PROGRAM_NV); 4791 4792 ir->stream->accept(this); 4793 emit_asm(ir, TGSI_OPCODE_ENDPRIM, undef_dst, this->result); 4794} 4795 4796void 4797glsl_to_tgsi_visitor::visit(ir_barrier *ir) 4798{ 4799 assert(this->prog->Target == GL_TESS_CONTROL_PROGRAM_NV || 4800 this->prog->Target == GL_COMPUTE_PROGRAM_NV); 4801 4802 emit_asm(ir, TGSI_OPCODE_BARRIER); 4803} 4804 4805glsl_to_tgsi_visitor::glsl_to_tgsi_visitor() 4806{ 4807 STATIC_ASSERT(sizeof(samplers_used) * 8 >= PIPE_MAX_SAMPLERS); 4808 4809 result.file = PROGRAM_UNDEFINED; 4810 next_temp = 1; 4811 array_sizes = NULL; 4812 max_num_arrays = 0; 4813 next_array = 0; 4814 num_inputs = 0; 4815 num_outputs = 0; 4816 num_input_arrays = 0; 4817 num_output_arrays = 0; 4818 num_atomics = 0; 4819 num_atomic_arrays = 0; 4820 num_immediates = 0; 4821 num_address_regs = 0; 4822 samplers_used = 0; 4823 images_used = 0; 4824 indirect_addr_consts = false; 4825 wpos_transform_const = -1; 4826 native_integers = false; 4827 mem_ctx = ralloc_context(NULL); 4828 ctx = NULL; 4829 prog = NULL; 4830 precise = 0; 4831 need_uarl = false; 4832 tg4_component_in_swizzle = false; 4833 shader_program = NULL; 4834 shader = NULL; 4835 options = NULL; 4836 have_sqrt = false; 4837 have_fma = false; 4838 use_shared_memory = false; 4839 has_tex_txf_lz = false; 4840 variables = NULL; 4841} 4842 4843static void var_destroy(struct hash_entry *entry) 4844{ 4845 variable_storage *storage = (variable_storage *)entry->data; 4846 4847 delete storage; 4848} 4849 4850glsl_to_tgsi_visitor::~glsl_to_tgsi_visitor() 4851{ 4852 _mesa_hash_table_destroy(variables, var_destroy); 4853 free(array_sizes); 4854 ralloc_free(mem_ctx); 4855} 4856 4857extern "C" void free_glsl_to_tgsi_visitor(glsl_to_tgsi_visitor *v) 4858{ 4859 delete v; 4860} 4861 4862 4863/** 4864 * Count resources used by the given gpu program (number of texture 4865 * samplers, etc). 4866 */ 4867static void 4868count_resources(glsl_to_tgsi_visitor *v, gl_program *prog) 4869{ 4870 v->samplers_used = 0; 4871 v->images_used = 0; 4872 BITSET_ZERO(prog->info.textures_used_by_txf); 4873 4874 foreach_in_list(glsl_to_tgsi_instruction, inst, &v->instructions) { 4875 if (inst->info->is_tex) { 4876 for (int i = 0; i < inst->sampler_array_size; i++) { 4877 unsigned idx = inst->sampler_base + i; 4878 v->samplers_used |= 1u << idx; 4879 4880 debug_assert(idx < (int)ARRAY_SIZE(v->sampler_types)); 4881 v->sampler_types[idx] = inst->tex_type; 4882 v->sampler_targets[idx] = 4883 st_translate_texture_target(inst->tex_target, inst->tex_shadow); 4884 4885 if (inst->op == TGSI_OPCODE_TXF || inst->op == TGSI_OPCODE_TXF_LZ) { 4886 BITSET_SET(prog->info.textures_used_by_txf, idx); 4887 } 4888 } 4889 } 4890 4891 if (inst->tex_target == TEXTURE_EXTERNAL_INDEX) 4892 prog->ExternalSamplersUsed |= 1 << inst->resource.index; 4893 4894 if (inst->resource.file != PROGRAM_UNDEFINED && ( 4895 is_resource_instruction(inst->op) || 4896 inst->op == TGSI_OPCODE_STORE)) { 4897 if (inst->resource.file == PROGRAM_MEMORY) { 4898 v->use_shared_memory = true; 4899 } else if (inst->resource.file == PROGRAM_IMAGE) { 4900 for (int i = 0; i < inst->sampler_array_size; i++) { 4901 unsigned idx = inst->sampler_base + i; 4902 v->images_used |= 1 << idx; 4903 v->image_targets[idx] = 4904 st_translate_texture_target(inst->tex_target, false); 4905 v->image_formats[idx] = inst->image_format; 4906 v->image_wr[idx] = !inst->read_only; 4907 } 4908 } 4909 } 4910 } 4911 prog->SamplersUsed = v->samplers_used; 4912 4913 if (v->shader_program != NULL) 4914 _mesa_update_shader_textures_used(v->shader_program, prog); 4915} 4916 4917/** 4918 * Returns the mask of channels (bitmask of WRITEMASK_X,Y,Z,W) which 4919 * are read from the given src in this instruction 4920 */ 4921static int 4922get_src_arg_mask(st_dst_reg dst, st_src_reg src) 4923{ 4924 int read_mask = 0, comp; 4925 4926 /* Now, given the src swizzle and the written channels, find which 4927 * components are actually read 4928 */ 4929 for (comp = 0; comp < 4; ++comp) { 4930 const unsigned coord = GET_SWZ(src.swizzle, comp); 4931 assert(coord < 4); 4932 if (dst.writemask & (1 << comp) && coord <= SWIZZLE_W) 4933 read_mask |= 1 << coord; 4934 } 4935 4936 return read_mask; 4937} 4938 4939/** 4940 * This pass replaces CMP T0, T1 T2 T0 with MOV T0, T2 when the CMP 4941 * instruction is the first instruction to write to register T0. There are 4942 * several lowering passes done in GLSL IR (e.g. branches and 4943 * relative addressing) that create a large number of conditional assignments 4944 * that ir_to_mesa converts to CMP instructions like the one mentioned above. 4945 * 4946 * Here is why this conversion is safe: 4947 * CMP T0, T1 T2 T0 can be expanded to: 4948 * if (T1 < 0.0) 4949 * MOV T0, T2; 4950 * else 4951 * MOV T0, T0; 4952 * 4953 * If (T1 < 0.0) evaluates to true then our replacement MOV T0, T2 is the same 4954 * as the original program. If (T1 < 0.0) evaluates to false, executing 4955 * MOV T0, T0 will store a garbage value in T0 since T0 is uninitialized. 4956 * Therefore, it doesn't matter that we are replacing MOV T0, T0 with MOV T0, T2 4957 * because any instruction that was going to read from T0 after this was going 4958 * to read a garbage value anyway. 4959 */ 4960void 4961glsl_to_tgsi_visitor::simplify_cmp(void) 4962{ 4963 int tempWritesSize = 0; 4964 unsigned *tempWrites = NULL; 4965 unsigned outputWrites[VARYING_SLOT_TESS_MAX]; 4966 4967 memset(outputWrites, 0, sizeof(outputWrites)); 4968 4969 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { 4970 unsigned prevWriteMask = 0; 4971 4972 /* Give up if we encounter relative addressing or flow control. */ 4973 if (inst->dst[0].reladdr || inst->dst[0].reladdr2 || 4974 inst->dst[1].reladdr || inst->dst[1].reladdr2 || 4975 inst->info->is_branch || 4976 inst->op == TGSI_OPCODE_CONT || 4977 inst->op == TGSI_OPCODE_END || 4978 inst->op == TGSI_OPCODE_RET) { 4979 break; 4980 } 4981 4982 if (inst->dst[0].file == PROGRAM_OUTPUT) { 4983 assert(inst->dst[0].index < (signed)ARRAY_SIZE(outputWrites)); 4984 prevWriteMask = outputWrites[inst->dst[0].index]; 4985 outputWrites[inst->dst[0].index] |= inst->dst[0].writemask; 4986 } else if (inst->dst[0].file == PROGRAM_TEMPORARY) { 4987 if (inst->dst[0].index >= tempWritesSize) { 4988 const int inc = 4096; 4989 4990 tempWrites = (unsigned*) 4991 realloc(tempWrites, 4992 (tempWritesSize + inc) * sizeof(unsigned)); 4993 if (!tempWrites) 4994 return; 4995 4996 memset(tempWrites + tempWritesSize, 0, inc * sizeof(unsigned)); 4997 tempWritesSize += inc; 4998 } 4999 5000 prevWriteMask = tempWrites[inst->dst[0].index]; 5001 tempWrites[inst->dst[0].index] |= inst->dst[0].writemask; 5002 } else 5003 continue; 5004 5005 /* For a CMP to be considered a conditional write, the destination 5006 * register and source register two must be the same. */ 5007 if (inst->op == TGSI_OPCODE_CMP 5008 && !(inst->dst[0].writemask & prevWriteMask) 5009 && inst->src[2].file == inst->dst[0].file 5010 && inst->src[2].index == inst->dst[0].index 5011 && inst->dst[0].writemask == 5012 get_src_arg_mask(inst->dst[0], inst->src[2])) { 5013 5014 inst->op = TGSI_OPCODE_MOV; 5015 inst->info = tgsi_get_opcode_info(inst->op); 5016 inst->src[0] = inst->src[1]; 5017 } 5018 } 5019 5020 free(tempWrites); 5021} 5022 5023static void 5024rename_temp_handle_src(struct rename_reg_pair *renames, st_src_reg *src) 5025{ 5026 if (src && src->file == PROGRAM_TEMPORARY) { 5027 int old_idx = src->index; 5028 if (renames[old_idx].valid) 5029 src->index = renames[old_idx].new_reg; 5030 } 5031} 5032 5033/* Replaces all references to a temporary register index with another index. */ 5034void 5035glsl_to_tgsi_visitor::rename_temp_registers(struct rename_reg_pair *renames) 5036{ 5037 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { 5038 unsigned j; 5039 for (j = 0; j < num_inst_src_regs(inst); j++) { 5040 rename_temp_handle_src(renames, &inst->src[j]); 5041 rename_temp_handle_src(renames, inst->src[j].reladdr); 5042 rename_temp_handle_src(renames, inst->src[j].reladdr2); 5043 } 5044 5045 for (j = 0; j < inst->tex_offset_num_offset; j++) { 5046 rename_temp_handle_src(renames, &inst->tex_offsets[j]); 5047 rename_temp_handle_src(renames, inst->tex_offsets[j].reladdr); 5048 rename_temp_handle_src(renames, inst->tex_offsets[j].reladdr2); 5049 } 5050 5051 rename_temp_handle_src(renames, &inst->resource); 5052 rename_temp_handle_src(renames, inst->resource.reladdr); 5053 rename_temp_handle_src(renames, inst->resource.reladdr2); 5054 5055 for (j = 0; j < num_inst_dst_regs(inst); j++) { 5056 if (inst->dst[j].file == PROGRAM_TEMPORARY) { 5057 int old_idx = inst->dst[j].index; 5058 if (renames[old_idx].valid) 5059 inst->dst[j].index = renames[old_idx].new_reg; 5060 } 5061 rename_temp_handle_src(renames, inst->dst[j].reladdr); 5062 rename_temp_handle_src(renames, inst->dst[j].reladdr2); 5063 } 5064 } 5065} 5066 5067void 5068glsl_to_tgsi_visitor::get_first_temp_write(int *first_writes) 5069{ 5070 int depth = 0; /* loop depth */ 5071 int loop_start = -1; /* index of the first active BGNLOOP (if any) */ 5072 unsigned i = 0, j; 5073 5074 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { 5075 for (j = 0; j < num_inst_dst_regs(inst); j++) { 5076 if (inst->dst[j].file == PROGRAM_TEMPORARY) { 5077 if (first_writes[inst->dst[j].index] == -1) 5078 first_writes[inst->dst[j].index] = (depth == 0) ? i : loop_start; 5079 } 5080 } 5081 5082 if (inst->op == TGSI_OPCODE_BGNLOOP) { 5083 if (depth++ == 0) 5084 loop_start = i; 5085 } else if (inst->op == TGSI_OPCODE_ENDLOOP) { 5086 if (--depth == 0) 5087 loop_start = -1; 5088 } 5089 assert(depth >= 0); 5090 i++; 5091 } 5092} 5093 5094void 5095glsl_to_tgsi_visitor::get_first_temp_read(int *first_reads) 5096{ 5097 int depth = 0; /* loop depth */ 5098 int loop_start = -1; /* index of the first active BGNLOOP (if any) */ 5099 unsigned i = 0, j; 5100 5101 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { 5102 for (j = 0; j < num_inst_src_regs(inst); j++) { 5103 if (inst->src[j].file == PROGRAM_TEMPORARY) { 5104 if (first_reads[inst->src[j].index] == -1) 5105 first_reads[inst->src[j].index] = (depth == 0) ? i : loop_start; 5106 } 5107 } 5108 for (j = 0; j < inst->tex_offset_num_offset; j++) { 5109 if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) { 5110 if (first_reads[inst->tex_offsets[j].index] == -1) 5111 first_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : loop_start; 5112 } 5113 } 5114 if (inst->op == TGSI_OPCODE_BGNLOOP) { 5115 if (depth++ == 0) 5116 loop_start = i; 5117 } else if (inst->op == TGSI_OPCODE_ENDLOOP) { 5118 if (--depth == 0) 5119 loop_start = -1; 5120 } 5121 assert(depth >= 0); 5122 i++; 5123 } 5124} 5125 5126void 5127glsl_to_tgsi_visitor::get_last_temp_read_first_temp_write(int *last_reads, int *first_writes) 5128{ 5129 int depth = 0; /* loop depth */ 5130 int loop_start = -1; /* index of the first active BGNLOOP (if any) */ 5131 unsigned i = 0, j; 5132 int k; 5133 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { 5134 for (j = 0; j < num_inst_src_regs(inst); j++) { 5135 if (inst->src[j].file == PROGRAM_TEMPORARY) 5136 last_reads[inst->src[j].index] = (depth == 0) ? i : -2; 5137 } 5138 for (j = 0; j < num_inst_dst_regs(inst); j++) { 5139 if (inst->dst[j].file == PROGRAM_TEMPORARY) { 5140 if (first_writes[inst->dst[j].index] == -1) 5141 first_writes[inst->dst[j].index] = (depth == 0) ? i : loop_start; 5142 last_reads[inst->dst[j].index] = (depth == 0) ? i : -2; 5143 } 5144 } 5145 for (j = 0; j < inst->tex_offset_num_offset; j++) { 5146 if (inst->tex_offsets[j].file == PROGRAM_TEMPORARY) 5147 last_reads[inst->tex_offsets[j].index] = (depth == 0) ? i : -2; 5148 } 5149 if (inst->op == TGSI_OPCODE_BGNLOOP) { 5150 if (depth++ == 0) 5151 loop_start = i; 5152 } else if (inst->op == TGSI_OPCODE_ENDLOOP) { 5153 if (--depth == 0) { 5154 loop_start = -1; 5155 for (k = 0; k < this->next_temp; k++) { 5156 if (last_reads[k] == -2) { 5157 last_reads[k] = i; 5158 } 5159 } 5160 } 5161 } 5162 assert(depth >= 0); 5163 i++; 5164 } 5165} 5166 5167void 5168glsl_to_tgsi_visitor::get_last_temp_write(int *last_writes) 5169{ 5170 int depth = 0; /* loop depth */ 5171 int i = 0, k; 5172 unsigned j; 5173 5174 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { 5175 for (j = 0; j < num_inst_dst_regs(inst); j++) { 5176 if (inst->dst[j].file == PROGRAM_TEMPORARY) 5177 last_writes[inst->dst[j].index] = (depth == 0) ? i : -2; 5178 } 5179 5180 if (inst->op == TGSI_OPCODE_BGNLOOP) 5181 depth++; 5182 else if (inst->op == TGSI_OPCODE_ENDLOOP) 5183 if (--depth == 0) { 5184 for (k = 0; k < this->next_temp; k++) { 5185 if (last_writes[k] == -2) { 5186 last_writes[k] = i; 5187 } 5188 } 5189 } 5190 assert(depth >= 0); 5191 i++; 5192 } 5193} 5194 5195/* 5196 * On a basic block basis, tracks available PROGRAM_TEMPORARY register 5197 * channels for copy propagation and updates following instructions to 5198 * use the original versions. 5199 * 5200 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass 5201 * will occur. As an example, a TXP production before this pass: 5202 * 5203 * 0: MOV TEMP[1], INPUT[4].xyyy; 5204 * 1: MOV TEMP[1].w, INPUT[4].wwww; 5205 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D; 5206 * 5207 * and after: 5208 * 5209 * 0: MOV TEMP[1], INPUT[4].xyyy; 5210 * 1: MOV TEMP[1].w, INPUT[4].wwww; 5211 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 5212 * 5213 * which allows for dead code elimination on TEMP[1]'s writes. 5214 */ 5215void 5216glsl_to_tgsi_visitor::copy_propagate(void) 5217{ 5218 glsl_to_tgsi_instruction **acp = rzalloc_array(mem_ctx, 5219 glsl_to_tgsi_instruction *, 5220 this->next_temp * 4); 5221 int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); 5222 int level = 0; 5223 5224 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { 5225 assert(inst->dst[0].file != PROGRAM_TEMPORARY 5226 || inst->dst[0].index < this->next_temp); 5227 5228 /* First, do any copy propagation possible into the src regs. */ 5229 for (int r = 0; r < 3; r++) { 5230 glsl_to_tgsi_instruction *first = NULL; 5231 bool good = true; 5232 int acp_base = inst->src[r].index * 4; 5233 5234 if (inst->src[r].file != PROGRAM_TEMPORARY || 5235 inst->src[r].reladdr || 5236 inst->src[r].reladdr2) 5237 continue; 5238 5239 /* See if we can find entries in the ACP consisting of MOVs 5240 * from the same src register for all the swizzled channels 5241 * of this src register reference. 5242 */ 5243 for (int i = 0; i < 4; i++) { 5244 int src_chan = GET_SWZ(inst->src[r].swizzle, i); 5245 glsl_to_tgsi_instruction *copy_chan = acp[acp_base + src_chan]; 5246 5247 if (!copy_chan) { 5248 good = false; 5249 break; 5250 } 5251 5252 assert(acp_level[acp_base + src_chan] <= level); 5253 5254 if (!first) { 5255 first = copy_chan; 5256 } else { 5257 if (first->src[0].file != copy_chan->src[0].file || 5258 first->src[0].index != copy_chan->src[0].index || 5259 first->src[0].double_reg2 != copy_chan->src[0].double_reg2 || 5260 first->src[0].index2D != copy_chan->src[0].index2D) { 5261 good = false; 5262 break; 5263 } 5264 } 5265 } 5266 5267 if (good) { 5268 /* We've now validated that we can copy-propagate to 5269 * replace this src register reference. Do it. 5270 */ 5271 inst->src[r].file = first->src[0].file; 5272 inst->src[r].index = first->src[0].index; 5273 inst->src[r].index2D = first->src[0].index2D; 5274 inst->src[r].has_index2 = first->src[0].has_index2; 5275 inst->src[r].double_reg2 = first->src[0].double_reg2; 5276 inst->src[r].array_id = first->src[0].array_id; 5277 5278 int swizzle = 0; 5279 for (int i = 0; i < 4; i++) { 5280 int src_chan = GET_SWZ(inst->src[r].swizzle, i); 5281 glsl_to_tgsi_instruction *copy_inst = acp[acp_base + src_chan]; 5282 swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) << (3 * i)); 5283 } 5284 inst->src[r].swizzle = swizzle; 5285 } 5286 } 5287 5288 switch (inst->op) { 5289 case TGSI_OPCODE_BGNLOOP: 5290 case TGSI_OPCODE_ENDLOOP: 5291 /* End of a basic block, clear the ACP entirely. */ 5292 memset(acp, 0, sizeof(*acp) * this->next_temp * 4); 5293 break; 5294 5295 case TGSI_OPCODE_IF: 5296 case TGSI_OPCODE_UIF: 5297 ++level; 5298 break; 5299 5300 case TGSI_OPCODE_ENDIF: 5301 case TGSI_OPCODE_ELSE: 5302 /* Clear all channels written inside the block from the ACP, but 5303 * leaving those that were not touched. 5304 */ 5305 for (int r = 0; r < this->next_temp; r++) { 5306 for (int c = 0; c < 4; c++) { 5307 if (!acp[4 * r + c]) 5308 continue; 5309 5310 if (acp_level[4 * r + c] >= level) 5311 acp[4 * r + c] = NULL; 5312 } 5313 } 5314 if (inst->op == TGSI_OPCODE_ENDIF) 5315 --level; 5316 break; 5317 5318 default: 5319 /* Continuing the block, clear any written channels from 5320 * the ACP. 5321 */ 5322 for (int d = 0; d < 2; d++) { 5323 if (inst->dst[d].file == PROGRAM_TEMPORARY && inst->dst[d].reladdr) { 5324 /* Any temporary might be written, so no copy propagation 5325 * across this instruction. 5326 */ 5327 memset(acp, 0, sizeof(*acp) * this->next_temp * 4); 5328 } else if (inst->dst[d].file == PROGRAM_OUTPUT && 5329 inst->dst[d].reladdr) { 5330 /* Any output might be written, so no copy propagation 5331 * from outputs across this instruction. 5332 */ 5333 for (int r = 0; r < this->next_temp; r++) { 5334 for (int c = 0; c < 4; c++) { 5335 if (!acp[4 * r + c]) 5336 continue; 5337 5338 if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT) 5339 acp[4 * r + c] = NULL; 5340 } 5341 } 5342 } else if (inst->dst[d].file == PROGRAM_TEMPORARY || 5343 inst->dst[d].file == PROGRAM_OUTPUT) { 5344 /* Clear where it's used as dst. */ 5345 if (inst->dst[d].file == PROGRAM_TEMPORARY) { 5346 for (int c = 0; c < 4; c++) { 5347 if (inst->dst[d].writemask & (1 << c)) 5348 acp[4 * inst->dst[d].index + c] = NULL; 5349 } 5350 } 5351 5352 /* Clear where it's used as src. */ 5353 for (int r = 0; r < this->next_temp; r++) { 5354 for (int c = 0; c < 4; c++) { 5355 if (!acp[4 * r + c]) 5356 continue; 5357 5358 int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c); 5359 5360 if (acp[4 * r + c]->src[0].file == inst->dst[d].file && 5361 acp[4 * r + c]->src[0].index == inst->dst[d].index && 5362 inst->dst[d].writemask & (1 << src_chan)) { 5363 acp[4 * r + c] = NULL; 5364 } 5365 } 5366 } 5367 } 5368 } 5369 break; 5370 } 5371 5372 /* If this is a copy, add it to the ACP. */ 5373 if (inst->op == TGSI_OPCODE_MOV && 5374 inst->dst[0].file == PROGRAM_TEMPORARY && 5375 !(inst->dst[0].file == inst->src[0].file && 5376 inst->dst[0].index == inst->src[0].index) && 5377 !inst->dst[0].reladdr && 5378 !inst->dst[0].reladdr2 && 5379 !inst->saturate && 5380 inst->src[0].file != PROGRAM_ARRAY && 5381 (inst->src[0].file != PROGRAM_OUTPUT || 5382 this->shader->Stage != MESA_SHADER_TESS_CTRL) && 5383 !inst->src[0].reladdr && 5384 !inst->src[0].reladdr2 && 5385 !inst->src[0].negate && 5386 !inst->src[0].abs) { 5387 for (int i = 0; i < 4; i++) { 5388 if (inst->dst[0].writemask & (1 << i)) { 5389 acp[4 * inst->dst[0].index + i] = inst; 5390 acp_level[4 * inst->dst[0].index + i] = level; 5391 } 5392 } 5393 } 5394 } 5395 5396 ralloc_free(acp_level); 5397 ralloc_free(acp); 5398} 5399 5400static void 5401dead_code_handle_reladdr(glsl_to_tgsi_instruction **writes, st_src_reg *reladdr) 5402{ 5403 if (reladdr && reladdr->file == PROGRAM_TEMPORARY) { 5404 /* Clear where it's used as src. */ 5405 int swz = GET_SWZ(reladdr->swizzle, 0); 5406 writes[4 * reladdr->index + swz] = NULL; 5407 } 5408} 5409 5410/* 5411 * On a basic block basis, tracks available PROGRAM_TEMPORARY registers for dead 5412 * code elimination. 5413 * 5414 * The glsl_to_tgsi_visitor lazily produces code assuming that this pass 5415 * will occur. As an example, a TXP production after copy propagation but 5416 * before this pass: 5417 * 5418 * 0: MOV TEMP[1], INPUT[4].xyyy; 5419 * 1: MOV TEMP[1].w, INPUT[4].wwww; 5420 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 5421 * 5422 * and after this pass: 5423 * 5424 * 0: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 5425 */ 5426int 5427glsl_to_tgsi_visitor::eliminate_dead_code(void) 5428{ 5429 glsl_to_tgsi_instruction **writes = rzalloc_array(mem_ctx, 5430 glsl_to_tgsi_instruction *, 5431 this->next_temp * 4); 5432 int *write_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); 5433 int level = 0; 5434 int removed = 0; 5435 5436 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { 5437 assert(inst->dst[0].file != PROGRAM_TEMPORARY 5438 || inst->dst[0].index < this->next_temp); 5439 5440 switch (inst->op) { 5441 case TGSI_OPCODE_BGNLOOP: 5442 case TGSI_OPCODE_ENDLOOP: 5443 case TGSI_OPCODE_CONT: 5444 case TGSI_OPCODE_BRK: 5445 /* End of a basic block, clear the write array entirely. 5446 * 5447 * This keeps us from killing dead code when the writes are 5448 * on either side of a loop, even when the register isn't touched 5449 * inside the loop. However, glsl_to_tgsi_visitor doesn't seem to emit 5450 * dead code of this type, so it shouldn't make a difference as long as 5451 * the dead code elimination pass in the GLSL compiler does its job. 5452 */ 5453 memset(writes, 0, sizeof(*writes) * this->next_temp * 4); 5454 break; 5455 5456 case TGSI_OPCODE_ENDIF: 5457 case TGSI_OPCODE_ELSE: 5458 /* Promote the recorded level of all channels written inside the 5459 * preceding if or else block to the level above the if/else block. 5460 */ 5461 for (int r = 0; r < this->next_temp; r++) { 5462 for (int c = 0; c < 4; c++) { 5463 if (!writes[4 * r + c]) 5464 continue; 5465 5466 if (write_level[4 * r + c] == level) 5467 write_level[4 * r + c] = level-1; 5468 } 5469 } 5470 if (inst->op == TGSI_OPCODE_ENDIF) 5471 --level; 5472 break; 5473 5474 case TGSI_OPCODE_IF: 5475 case TGSI_OPCODE_UIF: 5476 ++level; 5477 FALLTHROUGH; /* to mark the condition as read */ 5478 default: 5479 /* Continuing the block, clear any channels from the write array that 5480 * are read by this instruction. 5481 */ 5482 for (unsigned i = 0; i < ARRAY_SIZE(inst->src); i++) { 5483 if (inst->src[i].file == PROGRAM_TEMPORARY && inst->src[i].reladdr){ 5484 /* Any temporary might be read, so no dead code elimination 5485 * across this instruction. 5486 */ 5487 memset(writes, 0, sizeof(*writes) * this->next_temp * 4); 5488 } else if (inst->src[i].file == PROGRAM_TEMPORARY) { 5489 /* Clear where it's used as src. */ 5490 int src_chans = 1 << GET_SWZ(inst->src[i].swizzle, 0); 5491 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 1); 5492 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 2); 5493 src_chans |= 1 << GET_SWZ(inst->src[i].swizzle, 3); 5494 5495 for (int c = 0; c < 4; c++) { 5496 if (src_chans & (1 << c)) 5497 writes[4 * inst->src[i].index + c] = NULL; 5498 } 5499 } 5500 dead_code_handle_reladdr(writes, inst->src[i].reladdr); 5501 dead_code_handle_reladdr(writes, inst->src[i].reladdr2); 5502 } 5503 for (unsigned i = 0; i < inst->tex_offset_num_offset; i++) { 5504 if (inst->tex_offsets[i].file == PROGRAM_TEMPORARY && inst->tex_offsets[i].reladdr){ 5505 /* Any temporary might be read, so no dead code elimination 5506 * across this instruction. 5507 */ 5508 memset(writes, 0, sizeof(*writes) * this->next_temp * 4); 5509 } else if (inst->tex_offsets[i].file == PROGRAM_TEMPORARY) { 5510 /* Clear where it's used as src. */ 5511 int src_chans = 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 0); 5512 src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 1); 5513 src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 2); 5514 src_chans |= 1 << GET_SWZ(inst->tex_offsets[i].swizzle, 3); 5515 5516 for (int c = 0; c < 4; c++) { 5517 if (src_chans & (1 << c)) 5518 writes[4 * inst->tex_offsets[i].index + c] = NULL; 5519 } 5520 } 5521 dead_code_handle_reladdr(writes, inst->tex_offsets[i].reladdr); 5522 dead_code_handle_reladdr(writes, inst->tex_offsets[i].reladdr2); 5523 } 5524 5525 if (inst->resource.file == PROGRAM_TEMPORARY) { 5526 int src_chans; 5527 5528 src_chans = 1 << GET_SWZ(inst->resource.swizzle, 0); 5529 src_chans |= 1 << GET_SWZ(inst->resource.swizzle, 1); 5530 src_chans |= 1 << GET_SWZ(inst->resource.swizzle, 2); 5531 src_chans |= 1 << GET_SWZ(inst->resource.swizzle, 3); 5532 5533 for (int c = 0; c < 4; c++) { 5534 if (src_chans & (1 << c)) 5535 writes[4 * inst->resource.index + c] = NULL; 5536 } 5537 } 5538 dead_code_handle_reladdr(writes, inst->resource.reladdr); 5539 dead_code_handle_reladdr(writes, inst->resource.reladdr2); 5540 5541 for (unsigned i = 0; i < ARRAY_SIZE(inst->dst); i++) { 5542 dead_code_handle_reladdr(writes, inst->dst[i].reladdr); 5543 dead_code_handle_reladdr(writes, inst->dst[i].reladdr2); 5544 } 5545 break; 5546 } 5547 5548 /* If this instruction writes to a temporary, add it to the write array. 5549 * If there is already an instruction in the write array for one or more 5550 * of the channels, flag that channel write as dead. 5551 */ 5552 for (unsigned i = 0; i < ARRAY_SIZE(inst->dst); i++) { 5553 if (inst->dst[i].file == PROGRAM_TEMPORARY && 5554 !inst->dst[i].reladdr) { 5555 for (int c = 0; c < 4; c++) { 5556 if (inst->dst[i].writemask & (1 << c)) { 5557 if (writes[4 * inst->dst[i].index + c]) { 5558 if (write_level[4 * inst->dst[i].index + c] < level) 5559 continue; 5560 else 5561 writes[4 * inst->dst[i].index + c]->dead_mask |= (1 << c); 5562 } 5563 writes[4 * inst->dst[i].index + c] = inst; 5564 write_level[4 * inst->dst[i].index + c] = level; 5565 } 5566 } 5567 } 5568 } 5569 } 5570 5571 /* Anything still in the write array at this point is dead code. */ 5572 for (int r = 0; r < this->next_temp; r++) { 5573 for (int c = 0; c < 4; c++) { 5574 glsl_to_tgsi_instruction *inst = writes[4 * r + c]; 5575 if (inst) 5576 inst->dead_mask |= (1 << c); 5577 } 5578 } 5579 5580 /* Now actually remove the instructions that are completely dead and update 5581 * the writemask of other instructions with dead channels. 5582 */ 5583 foreach_in_list_safe(glsl_to_tgsi_instruction, inst, &this->instructions) { 5584 if (!inst->dead_mask || !inst->dst[0].writemask) 5585 continue; 5586 /* No amount of dead masks should remove memory stores */ 5587 if (inst->info->is_store) 5588 continue; 5589 5590 if ((inst->dst[0].writemask & ~inst->dead_mask) == 0) { 5591 inst->remove(); 5592 delete inst; 5593 removed++; 5594 } else { 5595 if (glsl_base_type_is_64bit(inst->dst[0].type)) { 5596 if (inst->dead_mask == WRITEMASK_XY || 5597 inst->dead_mask == WRITEMASK_ZW) 5598 inst->dst[0].writemask &= ~(inst->dead_mask); 5599 } else 5600 inst->dst[0].writemask &= ~(inst->dead_mask); 5601 } 5602 } 5603 5604 ralloc_free(write_level); 5605 ralloc_free(writes); 5606 5607 return removed; 5608} 5609 5610/* merge DFRACEXP instructions into one. */ 5611void 5612glsl_to_tgsi_visitor::merge_two_dsts(void) 5613{ 5614 /* We never delete inst, but we may delete its successor. */ 5615 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { 5616 glsl_to_tgsi_instruction *inst2; 5617 unsigned defined; 5618 5619 if (num_inst_dst_regs(inst) != 2) 5620 continue; 5621 5622 if (inst->dst[0].file != PROGRAM_UNDEFINED && 5623 inst->dst[1].file != PROGRAM_UNDEFINED) 5624 continue; 5625 5626 assert(inst->dst[0].file != PROGRAM_UNDEFINED || 5627 inst->dst[1].file != PROGRAM_UNDEFINED); 5628 5629 if (inst->dst[0].file == PROGRAM_UNDEFINED) 5630 defined = 1; 5631 else 5632 defined = 0; 5633 5634 inst2 = (glsl_to_tgsi_instruction *) inst->next; 5635 while (!inst2->is_tail_sentinel()) { 5636 if (inst->op == inst2->op && 5637 inst2->dst[defined].file == PROGRAM_UNDEFINED && 5638 inst->src[0].file == inst2->src[0].file && 5639 inst->src[0].index == inst2->src[0].index && 5640 inst->src[0].type == inst2->src[0].type && 5641 inst->src[0].swizzle == inst2->src[0].swizzle) 5642 break; 5643 inst2 = (glsl_to_tgsi_instruction *) inst2->next; 5644 } 5645 5646 if (inst2->is_tail_sentinel()) { 5647 /* Undefined destinations are not allowed, substitute with an unused 5648 * temporary register. 5649 */ 5650 st_src_reg tmp = get_temp(glsl_type::vec4_type); 5651 inst->dst[defined ^ 1] = st_dst_reg(tmp); 5652 inst->dst[defined ^ 1].writemask = 0; 5653 continue; 5654 } 5655 5656 inst->dst[defined ^ 1] = inst2->dst[defined ^ 1]; 5657 inst2->remove(); 5658 delete inst2; 5659 } 5660} 5661 5662template <typename st_reg> 5663void test_indirect_access(const st_reg& reg, bool *has_indirect_access) 5664{ 5665 if (reg.file == PROGRAM_ARRAY) { 5666 if (reg.reladdr || reg.reladdr2 || reg.has_index2) { 5667 has_indirect_access[reg.array_id] = true; 5668 if (reg.reladdr) 5669 test_indirect_access(*reg.reladdr, has_indirect_access); 5670 if (reg.reladdr2) 5671 test_indirect_access(*reg.reladdr2, has_indirect_access); 5672 } 5673 } 5674} 5675 5676template <typename st_reg> 5677void remap_array(st_reg& reg, const int *array_remap_info, 5678 const bool *has_indirect_access) 5679{ 5680 if (reg.file == PROGRAM_ARRAY) { 5681 if (!has_indirect_access[reg.array_id]) { 5682 reg.file = PROGRAM_TEMPORARY; 5683 reg.index = reg.index + array_remap_info[reg.array_id]; 5684 reg.array_id = 0; 5685 } else { 5686 reg.array_id = array_remap_info[reg.array_id]; 5687 } 5688 5689 if (reg.reladdr) 5690 remap_array(*reg.reladdr, array_remap_info, has_indirect_access); 5691 5692 if (reg.reladdr2) 5693 remap_array(*reg.reladdr2, array_remap_info, has_indirect_access); 5694 } 5695} 5696 5697/* One-dimensional arrays whose elements are only accessed directly are 5698 * replaced by an according set of temporary registers that then can become 5699 * subject to further optimization steps like copy propagation and 5700 * register merging. 5701 */ 5702void 5703glsl_to_tgsi_visitor::split_arrays(void) 5704{ 5705 if (!next_array) 5706 return; 5707 5708 bool *has_indirect_access = rzalloc_array(mem_ctx, bool, next_array + 1); 5709 5710 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { 5711 for (unsigned j = 0; j < num_inst_src_regs(inst); j++) 5712 test_indirect_access(inst->src[j], has_indirect_access); 5713 5714 for (unsigned j = 0; j < inst->tex_offset_num_offset; j++) 5715 test_indirect_access(inst->tex_offsets[j], has_indirect_access); 5716 5717 for (unsigned j = 0; j < num_inst_dst_regs(inst); j++) 5718 test_indirect_access(inst->dst[j], has_indirect_access); 5719 5720 test_indirect_access(inst->resource, has_indirect_access); 5721 } 5722 5723 unsigned array_offset = 0; 5724 unsigned n_remaining_arrays = 0; 5725 5726 /* Double use: For arrays that get split this value will contain 5727 * the base index of the temporary registers this array is replaced 5728 * with. For arrays that remain it contains the new array ID. 5729 */ 5730 int *array_remap_info = rzalloc_array(has_indirect_access, int, 5731 next_array + 1); 5732 5733 for (unsigned i = 1; i <= next_array; ++i) { 5734 if (!has_indirect_access[i]) { 5735 array_remap_info[i] = this->next_temp + array_offset; 5736 array_offset += array_sizes[i - 1]; 5737 } else { 5738 array_sizes[n_remaining_arrays] = array_sizes[i-1]; 5739 array_remap_info[i] = ++n_remaining_arrays; 5740 } 5741 } 5742 5743 if (next_array != n_remaining_arrays) { 5744 foreach_in_list(glsl_to_tgsi_instruction, inst, &this->instructions) { 5745 for (unsigned j = 0; j < num_inst_src_regs(inst); j++) 5746 remap_array(inst->src[j], array_remap_info, has_indirect_access); 5747 5748 for (unsigned j = 0; j < inst->tex_offset_num_offset; j++) 5749 remap_array(inst->tex_offsets[j], array_remap_info, has_indirect_access); 5750 5751 for (unsigned j = 0; j < num_inst_dst_regs(inst); j++) { 5752 remap_array(inst->dst[j], array_remap_info, has_indirect_access); 5753 } 5754 remap_array(inst->resource, array_remap_info, has_indirect_access); 5755 } 5756 } 5757 5758 ralloc_free(has_indirect_access); 5759 this->next_temp += array_offset; 5760 next_array = n_remaining_arrays; 5761} 5762 5763/* Merges temporary registers together where possible to reduce the number of 5764 * registers needed to run a program. 5765 * 5766 * Produces optimal code only after copy propagation and dead code elimination 5767 * have been run. */ 5768void 5769glsl_to_tgsi_visitor::merge_registers(void) 5770{ 5771 class array_live_range *arr_live_ranges = NULL; 5772 5773 struct register_live_range *reg_live_ranges = 5774 rzalloc_array(mem_ctx, struct register_live_range, this->next_temp); 5775 5776 if (this->next_array > 0) { 5777 arr_live_ranges = new array_live_range[this->next_array]; 5778 for (unsigned i = 0; i < this->next_array; ++i) 5779 arr_live_ranges[i] = array_live_range(i+1, this->array_sizes[i]); 5780 } 5781 5782 5783 if (get_temp_registers_required_live_ranges(reg_live_ranges, &this->instructions, 5784 this->next_temp, reg_live_ranges, 5785 this->next_array, arr_live_ranges)) { 5786 struct rename_reg_pair *renames = 5787 rzalloc_array(reg_live_ranges, struct rename_reg_pair, this->next_temp); 5788 get_temp_registers_remapping(reg_live_ranges, this->next_temp, 5789 reg_live_ranges, renames); 5790 rename_temp_registers(renames); 5791 5792 this->next_array = merge_arrays(this->next_array, this->array_sizes, 5793 &this->instructions, arr_live_ranges); 5794 } 5795 5796 if (arr_live_ranges) 5797 delete[] arr_live_ranges; 5798 5799 ralloc_free(reg_live_ranges); 5800} 5801 5802/* Reassign indices to temporary registers by reusing unused indices created 5803 * by optimization passes. */ 5804void 5805glsl_to_tgsi_visitor::renumber_registers(void) 5806{ 5807 int i = 0; 5808 int new_index = 0; 5809 int *first_writes = ralloc_array(mem_ctx, int, this->next_temp); 5810 struct rename_reg_pair *renames = rzalloc_array(mem_ctx, struct rename_reg_pair, this->next_temp); 5811 5812 for (i = 0; i < this->next_temp; i++) { 5813 first_writes[i] = -1; 5814 } 5815 get_first_temp_write(first_writes); 5816 5817 for (i = 0; i < this->next_temp; i++) { 5818 if (first_writes[i] < 0) continue; 5819 if (i != new_index) { 5820 renames[i].new_reg = new_index; 5821 renames[i].valid = true; 5822 } 5823 new_index++; 5824 } 5825 5826 rename_temp_registers(renames); 5827 this->next_temp = new_index; 5828 ralloc_free(renames); 5829 ralloc_free(first_writes); 5830} 5831 5832#ifndef NDEBUG 5833void glsl_to_tgsi_visitor::print_stats() 5834{ 5835 int narray_registers = 0; 5836 for (unsigned i = 0; i < this->next_array; ++i) 5837 narray_registers += this->array_sizes[i]; 5838 5839 int ninstructions = 0; 5840 foreach_in_list(glsl_to_tgsi_instruction, inst, &instructions) { 5841 ++ninstructions; 5842 } 5843 5844 simple_mtx_lock(&print_stats_mutex); 5845 stats_log << next_array << ", " 5846 << next_temp << ", " 5847 << narray_registers << ", " 5848 << next_temp + narray_registers << ", " 5849 << ninstructions << "\n"; 5850 simple_mtx_unlock(&print_stats_mutex); 5851} 5852#endif 5853/* ------------------------- TGSI conversion stuff -------------------------- */ 5854 5855/** 5856 * Intermediate state used during shader translation. 5857 */ 5858struct st_translate { 5859 struct ureg_program *ureg; 5860 5861 unsigned temps_size; 5862 struct ureg_dst *temps; 5863 5864 struct ureg_dst *arrays; 5865 unsigned num_temp_arrays; 5866 struct ureg_src *constants; 5867 int num_constants; 5868 struct ureg_src *immediates; 5869 int num_immediates; 5870 struct ureg_dst outputs[PIPE_MAX_SHADER_OUTPUTS]; 5871 struct ureg_src inputs[PIPE_MAX_SHADER_INPUTS]; 5872 struct ureg_dst address[3]; 5873 struct ureg_src samplers[PIPE_MAX_SAMPLERS]; 5874 struct ureg_src buffers[PIPE_MAX_SHADER_BUFFERS]; 5875 struct ureg_src images[PIPE_MAX_SHADER_IMAGES]; 5876 struct ureg_src systemValues[SYSTEM_VALUE_MAX]; 5877 struct ureg_src hw_atomics[PIPE_MAX_HW_ATOMIC_BUFFERS]; 5878 struct ureg_src shared_memory; 5879 unsigned *array_sizes; 5880 struct inout_decl *input_decls; 5881 unsigned num_input_decls; 5882 struct inout_decl *output_decls; 5883 unsigned num_output_decls; 5884 5885 const ubyte *inputMapping; 5886 const ubyte *outputMapping; 5887 5888 enum pipe_shader_type procType; /**< PIPE_SHADER_VERTEX/FRAGMENT */ 5889 bool need_uarl; 5890 bool tg4_component_in_swizzle; 5891}; 5892 5893/** 5894 * Map a glsl_to_tgsi constant/immediate to a TGSI immediate. 5895 */ 5896static struct ureg_src 5897emit_immediate(struct st_translate *t, 5898 gl_constant_value values[4], 5899 GLenum type, int size) 5900{ 5901 struct ureg_program *ureg = t->ureg; 5902 5903 switch (type) { 5904 case GL_FLOAT: 5905 return ureg_DECL_immediate(ureg, &values[0].f, size); 5906 case GL_DOUBLE: 5907 return ureg_DECL_immediate_f64(ureg, (double *)&values[0].f, size); 5908 case GL_INT64_ARB: 5909 return ureg_DECL_immediate_int64(ureg, (int64_t *)&values[0].f, size); 5910 case GL_UNSIGNED_INT64_ARB: 5911 return ureg_DECL_immediate_uint64(ureg, (uint64_t *)&values[0].f, size); 5912 case GL_INT: 5913 return ureg_DECL_immediate_int(ureg, &values[0].i, size); 5914 case GL_UNSIGNED_INT: 5915 case GL_BOOL: 5916 return ureg_DECL_immediate_uint(ureg, &values[0].u, size); 5917 default: 5918 assert(!"should not get here - type must be float, int, uint, or bool"); 5919 return ureg_src_undef(); 5920 } 5921} 5922 5923/** 5924 * Map a glsl_to_tgsi dst register to a TGSI ureg_dst register. 5925 */ 5926static struct ureg_dst 5927dst_register(struct st_translate *t, gl_register_file file, unsigned index, 5928 unsigned array_id) 5929{ 5930 unsigned array; 5931 5932 switch (file) { 5933 case PROGRAM_UNDEFINED: 5934 return ureg_dst_undef(); 5935 5936 case PROGRAM_TEMPORARY: 5937 /* Allocate space for temporaries on demand. */ 5938 if (index >= t->temps_size) { 5939 const int inc = align(index - t->temps_size + 1, 4096); 5940 5941 t->temps = (struct ureg_dst*) 5942 realloc(t->temps, 5943 (t->temps_size + inc) * sizeof(struct ureg_dst)); 5944 if (!t->temps) 5945 return ureg_dst_undef(); 5946 5947 memset(t->temps + t->temps_size, 0, inc * sizeof(struct ureg_dst)); 5948 t->temps_size += inc; 5949 } 5950 5951 if (ureg_dst_is_undef(t->temps[index])) 5952 t->temps[index] = ureg_DECL_local_temporary(t->ureg); 5953 5954 return t->temps[index]; 5955 5956 case PROGRAM_ARRAY: 5957 assert(array_id && array_id <= t->num_temp_arrays); 5958 array = array_id - 1; 5959 5960 if (ureg_dst_is_undef(t->arrays[array])) 5961 t->arrays[array] = ureg_DECL_array_temporary( 5962 t->ureg, t->array_sizes[array], TRUE); 5963 5964 return ureg_dst_array_offset(t->arrays[array], index); 5965 5966 case PROGRAM_OUTPUT: 5967 if (!array_id) { 5968 if (t->procType == PIPE_SHADER_FRAGMENT) 5969 assert(index < 2 * FRAG_RESULT_MAX); 5970 else if (t->procType == PIPE_SHADER_TESS_CTRL || 5971 t->procType == PIPE_SHADER_TESS_EVAL) 5972 assert(index < VARYING_SLOT_TESS_MAX); 5973 else 5974 assert(index < VARYING_SLOT_MAX); 5975 5976 assert(t->outputMapping[index] < ARRAY_SIZE(t->outputs)); 5977 assert(t->outputs[t->outputMapping[index]].File != TGSI_FILE_NULL); 5978 return t->outputs[t->outputMapping[index]]; 5979 } 5980 else { 5981 struct inout_decl *decl = 5982 find_inout_array(t->output_decls, 5983 t->num_output_decls, array_id); 5984 unsigned mesa_index = decl->mesa_index; 5985 ubyte slot = t->outputMapping[mesa_index]; 5986 5987 assert(slot != 0xff && t->outputs[slot].File == TGSI_FILE_OUTPUT); 5988 5989 struct ureg_dst dst = t->outputs[slot]; 5990 dst.ArrayID = array_id; 5991 return ureg_dst_array_offset(dst, index - mesa_index); 5992 } 5993 5994 case PROGRAM_ADDRESS: 5995 return t->address[index]; 5996 5997 default: 5998 assert(!"unknown dst register file"); 5999 return ureg_dst_undef(); 6000 } 6001} 6002 6003static struct ureg_src 6004translate_src(struct st_translate *t, const st_src_reg *src_reg); 6005 6006static struct ureg_src 6007translate_addr(struct st_translate *t, const st_src_reg *reladdr, 6008 unsigned addr_index) 6009{ 6010 if (t->need_uarl || !reladdr->is_legal_tgsi_address_operand()) 6011 return ureg_src(t->address[addr_index]); 6012 6013 return translate_src(t, reladdr); 6014} 6015 6016/** 6017 * Create a TGSI ureg_dst register from an st_dst_reg. 6018 */ 6019static struct ureg_dst 6020translate_dst(struct st_translate *t, 6021 const st_dst_reg *dst_reg, 6022 bool saturate) 6023{ 6024 struct ureg_dst dst = dst_register(t, dst_reg->file, dst_reg->index, 6025 dst_reg->array_id); 6026 6027 if (dst.File == TGSI_FILE_NULL) 6028 return dst; 6029 6030 dst = ureg_writemask(dst, dst_reg->writemask); 6031 6032 if (saturate) 6033 dst = ureg_saturate(dst); 6034 6035 if (dst_reg->reladdr != NULL) { 6036 assert(dst_reg->file != PROGRAM_TEMPORARY); 6037 dst = ureg_dst_indirect(dst, translate_addr(t, dst_reg->reladdr, 0)); 6038 } 6039 6040 if (dst_reg->has_index2) { 6041 if (dst_reg->reladdr2) 6042 dst = ureg_dst_dimension_indirect(dst, 6043 translate_addr(t, dst_reg->reladdr2, 1), 6044 dst_reg->index2D); 6045 else 6046 dst = ureg_dst_dimension(dst, dst_reg->index2D); 6047 } 6048 6049 return dst; 6050} 6051 6052/** 6053 * Create a TGSI ureg_src register from an st_src_reg. 6054 */ 6055static struct ureg_src 6056translate_src(struct st_translate *t, const st_src_reg *src_reg) 6057{ 6058 struct ureg_src src; 6059 int index = src_reg->index; 6060 int double_reg2 = src_reg->double_reg2 ? 1 : 0; 6061 6062 switch (src_reg->file) { 6063 case PROGRAM_UNDEFINED: 6064 src = ureg_imm4f(t->ureg, 0, 0, 0, 0); 6065 break; 6066 6067 case PROGRAM_TEMPORARY: 6068 case PROGRAM_ARRAY: 6069 src = ureg_src(dst_register(t, src_reg->file, src_reg->index, 6070 src_reg->array_id)); 6071 break; 6072 6073 case PROGRAM_OUTPUT: { 6074 struct ureg_dst dst = dst_register(t, src_reg->file, src_reg->index, 6075 src_reg->array_id); 6076 assert(dst.WriteMask != 0); 6077 unsigned shift = ffs(dst.WriteMask) - 1; 6078 src = ureg_swizzle(ureg_src(dst), 6079 shift, 6080 MIN2(shift + 1, 3), 6081 MIN2(shift + 2, 3), 6082 MIN2(shift + 3, 3)); 6083 break; 6084 } 6085 6086 case PROGRAM_UNIFORM: 6087 assert(src_reg->index >= 0); 6088 src = src_reg->index < t->num_constants ? 6089 t->constants[src_reg->index] : ureg_imm4f(t->ureg, 0, 0, 0, 0); 6090 break; 6091 case PROGRAM_STATE_VAR: 6092 case PROGRAM_CONSTANT: /* ie, immediate */ 6093 if (src_reg->has_index2) 6094 src = ureg_src_register(TGSI_FILE_CONSTANT, src_reg->index); 6095 else 6096 src = src_reg->index >= 0 && src_reg->index < t->num_constants ? 6097 t->constants[src_reg->index] : ureg_imm4f(t->ureg, 0, 0, 0, 0); 6098 break; 6099 6100 case PROGRAM_IMMEDIATE: 6101 assert(src_reg->index >= 0 && src_reg->index < t->num_immediates); 6102 src = t->immediates[src_reg->index]; 6103 break; 6104 6105 case PROGRAM_INPUT: 6106 /* GLSL inputs are 64-bit containers, so we have to 6107 * map back to the original index and add the offset after 6108 * mapping. */ 6109 index -= double_reg2; 6110 if (!src_reg->array_id) { 6111 assert(t->inputMapping[index] < ARRAY_SIZE(t->inputs)); 6112 assert(t->inputs[t->inputMapping[index]].File != TGSI_FILE_NULL); 6113 src = t->inputs[t->inputMapping[index] + double_reg2]; 6114 } 6115 else { 6116 struct inout_decl *decl = find_inout_array(t->input_decls, 6117 t->num_input_decls, 6118 src_reg->array_id); 6119 unsigned mesa_index = decl->mesa_index; 6120 ubyte slot = t->inputMapping[mesa_index]; 6121 6122 assert(slot != 0xff && t->inputs[slot].File == TGSI_FILE_INPUT); 6123 6124 src = t->inputs[slot]; 6125 src.ArrayID = src_reg->array_id; 6126 src = ureg_src_array_offset(src, index + double_reg2 - mesa_index); 6127 } 6128 break; 6129 6130 case PROGRAM_ADDRESS: 6131 src = ureg_src(t->address[src_reg->index]); 6132 break; 6133 6134 case PROGRAM_SYSTEM_VALUE: 6135 assert(src_reg->index < (int) ARRAY_SIZE(t->systemValues)); 6136 src = t->systemValues[src_reg->index]; 6137 break; 6138 6139 case PROGRAM_HW_ATOMIC: 6140 src = ureg_src_array_register(TGSI_FILE_HW_ATOMIC, src_reg->index, 6141 src_reg->array_id); 6142 break; 6143 6144 default: 6145 assert(!"unknown src register file"); 6146 return ureg_src_undef(); 6147 } 6148 6149 if (src_reg->has_index2) { 6150 /* 2D indexes occur with geometry shader inputs (attrib, vertex) 6151 * and UBO constant buffers (buffer, position). 6152 */ 6153 if (src_reg->reladdr2) 6154 src = ureg_src_dimension_indirect(src, 6155 translate_addr(t, src_reg->reladdr2, 1), 6156 src_reg->index2D); 6157 else 6158 src = ureg_src_dimension(src, src_reg->index2D); 6159 } 6160 6161 src = ureg_swizzle(src, 6162 GET_SWZ(src_reg->swizzle, 0) & 0x3, 6163 GET_SWZ(src_reg->swizzle, 1) & 0x3, 6164 GET_SWZ(src_reg->swizzle, 2) & 0x3, 6165 GET_SWZ(src_reg->swizzle, 3) & 0x3); 6166 6167 if (src_reg->abs) 6168 src = ureg_abs(src); 6169 6170 if ((src_reg->negate & 0xf) == NEGATE_XYZW) 6171 src = ureg_negate(src); 6172 6173 if (src_reg->reladdr != NULL) { 6174 assert(src_reg->file != PROGRAM_TEMPORARY); 6175 src = ureg_src_indirect(src, translate_addr(t, src_reg->reladdr, 0)); 6176 } 6177 6178 return src; 6179} 6180 6181static struct tgsi_texture_offset 6182translate_tex_offset(struct st_translate *t, 6183 const st_src_reg *in_offset) 6184{ 6185 struct tgsi_texture_offset offset; 6186 struct ureg_src src = translate_src(t, in_offset); 6187 6188 offset.File = src.File; 6189 offset.Index = src.Index; 6190 offset.SwizzleX = src.SwizzleX; 6191 offset.SwizzleY = src.SwizzleY; 6192 offset.SwizzleZ = src.SwizzleZ; 6193 offset.Padding = 0; 6194 6195 assert(!src.Indirect); 6196 assert(!src.DimIndirect); 6197 assert(!src.Dimension); 6198 assert(!src.Absolute); /* those shouldn't be used with integers anyway */ 6199 assert(!src.Negate); 6200 6201 return offset; 6202} 6203 6204static void 6205compile_tgsi_instruction(struct st_translate *t, 6206 const glsl_to_tgsi_instruction *inst) 6207{ 6208 struct ureg_program *ureg = t->ureg; 6209 int i; 6210 struct ureg_dst dst[2]; 6211 struct ureg_src src[4]; 6212 struct tgsi_texture_offset texoffsets[MAX_GLSL_TEXTURE_OFFSET]; 6213 6214 int num_dst; 6215 int num_src; 6216 enum tgsi_texture_type tex_target = TGSI_TEXTURE_BUFFER; 6217 6218 num_dst = num_inst_dst_regs(inst); 6219 num_src = num_inst_src_regs(inst); 6220 6221 for (i = 0; i < num_dst; i++) 6222 dst[i] = translate_dst(t, 6223 &inst->dst[i], 6224 inst->saturate); 6225 6226 for (i = 0; i < num_src; i++) 6227 src[i] = translate_src(t, &inst->src[i]); 6228 6229 switch (inst->op) { 6230 case TGSI_OPCODE_BGNLOOP: 6231 case TGSI_OPCODE_ELSE: 6232 case TGSI_OPCODE_ENDLOOP: 6233 case TGSI_OPCODE_IF: 6234 case TGSI_OPCODE_UIF: 6235 assert(num_dst == 0); 6236 ureg_insn(ureg, inst->op, NULL, 0, src, num_src, inst->precise); 6237 return; 6238 6239 case TGSI_OPCODE_TEX: 6240 case TGSI_OPCODE_TEX_LZ: 6241 case TGSI_OPCODE_TXB: 6242 case TGSI_OPCODE_TXD: 6243 case TGSI_OPCODE_TXL: 6244 case TGSI_OPCODE_TXP: 6245 case TGSI_OPCODE_TXQ: 6246 case TGSI_OPCODE_TXQS: 6247 case TGSI_OPCODE_TXF: 6248 case TGSI_OPCODE_TXF_LZ: 6249 case TGSI_OPCODE_TEX2: 6250 case TGSI_OPCODE_TXB2: 6251 case TGSI_OPCODE_TXL2: 6252 case TGSI_OPCODE_TG4: 6253 case TGSI_OPCODE_LODQ: 6254 case TGSI_OPCODE_SAMP2HND: 6255 if (inst->resource.file == PROGRAM_SAMPLER) { 6256 src[num_src] = t->samplers[inst->resource.index]; 6257 if (t->tg4_component_in_swizzle && inst->op == TGSI_OPCODE_TG4) 6258 src[num_src].SwizzleX = inst->gather_component; 6259 } else { 6260 /* Bindless samplers. */ 6261 src[num_src] = translate_src(t, &inst->resource); 6262 } 6263 assert(src[num_src].File != TGSI_FILE_NULL); 6264 if (inst->resource.reladdr) 6265 src[num_src] = 6266 ureg_src_indirect(src[num_src], 6267 translate_addr(t, inst->resource.reladdr, 2)); 6268 num_src++; 6269 for (i = 0; i < (int)inst->tex_offset_num_offset; i++) { 6270 texoffsets[i] = translate_tex_offset(t, &inst->tex_offsets[i]); 6271 } 6272 tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow); 6273 6274 ureg_tex_insn(ureg, 6275 inst->op, 6276 dst, num_dst, 6277 tex_target, 6278 st_translate_texture_type(inst->tex_type), 6279 texoffsets, inst->tex_offset_num_offset, 6280 src, num_src); 6281 return; 6282 6283 case TGSI_OPCODE_RESQ: 6284 case TGSI_OPCODE_LOAD: 6285 case TGSI_OPCODE_ATOMUADD: 6286 case TGSI_OPCODE_ATOMXCHG: 6287 case TGSI_OPCODE_ATOMCAS: 6288 case TGSI_OPCODE_ATOMAND: 6289 case TGSI_OPCODE_ATOMOR: 6290 case TGSI_OPCODE_ATOMXOR: 6291 case TGSI_OPCODE_ATOMUMIN: 6292 case TGSI_OPCODE_ATOMUMAX: 6293 case TGSI_OPCODE_ATOMIMIN: 6294 case TGSI_OPCODE_ATOMIMAX: 6295 case TGSI_OPCODE_ATOMFADD: 6296 case TGSI_OPCODE_IMG2HND: 6297 case TGSI_OPCODE_ATOMINC_WRAP: 6298 case TGSI_OPCODE_ATOMDEC_WRAP: 6299 for (i = num_src - 1; i >= 0; i--) 6300 src[i + 1] = src[i]; 6301 num_src++; 6302 if (inst->resource.file == PROGRAM_MEMORY) { 6303 src[0] = t->shared_memory; 6304 } else if (inst->resource.file == PROGRAM_BUFFER) { 6305 src[0] = t->buffers[inst->resource.index]; 6306 } else if (inst->resource.file == PROGRAM_HW_ATOMIC) { 6307 src[0] = translate_src(t, &inst->resource); 6308 } else if (inst->resource.file == PROGRAM_CONSTANT) { 6309 assert(inst->resource.has_index2); 6310 src[0] = ureg_src_register(TGSI_FILE_CONSTBUF, inst->resource.index); 6311 } else { 6312 assert(inst->resource.file != PROGRAM_UNDEFINED); 6313 if (inst->resource.file == PROGRAM_IMAGE) { 6314 src[0] = t->images[inst->resource.index]; 6315 } else { 6316 /* Bindless images. */ 6317 src[0] = translate_src(t, &inst->resource); 6318 } 6319 tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow); 6320 } 6321 if (inst->resource.reladdr) 6322 src[0] = ureg_src_indirect(src[0], 6323 translate_addr(t, inst->resource.reladdr, 2)); 6324 assert(src[0].File != TGSI_FILE_NULL); 6325 ureg_memory_insn(ureg, inst->op, dst, num_dst, src, num_src, 6326 inst->buffer_access, 6327 tex_target, inst->image_format); 6328 break; 6329 6330 case TGSI_OPCODE_STORE: 6331 if (inst->resource.file == PROGRAM_MEMORY) { 6332 dst[0] = ureg_dst(t->shared_memory); 6333 } else if (inst->resource.file == PROGRAM_BUFFER) { 6334 dst[0] = ureg_dst(t->buffers[inst->resource.index]); 6335 } else { 6336 if (inst->resource.file == PROGRAM_IMAGE) { 6337 dst[0] = ureg_dst(t->images[inst->resource.index]); 6338 } else { 6339 /* Bindless images. */ 6340 dst[0] = ureg_dst(translate_src(t, &inst->resource)); 6341 } 6342 tex_target = st_translate_texture_target(inst->tex_target, inst->tex_shadow); 6343 } 6344 dst[0] = ureg_writemask(dst[0], inst->dst[0].writemask); 6345 if (inst->resource.reladdr) 6346 dst[0] = ureg_dst_indirect(dst[0], 6347 translate_addr(t, inst->resource.reladdr, 2)); 6348 assert(dst[0].File != TGSI_FILE_NULL); 6349 ureg_memory_insn(ureg, inst->op, dst, num_dst, src, num_src, 6350 inst->buffer_access, 6351 tex_target, inst->image_format); 6352 break; 6353 6354 default: 6355 ureg_insn(ureg, 6356 inst->op, 6357 dst, num_dst, 6358 src, num_src, inst->precise); 6359 break; 6360 } 6361} 6362 6363/* Invert SamplePos.y when rendering to the default framebuffer. */ 6364static void 6365emit_samplepos_adjustment(struct st_translate *t, int wpos_y_transform) 6366{ 6367 struct ureg_program *ureg = t->ureg; 6368 6369 assert(wpos_y_transform >= 0); 6370 struct ureg_src trans_const = ureg_DECL_constant(ureg, wpos_y_transform); 6371 struct ureg_src samplepos_sysval = t->systemValues[SYSTEM_VALUE_SAMPLE_POS]; 6372 struct ureg_dst samplepos_flipped = ureg_DECL_temporary(ureg); 6373 struct ureg_dst is_fbo = ureg_DECL_temporary(ureg); 6374 6375 ureg_ADD(ureg, ureg_writemask(samplepos_flipped, TGSI_WRITEMASK_Y), 6376 ureg_imm1f(ureg, 1), ureg_negate(samplepos_sysval)); 6377 6378 /* If trans.x == 1, use samplepos.y, else use 1 - samplepos.y. */ 6379 ureg_FSEQ(ureg, ureg_writemask(is_fbo, TGSI_WRITEMASK_Y), 6380 ureg_scalar(trans_const, TGSI_SWIZZLE_X), ureg_imm1f(ureg, 1)); 6381 ureg_UCMP(ureg, ureg_writemask(samplepos_flipped, TGSI_WRITEMASK_Y), 6382 ureg_src(is_fbo), samplepos_sysval, ureg_src(samplepos_flipped)); 6383 ureg_MOV(ureg, ureg_writemask(samplepos_flipped, TGSI_WRITEMASK_X), 6384 samplepos_sysval); 6385 6386 /* Use the result in place of the system value. */ 6387 t->systemValues[SYSTEM_VALUE_SAMPLE_POS] = ureg_src(samplepos_flipped); 6388} 6389 6390 6391/** 6392 * Emit the TGSI instructions for inverting and adjusting WPOS. 6393 * This code is unavoidable because it also depends on whether 6394 * a FBO is bound (STATE_FB_WPOS_Y_TRANSFORM). 6395 */ 6396static void 6397emit_wpos_adjustment(struct gl_context *ctx, 6398 struct st_translate *t, 6399 int wpos_transform_const, 6400 boolean invert, 6401 GLfloat adjX, GLfloat adjY[2]) 6402{ 6403 struct ureg_program *ureg = t->ureg; 6404 6405 assert(wpos_transform_const >= 0); 6406 6407 /* Fragment program uses fragment position input. 6408 * Need to replace instances of INPUT[WPOS] with temp T 6409 * where T = INPUT[WPOS] is inverted by Y. 6410 */ 6411 struct ureg_src wpostrans = ureg_DECL_constant(ureg, wpos_transform_const); 6412 struct ureg_dst wpos_temp = ureg_DECL_temporary(ureg); 6413 struct ureg_src *wpos = 6414 ctx->Const.GLSLFragCoordIsSysVal ? 6415 &t->systemValues[SYSTEM_VALUE_FRAG_COORD] : 6416 &t->inputs[t->inputMapping[VARYING_SLOT_POS]]; 6417 struct ureg_src wpos_input = *wpos; 6418 6419 /* First, apply the coordinate shift: */ 6420 if (adjX || adjY[0] || adjY[1]) { 6421 if (adjY[0] != adjY[1]) { 6422 /* Adjust the y coordinate by adjY[1] or adjY[0] respectively 6423 * depending on whether inversion is actually going to be applied 6424 * or not, which is determined by testing against the inversion 6425 * state variable used below, which will be either +1 or -1. 6426 */ 6427 struct ureg_dst adj_temp = ureg_DECL_local_temporary(ureg); 6428 6429 ureg_CMP(ureg, adj_temp, 6430 ureg_scalar(wpostrans, invert ? 2 : 0), 6431 ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f), 6432 ureg_imm4f(ureg, adjX, adjY[1], 0.0f, 0.0f)); 6433 ureg_ADD(ureg, wpos_temp, wpos_input, ureg_src(adj_temp)); 6434 } else { 6435 ureg_ADD(ureg, wpos_temp, wpos_input, 6436 ureg_imm4f(ureg, adjX, adjY[0], 0.0f, 0.0f)); 6437 } 6438 wpos_input = ureg_src(wpos_temp); 6439 } else { 6440 /* MOV wpos_temp, input[wpos] 6441 */ 6442 ureg_MOV(ureg, wpos_temp, wpos_input); 6443 } 6444 6445 /* Now the conditional y flip: STATE_FB_WPOS_Y_TRANSFORM.xy/zw will be 6446 * inversion/identity, or the other way around if we're drawing to an FBO. 6447 */ 6448 if (invert) { 6449 /* MAD wpos_temp.y, wpos_input, wpostrans.xxxx, wpostrans.yyyy 6450 */ 6451 ureg_MAD(ureg, 6452 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y), 6453 wpos_input, 6454 ureg_scalar(wpostrans, 0), 6455 ureg_scalar(wpostrans, 1)); 6456 } else { 6457 /* MAD wpos_temp.y, wpos_input, wpostrans.zzzz, wpostrans.wwww 6458 */ 6459 ureg_MAD(ureg, 6460 ureg_writemask(wpos_temp, TGSI_WRITEMASK_Y), 6461 wpos_input, 6462 ureg_scalar(wpostrans, 2), 6463 ureg_scalar(wpostrans, 3)); 6464 } 6465 6466 /* Use wpos_temp as position input from here on: 6467 */ 6468 *wpos = ureg_src(wpos_temp); 6469} 6470 6471 6472/** 6473 * Emit fragment position/ooordinate code. 6474 */ 6475static void 6476emit_wpos(struct st_context *st, 6477 struct st_translate *t, 6478 const struct gl_program *program, 6479 struct ureg_program *ureg, 6480 int wpos_transform_const) 6481{ 6482 struct pipe_screen *pscreen = st->screen; 6483 GLfloat adjX = 0.0f; 6484 GLfloat adjY[2] = { 0.0f, 0.0f }; 6485 boolean invert = FALSE; 6486 6487 /* Query the pixel center conventions supported by the pipe driver and set 6488 * adjX, adjY to help out if it cannot handle the requested one internally. 6489 * 6490 * The bias of the y-coordinate depends on whether y-inversion takes place 6491 * (adjY[1]) or not (adjY[0]), which is in turn dependent on whether we are 6492 * drawing to an FBO (causes additional inversion), and whether the pipe 6493 * driver origin and the requested origin differ (the latter condition is 6494 * stored in the 'invert' variable). 6495 * 6496 * For height = 100 (i = integer, h = half-integer, l = lower, u = upper): 6497 * 6498 * center shift only: 6499 * i -> h: +0.5 6500 * h -> i: -0.5 6501 * 6502 * inversion only: 6503 * l,i -> u,i: ( 0.0 + 1.0) * -1 + 100 = 99 6504 * l,h -> u,h: ( 0.5 + 0.0) * -1 + 100 = 99.5 6505 * u,i -> l,i: (99.0 + 1.0) * -1 + 100 = 0 6506 * u,h -> l,h: (99.5 + 0.0) * -1 + 100 = 0.5 6507 * 6508 * inversion and center shift: 6509 * l,i -> u,h: ( 0.0 + 0.5) * -1 + 100 = 99.5 6510 * l,h -> u,i: ( 0.5 + 0.5) * -1 + 100 = 99 6511 * u,i -> l,h: (99.0 + 0.5) * -1 + 100 = 0.5 6512 * u,h -> l,i: (99.5 + 0.5) * -1 + 100 = 0 6513 */ 6514 if (program->info.fs.origin_upper_left) { 6515 /* Fragment shader wants origin in upper-left */ 6516 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) { 6517 /* the driver supports upper-left origin */ 6518 } 6519 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) { 6520 /* the driver supports lower-left origin, need to invert Y */ 6521 ureg_property(ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, 6522 TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 6523 invert = TRUE; 6524 } 6525 else 6526 assert(0); 6527 } 6528 else { 6529 /* Fragment shader wants origin in lower-left */ 6530 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT)) 6531 /* the driver supports lower-left origin */ 6532 ureg_property(ureg, TGSI_PROPERTY_FS_COORD_ORIGIN, 6533 TGSI_FS_COORD_ORIGIN_LOWER_LEFT); 6534 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT)) 6535 /* the driver supports upper-left origin, need to invert Y */ 6536 invert = TRUE; 6537 else 6538 assert(0); 6539 } 6540 6541 if (program->info.fs.pixel_center_integer) { 6542 /* Fragment shader wants pixel center integer */ 6543 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { 6544 /* the driver supports pixel center integer */ 6545 adjY[1] = 1.0f; 6546 ureg_property(ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, 6547 TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 6548 } 6549 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { 6550 /* the driver supports pixel center half integer, need to bias X,Y */ 6551 adjX = -0.5f; 6552 adjY[0] = -0.5f; 6553 adjY[1] = 0.5f; 6554 } 6555 else 6556 assert(0); 6557 } 6558 else { 6559 /* Fragment shader wants pixel center half integer */ 6560 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER)) { 6561 /* the driver supports pixel center half integer */ 6562 } 6563 else if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER)) { 6564 /* the driver supports pixel center integer, need to bias X,Y */ 6565 adjX = adjY[0] = adjY[1] = 0.5f; 6566 ureg_property(ureg, TGSI_PROPERTY_FS_COORD_PIXEL_CENTER, 6567 TGSI_FS_COORD_PIXEL_CENTER_INTEGER); 6568 } 6569 else 6570 assert(0); 6571 } 6572 6573 /* we invert after adjustment so that we avoid the MOV to temporary, 6574 * and reuse the adjustment ADD instead */ 6575 emit_wpos_adjustment(st->ctx, t, wpos_transform_const, invert, adjX, adjY); 6576} 6577 6578/** 6579 * OpenGL's fragment gl_FrontFace input is 1 for front-facing, 0 for back. 6580 * TGSI uses +1 for front, -1 for back. 6581 * This function converts the TGSI value to the GL value. Simply clamping/ 6582 * saturating the value to [0,1] does the job. 6583 */ 6584static void 6585emit_face_var(struct gl_context *ctx, struct st_translate *t) 6586{ 6587 struct ureg_program *ureg = t->ureg; 6588 struct ureg_dst face_temp = ureg_DECL_temporary(ureg); 6589 struct ureg_src face_input = t->inputs[t->inputMapping[VARYING_SLOT_FACE]]; 6590 6591 if (ctx->Const.NativeIntegers) { 6592 ureg_FSGE(ureg, face_temp, face_input, ureg_imm1f(ureg, 0)); 6593 } 6594 else { 6595 /* MOV_SAT face_temp, input[face] */ 6596 ureg_MOV(ureg, ureg_saturate(face_temp), face_input); 6597 } 6598 6599 /* Use face_temp as face input from here on: */ 6600 t->inputs[t->inputMapping[VARYING_SLOT_FACE]] = ureg_src(face_temp); 6601} 6602 6603struct sort_inout_decls { 6604 bool operator()(const struct inout_decl &a, const struct inout_decl &b) const { 6605 return mapping[a.mesa_index] < mapping[b.mesa_index]; 6606 } 6607 6608 const ubyte *mapping; 6609}; 6610 6611/* Sort the given array of decls by the corresponding slot (TGSI file index). 6612 * 6613 * This is for the benefit of older drivers which are broken when the 6614 * declarations aren't sorted in this way. 6615 */ 6616static void 6617sort_inout_decls_by_slot(struct inout_decl *decls, 6618 unsigned count, 6619 const ubyte mapping[]) 6620{ 6621 sort_inout_decls sorter; 6622 sorter.mapping = mapping; 6623 std::sort(decls, decls + count, sorter); 6624} 6625 6626/** 6627 * Translate intermediate IR (glsl_to_tgsi_instruction) to TGSI format. 6628 * \param program the program to translate 6629 * \param numInputs number of input registers used 6630 * \param inputMapping maps Mesa fragment program inputs to TGSI generic 6631 * input indexes 6632 * \param inputSemanticName the TGSI_SEMANTIC flag for each input 6633 * \param inputSemanticIndex the semantic index (ex: which texcoord) for 6634 * each input 6635 * \param interpMode the TGSI_INTERPOLATE_LINEAR/PERSP mode for each input 6636 * \param numOutputs number of output registers used 6637 * \param outputMapping maps Mesa fragment program outputs to TGSI 6638 * generic outputs 6639 * \param outputSemanticName the TGSI_SEMANTIC flag for each output 6640 * \param outputSemanticIndex the semantic index (ex: which texcoord) for 6641 * each output 6642 * 6643 * \return PIPE_OK or PIPE_ERROR_OUT_OF_MEMORY 6644 */ 6645extern "C" enum pipe_error 6646st_translate_program( 6647 struct gl_context *ctx, 6648 enum pipe_shader_type procType, 6649 struct ureg_program *ureg, 6650 glsl_to_tgsi_visitor *program, 6651 const struct gl_program *proginfo, 6652 GLuint numInputs, 6653 const ubyte attrToIndex[], 6654 const ubyte inputSlotToAttr[], 6655 const ubyte inputSemanticName[], 6656 const ubyte inputSemanticIndex[], 6657 const ubyte interpMode[], 6658 GLuint numOutputs, 6659 const ubyte outputMapping[], 6660 const ubyte outputSemanticName[], 6661 const ubyte outputSemanticIndex[]) 6662{ 6663 struct pipe_screen *screen = st_context(ctx)->screen; 6664 struct st_translate *t; 6665 unsigned i; 6666 struct gl_program_constants *prog_const = 6667 &ctx->Const.Program[program->shader->Stage]; 6668 enum pipe_error ret = PIPE_OK; 6669 uint8_t inputMapping[VARYING_SLOT_TESS_MAX] = {0}; 6670 6671 assert(numInputs <= ARRAY_SIZE(t->inputs)); 6672 assert(numOutputs <= ARRAY_SIZE(t->outputs)); 6673 6674 ASSERT_BITFIELD_SIZE(st_src_reg, type, GLSL_TYPE_ERROR); 6675 ASSERT_BITFIELD_SIZE(st_src_reg, file, PROGRAM_FILE_MAX); 6676 ASSERT_BITFIELD_SIZE(st_dst_reg, type, GLSL_TYPE_ERROR); 6677 ASSERT_BITFIELD_SIZE(st_dst_reg, file, PROGRAM_FILE_MAX); 6678 ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, tex_type, GLSL_TYPE_ERROR); 6679 ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, image_format, PIPE_FORMAT_COUNT); 6680 ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, tex_target, 6681 (gl_texture_index) (NUM_TEXTURE_TARGETS - 1)); 6682 ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, image_format, 6683 (enum pipe_format) (PIPE_FORMAT_COUNT - 1)); 6684 ASSERT_BITFIELD_SIZE(glsl_to_tgsi_instruction, op, 6685 (enum tgsi_opcode) (TGSI_OPCODE_LAST - 1)); 6686 6687 if (proginfo->DualSlotInputs != 0) { 6688 /* adjust attrToIndex to include placeholder for second 6689 * part of a double attribute 6690 */ 6691 numInputs = 0; 6692 for (unsigned attr = 0; attr < VERT_ATTRIB_MAX; attr++) { 6693 if ((proginfo->info.inputs_read & BITFIELD64_BIT(attr)) != 0) { 6694 inputMapping[attr] = numInputs++; 6695 6696 if ((proginfo->DualSlotInputs & BITFIELD64_BIT(attr)) != 0) { 6697 /* add placeholder for second part of a double attribute */ 6698 numInputs++; 6699 } 6700 } 6701 } 6702 inputMapping[VERT_ATTRIB_EDGEFLAG] = numInputs; 6703 } 6704 else { 6705 memcpy(inputMapping, attrToIndex, sizeof(inputMapping)); 6706 } 6707 6708 t = CALLOC_STRUCT(st_translate); 6709 if (!t) { 6710 ret = PIPE_ERROR_OUT_OF_MEMORY; 6711 goto out; 6712 } 6713 6714 t->procType = procType; 6715 t->need_uarl = !screen->get_param(screen, PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS); 6716 t->tg4_component_in_swizzle = screen->get_param(screen, PIPE_CAP_TGSI_TG4_COMPONENT_IN_SWIZZLE); 6717 t->inputMapping = inputMapping; 6718 t->outputMapping = outputMapping; 6719 t->ureg = ureg; 6720 t->num_temp_arrays = program->next_array; 6721 if (t->num_temp_arrays) 6722 t->arrays = (struct ureg_dst*) 6723 calloc(t->num_temp_arrays, sizeof(t->arrays[0])); 6724 6725 /* 6726 * Declare input attributes. 6727 */ 6728 switch (procType) { 6729 case PIPE_SHADER_FRAGMENT: 6730 case PIPE_SHADER_GEOMETRY: 6731 case PIPE_SHADER_TESS_EVAL: 6732 case PIPE_SHADER_TESS_CTRL: 6733 sort_inout_decls_by_slot(program->inputs, program->num_inputs, inputMapping); 6734 6735 for (i = 0; i < program->num_inputs; ++i) { 6736 struct inout_decl *decl = &program->inputs[i]; 6737 unsigned slot = inputMapping[decl->mesa_index]; 6738 struct ureg_src src; 6739 ubyte tgsi_usage_mask = decl->usage_mask; 6740 6741 if (glsl_base_type_is_64bit(decl->base_type)) { 6742 if (tgsi_usage_mask == 1) 6743 tgsi_usage_mask = TGSI_WRITEMASK_XY; 6744 else if (tgsi_usage_mask == 2) 6745 tgsi_usage_mask = TGSI_WRITEMASK_ZW; 6746 else 6747 tgsi_usage_mask = TGSI_WRITEMASK_XYZW; 6748 } 6749 6750 enum tgsi_interpolate_mode interp_mode = TGSI_INTERPOLATE_CONSTANT; 6751 enum tgsi_interpolate_loc interp_location = TGSI_INTERPOLATE_LOC_CENTER; 6752 if (procType == PIPE_SHADER_FRAGMENT) { 6753 assert(interpMode); 6754 interp_mode = interpMode[slot] != TGSI_INTERPOLATE_COUNT ? 6755 (enum tgsi_interpolate_mode) interpMode[slot] : 6756 tgsi_get_interp_mode(decl->interp, 6757 inputSlotToAttr[slot] == VARYING_SLOT_COL0 || 6758 inputSlotToAttr[slot] == VARYING_SLOT_COL1); 6759 6760 interp_location = (enum tgsi_interpolate_loc) decl->interp_loc; 6761 } 6762 6763 src = ureg_DECL_fs_input_centroid_layout(ureg, 6764 (enum tgsi_semantic) inputSemanticName[slot], 6765 inputSemanticIndex[slot], 6766 interp_mode, interp_location, slot, tgsi_usage_mask, 6767 decl->array_id, decl->size); 6768 6769 for (unsigned j = 0; j < decl->size; ++j) { 6770 if (t->inputs[slot + j].File != TGSI_FILE_INPUT) { 6771 /* The ArrayID is set up in dst_register */ 6772 t->inputs[slot + j] = src; 6773 t->inputs[slot + j].ArrayID = 0; 6774 t->inputs[slot + j].Index += j; 6775 } 6776 } 6777 } 6778 break; 6779 case PIPE_SHADER_VERTEX: 6780 for (i = 0; i < numInputs; i++) { 6781 t->inputs[i] = ureg_DECL_vs_input(ureg, i); 6782 } 6783 break; 6784 case PIPE_SHADER_COMPUTE: 6785 break; 6786 default: 6787 assert(0); 6788 } 6789 6790 /* 6791 * Declare output attributes. 6792 */ 6793 switch (procType) { 6794 case PIPE_SHADER_FRAGMENT: 6795 case PIPE_SHADER_COMPUTE: 6796 break; 6797 case PIPE_SHADER_GEOMETRY: 6798 case PIPE_SHADER_TESS_EVAL: 6799 case PIPE_SHADER_TESS_CTRL: 6800 case PIPE_SHADER_VERTEX: 6801 sort_inout_decls_by_slot(program->outputs, program->num_outputs, outputMapping); 6802 6803 for (i = 0; i < program->num_outputs; ++i) { 6804 struct inout_decl *decl = &program->outputs[i]; 6805 unsigned slot = outputMapping[decl->mesa_index]; 6806 struct ureg_dst dst; 6807 ubyte tgsi_usage_mask = decl->usage_mask; 6808 6809 if (glsl_base_type_is_64bit(decl->base_type)) { 6810 if (tgsi_usage_mask == 1) 6811 tgsi_usage_mask = TGSI_WRITEMASK_XY; 6812 else if (tgsi_usage_mask == 2) 6813 tgsi_usage_mask = TGSI_WRITEMASK_ZW; 6814 else 6815 tgsi_usage_mask = TGSI_WRITEMASK_XYZW; 6816 } 6817 6818 dst = ureg_DECL_output_layout(ureg, 6819 (enum tgsi_semantic) outputSemanticName[slot], 6820 outputSemanticIndex[slot], 6821 decl->gs_out_streams, 6822 slot, tgsi_usage_mask, decl->array_id, decl->size, decl->invariant); 6823 dst.Invariant = decl->invariant; 6824 for (unsigned j = 0; j < decl->size; ++j) { 6825 if (t->outputs[slot + j].File != TGSI_FILE_OUTPUT) { 6826 /* The ArrayID is set up in dst_register */ 6827 t->outputs[slot + j] = dst; 6828 t->outputs[slot + j].ArrayID = 0; 6829 t->outputs[slot + j].Index += j; 6830 t->outputs[slot + j].Invariant = decl->invariant; 6831 } 6832 } 6833 } 6834 break; 6835 default: 6836 assert(0); 6837 } 6838 6839 if (procType == PIPE_SHADER_FRAGMENT) { 6840 if (proginfo->info.inputs_read & VARYING_BIT_POS) { 6841 /* Must do this after setting up t->inputs. */ 6842 emit_wpos(st_context(ctx), t, proginfo, ureg, 6843 program->wpos_transform_const); 6844 } 6845 6846 if (proginfo->info.inputs_read & VARYING_BIT_FACE) 6847 emit_face_var(ctx, t); 6848 6849 for (i = 0; i < numOutputs; i++) { 6850 switch (outputSemanticName[i]) { 6851 case TGSI_SEMANTIC_POSITION: 6852 t->outputs[i] = ureg_DECL_output(ureg, 6853 TGSI_SEMANTIC_POSITION, /* Z/Depth */ 6854 outputSemanticIndex[i]); 6855 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Z); 6856 break; 6857 case TGSI_SEMANTIC_STENCIL: 6858 t->outputs[i] = ureg_DECL_output(ureg, 6859 TGSI_SEMANTIC_STENCIL, /* Stencil */ 6860 outputSemanticIndex[i]); 6861 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_Y); 6862 break; 6863 case TGSI_SEMANTIC_COLOR: 6864 t->outputs[i] = ureg_DECL_output(ureg, 6865 TGSI_SEMANTIC_COLOR, 6866 outputSemanticIndex[i]); 6867 break; 6868 case TGSI_SEMANTIC_SAMPLEMASK: 6869 t->outputs[i] = ureg_DECL_output(ureg, 6870 TGSI_SEMANTIC_SAMPLEMASK, 6871 outputSemanticIndex[i]); 6872 /* TODO: If we ever support more than 32 samples, this will have 6873 * to become an array. 6874 */ 6875 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X); 6876 break; 6877 default: 6878 assert(!"fragment shader outputs must be POSITION/STENCIL/COLOR"); 6879 ret = PIPE_ERROR_BAD_INPUT; 6880 goto out; 6881 } 6882 } 6883 } 6884 else if (procType == PIPE_SHADER_VERTEX) { 6885 for (i = 0; i < numOutputs; i++) { 6886 if (outputSemanticName[i] == TGSI_SEMANTIC_FOG) { 6887 /* force register to contain a fog coordinate in the form (F, 0, 0, 1). */ 6888 ureg_MOV(ureg, 6889 ureg_writemask(t->outputs[i], TGSI_WRITEMASK_YZW), 6890 ureg_imm4f(ureg, 0.0f, 0.0f, 0.0f, 1.0f)); 6891 t->outputs[i] = ureg_writemask(t->outputs[i], TGSI_WRITEMASK_X); 6892 } 6893 } 6894 } 6895 6896 /* Declare address register. 6897 */ 6898 if (program->num_address_regs > 0) { 6899 assert(program->num_address_regs <= 3); 6900 for (int i = 0; i < program->num_address_regs; i++) 6901 t->address[i] = ureg_DECL_address(ureg); 6902 } 6903 6904 /* Declare misc input registers 6905 */ 6906 BITSET_FOREACH_SET(i, proginfo->info.system_values_read, SYSTEM_VALUE_MAX) { 6907 enum tgsi_semantic semName = tgsi_get_sysval_semantic(i); 6908 6909 t->systemValues[i] = ureg_DECL_system_value(ureg, semName, 0); 6910 6911 if (semName == TGSI_SEMANTIC_INSTANCEID || 6912 semName == TGSI_SEMANTIC_VERTEXID) { 6913 /* From Gallium perspective, these system values are always 6914 * integer, and require native integer support. However, if 6915 * native integer is supported on the vertex stage but not the 6916 * pixel stage (e.g, i915g + draw), Mesa will generate IR that 6917 * assumes these system values are floats. To resolve the 6918 * inconsistency, we insert a U2F. 6919 */ 6920 struct st_context *st = st_context(ctx); 6921 struct pipe_screen *pscreen = st->screen; 6922 assert(procType == PIPE_SHADER_VERTEX); 6923 assert(pscreen->get_shader_param(pscreen, PIPE_SHADER_VERTEX, PIPE_SHADER_CAP_INTEGERS)); 6924 (void) pscreen; 6925 if (!ctx->Const.NativeIntegers) { 6926 struct ureg_dst temp = ureg_DECL_local_temporary(t->ureg); 6927 ureg_U2F(t->ureg, ureg_writemask(temp, TGSI_WRITEMASK_X), 6928 t->systemValues[i]); 6929 t->systemValues[i] = ureg_scalar(ureg_src(temp), 0); 6930 } 6931 } 6932 6933 if (procType == PIPE_SHADER_FRAGMENT && 6934 semName == TGSI_SEMANTIC_POSITION) 6935 emit_wpos(st_context(ctx), t, proginfo, ureg, 6936 program->wpos_transform_const); 6937 6938 if (procType == PIPE_SHADER_FRAGMENT && 6939 semName == TGSI_SEMANTIC_SAMPLEPOS) 6940 emit_samplepos_adjustment(t, program->wpos_transform_const); 6941 } 6942 6943 t->array_sizes = program->array_sizes; 6944 t->input_decls = program->inputs; 6945 t->num_input_decls = program->num_inputs; 6946 t->output_decls = program->outputs; 6947 t->num_output_decls = program->num_outputs; 6948 6949 /* Emit constants and uniforms. TGSI uses a single index space for these, 6950 * so we put all the translated regs in t->constants. 6951 */ 6952 if (proginfo->Parameters) { 6953 t->constants = (struct ureg_src *) 6954 calloc(proginfo->Parameters->NumParameters, sizeof(t->constants[0])); 6955 if (t->constants == NULL) { 6956 ret = PIPE_ERROR_OUT_OF_MEMORY; 6957 goto out; 6958 } 6959 t->num_constants = proginfo->Parameters->NumParameters; 6960 6961 for (i = 0; i < proginfo->Parameters->NumParameters; i++) { 6962 unsigned pvo = proginfo->Parameters->Parameters[i].ValueOffset; 6963 6964 switch (proginfo->Parameters->Parameters[i].Type) { 6965 case PROGRAM_STATE_VAR: 6966 case PROGRAM_UNIFORM: 6967 t->constants[i] = ureg_DECL_constant(ureg, i); 6968 break; 6969 6970 /* Emit immediates for PROGRAM_CONSTANT only when there's no indirect 6971 * addressing of the const buffer. 6972 * FIXME: Be smarter and recognize param arrays: 6973 * indirect addressing is only valid within the referenced 6974 * array. 6975 */ 6976 case PROGRAM_CONSTANT: 6977 if (program->indirect_addr_consts) 6978 t->constants[i] = ureg_DECL_constant(ureg, i); 6979 else 6980 t->constants[i] = emit_immediate(t, 6981 proginfo->Parameters->ParameterValues + pvo, 6982 proginfo->Parameters->Parameters[i].DataType, 6983 4); 6984 break; 6985 default: 6986 break; 6987 } 6988 } 6989 } 6990 6991 for (i = 0; i < proginfo->info.num_ubos; i++) { 6992 unsigned size = proginfo->sh.UniformBlocks[i]->UniformBufferSize; 6993 unsigned num_const_vecs = (size + 15) / 16; 6994 unsigned first, last; 6995 assert(num_const_vecs > 0); 6996 first = 0; 6997 last = num_const_vecs > 0 ? num_const_vecs - 1 : 0; 6998 ureg_DECL_constant2D(t->ureg, first, last, i + 1); 6999 } 7000 7001 /* Emit immediate values. 7002 */ 7003 t->immediates = (struct ureg_src *) 7004 calloc(program->num_immediates, sizeof(struct ureg_src)); 7005 if (t->immediates == NULL) { 7006 ret = PIPE_ERROR_OUT_OF_MEMORY; 7007 goto out; 7008 } 7009 t->num_immediates = program->num_immediates; 7010 7011 i = 0; 7012 foreach_in_list(immediate_storage, imm, &program->immediates) { 7013 assert(i < program->num_immediates); 7014 t->immediates[i++] = emit_immediate(t, imm->values, imm->type, imm->size32); 7015 } 7016 assert(i == program->num_immediates); 7017 7018 /* texture samplers */ 7019 for (i = 0; i < prog_const->MaxTextureImageUnits; i++) { 7020 if (program->samplers_used & (1u << i)) { 7021 enum tgsi_return_type type = 7022 st_translate_texture_type(program->sampler_types[i]); 7023 7024 t->samplers[i] = ureg_DECL_sampler(ureg, i); 7025 7026 ureg_DECL_sampler_view(ureg, i, program->sampler_targets[i], 7027 type, type, type, type); 7028 } 7029 } 7030 7031 /* Declare atomic and shader storage buffers. */ 7032 { 7033 struct gl_program *prog = program->prog; 7034 7035 if (!st_context(ctx)->has_hw_atomics) { 7036 for (i = 0; i < prog->info.num_abos; i++) { 7037 unsigned index = (prog->info.num_ssbos + 7038 prog->sh.AtomicBuffers[i]->Binding); 7039 assert(prog->sh.AtomicBuffers[i]->Binding < 7040 prog_const->MaxAtomicBuffers); 7041 t->buffers[index] = ureg_DECL_buffer(ureg, index, true); 7042 } 7043 } else { 7044 for (i = 0; i < program->num_atomics; i++) { 7045 struct hwatomic_decl *ainfo = &program->atomic_info[i]; 7046 gl_uniform_storage *uni_storage = &prog->sh.data->UniformStorage[ainfo->location]; 7047 int base = uni_storage->offset / ATOMIC_COUNTER_SIZE; 7048 ureg_DECL_hw_atomic(ureg, base, base + ainfo->size - 1, ainfo->binding, 7049 ainfo->array_id); 7050 } 7051 } 7052 7053 assert(prog->info.num_ssbos <= prog_const->MaxShaderStorageBlocks); 7054 for (i = 0; i < prog->info.num_ssbos; i++) { 7055 t->buffers[i] = ureg_DECL_buffer(ureg, i, false); 7056 } 7057 } 7058 7059 if (program->use_shared_memory) 7060 t->shared_memory = ureg_DECL_memory(ureg, TGSI_MEMORY_TYPE_SHARED); 7061 7062 for (i = 0; i < program->shader->Program->info.num_images; i++) { 7063 if (program->images_used & (1 << i)) { 7064 t->images[i] = ureg_DECL_image(ureg, i, 7065 program->image_targets[i], 7066 program->image_formats[i], 7067 program->image_wr[i], 7068 false); 7069 } 7070 } 7071 7072 /* Emit each instruction in turn: 7073 */ 7074 foreach_in_list(glsl_to_tgsi_instruction, inst, &program->instructions) 7075 compile_tgsi_instruction(t, inst); 7076 7077out: 7078 if (t) { 7079 free(t->arrays); 7080 free(t->temps); 7081 free(t->constants); 7082 t->num_constants = 0; 7083 free(t->immediates); 7084 t->num_immediates = 0; 7085 FREE(t); 7086 } 7087 7088 return ret; 7089} 7090/* ----------------------------- End TGSI code ------------------------------ */ 7091 7092 7093/** 7094 * Convert a shader's GLSL IR into a Mesa gl_program, although without 7095 * generating Mesa IR. 7096 */ 7097static struct gl_program * 7098get_mesa_program_tgsi(struct gl_context *ctx, 7099 struct gl_shader_program *shader_program, 7100 struct gl_linked_shader *shader) 7101{ 7102 glsl_to_tgsi_visitor* v; 7103 struct gl_program *prog; 7104 struct gl_shader_compiler_options *options = 7105 &ctx->Const.ShaderCompilerOptions[shader->Stage]; 7106 struct pipe_screen *pscreen = st_context(ctx)->screen; 7107 enum pipe_shader_type ptarget = pipe_shader_type_from_mesa(shader->Stage); 7108 unsigned skip_merge_registers; 7109 7110 validate_ir_tree(shader->ir); 7111 7112 prog = shader->Program; 7113 7114 prog->Parameters = _mesa_new_parameter_list(); 7115 v = new glsl_to_tgsi_visitor(); 7116 v->ctx = ctx; 7117 v->prog = prog; 7118 v->shader_program = shader_program; 7119 v->shader = shader; 7120 v->options = options; 7121 v->native_integers = ctx->Const.NativeIntegers; 7122 7123 v->have_sqrt = pscreen->get_shader_param(pscreen, ptarget, 7124 PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED); 7125 v->have_fma = pscreen->get_shader_param(pscreen, ptarget, 7126 PIPE_SHADER_CAP_TGSI_FMA_SUPPORTED); 7127 v->has_tex_txf_lz = pscreen->get_param(pscreen, 7128 PIPE_CAP_TGSI_TEX_TXF_LZ); 7129 v->need_uarl = !pscreen->get_param(pscreen, PIPE_CAP_TGSI_ANY_REG_AS_ADDRESS); 7130 7131 v->tg4_component_in_swizzle = pscreen->get_param(pscreen, PIPE_CAP_TGSI_TG4_COMPONENT_IN_SWIZZLE); 7132 v->variables = _mesa_hash_table_create(v->mem_ctx, _mesa_hash_pointer, 7133 _mesa_key_pointer_equal); 7134 skip_merge_registers = 7135 pscreen->get_shader_param(pscreen, ptarget, 7136 PIPE_SHADER_CAP_TGSI_SKIP_MERGE_REGISTERS); 7137 7138 _mesa_generate_parameters_list_for_uniforms(ctx, shader_program, shader, 7139 prog->Parameters); 7140 7141 /* Remove reads from output registers. */ 7142 if (!pscreen->get_param(pscreen, PIPE_CAP_TGSI_CAN_READ_OUTPUTS)) 7143 lower_output_reads(shader->Stage, shader->ir); 7144 7145 /* Emit intermediate IR for main(). */ 7146 visit_exec_list(shader->ir, v); 7147 7148#if 0 7149 /* Print out some information (for debugging purposes) used by the 7150 * optimization passes. */ 7151 { 7152 int i; 7153 int *first_writes = ralloc_array(v->mem_ctx, int, v->next_temp); 7154 int *first_reads = ralloc_array(v->mem_ctx, int, v->next_temp); 7155 int *last_writes = ralloc_array(v->mem_ctx, int, v->next_temp); 7156 int *last_reads = ralloc_array(v->mem_ctx, int, v->next_temp); 7157 7158 for (i = 0; i < v->next_temp; i++) { 7159 first_writes[i] = -1; 7160 first_reads[i] = -1; 7161 last_writes[i] = -1; 7162 last_reads[i] = -1; 7163 } 7164 v->get_first_temp_read(first_reads); 7165 v->get_last_temp_read_first_temp_write(last_reads, first_writes); 7166 v->get_last_temp_write(last_writes); 7167 for (i = 0; i < v->next_temp; i++) 7168 printf("Temp %d: FR=%3d FW=%3d LR=%3d LW=%3d\n", i, first_reads[i], 7169 first_writes[i], 7170 last_reads[i], 7171 last_writes[i]); 7172 ralloc_free(first_writes); 7173 ralloc_free(first_reads); 7174 ralloc_free(last_writes); 7175 ralloc_free(last_reads); 7176 } 7177#endif 7178 7179 /* Perform optimizations on the instructions in the glsl_to_tgsi_visitor. */ 7180 v->simplify_cmp(); 7181 v->copy_propagate(); 7182 7183 while (v->eliminate_dead_code()); 7184 7185 v->merge_two_dsts(); 7186 7187 if (!skip_merge_registers) { 7188 v->split_arrays(); 7189 v->copy_propagate(); 7190 while (v->eliminate_dead_code()); 7191 7192 v->merge_registers(); 7193 v->copy_propagate(); 7194 while (v->eliminate_dead_code()); 7195 } 7196 7197 v->renumber_registers(); 7198 7199 /* Write the END instruction. */ 7200 v->emit_asm(NULL, TGSI_OPCODE_END); 7201 7202 if (ctx->_Shader->Flags & GLSL_DUMP) { 7203 _mesa_log("\n"); 7204 _mesa_log("GLSL IR for linked %s program %d:\n", 7205 _mesa_shader_stage_to_string(shader->Stage), 7206 shader_program->Name); 7207 _mesa_print_ir(_mesa_get_log_file(), shader->ir, NULL); 7208 _mesa_log("\n\n"); 7209 } 7210 7211 do_set_program_inouts(shader->ir, prog, shader->Stage); 7212 7213 _mesa_copy_linked_program_data(shader_program, shader); 7214 7215 if (pscreen->get_param(pscreen, PIPE_CAP_TGSI_SKIP_SHRINK_IO_ARRAYS)) { 7216 mark_array_io(v->inputs, v->num_inputs, 7217 &prog->info.inputs_read, 7218 prog->DualSlotInputs, 7219 &prog->info.patch_inputs_read); 7220 7221 mark_array_io(v->outputs, v->num_outputs, 7222 &prog->info.outputs_written, 0ULL, 7223 &prog->info.patch_outputs_written); 7224 } else { 7225 shrink_array_declarations(v->inputs, v->num_inputs, 7226 &prog->info.inputs_read, 7227 prog->DualSlotInputs, 7228 &prog->info.patch_inputs_read); 7229 shrink_array_declarations(v->outputs, v->num_outputs, 7230 &prog->info.outputs_written, 0ULL, 7231 &prog->info.patch_outputs_written); 7232 } 7233 7234 count_resources(v, prog); 7235 7236 /* The GLSL IR won't be needed anymore. */ 7237 ralloc_free(shader->ir); 7238 shader->ir = NULL; 7239 7240 /* This must be done before the uniform storage is associated. */ 7241 if (shader->Stage == MESA_SHADER_FRAGMENT && 7242 (prog->info.inputs_read & VARYING_BIT_POS || 7243 BITSET_TEST(prog->info.system_values_read, SYSTEM_VALUE_FRAG_COORD) || 7244 BITSET_TEST(prog->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS))) { 7245 static const gl_state_index16 wposTransformState[STATE_LENGTH] = { 7246 STATE_FB_WPOS_Y_TRANSFORM 7247 }; 7248 7249 v->wpos_transform_const = _mesa_add_state_reference(prog->Parameters, 7250 wposTransformState); 7251 } 7252 7253 /* Avoid reallocation of the program parameter list, because the uniform 7254 * storage is only associated with the original parameter list. 7255 * This should be enough for Bitmap and DrawPixels constants. 7256 */ 7257 _mesa_ensure_and_associate_uniform_storage(ctx, shader_program, prog, 8); 7258 if (!shader_program->data->LinkStatus) { 7259 free_glsl_to_tgsi_visitor(v); 7260 _mesa_reference_program(ctx, &shader->Program, NULL); 7261 return NULL; 7262 } 7263 7264 st_program(prog)->glsl_to_tgsi = v; 7265 7266 PRINT_STATS(v->print_stats()); 7267 7268 return prog; 7269} 7270 7271/* See if there are unsupported control flow statements. */ 7272class ir_control_flow_info_visitor : public ir_hierarchical_visitor { 7273private: 7274 const struct gl_shader_compiler_options *options; 7275public: 7276 ir_control_flow_info_visitor(const struct gl_shader_compiler_options *options) 7277 : options(options), 7278 unsupported(false) 7279 { 7280 } 7281 7282 virtual ir_visitor_status visit_enter(ir_function *ir) 7283 { 7284 /* Other functions are skipped (same as glsl_to_tgsi). */ 7285 if (strcmp(ir->name, "main") == 0) 7286 return visit_continue; 7287 7288 return visit_continue_with_parent; 7289 } 7290 7291 virtual ir_visitor_status visit_enter(ir_call *ir) 7292 { 7293 if (!ir->callee->is_intrinsic()) { 7294 unsupported = true; /* it's a function call */ 7295 return visit_stop; 7296 } 7297 return visit_continue; 7298 } 7299 7300 virtual ir_visitor_status visit_enter(ir_return *ir) 7301 { 7302 if (options->EmitNoMainReturn) { 7303 unsupported = true; 7304 return visit_stop; 7305 } 7306 return visit_continue; 7307 } 7308 7309 bool unsupported; 7310}; 7311 7312static bool 7313has_unsupported_control_flow(exec_list *ir, 7314 const struct gl_shader_compiler_options *options) 7315{ 7316 ir_control_flow_info_visitor visitor(options); 7317 visit_list_elements(&visitor, ir); 7318 return visitor.unsupported; 7319} 7320 7321/** 7322 * Link a shader. 7323 * This actually involves converting GLSL IR into an intermediate TGSI-like IR 7324 * with code lowering and other optimizations. 7325 */ 7326GLboolean 7327st_link_tgsi(struct gl_context *ctx, struct gl_shader_program *prog) 7328{ 7329 struct pipe_screen *pscreen = st_context(ctx)->screen; 7330 7331 for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { 7332 struct gl_linked_shader *shader = prog->_LinkedShaders[i]; 7333 if (shader == NULL) 7334 continue; 7335 7336 exec_list *ir = shader->ir; 7337 gl_shader_stage stage = shader->Stage; 7338 enum pipe_shader_type ptarget = pipe_shader_type_from_mesa(stage); 7339 const struct gl_shader_compiler_options *options = 7340 &ctx->Const.ShaderCompilerOptions[stage]; 7341 7342 unsigned if_threshold = pscreen->get_shader_param(pscreen, ptarget, 7343 PIPE_SHADER_CAP_LOWER_IF_THRESHOLD); 7344 if (ctx->Const.GLSLOptimizeConservatively) { 7345 /* Do it once and repeat only if there's unsupported control flow. */ 7346 do { 7347 do_common_optimization(ir, true, true, options, 7348 ctx->Const.NativeIntegers); 7349 lower_if_to_cond_assign((gl_shader_stage)i, ir, 7350 options->MaxIfDepth, if_threshold); 7351 } while (has_unsupported_control_flow(ir, options)); 7352 } else { 7353 /* Repeat it until it stops making changes. */ 7354 bool progress; 7355 do { 7356 progress = do_common_optimization(ir, true, true, options, 7357 ctx->Const.NativeIntegers); 7358 progress |= lower_if_to_cond_assign((gl_shader_stage)i, ir, 7359 options->MaxIfDepth, if_threshold); 7360 } while (progress); 7361 } 7362 7363 /* Do this again to lower ir_binop_vector_extract introduced 7364 * by optimization passes. 7365 */ 7366 do_vec_index_to_cond_assign(ir); 7367 7368 validate_ir_tree(ir); 7369 7370 struct gl_program *linked_prog = 7371 get_mesa_program_tgsi(ctx, prog, shader); 7372 st_set_prog_affected_state_flags(linked_prog); 7373 7374 if (linked_prog) { 7375 /* This is really conservative: */ 7376 linked_prog->info.writes_memory = 7377 linked_prog->info.num_ssbos || 7378 linked_prog->info.num_images || 7379 ctx->Extensions.ARB_bindless_texture || 7380 (linked_prog->sh.LinkedTransformFeedback && 7381 linked_prog->sh.LinkedTransformFeedback->NumVarying); 7382 7383 if (!ctx->Driver.ProgramStringNotify(ctx, 7384 _mesa_shader_stage_to_program(i), 7385 linked_prog)) { 7386 _mesa_reference_program(ctx, &shader->Program, NULL); 7387 return GL_FALSE; 7388 } 7389 } 7390 } 7391 7392 return GL_TRUE; 7393} 7394