1/* 2 * Copyright (C) 2005-2007 Brian Paul All Rights Reserved. 3 * Copyright (C) 2008 VMware, Inc. All Rights Reserved. 4 * Copyright © 2010 Intel Corporation 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining a 7 * copy of this software and associated documentation files (the "Software"), 8 * to deal in the Software without restriction, including without limitation 9 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10 * and/or sell copies of the Software, and to permit persons to whom the 11 * Software is furnished to do so, subject to the following conditions: 12 * 13 * The above copyright notice and this permission notice (including the next 14 * paragraph) shall be included in all copies or substantial portions of the 15 * Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 23 * DEALINGS IN THE SOFTWARE. 24 */ 25 26/** 27 * \file ir_to_mesa.cpp 28 * 29 * Translate GLSL IR to Mesa's gl_program representation. 30 */ 31 32#include <stdio.h> 33#include "main/macros.h" 34#include "main/mtypes.h" 35#include "main/shaderapi.h" 36#include "main/shaderobj.h" 37#include "main/uniforms.h" 38#include "main/glspirv.h" 39#include "compiler/glsl/ast.h" 40#include "compiler/glsl/ir.h" 41#include "compiler/glsl/ir_expression_flattening.h" 42#include "compiler/glsl/ir_visitor.h" 43#include "compiler/glsl/ir_optimization.h" 44#include "compiler/glsl/ir_uniform.h" 45#include "compiler/glsl/glsl_parser_extras.h" 46#include "compiler/glsl_types.h" 47#include "compiler/glsl/linker.h" 48#include "compiler/glsl/program.h" 49#include "compiler/glsl/shader_cache.h" 50#include "compiler/glsl/string_to_uint_map.h" 51#include "program/prog_instruction.h" 52#include "program/prog_optimize.h" 53#include "program/prog_print.h" 54#include "program/program.h" 55#include "program/prog_parameter.h" 56 57 58static int swizzle_for_size(int size); 59 60namespace { 61 62class src_reg; 63class dst_reg; 64 65/** 66 * This struct is a corresponding struct to Mesa prog_src_register, with 67 * wider fields. 68 */ 69class src_reg { 70public: 71 src_reg(gl_register_file file, int index, const glsl_type *type) 72 { 73 this->file = file; 74 this->index = index; 75 if (type && (type->is_scalar() || type->is_vector() || type->is_matrix())) 76 this->swizzle = swizzle_for_size(type->vector_elements); 77 else 78 this->swizzle = SWIZZLE_XYZW; 79 this->negate = 0; 80 this->reladdr = NULL; 81 } 82 83 src_reg() 84 { 85 this->file = PROGRAM_UNDEFINED; 86 this->index = 0; 87 this->swizzle = 0; 88 this->negate = 0; 89 this->reladdr = NULL; 90 } 91 92 explicit src_reg(dst_reg reg); 93 94 gl_register_file file; /**< PROGRAM_* from Mesa */ 95 int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */ 96 GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */ 97 int negate; /**< NEGATE_XYZW mask from mesa */ 98 /** Register index should be offset by the integer in this reg. */ 99 src_reg *reladdr; 100}; 101 102class dst_reg { 103public: 104 dst_reg(gl_register_file file, int writemask) 105 { 106 this->file = file; 107 this->index = 0; 108 this->writemask = writemask; 109 this->reladdr = NULL; 110 } 111 112 dst_reg() 113 { 114 this->file = PROGRAM_UNDEFINED; 115 this->index = 0; 116 this->writemask = 0; 117 this->reladdr = NULL; 118 } 119 120 explicit dst_reg(src_reg reg); 121 122 gl_register_file file; /**< PROGRAM_* from Mesa */ 123 int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */ 124 int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ 125 /** Register index should be offset by the integer in this reg. */ 126 src_reg *reladdr; 127}; 128 129} /* anonymous namespace */ 130 131src_reg::src_reg(dst_reg reg) 132{ 133 this->file = reg.file; 134 this->index = reg.index; 135 this->swizzle = SWIZZLE_XYZW; 136 this->negate = 0; 137 this->reladdr = reg.reladdr; 138} 139 140dst_reg::dst_reg(src_reg reg) 141{ 142 this->file = reg.file; 143 this->index = reg.index; 144 this->writemask = WRITEMASK_XYZW; 145 this->reladdr = reg.reladdr; 146} 147 148namespace { 149 150class ir_to_mesa_instruction : public exec_node { 151public: 152 DECLARE_RALLOC_CXX_OPERATORS(ir_to_mesa_instruction) 153 154 enum prog_opcode op; 155 dst_reg dst; 156 src_reg src[3]; 157 /** Pointer to the ir source this tree came from for debugging */ 158 ir_instruction *ir; 159 bool saturate; 160 int sampler; /**< sampler index */ 161 int tex_target; /**< One of TEXTURE_*_INDEX */ 162 GLboolean tex_shadow; 163}; 164 165class variable_storage : public exec_node { 166public: 167 variable_storage(ir_variable *var, gl_register_file file, int index) 168 : file(file), index(index), var(var) 169 { 170 /* empty */ 171 } 172 173 gl_register_file file; 174 int index; 175 ir_variable *var; /* variable that maps to this, if any */ 176}; 177 178class function_entry : public exec_node { 179public: 180 ir_function_signature *sig; 181 182 /** 183 * identifier of this function signature used by the program. 184 * 185 * At the point that Mesa instructions for function calls are 186 * generated, we don't know the address of the first instruction of 187 * the function body. So we make the BranchTarget that is called a 188 * small integer and rewrite them during set_branchtargets(). 189 */ 190 int sig_id; 191 192 /** 193 * Pointer to first instruction of the function body. 194 * 195 * Set during function body emits after main() is processed. 196 */ 197 ir_to_mesa_instruction *bgn_inst; 198 199 /** 200 * Index of the first instruction of the function body in actual 201 * Mesa IR. 202 * 203 * Set after convertion from ir_to_mesa_instruction to prog_instruction. 204 */ 205 int inst; 206 207 /** Storage for the return value. */ 208 src_reg return_reg; 209}; 210 211class ir_to_mesa_visitor : public ir_visitor { 212public: 213 ir_to_mesa_visitor(); 214 ~ir_to_mesa_visitor(); 215 216 function_entry *current_function; 217 218 struct gl_context *ctx; 219 struct gl_program *prog; 220 struct gl_shader_program *shader_program; 221 struct gl_shader_compiler_options *options; 222 223 int next_temp; 224 225 variable_storage *find_variable_storage(const ir_variable *var); 226 227 src_reg get_temp(const glsl_type *type); 228 void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr); 229 230 src_reg src_reg_for_float(float val); 231 232 /** 233 * \name Visit methods 234 * 235 * As typical for the visitor pattern, there must be one \c visit method for 236 * each concrete subclass of \c ir_instruction. Virtual base classes within 237 * the hierarchy should not have \c visit methods. 238 */ 239 /*@{*/ 240 virtual void visit(ir_variable *); 241 virtual void visit(ir_loop *); 242 virtual void visit(ir_loop_jump *); 243 virtual void visit(ir_function_signature *); 244 virtual void visit(ir_function *); 245 virtual void visit(ir_expression *); 246 virtual void visit(ir_swizzle *); 247 virtual void visit(ir_dereference_variable *); 248 virtual void visit(ir_dereference_array *); 249 virtual void visit(ir_dereference_record *); 250 virtual void visit(ir_assignment *); 251 virtual void visit(ir_constant *); 252 virtual void visit(ir_call *); 253 virtual void visit(ir_return *); 254 virtual void visit(ir_discard *); 255 virtual void visit(ir_texture *); 256 virtual void visit(ir_if *); 257 virtual void visit(ir_emit_vertex *); 258 virtual void visit(ir_end_primitive *); 259 virtual void visit(ir_barrier *); 260 /*@}*/ 261 262 src_reg result; 263 264 /** List of variable_storage */ 265 exec_list variables; 266 267 /** List of function_entry */ 268 exec_list function_signatures; 269 int next_signature_id; 270 271 /** List of ir_to_mesa_instruction */ 272 exec_list instructions; 273 274 ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op); 275 276 ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op, 277 dst_reg dst, src_reg src0); 278 279 ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op, 280 dst_reg dst, src_reg src0, src_reg src1); 281 282 ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op, 283 dst_reg dst, 284 src_reg src0, src_reg src1, src_reg src2); 285 286 /** 287 * Emit the correct dot-product instruction for the type of arguments 288 */ 289 ir_to_mesa_instruction * emit_dp(ir_instruction *ir, 290 dst_reg dst, 291 src_reg src0, 292 src_reg src1, 293 unsigned elements); 294 295 void emit_scalar(ir_instruction *ir, enum prog_opcode op, 296 dst_reg dst, src_reg src0); 297 298 void emit_scalar(ir_instruction *ir, enum prog_opcode op, 299 dst_reg dst, src_reg src0, src_reg src1); 300 301 bool try_emit_mad(ir_expression *ir, 302 int mul_operand); 303 bool try_emit_mad_for_and_not(ir_expression *ir, 304 int mul_operand); 305 306 void emit_swz(ir_expression *ir); 307 308 void emit_equality_comparison(ir_expression *ir, enum prog_opcode op, 309 dst_reg dst, 310 const src_reg &src0, const src_reg &src1); 311 312 inline void emit_sne(ir_expression *ir, dst_reg dst, 313 const src_reg &src0, const src_reg &src1) 314 { 315 emit_equality_comparison(ir, OPCODE_SLT, dst, src0, src1); 316 } 317 318 inline void emit_seq(ir_expression *ir, dst_reg dst, 319 const src_reg &src0, const src_reg &src1) 320 { 321 emit_equality_comparison(ir, OPCODE_SGE, dst, src0, src1); 322 } 323 324 bool process_move_condition(ir_rvalue *ir); 325 326 void copy_propagate(void); 327 328 void *mem_ctx; 329}; 330 331} /* anonymous namespace */ 332 333static src_reg undef_src = src_reg(PROGRAM_UNDEFINED, 0, NULL); 334 335static dst_reg undef_dst = dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP); 336 337static dst_reg address_reg = dst_reg(PROGRAM_ADDRESS, WRITEMASK_X); 338 339static int 340swizzle_for_size(int size) 341{ 342 static const int size_swizzles[4] = { 343 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), 344 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), 345 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z), 346 MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W), 347 }; 348 349 assert((size >= 1) && (size <= 4)); 350 return size_swizzles[size - 1]; 351} 352 353ir_to_mesa_instruction * 354ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op, 355 dst_reg dst, 356 src_reg src0, src_reg src1, src_reg src2) 357{ 358 ir_to_mesa_instruction *inst = new(mem_ctx) ir_to_mesa_instruction(); 359 int num_reladdr = 0; 360 361 /* If we have to do relative addressing, we want to load the ARL 362 * reg directly for one of the regs, and preload the other reladdr 363 * sources into temps. 364 */ 365 num_reladdr += dst.reladdr != NULL; 366 num_reladdr += src0.reladdr != NULL; 367 num_reladdr += src1.reladdr != NULL; 368 num_reladdr += src2.reladdr != NULL; 369 370 reladdr_to_temp(ir, &src2, &num_reladdr); 371 reladdr_to_temp(ir, &src1, &num_reladdr); 372 reladdr_to_temp(ir, &src0, &num_reladdr); 373 374 if (dst.reladdr) { 375 emit(ir, OPCODE_ARL, address_reg, *dst.reladdr); 376 num_reladdr--; 377 } 378 assert(num_reladdr == 0); 379 380 inst->op = op; 381 inst->dst = dst; 382 inst->src[0] = src0; 383 inst->src[1] = src1; 384 inst->src[2] = src2; 385 inst->ir = ir; 386 387 this->instructions.push_tail(inst); 388 389 return inst; 390} 391 392 393ir_to_mesa_instruction * 394ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op, 395 dst_reg dst, src_reg src0, src_reg src1) 396{ 397 return emit(ir, op, dst, src0, src1, undef_src); 398} 399 400ir_to_mesa_instruction * 401ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op, 402 dst_reg dst, src_reg src0) 403{ 404 assert(dst.writemask != 0); 405 return emit(ir, op, dst, src0, undef_src, undef_src); 406} 407 408ir_to_mesa_instruction * 409ir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op) 410{ 411 return emit(ir, op, undef_dst, undef_src, undef_src, undef_src); 412} 413 414ir_to_mesa_instruction * 415ir_to_mesa_visitor::emit_dp(ir_instruction *ir, 416 dst_reg dst, src_reg src0, src_reg src1, 417 unsigned elements) 418{ 419 static const enum prog_opcode dot_opcodes[] = { 420 OPCODE_DP2, OPCODE_DP3, OPCODE_DP4 421 }; 422 423 return emit(ir, dot_opcodes[elements - 2], dst, src0, src1); 424} 425 426/** 427 * Emits Mesa scalar opcodes to produce unique answers across channels. 428 * 429 * Some Mesa opcodes are scalar-only, like ARB_fp/vp. The src X 430 * channel determines the result across all channels. So to do a vec4 431 * of this operation, we want to emit a scalar per source channel used 432 * to produce dest channels. 433 */ 434void 435ir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op, 436 dst_reg dst, 437 src_reg orig_src0, src_reg orig_src1) 438{ 439 int i, j; 440 int done_mask = ~dst.writemask; 441 442 /* Mesa RCP is a scalar operation splatting results to all channels, 443 * like ARB_fp/vp. So emit as many RCPs as necessary to cover our 444 * dst channels. 445 */ 446 for (i = 0; i < 4; i++) { 447 GLuint this_mask = (1 << i); 448 ir_to_mesa_instruction *inst; 449 src_reg src0 = orig_src0; 450 src_reg src1 = orig_src1; 451 452 if (done_mask & this_mask) 453 continue; 454 455 GLuint src0_swiz = GET_SWZ(src0.swizzle, i); 456 GLuint src1_swiz = GET_SWZ(src1.swizzle, i); 457 for (j = i + 1; j < 4; j++) { 458 /* If there is another enabled component in the destination that is 459 * derived from the same inputs, generate its value on this pass as 460 * well. 461 */ 462 if (!(done_mask & (1 << j)) && 463 GET_SWZ(src0.swizzle, j) == src0_swiz && 464 GET_SWZ(src1.swizzle, j) == src1_swiz) { 465 this_mask |= (1 << j); 466 } 467 } 468 src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, 469 src0_swiz, src0_swiz); 470 src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz, 471 src1_swiz, src1_swiz); 472 473 inst = emit(ir, op, dst, src0, src1); 474 inst->dst.writemask = this_mask; 475 done_mask |= this_mask; 476 } 477} 478 479void 480ir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op, 481 dst_reg dst, src_reg src0) 482{ 483 src_reg undef = undef_src; 484 485 undef.swizzle = SWIZZLE_XXXX; 486 487 emit_scalar(ir, op, dst, src0, undef); 488} 489 490src_reg 491ir_to_mesa_visitor::src_reg_for_float(float val) 492{ 493 src_reg src(PROGRAM_CONSTANT, -1, NULL); 494 495 src.index = _mesa_add_unnamed_constant(this->prog->Parameters, 496 (const gl_constant_value *)&val, 1, &src.swizzle); 497 498 return src; 499} 500 501static int 502storage_type_size(const struct glsl_type *type, bool bindless) 503{ 504 unsigned int i; 505 int size; 506 507 switch (type->base_type) { 508 case GLSL_TYPE_UINT: 509 case GLSL_TYPE_INT: 510 case GLSL_TYPE_UINT8: 511 case GLSL_TYPE_INT8: 512 case GLSL_TYPE_UINT16: 513 case GLSL_TYPE_INT16: 514 case GLSL_TYPE_FLOAT: 515 case GLSL_TYPE_FLOAT16: 516 case GLSL_TYPE_BOOL: 517 if (type->is_matrix()) { 518 return type->matrix_columns; 519 } else { 520 /* Regardless of size of vector, it gets a vec4. This is bad 521 * packing for things like floats, but otherwise arrays become a 522 * mess. Hopefully a later pass over the code can pack scalars 523 * down if appropriate. 524 */ 525 return 1; 526 } 527 break; 528 case GLSL_TYPE_DOUBLE: 529 if (type->is_matrix()) { 530 if (type->vector_elements > 2) 531 return type->matrix_columns * 2; 532 else 533 return type->matrix_columns; 534 } else { 535 if (type->vector_elements > 2) 536 return 2; 537 else 538 return 1; 539 } 540 break; 541 case GLSL_TYPE_UINT64: 542 case GLSL_TYPE_INT64: 543 if (type->vector_elements > 2) 544 return 2; 545 else 546 return 1; 547 case GLSL_TYPE_ARRAY: 548 assert(type->length > 0); 549 return storage_type_size(type->fields.array, bindless) * type->length; 550 case GLSL_TYPE_STRUCT: 551 size = 0; 552 for (i = 0; i < type->length; i++) { 553 size += storage_type_size(type->fields.structure[i].type, bindless); 554 } 555 return size; 556 case GLSL_TYPE_SAMPLER: 557 case GLSL_TYPE_IMAGE: 558 if (!bindless) 559 return 0; 560 /* fall through */ 561 case GLSL_TYPE_SUBROUTINE: 562 return 1; 563 case GLSL_TYPE_ATOMIC_UINT: 564 case GLSL_TYPE_VOID: 565 case GLSL_TYPE_ERROR: 566 case GLSL_TYPE_INTERFACE: 567 case GLSL_TYPE_FUNCTION: 568 assert(!"Invalid type in type_size"); 569 break; 570 } 571 572 return 0; 573} 574 575static int 576type_size(const struct glsl_type *type) 577{ 578 return storage_type_size(type, false); 579} 580 581/** 582 * In the initial pass of codegen, we assign temporary numbers to 583 * intermediate results. (not SSA -- variable assignments will reuse 584 * storage). Actual register allocation for the Mesa VM occurs in a 585 * pass over the Mesa IR later. 586 */ 587src_reg 588ir_to_mesa_visitor::get_temp(const glsl_type *type) 589{ 590 src_reg src; 591 592 src.file = PROGRAM_TEMPORARY; 593 src.index = next_temp; 594 src.reladdr = NULL; 595 next_temp += type_size(type); 596 597 if (type->is_array() || type->is_struct()) { 598 src.swizzle = SWIZZLE_NOOP; 599 } else { 600 src.swizzle = swizzle_for_size(type->vector_elements); 601 } 602 src.negate = 0; 603 604 return src; 605} 606 607variable_storage * 608ir_to_mesa_visitor::find_variable_storage(const ir_variable *var) 609{ 610 foreach_in_list(variable_storage, entry, &this->variables) { 611 if (entry->var == var) 612 return entry; 613 } 614 615 return NULL; 616} 617 618void 619ir_to_mesa_visitor::visit(ir_variable *ir) 620{ 621 if (ir->data.mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) { 622 unsigned int i; 623 const ir_state_slot *const slots = ir->get_state_slots(); 624 assert(slots != NULL); 625 626 /* Check if this statevar's setup in the STATE file exactly 627 * matches how we'll want to reference it as a 628 * struct/array/whatever. If not, then we need to move it into 629 * temporary storage and hope that it'll get copy-propagated 630 * out. 631 */ 632 for (i = 0; i < ir->get_num_state_slots(); i++) { 633 if (slots[i].swizzle != SWIZZLE_XYZW) { 634 break; 635 } 636 } 637 638 variable_storage *storage; 639 dst_reg dst; 640 if (i == ir->get_num_state_slots()) { 641 /* We'll set the index later. */ 642 storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1); 643 this->variables.push_tail(storage); 644 645 dst = undef_dst; 646 } else { 647 /* The variable_storage constructor allocates slots based on the size 648 * of the type. However, this had better match the number of state 649 * elements that we're going to copy into the new temporary. 650 */ 651 assert((int) ir->get_num_state_slots() == type_size(ir->type)); 652 653 storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY, 654 this->next_temp); 655 this->variables.push_tail(storage); 656 this->next_temp += type_size(ir->type); 657 658 dst = dst_reg(src_reg(PROGRAM_TEMPORARY, storage->index, NULL)); 659 } 660 661 662 for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) { 663 int index = _mesa_add_state_reference(this->prog->Parameters, 664 slots[i].tokens); 665 666 if (storage->file == PROGRAM_STATE_VAR) { 667 if (storage->index == -1) { 668 storage->index = index; 669 } else { 670 assert(index == storage->index + (int)i); 671 } 672 } else { 673 src_reg src(PROGRAM_STATE_VAR, index, NULL); 674 src.swizzle = slots[i].swizzle; 675 emit(ir, OPCODE_MOV, dst, src); 676 /* even a float takes up a whole vec4 reg in a struct/array. */ 677 dst.index++; 678 } 679 } 680 681 if (storage->file == PROGRAM_TEMPORARY && 682 dst.index != storage->index + (int) ir->get_num_state_slots()) { 683 linker_error(this->shader_program, 684 "failed to load builtin uniform `%s' " 685 "(%d/%d regs loaded)\n", 686 ir->name, dst.index - storage->index, 687 type_size(ir->type)); 688 } 689 } 690} 691 692void 693ir_to_mesa_visitor::visit(ir_loop *ir) 694{ 695 emit(NULL, OPCODE_BGNLOOP); 696 697 visit_exec_list(&ir->body_instructions, this); 698 699 emit(NULL, OPCODE_ENDLOOP); 700} 701 702void 703ir_to_mesa_visitor::visit(ir_loop_jump *ir) 704{ 705 switch (ir->mode) { 706 case ir_loop_jump::jump_break: 707 emit(NULL, OPCODE_BRK); 708 break; 709 case ir_loop_jump::jump_continue: 710 emit(NULL, OPCODE_CONT); 711 break; 712 } 713} 714 715 716void 717ir_to_mesa_visitor::visit(ir_function_signature *ir) 718{ 719 assert(0); 720 (void)ir; 721} 722 723void 724ir_to_mesa_visitor::visit(ir_function *ir) 725{ 726 /* Ignore function bodies other than main() -- we shouldn't see calls to 727 * them since they should all be inlined before we get to ir_to_mesa. 728 */ 729 if (strcmp(ir->name, "main") == 0) { 730 const ir_function_signature *sig; 731 exec_list empty; 732 733 sig = ir->matching_signature(NULL, &empty, false); 734 735 assert(sig); 736 737 foreach_in_list(ir_instruction, ir, &sig->body) { 738 ir->accept(this); 739 } 740 } 741} 742 743bool 744ir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand) 745{ 746 int nonmul_operand = 1 - mul_operand; 747 src_reg a, b, c; 748 749 ir_expression *expr = ir->operands[mul_operand]->as_expression(); 750 if (!expr || expr->operation != ir_binop_mul) 751 return false; 752 753 expr->operands[0]->accept(this); 754 a = this->result; 755 expr->operands[1]->accept(this); 756 b = this->result; 757 ir->operands[nonmul_operand]->accept(this); 758 c = this->result; 759 760 this->result = get_temp(ir->type); 761 emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, c); 762 763 return true; 764} 765 766/** 767 * Emit OPCODE_MAD(a, -b, a) instead of AND(a, NOT(b)) 768 * 769 * The logic values are 1.0 for true and 0.0 for false. Logical-and is 770 * implemented using multiplication, and logical-or is implemented using 771 * addition. Logical-not can be implemented as (true - x), or (1.0 - x). 772 * As result, the logical expression (a & !b) can be rewritten as: 773 * 774 * - a * !b 775 * - a * (1 - b) 776 * - (a * 1) - (a * b) 777 * - a + -(a * b) 778 * - a + (a * -b) 779 * 780 * This final expression can be implemented as a single MAD(a, -b, a) 781 * instruction. 782 */ 783bool 784ir_to_mesa_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand) 785{ 786 const int other_operand = 1 - try_operand; 787 src_reg a, b; 788 789 ir_expression *expr = ir->operands[try_operand]->as_expression(); 790 if (!expr || expr->operation != ir_unop_logic_not) 791 return false; 792 793 ir->operands[other_operand]->accept(this); 794 a = this->result; 795 expr->operands[0]->accept(this); 796 b = this->result; 797 798 b.negate = ~b.negate; 799 800 this->result = get_temp(ir->type); 801 emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, a); 802 803 return true; 804} 805 806void 807ir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir, 808 src_reg *reg, int *num_reladdr) 809{ 810 if (!reg->reladdr) 811 return; 812 813 emit(ir, OPCODE_ARL, address_reg, *reg->reladdr); 814 815 if (*num_reladdr != 1) { 816 src_reg temp = get_temp(glsl_type::vec4_type); 817 818 emit(ir, OPCODE_MOV, dst_reg(temp), *reg); 819 *reg = temp; 820 } 821 822 (*num_reladdr)--; 823} 824 825void 826ir_to_mesa_visitor::emit_swz(ir_expression *ir) 827{ 828 /* Assume that the vector operator is in a form compatible with OPCODE_SWZ. 829 * This means that each of the operands is either an immediate value of -1, 830 * 0, or 1, or is a component from one source register (possibly with 831 * negation). 832 */ 833 uint8_t components[4] = { 0 }; 834 bool negate[4] = { false }; 835 ir_variable *var = NULL; 836 837 for (unsigned i = 0; i < ir->type->vector_elements; i++) { 838 ir_rvalue *op = ir->operands[i]; 839 840 assert(op->type->is_scalar()); 841 842 while (op != NULL) { 843 switch (op->ir_type) { 844 case ir_type_constant: { 845 846 assert(op->type->is_scalar()); 847 848 const ir_constant *const c = op->as_constant(); 849 if (c->is_one()) { 850 components[i] = SWIZZLE_ONE; 851 } else if (c->is_zero()) { 852 components[i] = SWIZZLE_ZERO; 853 } else if (c->is_negative_one()) { 854 components[i] = SWIZZLE_ONE; 855 negate[i] = true; 856 } else { 857 assert(!"SWZ constant must be 0.0 or 1.0."); 858 } 859 860 op = NULL; 861 break; 862 } 863 864 case ir_type_dereference_variable: { 865 ir_dereference_variable *const deref = 866 (ir_dereference_variable *) op; 867 868 assert((var == NULL) || (deref->var == var)); 869 components[i] = SWIZZLE_X; 870 var = deref->var; 871 op = NULL; 872 break; 873 } 874 875 case ir_type_expression: { 876 ir_expression *const expr = (ir_expression *) op; 877 878 assert(expr->operation == ir_unop_neg); 879 negate[i] = true; 880 881 op = expr->operands[0]; 882 break; 883 } 884 885 case ir_type_swizzle: { 886 ir_swizzle *const swiz = (ir_swizzle *) op; 887 888 components[i] = swiz->mask.x; 889 op = swiz->val; 890 break; 891 } 892 893 default: 894 assert(!"Should not get here."); 895 return; 896 } 897 } 898 } 899 900 assert(var != NULL); 901 902 ir_dereference_variable *const deref = 903 new(mem_ctx) ir_dereference_variable(var); 904 905 this->result.file = PROGRAM_UNDEFINED; 906 deref->accept(this); 907 if (this->result.file == PROGRAM_UNDEFINED) { 908 printf("Failed to get tree for expression operand:\n"); 909 deref->print(); 910 printf("\n"); 911 exit(1); 912 } 913 914 src_reg src; 915 916 src = this->result; 917 src.swizzle = MAKE_SWIZZLE4(components[0], 918 components[1], 919 components[2], 920 components[3]); 921 src.negate = ((unsigned(negate[0]) << 0) 922 | (unsigned(negate[1]) << 1) 923 | (unsigned(negate[2]) << 2) 924 | (unsigned(negate[3]) << 3)); 925 926 /* Storage for our result. Ideally for an assignment we'd be using the 927 * actual storage for the result here, instead. 928 */ 929 const src_reg result_src = get_temp(ir->type); 930 dst_reg result_dst = dst_reg(result_src); 931 932 /* Limit writes to the channels that will be used by result_src later. 933 * This does limit this temp's use as a temporary for multi-instruction 934 * sequences. 935 */ 936 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 937 938 emit(ir, OPCODE_SWZ, result_dst, src); 939 this->result = result_src; 940} 941 942void 943ir_to_mesa_visitor::emit_equality_comparison(ir_expression *ir, 944 enum prog_opcode op, 945 dst_reg dst, 946 const src_reg &src0, 947 const src_reg &src1) 948{ 949 src_reg difference; 950 src_reg abs_difference = get_temp(glsl_type::vec4_type); 951 const src_reg zero = src_reg_for_float(0.0); 952 953 /* x == y is equivalent to -abs(x-y) >= 0. Since all of the code that 954 * consumes the generated IR is pretty dumb, take special care when one 955 * of the operands is zero. 956 * 957 * Similarly, x != y is equivalent to -abs(x-y) < 0. 958 */ 959 if (src0.file == zero.file && 960 src0.index == zero.index && 961 src0.swizzle == zero.swizzle) { 962 difference = src1; 963 } else if (src1.file == zero.file && 964 src1.index == zero.index && 965 src1.swizzle == zero.swizzle) { 966 difference = src0; 967 } else { 968 difference = get_temp(glsl_type::vec4_type); 969 970 src_reg tmp_src = src0; 971 tmp_src.negate = ~tmp_src.negate; 972 973 emit(ir, OPCODE_ADD, dst_reg(difference), tmp_src, src1); 974 } 975 976 emit(ir, OPCODE_ABS, dst_reg(abs_difference), difference); 977 978 abs_difference.negate = ~abs_difference.negate; 979 emit(ir, op, dst, abs_difference, zero); 980} 981 982void 983ir_to_mesa_visitor::visit(ir_expression *ir) 984{ 985 unsigned int operand; 986 src_reg op[ARRAY_SIZE(ir->operands)]; 987 src_reg result_src; 988 dst_reg result_dst; 989 990 /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c) 991 */ 992 if (ir->operation == ir_binop_add) { 993 if (try_emit_mad(ir, 1)) 994 return; 995 if (try_emit_mad(ir, 0)) 996 return; 997 } 998 999 /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b)) 1000 */ 1001 if (ir->operation == ir_binop_logic_and) { 1002 if (try_emit_mad_for_and_not(ir, 1)) 1003 return; 1004 if (try_emit_mad_for_and_not(ir, 0)) 1005 return; 1006 } 1007 1008 if (ir->operation == ir_quadop_vector) { 1009 this->emit_swz(ir); 1010 return; 1011 } 1012 1013 for (operand = 0; operand < ir->num_operands; operand++) { 1014 this->result.file = PROGRAM_UNDEFINED; 1015 ir->operands[operand]->accept(this); 1016 if (this->result.file == PROGRAM_UNDEFINED) { 1017 printf("Failed to get tree for expression operand:\n"); 1018 ir->operands[operand]->print(); 1019 printf("\n"); 1020 exit(1); 1021 } 1022 op[operand] = this->result; 1023 1024 /* Matrix expression operands should have been broken down to vector 1025 * operations already. 1026 */ 1027 assert(!ir->operands[operand]->type->is_matrix()); 1028 } 1029 1030 int vector_elements = ir->operands[0]->type->vector_elements; 1031 if (ir->operands[1]) { 1032 vector_elements = MAX2(vector_elements, 1033 ir->operands[1]->type->vector_elements); 1034 } 1035 1036 this->result.file = PROGRAM_UNDEFINED; 1037 1038 /* Storage for our result. Ideally for an assignment we'd be using 1039 * the actual storage for the result here, instead. 1040 */ 1041 result_src = get_temp(ir->type); 1042 /* convenience for the emit functions below. */ 1043 result_dst = dst_reg(result_src); 1044 /* Limit writes to the channels that will be used by result_src later. 1045 * This does limit this temp's use as a temporary for multi-instruction 1046 * sequences. 1047 */ 1048 result_dst.writemask = (1 << ir->type->vector_elements) - 1; 1049 1050 switch (ir->operation) { 1051 case ir_unop_logic_not: 1052 /* Previously 'SEQ dst, src, 0.0' was used for this. However, many 1053 * older GPUs implement SEQ using multiple instructions (i915 uses two 1054 * SGE instructions and a MUL instruction). Since our logic values are 1055 * 0.0 and 1.0, 1-x also implements !x. 1056 */ 1057 op[0].negate = ~op[0].negate; 1058 emit(ir, OPCODE_ADD, result_dst, op[0], src_reg_for_float(1.0)); 1059 break; 1060 case ir_unop_neg: 1061 op[0].negate = ~op[0].negate; 1062 result_src = op[0]; 1063 break; 1064 case ir_unop_abs: 1065 emit(ir, OPCODE_ABS, result_dst, op[0]); 1066 break; 1067 case ir_unop_sign: 1068 emit(ir, OPCODE_SSG, result_dst, op[0]); 1069 break; 1070 case ir_unop_rcp: 1071 emit_scalar(ir, OPCODE_RCP, result_dst, op[0]); 1072 break; 1073 1074 case ir_unop_exp2: 1075 emit_scalar(ir, OPCODE_EX2, result_dst, op[0]); 1076 break; 1077 case ir_unop_exp: 1078 assert(!"not reached: should be handled by exp_to_exp2"); 1079 break; 1080 case ir_unop_log: 1081 assert(!"not reached: should be handled by log_to_log2"); 1082 break; 1083 case ir_unop_log2: 1084 emit_scalar(ir, OPCODE_LG2, result_dst, op[0]); 1085 break; 1086 case ir_unop_sin: 1087 emit_scalar(ir, OPCODE_SIN, result_dst, op[0]); 1088 break; 1089 case ir_unop_cos: 1090 emit_scalar(ir, OPCODE_COS, result_dst, op[0]); 1091 break; 1092 1093 case ir_unop_dFdx: 1094 emit(ir, OPCODE_DDX, result_dst, op[0]); 1095 break; 1096 case ir_unop_dFdy: 1097 emit(ir, OPCODE_DDY, result_dst, op[0]); 1098 break; 1099 1100 case ir_unop_saturate: { 1101 ir_to_mesa_instruction *inst = emit(ir, OPCODE_MOV, 1102 result_dst, op[0]); 1103 inst->saturate = true; 1104 break; 1105 } 1106 case ir_unop_noise: { 1107 const enum prog_opcode opcode = 1108 prog_opcode(OPCODE_NOISE1 1109 + (ir->operands[0]->type->vector_elements) - 1); 1110 assert((opcode >= OPCODE_NOISE1) && (opcode <= OPCODE_NOISE4)); 1111 1112 emit(ir, opcode, result_dst, op[0]); 1113 break; 1114 } 1115 1116 case ir_binop_add: 1117 emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); 1118 break; 1119 case ir_binop_sub: 1120 emit(ir, OPCODE_SUB, result_dst, op[0], op[1]); 1121 break; 1122 1123 case ir_binop_mul: 1124 emit(ir, OPCODE_MUL, result_dst, op[0], op[1]); 1125 break; 1126 case ir_binop_div: 1127 assert(!"not reached: should be handled by ir_div_to_mul_rcp"); 1128 break; 1129 case ir_binop_mod: 1130 /* Floating point should be lowered by MOD_TO_FLOOR in the compiler. */ 1131 assert(ir->type->is_integer()); 1132 emit(ir, OPCODE_MUL, result_dst, op[0], op[1]); 1133 break; 1134 1135 case ir_binop_less: 1136 emit(ir, OPCODE_SLT, result_dst, op[0], op[1]); 1137 break; 1138 case ir_binop_gequal: 1139 emit(ir, OPCODE_SGE, result_dst, op[0], op[1]); 1140 break; 1141 case ir_binop_equal: 1142 emit_seq(ir, result_dst, op[0], op[1]); 1143 break; 1144 case ir_binop_nequal: 1145 emit_sne(ir, result_dst, op[0], op[1]); 1146 break; 1147 case ir_binop_all_equal: 1148 /* "==" operator producing a scalar boolean. */ 1149 if (ir->operands[0]->type->is_vector() || 1150 ir->operands[1]->type->is_vector()) { 1151 src_reg temp = get_temp(glsl_type::vec4_type); 1152 emit_sne(ir, dst_reg(temp), op[0], op[1]); 1153 1154 /* After the dot-product, the value will be an integer on the 1155 * range [0,4]. Zero becomes 1.0, and positive values become zero. 1156 */ 1157 emit_dp(ir, result_dst, temp, temp, vector_elements); 1158 1159 /* Negating the result of the dot-product gives values on the range 1160 * [-4, 0]. Zero becomes 1.0, and negative values become zero. This 1161 * achieved using SGE. 1162 */ 1163 src_reg sge_src = result_src; 1164 sge_src.negate = ~sge_src.negate; 1165 emit(ir, OPCODE_SGE, result_dst, sge_src, src_reg_for_float(0.0)); 1166 } else { 1167 emit_seq(ir, result_dst, op[0], op[1]); 1168 } 1169 break; 1170 case ir_binop_any_nequal: 1171 /* "!=" operator producing a scalar boolean. */ 1172 if (ir->operands[0]->type->is_vector() || 1173 ir->operands[1]->type->is_vector()) { 1174 src_reg temp = get_temp(glsl_type::vec4_type); 1175 if (ir->operands[0]->type->is_boolean() && 1176 ir->operands[1]->as_constant() && 1177 ir->operands[1]->as_constant()->is_zero()) { 1178 temp = op[0]; 1179 } else { 1180 emit_sne(ir, dst_reg(temp), op[0], op[1]); 1181 } 1182 1183 /* After the dot-product, the value will be an integer on the 1184 * range [0,4]. Zero stays zero, and positive values become 1.0. 1185 */ 1186 ir_to_mesa_instruction *const dp = 1187 emit_dp(ir, result_dst, temp, temp, vector_elements); 1188 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { 1189 /* The clamping to [0,1] can be done for free in the fragment 1190 * shader with a saturate. 1191 */ 1192 dp->saturate = true; 1193 } else { 1194 /* Negating the result of the dot-product gives values on the range 1195 * [-4, 0]. Zero stays zero, and negative values become 1.0. This 1196 * achieved using SLT. 1197 */ 1198 src_reg slt_src = result_src; 1199 slt_src.negate = ~slt_src.negate; 1200 emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0)); 1201 } 1202 } else { 1203 emit_sne(ir, result_dst, op[0], op[1]); 1204 } 1205 break; 1206 1207 case ir_binop_logic_xor: 1208 emit_sne(ir, result_dst, op[0], op[1]); 1209 break; 1210 1211 case ir_binop_logic_or: { 1212 if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { 1213 /* After the addition, the value will be an integer on the 1214 * range [0,2]. Zero stays zero, and positive values become 1.0. 1215 */ 1216 ir_to_mesa_instruction *add = 1217 emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); 1218 add->saturate = true; 1219 } else { 1220 /* The Boolean arguments are stored as float 0.0 and 1.0. If either 1221 * value is 1.0, the result of the logcal-or should be 1.0. If both 1222 * values are 0.0, the result should be 0.0. This is exactly what 1223 * MAX does. 1224 */ 1225 emit(ir, OPCODE_MAX, result_dst, op[0], op[1]); 1226 } 1227 break; 1228 } 1229 1230 case ir_binop_logic_and: 1231 /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". */ 1232 emit(ir, OPCODE_MUL, result_dst, op[0], op[1]); 1233 break; 1234 1235 case ir_binop_dot: 1236 assert(ir->operands[0]->type->is_vector()); 1237 assert(ir->operands[0]->type == ir->operands[1]->type); 1238 emit_dp(ir, result_dst, op[0], op[1], 1239 ir->operands[0]->type->vector_elements); 1240 break; 1241 1242 case ir_unop_sqrt: 1243 /* sqrt(x) = x * rsq(x). */ 1244 emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]); 1245 emit(ir, OPCODE_MUL, result_dst, result_src, op[0]); 1246 /* For incoming channels <= 0, set the result to 0. */ 1247 op[0].negate = ~op[0].negate; 1248 emit(ir, OPCODE_CMP, result_dst, 1249 op[0], result_src, src_reg_for_float(0.0)); 1250 break; 1251 case ir_unop_rsq: 1252 emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]); 1253 break; 1254 case ir_unop_i2f: 1255 case ir_unop_u2f: 1256 case ir_unop_b2f: 1257 case ir_unop_b2i: 1258 case ir_unop_i2u: 1259 case ir_unop_u2i: 1260 /* Mesa IR lacks types, ints are stored as truncated floats. */ 1261 result_src = op[0]; 1262 break; 1263 case ir_unop_f2i: 1264 case ir_unop_f2u: 1265 emit(ir, OPCODE_TRUNC, result_dst, op[0]); 1266 break; 1267 case ir_unop_f2b: 1268 case ir_unop_i2b: 1269 emit_sne(ir, result_dst, op[0], src_reg_for_float(0.0)); 1270 break; 1271 case ir_unop_bitcast_f2i: // Ignore these 4, they can't happen here anyway 1272 case ir_unop_bitcast_f2u: 1273 case ir_unop_bitcast_i2f: 1274 case ir_unop_bitcast_u2f: 1275 break; 1276 case ir_unop_trunc: 1277 emit(ir, OPCODE_TRUNC, result_dst, op[0]); 1278 break; 1279 case ir_unop_ceil: 1280 op[0].negate = ~op[0].negate; 1281 emit(ir, OPCODE_FLR, result_dst, op[0]); 1282 result_src.negate = ~result_src.negate; 1283 break; 1284 case ir_unop_floor: 1285 emit(ir, OPCODE_FLR, result_dst, op[0]); 1286 break; 1287 case ir_unop_fract: 1288 emit(ir, OPCODE_FRC, result_dst, op[0]); 1289 break; 1290 case ir_unop_pack_snorm_2x16: 1291 case ir_unop_pack_snorm_4x8: 1292 case ir_unop_pack_unorm_2x16: 1293 case ir_unop_pack_unorm_4x8: 1294 case ir_unop_pack_half_2x16: 1295 case ir_unop_pack_double_2x32: 1296 case ir_unop_unpack_snorm_2x16: 1297 case ir_unop_unpack_snorm_4x8: 1298 case ir_unop_unpack_unorm_2x16: 1299 case ir_unop_unpack_unorm_4x8: 1300 case ir_unop_unpack_half_2x16: 1301 case ir_unop_unpack_double_2x32: 1302 case ir_unop_bitfield_reverse: 1303 case ir_unop_bit_count: 1304 case ir_unop_find_msb: 1305 case ir_unop_find_lsb: 1306 case ir_unop_d2f: 1307 case ir_unop_f2d: 1308 case ir_unop_d2i: 1309 case ir_unop_i2d: 1310 case ir_unop_d2u: 1311 case ir_unop_u2d: 1312 case ir_unop_d2b: 1313 case ir_unop_frexp_sig: 1314 case ir_unop_frexp_exp: 1315 assert(!"not supported"); 1316 break; 1317 case ir_binop_min: 1318 emit(ir, OPCODE_MIN, result_dst, op[0], op[1]); 1319 break; 1320 case ir_binop_max: 1321 emit(ir, OPCODE_MAX, result_dst, op[0], op[1]); 1322 break; 1323 case ir_binop_pow: 1324 emit_scalar(ir, OPCODE_POW, result_dst, op[0], op[1]); 1325 break; 1326 1327 /* GLSL 1.30 integer ops are unsupported in Mesa IR, but since 1328 * hardware backends have no way to avoid Mesa IR generation 1329 * even if they don't use it, we need to emit "something" and 1330 * continue. 1331 */ 1332 case ir_binop_lshift: 1333 case ir_binop_rshift: 1334 case ir_binop_bit_and: 1335 case ir_binop_bit_xor: 1336 case ir_binop_bit_or: 1337 emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); 1338 break; 1339 1340 case ir_unop_bit_not: 1341 case ir_unop_round_even: 1342 emit(ir, OPCODE_MOV, result_dst, op[0]); 1343 break; 1344 1345 case ir_binop_ubo_load: 1346 assert(!"not supported"); 1347 break; 1348 1349 case ir_triop_lrp: 1350 /* ir_triop_lrp operands are (x, y, a) while 1351 * OPCODE_LRP operands are (a, y, x) to match ARB_fragment_program. 1352 */ 1353 emit(ir, OPCODE_LRP, result_dst, op[2], op[1], op[0]); 1354 break; 1355 1356 case ir_triop_csel: 1357 /* We assume that boolean true and false are 1.0 and 0.0. OPCODE_CMP 1358 * selects src1 if src0 is < 0, src2 otherwise. 1359 */ 1360 op[0].negate = ~op[0].negate; 1361 emit(ir, OPCODE_CMP, result_dst, op[0], op[1], op[2]); 1362 break; 1363 1364 case ir_binop_vector_extract: 1365 case ir_triop_fma: 1366 case ir_triop_bitfield_extract: 1367 case ir_triop_vector_insert: 1368 case ir_quadop_bitfield_insert: 1369 case ir_binop_ldexp: 1370 case ir_binop_carry: 1371 case ir_binop_borrow: 1372 case ir_binop_imul_high: 1373 case ir_unop_interpolate_at_centroid: 1374 case ir_binop_interpolate_at_offset: 1375 case ir_binop_interpolate_at_sample: 1376 case ir_unop_dFdx_coarse: 1377 case ir_unop_dFdx_fine: 1378 case ir_unop_dFdy_coarse: 1379 case ir_unop_dFdy_fine: 1380 case ir_unop_subroutine_to_int: 1381 case ir_unop_get_buffer_size: 1382 case ir_unop_bitcast_u642d: 1383 case ir_unop_bitcast_i642d: 1384 case ir_unop_bitcast_d2u64: 1385 case ir_unop_bitcast_d2i64: 1386 case ir_unop_i642i: 1387 case ir_unop_u642i: 1388 case ir_unop_i642u: 1389 case ir_unop_u642u: 1390 case ir_unop_i642b: 1391 case ir_unop_i642f: 1392 case ir_unop_u642f: 1393 case ir_unop_i642d: 1394 case ir_unop_u642d: 1395 case ir_unop_i2i64: 1396 case ir_unop_u2i64: 1397 case ir_unop_b2i64: 1398 case ir_unop_f2i64: 1399 case ir_unop_d2i64: 1400 case ir_unop_i2u64: 1401 case ir_unop_u2u64: 1402 case ir_unop_f2u64: 1403 case ir_unop_d2u64: 1404 case ir_unop_u642i64: 1405 case ir_unop_i642u64: 1406 case ir_unop_pack_int_2x32: 1407 case ir_unop_unpack_int_2x32: 1408 case ir_unop_pack_uint_2x32: 1409 case ir_unop_unpack_uint_2x32: 1410 case ir_unop_pack_sampler_2x32: 1411 case ir_unop_unpack_sampler_2x32: 1412 case ir_unop_pack_image_2x32: 1413 case ir_unop_unpack_image_2x32: 1414 assert(!"not supported"); 1415 break; 1416 1417 case ir_unop_ssbo_unsized_array_length: 1418 case ir_quadop_vector: 1419 /* This operation should have already been handled. 1420 */ 1421 assert(!"Should not get here."); 1422 break; 1423 } 1424 1425 this->result = result_src; 1426} 1427 1428 1429void 1430ir_to_mesa_visitor::visit(ir_swizzle *ir) 1431{ 1432 src_reg src; 1433 int i; 1434 int swizzle[4]; 1435 1436 /* Note that this is only swizzles in expressions, not those on the left 1437 * hand side of an assignment, which do write masking. See ir_assignment 1438 * for that. 1439 */ 1440 1441 ir->val->accept(this); 1442 src = this->result; 1443 assert(src.file != PROGRAM_UNDEFINED); 1444 assert(ir->type->vector_elements > 0); 1445 1446 for (i = 0; i < 4; i++) { 1447 if (i < ir->type->vector_elements) { 1448 switch (i) { 1449 case 0: 1450 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x); 1451 break; 1452 case 1: 1453 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y); 1454 break; 1455 case 2: 1456 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z); 1457 break; 1458 case 3: 1459 swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w); 1460 break; 1461 } 1462 } else { 1463 /* If the type is smaller than a vec4, replicate the last 1464 * channel out. 1465 */ 1466 swizzle[i] = swizzle[ir->type->vector_elements - 1]; 1467 } 1468 } 1469 1470 src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); 1471 1472 this->result = src; 1473} 1474 1475void 1476ir_to_mesa_visitor::visit(ir_dereference_variable *ir) 1477{ 1478 variable_storage *entry = find_variable_storage(ir->var); 1479 ir_variable *var = ir->var; 1480 1481 if (!entry) { 1482 switch (var->data.mode) { 1483 case ir_var_uniform: 1484 entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM, 1485 var->data.param_index); 1486 this->variables.push_tail(entry); 1487 break; 1488 case ir_var_shader_in: 1489 /* The linker assigns locations for varyings and attributes, 1490 * including deprecated builtins (like gl_Color), 1491 * user-assigned generic attributes (glBindVertexLocation), 1492 * and user-defined varyings. 1493 */ 1494 assert(var->data.location != -1); 1495 entry = new(mem_ctx) variable_storage(var, 1496 PROGRAM_INPUT, 1497 var->data.location); 1498 break; 1499 case ir_var_shader_out: 1500 assert(var->data.location != -1); 1501 entry = new(mem_ctx) variable_storage(var, 1502 PROGRAM_OUTPUT, 1503 var->data.location); 1504 break; 1505 case ir_var_system_value: 1506 entry = new(mem_ctx) variable_storage(var, 1507 PROGRAM_SYSTEM_VALUE, 1508 var->data.location); 1509 break; 1510 case ir_var_auto: 1511 case ir_var_temporary: 1512 entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY, 1513 this->next_temp); 1514 this->variables.push_tail(entry); 1515 1516 next_temp += type_size(var->type); 1517 break; 1518 } 1519 1520 if (!entry) { 1521 printf("Failed to make storage for %s\n", var->name); 1522 exit(1); 1523 } 1524 } 1525 1526 this->result = src_reg(entry->file, entry->index, var->type); 1527} 1528 1529void 1530ir_to_mesa_visitor::visit(ir_dereference_array *ir) 1531{ 1532 ir_constant *index; 1533 src_reg src; 1534 int element_size = type_size(ir->type); 1535 1536 index = ir->array_index->constant_expression_value(ralloc_parent(ir)); 1537 1538 ir->array->accept(this); 1539 src = this->result; 1540 1541 if (index) { 1542 src.index += index->value.i[0] * element_size; 1543 } else { 1544 /* Variable index array dereference. It eats the "vec4" of the 1545 * base of the array and an index that offsets the Mesa register 1546 * index. 1547 */ 1548 ir->array_index->accept(this); 1549 1550 src_reg index_reg; 1551 1552 if (element_size == 1) { 1553 index_reg = this->result; 1554 } else { 1555 index_reg = get_temp(glsl_type::float_type); 1556 1557 emit(ir, OPCODE_MUL, dst_reg(index_reg), 1558 this->result, src_reg_for_float(element_size)); 1559 } 1560 1561 /* If there was already a relative address register involved, add the 1562 * new and the old together to get the new offset. 1563 */ 1564 if (src.reladdr != NULL) { 1565 src_reg accum_reg = get_temp(glsl_type::float_type); 1566 1567 emit(ir, OPCODE_ADD, dst_reg(accum_reg), 1568 index_reg, *src.reladdr); 1569 1570 index_reg = accum_reg; 1571 } 1572 1573 src.reladdr = ralloc(mem_ctx, src_reg); 1574 memcpy(src.reladdr, &index_reg, sizeof(index_reg)); 1575 } 1576 1577 /* If the type is smaller than a vec4, replicate the last channel out. */ 1578 if (ir->type->is_scalar() || ir->type->is_vector()) 1579 src.swizzle = swizzle_for_size(ir->type->vector_elements); 1580 else 1581 src.swizzle = SWIZZLE_NOOP; 1582 1583 this->result = src; 1584} 1585 1586void 1587ir_to_mesa_visitor::visit(ir_dereference_record *ir) 1588{ 1589 unsigned int i; 1590 const glsl_type *struct_type = ir->record->type; 1591 int offset = 0; 1592 1593 ir->record->accept(this); 1594 1595 assert(ir->field_idx >= 0); 1596 for (i = 0; i < struct_type->length; i++) { 1597 if (i == (unsigned) ir->field_idx) 1598 break; 1599 offset += type_size(struct_type->fields.structure[i].type); 1600 } 1601 1602 /* If the type is smaller than a vec4, replicate the last channel out. */ 1603 if (ir->type->is_scalar() || ir->type->is_vector()) 1604 this->result.swizzle = swizzle_for_size(ir->type->vector_elements); 1605 else 1606 this->result.swizzle = SWIZZLE_NOOP; 1607 1608 this->result.index += offset; 1609} 1610 1611/** 1612 * We want to be careful in assignment setup to hit the actual storage 1613 * instead of potentially using a temporary like we might with the 1614 * ir_dereference handler. 1615 */ 1616static dst_reg 1617get_assignment_lhs(ir_dereference *ir, ir_to_mesa_visitor *v) 1618{ 1619 /* The LHS must be a dereference. If the LHS is a variable indexed array 1620 * access of a vector, it must be separated into a series conditional moves 1621 * before reaching this point (see ir_vec_index_to_cond_assign). 1622 */ 1623 assert(ir->as_dereference()); 1624 ir_dereference_array *deref_array = ir->as_dereference_array(); 1625 if (deref_array) { 1626 assert(!deref_array->array->type->is_vector()); 1627 } 1628 1629 /* Use the rvalue deref handler for the most part. We'll ignore 1630 * swizzles in it and write swizzles using writemask, though. 1631 */ 1632 ir->accept(v); 1633 return dst_reg(v->result); 1634} 1635 1636/* Calculate the sampler index and also calculate the base uniform location 1637 * for struct members. 1638 */ 1639static void 1640calc_sampler_offsets(struct gl_shader_program *prog, ir_dereference *deref, 1641 unsigned *offset, unsigned *array_elements, 1642 unsigned *location) 1643{ 1644 if (deref->ir_type == ir_type_dereference_variable) 1645 return; 1646 1647 switch (deref->ir_type) { 1648 case ir_type_dereference_array: { 1649 ir_dereference_array *deref_arr = deref->as_dereference_array(); 1650 1651 void *mem_ctx = ralloc_parent(deref_arr); 1652 ir_constant *array_index = 1653 deref_arr->array_index->constant_expression_value(mem_ctx); 1654 1655 if (!array_index) { 1656 /* GLSL 1.10 and 1.20 allowed variable sampler array indices, 1657 * while GLSL 1.30 requires that the array indices be 1658 * constant integer expressions. We don't expect any driver 1659 * to actually work with a really variable array index, so 1660 * all that would work would be an unrolled loop counter that ends 1661 * up being constant above. 1662 */ 1663 ralloc_strcat(&prog->data->InfoLog, 1664 "warning: Variable sampler array index unsupported.\n" 1665 "This feature of the language was removed in GLSL 1.20 " 1666 "and is unlikely to be supported for 1.10 in Mesa.\n"); 1667 } else { 1668 *offset += array_index->value.u[0] * *array_elements; 1669 } 1670 1671 *array_elements *= deref_arr->array->type->length; 1672 1673 calc_sampler_offsets(prog, deref_arr->array->as_dereference(), 1674 offset, array_elements, location); 1675 break; 1676 } 1677 1678 case ir_type_dereference_record: { 1679 ir_dereference_record *deref_record = deref->as_dereference_record(); 1680 unsigned field_index = deref_record->field_idx; 1681 *location += 1682 deref_record->record->type->struct_location_offset(field_index); 1683 calc_sampler_offsets(prog, deref_record->record->as_dereference(), 1684 offset, array_elements, location); 1685 break; 1686 } 1687 1688 default: 1689 unreachable("Invalid deref type"); 1690 break; 1691 } 1692} 1693 1694static int 1695get_sampler_uniform_value(class ir_dereference *sampler, 1696 struct gl_shader_program *shader_program, 1697 const struct gl_program *prog) 1698{ 1699 GLuint shader = _mesa_program_enum_to_shader_stage(prog->Target); 1700 ir_variable *var = sampler->variable_referenced(); 1701 unsigned location = var->data.location; 1702 unsigned array_elements = 1; 1703 unsigned offset = 0; 1704 1705 calc_sampler_offsets(shader_program, sampler, &offset, &array_elements, 1706 &location); 1707 1708 assert(shader_program->data->UniformStorage[location].opaque[shader].active); 1709 return shader_program->data->UniformStorage[location].opaque[shader].index + 1710 offset; 1711} 1712 1713/** 1714 * Process the condition of a conditional assignment 1715 * 1716 * Examines the condition of a conditional assignment to generate the optimal 1717 * first operand of a \c CMP instruction. If the condition is a relational 1718 * operator with 0 (e.g., \c ir_binop_less), the value being compared will be 1719 * used as the source for the \c CMP instruction. Otherwise the comparison 1720 * is processed to a boolean result, and the boolean result is used as the 1721 * operand to the CMP instruction. 1722 */ 1723bool 1724ir_to_mesa_visitor::process_move_condition(ir_rvalue *ir) 1725{ 1726 ir_rvalue *src_ir = ir; 1727 bool negate = true; 1728 bool switch_order = false; 1729 1730 ir_expression *const expr = ir->as_expression(); 1731 if ((expr != NULL) && (expr->num_operands == 2)) { 1732 bool zero_on_left = false; 1733 1734 if (expr->operands[0]->is_zero()) { 1735 src_ir = expr->operands[1]; 1736 zero_on_left = true; 1737 } else if (expr->operands[1]->is_zero()) { 1738 src_ir = expr->operands[0]; 1739 zero_on_left = false; 1740 } 1741 1742 /* a is - 0 + - 0 + 1743 * (a < 0) T F F ( a < 0) T F F 1744 * (0 < a) F F T (-a < 0) F F T 1745 * (a >= 0) F T T ( a < 0) T F F (swap order of other operands) 1746 * (0 >= a) T T F (-a < 0) F F T (swap order of other operands) 1747 * 1748 * Note that exchanging the order of 0 and 'a' in the comparison simply 1749 * means that the value of 'a' should be negated. 1750 */ 1751 if (src_ir != ir) { 1752 switch (expr->operation) { 1753 case ir_binop_less: 1754 switch_order = false; 1755 negate = zero_on_left; 1756 break; 1757 1758 case ir_binop_gequal: 1759 switch_order = true; 1760 negate = zero_on_left; 1761 break; 1762 1763 default: 1764 /* This isn't the right kind of comparison afterall, so make sure 1765 * the whole condition is visited. 1766 */ 1767 src_ir = ir; 1768 break; 1769 } 1770 } 1771 } 1772 1773 src_ir->accept(this); 1774 1775 /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the 1776 * condition we produced is 0.0 or 1.0. By flipping the sign, we can 1777 * choose which value OPCODE_CMP produces without an extra instruction 1778 * computing the condition. 1779 */ 1780 if (negate) 1781 this->result.negate = ~this->result.negate; 1782 1783 return switch_order; 1784} 1785 1786void 1787ir_to_mesa_visitor::visit(ir_assignment *ir) 1788{ 1789 dst_reg l; 1790 src_reg r; 1791 int i; 1792 1793 ir->rhs->accept(this); 1794 r = this->result; 1795 1796 l = get_assignment_lhs(ir->lhs, this); 1797 1798 /* FINISHME: This should really set to the correct maximal writemask for each 1799 * FINISHME: component written (in the loops below). This case can only 1800 * FINISHME: occur for matrices, arrays, and structures. 1801 */ 1802 if (ir->write_mask == 0) { 1803 assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); 1804 l.writemask = WRITEMASK_XYZW; 1805 } else if (ir->lhs->type->is_scalar()) { 1806 /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the 1807 * FINISHME: W component of fragment shader output zero, work correctly. 1808 */ 1809 l.writemask = WRITEMASK_XYZW; 1810 } else { 1811 int swizzles[4]; 1812 int first_enabled_chan = 0; 1813 int rhs_chan = 0; 1814 1815 assert(ir->lhs->type->is_vector()); 1816 l.writemask = ir->write_mask; 1817 1818 for (int i = 0; i < 4; i++) { 1819 if (l.writemask & (1 << i)) { 1820 first_enabled_chan = GET_SWZ(r.swizzle, i); 1821 break; 1822 } 1823 } 1824 1825 /* Swizzle a small RHS vector into the channels being written. 1826 * 1827 * glsl ir treats write_mask as dictating how many channels are 1828 * present on the RHS while Mesa IR treats write_mask as just 1829 * showing which channels of the vec4 RHS get written. 1830 */ 1831 for (int i = 0; i < 4; i++) { 1832 if (l.writemask & (1 << i)) 1833 swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++); 1834 else 1835 swizzles[i] = first_enabled_chan; 1836 } 1837 r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1], 1838 swizzles[2], swizzles[3]); 1839 } 1840 1841 assert(l.file != PROGRAM_UNDEFINED); 1842 assert(r.file != PROGRAM_UNDEFINED); 1843 1844 if (ir->condition) { 1845 const bool switch_order = this->process_move_condition(ir->condition); 1846 src_reg condition = this->result; 1847 1848 for (i = 0; i < type_size(ir->lhs->type); i++) { 1849 if (switch_order) { 1850 emit(ir, OPCODE_CMP, l, condition, src_reg(l), r); 1851 } else { 1852 emit(ir, OPCODE_CMP, l, condition, r, src_reg(l)); 1853 } 1854 1855 l.index++; 1856 r.index++; 1857 } 1858 } else { 1859 for (i = 0; i < type_size(ir->lhs->type); i++) { 1860 emit(ir, OPCODE_MOV, l, r); 1861 l.index++; 1862 r.index++; 1863 } 1864 } 1865} 1866 1867 1868void 1869ir_to_mesa_visitor::visit(ir_constant *ir) 1870{ 1871 src_reg src; 1872 GLfloat stack_vals[4] = { 0 }; 1873 GLfloat *values = stack_vals; 1874 unsigned int i; 1875 1876 /* Unfortunately, 4 floats is all we can get into 1877 * _mesa_add_unnamed_constant. So, make a temp to store an 1878 * aggregate constant and move each constant value into it. If we 1879 * get lucky, copy propagation will eliminate the extra moves. 1880 */ 1881 1882 if (ir->type->is_struct()) { 1883 src_reg temp_base = get_temp(ir->type); 1884 dst_reg temp = dst_reg(temp_base); 1885 1886 for (i = 0; i < ir->type->length; i++) { 1887 ir_constant *const field_value = ir->get_record_field(i); 1888 int size = type_size(field_value->type); 1889 1890 assert(size > 0); 1891 1892 field_value->accept(this); 1893 src = this->result; 1894 1895 for (unsigned j = 0; j < (unsigned int)size; j++) { 1896 emit(ir, OPCODE_MOV, temp, src); 1897 1898 src.index++; 1899 temp.index++; 1900 } 1901 } 1902 this->result = temp_base; 1903 return; 1904 } 1905 1906 if (ir->type->is_array()) { 1907 src_reg temp_base = get_temp(ir->type); 1908 dst_reg temp = dst_reg(temp_base); 1909 int size = type_size(ir->type->fields.array); 1910 1911 assert(size > 0); 1912 1913 for (i = 0; i < ir->type->length; i++) { 1914 ir->const_elements[i]->accept(this); 1915 src = this->result; 1916 for (int j = 0; j < size; j++) { 1917 emit(ir, OPCODE_MOV, temp, src); 1918 1919 src.index++; 1920 temp.index++; 1921 } 1922 } 1923 this->result = temp_base; 1924 return; 1925 } 1926 1927 if (ir->type->is_matrix()) { 1928 src_reg mat = get_temp(ir->type); 1929 dst_reg mat_column = dst_reg(mat); 1930 1931 for (i = 0; i < ir->type->matrix_columns; i++) { 1932 assert(ir->type->is_float()); 1933 values = &ir->value.f[i * ir->type->vector_elements]; 1934 1935 src = src_reg(PROGRAM_CONSTANT, -1, NULL); 1936 src.index = _mesa_add_unnamed_constant(this->prog->Parameters, 1937 (gl_constant_value *) values, 1938 ir->type->vector_elements, 1939 &src.swizzle); 1940 emit(ir, OPCODE_MOV, mat_column, src); 1941 1942 mat_column.index++; 1943 } 1944 1945 this->result = mat; 1946 return; 1947 } 1948 1949 src.file = PROGRAM_CONSTANT; 1950 switch (ir->type->base_type) { 1951 case GLSL_TYPE_FLOAT: 1952 values = &ir->value.f[0]; 1953 break; 1954 case GLSL_TYPE_UINT: 1955 for (i = 0; i < ir->type->vector_elements; i++) { 1956 values[i] = ir->value.u[i]; 1957 } 1958 break; 1959 case GLSL_TYPE_INT: 1960 for (i = 0; i < ir->type->vector_elements; i++) { 1961 values[i] = ir->value.i[i]; 1962 } 1963 break; 1964 case GLSL_TYPE_BOOL: 1965 for (i = 0; i < ir->type->vector_elements; i++) { 1966 values[i] = ir->value.b[i]; 1967 } 1968 break; 1969 default: 1970 assert(!"Non-float/uint/int/bool constant"); 1971 } 1972 1973 this->result = src_reg(PROGRAM_CONSTANT, -1, ir->type); 1974 this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters, 1975 (gl_constant_value *) values, 1976 ir->type->vector_elements, 1977 &this->result.swizzle); 1978} 1979 1980void 1981ir_to_mesa_visitor::visit(ir_call *) 1982{ 1983 assert(!"ir_to_mesa: All function calls should have been inlined by now."); 1984} 1985 1986void 1987ir_to_mesa_visitor::visit(ir_texture *ir) 1988{ 1989 src_reg result_src, coord, lod_info, projector, dx, dy; 1990 dst_reg result_dst, coord_dst; 1991 ir_to_mesa_instruction *inst = NULL; 1992 prog_opcode opcode = OPCODE_NOP; 1993 1994 if (ir->op == ir_txs) 1995 this->result = src_reg_for_float(0.0); 1996 else 1997 ir->coordinate->accept(this); 1998 1999 /* Put our coords in a temp. We'll need to modify them for shadow, 2000 * projection, or LOD, so the only case we'd use it as-is is if 2001 * we're doing plain old texturing. Mesa IR optimization should 2002 * handle cleaning up our mess in that case. 2003 */ 2004 coord = get_temp(glsl_type::vec4_type); 2005 coord_dst = dst_reg(coord); 2006 emit(ir, OPCODE_MOV, coord_dst, this->result); 2007 2008 if (ir->projector) { 2009 ir->projector->accept(this); 2010 projector = this->result; 2011 } 2012 2013 /* Storage for our result. Ideally for an assignment we'd be using 2014 * the actual storage for the result here, instead. 2015 */ 2016 result_src = get_temp(glsl_type::vec4_type); 2017 result_dst = dst_reg(result_src); 2018 2019 switch (ir->op) { 2020 case ir_tex: 2021 case ir_txs: 2022 opcode = OPCODE_TEX; 2023 break; 2024 case ir_txb: 2025 opcode = OPCODE_TXB; 2026 ir->lod_info.bias->accept(this); 2027 lod_info = this->result; 2028 break; 2029 case ir_txf: 2030 /* Pretend to be TXL so the sampler, coordinate, lod are available */ 2031 case ir_txl: 2032 opcode = OPCODE_TXL; 2033 ir->lod_info.lod->accept(this); 2034 lod_info = this->result; 2035 break; 2036 case ir_txd: 2037 opcode = OPCODE_TXD; 2038 ir->lod_info.grad.dPdx->accept(this); 2039 dx = this->result; 2040 ir->lod_info.grad.dPdy->accept(this); 2041 dy = this->result; 2042 break; 2043 case ir_txf_ms: 2044 assert(!"Unexpected ir_txf_ms opcode"); 2045 break; 2046 case ir_lod: 2047 assert(!"Unexpected ir_lod opcode"); 2048 break; 2049 case ir_tg4: 2050 assert(!"Unexpected ir_tg4 opcode"); 2051 break; 2052 case ir_query_levels: 2053 assert(!"Unexpected ir_query_levels opcode"); 2054 break; 2055 case ir_samples_identical: 2056 unreachable("Unexpected ir_samples_identical opcode"); 2057 case ir_texture_samples: 2058 unreachable("Unexpected ir_texture_samples opcode"); 2059 } 2060 2061 const glsl_type *sampler_type = ir->sampler->type; 2062 2063 if (ir->projector) { 2064 if (opcode == OPCODE_TEX) { 2065 /* Slot the projector in as the last component of the coord. */ 2066 coord_dst.writemask = WRITEMASK_W; 2067 emit(ir, OPCODE_MOV, coord_dst, projector); 2068 coord_dst.writemask = WRITEMASK_XYZW; 2069 opcode = OPCODE_TXP; 2070 } else { 2071 src_reg coord_w = coord; 2072 coord_w.swizzle = SWIZZLE_WWWW; 2073 2074 /* For the other TEX opcodes there's no projective version 2075 * since the last slot is taken up by lod info. Do the 2076 * projective divide now. 2077 */ 2078 coord_dst.writemask = WRITEMASK_W; 2079 emit(ir, OPCODE_RCP, coord_dst, projector); 2080 2081 /* In the case where we have to project the coordinates "by hand," 2082 * the shadow comparator value must also be projected. 2083 */ 2084 src_reg tmp_src = coord; 2085 if (ir->shadow_comparator) { 2086 /* Slot the shadow value in as the second to last component of the 2087 * coord. 2088 */ 2089 ir->shadow_comparator->accept(this); 2090 2091 tmp_src = get_temp(glsl_type::vec4_type); 2092 dst_reg tmp_dst = dst_reg(tmp_src); 2093 2094 /* Projective division not allowed for array samplers. */ 2095 assert(!sampler_type->sampler_array); 2096 2097 tmp_dst.writemask = WRITEMASK_Z; 2098 emit(ir, OPCODE_MOV, tmp_dst, this->result); 2099 2100 tmp_dst.writemask = WRITEMASK_XY; 2101 emit(ir, OPCODE_MOV, tmp_dst, coord); 2102 } 2103 2104 coord_dst.writemask = WRITEMASK_XYZ; 2105 emit(ir, OPCODE_MUL, coord_dst, tmp_src, coord_w); 2106 2107 coord_dst.writemask = WRITEMASK_XYZW; 2108 coord.swizzle = SWIZZLE_XYZW; 2109 } 2110 } 2111 2112 /* If projection is done and the opcode is not OPCODE_TXP, then the shadow 2113 * comparator was put in the correct place (and projected) by the code, 2114 * above, that handles by-hand projection. 2115 */ 2116 if (ir->shadow_comparator && (!ir->projector || opcode == OPCODE_TXP)) { 2117 /* Slot the shadow value in as the second to last component of the 2118 * coord. 2119 */ 2120 ir->shadow_comparator->accept(this); 2121 2122 /* XXX This will need to be updated for cubemap array samplers. */ 2123 if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D && 2124 sampler_type->sampler_array) { 2125 coord_dst.writemask = WRITEMASK_W; 2126 } else { 2127 coord_dst.writemask = WRITEMASK_Z; 2128 } 2129 2130 emit(ir, OPCODE_MOV, coord_dst, this->result); 2131 coord_dst.writemask = WRITEMASK_XYZW; 2132 } 2133 2134 if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) { 2135 /* Mesa IR stores lod or lod bias in the last channel of the coords. */ 2136 coord_dst.writemask = WRITEMASK_W; 2137 emit(ir, OPCODE_MOV, coord_dst, lod_info); 2138 coord_dst.writemask = WRITEMASK_XYZW; 2139 } 2140 2141 if (opcode == OPCODE_TXD) 2142 inst = emit(ir, opcode, result_dst, coord, dx, dy); 2143 else 2144 inst = emit(ir, opcode, result_dst, coord); 2145 2146 if (ir->shadow_comparator) 2147 inst->tex_shadow = GL_TRUE; 2148 2149 inst->sampler = get_sampler_uniform_value(ir->sampler, shader_program, 2150 prog); 2151 2152 switch (sampler_type->sampler_dimensionality) { 2153 case GLSL_SAMPLER_DIM_1D: 2154 inst->tex_target = (sampler_type->sampler_array) 2155 ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX; 2156 break; 2157 case GLSL_SAMPLER_DIM_2D: 2158 inst->tex_target = (sampler_type->sampler_array) 2159 ? TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX; 2160 break; 2161 case GLSL_SAMPLER_DIM_3D: 2162 inst->tex_target = TEXTURE_3D_INDEX; 2163 break; 2164 case GLSL_SAMPLER_DIM_CUBE: 2165 inst->tex_target = TEXTURE_CUBE_INDEX; 2166 break; 2167 case GLSL_SAMPLER_DIM_RECT: 2168 inst->tex_target = TEXTURE_RECT_INDEX; 2169 break; 2170 case GLSL_SAMPLER_DIM_BUF: 2171 assert(!"FINISHME: Implement ARB_texture_buffer_object"); 2172 break; 2173 case GLSL_SAMPLER_DIM_EXTERNAL: 2174 inst->tex_target = TEXTURE_EXTERNAL_INDEX; 2175 break; 2176 default: 2177 assert(!"Should not get here."); 2178 } 2179 2180 this->result = result_src; 2181} 2182 2183void 2184ir_to_mesa_visitor::visit(ir_return *ir) 2185{ 2186 /* Non-void functions should have been inlined. We may still emit RETs 2187 * from main() unless the EmitNoMainReturn option is set. 2188 */ 2189 assert(!ir->get_value()); 2190 emit(ir, OPCODE_RET); 2191} 2192 2193void 2194ir_to_mesa_visitor::visit(ir_discard *ir) 2195{ 2196 if (!ir->condition) 2197 ir->condition = new(mem_ctx) ir_constant(true); 2198 2199 ir->condition->accept(this); 2200 this->result.negate = ~this->result.negate; 2201 emit(ir, OPCODE_KIL, undef_dst, this->result); 2202} 2203 2204void 2205ir_to_mesa_visitor::visit(ir_if *ir) 2206{ 2207 ir_to_mesa_instruction *if_inst; 2208 2209 ir->condition->accept(this); 2210 assert(this->result.file != PROGRAM_UNDEFINED); 2211 2212 if_inst = emit(ir->condition, OPCODE_IF, undef_dst, this->result); 2213 2214 this->instructions.push_tail(if_inst); 2215 2216 visit_exec_list(&ir->then_instructions, this); 2217 2218 if (!ir->else_instructions.is_empty()) { 2219 emit(ir->condition, OPCODE_ELSE); 2220 visit_exec_list(&ir->else_instructions, this); 2221 } 2222 2223 emit(ir->condition, OPCODE_ENDIF); 2224} 2225 2226void 2227ir_to_mesa_visitor::visit(ir_emit_vertex *) 2228{ 2229 assert(!"Geometry shaders not supported."); 2230} 2231 2232void 2233ir_to_mesa_visitor::visit(ir_end_primitive *) 2234{ 2235 assert(!"Geometry shaders not supported."); 2236} 2237 2238void 2239ir_to_mesa_visitor::visit(ir_barrier *) 2240{ 2241 unreachable("GLSL barrier() not supported."); 2242} 2243 2244ir_to_mesa_visitor::ir_to_mesa_visitor() 2245{ 2246 result.file = PROGRAM_UNDEFINED; 2247 next_temp = 1; 2248 next_signature_id = 1; 2249 current_function = NULL; 2250 mem_ctx = ralloc_context(NULL); 2251} 2252 2253ir_to_mesa_visitor::~ir_to_mesa_visitor() 2254{ 2255 ralloc_free(mem_ctx); 2256} 2257 2258static struct prog_src_register 2259mesa_src_reg_from_ir_src_reg(src_reg reg) 2260{ 2261 struct prog_src_register mesa_reg; 2262 2263 mesa_reg.File = reg.file; 2264 assert(reg.index < (1 << INST_INDEX_BITS)); 2265 mesa_reg.Index = reg.index; 2266 mesa_reg.Swizzle = reg.swizzle; 2267 mesa_reg.RelAddr = reg.reladdr != NULL; 2268 mesa_reg.Negate = reg.negate; 2269 2270 return mesa_reg; 2271} 2272 2273static void 2274set_branchtargets(ir_to_mesa_visitor *v, 2275 struct prog_instruction *mesa_instructions, 2276 int num_instructions) 2277{ 2278 int if_count = 0, loop_count = 0; 2279 int *if_stack, *loop_stack; 2280 int if_stack_pos = 0, loop_stack_pos = 0; 2281 int i, j; 2282 2283 for (i = 0; i < num_instructions; i++) { 2284 switch (mesa_instructions[i].Opcode) { 2285 case OPCODE_IF: 2286 if_count++; 2287 break; 2288 case OPCODE_BGNLOOP: 2289 loop_count++; 2290 break; 2291 case OPCODE_BRK: 2292 case OPCODE_CONT: 2293 mesa_instructions[i].BranchTarget = -1; 2294 break; 2295 default: 2296 break; 2297 } 2298 } 2299 2300 if_stack = rzalloc_array(v->mem_ctx, int, if_count); 2301 loop_stack = rzalloc_array(v->mem_ctx, int, loop_count); 2302 2303 for (i = 0; i < num_instructions; i++) { 2304 switch (mesa_instructions[i].Opcode) { 2305 case OPCODE_IF: 2306 if_stack[if_stack_pos] = i; 2307 if_stack_pos++; 2308 break; 2309 case OPCODE_ELSE: 2310 mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i; 2311 if_stack[if_stack_pos - 1] = i; 2312 break; 2313 case OPCODE_ENDIF: 2314 mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i; 2315 if_stack_pos--; 2316 break; 2317 case OPCODE_BGNLOOP: 2318 loop_stack[loop_stack_pos] = i; 2319 loop_stack_pos++; 2320 break; 2321 case OPCODE_ENDLOOP: 2322 loop_stack_pos--; 2323 /* Rewrite any breaks/conts at this nesting level (haven't 2324 * already had a BranchTarget assigned) to point to the end 2325 * of the loop. 2326 */ 2327 for (j = loop_stack[loop_stack_pos]; j < i; j++) { 2328 if (mesa_instructions[j].Opcode == OPCODE_BRK || 2329 mesa_instructions[j].Opcode == OPCODE_CONT) { 2330 if (mesa_instructions[j].BranchTarget == -1) { 2331 mesa_instructions[j].BranchTarget = i; 2332 } 2333 } 2334 } 2335 /* The loop ends point at each other. */ 2336 mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos]; 2337 mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i; 2338 break; 2339 case OPCODE_CAL: 2340 foreach_in_list(function_entry, entry, &v->function_signatures) { 2341 if (entry->sig_id == mesa_instructions[i].BranchTarget) { 2342 mesa_instructions[i].BranchTarget = entry->inst; 2343 break; 2344 } 2345 } 2346 break; 2347 default: 2348 break; 2349 } 2350 } 2351} 2352 2353static void 2354print_program(struct prog_instruction *mesa_instructions, 2355 ir_instruction **mesa_instruction_annotation, 2356 int num_instructions) 2357{ 2358 ir_instruction *last_ir = NULL; 2359 int i; 2360 int indent = 0; 2361 2362 for (i = 0; i < num_instructions; i++) { 2363 struct prog_instruction *mesa_inst = mesa_instructions + i; 2364 ir_instruction *ir = mesa_instruction_annotation[i]; 2365 2366 fprintf(stdout, "%3d: ", i); 2367 2368 if (last_ir != ir && ir) { 2369 int j; 2370 2371 for (j = 0; j < indent; j++) { 2372 fprintf(stdout, " "); 2373 } 2374 ir->print(); 2375 printf("\n"); 2376 last_ir = ir; 2377 2378 fprintf(stdout, " "); /* line number spacing. */ 2379 } 2380 2381 indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent, 2382 PROG_PRINT_DEBUG, NULL); 2383 } 2384} 2385 2386namespace { 2387 2388class add_uniform_to_shader : public program_resource_visitor { 2389public: 2390 add_uniform_to_shader(struct gl_context *ctx, 2391 struct gl_shader_program *shader_program, 2392 struct gl_program_parameter_list *params) 2393 : ctx(ctx), params(params), idx(-1) 2394 { 2395 /* empty */ 2396 } 2397 2398 void process(ir_variable *var) 2399 { 2400 this->idx = -1; 2401 this->var = var; 2402 this->program_resource_visitor::process(var, 2403 ctx->Const.UseSTD430AsDefaultPacking); 2404 var->data.param_index = this->idx; 2405 } 2406 2407private: 2408 virtual void visit_field(const glsl_type *type, const char *name, 2409 bool row_major, const glsl_type *record_type, 2410 const enum glsl_interface_packing packing, 2411 bool last_field); 2412 2413 struct gl_context *ctx; 2414 struct gl_program_parameter_list *params; 2415 int idx; 2416 ir_variable *var; 2417}; 2418 2419} /* anonymous namespace */ 2420 2421void 2422add_uniform_to_shader::visit_field(const glsl_type *type, const char *name, 2423 bool /* row_major */, 2424 const glsl_type * /* record_type */, 2425 const enum glsl_interface_packing, 2426 bool /* last_field */) 2427{ 2428 /* opaque types don't use storage in the param list unless they are 2429 * bindless samplers or images. 2430 */ 2431 if (type->contains_opaque() && !var->data.bindless) 2432 return; 2433 2434 /* Add the uniform to the param list */ 2435 assert(_mesa_lookup_parameter_index(params, name) < 0); 2436 int index = _mesa_lookup_parameter_index(params, name); 2437 2438 unsigned num_params = type->arrays_of_arrays_size(); 2439 num_params = MAX2(num_params, 1); 2440 num_params *= type->without_array()->matrix_columns; 2441 2442 bool is_dual_slot = type->without_array()->is_dual_slot(); 2443 if (is_dual_slot) 2444 num_params *= 2; 2445 2446 _mesa_reserve_parameter_storage(params, num_params); 2447 index = params->NumParameters; 2448 2449 if (ctx->Const.PackedDriverUniformStorage) { 2450 for (unsigned i = 0; i < num_params; i++) { 2451 unsigned dmul = type->without_array()->is_64bit() ? 2 : 1; 2452 unsigned comps = type->without_array()->vector_elements * dmul; 2453 if (is_dual_slot) { 2454 if (i & 0x1) 2455 comps -= 4; 2456 else 2457 comps = 4; 2458 } 2459 2460 _mesa_add_parameter(params, PROGRAM_UNIFORM, name, comps, 2461 type->gl_type, NULL, NULL, false); 2462 } 2463 } else { 2464 for (unsigned i = 0; i < num_params; i++) { 2465 _mesa_add_parameter(params, PROGRAM_UNIFORM, name, 4, 2466 type->gl_type, NULL, NULL, true); 2467 } 2468 } 2469 2470 /* The first part of the uniform that's processed determines the base 2471 * location of the whole uniform (for structures). 2472 */ 2473 if (this->idx < 0) 2474 this->idx = index; 2475} 2476 2477/** 2478 * Generate the program parameters list for the user uniforms in a shader 2479 * 2480 * \param shader_program Linked shader program. This is only used to 2481 * emit possible link errors to the info log. 2482 * \param sh Shader whose uniforms are to be processed. 2483 * \param params Parameter list to be filled in. 2484 */ 2485void 2486_mesa_generate_parameters_list_for_uniforms(struct gl_context *ctx, 2487 struct gl_shader_program 2488 *shader_program, 2489 struct gl_linked_shader *sh, 2490 struct gl_program_parameter_list 2491 *params) 2492{ 2493 add_uniform_to_shader add(ctx, shader_program, params); 2494 2495 foreach_in_list(ir_instruction, node, sh->ir) { 2496 ir_variable *var = node->as_variable(); 2497 2498 if ((var == NULL) || (var->data.mode != ir_var_uniform) 2499 || var->is_in_buffer_block() || (strncmp(var->name, "gl_", 3) == 0)) 2500 continue; 2501 2502 add.process(var); 2503 } 2504} 2505 2506void 2507_mesa_associate_uniform_storage(struct gl_context *ctx, 2508 struct gl_shader_program *shader_program, 2509 struct gl_program *prog) 2510{ 2511 struct gl_program_parameter_list *params = prog->Parameters; 2512 gl_shader_stage shader_type = prog->info.stage; 2513 2514 /* After adding each uniform to the parameter list, connect the storage for 2515 * the parameter with the tracking structure used by the API for the 2516 * uniform. 2517 */ 2518 unsigned last_location = unsigned(~0); 2519 for (unsigned i = 0; i < params->NumParameters; i++) { 2520 if (params->Parameters[i].Type != PROGRAM_UNIFORM) 2521 continue; 2522 2523 unsigned location; 2524 const bool found = 2525 shader_program->UniformHash->get(location, params->Parameters[i].Name); 2526 assert(found); 2527 2528 if (!found) 2529 continue; 2530 2531 struct gl_uniform_storage *storage = 2532 &shader_program->data->UniformStorage[location]; 2533 2534 /* Do not associate any uniform storage to built-in uniforms */ 2535 if (storage->builtin) 2536 continue; 2537 2538 if (location != last_location) { 2539 enum gl_uniform_driver_format format = uniform_native; 2540 unsigned columns = 0; 2541 2542 int dmul; 2543 if (ctx->Const.PackedDriverUniformStorage && !prog->is_arb_asm) { 2544 dmul = storage->type->vector_elements * sizeof(float); 2545 } else { 2546 dmul = 4 * sizeof(float); 2547 } 2548 2549 switch (storage->type->base_type) { 2550 case GLSL_TYPE_UINT64: 2551 if (storage->type->vector_elements > 2) 2552 dmul *= 2; 2553 /* fallthrough */ 2554 case GLSL_TYPE_UINT: 2555 case GLSL_TYPE_UINT16: 2556 case GLSL_TYPE_UINT8: 2557 assert(ctx->Const.NativeIntegers); 2558 format = uniform_native; 2559 columns = 1; 2560 break; 2561 case GLSL_TYPE_INT64: 2562 if (storage->type->vector_elements > 2) 2563 dmul *= 2; 2564 /* fallthrough */ 2565 case GLSL_TYPE_INT: 2566 case GLSL_TYPE_INT16: 2567 case GLSL_TYPE_INT8: 2568 format = 2569 (ctx->Const.NativeIntegers) ? uniform_native : uniform_int_float; 2570 columns = 1; 2571 break; 2572 case GLSL_TYPE_DOUBLE: 2573 if (storage->type->vector_elements > 2) 2574 dmul *= 2; 2575 /* fallthrough */ 2576 case GLSL_TYPE_FLOAT: 2577 case GLSL_TYPE_FLOAT16: 2578 format = uniform_native; 2579 columns = storage->type->matrix_columns; 2580 break; 2581 case GLSL_TYPE_BOOL: 2582 format = uniform_native; 2583 columns = 1; 2584 break; 2585 case GLSL_TYPE_SAMPLER: 2586 case GLSL_TYPE_IMAGE: 2587 case GLSL_TYPE_SUBROUTINE: 2588 format = uniform_native; 2589 columns = 1; 2590 break; 2591 case GLSL_TYPE_ATOMIC_UINT: 2592 case GLSL_TYPE_ARRAY: 2593 case GLSL_TYPE_VOID: 2594 case GLSL_TYPE_STRUCT: 2595 case GLSL_TYPE_ERROR: 2596 case GLSL_TYPE_INTERFACE: 2597 case GLSL_TYPE_FUNCTION: 2598 assert(!"Should not get here."); 2599 break; 2600 } 2601 2602 unsigned pvo = params->ParameterValueOffset[i]; 2603 _mesa_uniform_attach_driver_storage(storage, dmul * columns, dmul, 2604 format, 2605 ¶ms->ParameterValues[pvo]); 2606 2607 /* When a bindless sampler/image is bound to a texture/image unit, we 2608 * have to overwrite the constant value by the resident handle 2609 * directly in the constant buffer before the next draw. One solution 2610 * is to keep track a pointer to the base of the data. 2611 */ 2612 if (storage->is_bindless && (prog->sh.NumBindlessSamplers || 2613 prog->sh.NumBindlessImages)) { 2614 unsigned array_elements = MAX2(1, storage->array_elements); 2615 2616 for (unsigned j = 0; j < array_elements; ++j) { 2617 unsigned unit = storage->opaque[shader_type].index + j; 2618 2619 if (storage->type->without_array()->is_sampler()) { 2620 assert(unit >= 0 && unit < prog->sh.NumBindlessSamplers); 2621 prog->sh.BindlessSamplers[unit].data = 2622 ¶ms->ParameterValues[pvo] + 4 * j; 2623 } else if (storage->type->without_array()->is_image()) { 2624 assert(unit >= 0 && unit < prog->sh.NumBindlessImages); 2625 prog->sh.BindlessImages[unit].data = 2626 ¶ms->ParameterValues[pvo] + 4 * j; 2627 } 2628 } 2629 } 2630 2631 /* After attaching the driver's storage to the uniform, propagate any 2632 * data from the linker's backing store. This will cause values from 2633 * initializers in the source code to be copied over. 2634 */ 2635 unsigned array_elements = MAX2(1, storage->array_elements); 2636 if (ctx->Const.PackedDriverUniformStorage && !prog->is_arb_asm && 2637 (storage->is_bindless || !storage->type->contains_opaque())) { 2638 const int dmul = storage->type->is_64bit() ? 2 : 1; 2639 const unsigned components = 2640 storage->type->vector_elements * 2641 storage->type->matrix_columns; 2642 2643 for (unsigned s = 0; s < storage->num_driver_storage; s++) { 2644 gl_constant_value *uni_storage = (gl_constant_value *) 2645 storage->driver_storage[s].data; 2646 memcpy(uni_storage, storage->storage, 2647 sizeof(storage->storage[0]) * components * 2648 array_elements * dmul); 2649 } 2650 } else { 2651 _mesa_propagate_uniforms_to_driver_storage(storage, 0, 2652 array_elements); 2653 } 2654 2655 last_location = location; 2656 } 2657 } 2658} 2659 2660/* 2661 * On a basic block basis, tracks available PROGRAM_TEMPORARY register 2662 * channels for copy propagation and updates following instructions to 2663 * use the original versions. 2664 * 2665 * The ir_to_mesa_visitor lazily produces code assuming that this pass 2666 * will occur. As an example, a TXP production before this pass: 2667 * 2668 * 0: MOV TEMP[1], INPUT[4].xyyy; 2669 * 1: MOV TEMP[1].w, INPUT[4].wwww; 2670 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D; 2671 * 2672 * and after: 2673 * 2674 * 0: MOV TEMP[1], INPUT[4].xyyy; 2675 * 1: MOV TEMP[1].w, INPUT[4].wwww; 2676 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D; 2677 * 2678 * which allows for dead code elimination on TEMP[1]'s writes. 2679 */ 2680void 2681ir_to_mesa_visitor::copy_propagate(void) 2682{ 2683 ir_to_mesa_instruction **acp = rzalloc_array(mem_ctx, 2684 ir_to_mesa_instruction *, 2685 this->next_temp * 4); 2686 int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4); 2687 int level = 0; 2688 2689 foreach_in_list(ir_to_mesa_instruction, inst, &this->instructions) { 2690 assert(inst->dst.file != PROGRAM_TEMPORARY 2691 || inst->dst.index < this->next_temp); 2692 2693 /* First, do any copy propagation possible into the src regs. */ 2694 for (int r = 0; r < 3; r++) { 2695 ir_to_mesa_instruction *first = NULL; 2696 bool good = true; 2697 int acp_base = inst->src[r].index * 4; 2698 2699 if (inst->src[r].file != PROGRAM_TEMPORARY || 2700 inst->src[r].reladdr) 2701 continue; 2702 2703 /* See if we can find entries in the ACP consisting of MOVs 2704 * from the same src register for all the swizzled channels 2705 * of this src register reference. 2706 */ 2707 for (int i = 0; i < 4; i++) { 2708 int src_chan = GET_SWZ(inst->src[r].swizzle, i); 2709 ir_to_mesa_instruction *copy_chan = acp[acp_base + src_chan]; 2710 2711 if (!copy_chan) { 2712 good = false; 2713 break; 2714 } 2715 2716 assert(acp_level[acp_base + src_chan] <= level); 2717 2718 if (!first) { 2719 first = copy_chan; 2720 } else { 2721 if (first->src[0].file != copy_chan->src[0].file || 2722 first->src[0].index != copy_chan->src[0].index) { 2723 good = false; 2724 break; 2725 } 2726 } 2727 } 2728 2729 if (good) { 2730 /* We've now validated that we can copy-propagate to 2731 * replace this src register reference. Do it. 2732 */ 2733 inst->src[r].file = first->src[0].file; 2734 inst->src[r].index = first->src[0].index; 2735 2736 int swizzle = 0; 2737 for (int i = 0; i < 4; i++) { 2738 int src_chan = GET_SWZ(inst->src[r].swizzle, i); 2739 ir_to_mesa_instruction *copy_inst = acp[acp_base + src_chan]; 2740 swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) << 2741 (3 * i)); 2742 } 2743 inst->src[r].swizzle = swizzle; 2744 } 2745 } 2746 2747 switch (inst->op) { 2748 case OPCODE_BGNLOOP: 2749 case OPCODE_ENDLOOP: 2750 /* End of a basic block, clear the ACP entirely. */ 2751 memset(acp, 0, sizeof(*acp) * this->next_temp * 4); 2752 break; 2753 2754 case OPCODE_IF: 2755 ++level; 2756 break; 2757 2758 case OPCODE_ENDIF: 2759 case OPCODE_ELSE: 2760 /* Clear all channels written inside the block from the ACP, but 2761 * leaving those that were not touched. 2762 */ 2763 for (int r = 0; r < this->next_temp; r++) { 2764 for (int c = 0; c < 4; c++) { 2765 if (!acp[4 * r + c]) 2766 continue; 2767 2768 if (acp_level[4 * r + c] >= level) 2769 acp[4 * r + c] = NULL; 2770 } 2771 } 2772 if (inst->op == OPCODE_ENDIF) 2773 --level; 2774 break; 2775 2776 default: 2777 /* Continuing the block, clear any written channels from 2778 * the ACP. 2779 */ 2780 if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) { 2781 /* Any temporary might be written, so no copy propagation 2782 * across this instruction. 2783 */ 2784 memset(acp, 0, sizeof(*acp) * this->next_temp * 4); 2785 } else if (inst->dst.file == PROGRAM_OUTPUT && 2786 inst->dst.reladdr) { 2787 /* Any output might be written, so no copy propagation 2788 * from outputs across this instruction. 2789 */ 2790 for (int r = 0; r < this->next_temp; r++) { 2791 for (int c = 0; c < 4; c++) { 2792 if (!acp[4 * r + c]) 2793 continue; 2794 2795 if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT) 2796 acp[4 * r + c] = NULL; 2797 } 2798 } 2799 } else if (inst->dst.file == PROGRAM_TEMPORARY || 2800 inst->dst.file == PROGRAM_OUTPUT) { 2801 /* Clear where it's used as dst. */ 2802 if (inst->dst.file == PROGRAM_TEMPORARY) { 2803 for (int c = 0; c < 4; c++) { 2804 if (inst->dst.writemask & (1 << c)) { 2805 acp[4 * inst->dst.index + c] = NULL; 2806 } 2807 } 2808 } 2809 2810 /* Clear where it's used as src. */ 2811 for (int r = 0; r < this->next_temp; r++) { 2812 for (int c = 0; c < 4; c++) { 2813 if (!acp[4 * r + c]) 2814 continue; 2815 2816 int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c); 2817 2818 if (acp[4 * r + c]->src[0].file == inst->dst.file && 2819 acp[4 * r + c]->src[0].index == inst->dst.index && 2820 inst->dst.writemask & (1 << src_chan)) 2821 { 2822 acp[4 * r + c] = NULL; 2823 } 2824 } 2825 } 2826 } 2827 break; 2828 } 2829 2830 /* If this is a copy, add it to the ACP. */ 2831 if (inst->op == OPCODE_MOV && 2832 inst->dst.file == PROGRAM_TEMPORARY && 2833 !(inst->dst.file == inst->src[0].file && 2834 inst->dst.index == inst->src[0].index) && 2835 !inst->dst.reladdr && 2836 !inst->saturate && 2837 !inst->src[0].reladdr && 2838 !inst->src[0].negate) { 2839 for (int i = 0; i < 4; i++) { 2840 if (inst->dst.writemask & (1 << i)) { 2841 acp[4 * inst->dst.index + i] = inst; 2842 acp_level[4 * inst->dst.index + i] = level; 2843 } 2844 } 2845 } 2846 } 2847 2848 ralloc_free(acp_level); 2849 ralloc_free(acp); 2850} 2851 2852 2853/** 2854 * Convert a shader's GLSL IR into a Mesa gl_program. 2855 */ 2856static struct gl_program * 2857get_mesa_program(struct gl_context *ctx, 2858 struct gl_shader_program *shader_program, 2859 struct gl_linked_shader *shader) 2860{ 2861 ir_to_mesa_visitor v; 2862 struct prog_instruction *mesa_instructions, *mesa_inst; 2863 ir_instruction **mesa_instruction_annotation; 2864 int i; 2865 struct gl_program *prog; 2866 GLenum target = _mesa_shader_stage_to_program(shader->Stage); 2867 const char *target_string = _mesa_shader_stage_to_string(shader->Stage); 2868 struct gl_shader_compiler_options *options = 2869 &ctx->Const.ShaderCompilerOptions[shader->Stage]; 2870 2871 validate_ir_tree(shader->ir); 2872 2873 prog = shader->Program; 2874 prog->Parameters = _mesa_new_parameter_list(); 2875 v.ctx = ctx; 2876 v.prog = prog; 2877 v.shader_program = shader_program; 2878 v.options = options; 2879 2880 _mesa_generate_parameters_list_for_uniforms(ctx, shader_program, shader, 2881 prog->Parameters); 2882 2883 /* Emit Mesa IR for main(). */ 2884 visit_exec_list(shader->ir, &v); 2885 v.emit(NULL, OPCODE_END); 2886 2887 prog->arb.NumTemporaries = v.next_temp; 2888 2889 unsigned num_instructions = v.instructions.length(); 2890 2891 mesa_instructions = rzalloc_array(prog, struct prog_instruction, 2892 num_instructions); 2893 mesa_instruction_annotation = ralloc_array(v.mem_ctx, ir_instruction *, 2894 num_instructions); 2895 2896 v.copy_propagate(); 2897 2898 /* Convert ir_mesa_instructions into prog_instructions. 2899 */ 2900 mesa_inst = mesa_instructions; 2901 i = 0; 2902 foreach_in_list(const ir_to_mesa_instruction, inst, &v.instructions) { 2903 mesa_inst->Opcode = inst->op; 2904 if (inst->saturate) 2905 mesa_inst->Saturate = GL_TRUE; 2906 mesa_inst->DstReg.File = inst->dst.file; 2907 mesa_inst->DstReg.Index = inst->dst.index; 2908 mesa_inst->DstReg.WriteMask = inst->dst.writemask; 2909 mesa_inst->DstReg.RelAddr = inst->dst.reladdr != NULL; 2910 mesa_inst->SrcReg[0] = mesa_src_reg_from_ir_src_reg(inst->src[0]); 2911 mesa_inst->SrcReg[1] = mesa_src_reg_from_ir_src_reg(inst->src[1]); 2912 mesa_inst->SrcReg[2] = mesa_src_reg_from_ir_src_reg(inst->src[2]); 2913 mesa_inst->TexSrcUnit = inst->sampler; 2914 mesa_inst->TexSrcTarget = inst->tex_target; 2915 mesa_inst->TexShadow = inst->tex_shadow; 2916 mesa_instruction_annotation[i] = inst->ir; 2917 2918 /* Set IndirectRegisterFiles. */ 2919 if (mesa_inst->DstReg.RelAddr) 2920 prog->arb.IndirectRegisterFiles |= 1 << mesa_inst->DstReg.File; 2921 2922 /* Update program's bitmask of indirectly accessed register files */ 2923 for (unsigned src = 0; src < 3; src++) 2924 if (mesa_inst->SrcReg[src].RelAddr) 2925 prog->arb.IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File; 2926 2927 switch (mesa_inst->Opcode) { 2928 case OPCODE_IF: 2929 if (options->MaxIfDepth == 0) { 2930 linker_warning(shader_program, 2931 "Couldn't flatten if-statement. " 2932 "This will likely result in software " 2933 "rasterization.\n"); 2934 } 2935 break; 2936 case OPCODE_BGNLOOP: 2937 if (options->EmitNoLoops) { 2938 linker_warning(shader_program, 2939 "Couldn't unroll loop. " 2940 "This will likely result in software " 2941 "rasterization.\n"); 2942 } 2943 break; 2944 case OPCODE_CONT: 2945 if (options->EmitNoCont) { 2946 linker_warning(shader_program, 2947 "Couldn't lower continue-statement. " 2948 "This will likely result in software " 2949 "rasterization.\n"); 2950 } 2951 break; 2952 case OPCODE_ARL: 2953 prog->arb.NumAddressRegs = 1; 2954 break; 2955 default: 2956 break; 2957 } 2958 2959 mesa_inst++; 2960 i++; 2961 2962 if (!shader_program->data->LinkStatus) 2963 break; 2964 } 2965 2966 if (!shader_program->data->LinkStatus) { 2967 goto fail_exit; 2968 } 2969 2970 set_branchtargets(&v, mesa_instructions, num_instructions); 2971 2972 if (ctx->_Shader->Flags & GLSL_DUMP) { 2973 fprintf(stderr, "\n"); 2974 fprintf(stderr, "GLSL IR for linked %s program %d:\n", target_string, 2975 shader_program->Name); 2976 _mesa_print_ir(stderr, shader->ir, NULL); 2977 fprintf(stderr, "\n"); 2978 fprintf(stderr, "\n"); 2979 fprintf(stderr, "Mesa IR for linked %s program %d:\n", target_string, 2980 shader_program->Name); 2981 print_program(mesa_instructions, mesa_instruction_annotation, 2982 num_instructions); 2983 fflush(stderr); 2984 } 2985 2986 prog->arb.Instructions = mesa_instructions; 2987 prog->arb.NumInstructions = num_instructions; 2988 2989 /* Setting this to NULL prevents a possible double free in the fail_exit 2990 * path (far below). 2991 */ 2992 mesa_instructions = NULL; 2993 2994 do_set_program_inouts(shader->ir, prog, shader->Stage); 2995 2996 prog->ShadowSamplers = shader->shadow_samplers; 2997 prog->ExternalSamplersUsed = gl_external_samplers(prog); 2998 _mesa_update_shader_textures_used(shader_program, prog); 2999 3000 /* Set the gl_FragDepth layout. */ 3001 if (target == GL_FRAGMENT_PROGRAM_ARB) { 3002 prog->info.fs.depth_layout = shader_program->FragDepthLayout; 3003 } 3004 3005 _mesa_optimize_program(prog, prog); 3006 3007 /* This has to be done last. Any operation that can cause 3008 * prog->ParameterValues to get reallocated (e.g., anything that adds a 3009 * program constant) has to happen before creating this linkage. 3010 */ 3011 _mesa_associate_uniform_storage(ctx, shader_program, prog); 3012 if (!shader_program->data->LinkStatus) { 3013 goto fail_exit; 3014 } 3015 3016 return prog; 3017 3018fail_exit: 3019 ralloc_free(mesa_instructions); 3020 _mesa_reference_program(ctx, &shader->Program, NULL); 3021 return NULL; 3022} 3023 3024extern "C" { 3025 3026/** 3027 * Link a shader. 3028 * Called via ctx->Driver.LinkShader() 3029 * This actually involves converting GLSL IR into Mesa gl_programs with 3030 * code lowering and other optimizations. 3031 */ 3032GLboolean 3033_mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) 3034{ 3035 assert(prog->data->LinkStatus); 3036 3037 for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { 3038 if (prog->_LinkedShaders[i] == NULL) 3039 continue; 3040 3041 bool progress; 3042 exec_list *ir = prog->_LinkedShaders[i]->ir; 3043 const struct gl_shader_compiler_options *options = 3044 &ctx->Const.ShaderCompilerOptions[prog->_LinkedShaders[i]->Stage]; 3045 3046 do { 3047 progress = false; 3048 3049 /* Lowering */ 3050 do_mat_op_to_vec(ir); 3051 lower_instructions(ir, (MOD_TO_FLOOR | DIV_TO_MUL_RCP | EXP_TO_EXP2 3052 | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP 3053 | MUL64_TO_MUL_AND_MUL_HIGH 3054 | ((options->EmitNoPow) ? POW_TO_EXP2 : 0))); 3055 3056 progress = do_common_optimization(ir, true, true, 3057 options, ctx->Const.NativeIntegers) 3058 || progress; 3059 3060 progress = lower_quadop_vector(ir, true) || progress; 3061 3062 if (options->MaxIfDepth == 0) 3063 progress = lower_discard(ir) || progress; 3064 3065 progress = lower_if_to_cond_assign((gl_shader_stage)i, ir, 3066 options->MaxIfDepth) || progress; 3067 3068 progress = lower_noise(ir) || progress; 3069 3070 /* If there are forms of indirect addressing that the driver 3071 * cannot handle, perform the lowering pass. 3072 */ 3073 if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput 3074 || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) 3075 progress = 3076 lower_variable_index_to_cond_assign(prog->_LinkedShaders[i]->Stage, ir, 3077 options->EmitNoIndirectInput, 3078 options->EmitNoIndirectOutput, 3079 options->EmitNoIndirectTemp, 3080 options->EmitNoIndirectUniform) 3081 || progress; 3082 3083 progress = do_vec_index_to_cond_assign(ir) || progress; 3084 progress = lower_vector_insert(ir, true) || progress; 3085 } while (progress); 3086 3087 validate_ir_tree(ir); 3088 } 3089 3090 for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { 3091 struct gl_program *linked_prog; 3092 3093 if (prog->_LinkedShaders[i] == NULL) 3094 continue; 3095 3096 linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]); 3097 3098 if (linked_prog) { 3099 _mesa_copy_linked_program_data(prog, prog->_LinkedShaders[i]); 3100 3101 if (!ctx->Driver.ProgramStringNotify(ctx, 3102 _mesa_shader_stage_to_program(i), 3103 linked_prog)) { 3104 _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program, 3105 NULL); 3106 return GL_FALSE; 3107 } 3108 } 3109 } 3110 3111 build_program_resource_list(ctx, prog); 3112 return prog->data->LinkStatus; 3113} 3114 3115/** 3116 * Link a GLSL shader program. Called via glLinkProgram(). 3117 */ 3118void 3119_mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) 3120{ 3121 unsigned int i; 3122 bool spirv = false; 3123 3124 _mesa_clear_shader_program_data(ctx, prog); 3125 3126 prog->data = _mesa_create_shader_program_data(); 3127 3128 prog->data->LinkStatus = LINKING_SUCCESS; 3129 3130 for (i = 0; i < prog->NumShaders; i++) { 3131 if (!prog->Shaders[i]->CompileStatus) { 3132 linker_error(prog, "linking with uncompiled/unspecialized shader"); 3133 } 3134 3135 if (!i) { 3136 spirv = (prog->Shaders[i]->spirv_data != NULL); 3137 } else if (spirv && !prog->Shaders[i]->spirv_data) { 3138 /* The GL_ARB_gl_spirv spec adds a new bullet point to the list of 3139 * reasons LinkProgram can fail: 3140 * 3141 * "All the shader objects attached to <program> do not have the 3142 * same value for the SPIR_V_BINARY_ARB state." 3143 */ 3144 linker_error(prog, 3145 "not all attached shaders have the same " 3146 "SPIR_V_BINARY_ARB state"); 3147 } 3148 } 3149 prog->data->spirv = spirv; 3150 3151 if (prog->data->LinkStatus) { 3152 if (!spirv) 3153 link_shaders(ctx, prog); 3154 else 3155 _mesa_spirv_link_shaders(ctx, prog); 3156 } 3157 3158 /* If LinkStatus is LINKING_SUCCESS, then reset sampler validated to true. 3159 * Validation happens via the LinkShader call below. If LinkStatus is 3160 * LINKING_SKIPPED, then SamplersValidated will have been restored from the 3161 * shader cache. 3162 */ 3163 if (prog->data->LinkStatus == LINKING_SUCCESS) { 3164 prog->SamplersValidated = GL_TRUE; 3165 } 3166 3167 if (prog->data->LinkStatus && !ctx->Driver.LinkShader(ctx, prog)) { 3168 prog->data->LinkStatus = LINKING_FAILURE; 3169 } 3170 3171 /* Return early if we are loading the shader from on-disk cache */ 3172 if (prog->data->LinkStatus == LINKING_SKIPPED) 3173 return; 3174 3175 if (ctx->_Shader->Flags & GLSL_DUMP) { 3176 if (!prog->data->LinkStatus) { 3177 fprintf(stderr, "GLSL shader program %d failed to link\n", prog->Name); 3178 } 3179 3180 if (prog->data->InfoLog && prog->data->InfoLog[0] != 0) { 3181 fprintf(stderr, "GLSL shader program %d info log:\n", prog->Name); 3182 fprintf(stderr, "%s\n", prog->data->InfoLog); 3183 } 3184 } 3185 3186#ifdef ENABLE_SHADER_CACHE 3187 if (prog->data->LinkStatus) 3188 shader_cache_write_program_metadata(ctx, prog); 3189#endif 3190} 3191 3192} /* extern "C" */ 3193