13464ebd5Sriastradh/* 23464ebd5Sriastradh * Copyright (C) 2005-2007 Brian Paul All Rights Reserved. 33464ebd5Sriastradh * Copyright (C) 2008 VMware, Inc. All Rights Reserved. 43464ebd5Sriastradh * Copyright © 2010 Intel Corporation 53464ebd5Sriastradh * 63464ebd5Sriastradh * Permission is hereby granted, free of charge, to any person obtaining a 73464ebd5Sriastradh * copy of this software and associated documentation files (the "Software"), 83464ebd5Sriastradh * to deal in the Software without restriction, including without limitation 93464ebd5Sriastradh * the rights to use, copy, modify, merge, publish, distribute, sublicense, 103464ebd5Sriastradh * and/or sell copies of the Software, and to permit persons to whom the 113464ebd5Sriastradh * Software is furnished to do so, subject to the following conditions: 123464ebd5Sriastradh * 133464ebd5Sriastradh * The above copyright notice and this permission notice (including the next 143464ebd5Sriastradh * paragraph) shall be included in all copies or substantial portions of the 153464ebd5Sriastradh * Software. 163464ebd5Sriastradh * 173464ebd5Sriastradh * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 183464ebd5Sriastradh * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 193464ebd5Sriastradh * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 203464ebd5Sriastradh * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 213464ebd5Sriastradh * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 223464ebd5Sriastradh * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 233464ebd5Sriastradh * DEALINGS IN THE SOFTWARE. 243464ebd5Sriastradh */ 253464ebd5Sriastradh 263464ebd5Sriastradh/** 273464ebd5Sriastradh * \file ir_to_mesa.cpp 283464ebd5Sriastradh * 293464ebd5Sriastradh * Translate GLSL IR to Mesa's gl_program representation. 
303464ebd5Sriastradh */ 313464ebd5Sriastradh 323464ebd5Sriastradh#include <stdio.h> 3301e04c3fSmrg#include "main/macros.h" 343464ebd5Sriastradh#include "main/mtypes.h" 3501e04c3fSmrg#include "main/shaderapi.h" 363464ebd5Sriastradh#include "main/shaderobj.h" 373464ebd5Sriastradh#include "main/uniforms.h" 3801e04c3fSmrg#include "main/glspirv.h" 3901e04c3fSmrg#include "compiler/glsl/ast.h" 4001e04c3fSmrg#include "compiler/glsl/ir.h" 4101e04c3fSmrg#include "compiler/glsl/ir_expression_flattening.h" 4201e04c3fSmrg#include "compiler/glsl/ir_visitor.h" 4301e04c3fSmrg#include "compiler/glsl/ir_optimization.h" 4401e04c3fSmrg#include "compiler/glsl/ir_uniform.h" 4501e04c3fSmrg#include "compiler/glsl/glsl_parser_extras.h" 4601e04c3fSmrg#include "compiler/glsl_types.h" 4701e04c3fSmrg#include "compiler/glsl/linker.h" 4801e04c3fSmrg#include "compiler/glsl/program.h" 4901e04c3fSmrg#include "compiler/glsl/shader_cache.h" 5001e04c3fSmrg#include "compiler/glsl/string_to_uint_map.h" 513464ebd5Sriastradh#include "program/prog_instruction.h" 523464ebd5Sriastradh#include "program/prog_optimize.h" 533464ebd5Sriastradh#include "program/prog_print.h" 543464ebd5Sriastradh#include "program/program.h" 553464ebd5Sriastradh#include "program/prog_parameter.h" 5601e04c3fSmrg 573464ebd5Sriastradh 58af69d88dSmrgstatic int swizzle_for_size(int size); 59af69d88dSmrg 60af69d88dSmrgnamespace { 61af69d88dSmrg 623464ebd5Sriastradhclass src_reg; 633464ebd5Sriastradhclass dst_reg; 643464ebd5Sriastradh 653464ebd5Sriastradh/** 663464ebd5Sriastradh * This struct is a corresponding struct to Mesa prog_src_register, with 673464ebd5Sriastradh * wider fields. 
683464ebd5Sriastradh */ 693464ebd5Sriastradhclass src_reg { 703464ebd5Sriastradhpublic: 713464ebd5Sriastradh src_reg(gl_register_file file, int index, const glsl_type *type) 723464ebd5Sriastradh { 733464ebd5Sriastradh this->file = file; 743464ebd5Sriastradh this->index = index; 753464ebd5Sriastradh if (type && (type->is_scalar() || type->is_vector() || type->is_matrix())) 763464ebd5Sriastradh this->swizzle = swizzle_for_size(type->vector_elements); 773464ebd5Sriastradh else 783464ebd5Sriastradh this->swizzle = SWIZZLE_XYZW; 793464ebd5Sriastradh this->negate = 0; 803464ebd5Sriastradh this->reladdr = NULL; 813464ebd5Sriastradh } 823464ebd5Sriastradh 833464ebd5Sriastradh src_reg() 843464ebd5Sriastradh { 853464ebd5Sriastradh this->file = PROGRAM_UNDEFINED; 863464ebd5Sriastradh this->index = 0; 873464ebd5Sriastradh this->swizzle = 0; 883464ebd5Sriastradh this->negate = 0; 893464ebd5Sriastradh this->reladdr = NULL; 903464ebd5Sriastradh } 913464ebd5Sriastradh 923464ebd5Sriastradh explicit src_reg(dst_reg reg); 933464ebd5Sriastradh 943464ebd5Sriastradh gl_register_file file; /**< PROGRAM_* from Mesa */ 95af69d88dSmrg int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */ 963464ebd5Sriastradh GLuint swizzle; /**< SWIZZLE_XYZWONEZERO swizzles from Mesa. */ 973464ebd5Sriastradh int negate; /**< NEGATE_XYZW mask from mesa */ 983464ebd5Sriastradh /** Register index should be offset by the integer in this reg. 
*/ 993464ebd5Sriastradh src_reg *reladdr; 1003464ebd5Sriastradh}; 1013464ebd5Sriastradh 1023464ebd5Sriastradhclass dst_reg { 1033464ebd5Sriastradhpublic: 1043464ebd5Sriastradh dst_reg(gl_register_file file, int writemask) 1053464ebd5Sriastradh { 1063464ebd5Sriastradh this->file = file; 1073464ebd5Sriastradh this->index = 0; 1083464ebd5Sriastradh this->writemask = writemask; 1093464ebd5Sriastradh this->reladdr = NULL; 1103464ebd5Sriastradh } 1113464ebd5Sriastradh 1123464ebd5Sriastradh dst_reg() 1133464ebd5Sriastradh { 1143464ebd5Sriastradh this->file = PROGRAM_UNDEFINED; 1153464ebd5Sriastradh this->index = 0; 1163464ebd5Sriastradh this->writemask = 0; 1173464ebd5Sriastradh this->reladdr = NULL; 1183464ebd5Sriastradh } 1193464ebd5Sriastradh 1203464ebd5Sriastradh explicit dst_reg(src_reg reg); 1213464ebd5Sriastradh 1223464ebd5Sriastradh gl_register_file file; /**< PROGRAM_* from Mesa */ 123af69d88dSmrg int index; /**< temporary index, VERT_ATTRIB_*, VARYING_SLOT_*, etc. */ 1243464ebd5Sriastradh int writemask; /**< Bitfield of WRITEMASK_[XYZW] */ 1253464ebd5Sriastradh /** Register index should be offset by the integer in this reg. 
*/ 1263464ebd5Sriastradh src_reg *reladdr; 1273464ebd5Sriastradh}; 1283464ebd5Sriastradh 129af69d88dSmrg} /* anonymous namespace */ 130af69d88dSmrg 1313464ebd5Sriastradhsrc_reg::src_reg(dst_reg reg) 1323464ebd5Sriastradh{ 1333464ebd5Sriastradh this->file = reg.file; 1343464ebd5Sriastradh this->index = reg.index; 1353464ebd5Sriastradh this->swizzle = SWIZZLE_XYZW; 1363464ebd5Sriastradh this->negate = 0; 1373464ebd5Sriastradh this->reladdr = reg.reladdr; 1383464ebd5Sriastradh} 1393464ebd5Sriastradh 1403464ebd5Sriastradhdst_reg::dst_reg(src_reg reg) 1413464ebd5Sriastradh{ 1423464ebd5Sriastradh this->file = reg.file; 1433464ebd5Sriastradh this->index = reg.index; 1443464ebd5Sriastradh this->writemask = WRITEMASK_XYZW; 1453464ebd5Sriastradh this->reladdr = reg.reladdr; 1463464ebd5Sriastradh} 1473464ebd5Sriastradh 148af69d88dSmrgnamespace { 149af69d88dSmrg 1503464ebd5Sriastradhclass ir_to_mesa_instruction : public exec_node { 1513464ebd5Sriastradhpublic: 152af69d88dSmrg DECLARE_RALLOC_CXX_OPERATORS(ir_to_mesa_instruction) 1533464ebd5Sriastradh 1543464ebd5Sriastradh enum prog_opcode op; 1553464ebd5Sriastradh dst_reg dst; 1563464ebd5Sriastradh src_reg src[3]; 1573464ebd5Sriastradh /** Pointer to the ir source this tree came from for debugging */ 1583464ebd5Sriastradh ir_instruction *ir; 1593464ebd5Sriastradh bool saturate; 1603464ebd5Sriastradh int sampler; /**< sampler index */ 1613464ebd5Sriastradh int tex_target; /**< One of TEXTURE_*_INDEX */ 1623464ebd5Sriastradh GLboolean tex_shadow; 1633464ebd5Sriastradh}; 1643464ebd5Sriastradh 1653464ebd5Sriastradhclass variable_storage : public exec_node { 1663464ebd5Sriastradhpublic: 1673464ebd5Sriastradh variable_storage(ir_variable *var, gl_register_file file, int index) 1683464ebd5Sriastradh : file(file), index(index), var(var) 1693464ebd5Sriastradh { 1703464ebd5Sriastradh /* empty */ 1713464ebd5Sriastradh } 1723464ebd5Sriastradh 1733464ebd5Sriastradh gl_register_file file; 1743464ebd5Sriastradh int index; 
1753464ebd5Sriastradh ir_variable *var; /* variable that maps to this, if any */ 1763464ebd5Sriastradh}; 1773464ebd5Sriastradh 1783464ebd5Sriastradhclass function_entry : public exec_node { 1793464ebd5Sriastradhpublic: 1803464ebd5Sriastradh ir_function_signature *sig; 1813464ebd5Sriastradh 1823464ebd5Sriastradh /** 1833464ebd5Sriastradh * identifier of this function signature used by the program. 1843464ebd5Sriastradh * 1853464ebd5Sriastradh * At the point that Mesa instructions for function calls are 1863464ebd5Sriastradh * generated, we don't know the address of the first instruction of 1873464ebd5Sriastradh * the function body. So we make the BranchTarget that is called a 1883464ebd5Sriastradh * small integer and rewrite them during set_branchtargets(). 1893464ebd5Sriastradh */ 1903464ebd5Sriastradh int sig_id; 1913464ebd5Sriastradh 1923464ebd5Sriastradh /** 1933464ebd5Sriastradh * Pointer to first instruction of the function body. 1943464ebd5Sriastradh * 1953464ebd5Sriastradh * Set during function body emits after main() is processed. 1963464ebd5Sriastradh */ 1973464ebd5Sriastradh ir_to_mesa_instruction *bgn_inst; 1983464ebd5Sriastradh 1993464ebd5Sriastradh /** 2003464ebd5Sriastradh * Index of the first instruction of the function body in actual 2013464ebd5Sriastradh * Mesa IR. 2023464ebd5Sriastradh * 2033464ebd5Sriastradh * Set after convertion from ir_to_mesa_instruction to prog_instruction. 2043464ebd5Sriastradh */ 2053464ebd5Sriastradh int inst; 2063464ebd5Sriastradh 2073464ebd5Sriastradh /** Storage for the return value. 
*/ 2083464ebd5Sriastradh src_reg return_reg; 2093464ebd5Sriastradh}; 2103464ebd5Sriastradh 2113464ebd5Sriastradhclass ir_to_mesa_visitor : public ir_visitor { 2123464ebd5Sriastradhpublic: 2133464ebd5Sriastradh ir_to_mesa_visitor(); 2143464ebd5Sriastradh ~ir_to_mesa_visitor(); 2153464ebd5Sriastradh 2163464ebd5Sriastradh function_entry *current_function; 2173464ebd5Sriastradh 2183464ebd5Sriastradh struct gl_context *ctx; 2193464ebd5Sriastradh struct gl_program *prog; 2203464ebd5Sriastradh struct gl_shader_program *shader_program; 2213464ebd5Sriastradh struct gl_shader_compiler_options *options; 2223464ebd5Sriastradh 2233464ebd5Sriastradh int next_temp; 2243464ebd5Sriastradh 225af69d88dSmrg variable_storage *find_variable_storage(const ir_variable *var); 2263464ebd5Sriastradh 2273464ebd5Sriastradh src_reg get_temp(const glsl_type *type); 2283464ebd5Sriastradh void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr); 2293464ebd5Sriastradh 2303464ebd5Sriastradh src_reg src_reg_for_float(float val); 2313464ebd5Sriastradh 2323464ebd5Sriastradh /** 2333464ebd5Sriastradh * \name Visit methods 2343464ebd5Sriastradh * 2353464ebd5Sriastradh * As typical for the visitor pattern, there must be one \c visit method for 2363464ebd5Sriastradh * each concrete subclass of \c ir_instruction. Virtual base classes within 2373464ebd5Sriastradh * the hierarchy should not have \c visit methods. 
2383464ebd5Sriastradh */ 2393464ebd5Sriastradh /*@{*/ 2403464ebd5Sriastradh virtual void visit(ir_variable *); 2413464ebd5Sriastradh virtual void visit(ir_loop *); 2423464ebd5Sriastradh virtual void visit(ir_loop_jump *); 2433464ebd5Sriastradh virtual void visit(ir_function_signature *); 2443464ebd5Sriastradh virtual void visit(ir_function *); 2453464ebd5Sriastradh virtual void visit(ir_expression *); 2463464ebd5Sriastradh virtual void visit(ir_swizzle *); 2473464ebd5Sriastradh virtual void visit(ir_dereference_variable *); 2483464ebd5Sriastradh virtual void visit(ir_dereference_array *); 2493464ebd5Sriastradh virtual void visit(ir_dereference_record *); 2503464ebd5Sriastradh virtual void visit(ir_assignment *); 2513464ebd5Sriastradh virtual void visit(ir_constant *); 2523464ebd5Sriastradh virtual void visit(ir_call *); 2533464ebd5Sriastradh virtual void visit(ir_return *); 2543464ebd5Sriastradh virtual void visit(ir_discard *); 2557ec681f3Smrg virtual void visit(ir_demote *); 2563464ebd5Sriastradh virtual void visit(ir_texture *); 2573464ebd5Sriastradh virtual void visit(ir_if *); 258af69d88dSmrg virtual void visit(ir_emit_vertex *); 259af69d88dSmrg virtual void visit(ir_end_primitive *); 26001e04c3fSmrg virtual void visit(ir_barrier *); 2613464ebd5Sriastradh /*@}*/ 2623464ebd5Sriastradh 2633464ebd5Sriastradh src_reg result; 2643464ebd5Sriastradh 2653464ebd5Sriastradh /** List of variable_storage */ 2663464ebd5Sriastradh exec_list variables; 2673464ebd5Sriastradh 2683464ebd5Sriastradh /** List of function_entry */ 2693464ebd5Sriastradh exec_list function_signatures; 2703464ebd5Sriastradh int next_signature_id; 2713464ebd5Sriastradh 2723464ebd5Sriastradh /** List of ir_to_mesa_instruction */ 2733464ebd5Sriastradh exec_list instructions; 2743464ebd5Sriastradh 2753464ebd5Sriastradh ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op); 2763464ebd5Sriastradh 2773464ebd5Sriastradh ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op, 
2783464ebd5Sriastradh dst_reg dst, src_reg src0); 2793464ebd5Sriastradh 2803464ebd5Sriastradh ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op, 2813464ebd5Sriastradh dst_reg dst, src_reg src0, src_reg src1); 2823464ebd5Sriastradh 2833464ebd5Sriastradh ir_to_mesa_instruction *emit(ir_instruction *ir, enum prog_opcode op, 2843464ebd5Sriastradh dst_reg dst, 2853464ebd5Sriastradh src_reg src0, src_reg src1, src_reg src2); 2863464ebd5Sriastradh 2873464ebd5Sriastradh /** 2883464ebd5Sriastradh * Emit the correct dot-product instruction for the type of arguments 2893464ebd5Sriastradh */ 290af69d88dSmrg ir_to_mesa_instruction * emit_dp(ir_instruction *ir, 291af69d88dSmrg dst_reg dst, 292af69d88dSmrg src_reg src0, 293af69d88dSmrg src_reg src1, 294af69d88dSmrg unsigned elements); 2953464ebd5Sriastradh 2963464ebd5Sriastradh void emit_scalar(ir_instruction *ir, enum prog_opcode op, 2973464ebd5Sriastradh dst_reg dst, src_reg src0); 2983464ebd5Sriastradh 2993464ebd5Sriastradh void emit_scalar(ir_instruction *ir, enum prog_opcode op, 3003464ebd5Sriastradh dst_reg dst, src_reg src0, src_reg src1); 3013464ebd5Sriastradh 302af69d88dSmrg bool try_emit_mad(ir_expression *ir, 3033464ebd5Sriastradh int mul_operand); 304af69d88dSmrg bool try_emit_mad_for_and_not(ir_expression *ir, 305af69d88dSmrg int mul_operand); 3063464ebd5Sriastradh 3073464ebd5Sriastradh void emit_swz(ir_expression *ir); 3083464ebd5Sriastradh 30901e04c3fSmrg void emit_equality_comparison(ir_expression *ir, enum prog_opcode op, 31001e04c3fSmrg dst_reg dst, 31101e04c3fSmrg const src_reg &src0, const src_reg &src1); 31201e04c3fSmrg 31301e04c3fSmrg inline void emit_sne(ir_expression *ir, dst_reg dst, 31401e04c3fSmrg const src_reg &src0, const src_reg &src1) 31501e04c3fSmrg { 31601e04c3fSmrg emit_equality_comparison(ir, OPCODE_SLT, dst, src0, src1); 31701e04c3fSmrg } 31801e04c3fSmrg 31901e04c3fSmrg inline void emit_seq(ir_expression *ir, dst_reg dst, 32001e04c3fSmrg const src_reg &src0, const src_reg 
&src1) 32101e04c3fSmrg { 32201e04c3fSmrg emit_equality_comparison(ir, OPCODE_SGE, dst, src0, src1); 32301e04c3fSmrg } 32401e04c3fSmrg 3253464ebd5Sriastradh bool process_move_condition(ir_rvalue *ir); 3263464ebd5Sriastradh 3273464ebd5Sriastradh void copy_propagate(void); 3283464ebd5Sriastradh 3293464ebd5Sriastradh void *mem_ctx; 3303464ebd5Sriastradh}; 3313464ebd5Sriastradh 332af69d88dSmrg} /* anonymous namespace */ 333af69d88dSmrg 334af69d88dSmrgstatic src_reg undef_src = src_reg(PROGRAM_UNDEFINED, 0, NULL); 3353464ebd5Sriastradh 336af69d88dSmrgstatic dst_reg undef_dst = dst_reg(PROGRAM_UNDEFINED, SWIZZLE_NOOP); 3373464ebd5Sriastradh 338af69d88dSmrgstatic dst_reg address_reg = dst_reg(PROGRAM_ADDRESS, WRITEMASK_X); 3393464ebd5Sriastradh 3403464ebd5Sriastradhstatic int 3413464ebd5Sriastradhswizzle_for_size(int size) 3423464ebd5Sriastradh{ 343af69d88dSmrg static const int size_swizzles[4] = { 3443464ebd5Sriastradh MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X), 3453464ebd5Sriastradh MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y), 3463464ebd5Sriastradh MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z), 3473464ebd5Sriastradh MAKE_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W), 3483464ebd5Sriastradh }; 3493464ebd5Sriastradh 3503464ebd5Sriastradh assert((size >= 1) && (size <= 4)); 3513464ebd5Sriastradh return size_swizzles[size - 1]; 3523464ebd5Sriastradh} 3533464ebd5Sriastradh 3543464ebd5Sriastradhir_to_mesa_instruction * 3553464ebd5Sriastradhir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op, 3563464ebd5Sriastradh dst_reg dst, 3573464ebd5Sriastradh src_reg src0, src_reg src1, src_reg src2) 3583464ebd5Sriastradh{ 3593464ebd5Sriastradh ir_to_mesa_instruction *inst = new(mem_ctx) ir_to_mesa_instruction(); 3603464ebd5Sriastradh int num_reladdr = 0; 3613464ebd5Sriastradh 3623464ebd5Sriastradh /* If we have to do relative addressing, we want to load the ARL 3633464ebd5Sriastradh * reg directly for one of the regs, and 
preload the other reladdr 3643464ebd5Sriastradh * sources into temps. 3653464ebd5Sriastradh */ 3663464ebd5Sriastradh num_reladdr += dst.reladdr != NULL; 3673464ebd5Sriastradh num_reladdr += src0.reladdr != NULL; 3683464ebd5Sriastradh num_reladdr += src1.reladdr != NULL; 3693464ebd5Sriastradh num_reladdr += src2.reladdr != NULL; 3703464ebd5Sriastradh 3713464ebd5Sriastradh reladdr_to_temp(ir, &src2, &num_reladdr); 3723464ebd5Sriastradh reladdr_to_temp(ir, &src1, &num_reladdr); 3733464ebd5Sriastradh reladdr_to_temp(ir, &src0, &num_reladdr); 3743464ebd5Sriastradh 3753464ebd5Sriastradh if (dst.reladdr) { 3763464ebd5Sriastradh emit(ir, OPCODE_ARL, address_reg, *dst.reladdr); 3773464ebd5Sriastradh num_reladdr--; 3783464ebd5Sriastradh } 3793464ebd5Sriastradh assert(num_reladdr == 0); 3803464ebd5Sriastradh 3813464ebd5Sriastradh inst->op = op; 3823464ebd5Sriastradh inst->dst = dst; 3833464ebd5Sriastradh inst->src[0] = src0; 3843464ebd5Sriastradh inst->src[1] = src1; 3853464ebd5Sriastradh inst->src[2] = src2; 3863464ebd5Sriastradh inst->ir = ir; 3873464ebd5Sriastradh 3883464ebd5Sriastradh this->instructions.push_tail(inst); 3893464ebd5Sriastradh 3903464ebd5Sriastradh return inst; 3913464ebd5Sriastradh} 3923464ebd5Sriastradh 3933464ebd5Sriastradh 3943464ebd5Sriastradhir_to_mesa_instruction * 3953464ebd5Sriastradhir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op, 3963464ebd5Sriastradh dst_reg dst, src_reg src0, src_reg src1) 3973464ebd5Sriastradh{ 3983464ebd5Sriastradh return emit(ir, op, dst, src0, src1, undef_src); 3993464ebd5Sriastradh} 4003464ebd5Sriastradh 4013464ebd5Sriastradhir_to_mesa_instruction * 4023464ebd5Sriastradhir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op, 4033464ebd5Sriastradh dst_reg dst, src_reg src0) 4043464ebd5Sriastradh{ 4053464ebd5Sriastradh assert(dst.writemask != 0); 4063464ebd5Sriastradh return emit(ir, op, dst, src0, undef_src, undef_src); 4073464ebd5Sriastradh} 4083464ebd5Sriastradh 
4093464ebd5Sriastradhir_to_mesa_instruction * 4103464ebd5Sriastradhir_to_mesa_visitor::emit(ir_instruction *ir, enum prog_opcode op) 4113464ebd5Sriastradh{ 4123464ebd5Sriastradh return emit(ir, op, undef_dst, undef_src, undef_src, undef_src); 4133464ebd5Sriastradh} 4143464ebd5Sriastradh 415af69d88dSmrgir_to_mesa_instruction * 4163464ebd5Sriastradhir_to_mesa_visitor::emit_dp(ir_instruction *ir, 4173464ebd5Sriastradh dst_reg dst, src_reg src0, src_reg src1, 4183464ebd5Sriastradh unsigned elements) 4193464ebd5Sriastradh{ 42001e04c3fSmrg static const enum prog_opcode dot_opcodes[] = { 4213464ebd5Sriastradh OPCODE_DP2, OPCODE_DP3, OPCODE_DP4 4223464ebd5Sriastradh }; 4233464ebd5Sriastradh 424af69d88dSmrg return emit(ir, dot_opcodes[elements - 2], dst, src0, src1); 4253464ebd5Sriastradh} 4263464ebd5Sriastradh 4273464ebd5Sriastradh/** 4283464ebd5Sriastradh * Emits Mesa scalar opcodes to produce unique answers across channels. 4293464ebd5Sriastradh * 4303464ebd5Sriastradh * Some Mesa opcodes are scalar-only, like ARB_fp/vp. The src X 4313464ebd5Sriastradh * channel determines the result across all channels. So to do a vec4 4323464ebd5Sriastradh * of this operation, we want to emit a scalar per source channel used 4333464ebd5Sriastradh * to produce dest channels. 4343464ebd5Sriastradh */ 4353464ebd5Sriastradhvoid 4363464ebd5Sriastradhir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op, 4373464ebd5Sriastradh dst_reg dst, 4383464ebd5Sriastradh src_reg orig_src0, src_reg orig_src1) 4393464ebd5Sriastradh{ 4403464ebd5Sriastradh int i, j; 4413464ebd5Sriastradh int done_mask = ~dst.writemask; 4423464ebd5Sriastradh 4433464ebd5Sriastradh /* Mesa RCP is a scalar operation splatting results to all channels, 4443464ebd5Sriastradh * like ARB_fp/vp. So emit as many RCPs as necessary to cover our 4453464ebd5Sriastradh * dst channels. 
4463464ebd5Sriastradh */ 4473464ebd5Sriastradh for (i = 0; i < 4; i++) { 4483464ebd5Sriastradh GLuint this_mask = (1 << i); 4493464ebd5Sriastradh ir_to_mesa_instruction *inst; 4503464ebd5Sriastradh src_reg src0 = orig_src0; 4513464ebd5Sriastradh src_reg src1 = orig_src1; 4523464ebd5Sriastradh 4533464ebd5Sriastradh if (done_mask & this_mask) 4543464ebd5Sriastradh continue; 4553464ebd5Sriastradh 4563464ebd5Sriastradh GLuint src0_swiz = GET_SWZ(src0.swizzle, i); 4573464ebd5Sriastradh GLuint src1_swiz = GET_SWZ(src1.swizzle, i); 4583464ebd5Sriastradh for (j = i + 1; j < 4; j++) { 4593464ebd5Sriastradh /* If there is another enabled component in the destination that is 4603464ebd5Sriastradh * derived from the same inputs, generate its value on this pass as 4613464ebd5Sriastradh * well. 4623464ebd5Sriastradh */ 4633464ebd5Sriastradh if (!(done_mask & (1 << j)) && 4643464ebd5Sriastradh GET_SWZ(src0.swizzle, j) == src0_swiz && 4653464ebd5Sriastradh GET_SWZ(src1.swizzle, j) == src1_swiz) { 4663464ebd5Sriastradh this_mask |= (1 << j); 4673464ebd5Sriastradh } 4683464ebd5Sriastradh } 4693464ebd5Sriastradh src0.swizzle = MAKE_SWIZZLE4(src0_swiz, src0_swiz, 4703464ebd5Sriastradh src0_swiz, src0_swiz); 4713464ebd5Sriastradh src1.swizzle = MAKE_SWIZZLE4(src1_swiz, src1_swiz, 4723464ebd5Sriastradh src1_swiz, src1_swiz); 4733464ebd5Sriastradh 4743464ebd5Sriastradh inst = emit(ir, op, dst, src0, src1); 4753464ebd5Sriastradh inst->dst.writemask = this_mask; 4763464ebd5Sriastradh done_mask |= this_mask; 4773464ebd5Sriastradh } 4783464ebd5Sriastradh} 4793464ebd5Sriastradh 4803464ebd5Sriastradhvoid 4813464ebd5Sriastradhir_to_mesa_visitor::emit_scalar(ir_instruction *ir, enum prog_opcode op, 4823464ebd5Sriastradh dst_reg dst, src_reg src0) 4833464ebd5Sriastradh{ 4843464ebd5Sriastradh src_reg undef = undef_src; 4853464ebd5Sriastradh 4863464ebd5Sriastradh undef.swizzle = SWIZZLE_XXXX; 4873464ebd5Sriastradh 4883464ebd5Sriastradh emit_scalar(ir, op, dst, src0, undef); 4893464ebd5Sriastradh} 
4903464ebd5Sriastradh 491af69d88dSmrgsrc_reg 4923464ebd5Sriastradhir_to_mesa_visitor::src_reg_for_float(float val) 4933464ebd5Sriastradh{ 4943464ebd5Sriastradh src_reg src(PROGRAM_CONSTANT, -1, NULL); 4953464ebd5Sriastradh 4963464ebd5Sriastradh src.index = _mesa_add_unnamed_constant(this->prog->Parameters, 497af69d88dSmrg (const gl_constant_value *)&val, 1, &src.swizzle); 4983464ebd5Sriastradh 4993464ebd5Sriastradh return src; 5003464ebd5Sriastradh} 5013464ebd5Sriastradh 50201e04c3fSmrgstatic int 50301e04c3fSmrgtype_size(const struct glsl_type *type) 50401e04c3fSmrg{ 5057ec681f3Smrg return type->count_vec4_slots(false, false); 50601e04c3fSmrg} 50701e04c3fSmrg 5083464ebd5Sriastradh/** 5093464ebd5Sriastradh * In the initial pass of codegen, we assign temporary numbers to 5103464ebd5Sriastradh * intermediate results. (not SSA -- variable assignments will reuse 5113464ebd5Sriastradh * storage). Actual register allocation for the Mesa VM occurs in a 5123464ebd5Sriastradh * pass over the Mesa IR later. 
5133464ebd5Sriastradh */ 5143464ebd5Sriastradhsrc_reg 5153464ebd5Sriastradhir_to_mesa_visitor::get_temp(const glsl_type *type) 5163464ebd5Sriastradh{ 5173464ebd5Sriastradh src_reg src; 5183464ebd5Sriastradh 5193464ebd5Sriastradh src.file = PROGRAM_TEMPORARY; 5203464ebd5Sriastradh src.index = next_temp; 5213464ebd5Sriastradh src.reladdr = NULL; 5223464ebd5Sriastradh next_temp += type_size(type); 5233464ebd5Sriastradh 524b9abf16eSmaya if (type->is_array() || type->is_struct()) { 5253464ebd5Sriastradh src.swizzle = SWIZZLE_NOOP; 5263464ebd5Sriastradh } else { 527af69d88dSmrg src.swizzle = swizzle_for_size(type->vector_elements); 5283464ebd5Sriastradh } 5293464ebd5Sriastradh src.negate = 0; 5303464ebd5Sriastradh 5313464ebd5Sriastradh return src; 5323464ebd5Sriastradh} 5333464ebd5Sriastradh 5343464ebd5Sriastradhvariable_storage * 535af69d88dSmrgir_to_mesa_visitor::find_variable_storage(const ir_variable *var) 5363464ebd5Sriastradh{ 537af69d88dSmrg foreach_in_list(variable_storage, entry, &this->variables) { 5383464ebd5Sriastradh if (entry->var == var) 5393464ebd5Sriastradh return entry; 5403464ebd5Sriastradh } 5413464ebd5Sriastradh 5423464ebd5Sriastradh return NULL; 5433464ebd5Sriastradh} 5443464ebd5Sriastradh 5453464ebd5Sriastradhvoid 5463464ebd5Sriastradhir_to_mesa_visitor::visit(ir_variable *ir) 5473464ebd5Sriastradh{ 548af69d88dSmrg if (ir->data.mode == ir_var_uniform && strncmp(ir->name, "gl_", 3) == 0) { 5493464ebd5Sriastradh unsigned int i; 55001e04c3fSmrg const ir_state_slot *const slots = ir->get_state_slots(); 55101e04c3fSmrg assert(slots != NULL); 5523464ebd5Sriastradh 5533464ebd5Sriastradh /* Check if this statevar's setup in the STATE file exactly 5543464ebd5Sriastradh * matches how we'll want to reference it as a 5553464ebd5Sriastradh * struct/array/whatever. If not, then we need to move it into 5563464ebd5Sriastradh * temporary storage and hope that it'll get copy-propagated 5573464ebd5Sriastradh * out. 
5583464ebd5Sriastradh */ 55901e04c3fSmrg for (i = 0; i < ir->get_num_state_slots(); i++) { 5603464ebd5Sriastradh if (slots[i].swizzle != SWIZZLE_XYZW) { 5613464ebd5Sriastradh break; 5623464ebd5Sriastradh } 5633464ebd5Sriastradh } 5643464ebd5Sriastradh 565af69d88dSmrg variable_storage *storage; 5663464ebd5Sriastradh dst_reg dst; 56701e04c3fSmrg if (i == ir->get_num_state_slots()) { 5683464ebd5Sriastradh /* We'll set the index later. */ 5693464ebd5Sriastradh storage = new(mem_ctx) variable_storage(ir, PROGRAM_STATE_VAR, -1); 5703464ebd5Sriastradh this->variables.push_tail(storage); 5713464ebd5Sriastradh 5723464ebd5Sriastradh dst = undef_dst; 5733464ebd5Sriastradh } else { 5743464ebd5Sriastradh /* The variable_storage constructor allocates slots based on the size 5753464ebd5Sriastradh * of the type. However, this had better match the number of state 5763464ebd5Sriastradh * elements that we're going to copy into the new temporary. 5773464ebd5Sriastradh */ 57801e04c3fSmrg assert((int) ir->get_num_state_slots() == type_size(ir->type)); 5793464ebd5Sriastradh 5803464ebd5Sriastradh storage = new(mem_ctx) variable_storage(ir, PROGRAM_TEMPORARY, 5813464ebd5Sriastradh this->next_temp); 5823464ebd5Sriastradh this->variables.push_tail(storage); 5833464ebd5Sriastradh this->next_temp += type_size(ir->type); 5843464ebd5Sriastradh 5853464ebd5Sriastradh dst = dst_reg(src_reg(PROGRAM_TEMPORARY, storage->index, NULL)); 5863464ebd5Sriastradh } 5873464ebd5Sriastradh 5883464ebd5Sriastradh 58901e04c3fSmrg for (unsigned int i = 0; i < ir->get_num_state_slots(); i++) { 5903464ebd5Sriastradh int index = _mesa_add_state_reference(this->prog->Parameters, 59101e04c3fSmrg slots[i].tokens); 5923464ebd5Sriastradh 5933464ebd5Sriastradh if (storage->file == PROGRAM_STATE_VAR) { 5943464ebd5Sriastradh if (storage->index == -1) { 5953464ebd5Sriastradh storage->index = index; 5963464ebd5Sriastradh } else { 5973464ebd5Sriastradh assert(index == storage->index + (int)i); 5983464ebd5Sriastradh } 
5993464ebd5Sriastradh } else { 6003464ebd5Sriastradh src_reg src(PROGRAM_STATE_VAR, index, NULL); 6013464ebd5Sriastradh src.swizzle = slots[i].swizzle; 6023464ebd5Sriastradh emit(ir, OPCODE_MOV, dst, src); 6033464ebd5Sriastradh /* even a float takes up a whole vec4 reg in a struct/array. */ 6043464ebd5Sriastradh dst.index++; 6053464ebd5Sriastradh } 6063464ebd5Sriastradh } 6073464ebd5Sriastradh 6083464ebd5Sriastradh if (storage->file == PROGRAM_TEMPORARY && 60901e04c3fSmrg dst.index != storage->index + (int) ir->get_num_state_slots()) { 6103464ebd5Sriastradh linker_error(this->shader_program, 6113464ebd5Sriastradh "failed to load builtin uniform `%s' " 6123464ebd5Sriastradh "(%d/%d regs loaded)\n", 6133464ebd5Sriastradh ir->name, dst.index - storage->index, 6143464ebd5Sriastradh type_size(ir->type)); 6153464ebd5Sriastradh } 6163464ebd5Sriastradh } 6173464ebd5Sriastradh} 6183464ebd5Sriastradh 6193464ebd5Sriastradhvoid 6203464ebd5Sriastradhir_to_mesa_visitor::visit(ir_loop *ir) 6213464ebd5Sriastradh{ 6223464ebd5Sriastradh emit(NULL, OPCODE_BGNLOOP); 6233464ebd5Sriastradh 6243464ebd5Sriastradh visit_exec_list(&ir->body_instructions, this); 6253464ebd5Sriastradh 6263464ebd5Sriastradh emit(NULL, OPCODE_ENDLOOP); 6273464ebd5Sriastradh} 6283464ebd5Sriastradh 6293464ebd5Sriastradhvoid 6303464ebd5Sriastradhir_to_mesa_visitor::visit(ir_loop_jump *ir) 6313464ebd5Sriastradh{ 6323464ebd5Sriastradh switch (ir->mode) { 6333464ebd5Sriastradh case ir_loop_jump::jump_break: 6343464ebd5Sriastradh emit(NULL, OPCODE_BRK); 6353464ebd5Sriastradh break; 6363464ebd5Sriastradh case ir_loop_jump::jump_continue: 6373464ebd5Sriastradh emit(NULL, OPCODE_CONT); 6383464ebd5Sriastradh break; 6393464ebd5Sriastradh } 6403464ebd5Sriastradh} 6413464ebd5Sriastradh 6423464ebd5Sriastradh 6433464ebd5Sriastradhvoid 6443464ebd5Sriastradhir_to_mesa_visitor::visit(ir_function_signature *ir) 6453464ebd5Sriastradh{ 6463464ebd5Sriastradh assert(0); 6473464ebd5Sriastradh (void)ir; 6483464ebd5Sriastradh} 
6493464ebd5Sriastradh 6503464ebd5Sriastradhvoid 6513464ebd5Sriastradhir_to_mesa_visitor::visit(ir_function *ir) 6523464ebd5Sriastradh{ 6533464ebd5Sriastradh /* Ignore function bodies other than main() -- we shouldn't see calls to 6543464ebd5Sriastradh * them since they should all be inlined before we get to ir_to_mesa. 6553464ebd5Sriastradh */ 6563464ebd5Sriastradh if (strcmp(ir->name, "main") == 0) { 6573464ebd5Sriastradh const ir_function_signature *sig; 6583464ebd5Sriastradh exec_list empty; 6593464ebd5Sriastradh 660af69d88dSmrg sig = ir->matching_signature(NULL, &empty, false); 6613464ebd5Sriastradh 6623464ebd5Sriastradh assert(sig); 6633464ebd5Sriastradh 664af69d88dSmrg foreach_in_list(ir_instruction, ir, &sig->body) { 6653464ebd5Sriastradh ir->accept(this); 6663464ebd5Sriastradh } 6673464ebd5Sriastradh } 6683464ebd5Sriastradh} 6693464ebd5Sriastradh 670af69d88dSmrgbool 6713464ebd5Sriastradhir_to_mesa_visitor::try_emit_mad(ir_expression *ir, int mul_operand) 6723464ebd5Sriastradh{ 6733464ebd5Sriastradh int nonmul_operand = 1 - mul_operand; 6743464ebd5Sriastradh src_reg a, b, c; 6753464ebd5Sriastradh 6763464ebd5Sriastradh ir_expression *expr = ir->operands[mul_operand]->as_expression(); 6773464ebd5Sriastradh if (!expr || expr->operation != ir_binop_mul) 6783464ebd5Sriastradh return false; 6793464ebd5Sriastradh 6803464ebd5Sriastradh expr->operands[0]->accept(this); 6813464ebd5Sriastradh a = this->result; 6823464ebd5Sriastradh expr->operands[1]->accept(this); 6833464ebd5Sriastradh b = this->result; 6843464ebd5Sriastradh ir->operands[nonmul_operand]->accept(this); 6853464ebd5Sriastradh c = this->result; 6863464ebd5Sriastradh 6873464ebd5Sriastradh this->result = get_temp(ir->type); 6883464ebd5Sriastradh emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, c); 6893464ebd5Sriastradh 6903464ebd5Sriastradh return true; 6913464ebd5Sriastradh} 6923464ebd5Sriastradh 693af69d88dSmrg/** 694af69d88dSmrg * Emit OPCODE_MAD(a, -b, a) instead of AND(a, NOT(b)) 695af69d88dSmrg * 
696af69d88dSmrg * The logic values are 1.0 for true and 0.0 for false. Logical-and is 697af69d88dSmrg * implemented using multiplication, and logical-or is implemented using 698af69d88dSmrg * addition. Logical-not can be implemented as (true - x), or (1.0 - x). 699af69d88dSmrg * As result, the logical expression (a & !b) can be rewritten as: 700af69d88dSmrg * 701af69d88dSmrg * - a * !b 702af69d88dSmrg * - a * (1 - b) 703af69d88dSmrg * - (a * 1) - (a * b) 704af69d88dSmrg * - a + -(a * b) 705af69d88dSmrg * - a + (a * -b) 706af69d88dSmrg * 707af69d88dSmrg * This final expression can be implemented as a single MAD(a, -b, a) 708af69d88dSmrg * instruction. 709af69d88dSmrg */ 710af69d88dSmrgbool 711af69d88dSmrgir_to_mesa_visitor::try_emit_mad_for_and_not(ir_expression *ir, int try_operand) 712af69d88dSmrg{ 713af69d88dSmrg const int other_operand = 1 - try_operand; 714af69d88dSmrg src_reg a, b; 715af69d88dSmrg 716af69d88dSmrg ir_expression *expr = ir->operands[try_operand]->as_expression(); 717af69d88dSmrg if (!expr || expr->operation != ir_unop_logic_not) 718af69d88dSmrg return false; 719af69d88dSmrg 720af69d88dSmrg ir->operands[other_operand]->accept(this); 721af69d88dSmrg a = this->result; 722af69d88dSmrg expr->operands[0]->accept(this); 723af69d88dSmrg b = this->result; 724af69d88dSmrg 725af69d88dSmrg b.negate = ~b.negate; 726af69d88dSmrg 727af69d88dSmrg this->result = get_temp(ir->type); 728af69d88dSmrg emit(ir, OPCODE_MAD, dst_reg(this->result), a, b, a); 729af69d88dSmrg 730af69d88dSmrg return true; 731af69d88dSmrg} 732af69d88dSmrg 7333464ebd5Sriastradhvoid 7343464ebd5Sriastradhir_to_mesa_visitor::reladdr_to_temp(ir_instruction *ir, 7353464ebd5Sriastradh src_reg *reg, int *num_reladdr) 7363464ebd5Sriastradh{ 7373464ebd5Sriastradh if (!reg->reladdr) 7383464ebd5Sriastradh return; 7393464ebd5Sriastradh 7403464ebd5Sriastradh emit(ir, OPCODE_ARL, address_reg, *reg->reladdr); 7413464ebd5Sriastradh 7423464ebd5Sriastradh if (*num_reladdr != 1) { 7433464ebd5Sriastradh src_reg 
temp = get_temp(glsl_type::vec4_type); 7443464ebd5Sriastradh 7453464ebd5Sriastradh emit(ir, OPCODE_MOV, dst_reg(temp), *reg); 7463464ebd5Sriastradh *reg = temp; 7473464ebd5Sriastradh } 7483464ebd5Sriastradh 7493464ebd5Sriastradh (*num_reladdr)--; 7503464ebd5Sriastradh} 7513464ebd5Sriastradh 7523464ebd5Sriastradhvoid 7533464ebd5Sriastradhir_to_mesa_visitor::emit_swz(ir_expression *ir) 7543464ebd5Sriastradh{ 7553464ebd5Sriastradh /* Assume that the vector operator is in a form compatible with OPCODE_SWZ. 7563464ebd5Sriastradh * This means that each of the operands is either an immediate value of -1, 7573464ebd5Sriastradh * 0, or 1, or is a component from one source register (possibly with 7583464ebd5Sriastradh * negation). 7593464ebd5Sriastradh */ 7603464ebd5Sriastradh uint8_t components[4] = { 0 }; 7613464ebd5Sriastradh bool negate[4] = { false }; 7623464ebd5Sriastradh ir_variable *var = NULL; 7633464ebd5Sriastradh 7643464ebd5Sriastradh for (unsigned i = 0; i < ir->type->vector_elements; i++) { 7653464ebd5Sriastradh ir_rvalue *op = ir->operands[i]; 7663464ebd5Sriastradh 7673464ebd5Sriastradh assert(op->type->is_scalar()); 7683464ebd5Sriastradh 7693464ebd5Sriastradh while (op != NULL) { 7703464ebd5Sriastradh switch (op->ir_type) { 7713464ebd5Sriastradh case ir_type_constant: { 7723464ebd5Sriastradh 7733464ebd5Sriastradh assert(op->type->is_scalar()); 7743464ebd5Sriastradh 7753464ebd5Sriastradh const ir_constant *const c = op->as_constant(); 7763464ebd5Sriastradh if (c->is_one()) { 7773464ebd5Sriastradh components[i] = SWIZZLE_ONE; 7783464ebd5Sriastradh } else if (c->is_zero()) { 7793464ebd5Sriastradh components[i] = SWIZZLE_ZERO; 7803464ebd5Sriastradh } else if (c->is_negative_one()) { 7813464ebd5Sriastradh components[i] = SWIZZLE_ONE; 7823464ebd5Sriastradh negate[i] = true; 7833464ebd5Sriastradh } else { 7843464ebd5Sriastradh assert(!"SWZ constant must be 0.0 or 1.0."); 7853464ebd5Sriastradh } 7863464ebd5Sriastradh 7873464ebd5Sriastradh op = NULL; 
7883464ebd5Sriastradh break; 7893464ebd5Sriastradh } 7903464ebd5Sriastradh 7913464ebd5Sriastradh case ir_type_dereference_variable: { 7923464ebd5Sriastradh ir_dereference_variable *const deref = 7933464ebd5Sriastradh (ir_dereference_variable *) op; 7943464ebd5Sriastradh 7953464ebd5Sriastradh assert((var == NULL) || (deref->var == var)); 7963464ebd5Sriastradh components[i] = SWIZZLE_X; 7973464ebd5Sriastradh var = deref->var; 7983464ebd5Sriastradh op = NULL; 7993464ebd5Sriastradh break; 8003464ebd5Sriastradh } 8013464ebd5Sriastradh 8023464ebd5Sriastradh case ir_type_expression: { 8033464ebd5Sriastradh ir_expression *const expr = (ir_expression *) op; 8043464ebd5Sriastradh 8053464ebd5Sriastradh assert(expr->operation == ir_unop_neg); 8063464ebd5Sriastradh negate[i] = true; 8073464ebd5Sriastradh 8083464ebd5Sriastradh op = expr->operands[0]; 8093464ebd5Sriastradh break; 8103464ebd5Sriastradh } 8113464ebd5Sriastradh 8123464ebd5Sriastradh case ir_type_swizzle: { 8133464ebd5Sriastradh ir_swizzle *const swiz = (ir_swizzle *) op; 8143464ebd5Sriastradh 8153464ebd5Sriastradh components[i] = swiz->mask.x; 8163464ebd5Sriastradh op = swiz->val; 8173464ebd5Sriastradh break; 8183464ebd5Sriastradh } 8193464ebd5Sriastradh 8203464ebd5Sriastradh default: 8213464ebd5Sriastradh assert(!"Should not get here."); 8223464ebd5Sriastradh return; 8233464ebd5Sriastradh } 8243464ebd5Sriastradh } 8253464ebd5Sriastradh } 8263464ebd5Sriastradh 8273464ebd5Sriastradh assert(var != NULL); 8283464ebd5Sriastradh 8293464ebd5Sriastradh ir_dereference_variable *const deref = 8303464ebd5Sriastradh new(mem_ctx) ir_dereference_variable(var); 8313464ebd5Sriastradh 8323464ebd5Sriastradh this->result.file = PROGRAM_UNDEFINED; 8333464ebd5Sriastradh deref->accept(this); 8343464ebd5Sriastradh if (this->result.file == PROGRAM_UNDEFINED) { 8353464ebd5Sriastradh printf("Failed to get tree for expression operand:\n"); 836af69d88dSmrg deref->print(); 837af69d88dSmrg printf("\n"); 8383464ebd5Sriastradh exit(1); 
8393464ebd5Sriastradh } 8403464ebd5Sriastradh 8413464ebd5Sriastradh src_reg src; 8423464ebd5Sriastradh 8433464ebd5Sriastradh src = this->result; 8443464ebd5Sriastradh src.swizzle = MAKE_SWIZZLE4(components[0], 8453464ebd5Sriastradh components[1], 8463464ebd5Sriastradh components[2], 8473464ebd5Sriastradh components[3]); 8483464ebd5Sriastradh src.negate = ((unsigned(negate[0]) << 0) 8493464ebd5Sriastradh | (unsigned(negate[1]) << 1) 8503464ebd5Sriastradh | (unsigned(negate[2]) << 2) 8513464ebd5Sriastradh | (unsigned(negate[3]) << 3)); 8523464ebd5Sriastradh 8533464ebd5Sriastradh /* Storage for our result. Ideally for an assignment we'd be using the 8543464ebd5Sriastradh * actual storage for the result here, instead. 8553464ebd5Sriastradh */ 8563464ebd5Sriastradh const src_reg result_src = get_temp(ir->type); 8573464ebd5Sriastradh dst_reg result_dst = dst_reg(result_src); 8583464ebd5Sriastradh 8593464ebd5Sriastradh /* Limit writes to the channels that will be used by result_src later. 8603464ebd5Sriastradh * This does limit this temp's use as a temporary for multi-instruction 8613464ebd5Sriastradh * sequences. 8623464ebd5Sriastradh */ 8633464ebd5Sriastradh result_dst.writemask = (1 << ir->type->vector_elements) - 1; 8643464ebd5Sriastradh 8653464ebd5Sriastradh emit(ir, OPCODE_SWZ, result_dst, src); 8663464ebd5Sriastradh this->result = result_src; 8673464ebd5Sriastradh} 8683464ebd5Sriastradh 86901e04c3fSmrgvoid 87001e04c3fSmrgir_to_mesa_visitor::emit_equality_comparison(ir_expression *ir, 87101e04c3fSmrg enum prog_opcode op, 87201e04c3fSmrg dst_reg dst, 87301e04c3fSmrg const src_reg &src0, 87401e04c3fSmrg const src_reg &src1) 87501e04c3fSmrg{ 87601e04c3fSmrg src_reg difference; 87701e04c3fSmrg src_reg abs_difference = get_temp(glsl_type::vec4_type); 87801e04c3fSmrg const src_reg zero = src_reg_for_float(0.0); 87901e04c3fSmrg 88001e04c3fSmrg /* x == y is equivalent to -abs(x-y) >= 0. 
Since all of the code that 88101e04c3fSmrg * consumes the generated IR is pretty dumb, take special care when one 88201e04c3fSmrg * of the operands is zero. 88301e04c3fSmrg * 88401e04c3fSmrg * Similarly, x != y is equivalent to -abs(x-y) < 0. 88501e04c3fSmrg */ 88601e04c3fSmrg if (src0.file == zero.file && 88701e04c3fSmrg src0.index == zero.index && 88801e04c3fSmrg src0.swizzle == zero.swizzle) { 88901e04c3fSmrg difference = src1; 89001e04c3fSmrg } else if (src1.file == zero.file && 89101e04c3fSmrg src1.index == zero.index && 89201e04c3fSmrg src1.swizzle == zero.swizzle) { 89301e04c3fSmrg difference = src0; 89401e04c3fSmrg } else { 89501e04c3fSmrg difference = get_temp(glsl_type::vec4_type); 89601e04c3fSmrg 89701e04c3fSmrg src_reg tmp_src = src0; 89801e04c3fSmrg tmp_src.negate = ~tmp_src.negate; 89901e04c3fSmrg 90001e04c3fSmrg emit(ir, OPCODE_ADD, dst_reg(difference), tmp_src, src1); 90101e04c3fSmrg } 90201e04c3fSmrg 90301e04c3fSmrg emit(ir, OPCODE_ABS, dst_reg(abs_difference), difference); 90401e04c3fSmrg 90501e04c3fSmrg abs_difference.negate = ~abs_difference.negate; 90601e04c3fSmrg emit(ir, op, dst, abs_difference, zero); 90701e04c3fSmrg} 90801e04c3fSmrg 9093464ebd5Sriastradhvoid 9103464ebd5Sriastradhir_to_mesa_visitor::visit(ir_expression *ir) 9113464ebd5Sriastradh{ 9123464ebd5Sriastradh unsigned int operand; 91301e04c3fSmrg src_reg op[ARRAY_SIZE(ir->operands)]; 9143464ebd5Sriastradh src_reg result_src; 9153464ebd5Sriastradh dst_reg result_dst; 9163464ebd5Sriastradh 9173464ebd5Sriastradh /* Quick peephole: Emit OPCODE_MAD(a, b, c) instead of ADD(MUL(a, b), c) 9183464ebd5Sriastradh */ 9193464ebd5Sriastradh if (ir->operation == ir_binop_add) { 9203464ebd5Sriastradh if (try_emit_mad(ir, 1)) 9213464ebd5Sriastradh return; 9223464ebd5Sriastradh if (try_emit_mad(ir, 0)) 9233464ebd5Sriastradh return; 9243464ebd5Sriastradh } 925af69d88dSmrg 926af69d88dSmrg /* Quick peephole: Emit OPCODE_MAD(-a, -b, a) instead of AND(a, NOT(b)) 927af69d88dSmrg */ 928af69d88dSmrg if 
(ir->operation == ir_binop_logic_and) { 929af69d88dSmrg if (try_emit_mad_for_and_not(ir, 1)) 930af69d88dSmrg return; 931af69d88dSmrg if (try_emit_mad_for_and_not(ir, 0)) 932af69d88dSmrg return; 933af69d88dSmrg } 934af69d88dSmrg 9353464ebd5Sriastradh if (ir->operation == ir_quadop_vector) { 9363464ebd5Sriastradh this->emit_swz(ir); 9373464ebd5Sriastradh return; 9383464ebd5Sriastradh } 9393464ebd5Sriastradh 94001e04c3fSmrg for (operand = 0; operand < ir->num_operands; operand++) { 9413464ebd5Sriastradh this->result.file = PROGRAM_UNDEFINED; 9423464ebd5Sriastradh ir->operands[operand]->accept(this); 9433464ebd5Sriastradh if (this->result.file == PROGRAM_UNDEFINED) { 9443464ebd5Sriastradh printf("Failed to get tree for expression operand:\n"); 945af69d88dSmrg ir->operands[operand]->print(); 946af69d88dSmrg printf("\n"); 9473464ebd5Sriastradh exit(1); 9483464ebd5Sriastradh } 9493464ebd5Sriastradh op[operand] = this->result; 9503464ebd5Sriastradh 9513464ebd5Sriastradh /* Matrix expression operands should have been broken down to vector 9523464ebd5Sriastradh * operations already. 9533464ebd5Sriastradh */ 9543464ebd5Sriastradh assert(!ir->operands[operand]->type->is_matrix()); 9553464ebd5Sriastradh } 9563464ebd5Sriastradh 9573464ebd5Sriastradh int vector_elements = ir->operands[0]->type->vector_elements; 9583464ebd5Sriastradh if (ir->operands[1]) { 9593464ebd5Sriastradh vector_elements = MAX2(vector_elements, 9603464ebd5Sriastradh ir->operands[1]->type->vector_elements); 9613464ebd5Sriastradh } 9623464ebd5Sriastradh 9633464ebd5Sriastradh this->result.file = PROGRAM_UNDEFINED; 9643464ebd5Sriastradh 9653464ebd5Sriastradh /* Storage for our result. Ideally for an assignment we'd be using 9663464ebd5Sriastradh * the actual storage for the result here, instead. 9673464ebd5Sriastradh */ 9683464ebd5Sriastradh result_src = get_temp(ir->type); 9693464ebd5Sriastradh /* convenience for the emit functions below. 
*/ 9703464ebd5Sriastradh result_dst = dst_reg(result_src); 9713464ebd5Sriastradh /* Limit writes to the channels that will be used by result_src later. 9723464ebd5Sriastradh * This does limit this temp's use as a temporary for multi-instruction 9733464ebd5Sriastradh * sequences. 9743464ebd5Sriastradh */ 9753464ebd5Sriastradh result_dst.writemask = (1 << ir->type->vector_elements) - 1; 9763464ebd5Sriastradh 9773464ebd5Sriastradh switch (ir->operation) { 9783464ebd5Sriastradh case ir_unop_logic_not: 979af69d88dSmrg /* Previously 'SEQ dst, src, 0.0' was used for this. However, many 980af69d88dSmrg * older GPUs implement SEQ using multiple instructions (i915 uses two 981af69d88dSmrg * SGE instructions and a MUL instruction). Since our logic values are 982af69d88dSmrg * 0.0 and 1.0, 1-x also implements !x. 983af69d88dSmrg */ 984af69d88dSmrg op[0].negate = ~op[0].negate; 985af69d88dSmrg emit(ir, OPCODE_ADD, result_dst, op[0], src_reg_for_float(1.0)); 9863464ebd5Sriastradh break; 9873464ebd5Sriastradh case ir_unop_neg: 9883464ebd5Sriastradh op[0].negate = ~op[0].negate; 9893464ebd5Sriastradh result_src = op[0]; 9903464ebd5Sriastradh break; 9913464ebd5Sriastradh case ir_unop_abs: 9923464ebd5Sriastradh emit(ir, OPCODE_ABS, result_dst, op[0]); 9933464ebd5Sriastradh break; 9943464ebd5Sriastradh case ir_unop_sign: 9953464ebd5Sriastradh emit(ir, OPCODE_SSG, result_dst, op[0]); 9963464ebd5Sriastradh break; 9973464ebd5Sriastradh case ir_unop_rcp: 9983464ebd5Sriastradh emit_scalar(ir, OPCODE_RCP, result_dst, op[0]); 9993464ebd5Sriastradh break; 10003464ebd5Sriastradh 10013464ebd5Sriastradh case ir_unop_exp2: 10023464ebd5Sriastradh emit_scalar(ir, OPCODE_EX2, result_dst, op[0]); 10033464ebd5Sriastradh break; 10043464ebd5Sriastradh case ir_unop_exp: 100501e04c3fSmrg assert(!"not reached: should be handled by exp_to_exp2"); 100601e04c3fSmrg break; 10073464ebd5Sriastradh case ir_unop_log: 100801e04c3fSmrg assert(!"not reached: should be handled by log_to_log2"); 10093464ebd5Sriastradh 
break; 10103464ebd5Sriastradh case ir_unop_log2: 10113464ebd5Sriastradh emit_scalar(ir, OPCODE_LG2, result_dst, op[0]); 10123464ebd5Sriastradh break; 10133464ebd5Sriastradh case ir_unop_sin: 10143464ebd5Sriastradh emit_scalar(ir, OPCODE_SIN, result_dst, op[0]); 10153464ebd5Sriastradh break; 10163464ebd5Sriastradh case ir_unop_cos: 10173464ebd5Sriastradh emit_scalar(ir, OPCODE_COS, result_dst, op[0]); 10183464ebd5Sriastradh break; 10193464ebd5Sriastradh 10203464ebd5Sriastradh case ir_unop_dFdx: 10213464ebd5Sriastradh emit(ir, OPCODE_DDX, result_dst, op[0]); 10223464ebd5Sriastradh break; 10233464ebd5Sriastradh case ir_unop_dFdy: 10243464ebd5Sriastradh emit(ir, OPCODE_DDY, result_dst, op[0]); 10253464ebd5Sriastradh break; 10263464ebd5Sriastradh 102701e04c3fSmrg case ir_unop_saturate: { 102801e04c3fSmrg ir_to_mesa_instruction *inst = emit(ir, OPCODE_MOV, 102901e04c3fSmrg result_dst, op[0]); 103001e04c3fSmrg inst->saturate = true; 103101e04c3fSmrg break; 103201e04c3fSmrg } 10333464ebd5Sriastradh 10343464ebd5Sriastradh case ir_binop_add: 10353464ebd5Sriastradh emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); 10363464ebd5Sriastradh break; 10373464ebd5Sriastradh case ir_binop_sub: 10383464ebd5Sriastradh emit(ir, OPCODE_SUB, result_dst, op[0], op[1]); 10393464ebd5Sriastradh break; 10403464ebd5Sriastradh 10413464ebd5Sriastradh case ir_binop_mul: 10423464ebd5Sriastradh emit(ir, OPCODE_MUL, result_dst, op[0], op[1]); 10433464ebd5Sriastradh break; 10443464ebd5Sriastradh case ir_binop_div: 10453464ebd5Sriastradh assert(!"not reached: should be handled by ir_div_to_mul_rcp"); 1046af69d88dSmrg break; 10473464ebd5Sriastradh case ir_binop_mod: 104801e04c3fSmrg /* Floating point should be lowered by MOD_TO_FLOOR in the compiler. 
*/ 10497ec681f3Smrg assert(ir->type->is_integer_32()); 1050af69d88dSmrg emit(ir, OPCODE_MUL, result_dst, op[0], op[1]); 10513464ebd5Sriastradh break; 10523464ebd5Sriastradh 10533464ebd5Sriastradh case ir_binop_less: 10543464ebd5Sriastradh emit(ir, OPCODE_SLT, result_dst, op[0], op[1]); 10553464ebd5Sriastradh break; 10563464ebd5Sriastradh case ir_binop_gequal: 10573464ebd5Sriastradh emit(ir, OPCODE_SGE, result_dst, op[0], op[1]); 10583464ebd5Sriastradh break; 10593464ebd5Sriastradh case ir_binop_equal: 106001e04c3fSmrg emit_seq(ir, result_dst, op[0], op[1]); 10613464ebd5Sriastradh break; 10623464ebd5Sriastradh case ir_binop_nequal: 106301e04c3fSmrg emit_sne(ir, result_dst, op[0], op[1]); 10643464ebd5Sriastradh break; 10653464ebd5Sriastradh case ir_binop_all_equal: 10663464ebd5Sriastradh /* "==" operator producing a scalar boolean. */ 10673464ebd5Sriastradh if (ir->operands[0]->type->is_vector() || 10683464ebd5Sriastradh ir->operands[1]->type->is_vector()) { 10693464ebd5Sriastradh src_reg temp = get_temp(glsl_type::vec4_type); 107001e04c3fSmrg emit_sne(ir, dst_reg(temp), op[0], op[1]); 1071af69d88dSmrg 1072af69d88dSmrg /* After the dot-product, the value will be an integer on the 1073af69d88dSmrg * range [0,4]. Zero becomes 1.0, and positive values become zero. 1074af69d88dSmrg */ 10753464ebd5Sriastradh emit_dp(ir, result_dst, temp, temp, vector_elements); 1076af69d88dSmrg 1077af69d88dSmrg /* Negating the result of the dot-product gives values on the range 1078af69d88dSmrg * [-4, 0]. Zero becomes 1.0, and negative values become zero. This 1079af69d88dSmrg * achieved using SGE. 
1080af69d88dSmrg */ 1081af69d88dSmrg src_reg sge_src = result_src; 1082af69d88dSmrg sge_src.negate = ~sge_src.negate; 1083af69d88dSmrg emit(ir, OPCODE_SGE, result_dst, sge_src, src_reg_for_float(0.0)); 10843464ebd5Sriastradh } else { 108501e04c3fSmrg emit_seq(ir, result_dst, op[0], op[1]); 10863464ebd5Sriastradh } 10873464ebd5Sriastradh break; 10883464ebd5Sriastradh case ir_binop_any_nequal: 10893464ebd5Sriastradh /* "!=" operator producing a scalar boolean. */ 10903464ebd5Sriastradh if (ir->operands[0]->type->is_vector() || 10913464ebd5Sriastradh ir->operands[1]->type->is_vector()) { 10923464ebd5Sriastradh src_reg temp = get_temp(glsl_type::vec4_type); 109301e04c3fSmrg if (ir->operands[0]->type->is_boolean() && 109401e04c3fSmrg ir->operands[1]->as_constant() && 109501e04c3fSmrg ir->operands[1]->as_constant()->is_zero()) { 109601e04c3fSmrg temp = op[0]; 109701e04c3fSmrg } else { 109801e04c3fSmrg emit_sne(ir, dst_reg(temp), op[0], op[1]); 109901e04c3fSmrg } 1100af69d88dSmrg 1101af69d88dSmrg /* After the dot-product, the value will be an integer on the 1102af69d88dSmrg * range [0,4]. Zero stays zero, and positive values become 1.0. 1103af69d88dSmrg */ 1104af69d88dSmrg ir_to_mesa_instruction *const dp = 1105af69d88dSmrg emit_dp(ir, result_dst, temp, temp, vector_elements); 1106af69d88dSmrg if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { 1107af69d88dSmrg /* The clamping to [0,1] can be done for free in the fragment 1108af69d88dSmrg * shader with a saturate. 1109af69d88dSmrg */ 1110af69d88dSmrg dp->saturate = true; 1111af69d88dSmrg } else { 1112af69d88dSmrg /* Negating the result of the dot-product gives values on the range 1113af69d88dSmrg * [-4, 0]. Zero stays zero, and negative values become 1.0. This 1114af69d88dSmrg * achieved using SLT. 
1115af69d88dSmrg */ 1116af69d88dSmrg src_reg slt_src = result_src; 1117af69d88dSmrg slt_src.negate = ~slt_src.negate; 1118af69d88dSmrg emit(ir, OPCODE_SLT, result_dst, slt_src, src_reg_for_float(0.0)); 1119af69d88dSmrg } 11203464ebd5Sriastradh } else { 112101e04c3fSmrg emit_sne(ir, result_dst, op[0], op[1]); 1122af69d88dSmrg } 11233464ebd5Sriastradh break; 11243464ebd5Sriastradh 11253464ebd5Sriastradh case ir_binop_logic_xor: 112601e04c3fSmrg emit_sne(ir, result_dst, op[0], op[1]); 11273464ebd5Sriastradh break; 11283464ebd5Sriastradh 1129af69d88dSmrg case ir_binop_logic_or: { 1130af69d88dSmrg if (this->prog->Target == GL_FRAGMENT_PROGRAM_ARB) { 113101e04c3fSmrg /* After the addition, the value will be an integer on the 113201e04c3fSmrg * range [0,2]. Zero stays zero, and positive values become 1.0. 113301e04c3fSmrg */ 113401e04c3fSmrg ir_to_mesa_instruction *add = 113501e04c3fSmrg emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); 1136af69d88dSmrg add->saturate = true; 1137af69d88dSmrg } else { 113801e04c3fSmrg /* The Boolean arguments are stored as float 0.0 and 1.0. If either 113901e04c3fSmrg * value is 1.0, the result of the logcal-or should be 1.0. If both 114001e04c3fSmrg * values are 0.0, the result should be 0.0. This is exactly what 114101e04c3fSmrg * MAX does. 114201e04c3fSmrg */ 114301e04c3fSmrg emit(ir, OPCODE_MAX, result_dst, op[0], op[1]); 1144af69d88dSmrg } 11453464ebd5Sriastradh break; 1146af69d88dSmrg } 11473464ebd5Sriastradh 11483464ebd5Sriastradh case ir_binop_logic_and: 11493464ebd5Sriastradh /* the bool args are stored as float 0.0 or 1.0, so "mul" gives us "and". 
*/ 11503464ebd5Sriastradh emit(ir, OPCODE_MUL, result_dst, op[0], op[1]); 11513464ebd5Sriastradh break; 11523464ebd5Sriastradh 11533464ebd5Sriastradh case ir_binop_dot: 11543464ebd5Sriastradh assert(ir->operands[0]->type->is_vector()); 11553464ebd5Sriastradh assert(ir->operands[0]->type == ir->operands[1]->type); 11563464ebd5Sriastradh emit_dp(ir, result_dst, op[0], op[1], 11573464ebd5Sriastradh ir->operands[0]->type->vector_elements); 11583464ebd5Sriastradh break; 11593464ebd5Sriastradh 11603464ebd5Sriastradh case ir_unop_sqrt: 11613464ebd5Sriastradh /* sqrt(x) = x * rsq(x). */ 11623464ebd5Sriastradh emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]); 11633464ebd5Sriastradh emit(ir, OPCODE_MUL, result_dst, result_src, op[0]); 11643464ebd5Sriastradh /* For incoming channels <= 0, set the result to 0. */ 11653464ebd5Sriastradh op[0].negate = ~op[0].negate; 11663464ebd5Sriastradh emit(ir, OPCODE_CMP, result_dst, 11673464ebd5Sriastradh op[0], result_src, src_reg_for_float(0.0)); 11683464ebd5Sriastradh break; 11693464ebd5Sriastradh case ir_unop_rsq: 11703464ebd5Sriastradh emit_scalar(ir, OPCODE_RSQ, result_dst, op[0]); 11713464ebd5Sriastradh break; 11723464ebd5Sriastradh case ir_unop_i2f: 1173af69d88dSmrg case ir_unop_u2f: 11743464ebd5Sriastradh case ir_unop_b2f: 11753464ebd5Sriastradh case ir_unop_b2i: 1176af69d88dSmrg case ir_unop_i2u: 1177af69d88dSmrg case ir_unop_u2i: 11783464ebd5Sriastradh /* Mesa IR lacks types, ints are stored as truncated floats. 
*/ 11793464ebd5Sriastradh result_src = op[0]; 11803464ebd5Sriastradh break; 11813464ebd5Sriastradh case ir_unop_f2i: 1182af69d88dSmrg case ir_unop_f2u: 11833464ebd5Sriastradh emit(ir, OPCODE_TRUNC, result_dst, op[0]); 11843464ebd5Sriastradh break; 11853464ebd5Sriastradh case ir_unop_f2b: 11863464ebd5Sriastradh case ir_unop_i2b: 118701e04c3fSmrg emit_sne(ir, result_dst, op[0], src_reg_for_float(0.0)); 11883464ebd5Sriastradh break; 1189af69d88dSmrg case ir_unop_bitcast_f2i: // Ignore these 4, they can't happen here anyway 1190af69d88dSmrg case ir_unop_bitcast_f2u: 1191af69d88dSmrg case ir_unop_bitcast_i2f: 1192af69d88dSmrg case ir_unop_bitcast_u2f: 1193af69d88dSmrg break; 11943464ebd5Sriastradh case ir_unop_trunc: 11953464ebd5Sriastradh emit(ir, OPCODE_TRUNC, result_dst, op[0]); 11963464ebd5Sriastradh break; 11973464ebd5Sriastradh case ir_unop_ceil: 11983464ebd5Sriastradh op[0].negate = ~op[0].negate; 11993464ebd5Sriastradh emit(ir, OPCODE_FLR, result_dst, op[0]); 12003464ebd5Sriastradh result_src.negate = ~result_src.negate; 12013464ebd5Sriastradh break; 12023464ebd5Sriastradh case ir_unop_floor: 12033464ebd5Sriastradh emit(ir, OPCODE_FLR, result_dst, op[0]); 12043464ebd5Sriastradh break; 12053464ebd5Sriastradh case ir_unop_fract: 12063464ebd5Sriastradh emit(ir, OPCODE_FRC, result_dst, op[0]); 12073464ebd5Sriastradh break; 1208af69d88dSmrg case ir_unop_pack_snorm_2x16: 1209af69d88dSmrg case ir_unop_pack_snorm_4x8: 1210af69d88dSmrg case ir_unop_pack_unorm_2x16: 1211af69d88dSmrg case ir_unop_pack_unorm_4x8: 1212af69d88dSmrg case ir_unop_pack_half_2x16: 121301e04c3fSmrg case ir_unop_pack_double_2x32: 1214af69d88dSmrg case ir_unop_unpack_snorm_2x16: 1215af69d88dSmrg case ir_unop_unpack_snorm_4x8: 1216af69d88dSmrg case ir_unop_unpack_unorm_2x16: 1217af69d88dSmrg case ir_unop_unpack_unorm_4x8: 1218af69d88dSmrg case ir_unop_unpack_half_2x16: 121901e04c3fSmrg case ir_unop_unpack_double_2x32: 1220af69d88dSmrg case ir_unop_bitfield_reverse: 1221af69d88dSmrg case 
ir_unop_bit_count: 1222af69d88dSmrg case ir_unop_find_msb: 1223af69d88dSmrg case ir_unop_find_lsb: 122401e04c3fSmrg case ir_unop_d2f: 122501e04c3fSmrg case ir_unop_f2d: 122601e04c3fSmrg case ir_unop_d2i: 122701e04c3fSmrg case ir_unop_i2d: 122801e04c3fSmrg case ir_unop_d2u: 122901e04c3fSmrg case ir_unop_u2d: 123001e04c3fSmrg case ir_unop_d2b: 123101e04c3fSmrg case ir_unop_frexp_sig: 123201e04c3fSmrg case ir_unop_frexp_exp: 1233af69d88dSmrg assert(!"not supported"); 1234af69d88dSmrg break; 12353464ebd5Sriastradh case ir_binop_min: 12363464ebd5Sriastradh emit(ir, OPCODE_MIN, result_dst, op[0], op[1]); 12373464ebd5Sriastradh break; 12383464ebd5Sriastradh case ir_binop_max: 12393464ebd5Sriastradh emit(ir, OPCODE_MAX, result_dst, op[0], op[1]); 12403464ebd5Sriastradh break; 12413464ebd5Sriastradh case ir_binop_pow: 12423464ebd5Sriastradh emit_scalar(ir, OPCODE_POW, result_dst, op[0], op[1]); 12433464ebd5Sriastradh break; 12443464ebd5Sriastradh 1245af69d88dSmrg /* GLSL 1.30 integer ops are unsupported in Mesa IR, but since 1246af69d88dSmrg * hardware backends have no way to avoid Mesa IR generation 1247af69d88dSmrg * even if they don't use it, we need to emit "something" and 1248af69d88dSmrg * continue. 
1249af69d88dSmrg */ 12503464ebd5Sriastradh case ir_binop_lshift: 12513464ebd5Sriastradh case ir_binop_rshift: 12523464ebd5Sriastradh case ir_binop_bit_and: 12533464ebd5Sriastradh case ir_binop_bit_xor: 12543464ebd5Sriastradh case ir_binop_bit_or: 1255af69d88dSmrg emit(ir, OPCODE_ADD, result_dst, op[0], op[1]); 1256af69d88dSmrg break; 1257af69d88dSmrg 1258af69d88dSmrg case ir_unop_bit_not: 12593464ebd5Sriastradh case ir_unop_round_even: 1260af69d88dSmrg emit(ir, OPCODE_MOV, result_dst, op[0]); 1261af69d88dSmrg break; 1262af69d88dSmrg 1263af69d88dSmrg case ir_binop_ubo_load: 1264af69d88dSmrg assert(!"not supported"); 1265af69d88dSmrg break; 1266af69d88dSmrg 1267af69d88dSmrg case ir_triop_lrp: 1268af69d88dSmrg /* ir_triop_lrp operands are (x, y, a) while 1269af69d88dSmrg * OPCODE_LRP operands are (a, y, x) to match ARB_fragment_program. 1270af69d88dSmrg */ 1271af69d88dSmrg emit(ir, OPCODE_LRP, result_dst, op[2], op[1], op[0]); 1272af69d88dSmrg break; 1273af69d88dSmrg 127401e04c3fSmrg case ir_triop_csel: 127501e04c3fSmrg /* We assume that boolean true and false are 1.0 and 0.0. OPCODE_CMP 127601e04c3fSmrg * selects src1 if src0 is < 0, src2 otherwise. 
127701e04c3fSmrg */ 127801e04c3fSmrg op[0].negate = ~op[0].negate; 127901e04c3fSmrg emit(ir, OPCODE_CMP, result_dst, op[0], op[1], op[2]); 128001e04c3fSmrg break; 128101e04c3fSmrg 1282af69d88dSmrg case ir_binop_vector_extract: 1283af69d88dSmrg case ir_triop_fma: 1284af69d88dSmrg case ir_triop_bitfield_extract: 1285af69d88dSmrg case ir_triop_vector_insert: 1286af69d88dSmrg case ir_quadop_bitfield_insert: 1287af69d88dSmrg case ir_binop_ldexp: 1288af69d88dSmrg case ir_binop_carry: 1289af69d88dSmrg case ir_binop_borrow: 12907ec681f3Smrg case ir_binop_abs_sub: 12917ec681f3Smrg case ir_binop_add_sat: 12927ec681f3Smrg case ir_binop_sub_sat: 12937ec681f3Smrg case ir_binop_avg: 12947ec681f3Smrg case ir_binop_avg_round: 12957ec681f3Smrg case ir_binop_mul_32x16: 1296af69d88dSmrg case ir_binop_imul_high: 1297af69d88dSmrg case ir_unop_interpolate_at_centroid: 1298af69d88dSmrg case ir_binop_interpolate_at_offset: 1299af69d88dSmrg case ir_binop_interpolate_at_sample: 1300af69d88dSmrg case ir_unop_dFdx_coarse: 1301af69d88dSmrg case ir_unop_dFdx_fine: 1302af69d88dSmrg case ir_unop_dFdy_coarse: 1303af69d88dSmrg case ir_unop_dFdy_fine: 130401e04c3fSmrg case ir_unop_subroutine_to_int: 130501e04c3fSmrg case ir_unop_get_buffer_size: 130601e04c3fSmrg case ir_unop_bitcast_u642d: 130701e04c3fSmrg case ir_unop_bitcast_i642d: 130801e04c3fSmrg case ir_unop_bitcast_d2u64: 130901e04c3fSmrg case ir_unop_bitcast_d2i64: 131001e04c3fSmrg case ir_unop_i642i: 131101e04c3fSmrg case ir_unop_u642i: 131201e04c3fSmrg case ir_unop_i642u: 131301e04c3fSmrg case ir_unop_u642u: 131401e04c3fSmrg case ir_unop_i642b: 131501e04c3fSmrg case ir_unop_i642f: 131601e04c3fSmrg case ir_unop_u642f: 131701e04c3fSmrg case ir_unop_i642d: 131801e04c3fSmrg case ir_unop_u642d: 131901e04c3fSmrg case ir_unop_i2i64: 132001e04c3fSmrg case ir_unop_u2i64: 132101e04c3fSmrg case ir_unop_b2i64: 132201e04c3fSmrg case ir_unop_f2i64: 132301e04c3fSmrg case ir_unop_d2i64: 132401e04c3fSmrg case ir_unop_i2u64: 132501e04c3fSmrg case 
ir_unop_u2u64: 132601e04c3fSmrg case ir_unop_f2u64: 132701e04c3fSmrg case ir_unop_d2u64: 132801e04c3fSmrg case ir_unop_u642i64: 132901e04c3fSmrg case ir_unop_i642u64: 133001e04c3fSmrg case ir_unop_pack_int_2x32: 133101e04c3fSmrg case ir_unop_unpack_int_2x32: 133201e04c3fSmrg case ir_unop_pack_uint_2x32: 133301e04c3fSmrg case ir_unop_unpack_uint_2x32: 133401e04c3fSmrg case ir_unop_pack_sampler_2x32: 133501e04c3fSmrg case ir_unop_unpack_sampler_2x32: 133601e04c3fSmrg case ir_unop_pack_image_2x32: 133701e04c3fSmrg case ir_unop_unpack_image_2x32: 13387ec681f3Smrg case ir_unop_atan: 13397ec681f3Smrg case ir_binop_atan2: 13407ec681f3Smrg case ir_unop_clz: 13417ec681f3Smrg case ir_unop_f162f: 13427ec681f3Smrg case ir_unop_f2f16: 13437ec681f3Smrg case ir_unop_f2fmp: 13447ec681f3Smrg case ir_unop_f162b: 13457ec681f3Smrg case ir_unop_b2f16: 13467ec681f3Smrg case ir_unop_i2i: 13477ec681f3Smrg case ir_unop_i2imp: 13487ec681f3Smrg case ir_unop_u2u: 13497ec681f3Smrg case ir_unop_u2ump: 1350af69d88dSmrg assert(!"not supported"); 13513464ebd5Sriastradh break; 13523464ebd5Sriastradh 135301e04c3fSmrg case ir_unop_ssbo_unsized_array_length: 13547ec681f3Smrg case ir_unop_implicitly_sized_array_length: 13553464ebd5Sriastradh case ir_quadop_vector: 13563464ebd5Sriastradh /* This operation should have already been handled. 
13573464ebd5Sriastradh */ 13583464ebd5Sriastradh assert(!"Should not get here."); 13593464ebd5Sriastradh break; 13603464ebd5Sriastradh } 13613464ebd5Sriastradh 13623464ebd5Sriastradh this->result = result_src; 13633464ebd5Sriastradh} 13643464ebd5Sriastradh 13653464ebd5Sriastradh 13663464ebd5Sriastradhvoid 13673464ebd5Sriastradhir_to_mesa_visitor::visit(ir_swizzle *ir) 13683464ebd5Sriastradh{ 13693464ebd5Sriastradh src_reg src; 13703464ebd5Sriastradh int i; 13717ec681f3Smrg int swizzle[4] = {0}; 13723464ebd5Sriastradh 13733464ebd5Sriastradh /* Note that this is only swizzles in expressions, not those on the left 13743464ebd5Sriastradh * hand side of an assignment, which do write masking. See ir_assignment 13753464ebd5Sriastradh * for that. 13763464ebd5Sriastradh */ 13773464ebd5Sriastradh 13783464ebd5Sriastradh ir->val->accept(this); 13793464ebd5Sriastradh src = this->result; 13803464ebd5Sriastradh assert(src.file != PROGRAM_UNDEFINED); 138101e04c3fSmrg assert(ir->type->vector_elements > 0); 13823464ebd5Sriastradh 13833464ebd5Sriastradh for (i = 0; i < 4; i++) { 13843464ebd5Sriastradh if (i < ir->type->vector_elements) { 13853464ebd5Sriastradh switch (i) { 13863464ebd5Sriastradh case 0: 13873464ebd5Sriastradh swizzle[i] = GET_SWZ(src.swizzle, ir->mask.x); 13883464ebd5Sriastradh break; 13893464ebd5Sriastradh case 1: 13903464ebd5Sriastradh swizzle[i] = GET_SWZ(src.swizzle, ir->mask.y); 13913464ebd5Sriastradh break; 13923464ebd5Sriastradh case 2: 13933464ebd5Sriastradh swizzle[i] = GET_SWZ(src.swizzle, ir->mask.z); 13943464ebd5Sriastradh break; 13953464ebd5Sriastradh case 3: 13963464ebd5Sriastradh swizzle[i] = GET_SWZ(src.swizzle, ir->mask.w); 13973464ebd5Sriastradh break; 13983464ebd5Sriastradh } 13993464ebd5Sriastradh } else { 14003464ebd5Sriastradh /* If the type is smaller than a vec4, replicate the last 14013464ebd5Sriastradh * channel out. 
14023464ebd5Sriastradh */ 14033464ebd5Sriastradh swizzle[i] = swizzle[ir->type->vector_elements - 1]; 14043464ebd5Sriastradh } 14053464ebd5Sriastradh } 14063464ebd5Sriastradh 14073464ebd5Sriastradh src.swizzle = MAKE_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]); 14083464ebd5Sriastradh 14093464ebd5Sriastradh this->result = src; 14103464ebd5Sriastradh} 14113464ebd5Sriastradh 14123464ebd5Sriastradhvoid 14133464ebd5Sriastradhir_to_mesa_visitor::visit(ir_dereference_variable *ir) 14143464ebd5Sriastradh{ 14153464ebd5Sriastradh variable_storage *entry = find_variable_storage(ir->var); 14163464ebd5Sriastradh ir_variable *var = ir->var; 14173464ebd5Sriastradh 14183464ebd5Sriastradh if (!entry) { 1419af69d88dSmrg switch (var->data.mode) { 14203464ebd5Sriastradh case ir_var_uniform: 14213464ebd5Sriastradh entry = new(mem_ctx) variable_storage(var, PROGRAM_UNIFORM, 142201e04c3fSmrg var->data.param_index); 14233464ebd5Sriastradh this->variables.push_tail(entry); 14243464ebd5Sriastradh break; 1425af69d88dSmrg case ir_var_shader_in: 14263464ebd5Sriastradh /* The linker assigns locations for varyings and attributes, 1427af69d88dSmrg * including deprecated builtins (like gl_Color), 1428af69d88dSmrg * user-assigned generic attributes (glBindVertexLocation), 1429af69d88dSmrg * and user-defined varyings. 
14303464ebd5Sriastradh */ 1431af69d88dSmrg assert(var->data.location != -1); 14323464ebd5Sriastradh entry = new(mem_ctx) variable_storage(var, 14333464ebd5Sriastradh PROGRAM_INPUT, 1434af69d88dSmrg var->data.location); 14353464ebd5Sriastradh break; 1436af69d88dSmrg case ir_var_shader_out: 1437af69d88dSmrg assert(var->data.location != -1); 14383464ebd5Sriastradh entry = new(mem_ctx) variable_storage(var, 14393464ebd5Sriastradh PROGRAM_OUTPUT, 1440af69d88dSmrg var->data.location); 14413464ebd5Sriastradh break; 14423464ebd5Sriastradh case ir_var_system_value: 14433464ebd5Sriastradh entry = new(mem_ctx) variable_storage(var, 14443464ebd5Sriastradh PROGRAM_SYSTEM_VALUE, 1445af69d88dSmrg var->data.location); 14463464ebd5Sriastradh break; 14473464ebd5Sriastradh case ir_var_auto: 14483464ebd5Sriastradh case ir_var_temporary: 14493464ebd5Sriastradh entry = new(mem_ctx) variable_storage(var, PROGRAM_TEMPORARY, 14503464ebd5Sriastradh this->next_temp); 14513464ebd5Sriastradh this->variables.push_tail(entry); 14523464ebd5Sriastradh 14533464ebd5Sriastradh next_temp += type_size(var->type); 14543464ebd5Sriastradh break; 14553464ebd5Sriastradh } 14563464ebd5Sriastradh 14573464ebd5Sriastradh if (!entry) { 14583464ebd5Sriastradh printf("Failed to make storage for %s\n", var->name); 14593464ebd5Sriastradh exit(1); 14603464ebd5Sriastradh } 14613464ebd5Sriastradh } 14623464ebd5Sriastradh 14633464ebd5Sriastradh this->result = src_reg(entry->file, entry->index, var->type); 14643464ebd5Sriastradh} 14653464ebd5Sriastradh 14663464ebd5Sriastradhvoid 14673464ebd5Sriastradhir_to_mesa_visitor::visit(ir_dereference_array *ir) 14683464ebd5Sriastradh{ 14693464ebd5Sriastradh ir_constant *index; 14703464ebd5Sriastradh src_reg src; 14713464ebd5Sriastradh int element_size = type_size(ir->type); 14723464ebd5Sriastradh 147301e04c3fSmrg index = ir->array_index->constant_expression_value(ralloc_parent(ir)); 14743464ebd5Sriastradh 14753464ebd5Sriastradh ir->array->accept(this); 14763464ebd5Sriastradh src = 
this->result; 14773464ebd5Sriastradh 14783464ebd5Sriastradh if (index) { 14793464ebd5Sriastradh src.index += index->value.i[0] * element_size; 14803464ebd5Sriastradh } else { 14813464ebd5Sriastradh /* Variable index array dereference. It eats the "vec4" of the 14823464ebd5Sriastradh * base of the array and an index that offsets the Mesa register 14833464ebd5Sriastradh * index. 14843464ebd5Sriastradh */ 14853464ebd5Sriastradh ir->array_index->accept(this); 14863464ebd5Sriastradh 14873464ebd5Sriastradh src_reg index_reg; 14883464ebd5Sriastradh 14893464ebd5Sriastradh if (element_size == 1) { 14903464ebd5Sriastradh index_reg = this->result; 14913464ebd5Sriastradh } else { 14923464ebd5Sriastradh index_reg = get_temp(glsl_type::float_type); 14933464ebd5Sriastradh 14943464ebd5Sriastradh emit(ir, OPCODE_MUL, dst_reg(index_reg), 14953464ebd5Sriastradh this->result, src_reg_for_float(element_size)); 14963464ebd5Sriastradh } 14973464ebd5Sriastradh 14983464ebd5Sriastradh /* If there was already a relative address register involved, add the 14993464ebd5Sriastradh * new and the old together to get the new offset. 15003464ebd5Sriastradh */ 15013464ebd5Sriastradh if (src.reladdr != NULL) { 15023464ebd5Sriastradh src_reg accum_reg = get_temp(glsl_type::float_type); 15033464ebd5Sriastradh 15043464ebd5Sriastradh emit(ir, OPCODE_ADD, dst_reg(accum_reg), 15053464ebd5Sriastradh index_reg, *src.reladdr); 15063464ebd5Sriastradh 15073464ebd5Sriastradh index_reg = accum_reg; 15083464ebd5Sriastradh } 15093464ebd5Sriastradh 15103464ebd5Sriastradh src.reladdr = ralloc(mem_ctx, src_reg); 15113464ebd5Sriastradh memcpy(src.reladdr, &index_reg, sizeof(index_reg)); 15123464ebd5Sriastradh } 15133464ebd5Sriastradh 15143464ebd5Sriastradh /* If the type is smaller than a vec4, replicate the last channel out. 
*/ 15153464ebd5Sriastradh if (ir->type->is_scalar() || ir->type->is_vector()) 15163464ebd5Sriastradh src.swizzle = swizzle_for_size(ir->type->vector_elements); 15173464ebd5Sriastradh else 15183464ebd5Sriastradh src.swizzle = SWIZZLE_NOOP; 15193464ebd5Sriastradh 15203464ebd5Sriastradh this->result = src; 15213464ebd5Sriastradh} 15223464ebd5Sriastradh 15233464ebd5Sriastradhvoid 15243464ebd5Sriastradhir_to_mesa_visitor::visit(ir_dereference_record *ir) 15253464ebd5Sriastradh{ 15263464ebd5Sriastradh unsigned int i; 15273464ebd5Sriastradh const glsl_type *struct_type = ir->record->type; 15283464ebd5Sriastradh int offset = 0; 15293464ebd5Sriastradh 15303464ebd5Sriastradh ir->record->accept(this); 15313464ebd5Sriastradh 153201e04c3fSmrg assert(ir->field_idx >= 0); 15333464ebd5Sriastradh for (i = 0; i < struct_type->length; i++) { 153401e04c3fSmrg if (i == (unsigned) ir->field_idx) 15353464ebd5Sriastradh break; 15363464ebd5Sriastradh offset += type_size(struct_type->fields.structure[i].type); 15373464ebd5Sriastradh } 15383464ebd5Sriastradh 15393464ebd5Sriastradh /* If the type is smaller than a vec4, replicate the last channel out. */ 15403464ebd5Sriastradh if (ir->type->is_scalar() || ir->type->is_vector()) 15413464ebd5Sriastradh this->result.swizzle = swizzle_for_size(ir->type->vector_elements); 15423464ebd5Sriastradh else 15433464ebd5Sriastradh this->result.swizzle = SWIZZLE_NOOP; 15443464ebd5Sriastradh 15453464ebd5Sriastradh this->result.index += offset; 15463464ebd5Sriastradh} 15473464ebd5Sriastradh 15483464ebd5Sriastradh/** 15493464ebd5Sriastradh * We want to be careful in assignment setup to hit the actual storage 15503464ebd5Sriastradh * instead of potentially using a temporary like we might with the 15513464ebd5Sriastradh * ir_dereference handler. 
15523464ebd5Sriastradh */ 15533464ebd5Sriastradhstatic dst_reg 15543464ebd5Sriastradhget_assignment_lhs(ir_dereference *ir, ir_to_mesa_visitor *v) 15553464ebd5Sriastradh{ 15563464ebd5Sriastradh /* The LHS must be a dereference. If the LHS is a variable indexed array 15573464ebd5Sriastradh * access of a vector, it must be separated into a series conditional moves 15583464ebd5Sriastradh * before reaching this point (see ir_vec_index_to_cond_assign). 15593464ebd5Sriastradh */ 15603464ebd5Sriastradh assert(ir->as_dereference()); 15613464ebd5Sriastradh ir_dereference_array *deref_array = ir->as_dereference_array(); 15623464ebd5Sriastradh if (deref_array) { 15633464ebd5Sriastradh assert(!deref_array->array->type->is_vector()); 15643464ebd5Sriastradh } 15653464ebd5Sriastradh 15663464ebd5Sriastradh /* Use the rvalue deref handler for the most part. We'll ignore 15673464ebd5Sriastradh * swizzles in it and write swizzles using writemask, though. 15683464ebd5Sriastradh */ 15693464ebd5Sriastradh ir->accept(v); 15703464ebd5Sriastradh return dst_reg(v->result); 15713464ebd5Sriastradh} 15723464ebd5Sriastradh 157301e04c3fSmrg/* Calculate the sampler index and also calculate the base uniform location 157401e04c3fSmrg * for struct members. 
157501e04c3fSmrg */ 157601e04c3fSmrgstatic void 157701e04c3fSmrgcalc_sampler_offsets(struct gl_shader_program *prog, ir_dereference *deref, 157801e04c3fSmrg unsigned *offset, unsigned *array_elements, 157901e04c3fSmrg unsigned *location) 158001e04c3fSmrg{ 158101e04c3fSmrg if (deref->ir_type == ir_type_dereference_variable) 158201e04c3fSmrg return; 158301e04c3fSmrg 158401e04c3fSmrg switch (deref->ir_type) { 158501e04c3fSmrg case ir_type_dereference_array: { 158601e04c3fSmrg ir_dereference_array *deref_arr = deref->as_dereference_array(); 158701e04c3fSmrg 158801e04c3fSmrg void *mem_ctx = ralloc_parent(deref_arr); 158901e04c3fSmrg ir_constant *array_index = 159001e04c3fSmrg deref_arr->array_index->constant_expression_value(mem_ctx); 159101e04c3fSmrg 159201e04c3fSmrg if (!array_index) { 159301e04c3fSmrg /* GLSL 1.10 and 1.20 allowed variable sampler array indices, 159401e04c3fSmrg * while GLSL 1.30 requires that the array indices be 159501e04c3fSmrg * constant integer expressions. We don't expect any driver 159601e04c3fSmrg * to actually work with a really variable array index, so 159701e04c3fSmrg * all that would work would be an unrolled loop counter that ends 159801e04c3fSmrg * up being constant above. 
159901e04c3fSmrg */ 160001e04c3fSmrg ralloc_strcat(&prog->data->InfoLog, 160101e04c3fSmrg "warning: Variable sampler array index unsupported.\n" 160201e04c3fSmrg "This feature of the language was removed in GLSL 1.20 " 160301e04c3fSmrg "and is unlikely to be supported for 1.10 in Mesa.\n"); 160401e04c3fSmrg } else { 160501e04c3fSmrg *offset += array_index->value.u[0] * *array_elements; 160601e04c3fSmrg } 160701e04c3fSmrg 160801e04c3fSmrg *array_elements *= deref_arr->array->type->length; 160901e04c3fSmrg 161001e04c3fSmrg calc_sampler_offsets(prog, deref_arr->array->as_dereference(), 161101e04c3fSmrg offset, array_elements, location); 161201e04c3fSmrg break; 161301e04c3fSmrg } 161401e04c3fSmrg 161501e04c3fSmrg case ir_type_dereference_record: { 161601e04c3fSmrg ir_dereference_record *deref_record = deref->as_dereference_record(); 161701e04c3fSmrg unsigned field_index = deref_record->field_idx; 161801e04c3fSmrg *location += 1619b9abf16eSmaya deref_record->record->type->struct_location_offset(field_index); 162001e04c3fSmrg calc_sampler_offsets(prog, deref_record->record->as_dereference(), 162101e04c3fSmrg offset, array_elements, location); 162201e04c3fSmrg break; 162301e04c3fSmrg } 162401e04c3fSmrg 162501e04c3fSmrg default: 162601e04c3fSmrg unreachable("Invalid deref type"); 162701e04c3fSmrg break; 162801e04c3fSmrg } 162901e04c3fSmrg} 163001e04c3fSmrg 163101e04c3fSmrgstatic int 163201e04c3fSmrgget_sampler_uniform_value(class ir_dereference *sampler, 163301e04c3fSmrg struct gl_shader_program *shader_program, 163401e04c3fSmrg const struct gl_program *prog) 163501e04c3fSmrg{ 163601e04c3fSmrg GLuint shader = _mesa_program_enum_to_shader_stage(prog->Target); 163701e04c3fSmrg ir_variable *var = sampler->variable_referenced(); 163801e04c3fSmrg unsigned location = var->data.location; 163901e04c3fSmrg unsigned array_elements = 1; 164001e04c3fSmrg unsigned offset = 0; 164101e04c3fSmrg 164201e04c3fSmrg calc_sampler_offsets(shader_program, sampler, &offset, &array_elements, 
164301e04c3fSmrg &location); 164401e04c3fSmrg 164501e04c3fSmrg assert(shader_program->data->UniformStorage[location].opaque[shader].active); 164601e04c3fSmrg return shader_program->data->UniformStorage[location].opaque[shader].index + 164701e04c3fSmrg offset; 164801e04c3fSmrg} 164901e04c3fSmrg 16503464ebd5Sriastradh/** 16513464ebd5Sriastradh * Process the condition of a conditional assignment 16523464ebd5Sriastradh * 16533464ebd5Sriastradh * Examines the condition of a conditional assignment to generate the optimal 16543464ebd5Sriastradh * first operand of a \c CMP instruction. If the condition is a relational 16553464ebd5Sriastradh * operator with 0 (e.g., \c ir_binop_less), the value being compared will be 16563464ebd5Sriastradh * used as the source for the \c CMP instruction. Otherwise the comparison 16573464ebd5Sriastradh * is processed to a boolean result, and the boolean result is used as the 16583464ebd5Sriastradh * operand to the CMP instruction. 16593464ebd5Sriastradh */ 16603464ebd5Sriastradhbool 16613464ebd5Sriastradhir_to_mesa_visitor::process_move_condition(ir_rvalue *ir) 16623464ebd5Sriastradh{ 16633464ebd5Sriastradh ir_rvalue *src_ir = ir; 16643464ebd5Sriastradh bool negate = true; 16653464ebd5Sriastradh bool switch_order = false; 16663464ebd5Sriastradh 16673464ebd5Sriastradh ir_expression *const expr = ir->as_expression(); 166801e04c3fSmrg if ((expr != NULL) && (expr->num_operands == 2)) { 16693464ebd5Sriastradh bool zero_on_left = false; 16703464ebd5Sriastradh 16713464ebd5Sriastradh if (expr->operands[0]->is_zero()) { 16723464ebd5Sriastradh src_ir = expr->operands[1]; 16733464ebd5Sriastradh zero_on_left = true; 16743464ebd5Sriastradh } else if (expr->operands[1]->is_zero()) { 16753464ebd5Sriastradh src_ir = expr->operands[0]; 16763464ebd5Sriastradh zero_on_left = false; 16773464ebd5Sriastradh } 16783464ebd5Sriastradh 16793464ebd5Sriastradh /* a is - 0 + - 0 + 16803464ebd5Sriastradh * (a < 0) T F F ( a < 0) T F F 16813464ebd5Sriastradh * (0 < a) F F 
T (-a < 0) F F T 16823464ebd5Sriastradh * (a >= 0) F T T ( a < 0) T F F (swap order of other operands) 16833464ebd5Sriastradh * (0 >= a) T T F (-a < 0) F F T (swap order of other operands) 16843464ebd5Sriastradh * 16853464ebd5Sriastradh * Note that exchanging the order of 0 and 'a' in the comparison simply 16863464ebd5Sriastradh * means that the value of 'a' should be negated. 16873464ebd5Sriastradh */ 16883464ebd5Sriastradh if (src_ir != ir) { 16893464ebd5Sriastradh switch (expr->operation) { 16903464ebd5Sriastradh case ir_binop_less: 16913464ebd5Sriastradh switch_order = false; 16923464ebd5Sriastradh negate = zero_on_left; 16933464ebd5Sriastradh break; 16943464ebd5Sriastradh 16953464ebd5Sriastradh case ir_binop_gequal: 16963464ebd5Sriastradh switch_order = true; 16973464ebd5Sriastradh negate = zero_on_left; 16983464ebd5Sriastradh break; 16993464ebd5Sriastradh 17003464ebd5Sriastradh default: 17013464ebd5Sriastradh /* This isn't the right kind of comparison afterall, so make sure 17023464ebd5Sriastradh * the whole condition is visited. 17033464ebd5Sriastradh */ 17043464ebd5Sriastradh src_ir = ir; 17053464ebd5Sriastradh break; 17063464ebd5Sriastradh } 17073464ebd5Sriastradh } 17083464ebd5Sriastradh } 17093464ebd5Sriastradh 17103464ebd5Sriastradh src_ir->accept(this); 17113464ebd5Sriastradh 17123464ebd5Sriastradh /* We use the OPCODE_CMP (a < 0 ? b : c) for conditional moves, and the 17133464ebd5Sriastradh * condition we produced is 0.0 or 1.0. By flipping the sign, we can 17143464ebd5Sriastradh * choose which value OPCODE_CMP produces without an extra instruction 17153464ebd5Sriastradh * computing the condition. 
17163464ebd5Sriastradh */ 17173464ebd5Sriastradh if (negate) 17183464ebd5Sriastradh this->result.negate = ~this->result.negate; 17193464ebd5Sriastradh 17203464ebd5Sriastradh return switch_order; 17213464ebd5Sriastradh} 17223464ebd5Sriastradh 17233464ebd5Sriastradhvoid 17243464ebd5Sriastradhir_to_mesa_visitor::visit(ir_assignment *ir) 17253464ebd5Sriastradh{ 17263464ebd5Sriastradh dst_reg l; 17273464ebd5Sriastradh src_reg r; 17283464ebd5Sriastradh int i; 17293464ebd5Sriastradh 17303464ebd5Sriastradh ir->rhs->accept(this); 17313464ebd5Sriastradh r = this->result; 17323464ebd5Sriastradh 17333464ebd5Sriastradh l = get_assignment_lhs(ir->lhs, this); 17343464ebd5Sriastradh 17353464ebd5Sriastradh /* FINISHME: This should really set to the correct maximal writemask for each 17363464ebd5Sriastradh * FINISHME: component written (in the loops below). This case can only 17373464ebd5Sriastradh * FINISHME: occur for matrices, arrays, and structures. 17383464ebd5Sriastradh */ 17393464ebd5Sriastradh if (ir->write_mask == 0) { 17403464ebd5Sriastradh assert(!ir->lhs->type->is_scalar() && !ir->lhs->type->is_vector()); 17413464ebd5Sriastradh l.writemask = WRITEMASK_XYZW; 17423464ebd5Sriastradh } else if (ir->lhs->type->is_scalar()) { 17433464ebd5Sriastradh /* FINISHME: This hack makes writing to gl_FragDepth, which lives in the 17443464ebd5Sriastradh * FINISHME: W component of fragment shader output zero, work correctly. 
17453464ebd5Sriastradh */ 17463464ebd5Sriastradh l.writemask = WRITEMASK_XYZW; 17473464ebd5Sriastradh } else { 17483464ebd5Sriastradh int swizzles[4]; 17493464ebd5Sriastradh int first_enabled_chan = 0; 17503464ebd5Sriastradh int rhs_chan = 0; 17513464ebd5Sriastradh 17523464ebd5Sriastradh assert(ir->lhs->type->is_vector()); 17533464ebd5Sriastradh l.writemask = ir->write_mask; 17543464ebd5Sriastradh 17553464ebd5Sriastradh for (int i = 0; i < 4; i++) { 17563464ebd5Sriastradh if (l.writemask & (1 << i)) { 17573464ebd5Sriastradh first_enabled_chan = GET_SWZ(r.swizzle, i); 17583464ebd5Sriastradh break; 17593464ebd5Sriastradh } 17603464ebd5Sriastradh } 17613464ebd5Sriastradh 17623464ebd5Sriastradh /* Swizzle a small RHS vector into the channels being written. 17633464ebd5Sriastradh * 17643464ebd5Sriastradh * glsl ir treats write_mask as dictating how many channels are 17653464ebd5Sriastradh * present on the RHS while Mesa IR treats write_mask as just 17663464ebd5Sriastradh * showing which channels of the vec4 RHS get written. 
17673464ebd5Sriastradh */ 17683464ebd5Sriastradh for (int i = 0; i < 4; i++) { 17693464ebd5Sriastradh if (l.writemask & (1 << i)) 17703464ebd5Sriastradh swizzles[i] = GET_SWZ(r.swizzle, rhs_chan++); 17713464ebd5Sriastradh else 17723464ebd5Sriastradh swizzles[i] = first_enabled_chan; 17733464ebd5Sriastradh } 17743464ebd5Sriastradh r.swizzle = MAKE_SWIZZLE4(swizzles[0], swizzles[1], 17753464ebd5Sriastradh swizzles[2], swizzles[3]); 17763464ebd5Sriastradh } 17773464ebd5Sriastradh 17783464ebd5Sriastradh assert(l.file != PROGRAM_UNDEFINED); 17793464ebd5Sriastradh assert(r.file != PROGRAM_UNDEFINED); 17803464ebd5Sriastradh 17813464ebd5Sriastradh if (ir->condition) { 17823464ebd5Sriastradh const bool switch_order = this->process_move_condition(ir->condition); 17833464ebd5Sriastradh src_reg condition = this->result; 17843464ebd5Sriastradh 17853464ebd5Sriastradh for (i = 0; i < type_size(ir->lhs->type); i++) { 17863464ebd5Sriastradh if (switch_order) { 17873464ebd5Sriastradh emit(ir, OPCODE_CMP, l, condition, src_reg(l), r); 17883464ebd5Sriastradh } else { 17893464ebd5Sriastradh emit(ir, OPCODE_CMP, l, condition, r, src_reg(l)); 17903464ebd5Sriastradh } 17913464ebd5Sriastradh 17923464ebd5Sriastradh l.index++; 17933464ebd5Sriastradh r.index++; 17943464ebd5Sriastradh } 17953464ebd5Sriastradh } else { 17963464ebd5Sriastradh for (i = 0; i < type_size(ir->lhs->type); i++) { 17973464ebd5Sriastradh emit(ir, OPCODE_MOV, l, r); 17983464ebd5Sriastradh l.index++; 17993464ebd5Sriastradh r.index++; 18003464ebd5Sriastradh } 18013464ebd5Sriastradh } 18023464ebd5Sriastradh} 18033464ebd5Sriastradh 18043464ebd5Sriastradh 18053464ebd5Sriastradhvoid 18063464ebd5Sriastradhir_to_mesa_visitor::visit(ir_constant *ir) 18073464ebd5Sriastradh{ 18083464ebd5Sriastradh src_reg src; 18093464ebd5Sriastradh GLfloat stack_vals[4] = { 0 }; 18103464ebd5Sriastradh GLfloat *values = stack_vals; 18113464ebd5Sriastradh unsigned int i; 18123464ebd5Sriastradh 18133464ebd5Sriastradh /* Unfortunately, 4 floats is all 
we can get into 18143464ebd5Sriastradh * _mesa_add_unnamed_constant. So, make a temp to store an 18153464ebd5Sriastradh * aggregate constant and move each constant value into it. If we 18163464ebd5Sriastradh * get lucky, copy propagation will eliminate the extra moves. 18173464ebd5Sriastradh */ 18183464ebd5Sriastradh 1819b9abf16eSmaya if (ir->type->is_struct()) { 18203464ebd5Sriastradh src_reg temp_base = get_temp(ir->type); 18213464ebd5Sriastradh dst_reg temp = dst_reg(temp_base); 18223464ebd5Sriastradh 182301e04c3fSmrg for (i = 0; i < ir->type->length; i++) { 182401e04c3fSmrg ir_constant *const field_value = ir->get_record_field(i); 18253464ebd5Sriastradh int size = type_size(field_value->type); 18263464ebd5Sriastradh 18273464ebd5Sriastradh assert(size > 0); 18283464ebd5Sriastradh 18293464ebd5Sriastradh field_value->accept(this); 18303464ebd5Sriastradh src = this->result; 18313464ebd5Sriastradh 183201e04c3fSmrg for (unsigned j = 0; j < (unsigned int)size; j++) { 18333464ebd5Sriastradh emit(ir, OPCODE_MOV, temp, src); 18343464ebd5Sriastradh 18353464ebd5Sriastradh src.index++; 18363464ebd5Sriastradh temp.index++; 18373464ebd5Sriastradh } 18383464ebd5Sriastradh } 18393464ebd5Sriastradh this->result = temp_base; 18403464ebd5Sriastradh return; 18413464ebd5Sriastradh } 18423464ebd5Sriastradh 18433464ebd5Sriastradh if (ir->type->is_array()) { 18443464ebd5Sriastradh src_reg temp_base = get_temp(ir->type); 18453464ebd5Sriastradh dst_reg temp = dst_reg(temp_base); 18463464ebd5Sriastradh int size = type_size(ir->type->fields.array); 18473464ebd5Sriastradh 18483464ebd5Sriastradh assert(size > 0); 18493464ebd5Sriastradh 18503464ebd5Sriastradh for (i = 0; i < ir->type->length; i++) { 185101e04c3fSmrg ir->const_elements[i]->accept(this); 18523464ebd5Sriastradh src = this->result; 18533464ebd5Sriastradh for (int j = 0; j < size; j++) { 18543464ebd5Sriastradh emit(ir, OPCODE_MOV, temp, src); 18553464ebd5Sriastradh 18563464ebd5Sriastradh src.index++; 18573464ebd5Sriastradh 
temp.index++; 18583464ebd5Sriastradh } 18593464ebd5Sriastradh } 18603464ebd5Sriastradh this->result = temp_base; 18613464ebd5Sriastradh return; 18623464ebd5Sriastradh } 18633464ebd5Sriastradh 18643464ebd5Sriastradh if (ir->type->is_matrix()) { 18653464ebd5Sriastradh src_reg mat = get_temp(ir->type); 18663464ebd5Sriastradh dst_reg mat_column = dst_reg(mat); 18673464ebd5Sriastradh 18683464ebd5Sriastradh for (i = 0; i < ir->type->matrix_columns; i++) { 186901e04c3fSmrg assert(ir->type->is_float()); 18703464ebd5Sriastradh values = &ir->value.f[i * ir->type->vector_elements]; 18713464ebd5Sriastradh 18723464ebd5Sriastradh src = src_reg(PROGRAM_CONSTANT, -1, NULL); 18733464ebd5Sriastradh src.index = _mesa_add_unnamed_constant(this->prog->Parameters, 1874af69d88dSmrg (gl_constant_value *) values, 18753464ebd5Sriastradh ir->type->vector_elements, 18763464ebd5Sriastradh &src.swizzle); 18773464ebd5Sriastradh emit(ir, OPCODE_MOV, mat_column, src); 18783464ebd5Sriastradh 18793464ebd5Sriastradh mat_column.index++; 18803464ebd5Sriastradh } 18813464ebd5Sriastradh 18823464ebd5Sriastradh this->result = mat; 18833464ebd5Sriastradh return; 18843464ebd5Sriastradh } 18853464ebd5Sriastradh 18863464ebd5Sriastradh src.file = PROGRAM_CONSTANT; 18873464ebd5Sriastradh switch (ir->type->base_type) { 18883464ebd5Sriastradh case GLSL_TYPE_FLOAT: 18893464ebd5Sriastradh values = &ir->value.f[0]; 18903464ebd5Sriastradh break; 18913464ebd5Sriastradh case GLSL_TYPE_UINT: 18923464ebd5Sriastradh for (i = 0; i < ir->type->vector_elements; i++) { 18933464ebd5Sriastradh values[i] = ir->value.u[i]; 18943464ebd5Sriastradh } 18953464ebd5Sriastradh break; 18963464ebd5Sriastradh case GLSL_TYPE_INT: 18973464ebd5Sriastradh for (i = 0; i < ir->type->vector_elements; i++) { 18983464ebd5Sriastradh values[i] = ir->value.i[i]; 18993464ebd5Sriastradh } 19003464ebd5Sriastradh break; 19013464ebd5Sriastradh case GLSL_TYPE_BOOL: 19023464ebd5Sriastradh for (i = 0; i < ir->type->vector_elements; i++) { 
19033464ebd5Sriastradh values[i] = ir->value.b[i]; 19043464ebd5Sriastradh } 19053464ebd5Sriastradh break; 19063464ebd5Sriastradh default: 19073464ebd5Sriastradh assert(!"Non-float/uint/int/bool constant"); 19083464ebd5Sriastradh } 19093464ebd5Sriastradh 19103464ebd5Sriastradh this->result = src_reg(PROGRAM_CONSTANT, -1, ir->type); 19113464ebd5Sriastradh this->result.index = _mesa_add_unnamed_constant(this->prog->Parameters, 1912af69d88dSmrg (gl_constant_value *) values, 19133464ebd5Sriastradh ir->type->vector_elements, 19143464ebd5Sriastradh &this->result.swizzle); 19153464ebd5Sriastradh} 19163464ebd5Sriastradh 19173464ebd5Sriastradhvoid 1918af69d88dSmrgir_to_mesa_visitor::visit(ir_call *) 19193464ebd5Sriastradh{ 1920af69d88dSmrg assert(!"ir_to_mesa: All function calls should have been inlined by now."); 19213464ebd5Sriastradh} 19223464ebd5Sriastradh 19233464ebd5Sriastradhvoid 19243464ebd5Sriastradhir_to_mesa_visitor::visit(ir_texture *ir) 19253464ebd5Sriastradh{ 19263464ebd5Sriastradh src_reg result_src, coord, lod_info, projector, dx, dy; 19273464ebd5Sriastradh dst_reg result_dst, coord_dst; 19283464ebd5Sriastradh ir_to_mesa_instruction *inst = NULL; 19293464ebd5Sriastradh prog_opcode opcode = OPCODE_NOP; 19303464ebd5Sriastradh 1931af69d88dSmrg if (ir->op == ir_txs) 1932af69d88dSmrg this->result = src_reg_for_float(0.0); 1933af69d88dSmrg else 1934af69d88dSmrg ir->coordinate->accept(this); 19353464ebd5Sriastradh 19363464ebd5Sriastradh /* Put our coords in a temp. We'll need to modify them for shadow, 193701e04c3fSmrg * projection, or LOD, so the only case we'd use it as-is is if 19383464ebd5Sriastradh * we're doing plain old texturing. Mesa IR optimization should 19393464ebd5Sriastradh * handle cleaning up our mess in that case. 
19403464ebd5Sriastradh */ 19413464ebd5Sriastradh coord = get_temp(glsl_type::vec4_type); 19423464ebd5Sriastradh coord_dst = dst_reg(coord); 19433464ebd5Sriastradh emit(ir, OPCODE_MOV, coord_dst, this->result); 19443464ebd5Sriastradh 19453464ebd5Sriastradh if (ir->projector) { 19463464ebd5Sriastradh ir->projector->accept(this); 19473464ebd5Sriastradh projector = this->result; 19483464ebd5Sriastradh } 19493464ebd5Sriastradh 19503464ebd5Sriastradh /* Storage for our result. Ideally for an assignment we'd be using 19513464ebd5Sriastradh * the actual storage for the result here, instead. 19523464ebd5Sriastradh */ 19533464ebd5Sriastradh result_src = get_temp(glsl_type::vec4_type); 19543464ebd5Sriastradh result_dst = dst_reg(result_src); 19553464ebd5Sriastradh 19563464ebd5Sriastradh switch (ir->op) { 19573464ebd5Sriastradh case ir_tex: 1958af69d88dSmrg case ir_txs: 19593464ebd5Sriastradh opcode = OPCODE_TEX; 19603464ebd5Sriastradh break; 19613464ebd5Sriastradh case ir_txb: 19623464ebd5Sriastradh opcode = OPCODE_TXB; 19633464ebd5Sriastradh ir->lod_info.bias->accept(this); 19643464ebd5Sriastradh lod_info = this->result; 19653464ebd5Sriastradh break; 1966af69d88dSmrg case ir_txf: 1967af69d88dSmrg /* Pretend to be TXL so the sampler, coordinate, lod are available */ 19683464ebd5Sriastradh case ir_txl: 19693464ebd5Sriastradh opcode = OPCODE_TXL; 19703464ebd5Sriastradh ir->lod_info.lod->accept(this); 19713464ebd5Sriastradh lod_info = this->result; 19723464ebd5Sriastradh break; 19733464ebd5Sriastradh case ir_txd: 19743464ebd5Sriastradh opcode = OPCODE_TXD; 19753464ebd5Sriastradh ir->lod_info.grad.dPdx->accept(this); 19763464ebd5Sriastradh dx = this->result; 19773464ebd5Sriastradh ir->lod_info.grad.dPdy->accept(this); 19783464ebd5Sriastradh dy = this->result; 19793464ebd5Sriastradh break; 1980af69d88dSmrg case ir_txf_ms: 1981af69d88dSmrg assert(!"Unexpected ir_txf_ms opcode"); 1982af69d88dSmrg break; 1983af69d88dSmrg case ir_lod: 1984af69d88dSmrg assert(!"Unexpected ir_lod 
opcode"); 1985af69d88dSmrg break; 1986af69d88dSmrg case ir_tg4: 1987af69d88dSmrg assert(!"Unexpected ir_tg4 opcode"); 1988af69d88dSmrg break; 1989af69d88dSmrg case ir_query_levels: 1990af69d88dSmrg assert(!"Unexpected ir_query_levels opcode"); 19913464ebd5Sriastradh break; 199201e04c3fSmrg case ir_samples_identical: 199301e04c3fSmrg unreachable("Unexpected ir_samples_identical opcode"); 199401e04c3fSmrg case ir_texture_samples: 199501e04c3fSmrg unreachable("Unexpected ir_texture_samples opcode"); 19963464ebd5Sriastradh } 19973464ebd5Sriastradh 1998af69d88dSmrg const glsl_type *sampler_type = ir->sampler->type; 1999af69d88dSmrg 20003464ebd5Sriastradh if (ir->projector) { 20013464ebd5Sriastradh if (opcode == OPCODE_TEX) { 20023464ebd5Sriastradh /* Slot the projector in as the last component of the coord. */ 20033464ebd5Sriastradh coord_dst.writemask = WRITEMASK_W; 20043464ebd5Sriastradh emit(ir, OPCODE_MOV, coord_dst, projector); 20053464ebd5Sriastradh coord_dst.writemask = WRITEMASK_XYZW; 20063464ebd5Sriastradh opcode = OPCODE_TXP; 20073464ebd5Sriastradh } else { 20083464ebd5Sriastradh src_reg coord_w = coord; 20093464ebd5Sriastradh coord_w.swizzle = SWIZZLE_WWWW; 20103464ebd5Sriastradh 20113464ebd5Sriastradh /* For the other TEX opcodes there's no projective version 20123464ebd5Sriastradh * since the last slot is taken up by lod info. Do the 20133464ebd5Sriastradh * projective divide now. 20143464ebd5Sriastradh */ 20153464ebd5Sriastradh coord_dst.writemask = WRITEMASK_W; 20163464ebd5Sriastradh emit(ir, OPCODE_RCP, coord_dst, projector); 20173464ebd5Sriastradh 20183464ebd5Sriastradh /* In the case where we have to project the coordinates "by hand," 201901e04c3fSmrg * the shadow comparator value must also be projected. 20203464ebd5Sriastradh */ 20213464ebd5Sriastradh src_reg tmp_src = coord; 202201e04c3fSmrg if (ir->shadow_comparator) { 20233464ebd5Sriastradh /* Slot the shadow value in as the second to last component of the 20243464ebd5Sriastradh * coord. 
20253464ebd5Sriastradh */ 202601e04c3fSmrg ir->shadow_comparator->accept(this); 20273464ebd5Sriastradh 20283464ebd5Sriastradh tmp_src = get_temp(glsl_type::vec4_type); 20293464ebd5Sriastradh dst_reg tmp_dst = dst_reg(tmp_src); 20303464ebd5Sriastradh 2031af69d88dSmrg /* Projective division not allowed for array samplers. */ 2032af69d88dSmrg assert(!sampler_type->sampler_array); 2033af69d88dSmrg 20343464ebd5Sriastradh tmp_dst.writemask = WRITEMASK_Z; 20353464ebd5Sriastradh emit(ir, OPCODE_MOV, tmp_dst, this->result); 20363464ebd5Sriastradh 20373464ebd5Sriastradh tmp_dst.writemask = WRITEMASK_XY; 20383464ebd5Sriastradh emit(ir, OPCODE_MOV, tmp_dst, coord); 20393464ebd5Sriastradh } 20403464ebd5Sriastradh 20413464ebd5Sriastradh coord_dst.writemask = WRITEMASK_XYZ; 20423464ebd5Sriastradh emit(ir, OPCODE_MUL, coord_dst, tmp_src, coord_w); 20433464ebd5Sriastradh 20443464ebd5Sriastradh coord_dst.writemask = WRITEMASK_XYZW; 20453464ebd5Sriastradh coord.swizzle = SWIZZLE_XYZW; 20463464ebd5Sriastradh } 20473464ebd5Sriastradh } 20483464ebd5Sriastradh 20493464ebd5Sriastradh /* If projection is done and the opcode is not OPCODE_TXP, then the shadow 205001e04c3fSmrg * comparator was put in the correct place (and projected) by the code, 20513464ebd5Sriastradh * above, that handles by-hand projection. 20523464ebd5Sriastradh */ 205301e04c3fSmrg if (ir->shadow_comparator && (!ir->projector || opcode == OPCODE_TXP)) { 20543464ebd5Sriastradh /* Slot the shadow value in as the second to last component of the 20553464ebd5Sriastradh * coord. 20563464ebd5Sriastradh */ 205701e04c3fSmrg ir->shadow_comparator->accept(this); 2058af69d88dSmrg 2059af69d88dSmrg /* XXX This will need to be updated for cubemap array samplers. 
*/ 2060af69d88dSmrg if (sampler_type->sampler_dimensionality == GLSL_SAMPLER_DIM_2D && 2061af69d88dSmrg sampler_type->sampler_array) { 2062af69d88dSmrg coord_dst.writemask = WRITEMASK_W; 2063af69d88dSmrg } else { 2064af69d88dSmrg coord_dst.writemask = WRITEMASK_Z; 2065af69d88dSmrg } 2066af69d88dSmrg 20673464ebd5Sriastradh emit(ir, OPCODE_MOV, coord_dst, this->result); 20683464ebd5Sriastradh coord_dst.writemask = WRITEMASK_XYZW; 20693464ebd5Sriastradh } 20703464ebd5Sriastradh 20713464ebd5Sriastradh if (opcode == OPCODE_TXL || opcode == OPCODE_TXB) { 20723464ebd5Sriastradh /* Mesa IR stores lod or lod bias in the last channel of the coords. */ 20733464ebd5Sriastradh coord_dst.writemask = WRITEMASK_W; 20743464ebd5Sriastradh emit(ir, OPCODE_MOV, coord_dst, lod_info); 20753464ebd5Sriastradh coord_dst.writemask = WRITEMASK_XYZW; 20763464ebd5Sriastradh } 20773464ebd5Sriastradh 20783464ebd5Sriastradh if (opcode == OPCODE_TXD) 20793464ebd5Sriastradh inst = emit(ir, opcode, result_dst, coord, dx, dy); 20803464ebd5Sriastradh else 20813464ebd5Sriastradh inst = emit(ir, opcode, result_dst, coord); 20823464ebd5Sriastradh 208301e04c3fSmrg if (ir->shadow_comparator) 20843464ebd5Sriastradh inst->tex_shadow = GL_TRUE; 20853464ebd5Sriastradh 208601e04c3fSmrg inst->sampler = get_sampler_uniform_value(ir->sampler, shader_program, 208701e04c3fSmrg prog); 20883464ebd5Sriastradh 20893464ebd5Sriastradh switch (sampler_type->sampler_dimensionality) { 20903464ebd5Sriastradh case GLSL_SAMPLER_DIM_1D: 20913464ebd5Sriastradh inst->tex_target = (sampler_type->sampler_array) 20923464ebd5Sriastradh ? TEXTURE_1D_ARRAY_INDEX : TEXTURE_1D_INDEX; 20933464ebd5Sriastradh break; 20943464ebd5Sriastradh case GLSL_SAMPLER_DIM_2D: 20953464ebd5Sriastradh inst->tex_target = (sampler_type->sampler_array) 20963464ebd5Sriastradh ? 
TEXTURE_2D_ARRAY_INDEX : TEXTURE_2D_INDEX; 20973464ebd5Sriastradh break; 20983464ebd5Sriastradh case GLSL_SAMPLER_DIM_3D: 20993464ebd5Sriastradh inst->tex_target = TEXTURE_3D_INDEX; 21003464ebd5Sriastradh break; 21013464ebd5Sriastradh case GLSL_SAMPLER_DIM_CUBE: 21023464ebd5Sriastradh inst->tex_target = TEXTURE_CUBE_INDEX; 21033464ebd5Sriastradh break; 21043464ebd5Sriastradh case GLSL_SAMPLER_DIM_RECT: 21053464ebd5Sriastradh inst->tex_target = TEXTURE_RECT_INDEX; 21063464ebd5Sriastradh break; 21073464ebd5Sriastradh case GLSL_SAMPLER_DIM_BUF: 21083464ebd5Sriastradh assert(!"FINISHME: Implement ARB_texture_buffer_object"); 21093464ebd5Sriastradh break; 2110af69d88dSmrg case GLSL_SAMPLER_DIM_EXTERNAL: 2111af69d88dSmrg inst->tex_target = TEXTURE_EXTERNAL_INDEX; 2112af69d88dSmrg break; 21133464ebd5Sriastradh default: 21143464ebd5Sriastradh assert(!"Should not get here."); 21153464ebd5Sriastradh } 21163464ebd5Sriastradh 21173464ebd5Sriastradh this->result = result_src; 21183464ebd5Sriastradh} 21193464ebd5Sriastradh 21203464ebd5Sriastradhvoid 21213464ebd5Sriastradhir_to_mesa_visitor::visit(ir_return *ir) 21223464ebd5Sriastradh{ 2123af69d88dSmrg /* Non-void functions should have been inlined. We may still emit RETs 2124af69d88dSmrg * from main() unless the EmitNoMainReturn option is set. 
2125af69d88dSmrg */ 2126af69d88dSmrg assert(!ir->get_value()); 21273464ebd5Sriastradh emit(ir, OPCODE_RET); 21283464ebd5Sriastradh} 21293464ebd5Sriastradh 21303464ebd5Sriastradhvoid 21313464ebd5Sriastradhir_to_mesa_visitor::visit(ir_discard *ir) 21323464ebd5Sriastradh{ 213301e04c3fSmrg if (!ir->condition) 213401e04c3fSmrg ir->condition = new(mem_ctx) ir_constant(true); 213501e04c3fSmrg 213601e04c3fSmrg ir->condition->accept(this); 213701e04c3fSmrg this->result.negate = ~this->result.negate; 213801e04c3fSmrg emit(ir, OPCODE_KIL, undef_dst, this->result); 21393464ebd5Sriastradh} 21403464ebd5Sriastradh 21417ec681f3Smrgvoid 21427ec681f3Smrgir_to_mesa_visitor::visit(ir_demote *ir) 21437ec681f3Smrg{ 21447ec681f3Smrg assert(!"demote statement unsupported"); 21457ec681f3Smrg} 21467ec681f3Smrg 21473464ebd5Sriastradhvoid 21483464ebd5Sriastradhir_to_mesa_visitor::visit(ir_if *ir) 21493464ebd5Sriastradh{ 215001e04c3fSmrg ir_to_mesa_instruction *if_inst; 21513464ebd5Sriastradh 21523464ebd5Sriastradh ir->condition->accept(this); 21533464ebd5Sriastradh assert(this->result.file != PROGRAM_UNDEFINED); 21543464ebd5Sriastradh 215501e04c3fSmrg if_inst = emit(ir->condition, OPCODE_IF, undef_dst, this->result); 21563464ebd5Sriastradh 21573464ebd5Sriastradh this->instructions.push_tail(if_inst); 21583464ebd5Sriastradh 21593464ebd5Sriastradh visit_exec_list(&ir->then_instructions, this); 21603464ebd5Sriastradh 21613464ebd5Sriastradh if (!ir->else_instructions.is_empty()) { 21623464ebd5Sriastradh emit(ir->condition, OPCODE_ELSE); 21633464ebd5Sriastradh visit_exec_list(&ir->else_instructions, this); 21643464ebd5Sriastradh } 21653464ebd5Sriastradh 2166af69d88dSmrg emit(ir->condition, OPCODE_ENDIF); 2167af69d88dSmrg} 2168af69d88dSmrg 2169af69d88dSmrgvoid 2170af69d88dSmrgir_to_mesa_visitor::visit(ir_emit_vertex *) 2171af69d88dSmrg{ 2172af69d88dSmrg assert(!"Geometry shaders not supported."); 2173af69d88dSmrg} 2174af69d88dSmrg 2175af69d88dSmrgvoid 
2176af69d88dSmrgir_to_mesa_visitor::visit(ir_end_primitive *) 2177af69d88dSmrg{ 2178af69d88dSmrg assert(!"Geometry shaders not supported."); 21793464ebd5Sriastradh} 21803464ebd5Sriastradh 218101e04c3fSmrgvoid 218201e04c3fSmrgir_to_mesa_visitor::visit(ir_barrier *) 218301e04c3fSmrg{ 218401e04c3fSmrg unreachable("GLSL barrier() not supported."); 218501e04c3fSmrg} 218601e04c3fSmrg 21873464ebd5Sriastradhir_to_mesa_visitor::ir_to_mesa_visitor() 21883464ebd5Sriastradh{ 21893464ebd5Sriastradh result.file = PROGRAM_UNDEFINED; 21903464ebd5Sriastradh next_temp = 1; 21913464ebd5Sriastradh next_signature_id = 1; 21923464ebd5Sriastradh current_function = NULL; 21933464ebd5Sriastradh mem_ctx = ralloc_context(NULL); 21947ec681f3Smrg ctx = NULL; 21957ec681f3Smrg prog = NULL; 21967ec681f3Smrg shader_program = NULL; 21977ec681f3Smrg options = NULL; 21983464ebd5Sriastradh} 21993464ebd5Sriastradh 22003464ebd5Sriastradhir_to_mesa_visitor::~ir_to_mesa_visitor() 22013464ebd5Sriastradh{ 22023464ebd5Sriastradh ralloc_free(mem_ctx); 22033464ebd5Sriastradh} 22043464ebd5Sriastradh 22053464ebd5Sriastradhstatic struct prog_src_register 22063464ebd5Sriastradhmesa_src_reg_from_ir_src_reg(src_reg reg) 22073464ebd5Sriastradh{ 22083464ebd5Sriastradh struct prog_src_register mesa_reg; 22093464ebd5Sriastradh 22103464ebd5Sriastradh mesa_reg.File = reg.file; 22113464ebd5Sriastradh assert(reg.index < (1 << INST_INDEX_BITS)); 22123464ebd5Sriastradh mesa_reg.Index = reg.index; 22133464ebd5Sriastradh mesa_reg.Swizzle = reg.swizzle; 22143464ebd5Sriastradh mesa_reg.RelAddr = reg.reladdr != NULL; 22153464ebd5Sriastradh mesa_reg.Negate = reg.negate; 22163464ebd5Sriastradh 22173464ebd5Sriastradh return mesa_reg; 22183464ebd5Sriastradh} 22193464ebd5Sriastradh 22203464ebd5Sriastradhstatic void 22213464ebd5Sriastradhset_branchtargets(ir_to_mesa_visitor *v, 22223464ebd5Sriastradh struct prog_instruction *mesa_instructions, 22233464ebd5Sriastradh int num_instructions) 22243464ebd5Sriastradh{ 22253464ebd5Sriastradh 
int if_count = 0, loop_count = 0; 22263464ebd5Sriastradh int *if_stack, *loop_stack; 22273464ebd5Sriastradh int if_stack_pos = 0, loop_stack_pos = 0; 22283464ebd5Sriastradh int i, j; 22293464ebd5Sriastradh 22303464ebd5Sriastradh for (i = 0; i < num_instructions; i++) { 22313464ebd5Sriastradh switch (mesa_instructions[i].Opcode) { 22323464ebd5Sriastradh case OPCODE_IF: 22333464ebd5Sriastradh if_count++; 22343464ebd5Sriastradh break; 22353464ebd5Sriastradh case OPCODE_BGNLOOP: 22363464ebd5Sriastradh loop_count++; 22373464ebd5Sriastradh break; 22383464ebd5Sriastradh case OPCODE_BRK: 22393464ebd5Sriastradh case OPCODE_CONT: 22403464ebd5Sriastradh mesa_instructions[i].BranchTarget = -1; 22413464ebd5Sriastradh break; 22423464ebd5Sriastradh default: 22433464ebd5Sriastradh break; 22443464ebd5Sriastradh } 22453464ebd5Sriastradh } 22463464ebd5Sriastradh 22473464ebd5Sriastradh if_stack = rzalloc_array(v->mem_ctx, int, if_count); 22483464ebd5Sriastradh loop_stack = rzalloc_array(v->mem_ctx, int, loop_count); 22493464ebd5Sriastradh 22503464ebd5Sriastradh for (i = 0; i < num_instructions; i++) { 22513464ebd5Sriastradh switch (mesa_instructions[i].Opcode) { 22523464ebd5Sriastradh case OPCODE_IF: 22533464ebd5Sriastradh if_stack[if_stack_pos] = i; 22543464ebd5Sriastradh if_stack_pos++; 22553464ebd5Sriastradh break; 22563464ebd5Sriastradh case OPCODE_ELSE: 22573464ebd5Sriastradh mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i; 22583464ebd5Sriastradh if_stack[if_stack_pos - 1] = i; 22593464ebd5Sriastradh break; 22603464ebd5Sriastradh case OPCODE_ENDIF: 22613464ebd5Sriastradh mesa_instructions[if_stack[if_stack_pos - 1]].BranchTarget = i; 22623464ebd5Sriastradh if_stack_pos--; 22633464ebd5Sriastradh break; 22643464ebd5Sriastradh case OPCODE_BGNLOOP: 22653464ebd5Sriastradh loop_stack[loop_stack_pos] = i; 22663464ebd5Sriastradh loop_stack_pos++; 22673464ebd5Sriastradh break; 22683464ebd5Sriastradh case OPCODE_ENDLOOP: 22693464ebd5Sriastradh loop_stack_pos--; 
22703464ebd5Sriastradh /* Rewrite any breaks/conts at this nesting level (haven't 22713464ebd5Sriastradh * already had a BranchTarget assigned) to point to the end 22723464ebd5Sriastradh * of the loop. 22733464ebd5Sriastradh */ 22743464ebd5Sriastradh for (j = loop_stack[loop_stack_pos]; j < i; j++) { 22753464ebd5Sriastradh if (mesa_instructions[j].Opcode == OPCODE_BRK || 22763464ebd5Sriastradh mesa_instructions[j].Opcode == OPCODE_CONT) { 22773464ebd5Sriastradh if (mesa_instructions[j].BranchTarget == -1) { 22783464ebd5Sriastradh mesa_instructions[j].BranchTarget = i; 22793464ebd5Sriastradh } 22803464ebd5Sriastradh } 22813464ebd5Sriastradh } 22823464ebd5Sriastradh /* The loop ends point at each other. */ 22833464ebd5Sriastradh mesa_instructions[i].BranchTarget = loop_stack[loop_stack_pos]; 22843464ebd5Sriastradh mesa_instructions[loop_stack[loop_stack_pos]].BranchTarget = i; 22853464ebd5Sriastradh break; 22863464ebd5Sriastradh case OPCODE_CAL: 2287af69d88dSmrg foreach_in_list(function_entry, entry, &v->function_signatures) { 22883464ebd5Sriastradh if (entry->sig_id == mesa_instructions[i].BranchTarget) { 22893464ebd5Sriastradh mesa_instructions[i].BranchTarget = entry->inst; 22903464ebd5Sriastradh break; 22913464ebd5Sriastradh } 22923464ebd5Sriastradh } 22933464ebd5Sriastradh break; 22943464ebd5Sriastradh default: 22953464ebd5Sriastradh break; 22963464ebd5Sriastradh } 22973464ebd5Sriastradh } 22983464ebd5Sriastradh} 22993464ebd5Sriastradh 23003464ebd5Sriastradhstatic void 23013464ebd5Sriastradhprint_program(struct prog_instruction *mesa_instructions, 23023464ebd5Sriastradh ir_instruction **mesa_instruction_annotation, 23033464ebd5Sriastradh int num_instructions) 23043464ebd5Sriastradh{ 23053464ebd5Sriastradh ir_instruction *last_ir = NULL; 23063464ebd5Sriastradh int i; 23073464ebd5Sriastradh int indent = 0; 23083464ebd5Sriastradh 23093464ebd5Sriastradh for (i = 0; i < num_instructions; i++) { 23103464ebd5Sriastradh struct prog_instruction *mesa_inst = 
mesa_instructions + i; 23113464ebd5Sriastradh ir_instruction *ir = mesa_instruction_annotation[i]; 23123464ebd5Sriastradh 23133464ebd5Sriastradh fprintf(stdout, "%3d: ", i); 23143464ebd5Sriastradh 23153464ebd5Sriastradh if (last_ir != ir && ir) { 23163464ebd5Sriastradh int j; 23173464ebd5Sriastradh 23183464ebd5Sriastradh for (j = 0; j < indent; j++) { 23193464ebd5Sriastradh fprintf(stdout, " "); 23203464ebd5Sriastradh } 23213464ebd5Sriastradh ir->print(); 23223464ebd5Sriastradh printf("\n"); 23233464ebd5Sriastradh last_ir = ir; 23243464ebd5Sriastradh 23253464ebd5Sriastradh fprintf(stdout, " "); /* line number spacing. */ 23263464ebd5Sriastradh } 23273464ebd5Sriastradh 23283464ebd5Sriastradh indent = _mesa_fprint_instruction_opt(stdout, mesa_inst, indent, 23293464ebd5Sriastradh PROG_PRINT_DEBUG, NULL); 23303464ebd5Sriastradh } 23313464ebd5Sriastradh} 23323464ebd5Sriastradh 2333af69d88dSmrgnamespace { 23343464ebd5Sriastradh 2335af69d88dSmrgclass add_uniform_to_shader : public program_resource_visitor { 2336af69d88dSmrgpublic: 233701e04c3fSmrg add_uniform_to_shader(struct gl_context *ctx, 233801e04c3fSmrg struct gl_shader_program *shader_program, 233901e04c3fSmrg struct gl_program_parameter_list *params) 23407ec681f3Smrg : ctx(ctx), shader_program(shader_program), params(params), idx(-1), 23417ec681f3Smrg var(NULL) 2342af69d88dSmrg { 2343af69d88dSmrg /* empty */ 23443464ebd5Sriastradh } 23453464ebd5Sriastradh 2346af69d88dSmrg void process(ir_variable *var) 2347af69d88dSmrg { 2348af69d88dSmrg this->idx = -1; 234901e04c3fSmrg this->var = var; 235001e04c3fSmrg this->program_resource_visitor::process(var, 235101e04c3fSmrg ctx->Const.UseSTD430AsDefaultPacking); 235201e04c3fSmrg var->data.param_index = this->idx; 23533464ebd5Sriastradh } 23543464ebd5Sriastradh 2355af69d88dSmrgprivate: 2356af69d88dSmrg virtual void visit_field(const glsl_type *type, const char *name, 235701e04c3fSmrg bool row_major, const glsl_type *record_type, 235801e04c3fSmrg const enum 
glsl_interface_packing packing, 235901e04c3fSmrg bool last_field); 23603464ebd5Sriastradh 236101e04c3fSmrg struct gl_context *ctx; 23627ec681f3Smrg struct gl_shader_program *shader_program; 2363af69d88dSmrg struct gl_program_parameter_list *params; 2364af69d88dSmrg int idx; 236501e04c3fSmrg ir_variable *var; 23663464ebd5Sriastradh}; 23673464ebd5Sriastradh 2368af69d88dSmrg} /* anonymous namespace */ 23693464ebd5Sriastradh 2370af69d88dSmrgvoid 2371af69d88dSmrgadd_uniform_to_shader::visit_field(const glsl_type *type, const char *name, 237201e04c3fSmrg bool /* row_major */, 237301e04c3fSmrg const glsl_type * /* record_type */, 237401e04c3fSmrg const enum glsl_interface_packing, 237501e04c3fSmrg bool /* last_field */) 23763464ebd5Sriastradh{ 237701e04c3fSmrg /* opaque types don't use storage in the param list unless they are 237801e04c3fSmrg * bindless samplers or images. 237901e04c3fSmrg */ 238001e04c3fSmrg if (type->contains_opaque() && !var->data.bindless) 238101e04c3fSmrg return; 23823464ebd5Sriastradh 238301e04c3fSmrg /* Add the uniform to the param list */ 238401e04c3fSmrg assert(_mesa_lookup_parameter_index(params, name) < 0); 238501e04c3fSmrg int index = _mesa_lookup_parameter_index(params, name); 238601e04c3fSmrg 238701e04c3fSmrg unsigned num_params = type->arrays_of_arrays_size(); 238801e04c3fSmrg num_params = MAX2(num_params, 1); 238901e04c3fSmrg num_params *= type->without_array()->matrix_columns; 239001e04c3fSmrg 239101e04c3fSmrg bool is_dual_slot = type->without_array()->is_dual_slot(); 239201e04c3fSmrg if (is_dual_slot) 239301e04c3fSmrg num_params *= 2; 239401e04c3fSmrg 23957ec681f3Smrg _mesa_reserve_parameter_storage(params, num_params, num_params); 239601e04c3fSmrg index = params->NumParameters; 239701e04c3fSmrg 239801e04c3fSmrg if (ctx->Const.PackedDriverUniformStorage) { 239901e04c3fSmrg for (unsigned i = 0; i < num_params; i++) { 240001e04c3fSmrg unsigned dmul = type->without_array()->is_64bit() ? 
2 : 1; 240101e04c3fSmrg unsigned comps = type->without_array()->vector_elements * dmul; 240201e04c3fSmrg if (is_dual_slot) { 240301e04c3fSmrg if (i & 0x1) 240401e04c3fSmrg comps -= 4; 240501e04c3fSmrg else 240601e04c3fSmrg comps = 4; 240701e04c3fSmrg } 24083464ebd5Sriastradh 240901e04c3fSmrg _mesa_add_parameter(params, PROGRAM_UNIFORM, name, comps, 241001e04c3fSmrg type->gl_type, NULL, NULL, false); 241101e04c3fSmrg } 2412af69d88dSmrg } else { 241301e04c3fSmrg for (unsigned i = 0; i < num_params; i++) { 241401e04c3fSmrg _mesa_add_parameter(params, PROGRAM_UNIFORM, name, 4, 241501e04c3fSmrg type->gl_type, NULL, NULL, true); 24163464ebd5Sriastradh } 24173464ebd5Sriastradh } 24183464ebd5Sriastradh 2419af69d88dSmrg /* The first part of the uniform that's processed determines the base 2420af69d88dSmrg * location of the whole uniform (for structures). 2421af69d88dSmrg */ 2422af69d88dSmrg if (this->idx < 0) 2423af69d88dSmrg this->idx = index; 24247ec681f3Smrg 24257ec681f3Smrg /* Each Parameter will hold the index to the backing uniform storage. 24267ec681f3Smrg * This avoids relying on names to match parameters and uniform 24277ec681f3Smrg * storages later when associating uniform storage. 
24287ec681f3Smrg */ 24297ec681f3Smrg unsigned location = -1; 24307ec681f3Smrg ASSERTED const bool found = 24317ec681f3Smrg shader_program->UniformHash->get(location, params->Parameters[index].Name); 24327ec681f3Smrg assert(found); 24337ec681f3Smrg 24347ec681f3Smrg for (unsigned i = 0; i < num_params; i++) { 24357ec681f3Smrg struct gl_program_parameter *param = ¶ms->Parameters[index + i]; 24367ec681f3Smrg param->UniformStorageIndex = location; 24377ec681f3Smrg param->MainUniformStorageIndex = params->Parameters[this->idx].UniformStorageIndex; 24387ec681f3Smrg } 24393464ebd5Sriastradh} 24403464ebd5Sriastradh 2441af69d88dSmrg/** 2442af69d88dSmrg * Generate the program parameters list for the user uniforms in a shader 2443af69d88dSmrg * 2444af69d88dSmrg * \param shader_program Linked shader program. This is only used to 2445af69d88dSmrg * emit possible link errors to the info log. 2446af69d88dSmrg * \param sh Shader whose uniforms are to be processed. 2447af69d88dSmrg * \param params Parameter list to be filled in. 
2448af69d88dSmrg */ 2449af69d88dSmrgvoid 245001e04c3fSmrg_mesa_generate_parameters_list_for_uniforms(struct gl_context *ctx, 245101e04c3fSmrg struct gl_shader_program 2452af69d88dSmrg *shader_program, 245301e04c3fSmrg struct gl_linked_shader *sh, 2454af69d88dSmrg struct gl_program_parameter_list 2455af69d88dSmrg *params) 24563464ebd5Sriastradh{ 245701e04c3fSmrg add_uniform_to_shader add(ctx, shader_program, params); 24583464ebd5Sriastradh 2459af69d88dSmrg foreach_in_list(ir_instruction, node, sh->ir) { 2460af69d88dSmrg ir_variable *var = node->as_variable(); 24613464ebd5Sriastradh 2462af69d88dSmrg if ((var == NULL) || (var->data.mode != ir_var_uniform) 246301e04c3fSmrg || var->is_in_buffer_block() || (strncmp(var->name, "gl_", 3) == 0)) 2464af69d88dSmrg continue; 24653464ebd5Sriastradh 2466af69d88dSmrg add.process(var); 24673464ebd5Sriastradh } 24683464ebd5Sriastradh} 24693464ebd5Sriastradh 2470af69d88dSmrgvoid 2471af69d88dSmrg_mesa_associate_uniform_storage(struct gl_context *ctx, 247201e04c3fSmrg struct gl_shader_program *shader_program, 2473b9abf16eSmaya struct gl_program *prog) 24743464ebd5Sriastradh{ 247501e04c3fSmrg struct gl_program_parameter_list *params = prog->Parameters; 247601e04c3fSmrg gl_shader_stage shader_type = prog->info.stage; 247701e04c3fSmrg 24787ec681f3Smrg _mesa_disallow_parameter_storage_realloc(params); 24797ec681f3Smrg 2480af69d88dSmrg /* After adding each uniform to the parameter list, connect the storage for 2481af69d88dSmrg * the parameter with the tracking structure used by the API for the 2482af69d88dSmrg * uniform. 
2483af69d88dSmrg */ 2484af69d88dSmrg unsigned last_location = unsigned(~0); 2485af69d88dSmrg for (unsigned i = 0; i < params->NumParameters; i++) { 2486af69d88dSmrg if (params->Parameters[i].Type != PROGRAM_UNIFORM) 248701e04c3fSmrg continue; 24883464ebd5Sriastradh 24897ec681f3Smrg unsigned location = params->Parameters[i].UniformStorageIndex; 249001e04c3fSmrg 249101e04c3fSmrg struct gl_uniform_storage *storage = 249201e04c3fSmrg &shader_program->data->UniformStorage[location]; 249301e04c3fSmrg 249401e04c3fSmrg /* Do not associate any uniform storage to built-in uniforms */ 249501e04c3fSmrg if (storage->builtin) 249601e04c3fSmrg continue; 24973464ebd5Sriastradh 2498af69d88dSmrg if (location != last_location) { 249901e04c3fSmrg enum gl_uniform_driver_format format = uniform_native; 250001e04c3fSmrg unsigned columns = 0; 250101e04c3fSmrg 250201e04c3fSmrg int dmul; 25037ec681f3Smrg if (ctx->Const.PackedDriverUniformStorage && !prog->info.is_arb_asm) { 250401e04c3fSmrg dmul = storage->type->vector_elements * sizeof(float); 250501e04c3fSmrg } else { 250601e04c3fSmrg dmul = 4 * sizeof(float); 250701e04c3fSmrg } 250801e04c3fSmrg 250901e04c3fSmrg switch (storage->type->base_type) { 251001e04c3fSmrg case GLSL_TYPE_UINT64: 251101e04c3fSmrg if (storage->type->vector_elements > 2) 251201e04c3fSmrg dmul *= 2; 25137ec681f3Smrg FALLTHROUGH; 251401e04c3fSmrg case GLSL_TYPE_UINT: 251501e04c3fSmrg case GLSL_TYPE_UINT16: 251601e04c3fSmrg case GLSL_TYPE_UINT8: 251701e04c3fSmrg assert(ctx->Const.NativeIntegers); 251801e04c3fSmrg format = uniform_native; 251901e04c3fSmrg columns = 1; 252001e04c3fSmrg break; 252101e04c3fSmrg case GLSL_TYPE_INT64: 252201e04c3fSmrg if (storage->type->vector_elements > 2) 252301e04c3fSmrg dmul *= 2; 25247ec681f3Smrg FALLTHROUGH; 252501e04c3fSmrg case GLSL_TYPE_INT: 252601e04c3fSmrg case GLSL_TYPE_INT16: 252701e04c3fSmrg case GLSL_TYPE_INT8: 252801e04c3fSmrg format = 252901e04c3fSmrg (ctx->Const.NativeIntegers) ? 
uniform_native : uniform_int_float; 253001e04c3fSmrg columns = 1; 253101e04c3fSmrg break; 253201e04c3fSmrg case GLSL_TYPE_DOUBLE: 253301e04c3fSmrg if (storage->type->vector_elements > 2) 253401e04c3fSmrg dmul *= 2; 25357ec681f3Smrg FALLTHROUGH; 253601e04c3fSmrg case GLSL_TYPE_FLOAT: 253701e04c3fSmrg case GLSL_TYPE_FLOAT16: 253801e04c3fSmrg format = uniform_native; 253901e04c3fSmrg columns = storage->type->matrix_columns; 254001e04c3fSmrg break; 254101e04c3fSmrg case GLSL_TYPE_BOOL: 254201e04c3fSmrg format = uniform_native; 254301e04c3fSmrg columns = 1; 254401e04c3fSmrg break; 254501e04c3fSmrg case GLSL_TYPE_SAMPLER: 254601e04c3fSmrg case GLSL_TYPE_IMAGE: 254701e04c3fSmrg case GLSL_TYPE_SUBROUTINE: 254801e04c3fSmrg format = uniform_native; 254901e04c3fSmrg columns = 1; 255001e04c3fSmrg break; 2551af69d88dSmrg case GLSL_TYPE_ATOMIC_UINT: 2552af69d88dSmrg case GLSL_TYPE_ARRAY: 2553af69d88dSmrg case GLSL_TYPE_VOID: 2554af69d88dSmrg case GLSL_TYPE_STRUCT: 2555af69d88dSmrg case GLSL_TYPE_ERROR: 2556af69d88dSmrg case GLSL_TYPE_INTERFACE: 255701e04c3fSmrg case GLSL_TYPE_FUNCTION: 255801e04c3fSmrg assert(!"Should not get here."); 255901e04c3fSmrg break; 256001e04c3fSmrg } 25613464ebd5Sriastradh 25627ec681f3Smrg unsigned pvo = params->Parameters[i].ValueOffset; 256301e04c3fSmrg _mesa_uniform_attach_driver_storage(storage, dmul * columns, dmul, 256401e04c3fSmrg format, 256501e04c3fSmrg ¶ms->ParameterValues[pvo]); 256601e04c3fSmrg 256701e04c3fSmrg /* When a bindless sampler/image is bound to a texture/image unit, we 256801e04c3fSmrg * have to overwrite the constant value by the resident handle 256901e04c3fSmrg * directly in the constant buffer before the next draw. One solution 257001e04c3fSmrg * is to keep track a pointer to the base of the data. 
257101e04c3fSmrg */ 257201e04c3fSmrg if (storage->is_bindless && (prog->sh.NumBindlessSamplers || 257301e04c3fSmrg prog->sh.NumBindlessImages)) { 257401e04c3fSmrg unsigned array_elements = MAX2(1, storage->array_elements); 257501e04c3fSmrg 257601e04c3fSmrg for (unsigned j = 0; j < array_elements; ++j) { 257701e04c3fSmrg unsigned unit = storage->opaque[shader_type].index + j; 257801e04c3fSmrg 257901e04c3fSmrg if (storage->type->without_array()->is_sampler()) { 258001e04c3fSmrg assert(unit >= 0 && unit < prog->sh.NumBindlessSamplers); 258101e04c3fSmrg prog->sh.BindlessSamplers[unit].data = 258201e04c3fSmrg ¶ms->ParameterValues[pvo] + 4 * j; 258301e04c3fSmrg } else if (storage->type->without_array()->is_image()) { 258401e04c3fSmrg assert(unit >= 0 && unit < prog->sh.NumBindlessImages); 258501e04c3fSmrg prog->sh.BindlessImages[unit].data = 258601e04c3fSmrg ¶ms->ParameterValues[pvo] + 4 * j; 258701e04c3fSmrg } 258801e04c3fSmrg } 258901e04c3fSmrg } 25903464ebd5Sriastradh 259101e04c3fSmrg /* After attaching the driver's storage to the uniform, propagate any 259201e04c3fSmrg * data from the linker's backing store. This will cause values from 259301e04c3fSmrg * initializers in the source code to be copied over. 259401e04c3fSmrg */ 2595b9abf16eSmaya unsigned array_elements = MAX2(1, storage->array_elements); 25967ec681f3Smrg if (ctx->Const.PackedDriverUniformStorage && !prog->info.is_arb_asm && 2597b9abf16eSmaya (storage->is_bindless || !storage->type->contains_opaque())) { 2598b9abf16eSmaya const int dmul = storage->type->is_64bit() ? 
2 : 1; 2599b9abf16eSmaya const unsigned components = 2600b9abf16eSmaya storage->type->vector_elements * 2601b9abf16eSmaya storage->type->matrix_columns; 2602b9abf16eSmaya 2603b9abf16eSmaya for (unsigned s = 0; s < storage->num_driver_storage; s++) { 2604b9abf16eSmaya gl_constant_value *uni_storage = (gl_constant_value *) 2605b9abf16eSmaya storage->driver_storage[s].data; 2606b9abf16eSmaya memcpy(uni_storage, storage->storage, 2607b9abf16eSmaya sizeof(storage->storage[0]) * components * 2608b9abf16eSmaya array_elements * dmul); 260901e04c3fSmrg } 2610b9abf16eSmaya } else { 2611b9abf16eSmaya _mesa_propagate_uniforms_to_driver_storage(storage, 0, 2612b9abf16eSmaya array_elements); 261301e04c3fSmrg } 26143464ebd5Sriastradh 261501e04c3fSmrg last_location = location; 26163464ebd5Sriastradh } 26173464ebd5Sriastradh } 26183464ebd5Sriastradh} 26193464ebd5Sriastradh 26207ec681f3Smrgvoid 26217ec681f3Smrg_mesa_ensure_and_associate_uniform_storage(struct gl_context *ctx, 26227ec681f3Smrg struct gl_shader_program *shader_program, 26237ec681f3Smrg struct gl_program *prog, unsigned required_space) 26247ec681f3Smrg{ 26257ec681f3Smrg /* Avoid reallocation of the program parameter list, because the uniform 26267ec681f3Smrg * storage is only associated with the original parameter list. 26277ec681f3Smrg */ 26287ec681f3Smrg _mesa_reserve_parameter_storage(prog->Parameters, required_space, 26297ec681f3Smrg required_space); 26307ec681f3Smrg 26317ec681f3Smrg /* This has to be done last. Any operation the can cause 26327ec681f3Smrg * prog->ParameterValues to get reallocated (e.g., anything that adds a 26337ec681f3Smrg * program constant) has to happen before creating this linkage. 
    */
   _mesa_associate_uniform_storage(ctx, shader_program, prog);
}

/*
 * On a basic block basis, tracks available PROGRAM_TEMPORARY register
 * channels for copy propagation and updates following instructions to
 * use the original versions.
 *
 * The ir_to_mesa_visitor lazily produces code assuming that this pass
 * will occur.  As an example, a TXP production before this pass:
 *
 * 0: MOV TEMP[1], INPUT[4].xyyy;
 * 1: MOV TEMP[1].w, INPUT[4].wwww;
 * 2: TXP TEMP[2], TEMP[1], texture[0], 2D;
 *
 * and after:
 *
 * 0: MOV TEMP[1], INPUT[4].xyyy;
 * 1: MOV TEMP[1].w, INPUT[4].wwww;
 * 2: TXP TEMP[2], INPUT[4].xyyw, texture[0], 2D;
 *
 * which allows for dead code elimination on TEMP[1]'s writes.
 *
 * Bookkeeping: acp[4 * temp + chan] is the MOV that last wrote that channel
 * of that temporary (NULL when no copy is available), and acp_level[] holds
 * the if-nesting depth at which that MOV was recorded.
 */
void
ir_to_mesa_visitor::copy_propagate(void)
{
   ir_to_mesa_instruction **acp = rzalloc_array(mem_ctx,
                                                ir_to_mesa_instruction *,
                                                this->next_temp * 4);
   int *acp_level = rzalloc_array(mem_ctx, int, this->next_temp * 4);
   /* Current if-nesting depth. */
   int level = 0;

   foreach_in_list(ir_to_mesa_instruction, inst, &this->instructions) {
      assert(inst->dst.file != PROGRAM_TEMPORARY
             || inst->dst.index < this->next_temp);

      /* First, do any copy propagation possible into the src regs. */
      for (int r = 0; r < 3; r++) {
         ir_to_mesa_instruction *first = NULL;
         bool good = true;
         int acp_base = inst->src[r].index * 4;

         /* Only directly addressed temporaries are candidates. */
         if (inst->src[r].file != PROGRAM_TEMPORARY ||
             inst->src[r].reladdr)
            continue;

         /* See if we can find entries in the ACP consisting of MOVs
          * from the same src register for all the swizzled channels
          * of this src register reference.
          */
         for (int i = 0; i < 4; i++) {
            int src_chan = GET_SWZ(inst->src[r].swizzle, i);
            ir_to_mesa_instruction *copy_chan = acp[acp_base + src_chan];

            if (!copy_chan) {
               good = false;
               break;
            }

            assert(acp_level[acp_base + src_chan] <= level);

            if (!first) {
               first = copy_chan;
            } else {
               if (first->src[0].file != copy_chan->src[0].file ||
                   first->src[0].index != copy_chan->src[0].index) {
                  good = false;
                  break;
               }
            }
         }

         if (good) {
            /* We've now validated that we can copy-propagate to
             * replace this src register reference.  Do it.
             */
            inst->src[r].file = first->src[0].file;
            inst->src[r].index = first->src[0].index;

            /* Compose this reference's swizzle with the swizzle each
             * channel's MOV applied to its own source.
             */
            int swizzle = 0;
            for (int i = 0; i < 4; i++) {
               int src_chan = GET_SWZ(inst->src[r].swizzle, i);
               ir_to_mesa_instruction *copy_inst = acp[acp_base + src_chan];
               swizzle |= (GET_SWZ(copy_inst->src[0].swizzle, src_chan) <<
                           (3 * i));
            }
            inst->src[r].swizzle = swizzle;
         }
      }

      switch (inst->op) {
      case OPCODE_BGNLOOP:
      case OPCODE_ENDLOOP:
         /* End of a basic block, clear the ACP entirely. */
         memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
         break;

      case OPCODE_IF:
         ++level;
         break;

      case OPCODE_ENDIF:
      case OPCODE_ELSE:
         /* Clear all channels written inside the block from the ACP, but
          * leaving those that were not touched.
          */
         for (int r = 0; r < this->next_temp; r++) {
            for (int c = 0; c < 4; c++) {
               if (!acp[4 * r + c])
                  continue;

               if (acp_level[4 * r + c] >= level)
                  acp[4 * r + c] = NULL;
            }
         }
         /* ELSE stays at the same depth; only ENDIF leaves the nesting. */
         if (inst->op == OPCODE_ENDIF)
            --level;
         break;

      default:
         /* Continuing the block, clear any written channels from
          * the ACP.
          */
         if (inst->dst.file == PROGRAM_TEMPORARY && inst->dst.reladdr) {
            /* Any temporary might be written, so no copy propagation
             * across this instruction.
             */
            memset(acp, 0, sizeof(*acp) * this->next_temp * 4);
         } else if (inst->dst.file == PROGRAM_OUTPUT &&
                    inst->dst.reladdr) {
            /* Any output might be written, so no copy propagation
             * from outputs across this instruction.
             */
            for (int r = 0; r < this->next_temp; r++) {
               for (int c = 0; c < 4; c++) {
                  if (!acp[4 * r + c])
                     continue;

                  if (acp[4 * r + c]->src[0].file == PROGRAM_OUTPUT)
                     acp[4 * r + c] = NULL;
               }
            }
         } else if (inst->dst.file == PROGRAM_TEMPORARY ||
                    inst->dst.file == PROGRAM_OUTPUT) {
            /* Clear where it's used as dst. */
            if (inst->dst.file == PROGRAM_TEMPORARY) {
               for (int c = 0; c < 4; c++) {
                  if (inst->dst.writemask & (1 << c)) {
                     acp[4 * inst->dst.index + c] = NULL;
                  }
               }
            }

            /* Clear where it's used as src. */
            for (int r = 0; r < this->next_temp; r++) {
               for (int c = 0; c < 4; c++) {
                  if (!acp[4 * r + c])
                     continue;

                  int src_chan = GET_SWZ(acp[4 * r + c]->src[0].swizzle, c);

                  if (acp[4 * r + c]->src[0].file == inst->dst.file &&
                      acp[4 * r + c]->src[0].index == inst->dst.index &&
                      inst->dst.writemask & (1 << src_chan))
                  {
                     acp[4 * r + c] = NULL;
                  }
               }
            }
         }
         break;
      }

      /* If this is a copy, add it to the ACP.  Only plain MOVs into a
       * directly addressed temporary qualify: no self-copy, no saturate, no
       * relative addressing on either side and no source negation.
       */
      if (inst->op == OPCODE_MOV &&
          inst->dst.file == PROGRAM_TEMPORARY &&
          !(inst->dst.file == inst->src[0].file &&
            inst->dst.index == inst->src[0].index) &&
          !inst->dst.reladdr &&
          !inst->saturate &&
          !inst->src[0].reladdr &&
          !inst->src[0].negate) {
         for (int i = 0; i < 4; i++) {
            if (inst->dst.writemask & (1 << i)) {
               acp[4 * inst->dst.index + i] = inst;
               acp_level[4 * inst->dst.index + i] = level;
            }
         }
      }
   }

   ralloc_free(acp_level);
   ralloc_free(acp);
}


/**
 * Convert a shader's GLSL IR into a Mesa gl_program.
 *
 * \param ctx             GL context; supplies the per-stage compiler options
 *                        and the GLSL_DUMP debug flag.
 * \param shader_program  Program being linked; its LinkStatus is checked and
 *                        link warnings are reported against it.
 * \param shader          Linked shader stage whose IR is translated.  The
 *                        result is built in shader->Program.
 *
 * \return shader->Program on success.  On failure the reference held in
 *         shader->Program is dropped and NULL is returned.
 */
static struct gl_program *
get_mesa_program(struct gl_context *ctx,
                 struct gl_shader_program *shader_program,
                 struct gl_linked_shader *shader)
{
   ir_to_mesa_visitor v;
   struct prog_instruction *mesa_instructions, *mesa_inst;
   ir_instruction **mesa_instruction_annotation;
   int i;
   struct gl_program *prog;
   GLenum target = _mesa_shader_stage_to_program(shader->Stage);
   const char *target_string = _mesa_shader_stage_to_string(shader->Stage);
   struct gl_shader_compiler_options *options =
         &ctx->Const.ShaderCompilerOptions[shader->Stage];

   validate_ir_tree(shader->ir);

   prog = shader->Program;
   prog->Parameters = _mesa_new_parameter_list();
   v.ctx = ctx;
   v.prog = prog;
   v.shader_program = shader_program;
   v.options = options;

   _mesa_generate_parameters_list_for_uniforms(ctx, shader_program, shader,
                                               prog->Parameters);

   /* Emit Mesa IR for main(). */
   visit_exec_list(shader->ir, &v);
   v.emit(NULL, OPCODE_END);

   prog->arb.NumTemporaries = v.next_temp;

   unsigned num_instructions = v.instructions.length();

   /* The instruction array is parented to prog; the annotation array only
    * needs to live as long as the visitor.
    */
   mesa_instructions = rzalloc_array(prog, struct prog_instruction,
                                     num_instructions);
   mesa_instruction_annotation = ralloc_array(v.mem_ctx, ir_instruction *,
                                              num_instructions);

   v.copy_propagate();

   /* Convert ir_mesa_instructions into prog_instructions.
    */
   mesa_inst = mesa_instructions;
   i = 0;
   foreach_in_list(const ir_to_mesa_instruction, inst, &v.instructions) {
      mesa_inst->Opcode = inst->op;
      if (inst->saturate)
         mesa_inst->Saturate = GL_TRUE;
      mesa_inst->DstReg.File = inst->dst.file;
      mesa_inst->DstReg.Index = inst->dst.index;
      mesa_inst->DstReg.WriteMask = inst->dst.writemask;
      mesa_inst->DstReg.RelAddr = inst->dst.reladdr != NULL;
      mesa_inst->SrcReg[0] = mesa_src_reg_from_ir_src_reg(inst->src[0]);
      mesa_inst->SrcReg[1] = mesa_src_reg_from_ir_src_reg(inst->src[1]);
      mesa_inst->SrcReg[2] = mesa_src_reg_from_ir_src_reg(inst->src[2]);
      mesa_inst->TexSrcUnit = inst->sampler;
      mesa_inst->TexSrcTarget = inst->tex_target;
      mesa_inst->TexShadow = inst->tex_shadow;
      mesa_instruction_annotation[i] = inst->ir;

      /* Set IndirectRegisterFiles. */
      if (mesa_inst->DstReg.RelAddr)
         prog->arb.IndirectRegisterFiles |= 1 << mesa_inst->DstReg.File;

      /* Update program's bitmask of indirectly accessed register files */
      for (unsigned src = 0; src < 3; src++)
         if (mesa_inst->SrcReg[src].RelAddr)
            prog->arb.IndirectRegisterFiles |= 1 << mesa_inst->SrcReg[src].File;

      /* Warn about flow control that the compiler options say should have
       * been lowered away, and note use of the address register.
       */
      switch (mesa_inst->Opcode) {
      case OPCODE_IF:
         if (options->MaxIfDepth == 0) {
            linker_warning(shader_program,
                           "Couldn't flatten if-statement. "
                           "This will likely result in software "
                           "rasterization.\n");
         }
         break;
      case OPCODE_BGNLOOP:
         if (options->EmitNoLoops) {
            linker_warning(shader_program,
                           "Couldn't unroll loop. "
                           "This will likely result in software "
                           "rasterization.\n");
         }
         break;
      case OPCODE_CONT:
         if (options->EmitNoCont) {
            linker_warning(shader_program,
                           "Couldn't lower continue-statement. "
                           "This will likely result in software "
                           "rasterization.\n");
         }
         break;
      case OPCODE_ARL:
         prog->arb.NumAddressRegs = 1;
         break;
      default:
         break;
      }

      mesa_inst++;
      i++;

      /* Stop converting as soon as the link is known to have failed. */
      if (!shader_program->data->LinkStatus)
         break;
   }

   if (!shader_program->data->LinkStatus) {
      goto fail_exit;
   }

   set_branchtargets(&v, mesa_instructions, num_instructions);

   /* NOTE(review): the headers below go to stderr while print_program()
    * writes to stdout, so the two parts of the dump can end up on different
    * streams -- confirm this is intended.
    */
   if (ctx->_Shader->Flags & GLSL_DUMP) {
      fprintf(stderr, "\n");
      fprintf(stderr, "GLSL IR for linked %s program %d:\n", target_string,
              shader_program->Name);
      _mesa_print_ir(stderr, shader->ir, NULL);
      fprintf(stderr, "\n");
      fprintf(stderr, "\n");
      fprintf(stderr, "Mesa IR for linked %s program %d:\n", target_string,
              shader_program->Name);
      print_program(mesa_instructions, mesa_instruction_annotation,
                    num_instructions);
      fflush(stderr);
   }

   prog->arb.Instructions = mesa_instructions;
   prog->arb.NumInstructions = num_instructions;

   /* Setting this to NULL prevents a possible double free in the fail_exit
    * path (far below).
    */
   mesa_instructions = NULL;

   do_set_program_inouts(shader->ir, prog, shader->Stage);

   prog->ShadowSamplers = shader->shadow_samplers;
   prog->ExternalSamplersUsed = gl_external_samplers(prog);
   _mesa_update_shader_textures_used(shader_program, prog);

   /* Set the gl_FragDepth layout. */
   if (target == GL_FRAGMENT_PROGRAM_ARB) {
      prog->info.fs.depth_layout = shader_program->FragDepthLayout;
   }

   _mesa_optimize_program(prog, prog);

   /* This has to be done last.  Any operation that can cause
    * prog->ParameterValues to get reallocated (e.g., anything that adds a
    * program constant) has to happen before creating this linkage.
    */
   _mesa_associate_uniform_storage(ctx, shader_program, prog);
   if (!shader_program->data->LinkStatus) {
      goto fail_exit;
   }

   return prog;

fail_exit:
   /* mesa_instructions is NULL here once ownership has passed to prog. */
   ralloc_free(mesa_instructions);
   _mesa_reference_program(ctx, &shader->Program, NULL);
   return NULL;
}

extern "C" {

/**
 * Link a shader.
 * Called via ctx->Driver.LinkShader()
 * This actually involves converting GLSL IR into Mesa gl_programs with
 * code lowering and other optimizations.
30093464ebd5Sriastradh */ 30103464ebd5SriastradhGLboolean 30113464ebd5Sriastradh_mesa_ir_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) 30123464ebd5Sriastradh{ 301301e04c3fSmrg assert(prog->data->LinkStatus); 30143464ebd5Sriastradh 3015af69d88dSmrg for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { 30163464ebd5Sriastradh if (prog->_LinkedShaders[i] == NULL) 30173464ebd5Sriastradh continue; 30183464ebd5Sriastradh 30193464ebd5Sriastradh bool progress; 30203464ebd5Sriastradh exec_list *ir = prog->_LinkedShaders[i]->ir; 30213464ebd5Sriastradh const struct gl_shader_compiler_options *options = 3022af69d88dSmrg &ctx->Const.ShaderCompilerOptions[prog->_LinkedShaders[i]->Stage]; 30233464ebd5Sriastradh 30243464ebd5Sriastradh do { 30253464ebd5Sriastradh progress = false; 30263464ebd5Sriastradh 30273464ebd5Sriastradh /* Lowering */ 30283464ebd5Sriastradh do_mat_op_to_vec(ir); 302901e04c3fSmrg lower_instructions(ir, (MOD_TO_FLOOR | DIV_TO_MUL_RCP | EXP_TO_EXP2 3030af69d88dSmrg | LOG_TO_LOG2 | INT_DIV_TO_MUL_RCP 3031b9abf16eSmaya | MUL64_TO_MUL_AND_MUL_HIGH 30323464ebd5Sriastradh | ((options->EmitNoPow) ? POW_TO_EXP2 : 0))); 30333464ebd5Sriastradh 3034af69d88dSmrg progress = do_common_optimization(ir, true, true, 3035af69d88dSmrg options, ctx->Const.NativeIntegers) 3036af69d88dSmrg || progress; 30373464ebd5Sriastradh 30383464ebd5Sriastradh progress = lower_quadop_vector(ir, true) || progress; 30393464ebd5Sriastradh 3040af69d88dSmrg if (options->MaxIfDepth == 0) 30413464ebd5Sriastradh progress = lower_discard(ir) || progress; 3042af69d88dSmrg 304301e04c3fSmrg progress = lower_if_to_cond_assign((gl_shader_stage)i, ir, 304401e04c3fSmrg options->MaxIfDepth) || progress; 30453464ebd5Sriastradh 30463464ebd5Sriastradh /* If there are forms of indirect addressing that the driver 30473464ebd5Sriastradh * cannot handle, perform the lowering pass. 
30483464ebd5Sriastradh */ 30493464ebd5Sriastradh if (options->EmitNoIndirectInput || options->EmitNoIndirectOutput 30503464ebd5Sriastradh || options->EmitNoIndirectTemp || options->EmitNoIndirectUniform) 30513464ebd5Sriastradh progress = 305201e04c3fSmrg lower_variable_index_to_cond_assign(prog->_LinkedShaders[i]->Stage, ir, 30533464ebd5Sriastradh options->EmitNoIndirectInput, 30543464ebd5Sriastradh options->EmitNoIndirectOutput, 30553464ebd5Sriastradh options->EmitNoIndirectTemp, 30563464ebd5Sriastradh options->EmitNoIndirectUniform) 30573464ebd5Sriastradh || progress; 30583464ebd5Sriastradh 30593464ebd5Sriastradh progress = do_vec_index_to_cond_assign(ir) || progress; 3060af69d88dSmrg progress = lower_vector_insert(ir, true) || progress; 30613464ebd5Sriastradh } while (progress); 30623464ebd5Sriastradh 30633464ebd5Sriastradh validate_ir_tree(ir); 30643464ebd5Sriastradh } 30653464ebd5Sriastradh 3066af69d88dSmrg for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) { 30673464ebd5Sriastradh struct gl_program *linked_prog; 30683464ebd5Sriastradh 30693464ebd5Sriastradh if (prog->_LinkedShaders[i] == NULL) 30703464ebd5Sriastradh continue; 30713464ebd5Sriastradh 30723464ebd5Sriastradh linked_prog = get_mesa_program(ctx, prog, prog->_LinkedShaders[i]); 30733464ebd5Sriastradh 30743464ebd5Sriastradh if (linked_prog) { 307501e04c3fSmrg _mesa_copy_linked_program_data(prog, prog->_LinkedShaders[i]); 3076af69d88dSmrg 3077af69d88dSmrg if (!ctx->Driver.ProgramStringNotify(ctx, 3078af69d88dSmrg _mesa_shader_stage_to_program(i), 3079af69d88dSmrg linked_prog)) { 308001e04c3fSmrg _mesa_reference_program(ctx, &prog->_LinkedShaders[i]->Program, 308101e04c3fSmrg NULL); 30823464ebd5Sriastradh return GL_FALSE; 30833464ebd5Sriastradh } 30843464ebd5Sriastradh } 30853464ebd5Sriastradh } 30863464ebd5Sriastradh 30877ec681f3Smrg build_program_resource_list(ctx, prog, false); 308801e04c3fSmrg return prog->data->LinkStatus; 30893464ebd5Sriastradh} 30903464ebd5Sriastradh 30913464ebd5Sriastradh/** 
30923464ebd5Sriastradh * Link a GLSL shader program. Called via glLinkProgram(). 30933464ebd5Sriastradh */ 30943464ebd5Sriastradhvoid 30953464ebd5Sriastradh_mesa_glsl_link_shader(struct gl_context *ctx, struct gl_shader_program *prog) 30963464ebd5Sriastradh{ 30973464ebd5Sriastradh unsigned int i; 309801e04c3fSmrg bool spirv = false; 30993464ebd5Sriastradh 31003464ebd5Sriastradh _mesa_clear_shader_program_data(ctx, prog); 31013464ebd5Sriastradh 310201e04c3fSmrg prog->data = _mesa_create_shader_program_data(); 310301e04c3fSmrg 310401e04c3fSmrg prog->data->LinkStatus = LINKING_SUCCESS; 31053464ebd5Sriastradh 31063464ebd5Sriastradh for (i = 0; i < prog->NumShaders; i++) { 31073464ebd5Sriastradh if (!prog->Shaders[i]->CompileStatus) { 310801e04c3fSmrg linker_error(prog, "linking with uncompiled/unspecialized shader"); 310901e04c3fSmrg } 311001e04c3fSmrg 311101e04c3fSmrg if (!i) { 311201e04c3fSmrg spirv = (prog->Shaders[i]->spirv_data != NULL); 311301e04c3fSmrg } else if (spirv && !prog->Shaders[i]->spirv_data) { 311401e04c3fSmrg /* The GL_ARB_gl_spirv spec adds a new bullet point to the list of 311501e04c3fSmrg * reasons LinkProgram can fail: 311601e04c3fSmrg * 311701e04c3fSmrg * "All the shader objects attached to <program> do not have the 311801e04c3fSmrg * same value for the SPIR_V_BINARY_ARB state." 311901e04c3fSmrg */ 312001e04c3fSmrg linker_error(prog, 312101e04c3fSmrg "not all attached shaders have the same " 312201e04c3fSmrg "SPIR_V_BINARY_ARB state"); 31233464ebd5Sriastradh } 31243464ebd5Sriastradh } 312501e04c3fSmrg prog->data->spirv = spirv; 31263464ebd5Sriastradh 312701e04c3fSmrg if (prog->data->LinkStatus) { 312801e04c3fSmrg if (!spirv) 312901e04c3fSmrg link_shaders(ctx, prog); 313001e04c3fSmrg else 313101e04c3fSmrg _mesa_spirv_link_shaders(ctx, prog); 31323464ebd5Sriastradh } 31333464ebd5Sriastradh 313401e04c3fSmrg /* If LinkStatus is LINKING_SUCCESS, then reset sampler validated to true. 313501e04c3fSmrg * Validation happens via the LinkShader call below. 
If LinkStatus is 313601e04c3fSmrg * LINKING_SKIPPED, then SamplersValidated will have been restored from the 313701e04c3fSmrg * shader cache. 313801e04c3fSmrg */ 313901e04c3fSmrg if (prog->data->LinkStatus == LINKING_SUCCESS) { 314001e04c3fSmrg prog->SamplersValidated = GL_TRUE; 314101e04c3fSmrg } 314201e04c3fSmrg 314301e04c3fSmrg if (prog->data->LinkStatus && !ctx->Driver.LinkShader(ctx, prog)) { 314401e04c3fSmrg prog->data->LinkStatus = LINKING_FAILURE; 31453464ebd5Sriastradh } 31463464ebd5Sriastradh 31477ec681f3Smrg if (prog->data->LinkStatus != LINKING_FAILURE) 31487ec681f3Smrg _mesa_create_program_resource_hash(prog); 31497ec681f3Smrg 315001e04c3fSmrg /* Return early if we are loading the shader from on-disk cache */ 315101e04c3fSmrg if (prog->data->LinkStatus == LINKING_SKIPPED) 315201e04c3fSmrg return; 315301e04c3fSmrg 3154af69d88dSmrg if (ctx->_Shader->Flags & GLSL_DUMP) { 315501e04c3fSmrg if (!prog->data->LinkStatus) { 3156af69d88dSmrg fprintf(stderr, "GLSL shader program %d failed to link\n", prog->Name); 31573464ebd5Sriastradh } 31583464ebd5Sriastradh 315901e04c3fSmrg if (prog->data->InfoLog && prog->data->InfoLog[0] != 0) { 3160af69d88dSmrg fprintf(stderr, "GLSL shader program %d info log:\n", prog->Name); 316101e04c3fSmrg fprintf(stderr, "%s\n", prog->data->InfoLog); 31623464ebd5Sriastradh } 31633464ebd5Sriastradh } 316401e04c3fSmrg 316501e04c3fSmrg#ifdef ENABLE_SHADER_CACHE 316601e04c3fSmrg if (prog->data->LinkStatus) 316701e04c3fSmrg shader_cache_write_program_metadata(ctx, prog); 316801e04c3fSmrg#endif 31693464ebd5Sriastradh} 31703464ebd5Sriastradh 31713464ebd5Sriastradh} /* extern "C" */ 3172