17ec681f3Smrg/* 27ec681f3Smrg * Copyright © 2019 Google, Inc 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg * to deal in the Software without restriction, including without limitation 77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg * 117ec681f3Smrg * The above copyright notice and this permission notice (including the next 127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 137ec681f3Smrg * Software. 147ec681f3Smrg * 157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 207ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 217ec681f3Smrg * DEALINGS IN THE SOFTWARE. 227ec681f3Smrg */ 237ec681f3Smrg 247ec681f3Smrg/** 257ec681f3Smrg * \file lower_precision.cpp 267ec681f3Smrg */ 277ec681f3Smrg 287ec681f3Smrg#include "main/macros.h" 297ec681f3Smrg#include "main/mtypes.h" 307ec681f3Smrg#include "compiler/glsl_types.h" 317ec681f3Smrg#include "ir.h" 327ec681f3Smrg#include "ir_builder.h" 337ec681f3Smrg#include "ir_optimization.h" 347ec681f3Smrg#include "ir_rvalue_visitor.h" 357ec681f3Smrg#include "util/half_float.h" 367ec681f3Smrg#include "util/set.h" 377ec681f3Smrg#include "util/hash_table.h" 387ec681f3Smrg#include <vector> 397ec681f3Smrg 407ec681f3Smrgnamespace { 417ec681f3Smrg 427ec681f3Smrgclass find_precision_visitor : public ir_rvalue_enter_visitor { 437ec681f3Smrgpublic: 447ec681f3Smrg find_precision_visitor(const struct gl_shader_compiler_options *options); 457ec681f3Smrg ~find_precision_visitor(); 467ec681f3Smrg 477ec681f3Smrg virtual void handle_rvalue(ir_rvalue **rvalue); 487ec681f3Smrg virtual ir_visitor_status visit_enter(ir_call *ir); 497ec681f3Smrg 507ec681f3Smrg ir_function_signature *map_builtin(ir_function_signature *sig); 517ec681f3Smrg 527ec681f3Smrg /* Set of rvalues that can be lowered. This will be filled in by 537ec681f3Smrg * find_lowerable_rvalues_visitor. Only the root node of a lowerable section 547ec681f3Smrg * will be added to this set. 557ec681f3Smrg */ 567ec681f3Smrg struct set *lowerable_rvalues; 577ec681f3Smrg 587ec681f3Smrg /** 597ec681f3Smrg * A mapping of builtin signature functions to lowered versions. This is 607ec681f3Smrg * filled in lazily when a lowered version is needed. 617ec681f3Smrg */ 627ec681f3Smrg struct hash_table *lowered_builtins; 637ec681f3Smrg /** 647ec681f3Smrg * A temporary hash table only used in order to clone functions. 657ec681f3Smrg */ 667ec681f3Smrg struct hash_table *clone_ht; 677ec681f3Smrg 687ec681f3Smrg void *lowered_builtin_mem_ctx; 697ec681f3Smrg 707ec681f3Smrg const struct gl_shader_compiler_options *options; 717ec681f3Smrg}; 727ec681f3Smrg 737ec681f3Smrgclass find_lowerable_rvalues_visitor : public ir_hierarchical_visitor { 747ec681f3Smrgpublic: 757ec681f3Smrg enum can_lower_state { 767ec681f3Smrg UNKNOWN, 777ec681f3Smrg CANT_LOWER, 787ec681f3Smrg SHOULD_LOWER, 797ec681f3Smrg }; 807ec681f3Smrg 817ec681f3Smrg enum parent_relation { 827ec681f3Smrg /* The parent performs a further operation involving the result from the 837ec681f3Smrg * child and can be lowered along with it. 847ec681f3Smrg */ 857ec681f3Smrg COMBINED_OPERATION, 867ec681f3Smrg /* The parent instruction’s operation is independent of the child type so 877ec681f3Smrg * the child should be lowered separately. 887ec681f3Smrg */ 897ec681f3Smrg INDEPENDENT_OPERATION, 907ec681f3Smrg }; 917ec681f3Smrg 927ec681f3Smrg struct stack_entry { 937ec681f3Smrg ir_instruction *instr; 947ec681f3Smrg enum can_lower_state state; 957ec681f3Smrg /* List of child rvalues that can be lowered. When this stack entry is 967ec681f3Smrg * popped, if this node itself can’t be lowered than all of the children 977ec681f3Smrg * are root nodes to lower so we will add them to lowerable_rvalues. 987ec681f3Smrg * Otherwise if this node can also be lowered then we won’t add the 997ec681f3Smrg * children because we only want to add the topmost lowerable nodes to 1007ec681f3Smrg * lowerable_rvalues and the children will be lowered as part of lowering 1017ec681f3Smrg * this node. 1027ec681f3Smrg */ 1037ec681f3Smrg std::vector<ir_instruction *> lowerable_children; 1047ec681f3Smrg }; 1057ec681f3Smrg 1067ec681f3Smrg find_lowerable_rvalues_visitor(struct set *result, 1077ec681f3Smrg const struct gl_shader_compiler_options *options); 1087ec681f3Smrg 1097ec681f3Smrg static void stack_enter(class ir_instruction *ir, void *data); 1107ec681f3Smrg static void stack_leave(class ir_instruction *ir, void *data); 1117ec681f3Smrg 1127ec681f3Smrg virtual ir_visitor_status visit(ir_constant *ir); 1137ec681f3Smrg virtual ir_visitor_status visit(ir_dereference_variable *ir); 1147ec681f3Smrg 1157ec681f3Smrg virtual ir_visitor_status visit_enter(ir_dereference_record *ir); 1167ec681f3Smrg virtual ir_visitor_status visit_enter(ir_dereference_array *ir); 1177ec681f3Smrg virtual ir_visitor_status visit_enter(ir_texture *ir); 1187ec681f3Smrg virtual ir_visitor_status visit_enter(ir_expression *ir); 1197ec681f3Smrg 1207ec681f3Smrg virtual ir_visitor_status visit_leave(ir_assignment *ir); 1217ec681f3Smrg virtual ir_visitor_status visit_leave(ir_call *ir); 1227ec681f3Smrg 1237ec681f3Smrg can_lower_state handle_precision(const glsl_type *type, 1247ec681f3Smrg int precision) const; 1257ec681f3Smrg 1267ec681f3Smrg static parent_relation get_parent_relation(ir_instruction *parent, 1277ec681f3Smrg ir_instruction *child); 1287ec681f3Smrg 1297ec681f3Smrg std::vector<stack_entry> stack; 1307ec681f3Smrg struct set *lowerable_rvalues; 1317ec681f3Smrg const struct gl_shader_compiler_options *options; 1327ec681f3Smrg 1337ec681f3Smrg void pop_stack_entry(); 1347ec681f3Smrg void add_lowerable_children(const stack_entry &entry); 1357ec681f3Smrg}; 1367ec681f3Smrg 1377ec681f3Smrgclass lower_precision_visitor : public ir_rvalue_visitor { 1387ec681f3Smrgpublic: 1397ec681f3Smrg virtual void handle_rvalue(ir_rvalue **rvalue); 1407ec681f3Smrg virtual ir_visitor_status visit_enter(ir_dereference_array *); 1417ec681f3Smrg virtual ir_visitor_status visit_enter(ir_dereference_record *); 1427ec681f3Smrg virtual ir_visitor_status visit_enter(ir_call *ir); 1437ec681f3Smrg virtual ir_visitor_status visit_enter(ir_texture *ir); 1447ec681f3Smrg virtual ir_visitor_status visit_leave(ir_expression *); 1457ec681f3Smrg}; 1467ec681f3Smrg 1477ec681f3Smrgstatic bool 1487ec681f3Smrgcan_lower_type(const struct gl_shader_compiler_options *options, 1497ec681f3Smrg const glsl_type *type) 1507ec681f3Smrg{ 1517ec681f3Smrg /* Don’t lower any expressions involving non-float types except bool and 1527ec681f3Smrg * texture samplers. This will rule out operations that change the type such 1537ec681f3Smrg * as conversion to ints. Instead it will end up lowering the arguments 1547ec681f3Smrg * instead and adding a final conversion to float32. We want to handle 1557ec681f3Smrg * boolean types so that it will do comparisons as 16-bit. 1567ec681f3Smrg */ 1577ec681f3Smrg 1587ec681f3Smrg switch (type->without_array()->base_type) { 1597ec681f3Smrg /* TODO: should we do anything for these two with regard to Int16 vs FP16 1607ec681f3Smrg * support? 1617ec681f3Smrg */ 1627ec681f3Smrg case GLSL_TYPE_BOOL: 1637ec681f3Smrg case GLSL_TYPE_SAMPLER: 1647ec681f3Smrg case GLSL_TYPE_IMAGE: 1657ec681f3Smrg return true; 1667ec681f3Smrg 1677ec681f3Smrg case GLSL_TYPE_FLOAT: 1687ec681f3Smrg return options->LowerPrecisionFloat16; 1697ec681f3Smrg 1707ec681f3Smrg case GLSL_TYPE_UINT: 1717ec681f3Smrg case GLSL_TYPE_INT: 1727ec681f3Smrg return options->LowerPrecisionInt16; 1737ec681f3Smrg 1747ec681f3Smrg default: 1757ec681f3Smrg return false; 1767ec681f3Smrg } 1777ec681f3Smrg} 1787ec681f3Smrg 1797ec681f3Smrgfind_lowerable_rvalues_visitor::find_lowerable_rvalues_visitor(struct set *res, 1807ec681f3Smrg const struct gl_shader_compiler_options *opts) 1817ec681f3Smrg{ 1827ec681f3Smrg lowerable_rvalues = res; 1837ec681f3Smrg options = opts; 1847ec681f3Smrg callback_enter = stack_enter; 1857ec681f3Smrg callback_leave = stack_leave; 1867ec681f3Smrg data_enter = this; 1877ec681f3Smrg data_leave = this; 1887ec681f3Smrg} 1897ec681f3Smrg 1907ec681f3Smrgvoid 1917ec681f3Smrgfind_lowerable_rvalues_visitor::stack_enter(class ir_instruction *ir, 1927ec681f3Smrg void *data) 1937ec681f3Smrg{ 1947ec681f3Smrg find_lowerable_rvalues_visitor *state = 1957ec681f3Smrg (find_lowerable_rvalues_visitor *) data; 1967ec681f3Smrg 1977ec681f3Smrg /* Add a new stack entry for this instruction */ 1987ec681f3Smrg stack_entry entry; 1997ec681f3Smrg 2007ec681f3Smrg entry.instr = ir; 2017ec681f3Smrg entry.state = state->in_assignee ? CANT_LOWER : UNKNOWN; 2027ec681f3Smrg 2037ec681f3Smrg state->stack.push_back(entry); 2047ec681f3Smrg} 2057ec681f3Smrg 2067ec681f3Smrgvoid 2077ec681f3Smrgfind_lowerable_rvalues_visitor::add_lowerable_children(const stack_entry &entry) 2087ec681f3Smrg{ 2097ec681f3Smrg /* We can’t lower this node so if there were any pending children then they 2107ec681f3Smrg * are all root lowerable nodes and we should add them to the set. 2117ec681f3Smrg */ 2127ec681f3Smrg for (auto &it : entry.lowerable_children) 2137ec681f3Smrg _mesa_set_add(lowerable_rvalues, it); 2147ec681f3Smrg} 2157ec681f3Smrg 2167ec681f3Smrgvoid 2177ec681f3Smrgfind_lowerable_rvalues_visitor::pop_stack_entry() 2187ec681f3Smrg{ 2197ec681f3Smrg const stack_entry &entry = stack.back(); 2207ec681f3Smrg 2217ec681f3Smrg if (stack.size() >= 2) { 2227ec681f3Smrg /* Combine this state into the parent state, unless the parent operation 2237ec681f3Smrg * doesn’t have any relation to the child operations 2247ec681f3Smrg */ 2257ec681f3Smrg stack_entry &parent = stack.end()[-2]; 2267ec681f3Smrg parent_relation rel = get_parent_relation(parent.instr, entry.instr); 2277ec681f3Smrg 2287ec681f3Smrg if (rel == COMBINED_OPERATION) { 2297ec681f3Smrg switch (entry.state) { 2307ec681f3Smrg case CANT_LOWER: 2317ec681f3Smrg parent.state = CANT_LOWER; 2327ec681f3Smrg break; 2337ec681f3Smrg case SHOULD_LOWER: 2347ec681f3Smrg if (parent.state == UNKNOWN) 2357ec681f3Smrg parent.state = SHOULD_LOWER; 2367ec681f3Smrg break; 2377ec681f3Smrg case UNKNOWN: 2387ec681f3Smrg break; 2397ec681f3Smrg } 2407ec681f3Smrg } 2417ec681f3Smrg } 2427ec681f3Smrg 2437ec681f3Smrg if (entry.state == SHOULD_LOWER) { 2447ec681f3Smrg ir_rvalue *rv = entry.instr->as_rvalue(); 2457ec681f3Smrg 2467ec681f3Smrg if (rv == NULL) { 2477ec681f3Smrg add_lowerable_children(entry); 2487ec681f3Smrg } else if (stack.size() >= 2) { 2497ec681f3Smrg stack_entry &parent = stack.end()[-2]; 2507ec681f3Smrg 2517ec681f3Smrg switch (get_parent_relation(parent.instr, rv)) { 2527ec681f3Smrg case COMBINED_OPERATION: 2537ec681f3Smrg /* We only want to add the toplevel lowerable instructions to the 2547ec681f3Smrg * lowerable set. Therefore if there is a parent then instead of 2557ec681f3Smrg * adding this instruction to the set we will queue depending on 2567ec681f3Smrg * the result of the parent instruction. 2577ec681f3Smrg */ 2587ec681f3Smrg parent.lowerable_children.push_back(entry.instr); 2597ec681f3Smrg break; 2607ec681f3Smrg case INDEPENDENT_OPERATION: 2617ec681f3Smrg _mesa_set_add(lowerable_rvalues, rv); 2627ec681f3Smrg break; 2637ec681f3Smrg } 2647ec681f3Smrg } else { 2657ec681f3Smrg /* This is a toplevel node so add it directly to the lowerable 2667ec681f3Smrg * set. 2677ec681f3Smrg */ 2687ec681f3Smrg _mesa_set_add(lowerable_rvalues, rv); 2697ec681f3Smrg } 2707ec681f3Smrg } else if (entry.state == CANT_LOWER) { 2717ec681f3Smrg add_lowerable_children(entry); 2727ec681f3Smrg } 2737ec681f3Smrg 2747ec681f3Smrg stack.pop_back(); 2757ec681f3Smrg} 2767ec681f3Smrg 2777ec681f3Smrgvoid 2787ec681f3Smrgfind_lowerable_rvalues_visitor::stack_leave(class ir_instruction *ir, 2797ec681f3Smrg void *data) 2807ec681f3Smrg{ 2817ec681f3Smrg find_lowerable_rvalues_visitor *state = 2827ec681f3Smrg (find_lowerable_rvalues_visitor *) data; 2837ec681f3Smrg 2847ec681f3Smrg state->pop_stack_entry(); 2857ec681f3Smrg} 2867ec681f3Smrg 2877ec681f3Smrgenum find_lowerable_rvalues_visitor::can_lower_state 2887ec681f3Smrgfind_lowerable_rvalues_visitor::handle_precision(const glsl_type *type, 2897ec681f3Smrg int precision) const 2907ec681f3Smrg{ 2917ec681f3Smrg if (!can_lower_type(options, type)) 2927ec681f3Smrg return CANT_LOWER; 2937ec681f3Smrg 2947ec681f3Smrg switch (precision) { 2957ec681f3Smrg case GLSL_PRECISION_NONE: 2967ec681f3Smrg return UNKNOWN; 2977ec681f3Smrg case GLSL_PRECISION_HIGH: 2987ec681f3Smrg return CANT_LOWER; 2997ec681f3Smrg case GLSL_PRECISION_MEDIUM: 3007ec681f3Smrg case GLSL_PRECISION_LOW: 3017ec681f3Smrg return SHOULD_LOWER; 3027ec681f3Smrg } 3037ec681f3Smrg 3047ec681f3Smrg return CANT_LOWER; 3057ec681f3Smrg} 3067ec681f3Smrg 3077ec681f3Smrgenum find_lowerable_rvalues_visitor::parent_relation 3087ec681f3Smrgfind_lowerable_rvalues_visitor::get_parent_relation(ir_instruction *parent, 3097ec681f3Smrg ir_instruction *child) 3107ec681f3Smrg{ 3117ec681f3Smrg /* If the parent is a dereference instruction then the only child could be 3127ec681f3Smrg * for example an array dereference and that should be lowered independently 3137ec681f3Smrg * of the parent. 3147ec681f3Smrg */ 3157ec681f3Smrg if (parent->as_dereference()) 3167ec681f3Smrg return INDEPENDENT_OPERATION; 3177ec681f3Smrg 3187ec681f3Smrg /* The precision of texture sampling depend on the precision of the sampler. 3197ec681f3Smrg * The rest of the arguments don’t matter so we can treat it as an 3207ec681f3Smrg * independent operation. 3217ec681f3Smrg */ 3227ec681f3Smrg if (parent->as_texture()) 3237ec681f3Smrg return INDEPENDENT_OPERATION; 3247ec681f3Smrg 3257ec681f3Smrg return COMBINED_OPERATION; 3267ec681f3Smrg} 3277ec681f3Smrg 3287ec681f3Smrgir_visitor_status 3297ec681f3Smrgfind_lowerable_rvalues_visitor::visit(ir_constant *ir) 3307ec681f3Smrg{ 3317ec681f3Smrg stack_enter(ir, this); 3327ec681f3Smrg 3337ec681f3Smrg if (!can_lower_type(options, ir->type)) 3347ec681f3Smrg stack.back().state = CANT_LOWER; 3357ec681f3Smrg 3367ec681f3Smrg stack_leave(ir, this); 3377ec681f3Smrg 3387ec681f3Smrg return visit_continue; 3397ec681f3Smrg} 3407ec681f3Smrg 3417ec681f3Smrgir_visitor_status 3427ec681f3Smrgfind_lowerable_rvalues_visitor::visit(ir_dereference_variable *ir) 3437ec681f3Smrg{ 3447ec681f3Smrg stack_enter(ir, this); 3457ec681f3Smrg 3467ec681f3Smrg if (stack.back().state == UNKNOWN) 3477ec681f3Smrg stack.back().state = handle_precision(ir->type, ir->precision()); 3487ec681f3Smrg 3497ec681f3Smrg stack_leave(ir, this); 3507ec681f3Smrg 3517ec681f3Smrg return visit_continue; 3527ec681f3Smrg} 3537ec681f3Smrg 3547ec681f3Smrgir_visitor_status 3557ec681f3Smrgfind_lowerable_rvalues_visitor::visit_enter(ir_dereference_record *ir) 3567ec681f3Smrg{ 3577ec681f3Smrg ir_hierarchical_visitor::visit_enter(ir); 3587ec681f3Smrg 3597ec681f3Smrg if (stack.back().state == UNKNOWN) 3607ec681f3Smrg stack.back().state = handle_precision(ir->type, ir->precision()); 3617ec681f3Smrg 3627ec681f3Smrg return visit_continue; 3637ec681f3Smrg} 3647ec681f3Smrg 3657ec681f3Smrgir_visitor_status 3667ec681f3Smrgfind_lowerable_rvalues_visitor::visit_enter(ir_dereference_array *ir) 3677ec681f3Smrg{ 3687ec681f3Smrg ir_hierarchical_visitor::visit_enter(ir); 3697ec681f3Smrg 3707ec681f3Smrg if (stack.back().state == UNKNOWN) 3717ec681f3Smrg stack.back().state = handle_precision(ir->type, ir->precision()); 3727ec681f3Smrg 3737ec681f3Smrg return visit_continue; 3747ec681f3Smrg} 3757ec681f3Smrg 3767ec681f3Smrgir_visitor_status 3777ec681f3Smrgfind_lowerable_rvalues_visitor::visit_enter(ir_texture *ir) 3787ec681f3Smrg{ 3797ec681f3Smrg ir_hierarchical_visitor::visit_enter(ir); 3807ec681f3Smrg 3817ec681f3Smrg /* The precision of the sample value depends on the precision of the 3827ec681f3Smrg * sampler. 3837ec681f3Smrg */ 3847ec681f3Smrg stack.back().state = handle_precision(ir->type, 3857ec681f3Smrg ir->sampler->precision()); 3867ec681f3Smrg return visit_continue; 3877ec681f3Smrg} 3887ec681f3Smrg 3897ec681f3Smrgir_visitor_status 3907ec681f3Smrgfind_lowerable_rvalues_visitor::visit_enter(ir_expression *ir) 3917ec681f3Smrg{ 3927ec681f3Smrg ir_hierarchical_visitor::visit_enter(ir); 3937ec681f3Smrg 3947ec681f3Smrg if (!can_lower_type(options, ir->type)) 3957ec681f3Smrg stack.back().state = CANT_LOWER; 3967ec681f3Smrg 3977ec681f3Smrg /* Don't lower precision for derivative calculations */ 3987ec681f3Smrg if (!options->LowerPrecisionDerivatives && 3997ec681f3Smrg (ir->operation == ir_unop_dFdx || 4007ec681f3Smrg ir->operation == ir_unop_dFdx_coarse || 4017ec681f3Smrg ir->operation == ir_unop_dFdx_fine || 4027ec681f3Smrg ir->operation == ir_unop_dFdy || 4037ec681f3Smrg ir->operation == ir_unop_dFdy_coarse || 4047ec681f3Smrg ir->operation == ir_unop_dFdy_fine)) { 4057ec681f3Smrg stack.back().state = CANT_LOWER; 4067ec681f3Smrg } 4077ec681f3Smrg 4087ec681f3Smrg return visit_continue; 4097ec681f3Smrg} 4107ec681f3Smrg 4117ec681f3Smrgstatic bool 4127ec681f3Smrgfunction_always_returns_mediump_or_lowp(const char *name) 4137ec681f3Smrg{ 4147ec681f3Smrg return !strcmp(name, "bitCount") || 4157ec681f3Smrg !strcmp(name, "findLSB") || 4167ec681f3Smrg !strcmp(name, "findMSB") || 4177ec681f3Smrg !strcmp(name, "unpackHalf2x16") || 4187ec681f3Smrg !strcmp(name, "unpackUnorm4x8") || 4197ec681f3Smrg !strcmp(name, "unpackSnorm4x8"); 4207ec681f3Smrg} 4217ec681f3Smrg 4227ec681f3Smrgstatic unsigned 4237ec681f3Smrghandle_call(ir_call *ir, const struct set *lowerable_rvalues) 4247ec681f3Smrg{ 4257ec681f3Smrg /* The intrinsic call is inside the wrapper imageLoad function that will 4267ec681f3Smrg * be inlined. We have to handle both of them. 4277ec681f3Smrg */ 4287ec681f3Smrg if (ir->callee->intrinsic_id == ir_intrinsic_image_load || 4297ec681f3Smrg (ir->callee->is_builtin() && 4307ec681f3Smrg !strcmp(ir->callee_name(), "imageLoad"))) { 4317ec681f3Smrg ir_rvalue *param = (ir_rvalue*)ir->actual_parameters.get_head(); 4327ec681f3Smrg ir_variable *resource = param->variable_referenced(); 4337ec681f3Smrg 4347ec681f3Smrg assert(ir->callee->return_precision == GLSL_PRECISION_NONE); 4357ec681f3Smrg assert(resource->type->without_array()->is_image()); 4367ec681f3Smrg 4377ec681f3Smrg /* GLSL ES 3.20 requires that images have a precision modifier, but if 4387ec681f3Smrg * you set one, it doesn't do anything, because all intrinsics are 4397ec681f3Smrg * defined with highp. This seems to be a spec bug. 4407ec681f3Smrg * 4417ec681f3Smrg * In theory we could set the return value to mediump if the image 4427ec681f3Smrg * format has a lower precision. This appears to be the most sensible 4437ec681f3Smrg * thing to do. 4447ec681f3Smrg */ 4457ec681f3Smrg const struct util_format_description *desc = 4467ec681f3Smrg util_format_description(resource->data.image_format); 4477ec681f3Smrg int i = 4487ec681f3Smrg util_format_get_first_non_void_channel(resource->data.image_format); 4497ec681f3Smrg bool mediump; 4507ec681f3Smrg 4517ec681f3Smrg assert(i >= 0); 4527ec681f3Smrg 4537ec681f3Smrg if (desc->channel[i].pure_integer || 4547ec681f3Smrg desc->channel[i].type == UTIL_FORMAT_TYPE_FLOAT) 4557ec681f3Smrg mediump = desc->channel[i].size <= 16; 4567ec681f3Smrg else 4577ec681f3Smrg mediump = desc->channel[i].size <= 10; /* unorm/snorm */ 4587ec681f3Smrg 4597ec681f3Smrg return mediump ? GLSL_PRECISION_MEDIUM : GLSL_PRECISION_HIGH; 4607ec681f3Smrg } 4617ec681f3Smrg 4627ec681f3Smrg /* Return the declared precision for user-defined functions. */ 4637ec681f3Smrg if (!ir->callee->is_builtin()) 4647ec681f3Smrg return ir->callee->return_precision; 4657ec681f3Smrg 4667ec681f3Smrg /* Handle special calls. */ 4677ec681f3Smrg if (ir->callee->is_builtin() && ir->actual_parameters.length()) { 4687ec681f3Smrg ir_rvalue *param = (ir_rvalue*)ir->actual_parameters.get_head(); 4697ec681f3Smrg ir_variable *var = param->variable_referenced(); 4707ec681f3Smrg 4717ec681f3Smrg /* Handle builtin wrappers around ir_texture opcodes. These wrappers will 4727ec681f3Smrg * be inlined by lower_precision() if we return true here, so that we can 4737ec681f3Smrg * get to ir_texture later and do proper lowering. 4747ec681f3Smrg * 4757ec681f3Smrg * We should lower the type of the return value if the sampler type 4767ec681f3Smrg * uses lower precision. The function parameters don't matter. 4777ec681f3Smrg */ 4787ec681f3Smrg if (var && var->type->without_array()->is_sampler()) { 4797ec681f3Smrg /* textureSize always returns highp. */ 4807ec681f3Smrg if (!strcmp(ir->callee_name(), "textureSize")) 4817ec681f3Smrg return GLSL_PRECISION_HIGH; 4827ec681f3Smrg 4837ec681f3Smrg return var->data.precision; 4847ec681f3Smrg } 4857ec681f3Smrg } 4867ec681f3Smrg 4877ec681f3Smrg if (/* Parameters are always highp: */ 4887ec681f3Smrg !strcmp(ir->callee_name(), "floatBitsToInt") || 4897ec681f3Smrg !strcmp(ir->callee_name(), "floatBitsToUint") || 4907ec681f3Smrg !strcmp(ir->callee_name(), "intBitsToFloat") || 4917ec681f3Smrg !strcmp(ir->callee_name(), "uintBitsToFloat") || 4927ec681f3Smrg !strcmp(ir->callee_name(), "bitfieldReverse") || 4937ec681f3Smrg !strcmp(ir->callee_name(), "frexp") || 4947ec681f3Smrg !strcmp(ir->callee_name(), "ldexp") || 4957ec681f3Smrg /* Parameters and outputs are always highp: */ 4967ec681f3Smrg /* TODO: The operations are highp, but carry and borrow outputs are lowp. */ 4977ec681f3Smrg !strcmp(ir->callee_name(), "uaddCarry") || 4987ec681f3Smrg !strcmp(ir->callee_name(), "usubBorrow") || 4997ec681f3Smrg !strcmp(ir->callee_name(), "imulExtended") || 5007ec681f3Smrg !strcmp(ir->callee_name(), "umulExtended") || 5017ec681f3Smrg !strcmp(ir->callee_name(), "unpackUnorm2x16") || 5027ec681f3Smrg !strcmp(ir->callee_name(), "unpackSnorm2x16") || 5037ec681f3Smrg /* Outputs are highp: */ 5047ec681f3Smrg !strcmp(ir->callee_name(), "packUnorm2x16") || 5057ec681f3Smrg !strcmp(ir->callee_name(), "packSnorm2x16") || 5067ec681f3Smrg /* Parameters are mediump and outputs are highp. The parameters should 5077ec681f3Smrg * be optimized in NIR, not here, e.g: 5087ec681f3Smrg * - packHalf2x16 can just be a bitcast from f16vec2 to uint32 5097ec681f3Smrg * - Other opcodes don't have to convert parameters to highp if the hw 5107ec681f3Smrg * has f16 versions. Optimize in NIR accordingly. 5117ec681f3Smrg */ 5127ec681f3Smrg !strcmp(ir->callee_name(), "packHalf2x16") || 5137ec681f3Smrg !strcmp(ir->callee_name(), "packUnorm4x8") || 5147ec681f3Smrg !strcmp(ir->callee_name(), "packSnorm4x8") || 5157ec681f3Smrg /* Atomic functions are not lowered. */ 5167ec681f3Smrg strstr(ir->callee_name(), "atomic") == ir->callee_name()) 5177ec681f3Smrg return GLSL_PRECISION_HIGH; 5187ec681f3Smrg 5197ec681f3Smrg assert(ir->callee->return_precision == GLSL_PRECISION_NONE); 5207ec681f3Smrg 5217ec681f3Smrg /* Number of parameters to check if they are lowerable. */ 5227ec681f3Smrg unsigned check_parameters = ir->actual_parameters.length(); 5237ec681f3Smrg 5247ec681f3Smrg /* Interpolation functions only consider the precision of the interpolant. */ 5257ec681f3Smrg /* Bitfield functions ignore the precision of "offset" and "bits". */ 5267ec681f3Smrg if (!strcmp(ir->callee_name(), "interpolateAtOffset") || 5277ec681f3Smrg !strcmp(ir->callee_name(), "interpolateAtSample") || 5287ec681f3Smrg !strcmp(ir->callee_name(), "bitfieldExtract")) { 5297ec681f3Smrg check_parameters = 1; 5307ec681f3Smrg } else if (!strcmp(ir->callee_name(), "bitfieldInsert")) { 5317ec681f3Smrg check_parameters = 2; 5327ec681f3Smrg } if (function_always_returns_mediump_or_lowp(ir->callee_name())) { 5337ec681f3Smrg /* These only lower the return value. Parameters keep their precision, 5347ec681f3Smrg * which is preserved in map_builtin. 5357ec681f3Smrg */ 5367ec681f3Smrg check_parameters = 0; 5377ec681f3Smrg } 5387ec681f3Smrg 5397ec681f3Smrg /* If the call is to a builtin, then the function won’t have a return 5407ec681f3Smrg * precision and we should determine it from the precision of the arguments. 5417ec681f3Smrg */ 5427ec681f3Smrg foreach_in_list(ir_rvalue, param, &ir->actual_parameters) { 5437ec681f3Smrg if (!check_parameters) 5447ec681f3Smrg break; 5457ec681f3Smrg 5467ec681f3Smrg if (!param->as_constant() && 5477ec681f3Smrg _mesa_set_search(lowerable_rvalues, param) == NULL) 5487ec681f3Smrg return GLSL_PRECISION_HIGH; 5497ec681f3Smrg 5507ec681f3Smrg --check_parameters; 5517ec681f3Smrg } 5527ec681f3Smrg 5537ec681f3Smrg return GLSL_PRECISION_MEDIUM; 5547ec681f3Smrg} 5557ec681f3Smrg 5567ec681f3Smrgir_visitor_status 5577ec681f3Smrgfind_lowerable_rvalues_visitor::visit_leave(ir_call *ir) 5587ec681f3Smrg{ 5597ec681f3Smrg ir_hierarchical_visitor::visit_leave(ir); 5607ec681f3Smrg 5617ec681f3Smrg /* Special case for handling temporary variables generated by the compiler 5627ec681f3Smrg * for function calls. If we assign to one of these using a function call 5637ec681f3Smrg * that has a lowerable return type then we can assume the temporary 5647ec681f3Smrg * variable should have a medium precision too. 5657ec681f3Smrg */ 5667ec681f3Smrg 5677ec681f3Smrg /* Do nothing if the return type is void. */ 5687ec681f3Smrg if (!ir->return_deref) 5697ec681f3Smrg return visit_continue; 5707ec681f3Smrg 5717ec681f3Smrg ir_variable *var = ir->return_deref->variable_referenced(); 5727ec681f3Smrg 5737ec681f3Smrg assert(var->data.mode == ir_var_temporary); 5747ec681f3Smrg 5757ec681f3Smrg unsigned return_precision = handle_call(ir, lowerable_rvalues); 5767ec681f3Smrg 5777ec681f3Smrg can_lower_state lower_state = 5787ec681f3Smrg handle_precision(var->type, return_precision); 5797ec681f3Smrg 5807ec681f3Smrg if (lower_state == SHOULD_LOWER) { 5817ec681f3Smrg /* There probably shouldn’t be any situations where multiple ir_call 5827ec681f3Smrg * instructions write to the same temporary? 5837ec681f3Smrg */ 5847ec681f3Smrg assert(var->data.precision == GLSL_PRECISION_NONE); 5857ec681f3Smrg var->data.precision = GLSL_PRECISION_MEDIUM; 5867ec681f3Smrg } else { 5877ec681f3Smrg var->data.precision = GLSL_PRECISION_HIGH; 5887ec681f3Smrg } 5897ec681f3Smrg 5907ec681f3Smrg return visit_continue; 5917ec681f3Smrg} 5927ec681f3Smrg 5937ec681f3Smrgir_visitor_status 5947ec681f3Smrgfind_lowerable_rvalues_visitor::visit_leave(ir_assignment *ir) 5957ec681f3Smrg{ 5967ec681f3Smrg ir_hierarchical_visitor::visit_leave(ir); 5977ec681f3Smrg 5987ec681f3Smrg /* Special case for handling temporary variables generated by the compiler. 5997ec681f3Smrg * If we assign to one of these using a lowered precision then we can assume 6007ec681f3Smrg * the temporary variable should have a medium precision too. 6017ec681f3Smrg */ 6027ec681f3Smrg ir_variable *var = ir->lhs->variable_referenced(); 6037ec681f3Smrg 6047ec681f3Smrg if (var->data.mode == ir_var_temporary) { 6057ec681f3Smrg if (_mesa_set_search(lowerable_rvalues, ir->rhs)) { 6067ec681f3Smrg /* Only override the precision if this is the first assignment. For 6077ec681f3Smrg * temporaries such as the ones generated for the ?: operator there 6087ec681f3Smrg * can be multiple assignments with different precisions. This way we 6097ec681f3Smrg * get the highest precision of all of the assignments. 6107ec681f3Smrg */ 6117ec681f3Smrg if (var->data.precision == GLSL_PRECISION_NONE) 6127ec681f3Smrg var->data.precision = GLSL_PRECISION_MEDIUM; 6137ec681f3Smrg } else if (!ir->rhs->as_constant()) { 6147ec681f3Smrg var->data.precision = GLSL_PRECISION_HIGH; 6157ec681f3Smrg } 6167ec681f3Smrg } 6177ec681f3Smrg 6187ec681f3Smrg return visit_continue; 6197ec681f3Smrg} 6207ec681f3Smrg 6217ec681f3Smrgvoid 6227ec681f3Smrgfind_lowerable_rvalues(const struct gl_shader_compiler_options *options, 6237ec681f3Smrg exec_list *instructions, 6247ec681f3Smrg struct set *result) 6257ec681f3Smrg{ 6267ec681f3Smrg find_lowerable_rvalues_visitor v(result, options); 6277ec681f3Smrg 6287ec681f3Smrg visit_list_elements(&v, instructions); 6297ec681f3Smrg 6307ec681f3Smrg assert(v.stack.empty()); 6317ec681f3Smrg} 6327ec681f3Smrg 6337ec681f3Smrgstatic const glsl_type * 6347ec681f3Smrgconvert_type(bool up, const glsl_type *type) 6357ec681f3Smrg{ 6367ec681f3Smrg if (type->is_array()) { 6377ec681f3Smrg return glsl_type::get_array_instance(convert_type(up, type->fields.array), 6387ec681f3Smrg type->array_size(), 6397ec681f3Smrg type->explicit_stride); 6407ec681f3Smrg } 6417ec681f3Smrg 6427ec681f3Smrg glsl_base_type new_base_type; 6437ec681f3Smrg 6447ec681f3Smrg if (up) { 6457ec681f3Smrg switch (type->base_type) { 6467ec681f3Smrg case GLSL_TYPE_FLOAT16: 6477ec681f3Smrg new_base_type = GLSL_TYPE_FLOAT; 6487ec681f3Smrg break; 6497ec681f3Smrg case GLSL_TYPE_INT16: 6507ec681f3Smrg new_base_type = GLSL_TYPE_INT; 6517ec681f3Smrg break; 6527ec681f3Smrg case GLSL_TYPE_UINT16: 6537ec681f3Smrg new_base_type = GLSL_TYPE_UINT; 6547ec681f3Smrg break; 6557ec681f3Smrg default: 6567ec681f3Smrg unreachable("invalid type"); 6577ec681f3Smrg return NULL; 6587ec681f3Smrg } 6597ec681f3Smrg } else { 6607ec681f3Smrg switch (type->base_type) { 6617ec681f3Smrg case GLSL_TYPE_FLOAT: 6627ec681f3Smrg new_base_type = GLSL_TYPE_FLOAT16; 6637ec681f3Smrg break; 6647ec681f3Smrg case GLSL_TYPE_INT: 6657ec681f3Smrg new_base_type = GLSL_TYPE_INT16; 6667ec681f3Smrg break; 6677ec681f3Smrg case GLSL_TYPE_UINT: 6687ec681f3Smrg new_base_type = GLSL_TYPE_UINT16; 6697ec681f3Smrg break; 6707ec681f3Smrg default: 6717ec681f3Smrg unreachable("invalid type"); 6727ec681f3Smrg return NULL; 6737ec681f3Smrg } 6747ec681f3Smrg } 6757ec681f3Smrg 6767ec681f3Smrg return glsl_type::get_instance(new_base_type, 6777ec681f3Smrg type->vector_elements, 6787ec681f3Smrg type->matrix_columns, 6797ec681f3Smrg type->explicit_stride, 6807ec681f3Smrg type->interface_row_major); 6817ec681f3Smrg} 6827ec681f3Smrg 6837ec681f3Smrgstatic const glsl_type * 6847ec681f3Smrglower_glsl_type(const glsl_type *type) 6857ec681f3Smrg{ 6867ec681f3Smrg return convert_type(false, type); 6877ec681f3Smrg} 6887ec681f3Smrg 6897ec681f3Smrgstatic ir_rvalue * 6907ec681f3Smrgconvert_precision(bool up, ir_rvalue *ir) 6917ec681f3Smrg{ 6927ec681f3Smrg unsigned op; 6937ec681f3Smrg 6947ec681f3Smrg if (up) { 6957ec681f3Smrg switch (ir->type->base_type) { 6967ec681f3Smrg case GLSL_TYPE_FLOAT16: 6977ec681f3Smrg op = ir_unop_f162f; 6987ec681f3Smrg break; 6997ec681f3Smrg case GLSL_TYPE_INT16: 7007ec681f3Smrg op = ir_unop_i2i; 7017ec681f3Smrg break; 7027ec681f3Smrg case GLSL_TYPE_UINT16: 7037ec681f3Smrg op = ir_unop_u2u; 7047ec681f3Smrg break; 7057ec681f3Smrg default: 7067ec681f3Smrg unreachable("invalid type"); 7077ec681f3Smrg return NULL; 7087ec681f3Smrg } 7097ec681f3Smrg } else { 7107ec681f3Smrg switch (ir->type->base_type) { 7117ec681f3Smrg case GLSL_TYPE_FLOAT: 7127ec681f3Smrg op = ir_unop_f2fmp; 7137ec681f3Smrg break; 7147ec681f3Smrg case GLSL_TYPE_INT: 7157ec681f3Smrg op = ir_unop_i2imp; 7167ec681f3Smrg break; 7177ec681f3Smrg case GLSL_TYPE_UINT: 7187ec681f3Smrg op = ir_unop_u2ump; 7197ec681f3Smrg break; 7207ec681f3Smrg default: 7217ec681f3Smrg unreachable("invalid type"); 7227ec681f3Smrg return NULL; 7237ec681f3Smrg } 7247ec681f3Smrg } 7257ec681f3Smrg 7267ec681f3Smrg const glsl_type *desired_type = convert_type(up, ir->type); 7277ec681f3Smrg void *mem_ctx = ralloc_parent(ir); 7287ec681f3Smrg return new(mem_ctx) ir_expression(op, desired_type, ir, NULL); 7297ec681f3Smrg} 7307ec681f3Smrg 7317ec681f3Smrgvoid 7327ec681f3Smrglower_precision_visitor::handle_rvalue(ir_rvalue **rvalue) 7337ec681f3Smrg{ 7347ec681f3Smrg ir_rvalue *ir = *rvalue; 7357ec681f3Smrg 7367ec681f3Smrg if (ir == NULL) 7377ec681f3Smrg return; 7387ec681f3Smrg 7397ec681f3Smrg if (ir->as_dereference()) { 7407ec681f3Smrg if (!ir->type->is_boolean()) 7417ec681f3Smrg *rvalue = convert_precision(false, ir); 7427ec681f3Smrg } else if (ir->type->is_32bit()) { 7437ec681f3Smrg ir->type = lower_glsl_type(ir->type); 7447ec681f3Smrg 7457ec681f3Smrg ir_constant *const_ir = ir->as_constant(); 7467ec681f3Smrg 7477ec681f3Smrg if (const_ir) { 7487ec681f3Smrg ir_constant_data value; 7497ec681f3Smrg 7507ec681f3Smrg if (ir->type->base_type == GLSL_TYPE_FLOAT16) { 7517ec681f3Smrg for (unsigned i = 0; i < ARRAY_SIZE(value.f16); i++) 7527ec681f3Smrg value.f16[i] = _mesa_float_to_half(const_ir->value.f[i]); 7537ec681f3Smrg } else if (ir->type->base_type == GLSL_TYPE_INT16) { 7547ec681f3Smrg for (unsigned i = 0; i < ARRAY_SIZE(value.i16); i++) 7557ec681f3Smrg value.i16[i] = const_ir->value.i[i]; 7567ec681f3Smrg } else if (ir->type->base_type == GLSL_TYPE_UINT16) { 7577ec681f3Smrg for (unsigned i = 0; i < ARRAY_SIZE(value.u16); i++) 7587ec681f3Smrg value.u16[i] = const_ir->value.u[i]; 7597ec681f3Smrg } else { 7607ec681f3Smrg unreachable("invalid type"); 7617ec681f3Smrg } 7627ec681f3Smrg 7637ec681f3Smrg const_ir->value = value; 7647ec681f3Smrg } 7657ec681f3Smrg } 7667ec681f3Smrg} 7677ec681f3Smrg 7687ec681f3Smrgir_visitor_status 7697ec681f3Smrglower_precision_visitor::visit_enter(ir_dereference_record *ir) 7707ec681f3Smrg{ 7717ec681f3Smrg /* We don’t want to lower the variable */ 7727ec681f3Smrg return visit_continue_with_parent; 7737ec681f3Smrg} 7747ec681f3Smrg 7757ec681f3Smrgir_visitor_status 7767ec681f3Smrglower_precision_visitor::visit_enter(ir_dereference_array *ir) 7777ec681f3Smrg{ 7787ec681f3Smrg /* We don’t want to convert the array index or the variable. If the array 7797ec681f3Smrg * index itself is lowerable that will be handled separately. 7807ec681f3Smrg */ 7817ec681f3Smrg return visit_continue_with_parent; 7827ec681f3Smrg} 7837ec681f3Smrg 7847ec681f3Smrgir_visitor_status 7857ec681f3Smrglower_precision_visitor::visit_enter(ir_call *ir) 7867ec681f3Smrg{ 7877ec681f3Smrg /* We don’t want to convert the arguments. These will be handled separately. 7887ec681f3Smrg */ 7897ec681f3Smrg return visit_continue_with_parent; 7907ec681f3Smrg} 7917ec681f3Smrg 7927ec681f3Smrgir_visitor_status 7937ec681f3Smrglower_precision_visitor::visit_enter(ir_texture *ir) 7947ec681f3Smrg{ 7957ec681f3Smrg /* We don’t want to convert the arguments. These will be handled separately. 7967ec681f3Smrg */ 7977ec681f3Smrg return visit_continue_with_parent; 7987ec681f3Smrg} 7997ec681f3Smrg 8007ec681f3Smrgir_visitor_status 8017ec681f3Smrglower_precision_visitor::visit_leave(ir_expression *ir) 8027ec681f3Smrg{ 8037ec681f3Smrg ir_rvalue_visitor::visit_leave(ir); 8047ec681f3Smrg 8057ec681f3Smrg /* If the expression is a conversion operation to or from bool then fix the 8067ec681f3Smrg * operation. 8077ec681f3Smrg */ 8087ec681f3Smrg switch (ir->operation) { 8097ec681f3Smrg case ir_unop_b2f: 8107ec681f3Smrg ir->operation = ir_unop_b2f16; 8117ec681f3Smrg break; 8127ec681f3Smrg case ir_unop_f2b: 8137ec681f3Smrg ir->operation = ir_unop_f162b; 8147ec681f3Smrg break; 8157ec681f3Smrg case ir_unop_b2i: 8167ec681f3Smrg case ir_unop_i2b: 8177ec681f3Smrg /* Nothing to do - they both support int16. */ 8187ec681f3Smrg break; 8197ec681f3Smrg default: 8207ec681f3Smrg break; 8217ec681f3Smrg } 8227ec681f3Smrg 8237ec681f3Smrg return visit_continue; 8247ec681f3Smrg} 8257ec681f3Smrg 8267ec681f3Smrgvoid 8277ec681f3Smrgfind_precision_visitor::handle_rvalue(ir_rvalue **rvalue) 8287ec681f3Smrg{ 8297ec681f3Smrg /* Checking the precision of rvalue can be lowered first throughout 8307ec681f3Smrg * find_lowerable_rvalues_visitor. 8317ec681f3Smrg * Once it found the precision of rvalue can be lowered, then we can 8327ec681f3Smrg * add conversion f2fmp, etc. through lower_precision_visitor. 8337ec681f3Smrg */ 8347ec681f3Smrg if (*rvalue == NULL) 8357ec681f3Smrg return; 8367ec681f3Smrg 8377ec681f3Smrg struct set_entry *entry = _mesa_set_search(lowerable_rvalues, *rvalue); 8387ec681f3Smrg 8397ec681f3Smrg if (!entry) 8407ec681f3Smrg return; 8417ec681f3Smrg 8427ec681f3Smrg _mesa_set_remove(lowerable_rvalues, entry); 8437ec681f3Smrg 8447ec681f3Smrg /* If the entire expression is just a variable dereference then trying to 8457ec681f3Smrg * lower it will just directly add pointless to and from conversions without 8467ec681f3Smrg * any actual operation in-between. Although these will eventually get 8477ec681f3Smrg * optimised out, avoiding generating them here also avoids breaking inout 8487ec681f3Smrg * parameters to functions. 8497ec681f3Smrg */ 8507ec681f3Smrg if ((*rvalue)->as_dereference()) 8517ec681f3Smrg return; 8527ec681f3Smrg 8537ec681f3Smrg lower_precision_visitor v; 8547ec681f3Smrg 8557ec681f3Smrg (*rvalue)->accept(&v); 8567ec681f3Smrg v.handle_rvalue(rvalue); 8577ec681f3Smrg 8587ec681f3Smrg /* We don’t need to add the final conversion if the final type has been 8597ec681f3Smrg * converted to bool 8607ec681f3Smrg */ 8617ec681f3Smrg if ((*rvalue)->type->base_type != GLSL_TYPE_BOOL) { 8627ec681f3Smrg *rvalue = convert_precision(true, *rvalue); 8637ec681f3Smrg } 8647ec681f3Smrg} 8657ec681f3Smrg 8667ec681f3Smrgir_visitor_status 8677ec681f3Smrgfind_precision_visitor::visit_enter(ir_call *ir) 8687ec681f3Smrg{ 8697ec681f3Smrg ir_rvalue_enter_visitor::visit_enter(ir); 8707ec681f3Smrg 8717ec681f3Smrg ir_variable *return_var = 8727ec681f3Smrg ir->return_deref ? ir->return_deref->variable_referenced() : NULL; 8737ec681f3Smrg 8747ec681f3Smrg /* Don't do anything for image_load here. We have only changed the return 8757ec681f3Smrg * value to mediump/lowp, so that following instructions can use reduced 8767ec681f3Smrg * precision. 8777ec681f3Smrg * 8787ec681f3Smrg * The return value type of the intrinsic itself isn't changed here, but 8797ec681f3Smrg * can be changed in NIR if all users use the *2*mp opcode. 8807ec681f3Smrg */ 8817ec681f3Smrg if (ir->callee->intrinsic_id == ir_intrinsic_image_load) 8827ec681f3Smrg return visit_continue; 8837ec681f3Smrg 8847ec681f3Smrg /* If this is a call to a builtin and the find_lowerable_rvalues_visitor 8857ec681f3Smrg * overrode the precision of the temporary return variable, then we can 8867ec681f3Smrg * replace the builtin implementation with a lowered version. 8877ec681f3Smrg */ 8887ec681f3Smrg 8897ec681f3Smrg if (!ir->callee->is_builtin() || 8907ec681f3Smrg ir->callee->is_intrinsic() || 8917ec681f3Smrg return_var == NULL || 8927ec681f3Smrg (return_var->data.precision != GLSL_PRECISION_MEDIUM && 8937ec681f3Smrg return_var->data.precision != GLSL_PRECISION_LOW)) 8947ec681f3Smrg return visit_continue; 8957ec681f3Smrg 8967ec681f3Smrg ir->callee = map_builtin(ir->callee); 8977ec681f3Smrg ir->generate_inline(ir); 8987ec681f3Smrg ir->remove(); 8997ec681f3Smrg 9007ec681f3Smrg return visit_continue_with_parent; 9017ec681f3Smrg} 9027ec681f3Smrg 9037ec681f3Smrgir_function_signature * 9047ec681f3Smrgfind_precision_visitor::map_builtin(ir_function_signature *sig) 9057ec681f3Smrg{ 9067ec681f3Smrg if (lowered_builtins == NULL) { 9077ec681f3Smrg lowered_builtins = _mesa_pointer_hash_table_create(NULL); 9087ec681f3Smrg clone_ht =_mesa_pointer_hash_table_create(NULL); 9097ec681f3Smrg lowered_builtin_mem_ctx = ralloc_context(NULL); 9107ec681f3Smrg } else { 9117ec681f3Smrg struct hash_entry *entry = _mesa_hash_table_search(lowered_builtins, sig); 9127ec681f3Smrg if (entry) 9137ec681f3Smrg return (ir_function_signature *) entry->data; 9147ec681f3Smrg } 9157ec681f3Smrg 9167ec681f3Smrg ir_function_signature *lowered_sig = 9177ec681f3Smrg sig->clone(lowered_builtin_mem_ctx, clone_ht); 9187ec681f3Smrg 9197ec681f3Smrg /* Functions that always return mediump or lowp should keep their 9207ec681f3Smrg * parameters intact, because they can be highp. NIR can lower 9217ec681f3Smrg * the up-conversion for parameters if needed. 9227ec681f3Smrg */ 9237ec681f3Smrg if (!function_always_returns_mediump_or_lowp(sig->function_name())) { 9247ec681f3Smrg foreach_in_list(ir_variable, param, &lowered_sig->parameters) { 9257ec681f3Smrg param->data.precision = GLSL_PRECISION_MEDIUM; 9267ec681f3Smrg } 9277ec681f3Smrg } 9287ec681f3Smrg 9297ec681f3Smrg lower_precision(options, &lowered_sig->body); 9307ec681f3Smrg 9317ec681f3Smrg _mesa_hash_table_clear(clone_ht, NULL); 9327ec681f3Smrg 9337ec681f3Smrg _mesa_hash_table_insert(lowered_builtins, sig, lowered_sig); 9347ec681f3Smrg 9357ec681f3Smrg return lowered_sig; 9367ec681f3Smrg} 9377ec681f3Smrg 9387ec681f3Smrgfind_precision_visitor::find_precision_visitor(const struct gl_shader_compiler_options *options) 9397ec681f3Smrg : lowerable_rvalues(_mesa_pointer_set_create(NULL)), 9407ec681f3Smrg lowered_builtins(NULL), 9417ec681f3Smrg clone_ht(NULL), 9427ec681f3Smrg lowered_builtin_mem_ctx(NULL), 9437ec681f3Smrg options(options) 9447ec681f3Smrg{ 9457ec681f3Smrg} 9467ec681f3Smrg 9477ec681f3Smrgfind_precision_visitor::~find_precision_visitor() 9487ec681f3Smrg{ 9497ec681f3Smrg _mesa_set_destroy(lowerable_rvalues, NULL); 9507ec681f3Smrg 9517ec681f3Smrg if (lowered_builtins) { 9527ec681f3Smrg _mesa_hash_table_destroy(lowered_builtins, NULL); 9537ec681f3Smrg _mesa_hash_table_destroy(clone_ht, NULL); 9547ec681f3Smrg ralloc_free(lowered_builtin_mem_ctx); 9557ec681f3Smrg } 9567ec681f3Smrg} 9577ec681f3Smrg 9587ec681f3Smrg/* Lowering opcodes to 16 bits is not enough for programs with control flow 9597ec681f3Smrg * (and the ?: operator, which is represented by if-then-else in the IR), 9607ec681f3Smrg * because temporary variables, which are used for passing values between 9617ec681f3Smrg * code blocks, are not lowered, resulting in 32-bit phis in NIR. 9627ec681f3Smrg * 9637ec681f3Smrg * First change the variable types to 16 bits, then change all ir_dereference 9647ec681f3Smrg * types to 16 bits. 9657ec681f3Smrg */ 9667ec681f3Smrgclass lower_variables_visitor : public ir_rvalue_enter_visitor { 9677ec681f3Smrgpublic: 9687ec681f3Smrg lower_variables_visitor(const struct gl_shader_compiler_options *options) 9697ec681f3Smrg : options(options) { 9707ec681f3Smrg lower_vars = _mesa_pointer_set_create(NULL); 9717ec681f3Smrg } 9727ec681f3Smrg 9737ec681f3Smrg virtual ~lower_variables_visitor() 9747ec681f3Smrg { 9757ec681f3Smrg _mesa_set_destroy(lower_vars, NULL); 9767ec681f3Smrg } 9777ec681f3Smrg 9787ec681f3Smrg virtual ir_visitor_status visit(ir_variable *var); 9797ec681f3Smrg virtual ir_visitor_status visit_enter(ir_assignment *ir); 9807ec681f3Smrg virtual ir_visitor_status visit_enter(ir_return *ir); 9817ec681f3Smrg virtual ir_visitor_status visit_enter(ir_call *ir); 9827ec681f3Smrg virtual void handle_rvalue(ir_rvalue **rvalue); 9837ec681f3Smrg 9847ec681f3Smrg void fix_types_in_deref_chain(ir_dereference *ir); 9857ec681f3Smrg void convert_split_assignment(ir_dereference *lhs, ir_rvalue *rhs, 9867ec681f3Smrg bool insert_before); 9877ec681f3Smrg 9887ec681f3Smrg const struct gl_shader_compiler_options *options; 9897ec681f3Smrg set *lower_vars; 9907ec681f3Smrg}; 9917ec681f3Smrg 9927ec681f3Smrgstatic void 9937ec681f3Smrglower_constant(ir_constant *ir) 9947ec681f3Smrg{ 9957ec681f3Smrg if (ir->type->is_array()) { 9967ec681f3Smrg for (int i = 0; i < ir->type->array_size(); i++) 9977ec681f3Smrg lower_constant(ir->get_array_element(i)); 9987ec681f3Smrg 9997ec681f3Smrg ir->type = lower_glsl_type(ir->type); 10007ec681f3Smrg return; 10017ec681f3Smrg } 10027ec681f3Smrg 10037ec681f3Smrg ir->type = lower_glsl_type(ir->type); 10047ec681f3Smrg ir_constant_data value; 10057ec681f3Smrg 10067ec681f3Smrg if (ir->type->base_type == GLSL_TYPE_FLOAT16) { 10077ec681f3Smrg for (unsigned i = 0; i < ARRAY_SIZE(value.f16); i++) 10087ec681f3Smrg value.f16[i] = _mesa_float_to_half(ir->value.f[i]); 10097ec681f3Smrg } else if (ir->type->base_type == GLSL_TYPE_INT16) { 10107ec681f3Smrg for (unsigned i = 0; i < ARRAY_SIZE(value.i16); i++) 10117ec681f3Smrg value.i16[i] = ir->value.i[i]; 10127ec681f3Smrg } else if (ir->type->base_type == GLSL_TYPE_UINT16) { 10137ec681f3Smrg for (unsigned i = 0; i < ARRAY_SIZE(value.u16); i++) 10147ec681f3Smrg value.u16[i] = ir->value.u[i]; 10157ec681f3Smrg } else { 10167ec681f3Smrg unreachable("invalid type"); 10177ec681f3Smrg } 10187ec681f3Smrg 10197ec681f3Smrg ir->value = value; 10207ec681f3Smrg} 10217ec681f3Smrg 10227ec681f3Smrgir_visitor_status 10237ec681f3Smrglower_variables_visitor::visit(ir_variable *var) 10247ec681f3Smrg{ 10257ec681f3Smrg if ((var->data.mode != ir_var_temporary && 10267ec681f3Smrg var->data.mode != ir_var_auto && 10277ec681f3Smrg /* Lower uniforms but not UBOs. */ 10287ec681f3Smrg (var->data.mode != ir_var_uniform || 10297ec681f3Smrg var->is_in_buffer_block() || 10307ec681f3Smrg !(options->LowerPrecisionFloat16Uniforms && 10317ec681f3Smrg var->type->without_array()->base_type == GLSL_TYPE_FLOAT))) || 10327ec681f3Smrg !var->type->without_array()->is_32bit() || 10337ec681f3Smrg (var->data.precision != GLSL_PRECISION_MEDIUM && 10347ec681f3Smrg var->data.precision != GLSL_PRECISION_LOW) || 10357ec681f3Smrg !can_lower_type(options, var->type)) 10367ec681f3Smrg return visit_continue; 10377ec681f3Smrg 10387ec681f3Smrg /* Lower constant initializers. */ 10397ec681f3Smrg if (var->constant_value && 10407ec681f3Smrg var->type == var->constant_value->type) { 10417ec681f3Smrg if (!options->LowerPrecisionConstants) 10427ec681f3Smrg return visit_continue; 10437ec681f3Smrg var->constant_value = 10447ec681f3Smrg var->constant_value->clone(ralloc_parent(var), NULL); 10457ec681f3Smrg lower_constant(var->constant_value); 10467ec681f3Smrg } 10477ec681f3Smrg 10487ec681f3Smrg if (var->constant_initializer && 10497ec681f3Smrg var->type == var->constant_initializer->type) { 10507ec681f3Smrg if (!options->LowerPrecisionConstants) 10517ec681f3Smrg return visit_continue; 10527ec681f3Smrg var->constant_initializer = 10537ec681f3Smrg var->constant_initializer->clone(ralloc_parent(var), NULL); 10547ec681f3Smrg lower_constant(var->constant_initializer); 10557ec681f3Smrg } 10567ec681f3Smrg 10577ec681f3Smrg var->type = lower_glsl_type(var->type); 10587ec681f3Smrg _mesa_set_add(lower_vars, var); 10597ec681f3Smrg 10607ec681f3Smrg return visit_continue; 10617ec681f3Smrg} 10627ec681f3Smrg 10637ec681f3Smrgvoid 10647ec681f3Smrglower_variables_visitor::fix_types_in_deref_chain(ir_dereference *ir) 10657ec681f3Smrg{ 10667ec681f3Smrg assert(ir->type->without_array()->is_32bit()); 10677ec681f3Smrg assert(_mesa_set_search(lower_vars, ir->variable_referenced())); 10687ec681f3Smrg 10697ec681f3Smrg /* Fix the type in the dereference node. */ 10707ec681f3Smrg ir->type = lower_glsl_type(ir->type); 10717ec681f3Smrg 10727ec681f3Smrg /* If it's an array, fix the types in the whole dereference chain. */ 10737ec681f3Smrg for (ir_dereference_array *deref_array = ir->as_dereference_array(); 10747ec681f3Smrg deref_array; 10757ec681f3Smrg deref_array = deref_array->array->as_dereference_array()) { 10767ec681f3Smrg assert(deref_array->array->type->without_array()->is_32bit()); 10777ec681f3Smrg deref_array->array->type = lower_glsl_type(deref_array->array->type); 10787ec681f3Smrg } 10797ec681f3Smrg} 10807ec681f3Smrg 10817ec681f3Smrgvoid 10827ec681f3Smrglower_variables_visitor::convert_split_assignment(ir_dereference *lhs, 10837ec681f3Smrg ir_rvalue *rhs, 10847ec681f3Smrg bool insert_before) 10857ec681f3Smrg{ 10867ec681f3Smrg void *mem_ctx = ralloc_parent(lhs); 10877ec681f3Smrg 10887ec681f3Smrg if (lhs->type->is_array()) { 10897ec681f3Smrg for (unsigned i = 0; i < lhs->type->length; i++) { 10907ec681f3Smrg ir_dereference *l, *r; 10917ec681f3Smrg 10927ec681f3Smrg l = new(mem_ctx) ir_dereference_array(lhs->clone(mem_ctx, NULL), 10937ec681f3Smrg new(mem_ctx) ir_constant(i)); 10947ec681f3Smrg r = new(mem_ctx) ir_dereference_array(rhs->clone(mem_ctx, NULL), 10957ec681f3Smrg new(mem_ctx) ir_constant(i)); 10967ec681f3Smrg convert_split_assignment(l, r, insert_before); 10977ec681f3Smrg } 10987ec681f3Smrg return; 10997ec681f3Smrg } 11007ec681f3Smrg 11017ec681f3Smrg assert(lhs->type->is_16bit() || lhs->type->is_32bit()); 11027ec681f3Smrg assert(rhs->type->is_16bit() || rhs->type->is_32bit()); 11037ec681f3Smrg assert(lhs->type->is_16bit() != rhs->type->is_16bit()); 11047ec681f3Smrg 11057ec681f3Smrg ir_assignment *assign = 11067ec681f3Smrg new(mem_ctx) ir_assignment(lhs, convert_precision(lhs->type->is_32bit(), rhs)); 11077ec681f3Smrg 11087ec681f3Smrg if (insert_before) 11097ec681f3Smrg base_ir->insert_before(assign); 11107ec681f3Smrg else 11117ec681f3Smrg base_ir->insert_after(assign); 11127ec681f3Smrg} 11137ec681f3Smrg 11147ec681f3Smrgir_visitor_status 11157ec681f3Smrglower_variables_visitor::visit_enter(ir_assignment *ir) 11167ec681f3Smrg{ 11177ec681f3Smrg ir_dereference *lhs = ir->lhs; 11187ec681f3Smrg ir_variable *var = lhs->variable_referenced(); 11197ec681f3Smrg ir_dereference *rhs_deref = ir->rhs->as_dereference(); 11207ec681f3Smrg ir_variable *rhs_var = rhs_deref ? rhs_deref->variable_referenced() : NULL; 11217ec681f3Smrg ir_constant *rhs_const = ir->rhs->as_constant(); 11227ec681f3Smrg 11237ec681f3Smrg /* Legalize array assignments between lowered and non-lowered variables. */ 11247ec681f3Smrg if (lhs->type->is_array() && 11257ec681f3Smrg (rhs_var || rhs_const) && 11267ec681f3Smrg (!rhs_var || 11277ec681f3Smrg (var && 11287ec681f3Smrg var->type->without_array()->is_16bit() != 11297ec681f3Smrg rhs_var->type->without_array()->is_16bit())) && 11307ec681f3Smrg (!rhs_const || 11317ec681f3Smrg (var && 11327ec681f3Smrg var->type->without_array()->is_16bit() && 11337ec681f3Smrg rhs_const->type->without_array()->is_32bit()))) { 11347ec681f3Smrg assert(ir->rhs->type->is_array()); 11357ec681f3Smrg 11367ec681f3Smrg /* Fix array assignments from lowered to non-lowered. */ 11377ec681f3Smrg if (rhs_var && _mesa_set_search(lower_vars, rhs_var)) { 11387ec681f3Smrg fix_types_in_deref_chain(rhs_deref); 11397ec681f3Smrg /* Convert to 32 bits for LHS. */ 11407ec681f3Smrg convert_split_assignment(lhs, rhs_deref, true); 11417ec681f3Smrg ir->remove(); 11427ec681f3Smrg return visit_continue; 11437ec681f3Smrg } 11447ec681f3Smrg 11457ec681f3Smrg /* Fix array assignments from non-lowered to lowered. */ 11467ec681f3Smrg if (var && 11477ec681f3Smrg _mesa_set_search(lower_vars, var) && 11487ec681f3Smrg ir->rhs->type->without_array()->is_32bit()) { 11497ec681f3Smrg fix_types_in_deref_chain(lhs); 11507ec681f3Smrg /* Convert to 16 bits for LHS. */ 11517ec681f3Smrg convert_split_assignment(lhs, ir->rhs, true); 11527ec681f3Smrg ir->remove(); 11537ec681f3Smrg return visit_continue; 11547ec681f3Smrg } 11557ec681f3Smrg } 11567ec681f3Smrg 11577ec681f3Smrg /* Fix assignment types. */ 11587ec681f3Smrg if (var && 11597ec681f3Smrg _mesa_set_search(lower_vars, var)) { 11607ec681f3Smrg /* Fix the LHS type. */ 11617ec681f3Smrg if (lhs->type->without_array()->is_32bit()) 11627ec681f3Smrg fix_types_in_deref_chain(lhs); 11637ec681f3Smrg 11647ec681f3Smrg /* Fix the RHS type if it's a lowered variable. */ 11657ec681f3Smrg if (rhs_var && 11667ec681f3Smrg _mesa_set_search(lower_vars, rhs_var) && 11677ec681f3Smrg rhs_deref->type->without_array()->is_32bit()) 11687ec681f3Smrg fix_types_in_deref_chain(rhs_deref); 11697ec681f3Smrg 11707ec681f3Smrg /* Fix the RHS type if it's a non-array expression. */ 11717ec681f3Smrg if (ir->rhs->type->is_32bit()) { 11727ec681f3Smrg ir_expression *expr = ir->rhs->as_expression(); 11737ec681f3Smrg 11747ec681f3Smrg /* Convert the RHS to the LHS type. */ 11757ec681f3Smrg if (expr && 11767ec681f3Smrg (expr->operation == ir_unop_f162f || 11777ec681f3Smrg expr->operation == ir_unop_i2i || 11787ec681f3Smrg expr->operation == ir_unop_u2u) && 11797ec681f3Smrg expr->operands[0]->type->is_16bit()) { 11807ec681f3Smrg /* If there is an "up" conversion, just remove it. 11817ec681f3Smrg * This is optional. We could as well execute the else statement and 11827ec681f3Smrg * let NIR eliminate the up+down conversions. 11837ec681f3Smrg */ 11847ec681f3Smrg ir->rhs = expr->operands[0]; 11857ec681f3Smrg } else { 11867ec681f3Smrg /* Add a "down" conversion operation to fix the type of RHS. */ 11877ec681f3Smrg ir->rhs = convert_precision(false, ir->rhs); 11887ec681f3Smrg } 11897ec681f3Smrg } 11907ec681f3Smrg } 11917ec681f3Smrg 11927ec681f3Smrg return ir_rvalue_enter_visitor::visit_enter(ir); 11937ec681f3Smrg} 11947ec681f3Smrg 11957ec681f3Smrgir_visitor_status 11967ec681f3Smrglower_variables_visitor::visit_enter(ir_return *ir) 11977ec681f3Smrg{ 11987ec681f3Smrg void *mem_ctx = ralloc_parent(ir); 11997ec681f3Smrg 12007ec681f3Smrg ir_dereference *deref = ir->value ? ir->value->as_dereference() : NULL; 12017ec681f3Smrg if (deref) { 12027ec681f3Smrg ir_variable *var = deref->variable_referenced(); 12037ec681f3Smrg 12047ec681f3Smrg /* Fix the type of the return value. */ 12057ec681f3Smrg if (var && 12067ec681f3Smrg _mesa_set_search(lower_vars, var) && 12077ec681f3Smrg deref->type->without_array()->is_32bit()) { 12087ec681f3Smrg /* Create a 32-bit temporary variable. */ 12097ec681f3Smrg ir_variable *new_var = 12107ec681f3Smrg new(mem_ctx) ir_variable(deref->type, "lowerp", ir_var_temporary); 12117ec681f3Smrg base_ir->insert_before(new_var); 12127ec681f3Smrg 12137ec681f3Smrg /* Fix types in dereferences. */ 12147ec681f3Smrg fix_types_in_deref_chain(deref); 12157ec681f3Smrg 12167ec681f3Smrg /* Convert to 32 bits for the return value. */ 12177ec681f3Smrg convert_split_assignment(new(mem_ctx) ir_dereference_variable(new_var), 12187ec681f3Smrg deref, true); 12197ec681f3Smrg ir->value = new(mem_ctx) ir_dereference_variable(new_var); 12207ec681f3Smrg } 12217ec681f3Smrg } 12227ec681f3Smrg 12237ec681f3Smrg return ir_rvalue_enter_visitor::visit_enter(ir); 12247ec681f3Smrg} 12257ec681f3Smrg 12267ec681f3Smrgvoid lower_variables_visitor::handle_rvalue(ir_rvalue **rvalue) 12277ec681f3Smrg{ 12287ec681f3Smrg ir_rvalue *ir = *rvalue; 12297ec681f3Smrg 12307ec681f3Smrg if (in_assignee || ir == NULL) 12317ec681f3Smrg return; 12327ec681f3Smrg 12337ec681f3Smrg ir_expression *expr = ir->as_expression(); 12347ec681f3Smrg ir_dereference *expr_op0_deref = expr ? expr->operands[0]->as_dereference() : NULL; 12357ec681f3Smrg 12367ec681f3Smrg /* Remove f2fmp(float16). Same for int16 and uint16. */ 12377ec681f3Smrg if (expr && 12387ec681f3Smrg expr_op0_deref && 12397ec681f3Smrg (expr->operation == ir_unop_f2fmp || 12407ec681f3Smrg expr->operation == ir_unop_i2imp || 12417ec681f3Smrg expr->operation == ir_unop_u2ump || 12427ec681f3Smrg expr->operation == ir_unop_f2f16 || 12437ec681f3Smrg expr->operation == ir_unop_i2i || 12447ec681f3Smrg expr->operation == ir_unop_u2u) && 12457ec681f3Smrg expr->type->without_array()->is_16bit() && 12467ec681f3Smrg expr_op0_deref->type->without_array()->is_32bit() && 12477ec681f3Smrg expr_op0_deref->variable_referenced() && 12487ec681f3Smrg _mesa_set_search(lower_vars, expr_op0_deref->variable_referenced())) { 12497ec681f3Smrg fix_types_in_deref_chain(expr_op0_deref); 12507ec681f3Smrg 12517ec681f3Smrg /* Remove f2fmp/i2imp/u2ump. */ 12527ec681f3Smrg *rvalue = expr_op0_deref; 12537ec681f3Smrg return; 12547ec681f3Smrg } 12557ec681f3Smrg 12567ec681f3Smrg ir_dereference *deref = ir->as_dereference(); 12577ec681f3Smrg 12587ec681f3Smrg if (deref) { 12597ec681f3Smrg ir_variable *var = deref->variable_referenced(); 12607ec681f3Smrg 12617ec681f3Smrg /* var can be NULL if we are dereferencing ir_constant. */ 12627ec681f3Smrg if (var && 12637ec681f3Smrg _mesa_set_search(lower_vars, var) && 12647ec681f3Smrg deref->type->without_array()->is_32bit()) { 12657ec681f3Smrg void *mem_ctx = ralloc_parent(ir); 12667ec681f3Smrg 12677ec681f3Smrg /* Create a 32-bit temporary variable. */ 12687ec681f3Smrg ir_variable *new_var = 12697ec681f3Smrg new(mem_ctx) ir_variable(deref->type, "lowerp", ir_var_temporary); 12707ec681f3Smrg base_ir->insert_before(new_var); 12717ec681f3Smrg 12727ec681f3Smrg /* Fix types in dereferences. */ 12737ec681f3Smrg fix_types_in_deref_chain(deref); 12747ec681f3Smrg 12757ec681f3Smrg /* Convert to 32 bits for the rvalue. */ 12767ec681f3Smrg convert_split_assignment(new(mem_ctx) ir_dereference_variable(new_var), 12777ec681f3Smrg deref, true); 12787ec681f3Smrg *rvalue = new(mem_ctx) ir_dereference_variable(new_var); 12797ec681f3Smrg } 12807ec681f3Smrg } 12817ec681f3Smrg} 12827ec681f3Smrg 12837ec681f3Smrgir_visitor_status 12847ec681f3Smrglower_variables_visitor::visit_enter(ir_call *ir) 12857ec681f3Smrg{ 12867ec681f3Smrg void *mem_ctx = ralloc_parent(ir); 12877ec681f3Smrg 12887ec681f3Smrg /* We can't pass 16-bit variables as 32-bit inout/out parameters. */ 12897ec681f3Smrg foreach_two_lists(formal_node, &ir->callee->parameters, 12907ec681f3Smrg actual_node, &ir->actual_parameters) { 12917ec681f3Smrg ir_dereference *param_deref = 12927ec681f3Smrg ((ir_rvalue *)actual_node)->as_dereference(); 12937ec681f3Smrg ir_variable *param = (ir_variable *)formal_node; 12947ec681f3Smrg 12957ec681f3Smrg if (!param_deref) 12967ec681f3Smrg continue; 12977ec681f3Smrg 12987ec681f3Smrg ir_variable *var = param_deref->variable_referenced(); 12997ec681f3Smrg 13007ec681f3Smrg /* var can be NULL if we are dereferencing ir_constant. */ 13017ec681f3Smrg if (var && 13027ec681f3Smrg _mesa_set_search(lower_vars, var) && 13037ec681f3Smrg param->type->without_array()->is_32bit()) { 13047ec681f3Smrg fix_types_in_deref_chain(param_deref); 13057ec681f3Smrg 13067ec681f3Smrg /* Create a 32-bit temporary variable for the parameter. */ 13077ec681f3Smrg ir_variable *new_var = 13087ec681f3Smrg new(mem_ctx) ir_variable(param->type, "lowerp", ir_var_temporary); 13097ec681f3Smrg base_ir->insert_before(new_var); 13107ec681f3Smrg 13117ec681f3Smrg /* Replace the parameter. */ 13127ec681f3Smrg actual_node->replace_with(new(mem_ctx) ir_dereference_variable(new_var)); 13137ec681f3Smrg 13147ec681f3Smrg if (param->data.mode == ir_var_function_in || 13157ec681f3Smrg param->data.mode == ir_var_function_inout) { 13167ec681f3Smrg /* Convert to 32 bits for passing in. */ 13177ec681f3Smrg convert_split_assignment(new(mem_ctx) ir_dereference_variable(new_var), 13187ec681f3Smrg param_deref->clone(mem_ctx, NULL), true); 13197ec681f3Smrg } 13207ec681f3Smrg if (param->data.mode == ir_var_function_out || 13217ec681f3Smrg param->data.mode == ir_var_function_inout) { 13227ec681f3Smrg /* Convert to 16 bits after returning. */ 13237ec681f3Smrg convert_split_assignment(param_deref, 13247ec681f3Smrg new(mem_ctx) ir_dereference_variable(new_var), 13257ec681f3Smrg false); 13267ec681f3Smrg } 13277ec681f3Smrg } 13287ec681f3Smrg } 13297ec681f3Smrg 13307ec681f3Smrg /* Fix the type of return value dereferencies. */ 13317ec681f3Smrg ir_dereference_variable *ret_deref = ir->return_deref; 13327ec681f3Smrg ir_variable *ret_var = ret_deref ? ret_deref->variable_referenced() : NULL; 13337ec681f3Smrg 13347ec681f3Smrg if (ret_var && 13357ec681f3Smrg _mesa_set_search(lower_vars, ret_var) && 13367ec681f3Smrg ret_deref->type->without_array()->is_32bit()) { 13377ec681f3Smrg /* Create a 32-bit temporary variable. */ 13387ec681f3Smrg ir_variable *new_var = 13397ec681f3Smrg new(mem_ctx) ir_variable(ir->callee->return_type, "lowerp", 13407ec681f3Smrg ir_var_temporary); 13417ec681f3Smrg base_ir->insert_before(new_var); 13427ec681f3Smrg 13437ec681f3Smrg /* Replace the return variable. */ 13447ec681f3Smrg ret_deref->var = new_var; 13457ec681f3Smrg 13467ec681f3Smrg /* Convert to 16 bits after returning. */ 13477ec681f3Smrg convert_split_assignment(new(mem_ctx) ir_dereference_variable(ret_var), 13487ec681f3Smrg new(mem_ctx) ir_dereference_variable(new_var), 13497ec681f3Smrg false); 13507ec681f3Smrg } 13517ec681f3Smrg 13527ec681f3Smrg return ir_rvalue_enter_visitor::visit_enter(ir); 13537ec681f3Smrg} 13547ec681f3Smrg 13557ec681f3Smrg} 13567ec681f3Smrg 13577ec681f3Smrgvoid 13587ec681f3Smrglower_precision(const struct gl_shader_compiler_options *options, 13597ec681f3Smrg exec_list *instructions) 13607ec681f3Smrg{ 13617ec681f3Smrg find_precision_visitor v(options); 13627ec681f3Smrg find_lowerable_rvalues(options, instructions, v.lowerable_rvalues); 13637ec681f3Smrg visit_list_elements(&v, instructions); 13647ec681f3Smrg 13657ec681f3Smrg lower_variables_visitor vars(options); 13667ec681f3Smrg visit_list_elements(&vars, instructions); 13677ec681f3Smrg} 1368