101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2010 Intel Corporation 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 2101e04c3fSmrg * DEALINGS IN THE SOFTWARE. 2201e04c3fSmrg */ 2301e04c3fSmrg 2401e04c3fSmrg/** 2501e04c3fSmrg * \file opt_tree_grafting.cpp 2601e04c3fSmrg * 2701e04c3fSmrg * Takes assignments to variables that are dereferenced only once and 2801e04c3fSmrg * pastes the RHS expression into where the variable is dereferenced. 2901e04c3fSmrg * 3001e04c3fSmrg * In the process of various operations like function inlining and 3101e04c3fSmrg * tertiary op handling, we'll end up with our expression trees having 3201e04c3fSmrg * been chopped up into a series of assignments of short expressions 3301e04c3fSmrg * to temps. Other passes like ir_algebraic.cpp would prefer to see 3401e04c3fSmrg * the deepest expression trees they can to try to optimize them. 3501e04c3fSmrg * 3601e04c3fSmrg * This is a lot like copy propagaton. In comparison, copy 3701e04c3fSmrg * propagation only acts on plain copies, not arbitrary expressions on 3801e04c3fSmrg * the RHS. Generally, we wouldn't want to go pasting some 3901e04c3fSmrg * complicated expression everywhere it got used, though, so we don't 4001e04c3fSmrg * handle expressions in that pass. 4101e04c3fSmrg * 4201e04c3fSmrg * The hard part is making sure we don't move an expression across 4301e04c3fSmrg * some other assignments that would change the value of the 4401e04c3fSmrg * expression. So we split this into two passes: First, find the 4501e04c3fSmrg * variables in our scope which are written to once and read once, and 4601e04c3fSmrg * then go through basic blocks seeing if we find an opportunity to 4701e04c3fSmrg * move those expressions safely. 4801e04c3fSmrg */ 4901e04c3fSmrg 5001e04c3fSmrg#include "ir.h" 5101e04c3fSmrg#include "ir_visitor.h" 5201e04c3fSmrg#include "ir_variable_refcount.h" 5301e04c3fSmrg#include "ir_basic_block.h" 5401e04c3fSmrg#include "ir_optimization.h" 5501e04c3fSmrg#include "compiler/glsl_types.h" 5601e04c3fSmrg 5701e04c3fSmrgnamespace { 5801e04c3fSmrg 5901e04c3fSmrgstatic bool debug = false; 6001e04c3fSmrg 6101e04c3fSmrgclass ir_tree_grafting_visitor : public ir_hierarchical_visitor { 6201e04c3fSmrgpublic: 6301e04c3fSmrg ir_tree_grafting_visitor(ir_assignment *graft_assign, 6401e04c3fSmrg ir_variable *graft_var) 6501e04c3fSmrg { 6601e04c3fSmrg this->progress = false; 6701e04c3fSmrg this->graft_assign = graft_assign; 6801e04c3fSmrg this->graft_var = graft_var; 6901e04c3fSmrg } 7001e04c3fSmrg 7101e04c3fSmrg virtual ir_visitor_status visit_leave(class ir_assignment *); 7201e04c3fSmrg virtual ir_visitor_status visit_enter(class ir_call *); 7301e04c3fSmrg virtual ir_visitor_status visit_enter(class ir_expression *); 7401e04c3fSmrg virtual ir_visitor_status visit_enter(class ir_function *); 7501e04c3fSmrg virtual ir_visitor_status visit_enter(class ir_function_signature *); 7601e04c3fSmrg virtual ir_visitor_status visit_enter(class ir_if *); 7701e04c3fSmrg virtual ir_visitor_status visit_enter(class ir_loop *); 7801e04c3fSmrg virtual ir_visitor_status visit_enter(class ir_swizzle *); 7901e04c3fSmrg virtual ir_visitor_status visit_enter(class ir_texture *); 8001e04c3fSmrg 8101e04c3fSmrg ir_visitor_status check_graft(ir_instruction *ir, ir_variable *var); 8201e04c3fSmrg 8301e04c3fSmrg bool do_graft(ir_rvalue **rvalue); 8401e04c3fSmrg 8501e04c3fSmrg bool progress; 8601e04c3fSmrg ir_variable *graft_var; 8701e04c3fSmrg ir_assignment *graft_assign; 8801e04c3fSmrg}; 8901e04c3fSmrg 9001e04c3fSmrgstruct find_deref_info { 9101e04c3fSmrg ir_variable *var; 9201e04c3fSmrg bool found; 9301e04c3fSmrg}; 9401e04c3fSmrg 9501e04c3fSmrgvoid 9601e04c3fSmrgdereferences_variable_callback(ir_instruction *ir, void *data) 9701e04c3fSmrg{ 9801e04c3fSmrg struct find_deref_info *info = (struct find_deref_info *)data; 9901e04c3fSmrg ir_dereference_variable *deref = ir->as_dereference_variable(); 10001e04c3fSmrg 10101e04c3fSmrg if (deref && deref->var == info->var) 10201e04c3fSmrg info->found = true; 10301e04c3fSmrg} 10401e04c3fSmrg 10501e04c3fSmrgstatic bool 10601e04c3fSmrgdereferences_variable(ir_instruction *ir, ir_variable *var) 10701e04c3fSmrg{ 10801e04c3fSmrg struct find_deref_info info; 10901e04c3fSmrg 11001e04c3fSmrg info.var = var; 11101e04c3fSmrg info.found = false; 11201e04c3fSmrg 11301e04c3fSmrg visit_tree(ir, dereferences_variable_callback, &info); 11401e04c3fSmrg 11501e04c3fSmrg return info.found; 11601e04c3fSmrg} 11701e04c3fSmrg 11801e04c3fSmrgbool 11901e04c3fSmrgir_tree_grafting_visitor::do_graft(ir_rvalue **rvalue) 12001e04c3fSmrg{ 12101e04c3fSmrg if (!*rvalue) 12201e04c3fSmrg return false; 12301e04c3fSmrg 12401e04c3fSmrg ir_dereference_variable *deref = (*rvalue)->as_dereference_variable(); 12501e04c3fSmrg 12601e04c3fSmrg if (!deref || deref->var != this->graft_var) 12701e04c3fSmrg return false; 12801e04c3fSmrg 12901e04c3fSmrg if (debug) { 13001e04c3fSmrg fprintf(stderr, "GRAFTING:\n"); 13101e04c3fSmrg this->graft_assign->fprint(stderr); 13201e04c3fSmrg fprintf(stderr, "\n"); 13301e04c3fSmrg fprintf(stderr, "TO:\n"); 13401e04c3fSmrg (*rvalue)->fprint(stderr); 13501e04c3fSmrg fprintf(stderr, "\n"); 13601e04c3fSmrg } 13701e04c3fSmrg 13801e04c3fSmrg this->graft_assign->remove(); 13901e04c3fSmrg *rvalue = this->graft_assign->rhs; 14001e04c3fSmrg 14101e04c3fSmrg this->progress = true; 14201e04c3fSmrg return true; 14301e04c3fSmrg} 14401e04c3fSmrg 14501e04c3fSmrgir_visitor_status 14601e04c3fSmrgir_tree_grafting_visitor::visit_enter(ir_loop *ir) 14701e04c3fSmrg{ 14801e04c3fSmrg (void)ir; 14901e04c3fSmrg /* Do not traverse into the body of the loop since that is a 15001e04c3fSmrg * different basic block. 15101e04c3fSmrg */ 15201e04c3fSmrg return visit_stop; 15301e04c3fSmrg} 15401e04c3fSmrg 15501e04c3fSmrg/** 15601e04c3fSmrg * Check if we can continue grafting after writing to a variable. If the 15701e04c3fSmrg * expression we're trying to graft references the variable, we must stop. 15801e04c3fSmrg * 15901e04c3fSmrg * \param ir An instruction that writes to a variable. 16001e04c3fSmrg * \param var The variable being updated. 16101e04c3fSmrg */ 16201e04c3fSmrgir_visitor_status 16301e04c3fSmrgir_tree_grafting_visitor::check_graft(ir_instruction *ir, ir_variable *var) 16401e04c3fSmrg{ 16501e04c3fSmrg if (dereferences_variable(this->graft_assign->rhs, var)) { 16601e04c3fSmrg if (debug) { 16701e04c3fSmrg fprintf(stderr, "graft killed by: "); 16801e04c3fSmrg ir->fprint(stderr); 16901e04c3fSmrg fprintf(stderr, "\n"); 17001e04c3fSmrg } 17101e04c3fSmrg return visit_stop; 17201e04c3fSmrg } 17301e04c3fSmrg 17401e04c3fSmrg return visit_continue; 17501e04c3fSmrg} 17601e04c3fSmrg 17701e04c3fSmrgir_visitor_status 17801e04c3fSmrgir_tree_grafting_visitor::visit_leave(ir_assignment *ir) 17901e04c3fSmrg{ 18001e04c3fSmrg if (do_graft(&ir->rhs) || 18101e04c3fSmrg do_graft(&ir->condition)) 18201e04c3fSmrg return visit_stop; 18301e04c3fSmrg 18401e04c3fSmrg /* If this assignment updates a variable used in the assignment 18501e04c3fSmrg * we're trying to graft, then we're done. 18601e04c3fSmrg */ 18701e04c3fSmrg return check_graft(ir, ir->lhs->variable_referenced()); 18801e04c3fSmrg} 18901e04c3fSmrg 19001e04c3fSmrgir_visitor_status 19101e04c3fSmrgir_tree_grafting_visitor::visit_enter(ir_function *ir) 19201e04c3fSmrg{ 19301e04c3fSmrg (void) ir; 19401e04c3fSmrg return visit_continue_with_parent; 19501e04c3fSmrg} 19601e04c3fSmrg 19701e04c3fSmrgir_visitor_status 19801e04c3fSmrgir_tree_grafting_visitor::visit_enter(ir_function_signature *ir) 19901e04c3fSmrg{ 20001e04c3fSmrg (void)ir; 20101e04c3fSmrg return visit_continue_with_parent; 20201e04c3fSmrg} 20301e04c3fSmrg 20401e04c3fSmrgir_visitor_status 20501e04c3fSmrgir_tree_grafting_visitor::visit_enter(ir_call *ir) 20601e04c3fSmrg{ 20701e04c3fSmrg foreach_two_lists(formal_node, &ir->callee->parameters, 20801e04c3fSmrg actual_node, &ir->actual_parameters) { 20901e04c3fSmrg ir_variable *sig_param = (ir_variable *) formal_node; 21001e04c3fSmrg ir_rvalue *ir = (ir_rvalue *) actual_node; 21101e04c3fSmrg ir_rvalue *new_ir = ir; 21201e04c3fSmrg 21301e04c3fSmrg if (sig_param->data.mode != ir_var_function_in 21401e04c3fSmrg && sig_param->data.mode != ir_var_const_in) { 21501e04c3fSmrg if (check_graft(ir, sig_param) == visit_stop) 21601e04c3fSmrg return visit_stop; 21701e04c3fSmrg continue; 21801e04c3fSmrg } 21901e04c3fSmrg 22001e04c3fSmrg if (do_graft(&new_ir)) { 22101e04c3fSmrg ir->replace_with(new_ir); 22201e04c3fSmrg return visit_stop; 22301e04c3fSmrg } 22401e04c3fSmrg } 22501e04c3fSmrg 22601e04c3fSmrg if (ir->return_deref && check_graft(ir, ir->return_deref->var) == visit_stop) 22701e04c3fSmrg return visit_stop; 22801e04c3fSmrg 22901e04c3fSmrg return visit_continue; 23001e04c3fSmrg} 23101e04c3fSmrg 23201e04c3fSmrgir_visitor_status 23301e04c3fSmrgir_tree_grafting_visitor::visit_enter(ir_expression *ir) 23401e04c3fSmrg{ 23501e04c3fSmrg for (unsigned int i = 0; i < ir->num_operands; i++) { 23601e04c3fSmrg if (do_graft(&ir->operands[i])) 23701e04c3fSmrg return visit_stop; 23801e04c3fSmrg } 23901e04c3fSmrg 24001e04c3fSmrg return visit_continue; 24101e04c3fSmrg} 24201e04c3fSmrg 24301e04c3fSmrgir_visitor_status 24401e04c3fSmrgir_tree_grafting_visitor::visit_enter(ir_if *ir) 24501e04c3fSmrg{ 24601e04c3fSmrg if (do_graft(&ir->condition)) 24701e04c3fSmrg return visit_stop; 24801e04c3fSmrg 24901e04c3fSmrg /* Do not traverse into the body of the if-statement since that is a 25001e04c3fSmrg * different basic block. 25101e04c3fSmrg */ 25201e04c3fSmrg return visit_continue_with_parent; 25301e04c3fSmrg} 25401e04c3fSmrg 25501e04c3fSmrgir_visitor_status 25601e04c3fSmrgir_tree_grafting_visitor::visit_enter(ir_swizzle *ir) 25701e04c3fSmrg{ 25801e04c3fSmrg if (do_graft(&ir->val)) 25901e04c3fSmrg return visit_stop; 26001e04c3fSmrg 26101e04c3fSmrg return visit_continue; 26201e04c3fSmrg} 26301e04c3fSmrg 26401e04c3fSmrgir_visitor_status 26501e04c3fSmrgir_tree_grafting_visitor::visit_enter(ir_texture *ir) 26601e04c3fSmrg{ 26701e04c3fSmrg if (do_graft(&ir->coordinate) || 26801e04c3fSmrg do_graft(&ir->projector) || 26901e04c3fSmrg do_graft(&ir->offset) || 27001e04c3fSmrg do_graft(&ir->shadow_comparator)) 27101e04c3fSmrg return visit_stop; 27201e04c3fSmrg 27301e04c3fSmrg switch (ir->op) { 27401e04c3fSmrg case ir_tex: 27501e04c3fSmrg case ir_lod: 27601e04c3fSmrg case ir_query_levels: 27701e04c3fSmrg case ir_texture_samples: 27801e04c3fSmrg case ir_samples_identical: 27901e04c3fSmrg break; 28001e04c3fSmrg case ir_txb: 28101e04c3fSmrg if (do_graft(&ir->lod_info.bias)) 28201e04c3fSmrg return visit_stop; 28301e04c3fSmrg break; 28401e04c3fSmrg case ir_txf: 28501e04c3fSmrg case ir_txl: 28601e04c3fSmrg case ir_txs: 28701e04c3fSmrg if (do_graft(&ir->lod_info.lod)) 28801e04c3fSmrg return visit_stop; 28901e04c3fSmrg break; 29001e04c3fSmrg case ir_txf_ms: 29101e04c3fSmrg if (do_graft(&ir->lod_info.sample_index)) 29201e04c3fSmrg return visit_stop; 29301e04c3fSmrg break; 29401e04c3fSmrg case ir_txd: 29501e04c3fSmrg if (do_graft(&ir->lod_info.grad.dPdx) || 29601e04c3fSmrg do_graft(&ir->lod_info.grad.dPdy)) 29701e04c3fSmrg return visit_stop; 29801e04c3fSmrg break; 29901e04c3fSmrg case ir_tg4: 30001e04c3fSmrg if (do_graft(&ir->lod_info.component)) 30101e04c3fSmrg return visit_stop; 30201e04c3fSmrg break; 30301e04c3fSmrg } 30401e04c3fSmrg 30501e04c3fSmrg return visit_continue; 30601e04c3fSmrg} 30701e04c3fSmrg 30801e04c3fSmrgstruct tree_grafting_info { 30901e04c3fSmrg ir_variable_refcount_visitor *refs; 31001e04c3fSmrg bool progress; 31101e04c3fSmrg}; 31201e04c3fSmrg 31301e04c3fSmrgstatic bool 31401e04c3fSmrgtry_tree_grafting(ir_assignment *start, 31501e04c3fSmrg ir_variable *lhs_var, 31601e04c3fSmrg ir_instruction *bb_last) 31701e04c3fSmrg{ 31801e04c3fSmrg ir_tree_grafting_visitor v(start, lhs_var); 31901e04c3fSmrg 32001e04c3fSmrg if (debug) { 32101e04c3fSmrg fprintf(stderr, "trying to graft: "); 32201e04c3fSmrg lhs_var->fprint(stderr); 32301e04c3fSmrg fprintf(stderr, "\n"); 32401e04c3fSmrg } 32501e04c3fSmrg 32601e04c3fSmrg for (ir_instruction *ir = (ir_instruction *)start->next; 32701e04c3fSmrg ir != bb_last->next; 32801e04c3fSmrg ir = (ir_instruction *)ir->next) { 32901e04c3fSmrg 33001e04c3fSmrg if (debug) { 33101e04c3fSmrg fprintf(stderr, "- "); 33201e04c3fSmrg ir->fprint(stderr); 33301e04c3fSmrg fprintf(stderr, "\n"); 33401e04c3fSmrg } 33501e04c3fSmrg 33601e04c3fSmrg ir_visitor_status s = ir->accept(&v); 33701e04c3fSmrg if (s == visit_stop) 33801e04c3fSmrg return v.progress; 33901e04c3fSmrg } 34001e04c3fSmrg 34101e04c3fSmrg return false; 34201e04c3fSmrg} 34301e04c3fSmrg 34401e04c3fSmrgstatic void 34501e04c3fSmrgtree_grafting_basic_block(ir_instruction *bb_first, 34601e04c3fSmrg ir_instruction *bb_last, 34701e04c3fSmrg void *data) 34801e04c3fSmrg{ 34901e04c3fSmrg struct tree_grafting_info *info = (struct tree_grafting_info *)data; 35001e04c3fSmrg ir_instruction *ir, *next; 35101e04c3fSmrg 35201e04c3fSmrg for (ir = bb_first, next = (ir_instruction *)ir->next; 35301e04c3fSmrg ir != bb_last->next; 35401e04c3fSmrg ir = next, next = (ir_instruction *)ir->next) { 35501e04c3fSmrg ir_assignment *assign = ir->as_assignment(); 35601e04c3fSmrg 35701e04c3fSmrg if (!assign) 35801e04c3fSmrg continue; 35901e04c3fSmrg 36001e04c3fSmrg ir_variable *lhs_var = assign->whole_variable_written(); 36101e04c3fSmrg if (!lhs_var) 36201e04c3fSmrg continue; 36301e04c3fSmrg 36401e04c3fSmrg if (lhs_var->data.mode == ir_var_function_out || 36501e04c3fSmrg lhs_var->data.mode == ir_var_function_inout || 36601e04c3fSmrg lhs_var->data.mode == ir_var_shader_out || 36701e04c3fSmrg lhs_var->data.mode == ir_var_shader_storage || 36801e04c3fSmrg lhs_var->data.mode == ir_var_shader_shared) 36901e04c3fSmrg continue; 37001e04c3fSmrg 37101e04c3fSmrg if (lhs_var->data.precise) 37201e04c3fSmrg continue; 37301e04c3fSmrg 37401e04c3fSmrg /* Do not graft sampler and image variables. This is a workaround to 37501e04c3fSmrg * st/glsl_to_tgsi being unable to handle expression parameters to image 37601e04c3fSmrg * intrinsics. 37701e04c3fSmrg * 37801e04c3fSmrg * Note that if this is ever fixed, we still need to skip grafting when 37901e04c3fSmrg * any image layout qualifiers (including the image format) are set, 38001e04c3fSmrg * since we must not lose those. 38101e04c3fSmrg */ 38201e04c3fSmrg if (lhs_var->type->is_sampler() || lhs_var->type->is_image()) 38301e04c3fSmrg continue; 38401e04c3fSmrg 38501e04c3fSmrg ir_variable_refcount_entry *entry = info->refs->get_variable_entry(lhs_var); 38601e04c3fSmrg 38701e04c3fSmrg if (!entry->declaration || 38801e04c3fSmrg entry->assigned_count != 1 || 38901e04c3fSmrg entry->referenced_count != 2) 39001e04c3fSmrg continue; 39101e04c3fSmrg 39201e04c3fSmrg /* Found a possibly graftable assignment. Now, walk through the 39301e04c3fSmrg * rest of the BB seeing if the deref is here, and if nothing interfered with 39401e04c3fSmrg * pasting its expression's values in between. 39501e04c3fSmrg */ 39601e04c3fSmrg info->progress |= try_tree_grafting(assign, lhs_var, bb_last); 39701e04c3fSmrg } 39801e04c3fSmrg} 39901e04c3fSmrg 40001e04c3fSmrg} /* unnamed namespace */ 40101e04c3fSmrg 40201e04c3fSmrg/** 40301e04c3fSmrg * Does a copy propagation pass on the code present in the instruction stream. 40401e04c3fSmrg */ 40501e04c3fSmrgbool 40601e04c3fSmrgdo_tree_grafting(exec_list *instructions) 40701e04c3fSmrg{ 40801e04c3fSmrg ir_variable_refcount_visitor refs; 40901e04c3fSmrg struct tree_grafting_info info; 41001e04c3fSmrg 41101e04c3fSmrg info.progress = false; 41201e04c3fSmrg info.refs = &refs; 41301e04c3fSmrg 41401e04c3fSmrg visit_list_elements(info.refs, instructions); 41501e04c3fSmrg 41601e04c3fSmrg call_for_basic_blocks(instructions, tree_grafting_basic_block, &info); 41701e04c3fSmrg 41801e04c3fSmrg return info.progress; 41901e04c3fSmrg} 420