1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2010 Intel Corporation 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21b8e80941Smrg * DEALINGS IN THE SOFTWARE. 22b8e80941Smrg */ 23b8e80941Smrg 24b8e80941Smrg/** 25b8e80941Smrg * \file opt_tree_grafting.cpp 26b8e80941Smrg * 27b8e80941Smrg * Takes assignments to variables that are dereferenced only once and 28b8e80941Smrg * pastes the RHS expression into where the variable is dereferenced. 29b8e80941Smrg * 30b8e80941Smrg * In the process of various operations like function inlining and 31b8e80941Smrg * tertiary op handling, we'll end up with our expression trees having 32b8e80941Smrg * been chopped up into a series of assignments of short expressions 33b8e80941Smrg * to temps. Other passes like ir_algebraic.cpp would prefer to see 34b8e80941Smrg * the deepest expression trees they can to try to optimize them. 35b8e80941Smrg * 36b8e80941Smrg * This is a lot like copy propagaton. In comparison, copy 37b8e80941Smrg * propagation only acts on plain copies, not arbitrary expressions on 38b8e80941Smrg * the RHS. Generally, we wouldn't want to go pasting some 39b8e80941Smrg * complicated expression everywhere it got used, though, so we don't 40b8e80941Smrg * handle expressions in that pass. 41b8e80941Smrg * 42b8e80941Smrg * The hard part is making sure we don't move an expression across 43b8e80941Smrg * some other assignments that would change the value of the 44b8e80941Smrg * expression. So we split this into two passes: First, find the 45b8e80941Smrg * variables in our scope which are written to once and read once, and 46b8e80941Smrg * then go through basic blocks seeing if we find an opportunity to 47b8e80941Smrg * move those expressions safely. 48b8e80941Smrg */ 49b8e80941Smrg 50b8e80941Smrg#include "ir.h" 51b8e80941Smrg#include "ir_visitor.h" 52b8e80941Smrg#include "ir_variable_refcount.h" 53b8e80941Smrg#include "ir_basic_block.h" 54b8e80941Smrg#include "ir_optimization.h" 55b8e80941Smrg#include "compiler/glsl_types.h" 56b8e80941Smrg 57b8e80941Smrgnamespace { 58b8e80941Smrg 59b8e80941Smrgstatic bool debug = false; 60b8e80941Smrg 61b8e80941Smrgclass ir_tree_grafting_visitor : public ir_hierarchical_visitor { 62b8e80941Smrgpublic: 63b8e80941Smrg ir_tree_grafting_visitor(ir_assignment *graft_assign, 64b8e80941Smrg ir_variable *graft_var) 65b8e80941Smrg { 66b8e80941Smrg this->progress = false; 67b8e80941Smrg this->graft_assign = graft_assign; 68b8e80941Smrg this->graft_var = graft_var; 69b8e80941Smrg } 70b8e80941Smrg 71b8e80941Smrg virtual ir_visitor_status visit_leave(class ir_assignment *); 72b8e80941Smrg virtual ir_visitor_status visit_enter(class ir_call *); 73b8e80941Smrg virtual ir_visitor_status visit_enter(class ir_expression *); 74b8e80941Smrg virtual ir_visitor_status visit_enter(class ir_function *); 75b8e80941Smrg virtual ir_visitor_status visit_enter(class ir_function_signature *); 76b8e80941Smrg virtual ir_visitor_status visit_enter(class ir_if *); 77b8e80941Smrg virtual ir_visitor_status visit_enter(class ir_loop *); 78b8e80941Smrg virtual ir_visitor_status visit_enter(class ir_swizzle *); 79b8e80941Smrg virtual ir_visitor_status visit_enter(class ir_texture *); 80b8e80941Smrg 81b8e80941Smrg ir_visitor_status check_graft(ir_instruction *ir, ir_variable *var); 82b8e80941Smrg 83b8e80941Smrg bool do_graft(ir_rvalue **rvalue); 84b8e80941Smrg 85b8e80941Smrg bool progress; 86b8e80941Smrg ir_variable *graft_var; 87b8e80941Smrg ir_assignment *graft_assign; 88b8e80941Smrg}; 89b8e80941Smrg 90b8e80941Smrgstruct find_deref_info { 91b8e80941Smrg ir_variable *var; 92b8e80941Smrg bool found; 93b8e80941Smrg}; 94b8e80941Smrg 95b8e80941Smrgvoid 96b8e80941Smrgdereferences_variable_callback(ir_instruction *ir, void *data) 97b8e80941Smrg{ 98b8e80941Smrg struct find_deref_info *info = (struct find_deref_info *)data; 99b8e80941Smrg ir_dereference_variable *deref = ir->as_dereference_variable(); 100b8e80941Smrg 101b8e80941Smrg if (deref && deref->var == info->var) 102b8e80941Smrg info->found = true; 103b8e80941Smrg} 104b8e80941Smrg 105b8e80941Smrgstatic bool 106b8e80941Smrgdereferences_variable(ir_instruction *ir, ir_variable *var) 107b8e80941Smrg{ 108b8e80941Smrg struct find_deref_info info; 109b8e80941Smrg 110b8e80941Smrg info.var = var; 111b8e80941Smrg info.found = false; 112b8e80941Smrg 113b8e80941Smrg visit_tree(ir, dereferences_variable_callback, &info); 114b8e80941Smrg 115b8e80941Smrg return info.found; 116b8e80941Smrg} 117b8e80941Smrg 118b8e80941Smrgbool 119b8e80941Smrgir_tree_grafting_visitor::do_graft(ir_rvalue **rvalue) 120b8e80941Smrg{ 121b8e80941Smrg if (!*rvalue) 122b8e80941Smrg return false; 123b8e80941Smrg 124b8e80941Smrg ir_dereference_variable *deref = (*rvalue)->as_dereference_variable(); 125b8e80941Smrg 126b8e80941Smrg if (!deref || deref->var != this->graft_var) 127b8e80941Smrg return false; 128b8e80941Smrg 129b8e80941Smrg if (debug) { 130b8e80941Smrg fprintf(stderr, "GRAFTING:\n"); 131b8e80941Smrg this->graft_assign->fprint(stderr); 132b8e80941Smrg fprintf(stderr, "\n"); 133b8e80941Smrg fprintf(stderr, "TO:\n"); 134b8e80941Smrg (*rvalue)->fprint(stderr); 135b8e80941Smrg fprintf(stderr, "\n"); 136b8e80941Smrg } 137b8e80941Smrg 138b8e80941Smrg this->graft_assign->remove(); 139b8e80941Smrg *rvalue = this->graft_assign->rhs; 140b8e80941Smrg 141b8e80941Smrg this->progress = true; 142b8e80941Smrg return true; 143b8e80941Smrg} 144b8e80941Smrg 145b8e80941Smrgir_visitor_status 146b8e80941Smrgir_tree_grafting_visitor::visit_enter(ir_loop *ir) 147b8e80941Smrg{ 148b8e80941Smrg (void)ir; 149b8e80941Smrg /* Do not traverse into the body of the loop since that is a 150b8e80941Smrg * different basic block. 151b8e80941Smrg */ 152b8e80941Smrg return visit_stop; 153b8e80941Smrg} 154b8e80941Smrg 155b8e80941Smrg/** 156b8e80941Smrg * Check if we can continue grafting after writing to a variable. If the 157b8e80941Smrg * expression we're trying to graft references the variable, we must stop. 158b8e80941Smrg * 159b8e80941Smrg * \param ir An instruction that writes to a variable. 160b8e80941Smrg * \param var The variable being updated. 161b8e80941Smrg */ 162b8e80941Smrgir_visitor_status 163b8e80941Smrgir_tree_grafting_visitor::check_graft(ir_instruction *ir, ir_variable *var) 164b8e80941Smrg{ 165b8e80941Smrg if (dereferences_variable(this->graft_assign->rhs, var)) { 166b8e80941Smrg if (debug) { 167b8e80941Smrg fprintf(stderr, "graft killed by: "); 168b8e80941Smrg ir->fprint(stderr); 169b8e80941Smrg fprintf(stderr, "\n"); 170b8e80941Smrg } 171b8e80941Smrg return visit_stop; 172b8e80941Smrg } 173b8e80941Smrg 174b8e80941Smrg return visit_continue; 175b8e80941Smrg} 176b8e80941Smrg 177b8e80941Smrgir_visitor_status 178b8e80941Smrgir_tree_grafting_visitor::visit_leave(ir_assignment *ir) 179b8e80941Smrg{ 180b8e80941Smrg if (do_graft(&ir->rhs) || 181b8e80941Smrg do_graft(&ir->condition)) 182b8e80941Smrg return visit_stop; 183b8e80941Smrg 184b8e80941Smrg /* If this assignment updates a variable used in the assignment 185b8e80941Smrg * we're trying to graft, then we're done. 186b8e80941Smrg */ 187b8e80941Smrg return check_graft(ir, ir->lhs->variable_referenced()); 188b8e80941Smrg} 189b8e80941Smrg 190b8e80941Smrgir_visitor_status 191b8e80941Smrgir_tree_grafting_visitor::visit_enter(ir_function *ir) 192b8e80941Smrg{ 193b8e80941Smrg (void) ir; 194b8e80941Smrg return visit_continue_with_parent; 195b8e80941Smrg} 196b8e80941Smrg 197b8e80941Smrgir_visitor_status 198b8e80941Smrgir_tree_grafting_visitor::visit_enter(ir_function_signature *ir) 199b8e80941Smrg{ 200b8e80941Smrg (void)ir; 201b8e80941Smrg return visit_continue_with_parent; 202b8e80941Smrg} 203b8e80941Smrg 204b8e80941Smrgir_visitor_status 205b8e80941Smrgir_tree_grafting_visitor::visit_enter(ir_call *ir) 206b8e80941Smrg{ 207b8e80941Smrg foreach_two_lists(formal_node, &ir->callee->parameters, 208b8e80941Smrg actual_node, &ir->actual_parameters) { 209b8e80941Smrg ir_variable *sig_param = (ir_variable *) formal_node; 210b8e80941Smrg ir_rvalue *ir = (ir_rvalue *) actual_node; 211b8e80941Smrg ir_rvalue *new_ir = ir; 212b8e80941Smrg 213b8e80941Smrg if (sig_param->data.mode != ir_var_function_in 214b8e80941Smrg && sig_param->data.mode != ir_var_const_in) { 215b8e80941Smrg if (check_graft(ir, sig_param) == visit_stop) 216b8e80941Smrg return visit_stop; 217b8e80941Smrg continue; 218b8e80941Smrg } 219b8e80941Smrg 220b8e80941Smrg if (do_graft(&new_ir)) { 221b8e80941Smrg ir->replace_with(new_ir); 222b8e80941Smrg return visit_stop; 223b8e80941Smrg } 224b8e80941Smrg } 225b8e80941Smrg 226b8e80941Smrg if (ir->return_deref && check_graft(ir, ir->return_deref->var) == visit_stop) 227b8e80941Smrg return visit_stop; 228b8e80941Smrg 229b8e80941Smrg return visit_continue; 230b8e80941Smrg} 231b8e80941Smrg 232b8e80941Smrgir_visitor_status 233b8e80941Smrgir_tree_grafting_visitor::visit_enter(ir_expression *ir) 234b8e80941Smrg{ 235b8e80941Smrg for (unsigned int i = 0; i < ir->num_operands; i++) { 236b8e80941Smrg if (do_graft(&ir->operands[i])) 237b8e80941Smrg return visit_stop; 238b8e80941Smrg } 239b8e80941Smrg 240b8e80941Smrg return visit_continue; 241b8e80941Smrg} 242b8e80941Smrg 243b8e80941Smrgir_visitor_status 244b8e80941Smrgir_tree_grafting_visitor::visit_enter(ir_if *ir) 245b8e80941Smrg{ 246b8e80941Smrg if (do_graft(&ir->condition)) 247b8e80941Smrg return visit_stop; 248b8e80941Smrg 249b8e80941Smrg /* Do not traverse into the body of the if-statement since that is a 250b8e80941Smrg * different basic block. 251b8e80941Smrg */ 252b8e80941Smrg return visit_continue_with_parent; 253b8e80941Smrg} 254b8e80941Smrg 255b8e80941Smrgir_visitor_status 256b8e80941Smrgir_tree_grafting_visitor::visit_enter(ir_swizzle *ir) 257b8e80941Smrg{ 258b8e80941Smrg if (do_graft(&ir->val)) 259b8e80941Smrg return visit_stop; 260b8e80941Smrg 261b8e80941Smrg return visit_continue; 262b8e80941Smrg} 263b8e80941Smrg 264b8e80941Smrgir_visitor_status 265b8e80941Smrgir_tree_grafting_visitor::visit_enter(ir_texture *ir) 266b8e80941Smrg{ 267b8e80941Smrg if (do_graft(&ir->coordinate) || 268b8e80941Smrg do_graft(&ir->projector) || 269b8e80941Smrg do_graft(&ir->offset) || 270b8e80941Smrg do_graft(&ir->shadow_comparator)) 271b8e80941Smrg return visit_stop; 272b8e80941Smrg 273b8e80941Smrg switch (ir->op) { 274b8e80941Smrg case ir_tex: 275b8e80941Smrg case ir_lod: 276b8e80941Smrg case ir_query_levels: 277b8e80941Smrg case ir_texture_samples: 278b8e80941Smrg case ir_samples_identical: 279b8e80941Smrg break; 280b8e80941Smrg case ir_txb: 281b8e80941Smrg if (do_graft(&ir->lod_info.bias)) 282b8e80941Smrg return visit_stop; 283b8e80941Smrg break; 284b8e80941Smrg case ir_txf: 285b8e80941Smrg case ir_txl: 286b8e80941Smrg case ir_txs: 287b8e80941Smrg if (do_graft(&ir->lod_info.lod)) 288b8e80941Smrg return visit_stop; 289b8e80941Smrg break; 290b8e80941Smrg case ir_txf_ms: 291b8e80941Smrg if (do_graft(&ir->lod_info.sample_index)) 292b8e80941Smrg return visit_stop; 293b8e80941Smrg break; 294b8e80941Smrg case ir_txd: 295b8e80941Smrg if (do_graft(&ir->lod_info.grad.dPdx) || 296b8e80941Smrg do_graft(&ir->lod_info.grad.dPdy)) 297b8e80941Smrg return visit_stop; 298b8e80941Smrg break; 299b8e80941Smrg case ir_tg4: 300b8e80941Smrg if (do_graft(&ir->lod_info.component)) 301b8e80941Smrg return visit_stop; 302b8e80941Smrg break; 303b8e80941Smrg } 304b8e80941Smrg 305b8e80941Smrg return visit_continue; 306b8e80941Smrg} 307b8e80941Smrg 308b8e80941Smrgstruct tree_grafting_info { 309b8e80941Smrg ir_variable_refcount_visitor *refs; 310b8e80941Smrg bool progress; 311b8e80941Smrg}; 312b8e80941Smrg 313b8e80941Smrgstatic bool 314b8e80941Smrgtry_tree_grafting(ir_assignment *start, 315b8e80941Smrg ir_variable *lhs_var, 316b8e80941Smrg ir_instruction *bb_last) 317b8e80941Smrg{ 318b8e80941Smrg ir_tree_grafting_visitor v(start, lhs_var); 319b8e80941Smrg 320b8e80941Smrg if (debug) { 321b8e80941Smrg fprintf(stderr, "trying to graft: "); 322b8e80941Smrg lhs_var->fprint(stderr); 323b8e80941Smrg fprintf(stderr, "\n"); 324b8e80941Smrg } 325b8e80941Smrg 326b8e80941Smrg for (ir_instruction *ir = (ir_instruction *)start->next; 327b8e80941Smrg ir != bb_last->next; 328b8e80941Smrg ir = (ir_instruction *)ir->next) { 329b8e80941Smrg 330b8e80941Smrg if (debug) { 331b8e80941Smrg fprintf(stderr, "- "); 332b8e80941Smrg ir->fprint(stderr); 333b8e80941Smrg fprintf(stderr, "\n"); 334b8e80941Smrg } 335b8e80941Smrg 336b8e80941Smrg ir_visitor_status s = ir->accept(&v); 337b8e80941Smrg if (s == visit_stop) 338b8e80941Smrg return v.progress; 339b8e80941Smrg } 340b8e80941Smrg 341b8e80941Smrg return false; 342b8e80941Smrg} 343b8e80941Smrg 344b8e80941Smrgstatic void 345b8e80941Smrgtree_grafting_basic_block(ir_instruction *bb_first, 346b8e80941Smrg ir_instruction *bb_last, 347b8e80941Smrg void *data) 348b8e80941Smrg{ 349b8e80941Smrg struct tree_grafting_info *info = (struct tree_grafting_info *)data; 350b8e80941Smrg ir_instruction *ir, *next; 351b8e80941Smrg 352b8e80941Smrg for (ir = bb_first, next = (ir_instruction *)ir->next; 353b8e80941Smrg ir != bb_last->next; 354b8e80941Smrg ir = next, next = (ir_instruction *)ir->next) { 355b8e80941Smrg ir_assignment *assign = ir->as_assignment(); 356b8e80941Smrg 357b8e80941Smrg if (!assign) 358b8e80941Smrg continue; 359b8e80941Smrg 360b8e80941Smrg ir_variable *lhs_var = assign->whole_variable_written(); 361b8e80941Smrg if (!lhs_var) 362b8e80941Smrg continue; 363b8e80941Smrg 364b8e80941Smrg if (lhs_var->data.mode == ir_var_function_out || 365b8e80941Smrg lhs_var->data.mode == ir_var_function_inout || 366b8e80941Smrg lhs_var->data.mode == ir_var_shader_out || 367b8e80941Smrg lhs_var->data.mode == ir_var_shader_storage || 368b8e80941Smrg lhs_var->data.mode == ir_var_shader_shared) 369b8e80941Smrg continue; 370b8e80941Smrg 371b8e80941Smrg if (lhs_var->data.precise) 372b8e80941Smrg continue; 373b8e80941Smrg 374b8e80941Smrg /* Do not graft sampler and image variables. This is a workaround to 375b8e80941Smrg * st/glsl_to_tgsi being unable to handle expression parameters to image 376b8e80941Smrg * intrinsics. 377b8e80941Smrg * 378b8e80941Smrg * Note that if this is ever fixed, we still need to skip grafting when 379b8e80941Smrg * any image layout qualifiers (including the image format) are set, 380b8e80941Smrg * since we must not lose those. 381b8e80941Smrg */ 382b8e80941Smrg if (lhs_var->type->is_sampler() || lhs_var->type->is_image()) 383b8e80941Smrg continue; 384b8e80941Smrg 385b8e80941Smrg ir_variable_refcount_entry *entry = info->refs->get_variable_entry(lhs_var); 386b8e80941Smrg 387b8e80941Smrg if (!entry->declaration || 388b8e80941Smrg entry->assigned_count != 1 || 389b8e80941Smrg entry->referenced_count != 2) 390b8e80941Smrg continue; 391b8e80941Smrg 392b8e80941Smrg /* Found a possibly graftable assignment. Now, walk through the 393b8e80941Smrg * rest of the BB seeing if the deref is here, and if nothing interfered with 394b8e80941Smrg * pasting its expression's values in between. 395b8e80941Smrg */ 396b8e80941Smrg info->progress |= try_tree_grafting(assign, lhs_var, bb_last); 397b8e80941Smrg } 398b8e80941Smrg} 399b8e80941Smrg 400b8e80941Smrg} /* unnamed namespace */ 401b8e80941Smrg 402b8e80941Smrg/** 403b8e80941Smrg * Does a copy propagation pass on the code present in the instruction stream. 404b8e80941Smrg */ 405b8e80941Smrgbool 406b8e80941Smrgdo_tree_grafting(exec_list *instructions) 407b8e80941Smrg{ 408b8e80941Smrg ir_variable_refcount_visitor refs; 409b8e80941Smrg struct tree_grafting_info info; 410b8e80941Smrg 411b8e80941Smrg info.progress = false; 412b8e80941Smrg info.refs = &refs; 413b8e80941Smrg 414b8e80941Smrg visit_list_elements(info.refs, instructions); 415b8e80941Smrg 416b8e80941Smrg call_for_basic_blocks(instructions, tree_grafting_basic_block, &info); 417b8e80941Smrg 418b8e80941Smrg return info.progress; 419b8e80941Smrg} 420