1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2010 Intel Corporation 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21b8e80941Smrg * DEALINGS IN THE SOFTWARE. 22b8e80941Smrg */ 23b8e80941Smrg 24b8e80941Smrg/** 25b8e80941Smrg * \file opt_function_inlining.cpp 26b8e80941Smrg * 27b8e80941Smrg * Replaces calls to functions with the body of the function. 28b8e80941Smrg */ 29b8e80941Smrg 30b8e80941Smrg#include "ir.h" 31b8e80941Smrg#include "ir_visitor.h" 32b8e80941Smrg#include "ir_function_inlining.h" 33b8e80941Smrg#include "ir_expression_flattening.h" 34b8e80941Smrg#include "compiler/glsl_types.h" 35b8e80941Smrg#include "util/hash_table.h" 36b8e80941Smrg 37b8e80941Smrgstatic void 38b8e80941Smrgdo_variable_replacement(exec_list *instructions, 39b8e80941Smrg ir_variable *orig, 40b8e80941Smrg ir_dereference *repl); 41b8e80941Smrg 42b8e80941Smrgnamespace { 43b8e80941Smrg 44b8e80941Smrgclass ir_function_inlining_visitor : public ir_hierarchical_visitor { 45b8e80941Smrgpublic: 46b8e80941Smrg ir_function_inlining_visitor() 47b8e80941Smrg { 48b8e80941Smrg progress = false; 49b8e80941Smrg } 50b8e80941Smrg 51b8e80941Smrg virtual ~ir_function_inlining_visitor() 52b8e80941Smrg { 53b8e80941Smrg /* empty */ 54b8e80941Smrg } 55b8e80941Smrg 56b8e80941Smrg virtual ir_visitor_status visit_enter(ir_expression *); 57b8e80941Smrg virtual ir_visitor_status visit_enter(ir_call *); 58b8e80941Smrg virtual ir_visitor_status visit_enter(ir_return *); 59b8e80941Smrg virtual ir_visitor_status visit_enter(ir_texture *); 60b8e80941Smrg virtual ir_visitor_status visit_enter(ir_swizzle *); 61b8e80941Smrg 62b8e80941Smrg bool progress; 63b8e80941Smrg}; 64b8e80941Smrg 65b8e80941Smrgclass ir_save_lvalue_visitor : public ir_hierarchical_visitor { 66b8e80941Smrgpublic: 67b8e80941Smrg virtual ir_visitor_status visit_enter(ir_dereference_array *); 68b8e80941Smrg}; 69b8e80941Smrg 70b8e80941Smrg} /* unnamed namespace */ 71b8e80941Smrg 72b8e80941Smrgbool 73b8e80941Smrgdo_function_inlining(exec_list *instructions) 74b8e80941Smrg{ 75b8e80941Smrg ir_function_inlining_visitor v; 76b8e80941Smrg 77b8e80941Smrg v.run(instructions); 78b8e80941Smrg 79b8e80941Smrg return v.progress; 80b8e80941Smrg} 81b8e80941Smrg 82b8e80941Smrgstatic void 83b8e80941Smrgreplace_return_with_assignment(ir_instruction *ir, void *data) 84b8e80941Smrg{ 85b8e80941Smrg void *ctx = ralloc_parent(ir); 86b8e80941Smrg ir_dereference *orig_deref = (ir_dereference *) data; 87b8e80941Smrg ir_return *ret = ir->as_return(); 88b8e80941Smrg 89b8e80941Smrg if (ret) { 90b8e80941Smrg if (ret->value) { 91b8e80941Smrg ir_rvalue *lhs = orig_deref->clone(ctx, NULL); 92b8e80941Smrg ret->replace_with(new(ctx) ir_assignment(lhs, ret->value)); 93b8e80941Smrg } else { 94b8e80941Smrg /* un-valued return has to be the last return, or we shouldn't 95b8e80941Smrg * have reached here. (see can_inline()). 96b8e80941Smrg */ 97b8e80941Smrg assert(ret->next->is_tail_sentinel()); 98b8e80941Smrg ret->remove(); 99b8e80941Smrg } 100b8e80941Smrg } 101b8e80941Smrg} 102b8e80941Smrg 103b8e80941Smrg/* Save the given lvalue before the given instruction. 104b8e80941Smrg * 105b8e80941Smrg * This is done by adding temporary variables into which the current value 106b8e80941Smrg * of any array indices are saved, and then modifying the dereference chain 107b8e80941Smrg * in-place to point to those temporary variables. 108b8e80941Smrg * 109b8e80941Smrg * The hierarchical visitor is only used to traverse the left-hand-side chain 110b8e80941Smrg * of derefs. 111b8e80941Smrg */ 112b8e80941Smrgir_visitor_status 113b8e80941Smrgir_save_lvalue_visitor::visit_enter(ir_dereference_array *deref) 114b8e80941Smrg{ 115b8e80941Smrg if (deref->array_index->ir_type != ir_type_constant) { 116b8e80941Smrg void *ctx = ralloc_parent(deref); 117b8e80941Smrg ir_variable *index; 118b8e80941Smrg ir_assignment *assignment; 119b8e80941Smrg 120b8e80941Smrg index = new(ctx) ir_variable(deref->array_index->type, "saved_idx", ir_var_temporary); 121b8e80941Smrg base_ir->insert_before(index); 122b8e80941Smrg 123b8e80941Smrg assignment = new(ctx) ir_assignment(new(ctx) ir_dereference_variable(index), 124b8e80941Smrg deref->array_index); 125b8e80941Smrg base_ir->insert_before(assignment); 126b8e80941Smrg 127b8e80941Smrg deref->array_index = new(ctx) ir_dereference_variable(index); 128b8e80941Smrg } 129b8e80941Smrg 130b8e80941Smrg deref->array->accept(this); 131b8e80941Smrg return visit_stop; 132b8e80941Smrg} 133b8e80941Smrg 134b8e80941Smrgstatic bool 135b8e80941Smrgshould_replace_variable(ir_variable *sig_param, ir_rvalue *param) { 136b8e80941Smrg /* For opaque types, we want the inlined variable references 137b8e80941Smrg * referencing the passed in variable, since that will have 138b8e80941Smrg * the location information, which an assignment of an opaque 139b8e80941Smrg * variable wouldn't. 140b8e80941Smrg */ 141b8e80941Smrg return sig_param->type->contains_opaque() && 142b8e80941Smrg param->is_dereference() && 143b8e80941Smrg sig_param->data.mode == ir_var_function_in; 144b8e80941Smrg} 145b8e80941Smrg 146b8e80941Smrgvoid 147b8e80941Smrgir_call::generate_inline(ir_instruction *next_ir) 148b8e80941Smrg{ 149b8e80941Smrg void *ctx = ralloc_parent(this); 150b8e80941Smrg ir_variable **parameters; 151b8e80941Smrg unsigned num_parameters; 152b8e80941Smrg int i; 153b8e80941Smrg struct hash_table *ht; 154b8e80941Smrg 155b8e80941Smrg ht = _mesa_pointer_hash_table_create(NULL); 156b8e80941Smrg 157b8e80941Smrg num_parameters = this->callee->parameters.length(); 158b8e80941Smrg parameters = new ir_variable *[num_parameters]; 159b8e80941Smrg 160b8e80941Smrg /* Generate the declarations for the parameters to our inlined code, 161b8e80941Smrg * and set up the mapping of real function body variables to ours. 162b8e80941Smrg */ 163b8e80941Smrg i = 0; 164b8e80941Smrg foreach_two_lists(formal_node, &this->callee->parameters, 165b8e80941Smrg actual_node, &this->actual_parameters) { 166b8e80941Smrg ir_variable *sig_param = (ir_variable *) formal_node; 167b8e80941Smrg ir_rvalue *param = (ir_rvalue *) actual_node; 168b8e80941Smrg 169b8e80941Smrg /* Generate a new variable for the parameter. */ 170b8e80941Smrg if (should_replace_variable(sig_param, param)) { 171b8e80941Smrg /* Actual replacement happens below */ 172b8e80941Smrg parameters[i] = NULL; 173b8e80941Smrg } else { 174b8e80941Smrg parameters[i] = sig_param->clone(ctx, ht); 175b8e80941Smrg parameters[i]->data.mode = ir_var_temporary; 176b8e80941Smrg 177b8e80941Smrg /* Remove the read-only decoration because we're going to write 178b8e80941Smrg * directly to this variable. If the cloned variable is left 179b8e80941Smrg * read-only and the inlined function is inside a loop, the loop 180b8e80941Smrg * analysis code will get confused. 181b8e80941Smrg */ 182b8e80941Smrg parameters[i]->data.read_only = false; 183b8e80941Smrg next_ir->insert_before(parameters[i]); 184b8e80941Smrg } 185b8e80941Smrg 186b8e80941Smrg /* Section 6.1.1 (Function Calling Conventions) of the OpenGL Shading 187b8e80941Smrg * Language 4.5 spec says: 188b8e80941Smrg * 189b8e80941Smrg * "All arguments are evaluated at call time, exactly once, in order, 190b8e80941Smrg * from left to right. [...] Evaluation of an out parameter results 191b8e80941Smrg * in an l-value that is used to copy out a value when the function 192b8e80941Smrg * returns." 193b8e80941Smrg * 194b8e80941Smrg * I.e., we have to take temporary copies of any relevant array indices 195b8e80941Smrg * before the function body is executed. 196b8e80941Smrg * 197b8e80941Smrg * This ensures that 198b8e80941Smrg * (a) if an array index expressions refers to a variable that is 199b8e80941Smrg * modified by the execution of the function body, we use the 200b8e80941Smrg * original value as intended, and 201b8e80941Smrg * (b) if an array index expression has side effects, those side effects 202b8e80941Smrg * are only executed once and at the right time. 203b8e80941Smrg */ 204b8e80941Smrg if (parameters[i]) { 205b8e80941Smrg if (sig_param->data.mode == ir_var_function_in || 206b8e80941Smrg sig_param->data.mode == ir_var_const_in) { 207b8e80941Smrg ir_assignment *assign; 208b8e80941Smrg 209b8e80941Smrg assign = new(ctx) ir_assignment(new(ctx) ir_dereference_variable(parameters[i]), 210b8e80941Smrg param); 211b8e80941Smrg next_ir->insert_before(assign); 212b8e80941Smrg } else { 213b8e80941Smrg assert(sig_param->data.mode == ir_var_function_out || 214b8e80941Smrg sig_param->data.mode == ir_var_function_inout); 215b8e80941Smrg assert(param->is_lvalue()); 216b8e80941Smrg 217b8e80941Smrg ir_save_lvalue_visitor v; 218b8e80941Smrg v.base_ir = next_ir; 219b8e80941Smrg 220b8e80941Smrg param->accept(&v); 221b8e80941Smrg 222b8e80941Smrg if (sig_param->data.mode == ir_var_function_inout) { 223b8e80941Smrg ir_assignment *assign; 224b8e80941Smrg 225b8e80941Smrg assign = new(ctx) ir_assignment(new(ctx) ir_dereference_variable(parameters[i]), 226b8e80941Smrg param->clone(ctx, NULL)->as_rvalue()); 227b8e80941Smrg next_ir->insert_before(assign); 228b8e80941Smrg } 229b8e80941Smrg } 230b8e80941Smrg } 231b8e80941Smrg 232b8e80941Smrg ++i; 233b8e80941Smrg } 234b8e80941Smrg 235b8e80941Smrg exec_list new_instructions; 236b8e80941Smrg 237b8e80941Smrg /* Generate the inlined body of the function to a new list */ 238b8e80941Smrg foreach_in_list(ir_instruction, ir, &callee->body) { 239b8e80941Smrg ir_instruction *new_ir = ir->clone(ctx, ht); 240b8e80941Smrg 241b8e80941Smrg new_instructions.push_tail(new_ir); 242b8e80941Smrg visit_tree(new_ir, replace_return_with_assignment, this->return_deref); 243b8e80941Smrg } 244b8e80941Smrg 245b8e80941Smrg /* If any opaque types were passed in, replace any deref of the 246b8e80941Smrg * opaque variable with a deref of the argument. 247b8e80941Smrg */ 248b8e80941Smrg foreach_two_lists(formal_node, &this->callee->parameters, 249b8e80941Smrg actual_node, &this->actual_parameters) { 250b8e80941Smrg ir_rvalue *const param = (ir_rvalue *) actual_node; 251b8e80941Smrg ir_variable *sig_param = (ir_variable *) formal_node; 252b8e80941Smrg 253b8e80941Smrg if (should_replace_variable(sig_param, param)) { 254b8e80941Smrg ir_dereference *deref = param->as_dereference(); 255b8e80941Smrg 256b8e80941Smrg do_variable_replacement(&new_instructions, sig_param, deref); 257b8e80941Smrg } 258b8e80941Smrg } 259b8e80941Smrg 260b8e80941Smrg /* Now push those new instructions in. */ 261b8e80941Smrg next_ir->insert_before(&new_instructions); 262b8e80941Smrg 263b8e80941Smrg /* Copy back the value of any 'out' parameters from the function body 264b8e80941Smrg * variables to our own. 265b8e80941Smrg */ 266b8e80941Smrg i = 0; 267b8e80941Smrg foreach_two_lists(formal_node, &this->callee->parameters, 268b8e80941Smrg actual_node, &this->actual_parameters) { 269b8e80941Smrg ir_rvalue *const param = (ir_rvalue *) actual_node; 270b8e80941Smrg const ir_variable *const sig_param = (ir_variable *) formal_node; 271b8e80941Smrg 272b8e80941Smrg /* Move our param variable into the actual param if it's an 'out' type. */ 273b8e80941Smrg if (parameters[i] && (sig_param->data.mode == ir_var_function_out || 274b8e80941Smrg sig_param->data.mode == ir_var_function_inout)) { 275b8e80941Smrg ir_assignment *assign; 276b8e80941Smrg 277b8e80941Smrg assign = new(ctx) ir_assignment(param, 278b8e80941Smrg new(ctx) ir_dereference_variable(parameters[i])); 279b8e80941Smrg next_ir->insert_before(assign); 280b8e80941Smrg } 281b8e80941Smrg 282b8e80941Smrg ++i; 283b8e80941Smrg } 284b8e80941Smrg 285b8e80941Smrg delete [] parameters; 286b8e80941Smrg 287b8e80941Smrg _mesa_hash_table_destroy(ht, NULL); 288b8e80941Smrg} 289b8e80941Smrg 290b8e80941Smrg 291b8e80941Smrgir_visitor_status 292b8e80941Smrgir_function_inlining_visitor::visit_enter(ir_expression *ir) 293b8e80941Smrg{ 294b8e80941Smrg (void) ir; 295b8e80941Smrg return visit_continue_with_parent; 296b8e80941Smrg} 297b8e80941Smrg 298b8e80941Smrg 299b8e80941Smrgir_visitor_status 300b8e80941Smrgir_function_inlining_visitor::visit_enter(ir_return *ir) 301b8e80941Smrg{ 302b8e80941Smrg (void) ir; 303b8e80941Smrg return visit_continue_with_parent; 304b8e80941Smrg} 305b8e80941Smrg 306b8e80941Smrg 307b8e80941Smrgir_visitor_status 308b8e80941Smrgir_function_inlining_visitor::visit_enter(ir_texture *ir) 309b8e80941Smrg{ 310b8e80941Smrg (void) ir; 311b8e80941Smrg return visit_continue_with_parent; 312b8e80941Smrg} 313b8e80941Smrg 314b8e80941Smrg 315b8e80941Smrgir_visitor_status 316b8e80941Smrgir_function_inlining_visitor::visit_enter(ir_swizzle *ir) 317b8e80941Smrg{ 318b8e80941Smrg (void) ir; 319b8e80941Smrg return visit_continue_with_parent; 320b8e80941Smrg} 321b8e80941Smrg 322b8e80941Smrg 323b8e80941Smrgir_visitor_status 324b8e80941Smrgir_function_inlining_visitor::visit_enter(ir_call *ir) 325b8e80941Smrg{ 326b8e80941Smrg if (can_inline(ir)) { 327b8e80941Smrg ir->generate_inline(ir); 328b8e80941Smrg ir->remove(); 329b8e80941Smrg this->progress = true; 330b8e80941Smrg } 331b8e80941Smrg 332b8e80941Smrg return visit_continue; 333b8e80941Smrg} 334b8e80941Smrg 335b8e80941Smrg 336b8e80941Smrg/** 337b8e80941Smrg * Replaces references to the "orig" variable with a clone of "repl." 338b8e80941Smrg * 339b8e80941Smrg * From the spec, opaque types can appear in the tree as function 340b8e80941Smrg * (non-out) parameters and as the result of array indexing and 341b8e80941Smrg * structure field selection. In our builtin implementation, they 342b8e80941Smrg * also appear in the sampler field of an ir_tex instruction. 343b8e80941Smrg */ 344b8e80941Smrg 345b8e80941Smrgclass ir_variable_replacement_visitor : public ir_hierarchical_visitor { 346b8e80941Smrgpublic: 347b8e80941Smrg ir_variable_replacement_visitor(ir_variable *orig, ir_dereference *repl) 348b8e80941Smrg { 349b8e80941Smrg this->orig = orig; 350b8e80941Smrg this->repl = repl; 351b8e80941Smrg } 352b8e80941Smrg 353b8e80941Smrg virtual ~ir_variable_replacement_visitor() 354b8e80941Smrg { 355b8e80941Smrg } 356b8e80941Smrg 357b8e80941Smrg virtual ir_visitor_status visit_leave(ir_call *); 358b8e80941Smrg virtual ir_visitor_status visit_leave(ir_dereference_array *); 359b8e80941Smrg virtual ir_visitor_status visit_leave(ir_dereference_record *); 360b8e80941Smrg virtual ir_visitor_status visit_leave(ir_texture *); 361b8e80941Smrg virtual ir_visitor_status visit_leave(ir_assignment *); 362b8e80941Smrg virtual ir_visitor_status visit_leave(ir_expression *); 363b8e80941Smrg virtual ir_visitor_status visit_leave(ir_return *); 364b8e80941Smrg 365b8e80941Smrg void replace_deref(ir_dereference **deref); 366b8e80941Smrg void replace_rvalue(ir_rvalue **rvalue); 367b8e80941Smrg 368b8e80941Smrg ir_variable *orig; 369b8e80941Smrg ir_dereference *repl; 370b8e80941Smrg}; 371b8e80941Smrg 372b8e80941Smrgvoid 373b8e80941Smrgir_variable_replacement_visitor::replace_deref(ir_dereference **deref) 374b8e80941Smrg{ 375b8e80941Smrg ir_dereference_variable *deref_var = (*deref)->as_dereference_variable(); 376b8e80941Smrg if (deref_var && deref_var->var == this->orig) { 377b8e80941Smrg *deref = this->repl->clone(ralloc_parent(*deref), NULL); 378b8e80941Smrg } 379b8e80941Smrg} 380b8e80941Smrg 381b8e80941Smrgvoid 382b8e80941Smrgir_variable_replacement_visitor::replace_rvalue(ir_rvalue **rvalue) 383b8e80941Smrg{ 384b8e80941Smrg if (!*rvalue) 385b8e80941Smrg return; 386b8e80941Smrg 387b8e80941Smrg ir_dereference *deref = (*rvalue)->as_dereference(); 388b8e80941Smrg 389b8e80941Smrg if (!deref) 390b8e80941Smrg return; 391b8e80941Smrg 392b8e80941Smrg replace_deref(&deref); 393b8e80941Smrg *rvalue = deref; 394b8e80941Smrg} 395b8e80941Smrg 396b8e80941Smrgir_visitor_status 397b8e80941Smrgir_variable_replacement_visitor::visit_leave(ir_texture *ir) 398b8e80941Smrg{ 399b8e80941Smrg replace_deref(&ir->sampler); 400b8e80941Smrg 401b8e80941Smrg return visit_continue; 402b8e80941Smrg} 403b8e80941Smrg 404b8e80941Smrgir_visitor_status 405b8e80941Smrgir_variable_replacement_visitor::visit_leave(ir_assignment *ir) 406b8e80941Smrg{ 407b8e80941Smrg replace_deref(&ir->lhs); 408b8e80941Smrg replace_rvalue(&ir->rhs); 409b8e80941Smrg 410b8e80941Smrg return visit_continue; 411b8e80941Smrg} 412b8e80941Smrg 413b8e80941Smrgir_visitor_status 414b8e80941Smrgir_variable_replacement_visitor::visit_leave(ir_expression *ir) 415b8e80941Smrg{ 416b8e80941Smrg for (uint8_t i = 0; i < ir->num_operands; i++) 417b8e80941Smrg replace_rvalue(&ir->operands[i]); 418b8e80941Smrg 419b8e80941Smrg return visit_continue; 420b8e80941Smrg} 421b8e80941Smrg 422b8e80941Smrgir_visitor_status 423b8e80941Smrgir_variable_replacement_visitor::visit_leave(ir_return *ir) 424b8e80941Smrg{ 425b8e80941Smrg replace_rvalue(&ir->value); 426b8e80941Smrg 427b8e80941Smrg return visit_continue; 428b8e80941Smrg} 429b8e80941Smrg 430b8e80941Smrgir_visitor_status 431b8e80941Smrgir_variable_replacement_visitor::visit_leave(ir_dereference_array *ir) 432b8e80941Smrg{ 433b8e80941Smrg replace_rvalue(&ir->array); 434b8e80941Smrg return visit_continue; 435b8e80941Smrg} 436b8e80941Smrg 437b8e80941Smrgir_visitor_status 438b8e80941Smrgir_variable_replacement_visitor::visit_leave(ir_dereference_record *ir) 439b8e80941Smrg{ 440b8e80941Smrg replace_rvalue(&ir->record); 441b8e80941Smrg return visit_continue; 442b8e80941Smrg} 443b8e80941Smrg 444b8e80941Smrgir_visitor_status 445b8e80941Smrgir_variable_replacement_visitor::visit_leave(ir_call *ir) 446b8e80941Smrg{ 447b8e80941Smrg foreach_in_list_safe(ir_rvalue, param, &ir->actual_parameters) { 448b8e80941Smrg ir_rvalue *new_param = param; 449b8e80941Smrg replace_rvalue(&new_param); 450b8e80941Smrg 451b8e80941Smrg if (new_param != param) { 452b8e80941Smrg param->replace_with(new_param); 453b8e80941Smrg } 454b8e80941Smrg } 455b8e80941Smrg return visit_continue; 456b8e80941Smrg} 457b8e80941Smrg 458b8e80941Smrgstatic void 459b8e80941Smrgdo_variable_replacement(exec_list *instructions, 460b8e80941Smrg ir_variable *orig, 461b8e80941Smrg ir_dereference *repl) 462b8e80941Smrg{ 463b8e80941Smrg ir_variable_replacement_visitor v(orig, repl); 464b8e80941Smrg 465b8e80941Smrg visit_list_elements(&v, instructions); 466b8e80941Smrg} 467