101e04c3fSmrg/*
201e04c3fSmrg * Copyright © 2010 Intel Corporation
301e04c3fSmrg *
401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
501e04c3fSmrg * copy of this software and associated documentation files (the "Software"),
601e04c3fSmrg * to deal in the Software without restriction, including without limitation
701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the
901e04c3fSmrg * Software is furnished to do so, subject to the following conditions:
1001e04c3fSmrg *
1101e04c3fSmrg * The above copyright notice and this permission notice (including the next
1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the
1301e04c3fSmrg * Software.
1401e04c3fSmrg *
1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
2101e04c3fSmrg * DEALINGS IN THE SOFTWARE.
2201e04c3fSmrg */
2301e04c3fSmrg
2401e04c3fSmrg/**
2501e04c3fSmrg * \file opt_tree_grafting.cpp
2601e04c3fSmrg *
2701e04c3fSmrg * Takes assignments to variables that are dereferenced only once and
2801e04c3fSmrg * pastes the RHS expression into where the variable is dereferenced.
2901e04c3fSmrg *
3001e04c3fSmrg * In the process of various operations like function inlining and
3101e04c3fSmrg * tertiary op handling, we'll end up with our expression trees having
3201e04c3fSmrg * been chopped up into a series of assignments of short expressions
3301e04c3fSmrg * to temps.  Other passes like ir_algebraic.cpp would prefer to see
3401e04c3fSmrg * the deepest expression trees they can to try to optimize them.
3501e04c3fSmrg *
3601e04c3fSmrg * This is a lot like copy propagaton.  In comparison, copy
3701e04c3fSmrg * propagation only acts on plain copies, not arbitrary expressions on
3801e04c3fSmrg * the RHS.  Generally, we wouldn't want to go pasting some
3901e04c3fSmrg * complicated expression everywhere it got used, though, so we don't
4001e04c3fSmrg * handle expressions in that pass.
4101e04c3fSmrg *
4201e04c3fSmrg * The hard part is making sure we don't move an expression across
4301e04c3fSmrg * some other assignments that would change the value of the
4401e04c3fSmrg * expression.  So we split this into two passes: First, find the
4501e04c3fSmrg * variables in our scope which are written to once and read once, and
4601e04c3fSmrg * then go through basic blocks seeing if we find an opportunity to
4701e04c3fSmrg * move those expressions safely.
4801e04c3fSmrg */
4901e04c3fSmrg
5001e04c3fSmrg#include "ir.h"
5101e04c3fSmrg#include "ir_visitor.h"
5201e04c3fSmrg#include "ir_variable_refcount.h"
5301e04c3fSmrg#include "ir_basic_block.h"
5401e04c3fSmrg#include "ir_optimization.h"
5501e04c3fSmrg#include "compiler/glsl_types.h"
5601e04c3fSmrg
5701e04c3fSmrgnamespace {
5801e04c3fSmrg
5901e04c3fSmrgstatic bool debug = false;
6001e04c3fSmrg
6101e04c3fSmrgclass ir_tree_grafting_visitor : public ir_hierarchical_visitor {
6201e04c3fSmrgpublic:
6301e04c3fSmrg   ir_tree_grafting_visitor(ir_assignment *graft_assign,
6401e04c3fSmrg			    ir_variable *graft_var)
6501e04c3fSmrg   {
6601e04c3fSmrg      this->progress = false;
6701e04c3fSmrg      this->graft_assign = graft_assign;
6801e04c3fSmrg      this->graft_var = graft_var;
6901e04c3fSmrg   }
7001e04c3fSmrg
7101e04c3fSmrg   virtual ir_visitor_status visit_leave(class ir_assignment *);
7201e04c3fSmrg   virtual ir_visitor_status visit_enter(class ir_call *);
7301e04c3fSmrg   virtual ir_visitor_status visit_enter(class ir_expression *);
7401e04c3fSmrg   virtual ir_visitor_status visit_enter(class ir_function *);
7501e04c3fSmrg   virtual ir_visitor_status visit_enter(class ir_function_signature *);
7601e04c3fSmrg   virtual ir_visitor_status visit_enter(class ir_if *);
7701e04c3fSmrg   virtual ir_visitor_status visit_enter(class ir_loop *);
7801e04c3fSmrg   virtual ir_visitor_status visit_enter(class ir_swizzle *);
7901e04c3fSmrg   virtual ir_visitor_status visit_enter(class ir_texture *);
8001e04c3fSmrg
8101e04c3fSmrg   ir_visitor_status check_graft(ir_instruction *ir, ir_variable *var);
8201e04c3fSmrg
8301e04c3fSmrg   bool do_graft(ir_rvalue **rvalue);
8401e04c3fSmrg
8501e04c3fSmrg   bool progress;
8601e04c3fSmrg   ir_variable *graft_var;
8701e04c3fSmrg   ir_assignment *graft_assign;
8801e04c3fSmrg};
8901e04c3fSmrg
9001e04c3fSmrgstruct find_deref_info {
9101e04c3fSmrg   ir_variable *var;
9201e04c3fSmrg   bool found;
9301e04c3fSmrg};
9401e04c3fSmrg
9501e04c3fSmrgvoid
9601e04c3fSmrgdereferences_variable_callback(ir_instruction *ir, void *data)
9701e04c3fSmrg{
9801e04c3fSmrg   struct find_deref_info *info = (struct find_deref_info *)data;
9901e04c3fSmrg   ir_dereference_variable *deref = ir->as_dereference_variable();
10001e04c3fSmrg
10101e04c3fSmrg   if (deref && deref->var == info->var)
10201e04c3fSmrg      info->found = true;
10301e04c3fSmrg}
10401e04c3fSmrg
10501e04c3fSmrgstatic bool
10601e04c3fSmrgdereferences_variable(ir_instruction *ir, ir_variable *var)
10701e04c3fSmrg{
10801e04c3fSmrg   struct find_deref_info info;
10901e04c3fSmrg
11001e04c3fSmrg   info.var = var;
11101e04c3fSmrg   info.found = false;
11201e04c3fSmrg
11301e04c3fSmrg   visit_tree(ir, dereferences_variable_callback, &info);
11401e04c3fSmrg
11501e04c3fSmrg   return info.found;
11601e04c3fSmrg}
11701e04c3fSmrg
11801e04c3fSmrgbool
11901e04c3fSmrgir_tree_grafting_visitor::do_graft(ir_rvalue **rvalue)
12001e04c3fSmrg{
12101e04c3fSmrg   if (!*rvalue)
12201e04c3fSmrg      return false;
12301e04c3fSmrg
12401e04c3fSmrg   ir_dereference_variable *deref = (*rvalue)->as_dereference_variable();
12501e04c3fSmrg
12601e04c3fSmrg   if (!deref || deref->var != this->graft_var)
12701e04c3fSmrg      return false;
12801e04c3fSmrg
12901e04c3fSmrg   if (debug) {
13001e04c3fSmrg      fprintf(stderr, "GRAFTING:\n");
13101e04c3fSmrg      this->graft_assign->fprint(stderr);
13201e04c3fSmrg      fprintf(stderr, "\n");
13301e04c3fSmrg      fprintf(stderr, "TO:\n");
13401e04c3fSmrg      (*rvalue)->fprint(stderr);
13501e04c3fSmrg      fprintf(stderr, "\n");
13601e04c3fSmrg   }
13701e04c3fSmrg
13801e04c3fSmrg   this->graft_assign->remove();
13901e04c3fSmrg   *rvalue = this->graft_assign->rhs;
14001e04c3fSmrg
14101e04c3fSmrg   this->progress = true;
14201e04c3fSmrg   return true;
14301e04c3fSmrg}
14401e04c3fSmrg
14501e04c3fSmrgir_visitor_status
14601e04c3fSmrgir_tree_grafting_visitor::visit_enter(ir_loop *ir)
14701e04c3fSmrg{
14801e04c3fSmrg   (void)ir;
14901e04c3fSmrg   /* Do not traverse into the body of the loop since that is a
15001e04c3fSmrg    * different basic block.
15101e04c3fSmrg    */
15201e04c3fSmrg   return visit_stop;
15301e04c3fSmrg}
15401e04c3fSmrg
15501e04c3fSmrg/**
15601e04c3fSmrg * Check if we can continue grafting after writing to a variable.  If the
15701e04c3fSmrg * expression we're trying to graft references the variable, we must stop.
15801e04c3fSmrg *
15901e04c3fSmrg * \param ir   An instruction that writes to a variable.
16001e04c3fSmrg * \param var  The variable being updated.
16101e04c3fSmrg */
16201e04c3fSmrgir_visitor_status
16301e04c3fSmrgir_tree_grafting_visitor::check_graft(ir_instruction *ir, ir_variable *var)
16401e04c3fSmrg{
16501e04c3fSmrg   if (dereferences_variable(this->graft_assign->rhs, var)) {
16601e04c3fSmrg      if (debug) {
16701e04c3fSmrg	 fprintf(stderr, "graft killed by: ");
16801e04c3fSmrg	 ir->fprint(stderr);
16901e04c3fSmrg	 fprintf(stderr, "\n");
17001e04c3fSmrg      }
17101e04c3fSmrg      return visit_stop;
17201e04c3fSmrg   }
17301e04c3fSmrg
17401e04c3fSmrg   return visit_continue;
17501e04c3fSmrg}
17601e04c3fSmrg
17701e04c3fSmrgir_visitor_status
17801e04c3fSmrgir_tree_grafting_visitor::visit_leave(ir_assignment *ir)
17901e04c3fSmrg{
18001e04c3fSmrg   if (do_graft(&ir->rhs) ||
18101e04c3fSmrg       do_graft(&ir->condition))
18201e04c3fSmrg      return visit_stop;
18301e04c3fSmrg
18401e04c3fSmrg   /* If this assignment updates a variable used in the assignment
18501e04c3fSmrg    * we're trying to graft, then we're done.
18601e04c3fSmrg    */
18701e04c3fSmrg   return check_graft(ir, ir->lhs->variable_referenced());
18801e04c3fSmrg}
18901e04c3fSmrg
19001e04c3fSmrgir_visitor_status
19101e04c3fSmrgir_tree_grafting_visitor::visit_enter(ir_function *ir)
19201e04c3fSmrg{
19301e04c3fSmrg   (void) ir;
19401e04c3fSmrg   return visit_continue_with_parent;
19501e04c3fSmrg}
19601e04c3fSmrg
19701e04c3fSmrgir_visitor_status
19801e04c3fSmrgir_tree_grafting_visitor::visit_enter(ir_function_signature *ir)
19901e04c3fSmrg{
20001e04c3fSmrg   (void)ir;
20101e04c3fSmrg   return visit_continue_with_parent;
20201e04c3fSmrg}
20301e04c3fSmrg
20401e04c3fSmrgir_visitor_status
20501e04c3fSmrgir_tree_grafting_visitor::visit_enter(ir_call *ir)
20601e04c3fSmrg{
20701e04c3fSmrg   foreach_two_lists(formal_node, &ir->callee->parameters,
20801e04c3fSmrg                     actual_node, &ir->actual_parameters) {
20901e04c3fSmrg      ir_variable *sig_param = (ir_variable *) formal_node;
21001e04c3fSmrg      ir_rvalue *ir = (ir_rvalue *) actual_node;
21101e04c3fSmrg      ir_rvalue *new_ir = ir;
21201e04c3fSmrg
21301e04c3fSmrg      if (sig_param->data.mode != ir_var_function_in
21401e04c3fSmrg          && sig_param->data.mode != ir_var_const_in) {
21501e04c3fSmrg	 if (check_graft(ir, sig_param) == visit_stop)
21601e04c3fSmrg	    return visit_stop;
21701e04c3fSmrg	 continue;
21801e04c3fSmrg      }
21901e04c3fSmrg
22001e04c3fSmrg      if (do_graft(&new_ir)) {
22101e04c3fSmrg	 ir->replace_with(new_ir);
22201e04c3fSmrg	 return visit_stop;
22301e04c3fSmrg      }
22401e04c3fSmrg   }
22501e04c3fSmrg
22601e04c3fSmrg   if (ir->return_deref && check_graft(ir, ir->return_deref->var) == visit_stop)
22701e04c3fSmrg      return visit_stop;
22801e04c3fSmrg
22901e04c3fSmrg   return visit_continue;
23001e04c3fSmrg}
23101e04c3fSmrg
23201e04c3fSmrgir_visitor_status
23301e04c3fSmrgir_tree_grafting_visitor::visit_enter(ir_expression *ir)
23401e04c3fSmrg{
23501e04c3fSmrg   for (unsigned int i = 0; i < ir->num_operands; i++) {
23601e04c3fSmrg      if (do_graft(&ir->operands[i]))
23701e04c3fSmrg	 return visit_stop;
23801e04c3fSmrg   }
23901e04c3fSmrg
24001e04c3fSmrg   return visit_continue;
24101e04c3fSmrg}
24201e04c3fSmrg
24301e04c3fSmrgir_visitor_status
24401e04c3fSmrgir_tree_grafting_visitor::visit_enter(ir_if *ir)
24501e04c3fSmrg{
24601e04c3fSmrg   if (do_graft(&ir->condition))
24701e04c3fSmrg      return visit_stop;
24801e04c3fSmrg
24901e04c3fSmrg   /* Do not traverse into the body of the if-statement since that is a
25001e04c3fSmrg    * different basic block.
25101e04c3fSmrg    */
25201e04c3fSmrg   return visit_continue_with_parent;
25301e04c3fSmrg}
25401e04c3fSmrg
25501e04c3fSmrgir_visitor_status
25601e04c3fSmrgir_tree_grafting_visitor::visit_enter(ir_swizzle *ir)
25701e04c3fSmrg{
25801e04c3fSmrg   if (do_graft(&ir->val))
25901e04c3fSmrg      return visit_stop;
26001e04c3fSmrg
26101e04c3fSmrg   return visit_continue;
26201e04c3fSmrg}
26301e04c3fSmrg
26401e04c3fSmrgir_visitor_status
26501e04c3fSmrgir_tree_grafting_visitor::visit_enter(ir_texture *ir)
26601e04c3fSmrg{
26701e04c3fSmrg   if (do_graft(&ir->coordinate) ||
26801e04c3fSmrg       do_graft(&ir->projector) ||
26901e04c3fSmrg       do_graft(&ir->offset) ||
27001e04c3fSmrg       do_graft(&ir->shadow_comparator))
27101e04c3fSmrg	 return visit_stop;
27201e04c3fSmrg
27301e04c3fSmrg   switch (ir->op) {
27401e04c3fSmrg   case ir_tex:
27501e04c3fSmrg   case ir_lod:
27601e04c3fSmrg   case ir_query_levels:
27701e04c3fSmrg   case ir_texture_samples:
27801e04c3fSmrg   case ir_samples_identical:
27901e04c3fSmrg      break;
28001e04c3fSmrg   case ir_txb:
28101e04c3fSmrg      if (do_graft(&ir->lod_info.bias))
28201e04c3fSmrg	 return visit_stop;
28301e04c3fSmrg      break;
28401e04c3fSmrg   case ir_txf:
28501e04c3fSmrg   case ir_txl:
28601e04c3fSmrg   case ir_txs:
28701e04c3fSmrg      if (do_graft(&ir->lod_info.lod))
28801e04c3fSmrg	 return visit_stop;
28901e04c3fSmrg      break;
29001e04c3fSmrg   case ir_txf_ms:
29101e04c3fSmrg      if (do_graft(&ir->lod_info.sample_index))
29201e04c3fSmrg         return visit_stop;
29301e04c3fSmrg      break;
29401e04c3fSmrg   case ir_txd:
29501e04c3fSmrg      if (do_graft(&ir->lod_info.grad.dPdx) ||
29601e04c3fSmrg	  do_graft(&ir->lod_info.grad.dPdy))
29701e04c3fSmrg	 return visit_stop;
29801e04c3fSmrg      break;
29901e04c3fSmrg   case ir_tg4:
30001e04c3fSmrg      if (do_graft(&ir->lod_info.component))
30101e04c3fSmrg         return visit_stop;
30201e04c3fSmrg      break;
30301e04c3fSmrg   }
30401e04c3fSmrg
30501e04c3fSmrg   return visit_continue;
30601e04c3fSmrg}
30701e04c3fSmrg
30801e04c3fSmrgstruct tree_grafting_info {
30901e04c3fSmrg   ir_variable_refcount_visitor *refs;
31001e04c3fSmrg   bool progress;
31101e04c3fSmrg};
31201e04c3fSmrg
31301e04c3fSmrgstatic bool
31401e04c3fSmrgtry_tree_grafting(ir_assignment *start,
31501e04c3fSmrg		  ir_variable *lhs_var,
31601e04c3fSmrg		  ir_instruction *bb_last)
31701e04c3fSmrg{
31801e04c3fSmrg   ir_tree_grafting_visitor v(start, lhs_var);
31901e04c3fSmrg
32001e04c3fSmrg   if (debug) {
32101e04c3fSmrg      fprintf(stderr, "trying to graft: ");
32201e04c3fSmrg      lhs_var->fprint(stderr);
32301e04c3fSmrg      fprintf(stderr, "\n");
32401e04c3fSmrg   }
32501e04c3fSmrg
32601e04c3fSmrg   for (ir_instruction *ir = (ir_instruction *)start->next;
32701e04c3fSmrg	ir != bb_last->next;
32801e04c3fSmrg	ir = (ir_instruction *)ir->next) {
32901e04c3fSmrg
33001e04c3fSmrg      if (debug) {
33101e04c3fSmrg	 fprintf(stderr, "- ");
33201e04c3fSmrg	 ir->fprint(stderr);
33301e04c3fSmrg	 fprintf(stderr, "\n");
33401e04c3fSmrg      }
33501e04c3fSmrg
33601e04c3fSmrg      ir_visitor_status s = ir->accept(&v);
33701e04c3fSmrg      if (s == visit_stop)
33801e04c3fSmrg	 return v.progress;
33901e04c3fSmrg   }
34001e04c3fSmrg
34101e04c3fSmrg   return false;
34201e04c3fSmrg}
34301e04c3fSmrg
34401e04c3fSmrgstatic void
34501e04c3fSmrgtree_grafting_basic_block(ir_instruction *bb_first,
34601e04c3fSmrg			  ir_instruction *bb_last,
34701e04c3fSmrg			  void *data)
34801e04c3fSmrg{
34901e04c3fSmrg   struct tree_grafting_info *info = (struct tree_grafting_info *)data;
35001e04c3fSmrg   ir_instruction *ir, *next;
35101e04c3fSmrg
35201e04c3fSmrg   for (ir = bb_first, next = (ir_instruction *)ir->next;
35301e04c3fSmrg	ir != bb_last->next;
35401e04c3fSmrg	ir = next, next = (ir_instruction *)ir->next) {
35501e04c3fSmrg      ir_assignment *assign = ir->as_assignment();
35601e04c3fSmrg
35701e04c3fSmrg      if (!assign)
35801e04c3fSmrg	 continue;
35901e04c3fSmrg
36001e04c3fSmrg      ir_variable *lhs_var = assign->whole_variable_written();
36101e04c3fSmrg      if (!lhs_var)
36201e04c3fSmrg	 continue;
36301e04c3fSmrg
36401e04c3fSmrg      if (lhs_var->data.mode == ir_var_function_out ||
36501e04c3fSmrg          lhs_var->data.mode == ir_var_function_inout ||
36601e04c3fSmrg          lhs_var->data.mode == ir_var_shader_out ||
36701e04c3fSmrg          lhs_var->data.mode == ir_var_shader_storage ||
36801e04c3fSmrg          lhs_var->data.mode == ir_var_shader_shared)
36901e04c3fSmrg         continue;
37001e04c3fSmrg
37101e04c3fSmrg      if (lhs_var->data.precise)
37201e04c3fSmrg         continue;
37301e04c3fSmrg
37401e04c3fSmrg      /* Do not graft sampler and image variables. This is a workaround to
37501e04c3fSmrg       * st/glsl_to_tgsi being unable to handle expression parameters to image
37601e04c3fSmrg       * intrinsics.
37701e04c3fSmrg       *
37801e04c3fSmrg       * Note that if this is ever fixed, we still need to skip grafting when
37901e04c3fSmrg       * any image layout qualifiers (including the image format) are set,
38001e04c3fSmrg       * since we must not lose those.
38101e04c3fSmrg       */
38201e04c3fSmrg      if (lhs_var->type->is_sampler() || lhs_var->type->is_image())
38301e04c3fSmrg         continue;
38401e04c3fSmrg
38501e04c3fSmrg      ir_variable_refcount_entry *entry = info->refs->get_variable_entry(lhs_var);
38601e04c3fSmrg
38701e04c3fSmrg      if (!entry->declaration ||
38801e04c3fSmrg	  entry->assigned_count != 1 ||
38901e04c3fSmrg	  entry->referenced_count != 2)
39001e04c3fSmrg	 continue;
39101e04c3fSmrg
39201e04c3fSmrg      /* Found a possibly graftable assignment.  Now, walk through the
39301e04c3fSmrg       * rest of the BB seeing if the deref is here, and if nothing interfered with
39401e04c3fSmrg       * pasting its expression's values in between.
39501e04c3fSmrg       */
39601e04c3fSmrg      info->progress |= try_tree_grafting(assign, lhs_var, bb_last);
39701e04c3fSmrg   }
39801e04c3fSmrg}
39901e04c3fSmrg
40001e04c3fSmrg} /* unnamed namespace */
40101e04c3fSmrg
40201e04c3fSmrg/**
40301e04c3fSmrg * Does a copy propagation pass on the code present in the instruction stream.
40401e04c3fSmrg */
40501e04c3fSmrgbool
40601e04c3fSmrgdo_tree_grafting(exec_list *instructions)
40701e04c3fSmrg{
40801e04c3fSmrg   ir_variable_refcount_visitor refs;
40901e04c3fSmrg   struct tree_grafting_info info;
41001e04c3fSmrg
41101e04c3fSmrg   info.progress = false;
41201e04c3fSmrg   info.refs = &refs;
41301e04c3fSmrg
41401e04c3fSmrg   visit_list_elements(info.refs, instructions);
41501e04c3fSmrg
41601e04c3fSmrg   call_for_basic_blocks(instructions, tree_grafting_basic_block, &info);
41701e04c3fSmrg
41801e04c3fSmrg   return info.progress;
41901e04c3fSmrg}
420