1b8e80941Smrg/*
2b8e80941Smrg * Copyright © 2010 Intel Corporation
3b8e80941Smrg *
4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
6b8e80941Smrg * to deal in the Software without restriction, including without limitation
7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
10b8e80941Smrg *
11b8e80941Smrg * The above copyright notice and this permission notice (including the next
12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
13b8e80941Smrg * Software.
14b8e80941Smrg *
15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21b8e80941Smrg * DEALINGS IN THE SOFTWARE.
22b8e80941Smrg */
23b8e80941Smrg
24b8e80941Smrg/**
25b8e80941Smrg * \file opt_tree_grafting.cpp
26b8e80941Smrg *
27b8e80941Smrg * Takes assignments to variables that are dereferenced only once and
28b8e80941Smrg * pastes the RHS expression into where the variable is dereferenced.
29b8e80941Smrg *
30b8e80941Smrg * In the process of various operations like function inlining and
31b8e80941Smrg * tertiary op handling, we'll end up with our expression trees having
32b8e80941Smrg * been chopped up into a series of assignments of short expressions
33b8e80941Smrg * to temps.  Other passes like ir_algebraic.cpp would prefer to see
34b8e80941Smrg * the deepest expression trees they can to try to optimize them.
35b8e80941Smrg *
36b8e80941Smrg * This is a lot like copy propagaton.  In comparison, copy
37b8e80941Smrg * propagation only acts on plain copies, not arbitrary expressions on
38b8e80941Smrg * the RHS.  Generally, we wouldn't want to go pasting some
39b8e80941Smrg * complicated expression everywhere it got used, though, so we don't
40b8e80941Smrg * handle expressions in that pass.
41b8e80941Smrg *
42b8e80941Smrg * The hard part is making sure we don't move an expression across
43b8e80941Smrg * some other assignments that would change the value of the
44b8e80941Smrg * expression.  So we split this into two passes: First, find the
45b8e80941Smrg * variables in our scope which are written to once and read once, and
46b8e80941Smrg * then go through basic blocks seeing if we find an opportunity to
47b8e80941Smrg * move those expressions safely.
48b8e80941Smrg */
49b8e80941Smrg
50b8e80941Smrg#include "ir.h"
51b8e80941Smrg#include "ir_visitor.h"
52b8e80941Smrg#include "ir_variable_refcount.h"
53b8e80941Smrg#include "ir_basic_block.h"
54b8e80941Smrg#include "ir_optimization.h"
55b8e80941Smrg#include "compiler/glsl_types.h"
56b8e80941Smrg
57b8e80941Smrgnamespace {
58b8e80941Smrg
59b8e80941Smrgstatic bool debug = false;
60b8e80941Smrg
61b8e80941Smrgclass ir_tree_grafting_visitor : public ir_hierarchical_visitor {
62b8e80941Smrgpublic:
63b8e80941Smrg   ir_tree_grafting_visitor(ir_assignment *graft_assign,
64b8e80941Smrg			    ir_variable *graft_var)
65b8e80941Smrg   {
66b8e80941Smrg      this->progress = false;
67b8e80941Smrg      this->graft_assign = graft_assign;
68b8e80941Smrg      this->graft_var = graft_var;
69b8e80941Smrg   }
70b8e80941Smrg
71b8e80941Smrg   virtual ir_visitor_status visit_leave(class ir_assignment *);
72b8e80941Smrg   virtual ir_visitor_status visit_enter(class ir_call *);
73b8e80941Smrg   virtual ir_visitor_status visit_enter(class ir_expression *);
74b8e80941Smrg   virtual ir_visitor_status visit_enter(class ir_function *);
75b8e80941Smrg   virtual ir_visitor_status visit_enter(class ir_function_signature *);
76b8e80941Smrg   virtual ir_visitor_status visit_enter(class ir_if *);
77b8e80941Smrg   virtual ir_visitor_status visit_enter(class ir_loop *);
78b8e80941Smrg   virtual ir_visitor_status visit_enter(class ir_swizzle *);
79b8e80941Smrg   virtual ir_visitor_status visit_enter(class ir_texture *);
80b8e80941Smrg
81b8e80941Smrg   ir_visitor_status check_graft(ir_instruction *ir, ir_variable *var);
82b8e80941Smrg
83b8e80941Smrg   bool do_graft(ir_rvalue **rvalue);
84b8e80941Smrg
85b8e80941Smrg   bool progress;
86b8e80941Smrg   ir_variable *graft_var;
87b8e80941Smrg   ir_assignment *graft_assign;
88b8e80941Smrg};
89b8e80941Smrg
90b8e80941Smrgstruct find_deref_info {
91b8e80941Smrg   ir_variable *var;
92b8e80941Smrg   bool found;
93b8e80941Smrg};
94b8e80941Smrg
95b8e80941Smrgvoid
96b8e80941Smrgdereferences_variable_callback(ir_instruction *ir, void *data)
97b8e80941Smrg{
98b8e80941Smrg   struct find_deref_info *info = (struct find_deref_info *)data;
99b8e80941Smrg   ir_dereference_variable *deref = ir->as_dereference_variable();
100b8e80941Smrg
101b8e80941Smrg   if (deref && deref->var == info->var)
102b8e80941Smrg      info->found = true;
103b8e80941Smrg}
104b8e80941Smrg
105b8e80941Smrgstatic bool
106b8e80941Smrgdereferences_variable(ir_instruction *ir, ir_variable *var)
107b8e80941Smrg{
108b8e80941Smrg   struct find_deref_info info;
109b8e80941Smrg
110b8e80941Smrg   info.var = var;
111b8e80941Smrg   info.found = false;
112b8e80941Smrg
113b8e80941Smrg   visit_tree(ir, dereferences_variable_callback, &info);
114b8e80941Smrg
115b8e80941Smrg   return info.found;
116b8e80941Smrg}
117b8e80941Smrg
118b8e80941Smrgbool
119b8e80941Smrgir_tree_grafting_visitor::do_graft(ir_rvalue **rvalue)
120b8e80941Smrg{
121b8e80941Smrg   if (!*rvalue)
122b8e80941Smrg      return false;
123b8e80941Smrg
124b8e80941Smrg   ir_dereference_variable *deref = (*rvalue)->as_dereference_variable();
125b8e80941Smrg
126b8e80941Smrg   if (!deref || deref->var != this->graft_var)
127b8e80941Smrg      return false;
128b8e80941Smrg
129b8e80941Smrg   if (debug) {
130b8e80941Smrg      fprintf(stderr, "GRAFTING:\n");
131b8e80941Smrg      this->graft_assign->fprint(stderr);
132b8e80941Smrg      fprintf(stderr, "\n");
133b8e80941Smrg      fprintf(stderr, "TO:\n");
134b8e80941Smrg      (*rvalue)->fprint(stderr);
135b8e80941Smrg      fprintf(stderr, "\n");
136b8e80941Smrg   }
137b8e80941Smrg
138b8e80941Smrg   this->graft_assign->remove();
139b8e80941Smrg   *rvalue = this->graft_assign->rhs;
140b8e80941Smrg
141b8e80941Smrg   this->progress = true;
142b8e80941Smrg   return true;
143b8e80941Smrg}
144b8e80941Smrg
145b8e80941Smrgir_visitor_status
146b8e80941Smrgir_tree_grafting_visitor::visit_enter(ir_loop *ir)
147b8e80941Smrg{
148b8e80941Smrg   (void)ir;
149b8e80941Smrg   /* Do not traverse into the body of the loop since that is a
150b8e80941Smrg    * different basic block.
151b8e80941Smrg    */
152b8e80941Smrg   return visit_stop;
153b8e80941Smrg}
154b8e80941Smrg
155b8e80941Smrg/**
156b8e80941Smrg * Check if we can continue grafting after writing to a variable.  If the
157b8e80941Smrg * expression we're trying to graft references the variable, we must stop.
158b8e80941Smrg *
159b8e80941Smrg * \param ir   An instruction that writes to a variable.
160b8e80941Smrg * \param var  The variable being updated.
161b8e80941Smrg */
162b8e80941Smrgir_visitor_status
163b8e80941Smrgir_tree_grafting_visitor::check_graft(ir_instruction *ir, ir_variable *var)
164b8e80941Smrg{
165b8e80941Smrg   if (dereferences_variable(this->graft_assign->rhs, var)) {
166b8e80941Smrg      if (debug) {
167b8e80941Smrg	 fprintf(stderr, "graft killed by: ");
168b8e80941Smrg	 ir->fprint(stderr);
169b8e80941Smrg	 fprintf(stderr, "\n");
170b8e80941Smrg      }
171b8e80941Smrg      return visit_stop;
172b8e80941Smrg   }
173b8e80941Smrg
174b8e80941Smrg   return visit_continue;
175b8e80941Smrg}
176b8e80941Smrg
177b8e80941Smrgir_visitor_status
178b8e80941Smrgir_tree_grafting_visitor::visit_leave(ir_assignment *ir)
179b8e80941Smrg{
180b8e80941Smrg   if (do_graft(&ir->rhs) ||
181b8e80941Smrg       do_graft(&ir->condition))
182b8e80941Smrg      return visit_stop;
183b8e80941Smrg
184b8e80941Smrg   /* If this assignment updates a variable used in the assignment
185b8e80941Smrg    * we're trying to graft, then we're done.
186b8e80941Smrg    */
187b8e80941Smrg   return check_graft(ir, ir->lhs->variable_referenced());
188b8e80941Smrg}
189b8e80941Smrg
190b8e80941Smrgir_visitor_status
191b8e80941Smrgir_tree_grafting_visitor::visit_enter(ir_function *ir)
192b8e80941Smrg{
193b8e80941Smrg   (void) ir;
194b8e80941Smrg   return visit_continue_with_parent;
195b8e80941Smrg}
196b8e80941Smrg
197b8e80941Smrgir_visitor_status
198b8e80941Smrgir_tree_grafting_visitor::visit_enter(ir_function_signature *ir)
199b8e80941Smrg{
200b8e80941Smrg   (void)ir;
201b8e80941Smrg   return visit_continue_with_parent;
202b8e80941Smrg}
203b8e80941Smrg
204b8e80941Smrgir_visitor_status
205b8e80941Smrgir_tree_grafting_visitor::visit_enter(ir_call *ir)
206b8e80941Smrg{
207b8e80941Smrg   foreach_two_lists(formal_node, &ir->callee->parameters,
208b8e80941Smrg                     actual_node, &ir->actual_parameters) {
209b8e80941Smrg      ir_variable *sig_param = (ir_variable *) formal_node;
210b8e80941Smrg      ir_rvalue *ir = (ir_rvalue *) actual_node;
211b8e80941Smrg      ir_rvalue *new_ir = ir;
212b8e80941Smrg
213b8e80941Smrg      if (sig_param->data.mode != ir_var_function_in
214b8e80941Smrg          && sig_param->data.mode != ir_var_const_in) {
215b8e80941Smrg	 if (check_graft(ir, sig_param) == visit_stop)
216b8e80941Smrg	    return visit_stop;
217b8e80941Smrg	 continue;
218b8e80941Smrg      }
219b8e80941Smrg
220b8e80941Smrg      if (do_graft(&new_ir)) {
221b8e80941Smrg	 ir->replace_with(new_ir);
222b8e80941Smrg	 return visit_stop;
223b8e80941Smrg      }
224b8e80941Smrg   }
225b8e80941Smrg
226b8e80941Smrg   if (ir->return_deref && check_graft(ir, ir->return_deref->var) == visit_stop)
227b8e80941Smrg      return visit_stop;
228b8e80941Smrg
229b8e80941Smrg   return visit_continue;
230b8e80941Smrg}
231b8e80941Smrg
232b8e80941Smrgir_visitor_status
233b8e80941Smrgir_tree_grafting_visitor::visit_enter(ir_expression *ir)
234b8e80941Smrg{
235b8e80941Smrg   for (unsigned int i = 0; i < ir->num_operands; i++) {
236b8e80941Smrg      if (do_graft(&ir->operands[i]))
237b8e80941Smrg	 return visit_stop;
238b8e80941Smrg   }
239b8e80941Smrg
240b8e80941Smrg   return visit_continue;
241b8e80941Smrg}
242b8e80941Smrg
243b8e80941Smrgir_visitor_status
244b8e80941Smrgir_tree_grafting_visitor::visit_enter(ir_if *ir)
245b8e80941Smrg{
246b8e80941Smrg   if (do_graft(&ir->condition))
247b8e80941Smrg      return visit_stop;
248b8e80941Smrg
249b8e80941Smrg   /* Do not traverse into the body of the if-statement since that is a
250b8e80941Smrg    * different basic block.
251b8e80941Smrg    */
252b8e80941Smrg   return visit_continue_with_parent;
253b8e80941Smrg}
254b8e80941Smrg
255b8e80941Smrgir_visitor_status
256b8e80941Smrgir_tree_grafting_visitor::visit_enter(ir_swizzle *ir)
257b8e80941Smrg{
258b8e80941Smrg   if (do_graft(&ir->val))
259b8e80941Smrg      return visit_stop;
260b8e80941Smrg
261b8e80941Smrg   return visit_continue;
262b8e80941Smrg}
263b8e80941Smrg
264b8e80941Smrgir_visitor_status
265b8e80941Smrgir_tree_grafting_visitor::visit_enter(ir_texture *ir)
266b8e80941Smrg{
267b8e80941Smrg   if (do_graft(&ir->coordinate) ||
268b8e80941Smrg       do_graft(&ir->projector) ||
269b8e80941Smrg       do_graft(&ir->offset) ||
270b8e80941Smrg       do_graft(&ir->shadow_comparator))
271b8e80941Smrg	 return visit_stop;
272b8e80941Smrg
273b8e80941Smrg   switch (ir->op) {
274b8e80941Smrg   case ir_tex:
275b8e80941Smrg   case ir_lod:
276b8e80941Smrg   case ir_query_levels:
277b8e80941Smrg   case ir_texture_samples:
278b8e80941Smrg   case ir_samples_identical:
279b8e80941Smrg      break;
280b8e80941Smrg   case ir_txb:
281b8e80941Smrg      if (do_graft(&ir->lod_info.bias))
282b8e80941Smrg	 return visit_stop;
283b8e80941Smrg      break;
284b8e80941Smrg   case ir_txf:
285b8e80941Smrg   case ir_txl:
286b8e80941Smrg   case ir_txs:
287b8e80941Smrg      if (do_graft(&ir->lod_info.lod))
288b8e80941Smrg	 return visit_stop;
289b8e80941Smrg      break;
290b8e80941Smrg   case ir_txf_ms:
291b8e80941Smrg      if (do_graft(&ir->lod_info.sample_index))
292b8e80941Smrg         return visit_stop;
293b8e80941Smrg      break;
294b8e80941Smrg   case ir_txd:
295b8e80941Smrg      if (do_graft(&ir->lod_info.grad.dPdx) ||
296b8e80941Smrg	  do_graft(&ir->lod_info.grad.dPdy))
297b8e80941Smrg	 return visit_stop;
298b8e80941Smrg      break;
299b8e80941Smrg   case ir_tg4:
300b8e80941Smrg      if (do_graft(&ir->lod_info.component))
301b8e80941Smrg         return visit_stop;
302b8e80941Smrg      break;
303b8e80941Smrg   }
304b8e80941Smrg
305b8e80941Smrg   return visit_continue;
306b8e80941Smrg}
307b8e80941Smrg
308b8e80941Smrgstruct tree_grafting_info {
309b8e80941Smrg   ir_variable_refcount_visitor *refs;
310b8e80941Smrg   bool progress;
311b8e80941Smrg};
312b8e80941Smrg
313b8e80941Smrgstatic bool
314b8e80941Smrgtry_tree_grafting(ir_assignment *start,
315b8e80941Smrg		  ir_variable *lhs_var,
316b8e80941Smrg		  ir_instruction *bb_last)
317b8e80941Smrg{
318b8e80941Smrg   ir_tree_grafting_visitor v(start, lhs_var);
319b8e80941Smrg
320b8e80941Smrg   if (debug) {
321b8e80941Smrg      fprintf(stderr, "trying to graft: ");
322b8e80941Smrg      lhs_var->fprint(stderr);
323b8e80941Smrg      fprintf(stderr, "\n");
324b8e80941Smrg   }
325b8e80941Smrg
326b8e80941Smrg   for (ir_instruction *ir = (ir_instruction *)start->next;
327b8e80941Smrg	ir != bb_last->next;
328b8e80941Smrg	ir = (ir_instruction *)ir->next) {
329b8e80941Smrg
330b8e80941Smrg      if (debug) {
331b8e80941Smrg	 fprintf(stderr, "- ");
332b8e80941Smrg	 ir->fprint(stderr);
333b8e80941Smrg	 fprintf(stderr, "\n");
334b8e80941Smrg      }
335b8e80941Smrg
336b8e80941Smrg      ir_visitor_status s = ir->accept(&v);
337b8e80941Smrg      if (s == visit_stop)
338b8e80941Smrg	 return v.progress;
339b8e80941Smrg   }
340b8e80941Smrg
341b8e80941Smrg   return false;
342b8e80941Smrg}
343b8e80941Smrg
344b8e80941Smrgstatic void
345b8e80941Smrgtree_grafting_basic_block(ir_instruction *bb_first,
346b8e80941Smrg			  ir_instruction *bb_last,
347b8e80941Smrg			  void *data)
348b8e80941Smrg{
349b8e80941Smrg   struct tree_grafting_info *info = (struct tree_grafting_info *)data;
350b8e80941Smrg   ir_instruction *ir, *next;
351b8e80941Smrg
352b8e80941Smrg   for (ir = bb_first, next = (ir_instruction *)ir->next;
353b8e80941Smrg	ir != bb_last->next;
354b8e80941Smrg	ir = next, next = (ir_instruction *)ir->next) {
355b8e80941Smrg      ir_assignment *assign = ir->as_assignment();
356b8e80941Smrg
357b8e80941Smrg      if (!assign)
358b8e80941Smrg	 continue;
359b8e80941Smrg
360b8e80941Smrg      ir_variable *lhs_var = assign->whole_variable_written();
361b8e80941Smrg      if (!lhs_var)
362b8e80941Smrg	 continue;
363b8e80941Smrg
364b8e80941Smrg      if (lhs_var->data.mode == ir_var_function_out ||
365b8e80941Smrg          lhs_var->data.mode == ir_var_function_inout ||
366b8e80941Smrg          lhs_var->data.mode == ir_var_shader_out ||
367b8e80941Smrg          lhs_var->data.mode == ir_var_shader_storage ||
368b8e80941Smrg          lhs_var->data.mode == ir_var_shader_shared)
369b8e80941Smrg         continue;
370b8e80941Smrg
371b8e80941Smrg      if (lhs_var->data.precise)
372b8e80941Smrg         continue;
373b8e80941Smrg
374b8e80941Smrg      /* Do not graft sampler and image variables. This is a workaround to
375b8e80941Smrg       * st/glsl_to_tgsi being unable to handle expression parameters to image
376b8e80941Smrg       * intrinsics.
377b8e80941Smrg       *
378b8e80941Smrg       * Note that if this is ever fixed, we still need to skip grafting when
379b8e80941Smrg       * any image layout qualifiers (including the image format) are set,
380b8e80941Smrg       * since we must not lose those.
381b8e80941Smrg       */
382b8e80941Smrg      if (lhs_var->type->is_sampler() || lhs_var->type->is_image())
383b8e80941Smrg         continue;
384b8e80941Smrg
385b8e80941Smrg      ir_variable_refcount_entry *entry = info->refs->get_variable_entry(lhs_var);
386b8e80941Smrg
387b8e80941Smrg      if (!entry->declaration ||
388b8e80941Smrg	  entry->assigned_count != 1 ||
389b8e80941Smrg	  entry->referenced_count != 2)
390b8e80941Smrg	 continue;
391b8e80941Smrg
392b8e80941Smrg      /* Found a possibly graftable assignment.  Now, walk through the
393b8e80941Smrg       * rest of the BB seeing if the deref is here, and if nothing interfered with
394b8e80941Smrg       * pasting its expression's values in between.
395b8e80941Smrg       */
396b8e80941Smrg      info->progress |= try_tree_grafting(assign, lhs_var, bb_last);
397b8e80941Smrg   }
398b8e80941Smrg}
399b8e80941Smrg
400b8e80941Smrg} /* unnamed namespace */
401b8e80941Smrg
402b8e80941Smrg/**
403b8e80941Smrg * Does a copy propagation pass on the code present in the instruction stream.
404b8e80941Smrg */
405b8e80941Smrgbool
406b8e80941Smrgdo_tree_grafting(exec_list *instructions)
407b8e80941Smrg{
408b8e80941Smrg   ir_variable_refcount_visitor refs;
409b8e80941Smrg   struct tree_grafting_info info;
410b8e80941Smrg
411b8e80941Smrg   info.progress = false;
412b8e80941Smrg   info.refs = &refs;
413b8e80941Smrg
414b8e80941Smrg   visit_list_elements(info.refs, instructions);
415b8e80941Smrg
416b8e80941Smrg   call_for_basic_blocks(instructions, tree_grafting_basic_block, &info);
417b8e80941Smrg
418b8e80941Smrg   return info.progress;
419b8e80941Smrg}
420