/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
2301e04c3fSmrg
/**
 * \file opt_rebalance_tree.cpp
 *
 * Rebalances a reduction expression tree.
 *
 * For reduction operations (e.g., x + y + z + w) we generate an expression
 * tree like
 *
 *        +
 *       / \
 *      +   w
 *     / \
 *    +   z
 *   / \
 *  x   y
 *
 * which we can rebalance into
 *
 *       +
 *      / \
 *     /   \
 *    +     +
 *   / \   / \
 *  x   y z   w
 *
 * to get a better instruction scheduling.
 *
 * See "Tree Rebalancing in Optimal Time and Space" by Quentin F. Stout
 * and Bette L. Warren.
 *
 * Also see http://penguin.ewu.edu/~trolfe/DSWpaper/ for a very readable
 * explanation of the tree_to_vine() (rightward rotation) and
 * vine_to_tree() (leftward rotation) algorithms.
 */
5801e04c3fSmrg
5901e04c3fSmrg#include "ir.h"
6001e04c3fSmrg#include "ir_visitor.h"
6101e04c3fSmrg#include "ir_rvalue_visitor.h"
6201e04c3fSmrg#include "ir_optimization.h"
6301e04c3fSmrg#include "main/macros.h" /* for MAX2 */
6401e04c3fSmrg
6501e04c3fSmrg/* The DSW algorithm generates a degenerate tree (really, a linked list) in
6601e04c3fSmrg * tree_to_vine(). We'd rather not leave a binary expression with only one
6701e04c3fSmrg * operand, so trivial modifications (the ternary operators below) are needed
6801e04c3fSmrg * to ensure that we only rotate around the ir_expression nodes of the tree.
6901e04c3fSmrg */
7001e04c3fSmrgstatic unsigned
7101e04c3fSmrgtree_to_vine(ir_expression *root)
7201e04c3fSmrg{
7301e04c3fSmrg   unsigned size = 0;
7401e04c3fSmrg   ir_rvalue *vine_tail = root;
7501e04c3fSmrg   ir_rvalue *remainder = root->operands[1];
7601e04c3fSmrg
7701e04c3fSmrg   while (remainder != NULL) {
7801e04c3fSmrg      ir_expression *remainder_temp = remainder->as_expression();
7901e04c3fSmrg      ir_expression *remainder_left = remainder_temp ?
8001e04c3fSmrg         remainder_temp->operands[0]->as_expression() : NULL;
8101e04c3fSmrg
8201e04c3fSmrg      if (remainder_left == NULL) {
8301e04c3fSmrg         /* move vine_tail down one */
8401e04c3fSmrg         vine_tail = remainder;
8501e04c3fSmrg         remainder = remainder->as_expression() ?
8601e04c3fSmrg            ((ir_expression *)remainder)->operands[1] : NULL;
8701e04c3fSmrg         size++;
8801e04c3fSmrg      } else {
8901e04c3fSmrg         /* rotate */
9001e04c3fSmrg         ir_expression *tempptr = remainder_left;
9101e04c3fSmrg         ((ir_expression *)remainder)->operands[0] = tempptr->operands[1];
9201e04c3fSmrg         tempptr->operands[1] = remainder;
9301e04c3fSmrg         remainder = tempptr;
9401e04c3fSmrg         ((ir_expression *)vine_tail)->operands[1] = tempptr;
9501e04c3fSmrg      }
9601e04c3fSmrg   }
9701e04c3fSmrg
9801e04c3fSmrg   return size;
9901e04c3fSmrg}
10001e04c3fSmrg
10101e04c3fSmrgstatic void
10201e04c3fSmrgcompression(ir_expression *root, unsigned count)
10301e04c3fSmrg{
10401e04c3fSmrg   ir_expression *scanner = root;
10501e04c3fSmrg
10601e04c3fSmrg   for (unsigned i = 0; i < count; i++) {
10701e04c3fSmrg      ir_expression *child = (ir_expression *)scanner->operands[1];
10801e04c3fSmrg      scanner->operands[1] = child->operands[1];
10901e04c3fSmrg      scanner = (ir_expression *)scanner->operands[1];
11001e04c3fSmrg      child->operands[1] = scanner->operands[0];
11101e04c3fSmrg      scanner->operands[0] = child;
11201e04c3fSmrg   }
11301e04c3fSmrg}
11401e04c3fSmrg
11501e04c3fSmrgstatic void
11601e04c3fSmrgvine_to_tree(ir_expression *root, unsigned size)
11701e04c3fSmrg{
11801e04c3fSmrg   int n = size - 1;
11901e04c3fSmrg   for (int m = n / 2; m > 0; m = n / 2) {
12001e04c3fSmrg      compression(root, m);
12101e04c3fSmrg      n -= m + 1;
12201e04c3fSmrg   }
12301e04c3fSmrg}
12401e04c3fSmrg
12501e04c3fSmrgnamespace {
12601e04c3fSmrg
12701e04c3fSmrgclass ir_rebalance_visitor : public ir_rvalue_enter_visitor {
12801e04c3fSmrgpublic:
12901e04c3fSmrg   ir_rebalance_visitor()
13001e04c3fSmrg   {
13101e04c3fSmrg      progress = false;
13201e04c3fSmrg   }
13301e04c3fSmrg
13401e04c3fSmrg   virtual ir_visitor_status visit_enter(ir_assignment *ir);
13501e04c3fSmrg
13601e04c3fSmrg   void handle_rvalue(ir_rvalue **rvalue);
13701e04c3fSmrg
13801e04c3fSmrg   bool progress;
13901e04c3fSmrg};
14001e04c3fSmrg
14101e04c3fSmrgstruct is_reduction_data {
14201e04c3fSmrg   ir_expression_operation operation;
14301e04c3fSmrg   const glsl_type *type;
14401e04c3fSmrg   unsigned num_expr;
14501e04c3fSmrg   bool is_reduction;
14601e04c3fSmrg   bool contains_constant;
14701e04c3fSmrg};
14801e04c3fSmrg
14901e04c3fSmrg} /* anonymous namespace */
15001e04c3fSmrg
15101e04c3fSmrgir_visitor_status
15201e04c3fSmrgir_rebalance_visitor::visit_enter(ir_assignment *ir)
15301e04c3fSmrg{
15401e04c3fSmrg   ir_variable *var = ir->lhs->variable_referenced();
15501e04c3fSmrg   if (var->data.invariant || var->data.precise) {
15601e04c3fSmrg      /* If we're assigning to an invariant variable, just bail.  Tree
15701e04c3fSmrg       * rebalancing (reassociation) isn't precision-safe.
15801e04c3fSmrg       */
15901e04c3fSmrg      return visit_continue_with_parent;
16001e04c3fSmrg   } else {
16101e04c3fSmrg      return visit_continue;
16201e04c3fSmrg   }
16301e04c3fSmrg}
16401e04c3fSmrg
16501e04c3fSmrgstatic bool
16601e04c3fSmrgis_reduction_operation(ir_expression_operation operation)
16701e04c3fSmrg{
16801e04c3fSmrg   switch (operation) {
16901e04c3fSmrg   case ir_binop_add:
17001e04c3fSmrg   case ir_binop_mul:
17101e04c3fSmrg   case ir_binop_bit_and:
17201e04c3fSmrg   case ir_binop_bit_xor:
17301e04c3fSmrg   case ir_binop_bit_or:
17401e04c3fSmrg   case ir_binop_logic_and:
17501e04c3fSmrg   case ir_binop_logic_xor:
17601e04c3fSmrg   case ir_binop_logic_or:
17701e04c3fSmrg   case ir_binop_min:
17801e04c3fSmrg   case ir_binop_max:
17901e04c3fSmrg      return true;
18001e04c3fSmrg   default:
18101e04c3fSmrg      return false;
18201e04c3fSmrg   }
18301e04c3fSmrg}
18401e04c3fSmrg
18501e04c3fSmrg/* Note that this function does not attempt to recognize that reduction trees
18601e04c3fSmrg * are already balanced.
18701e04c3fSmrg *
18801e04c3fSmrg * We return false from this function for a number of reasons other than an
18901e04c3fSmrg * expression tree not being a mathematical reduction. Namely,
19001e04c3fSmrg *
19101e04c3fSmrg *    - if the tree contains multiple constants that we may be able to combine.
19201e04c3fSmrg *    - if the tree contains matrices:
19301e04c3fSmrg *       - they might contain vec4's with many constant components that we can
19401e04c3fSmrg *         simplify after splitting.
19501e04c3fSmrg *       - applying the matrix chain ordering optimization is more than just
19601e04c3fSmrg *         balancing an expression tree.
19701e04c3fSmrg *    - if the tree contains operations on multiple types.
19801e04c3fSmrg *    - if the tree contains ir_dereference_{array,record}, since foo[a+b] + c
19901e04c3fSmrg *      would trick the visiting pass.
20001e04c3fSmrg */
20101e04c3fSmrgstatic void
20201e04c3fSmrgis_reduction(ir_instruction *ir, void *data)
20301e04c3fSmrg{
20401e04c3fSmrg   struct is_reduction_data *ird = (struct is_reduction_data *)data;
20501e04c3fSmrg   if (!ird->is_reduction)
20601e04c3fSmrg      return;
20701e04c3fSmrg
20801e04c3fSmrg   /* We don't want to balance a tree that contains multiple constants, since
20901e04c3fSmrg    * we'll be able to constant fold them if they're not in separate subtrees.
21001e04c3fSmrg    */
21101e04c3fSmrg   if (ir->as_constant()) {
21201e04c3fSmrg      if (ird->contains_constant) {
21301e04c3fSmrg         ird->is_reduction = false;
21401e04c3fSmrg      }
21501e04c3fSmrg      ird->contains_constant = true;
21601e04c3fSmrg      return;
21701e04c3fSmrg   }
21801e04c3fSmrg
21901e04c3fSmrg   /* Array/record dereferences have subtrees that are not part of the expr
22001e04c3fSmrg    * tree we're balancing. Skip trees containing them.
22101e04c3fSmrg    */
22201e04c3fSmrg   if (ir->ir_type == ir_type_dereference_array ||
22301e04c3fSmrg       ir->ir_type == ir_type_dereference_record) {
22401e04c3fSmrg      ird->is_reduction = false;
22501e04c3fSmrg      return;
22601e04c3fSmrg   }
22701e04c3fSmrg
22801e04c3fSmrg   ir_expression *expr = ir->as_expression();
22901e04c3fSmrg   if (!expr)
23001e04c3fSmrg      return;
23101e04c3fSmrg
23201e04c3fSmrg   /* Non-constant matrices might still contain constant vec4 that we can
23301e04c3fSmrg    * constant fold once split up. Handling matrices will need some more
23401e04c3fSmrg    * work.
23501e04c3fSmrg    */
23601e04c3fSmrg   if (expr->type->is_matrix() ||
23701e04c3fSmrg       expr->operands[0]->type->is_matrix() ||
23801e04c3fSmrg       (expr->operands[1] && expr->operands[1]->type->is_matrix())) {
23901e04c3fSmrg      ird->is_reduction = false;
24001e04c3fSmrg      return;
24101e04c3fSmrg   }
24201e04c3fSmrg
24301e04c3fSmrg   if (ird->type != NULL && ird->type != expr->type) {
24401e04c3fSmrg      ird->is_reduction = false;
24501e04c3fSmrg      return;
24601e04c3fSmrg   }
24701e04c3fSmrg   ird->type = expr->type;
24801e04c3fSmrg
24901e04c3fSmrg   ird->num_expr++;
25001e04c3fSmrg   if (is_reduction_operation(expr->operation)) {
25101e04c3fSmrg      if (ird->operation != 0 && ird->operation != expr->operation)
25201e04c3fSmrg         ird->is_reduction = false;
25301e04c3fSmrg      ird->operation = expr->operation;
25401e04c3fSmrg   } else {
25501e04c3fSmrg      ird->is_reduction = false;
25601e04c3fSmrg   }
25701e04c3fSmrg}
25801e04c3fSmrg
25901e04c3fSmrgstatic ir_rvalue *
26001e04c3fSmrghandle_expression(ir_expression *expr)
26101e04c3fSmrg{
26201e04c3fSmrg   struct is_reduction_data ird;
26301e04c3fSmrg   ird.operation = (ir_expression_operation)0;
26401e04c3fSmrg   ird.type = NULL;
26501e04c3fSmrg   ird.num_expr = 0;
26601e04c3fSmrg   ird.is_reduction = true;
26701e04c3fSmrg   ird.contains_constant = false;
26801e04c3fSmrg
26901e04c3fSmrg   visit_tree(expr, is_reduction, (void *)&ird);
27001e04c3fSmrg
27101e04c3fSmrg   if (ird.is_reduction && ird.num_expr > 2) {
27201e04c3fSmrg      ir_constant z = ir_constant(0.0f);
27301e04c3fSmrg      ir_expression pseudo_root = ir_expression(ir_binop_add, &z, expr);
27401e04c3fSmrg
27501e04c3fSmrg      unsigned size = tree_to_vine(&pseudo_root);
27601e04c3fSmrg      vine_to_tree(&pseudo_root, size);
27701e04c3fSmrg
27801e04c3fSmrg      expr = (ir_expression *)pseudo_root.operands[1];
27901e04c3fSmrg   }
28001e04c3fSmrg   return expr;
28101e04c3fSmrg}
28201e04c3fSmrg
28301e04c3fSmrgstatic void
28401e04c3fSmrgupdate_types(ir_instruction *ir, void *)
28501e04c3fSmrg{
28601e04c3fSmrg   ir_expression *expr = ir->as_expression();
28701e04c3fSmrg   if (!expr)
28801e04c3fSmrg      return;
28901e04c3fSmrg
29001e04c3fSmrg   const glsl_type *const new_type =
29101e04c3fSmrg      glsl_type::get_instance(expr->type->base_type,
29201e04c3fSmrg                              MAX2(expr->operands[0]->type->vector_elements,
29301e04c3fSmrg                                   expr->operands[1]->type->vector_elements),
29401e04c3fSmrg                              1);
29501e04c3fSmrg   assert(new_type != glsl_type::error_type);
29601e04c3fSmrg   expr->type = new_type;
29701e04c3fSmrg}
29801e04c3fSmrg
29901e04c3fSmrgvoid
30001e04c3fSmrgir_rebalance_visitor::handle_rvalue(ir_rvalue **rvalue)
30101e04c3fSmrg{
30201e04c3fSmrg   if (!*rvalue)
30301e04c3fSmrg      return;
30401e04c3fSmrg
30501e04c3fSmrg   ir_expression *expr = (*rvalue)->as_expression();
30601e04c3fSmrg   if (!expr || !is_reduction_operation(expr->operation))
30701e04c3fSmrg      return;
30801e04c3fSmrg
30901e04c3fSmrg   ir_rvalue *new_rvalue = handle_expression(expr);
31001e04c3fSmrg
31101e04c3fSmrg   /* If we failed to rebalance the tree (e.g., because it wasn't a reduction,
31201e04c3fSmrg    * or some other set of cases) new_rvalue will point to the same root as
31301e04c3fSmrg    * before.
31401e04c3fSmrg    *
31501e04c3fSmrg    * Similarly, if the tree rooted at *rvalue was a reduction and was already
31601e04c3fSmrg    * balanced, the algorithm will rearrange the tree but will ultimately
31701e04c3fSmrg    * return an identical tree, so this check will handle that as well and
31801e04c3fSmrg    * will not set progress = true.
31901e04c3fSmrg    */
32001e04c3fSmrg   if (new_rvalue == *rvalue)
32101e04c3fSmrg      return;
32201e04c3fSmrg
32301e04c3fSmrg   visit_tree(new_rvalue, NULL, NULL, update_types);
32401e04c3fSmrg
32501e04c3fSmrg   *rvalue = new_rvalue;
32601e04c3fSmrg   this->progress = true;
32701e04c3fSmrg}
32801e04c3fSmrg
32901e04c3fSmrgbool
33001e04c3fSmrgdo_rebalance_tree(exec_list *instructions)
33101e04c3fSmrg{
33201e04c3fSmrg   ir_rebalance_visitor v;
33301e04c3fSmrg
33401e04c3fSmrg   v.run(instructions);
33501e04c3fSmrg
33601e04c3fSmrg   return v.progress;
33701e04c3fSmrg}
338