101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2014 Intel Corporation 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 2101e04c3fSmrg * DEALINGS IN THE SOFTWARE. 2201e04c3fSmrg */ 2301e04c3fSmrg 2401e04c3fSmrg/** 2501e04c3fSmrg * \file opt_rebalance_tree.cpp 2601e04c3fSmrg * 2701e04c3fSmrg * Rebalances a reduction expression tree. 2801e04c3fSmrg * 2901e04c3fSmrg * For reduction operations (e.g., x + y + z + w) we generate an expression 3001e04c3fSmrg * tree like 3101e04c3fSmrg * 3201e04c3fSmrg * + 3301e04c3fSmrg * / \ 3401e04c3fSmrg * + w 3501e04c3fSmrg * / \ 3601e04c3fSmrg * + z 3701e04c3fSmrg * / \ 3801e04c3fSmrg * x y 3901e04c3fSmrg * 4001e04c3fSmrg * which we can rebalance into 4101e04c3fSmrg * 4201e04c3fSmrg * + 4301e04c3fSmrg * / \ 4401e04c3fSmrg * / \ 4501e04c3fSmrg * + + 4601e04c3fSmrg * / \ / \ 4701e04c3fSmrg * x y z w 4801e04c3fSmrg * 4901e04c3fSmrg * to get a better instruction scheduling. 5001e04c3fSmrg * 5101e04c3fSmrg * See "Tree Rebalancing in Optimal Editor Time and Space" by Quentin F. Stout 5201e04c3fSmrg * and Bette L. Warren. 5301e04c3fSmrg * 5401e04c3fSmrg * Also see http://penguin.ewu.edu/~trolfe/DSWpaper/ for a very readable 5501e04c3fSmrg * explanation of the of the tree_to_vine() (rightward rotation) and 5601e04c3fSmrg * vine_to_tree() (leftward rotation) algorithms. 5701e04c3fSmrg */ 5801e04c3fSmrg 5901e04c3fSmrg#include "ir.h" 6001e04c3fSmrg#include "ir_visitor.h" 6101e04c3fSmrg#include "ir_rvalue_visitor.h" 6201e04c3fSmrg#include "ir_optimization.h" 6301e04c3fSmrg#include "main/macros.h" /* for MAX2 */ 6401e04c3fSmrg 6501e04c3fSmrg/* The DSW algorithm generates a degenerate tree (really, a linked list) in 6601e04c3fSmrg * tree_to_vine(). We'd rather not leave a binary expression with only one 6701e04c3fSmrg * operand, so trivial modifications (the ternary operators below) are needed 6801e04c3fSmrg * to ensure that we only rotate around the ir_expression nodes of the tree. 6901e04c3fSmrg */ 7001e04c3fSmrgstatic unsigned 7101e04c3fSmrgtree_to_vine(ir_expression *root) 7201e04c3fSmrg{ 7301e04c3fSmrg unsigned size = 0; 7401e04c3fSmrg ir_rvalue *vine_tail = root; 7501e04c3fSmrg ir_rvalue *remainder = root->operands[1]; 7601e04c3fSmrg 7701e04c3fSmrg while (remainder != NULL) { 7801e04c3fSmrg ir_expression *remainder_temp = remainder->as_expression(); 7901e04c3fSmrg ir_expression *remainder_left = remainder_temp ? 8001e04c3fSmrg remainder_temp->operands[0]->as_expression() : NULL; 8101e04c3fSmrg 8201e04c3fSmrg if (remainder_left == NULL) { 8301e04c3fSmrg /* move vine_tail down one */ 8401e04c3fSmrg vine_tail = remainder; 8501e04c3fSmrg remainder = remainder->as_expression() ? 8601e04c3fSmrg ((ir_expression *)remainder)->operands[1] : NULL; 8701e04c3fSmrg size++; 8801e04c3fSmrg } else { 8901e04c3fSmrg /* rotate */ 9001e04c3fSmrg ir_expression *tempptr = remainder_left; 9101e04c3fSmrg ((ir_expression *)remainder)->operands[0] = tempptr->operands[1]; 9201e04c3fSmrg tempptr->operands[1] = remainder; 9301e04c3fSmrg remainder = tempptr; 9401e04c3fSmrg ((ir_expression *)vine_tail)->operands[1] = tempptr; 9501e04c3fSmrg } 9601e04c3fSmrg } 9701e04c3fSmrg 9801e04c3fSmrg return size; 9901e04c3fSmrg} 10001e04c3fSmrg 10101e04c3fSmrgstatic void 10201e04c3fSmrgcompression(ir_expression *root, unsigned count) 10301e04c3fSmrg{ 10401e04c3fSmrg ir_expression *scanner = root; 10501e04c3fSmrg 10601e04c3fSmrg for (unsigned i = 0; i < count; i++) { 10701e04c3fSmrg ir_expression *child = (ir_expression *)scanner->operands[1]; 10801e04c3fSmrg scanner->operands[1] = child->operands[1]; 10901e04c3fSmrg scanner = (ir_expression *)scanner->operands[1]; 11001e04c3fSmrg child->operands[1] = scanner->operands[0]; 11101e04c3fSmrg scanner->operands[0] = child; 11201e04c3fSmrg } 11301e04c3fSmrg} 11401e04c3fSmrg 11501e04c3fSmrgstatic void 11601e04c3fSmrgvine_to_tree(ir_expression *root, unsigned size) 11701e04c3fSmrg{ 11801e04c3fSmrg int n = size - 1; 11901e04c3fSmrg for (int m = n / 2; m > 0; m = n / 2) { 12001e04c3fSmrg compression(root, m); 12101e04c3fSmrg n -= m + 1; 12201e04c3fSmrg } 12301e04c3fSmrg} 12401e04c3fSmrg 12501e04c3fSmrgnamespace { 12601e04c3fSmrg 12701e04c3fSmrgclass ir_rebalance_visitor : public ir_rvalue_enter_visitor { 12801e04c3fSmrgpublic: 12901e04c3fSmrg ir_rebalance_visitor() 13001e04c3fSmrg { 13101e04c3fSmrg progress = false; 13201e04c3fSmrg } 13301e04c3fSmrg 13401e04c3fSmrg virtual ir_visitor_status visit_enter(ir_assignment *ir); 13501e04c3fSmrg 13601e04c3fSmrg void handle_rvalue(ir_rvalue **rvalue); 13701e04c3fSmrg 13801e04c3fSmrg bool progress; 13901e04c3fSmrg}; 14001e04c3fSmrg 14101e04c3fSmrgstruct is_reduction_data { 14201e04c3fSmrg ir_expression_operation operation; 14301e04c3fSmrg const glsl_type *type; 14401e04c3fSmrg unsigned num_expr; 14501e04c3fSmrg bool is_reduction; 14601e04c3fSmrg bool contains_constant; 14701e04c3fSmrg}; 14801e04c3fSmrg 14901e04c3fSmrg} /* anonymous namespace */ 15001e04c3fSmrg 15101e04c3fSmrgir_visitor_status 15201e04c3fSmrgir_rebalance_visitor::visit_enter(ir_assignment *ir) 15301e04c3fSmrg{ 15401e04c3fSmrg ir_variable *var = ir->lhs->variable_referenced(); 15501e04c3fSmrg if (var->data.invariant || var->data.precise) { 15601e04c3fSmrg /* If we're assigning to an invariant variable, just bail. Tree 15701e04c3fSmrg * rebalancing (reassociation) isn't precision-safe. 15801e04c3fSmrg */ 15901e04c3fSmrg return visit_continue_with_parent; 16001e04c3fSmrg } else { 16101e04c3fSmrg return visit_continue; 16201e04c3fSmrg } 16301e04c3fSmrg} 16401e04c3fSmrg 16501e04c3fSmrgstatic bool 16601e04c3fSmrgis_reduction_operation(ir_expression_operation operation) 16701e04c3fSmrg{ 16801e04c3fSmrg switch (operation) { 16901e04c3fSmrg case ir_binop_add: 17001e04c3fSmrg case ir_binop_mul: 17101e04c3fSmrg case ir_binop_bit_and: 17201e04c3fSmrg case ir_binop_bit_xor: 17301e04c3fSmrg case ir_binop_bit_or: 17401e04c3fSmrg case ir_binop_logic_and: 17501e04c3fSmrg case ir_binop_logic_xor: 17601e04c3fSmrg case ir_binop_logic_or: 17701e04c3fSmrg case ir_binop_min: 17801e04c3fSmrg case ir_binop_max: 17901e04c3fSmrg return true; 18001e04c3fSmrg default: 18101e04c3fSmrg return false; 18201e04c3fSmrg } 18301e04c3fSmrg} 18401e04c3fSmrg 18501e04c3fSmrg/* Note that this function does not attempt to recognize that reduction trees 18601e04c3fSmrg * are already balanced. 18701e04c3fSmrg * 18801e04c3fSmrg * We return false from this function for a number of reasons other than an 18901e04c3fSmrg * expression tree not being a mathematical reduction. Namely, 19001e04c3fSmrg * 19101e04c3fSmrg * - if the tree contains multiple constants that we may be able to combine. 19201e04c3fSmrg * - if the tree contains matrices: 19301e04c3fSmrg * - they might contain vec4's with many constant components that we can 19401e04c3fSmrg * simplify after splitting. 19501e04c3fSmrg * - applying the matrix chain ordering optimization is more than just 19601e04c3fSmrg * balancing an expression tree. 19701e04c3fSmrg * - if the tree contains operations on multiple types. 19801e04c3fSmrg * - if the tree contains ir_dereference_{array,record}, since foo[a+b] + c 19901e04c3fSmrg * would trick the visiting pass. 20001e04c3fSmrg */ 20101e04c3fSmrgstatic void 20201e04c3fSmrgis_reduction(ir_instruction *ir, void *data) 20301e04c3fSmrg{ 20401e04c3fSmrg struct is_reduction_data *ird = (struct is_reduction_data *)data; 20501e04c3fSmrg if (!ird->is_reduction) 20601e04c3fSmrg return; 20701e04c3fSmrg 20801e04c3fSmrg /* We don't want to balance a tree that contains multiple constants, since 20901e04c3fSmrg * we'll be able to constant fold them if they're not in separate subtrees. 21001e04c3fSmrg */ 21101e04c3fSmrg if (ir->as_constant()) { 21201e04c3fSmrg if (ird->contains_constant) { 21301e04c3fSmrg ird->is_reduction = false; 21401e04c3fSmrg } 21501e04c3fSmrg ird->contains_constant = true; 21601e04c3fSmrg return; 21701e04c3fSmrg } 21801e04c3fSmrg 21901e04c3fSmrg /* Array/record dereferences have subtrees that are not part of the expr 22001e04c3fSmrg * tree we're balancing. Skip trees containing them. 22101e04c3fSmrg */ 22201e04c3fSmrg if (ir->ir_type == ir_type_dereference_array || 22301e04c3fSmrg ir->ir_type == ir_type_dereference_record) { 22401e04c3fSmrg ird->is_reduction = false; 22501e04c3fSmrg return; 22601e04c3fSmrg } 22701e04c3fSmrg 22801e04c3fSmrg ir_expression *expr = ir->as_expression(); 22901e04c3fSmrg if (!expr) 23001e04c3fSmrg return; 23101e04c3fSmrg 23201e04c3fSmrg /* Non-constant matrices might still contain constant vec4 that we can 23301e04c3fSmrg * constant fold once split up. Handling matrices will need some more 23401e04c3fSmrg * work. 23501e04c3fSmrg */ 23601e04c3fSmrg if (expr->type->is_matrix() || 23701e04c3fSmrg expr->operands[0]->type->is_matrix() || 23801e04c3fSmrg (expr->operands[1] && expr->operands[1]->type->is_matrix())) { 23901e04c3fSmrg ird->is_reduction = false; 24001e04c3fSmrg return; 24101e04c3fSmrg } 24201e04c3fSmrg 24301e04c3fSmrg if (ird->type != NULL && ird->type != expr->type) { 24401e04c3fSmrg ird->is_reduction = false; 24501e04c3fSmrg return; 24601e04c3fSmrg } 24701e04c3fSmrg ird->type = expr->type; 24801e04c3fSmrg 24901e04c3fSmrg ird->num_expr++; 25001e04c3fSmrg if (is_reduction_operation(expr->operation)) { 25101e04c3fSmrg if (ird->operation != 0 && ird->operation != expr->operation) 25201e04c3fSmrg ird->is_reduction = false; 25301e04c3fSmrg ird->operation = expr->operation; 25401e04c3fSmrg } else { 25501e04c3fSmrg ird->is_reduction = false; 25601e04c3fSmrg } 25701e04c3fSmrg} 25801e04c3fSmrg 25901e04c3fSmrgstatic ir_rvalue * 26001e04c3fSmrghandle_expression(ir_expression *expr) 26101e04c3fSmrg{ 26201e04c3fSmrg struct is_reduction_data ird; 26301e04c3fSmrg ird.operation = (ir_expression_operation)0; 26401e04c3fSmrg ird.type = NULL; 26501e04c3fSmrg ird.num_expr = 0; 26601e04c3fSmrg ird.is_reduction = true; 26701e04c3fSmrg ird.contains_constant = false; 26801e04c3fSmrg 26901e04c3fSmrg visit_tree(expr, is_reduction, (void *)&ird); 27001e04c3fSmrg 27101e04c3fSmrg if (ird.is_reduction && ird.num_expr > 2) { 27201e04c3fSmrg ir_constant z = ir_constant(0.0f); 27301e04c3fSmrg ir_expression pseudo_root = ir_expression(ir_binop_add, &z, expr); 27401e04c3fSmrg 27501e04c3fSmrg unsigned size = tree_to_vine(&pseudo_root); 27601e04c3fSmrg vine_to_tree(&pseudo_root, size); 27701e04c3fSmrg 27801e04c3fSmrg expr = (ir_expression *)pseudo_root.operands[1]; 27901e04c3fSmrg } 28001e04c3fSmrg return expr; 28101e04c3fSmrg} 28201e04c3fSmrg 28301e04c3fSmrgstatic void 28401e04c3fSmrgupdate_types(ir_instruction *ir, void *) 28501e04c3fSmrg{ 28601e04c3fSmrg ir_expression *expr = ir->as_expression(); 28701e04c3fSmrg if (!expr) 28801e04c3fSmrg return; 28901e04c3fSmrg 29001e04c3fSmrg const glsl_type *const new_type = 29101e04c3fSmrg glsl_type::get_instance(expr->type->base_type, 29201e04c3fSmrg MAX2(expr->operands[0]->type->vector_elements, 29301e04c3fSmrg expr->operands[1]->type->vector_elements), 29401e04c3fSmrg 1); 29501e04c3fSmrg assert(new_type != glsl_type::error_type); 29601e04c3fSmrg expr->type = new_type; 29701e04c3fSmrg} 29801e04c3fSmrg 29901e04c3fSmrgvoid 30001e04c3fSmrgir_rebalance_visitor::handle_rvalue(ir_rvalue **rvalue) 30101e04c3fSmrg{ 30201e04c3fSmrg if (!*rvalue) 30301e04c3fSmrg return; 30401e04c3fSmrg 30501e04c3fSmrg ir_expression *expr = (*rvalue)->as_expression(); 30601e04c3fSmrg if (!expr || !is_reduction_operation(expr->operation)) 30701e04c3fSmrg return; 30801e04c3fSmrg 30901e04c3fSmrg ir_rvalue *new_rvalue = handle_expression(expr); 31001e04c3fSmrg 31101e04c3fSmrg /* If we failed to rebalance the tree (e.g., because it wasn't a reduction, 31201e04c3fSmrg * or some other set of cases) new_rvalue will point to the same root as 31301e04c3fSmrg * before. 31401e04c3fSmrg * 31501e04c3fSmrg * Similarly, if the tree rooted at *rvalue was a reduction and was already 31601e04c3fSmrg * balanced, the algorithm will rearrange the tree but will ultimately 31701e04c3fSmrg * return an identical tree, so this check will handle that as well and 31801e04c3fSmrg * will not set progress = true. 31901e04c3fSmrg */ 32001e04c3fSmrg if (new_rvalue == *rvalue) 32101e04c3fSmrg return; 32201e04c3fSmrg 32301e04c3fSmrg visit_tree(new_rvalue, NULL, NULL, update_types); 32401e04c3fSmrg 32501e04c3fSmrg *rvalue = new_rvalue; 32601e04c3fSmrg this->progress = true; 32701e04c3fSmrg} 32801e04c3fSmrg 32901e04c3fSmrgbool 33001e04c3fSmrgdo_rebalance_tree(exec_list *instructions) 33101e04c3fSmrg{ 33201e04c3fSmrg ir_rebalance_visitor v; 33301e04c3fSmrg 33401e04c3fSmrg v.run(instructions); 33501e04c3fSmrg 33601e04c3fSmrg return v.progress; 33701e04c3fSmrg} 338