101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2010 Intel Corporation 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 2101e04c3fSmrg * DEALINGS IN THE SOFTWARE. 2201e04c3fSmrg */ 2301e04c3fSmrg 2401e04c3fSmrg/** 2501e04c3fSmrg * \file opt_algebraic.cpp 2601e04c3fSmrg * 2701e04c3fSmrg * Takes advantage of association, commutivity, and other algebraic 2801e04c3fSmrg * properties to simplify expressions. 
2901e04c3fSmrg */ 3001e04c3fSmrg 3101e04c3fSmrg#include "ir.h" 3201e04c3fSmrg#include "ir_visitor.h" 3301e04c3fSmrg#include "ir_rvalue_visitor.h" 3401e04c3fSmrg#include "ir_optimization.h" 3501e04c3fSmrg#include "ir_builder.h" 3601e04c3fSmrg#include "compiler/glsl_types.h" 3701e04c3fSmrg#include "main/mtypes.h" 3801e04c3fSmrg 3901e04c3fSmrgusing namespace ir_builder; 4001e04c3fSmrg 4101e04c3fSmrgnamespace { 4201e04c3fSmrg 4301e04c3fSmrg/** 4401e04c3fSmrg * Visitor class for replacing expressions with ir_constant values. 4501e04c3fSmrg */ 4601e04c3fSmrg 4701e04c3fSmrgclass ir_algebraic_visitor : public ir_rvalue_visitor { 4801e04c3fSmrgpublic: 4901e04c3fSmrg ir_algebraic_visitor(bool native_integers, 5001e04c3fSmrg const struct gl_shader_compiler_options *options) 5101e04c3fSmrg : options(options) 5201e04c3fSmrg { 5301e04c3fSmrg this->progress = false; 5401e04c3fSmrg this->mem_ctx = NULL; 5501e04c3fSmrg this->native_integers = native_integers; 5601e04c3fSmrg } 5701e04c3fSmrg 5801e04c3fSmrg virtual ~ir_algebraic_visitor() 5901e04c3fSmrg { 6001e04c3fSmrg } 6101e04c3fSmrg 6201e04c3fSmrg virtual ir_visitor_status visit_enter(ir_assignment *ir); 6301e04c3fSmrg 6401e04c3fSmrg ir_rvalue *handle_expression(ir_expression *ir); 6501e04c3fSmrg void handle_rvalue(ir_rvalue **rvalue); 6601e04c3fSmrg bool reassociate_constant(ir_expression *ir1, 6701e04c3fSmrg int const_index, 6801e04c3fSmrg ir_constant *constant, 6901e04c3fSmrg ir_expression *ir2); 7001e04c3fSmrg void reassociate_operands(ir_expression *ir1, 7101e04c3fSmrg int op1, 7201e04c3fSmrg ir_expression *ir2, 7301e04c3fSmrg int op2); 7401e04c3fSmrg ir_rvalue *swizzle_if_required(ir_expression *expr, 7501e04c3fSmrg ir_rvalue *operand); 7601e04c3fSmrg 7701e04c3fSmrg const struct gl_shader_compiler_options *options; 7801e04c3fSmrg void *mem_ctx; 7901e04c3fSmrg 8001e04c3fSmrg bool native_integers; 8101e04c3fSmrg bool progress; 8201e04c3fSmrg}; 8301e04c3fSmrg 8401e04c3fSmrg} /* unnamed namespace */ 8501e04c3fSmrg 
8601e04c3fSmrgir_visitor_status 8701e04c3fSmrgir_algebraic_visitor::visit_enter(ir_assignment *ir) 8801e04c3fSmrg{ 8901e04c3fSmrg ir_variable *var = ir->lhs->variable_referenced(); 9001e04c3fSmrg if (var->data.invariant || var->data.precise) { 9101e04c3fSmrg /* If we're assigning to an invariant or precise variable, just bail. 9201e04c3fSmrg * Most of the algebraic optimizations aren't precision-safe. 9301e04c3fSmrg * 9401e04c3fSmrg * FINISHME: Find out which optimizations are precision-safe and enable 9501e04c3fSmrg * then only for invariant or precise trees. 9601e04c3fSmrg */ 9701e04c3fSmrg return visit_continue_with_parent; 9801e04c3fSmrg } else { 9901e04c3fSmrg return visit_continue; 10001e04c3fSmrg } 10101e04c3fSmrg} 10201e04c3fSmrg 10301e04c3fSmrgstatic inline bool 10401e04c3fSmrgis_vec_zero(ir_constant *ir) 10501e04c3fSmrg{ 10601e04c3fSmrg return (ir == NULL) ? false : ir->is_zero(); 10701e04c3fSmrg} 10801e04c3fSmrg 10901e04c3fSmrgstatic inline bool 11001e04c3fSmrgis_vec_one(ir_constant *ir) 11101e04c3fSmrg{ 11201e04c3fSmrg return (ir == NULL) ? false : ir->is_one(); 11301e04c3fSmrg} 11401e04c3fSmrg 11501e04c3fSmrgstatic inline bool 11601e04c3fSmrgis_vec_two(ir_constant *ir) 11701e04c3fSmrg{ 11801e04c3fSmrg return (ir == NULL) ? false : ir->is_value(2.0, 2); 11901e04c3fSmrg} 12001e04c3fSmrg 12101e04c3fSmrgstatic inline bool 12201e04c3fSmrgis_vec_four(ir_constant *ir) 12301e04c3fSmrg{ 12401e04c3fSmrg return (ir == NULL) ? false : ir->is_value(4.0, 4); 12501e04c3fSmrg} 12601e04c3fSmrg 12701e04c3fSmrgstatic inline bool 12801e04c3fSmrgis_vec_negative_one(ir_constant *ir) 12901e04c3fSmrg{ 13001e04c3fSmrg return (ir == NULL) ? 
false : ir->is_negative_one(); 13101e04c3fSmrg} 13201e04c3fSmrg 13301e04c3fSmrgstatic inline bool 13401e04c3fSmrgis_valid_vec_const(ir_constant *ir) 13501e04c3fSmrg{ 13601e04c3fSmrg if (ir == NULL) 13701e04c3fSmrg return false; 13801e04c3fSmrg 13901e04c3fSmrg if (!ir->type->is_scalar() && !ir->type->is_vector()) 14001e04c3fSmrg return false; 14101e04c3fSmrg 14201e04c3fSmrg return true; 14301e04c3fSmrg} 14401e04c3fSmrg 14501e04c3fSmrgstatic inline bool 14601e04c3fSmrgis_less_than_one(ir_constant *ir) 14701e04c3fSmrg{ 14801e04c3fSmrg assert(ir->type->is_float()); 14901e04c3fSmrg 15001e04c3fSmrg if (!is_valid_vec_const(ir)) 15101e04c3fSmrg return false; 15201e04c3fSmrg 15301e04c3fSmrg unsigned component = 0; 15401e04c3fSmrg for (int c = 0; c < ir->type->vector_elements; c++) { 15501e04c3fSmrg if (ir->get_float_component(c) < 1.0f) 15601e04c3fSmrg component++; 15701e04c3fSmrg } 15801e04c3fSmrg 15901e04c3fSmrg return (component == ir->type->vector_elements); 16001e04c3fSmrg} 16101e04c3fSmrg 16201e04c3fSmrgstatic inline bool 16301e04c3fSmrgis_greater_than_zero(ir_constant *ir) 16401e04c3fSmrg{ 16501e04c3fSmrg assert(ir->type->is_float()); 16601e04c3fSmrg 16701e04c3fSmrg if (!is_valid_vec_const(ir)) 16801e04c3fSmrg return false; 16901e04c3fSmrg 17001e04c3fSmrg unsigned component = 0; 17101e04c3fSmrg for (int c = 0; c < ir->type->vector_elements; c++) { 17201e04c3fSmrg if (ir->get_float_component(c) > 0.0f) 17301e04c3fSmrg component++; 17401e04c3fSmrg } 17501e04c3fSmrg 17601e04c3fSmrg return (component == ir->type->vector_elements); 17701e04c3fSmrg} 17801e04c3fSmrg 17901e04c3fSmrgstatic void 18001e04c3fSmrgupdate_type(ir_expression *ir) 18101e04c3fSmrg{ 18201e04c3fSmrg if (ir->operands[0]->type->is_vector()) 18301e04c3fSmrg ir->type = ir->operands[0]->type; 18401e04c3fSmrg else 18501e04c3fSmrg ir->type = ir->operands[1]->type; 18601e04c3fSmrg} 18701e04c3fSmrg 18801e04c3fSmrg/* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */ 18901e04c3fSmrgstatic ir_expression * 
19001e04c3fSmrgtry_replace_with_dot(ir_expression *expr0, ir_expression *expr1, void *mem_ctx) 19101e04c3fSmrg{ 19201e04c3fSmrg if (expr0 && expr0->operation == ir_binop_add && 19301e04c3fSmrg expr0->type->is_float() && 19401e04c3fSmrg expr1 && expr1->operation == ir_binop_add && 19501e04c3fSmrg expr1->type->is_float()) { 19601e04c3fSmrg ir_swizzle *x = expr0->operands[0]->as_swizzle(); 19701e04c3fSmrg ir_swizzle *y = expr0->operands[1]->as_swizzle(); 19801e04c3fSmrg ir_swizzle *z = expr1->operands[0]->as_swizzle(); 19901e04c3fSmrg ir_swizzle *w = expr1->operands[1]->as_swizzle(); 20001e04c3fSmrg 20101e04c3fSmrg if (!x || x->mask.num_components != 1 || 20201e04c3fSmrg !y || y->mask.num_components != 1 || 20301e04c3fSmrg !z || z->mask.num_components != 1 || 20401e04c3fSmrg !w || w->mask.num_components != 1) { 20501e04c3fSmrg return NULL; 20601e04c3fSmrg } 20701e04c3fSmrg 20801e04c3fSmrg bool swiz_seen[4] = {false, false, false, false}; 20901e04c3fSmrg swiz_seen[x->mask.x] = true; 21001e04c3fSmrg swiz_seen[y->mask.x] = true; 21101e04c3fSmrg swiz_seen[z->mask.x] = true; 21201e04c3fSmrg swiz_seen[w->mask.x] = true; 21301e04c3fSmrg 21401e04c3fSmrg if (!swiz_seen[0] || !swiz_seen[1] || 21501e04c3fSmrg !swiz_seen[2] || !swiz_seen[3]) { 21601e04c3fSmrg return NULL; 21701e04c3fSmrg } 21801e04c3fSmrg 21901e04c3fSmrg if (x->val->equals(y->val) && 22001e04c3fSmrg x->val->equals(z->val) && 22101e04c3fSmrg x->val->equals(w->val)) { 22201e04c3fSmrg return dot(x->val, new(mem_ctx) ir_constant(1.0f, 4)); 22301e04c3fSmrg } 22401e04c3fSmrg } 22501e04c3fSmrg return NULL; 22601e04c3fSmrg} 22701e04c3fSmrg 22801e04c3fSmrgvoid 22901e04c3fSmrgir_algebraic_visitor::reassociate_operands(ir_expression *ir1, 23001e04c3fSmrg int op1, 23101e04c3fSmrg ir_expression *ir2, 23201e04c3fSmrg int op2) 23301e04c3fSmrg{ 23401e04c3fSmrg ir_rvalue *temp = ir2->operands[op2]; 23501e04c3fSmrg ir2->operands[op2] = ir1->operands[op1]; 23601e04c3fSmrg ir1->operands[op1] = temp; 23701e04c3fSmrg 23801e04c3fSmrg 
/* Update the type of ir2. The type of ir1 won't have changed -- 23901e04c3fSmrg * base types matched, and at least one of the operands of the 2 24001e04c3fSmrg * binops is still a vector if any of them were. 24101e04c3fSmrg */ 24201e04c3fSmrg update_type(ir2); 24301e04c3fSmrg 24401e04c3fSmrg this->progress = true; 24501e04c3fSmrg} 24601e04c3fSmrg 24701e04c3fSmrg/** 24801e04c3fSmrg * Reassociates a constant down a tree of adds or multiplies. 24901e04c3fSmrg * 25001e04c3fSmrg * Consider (2 * (a * (b * 0.5))). We want to end up with a * b. 25101e04c3fSmrg */ 25201e04c3fSmrgbool 25301e04c3fSmrgir_algebraic_visitor::reassociate_constant(ir_expression *ir1, int const_index, 25401e04c3fSmrg ir_constant *constant, 25501e04c3fSmrg ir_expression *ir2) 25601e04c3fSmrg{ 25701e04c3fSmrg if (!ir2 || ir1->operation != ir2->operation) 25801e04c3fSmrg return false; 25901e04c3fSmrg 26001e04c3fSmrg /* Don't want to even think about matrices. */ 26101e04c3fSmrg if (ir1->operands[0]->type->is_matrix() || 26201e04c3fSmrg ir1->operands[1]->type->is_matrix() || 26301e04c3fSmrg ir2->operands[0]->type->is_matrix() || 26401e04c3fSmrg ir2->operands[1]->type->is_matrix()) 26501e04c3fSmrg return false; 26601e04c3fSmrg 26701e04c3fSmrg void *mem_ctx = ralloc_parent(ir2); 26801e04c3fSmrg 26901e04c3fSmrg ir_constant *ir2_const[2]; 27001e04c3fSmrg ir2_const[0] = ir2->operands[0]->constant_expression_value(mem_ctx); 27101e04c3fSmrg ir2_const[1] = ir2->operands[1]->constant_expression_value(mem_ctx); 27201e04c3fSmrg 27301e04c3fSmrg if (ir2_const[0] && ir2_const[1]) 27401e04c3fSmrg return false; 27501e04c3fSmrg 27601e04c3fSmrg if (ir2_const[0]) { 27701e04c3fSmrg reassociate_operands(ir1, const_index, ir2, 1); 27801e04c3fSmrg return true; 27901e04c3fSmrg } else if (ir2_const[1]) { 28001e04c3fSmrg reassociate_operands(ir1, const_index, ir2, 0); 28101e04c3fSmrg return true; 28201e04c3fSmrg } 28301e04c3fSmrg 28401e04c3fSmrg if (reassociate_constant(ir1, const_index, constant, 28501e04c3fSmrg 
ir2->operands[0]->as_expression())) { 28601e04c3fSmrg update_type(ir2); 28701e04c3fSmrg return true; 28801e04c3fSmrg } 28901e04c3fSmrg 29001e04c3fSmrg if (reassociate_constant(ir1, const_index, constant, 29101e04c3fSmrg ir2->operands[1]->as_expression())) { 29201e04c3fSmrg update_type(ir2); 29301e04c3fSmrg return true; 29401e04c3fSmrg } 29501e04c3fSmrg 29601e04c3fSmrg return false; 29701e04c3fSmrg} 29801e04c3fSmrg 29901e04c3fSmrg/* When eliminating an expression and just returning one of its operands, 30001e04c3fSmrg * we may need to swizzle that operand out to a vector if the expression was 30101e04c3fSmrg * vector type. 30201e04c3fSmrg */ 30301e04c3fSmrgir_rvalue * 30401e04c3fSmrgir_algebraic_visitor::swizzle_if_required(ir_expression *expr, 30501e04c3fSmrg ir_rvalue *operand) 30601e04c3fSmrg{ 30701e04c3fSmrg if (expr->type->is_vector() && operand->type->is_scalar()) { 30801e04c3fSmrg return new(mem_ctx) ir_swizzle(operand, 0, 0, 0, 0, 30901e04c3fSmrg expr->type->vector_elements); 31001e04c3fSmrg } else 31101e04c3fSmrg return operand; 31201e04c3fSmrg} 31301e04c3fSmrg 31401e04c3fSmrgir_rvalue * 31501e04c3fSmrgir_algebraic_visitor::handle_expression(ir_expression *ir) 31601e04c3fSmrg{ 31701e04c3fSmrg ir_constant *op_const[4] = {NULL, NULL, NULL, NULL}; 31801e04c3fSmrg ir_expression *op_expr[4] = {NULL, NULL, NULL, NULL}; 31901e04c3fSmrg 32001e04c3fSmrg if (ir->operation == ir_binop_mul && 32101e04c3fSmrg ir->operands[0]->type->is_matrix() && 32201e04c3fSmrg ir->operands[1]->type->is_vector()) { 32301e04c3fSmrg ir_expression *matrix_mul = ir->operands[0]->as_expression(); 32401e04c3fSmrg 32501e04c3fSmrg if (matrix_mul && matrix_mul->operation == ir_binop_mul && 32601e04c3fSmrg matrix_mul->operands[0]->type->is_matrix() && 32701e04c3fSmrg matrix_mul->operands[1]->type->is_matrix()) { 32801e04c3fSmrg 32901e04c3fSmrg return mul(matrix_mul->operands[0], 33001e04c3fSmrg mul(matrix_mul->operands[1], ir->operands[1])); 33101e04c3fSmrg } 33201e04c3fSmrg } 33301e04c3fSmrg 
33401e04c3fSmrg assert(ir->num_operands <= 4); 33501e04c3fSmrg for (unsigned i = 0; i < ir->num_operands; i++) { 33601e04c3fSmrg if (ir->operands[i]->type->is_matrix()) 33701e04c3fSmrg return ir; 33801e04c3fSmrg 33901e04c3fSmrg op_const[i] = 34001e04c3fSmrg ir->operands[i]->constant_expression_value(ralloc_parent(ir)); 34101e04c3fSmrg op_expr[i] = ir->operands[i]->as_expression(); 34201e04c3fSmrg } 34301e04c3fSmrg 34401e04c3fSmrg if (this->mem_ctx == NULL) 34501e04c3fSmrg this->mem_ctx = ralloc_parent(ir); 34601e04c3fSmrg 34701e04c3fSmrg switch (ir->operation) { 34801e04c3fSmrg case ir_unop_bit_not: 34901e04c3fSmrg if (op_expr[0] && op_expr[0]->operation == ir_unop_bit_not) 35001e04c3fSmrg return op_expr[0]->operands[0]; 35101e04c3fSmrg break; 35201e04c3fSmrg 35301e04c3fSmrg case ir_unop_abs: 35401e04c3fSmrg if (op_expr[0] == NULL) 35501e04c3fSmrg break; 35601e04c3fSmrg 35701e04c3fSmrg switch (op_expr[0]->operation) { 35801e04c3fSmrg case ir_unop_abs: 35901e04c3fSmrg case ir_unop_neg: 36001e04c3fSmrg return abs(op_expr[0]->operands[0]); 36101e04c3fSmrg default: 36201e04c3fSmrg break; 36301e04c3fSmrg } 36401e04c3fSmrg break; 36501e04c3fSmrg 36601e04c3fSmrg case ir_unop_neg: 36701e04c3fSmrg if (op_expr[0] == NULL) 36801e04c3fSmrg break; 36901e04c3fSmrg 37001e04c3fSmrg if (op_expr[0]->operation == ir_unop_neg) { 37101e04c3fSmrg return op_expr[0]->operands[0]; 37201e04c3fSmrg } 37301e04c3fSmrg break; 37401e04c3fSmrg 37501e04c3fSmrg case ir_unop_exp: 37601e04c3fSmrg if (op_expr[0] == NULL) 37701e04c3fSmrg break; 37801e04c3fSmrg 37901e04c3fSmrg if (op_expr[0]->operation == ir_unop_log) { 38001e04c3fSmrg return op_expr[0]->operands[0]; 38101e04c3fSmrg } 38201e04c3fSmrg break; 38301e04c3fSmrg 38401e04c3fSmrg case ir_unop_log: 38501e04c3fSmrg if (op_expr[0] == NULL) 38601e04c3fSmrg break; 38701e04c3fSmrg 38801e04c3fSmrg if (op_expr[0]->operation == ir_unop_exp) { 38901e04c3fSmrg return op_expr[0]->operands[0]; 39001e04c3fSmrg } 39101e04c3fSmrg break; 39201e04c3fSmrg 
39301e04c3fSmrg case ir_unop_exp2: 39401e04c3fSmrg if (op_expr[0] == NULL) 39501e04c3fSmrg break; 39601e04c3fSmrg 39701e04c3fSmrg if (op_expr[0]->operation == ir_unop_log2) { 39801e04c3fSmrg return op_expr[0]->operands[0]; 39901e04c3fSmrg } 40001e04c3fSmrg 40101e04c3fSmrg if (!options->EmitNoPow && op_expr[0]->operation == ir_binop_mul) { 40201e04c3fSmrg for (int log2_pos = 0; log2_pos < 2; log2_pos++) { 40301e04c3fSmrg ir_expression *log2_expr = 40401e04c3fSmrg op_expr[0]->operands[log2_pos]->as_expression(); 40501e04c3fSmrg 40601e04c3fSmrg if (log2_expr && log2_expr->operation == ir_unop_log2) { 40701e04c3fSmrg return new(mem_ctx) ir_expression(ir_binop_pow, 40801e04c3fSmrg ir->type, 40901e04c3fSmrg log2_expr->operands[0], 41001e04c3fSmrg op_expr[0]->operands[1 - log2_pos]); 41101e04c3fSmrg } 41201e04c3fSmrg } 41301e04c3fSmrg } 41401e04c3fSmrg break; 41501e04c3fSmrg 41601e04c3fSmrg case ir_unop_log2: 41701e04c3fSmrg if (op_expr[0] == NULL) 41801e04c3fSmrg break; 41901e04c3fSmrg 42001e04c3fSmrg if (op_expr[0]->operation == ir_unop_exp2) { 42101e04c3fSmrg return op_expr[0]->operands[0]; 42201e04c3fSmrg } 42301e04c3fSmrg break; 42401e04c3fSmrg 42501e04c3fSmrg case ir_unop_f2i: 42601e04c3fSmrg case ir_unop_f2u: 42701e04c3fSmrg if (op_expr[0] && op_expr[0]->operation == ir_unop_trunc) { 42801e04c3fSmrg return new(mem_ctx) ir_expression(ir->operation, 42901e04c3fSmrg ir->type, 43001e04c3fSmrg op_expr[0]->operands[0]); 43101e04c3fSmrg } 43201e04c3fSmrg break; 43301e04c3fSmrg 43401e04c3fSmrg case ir_unop_logic_not: { 43501e04c3fSmrg enum ir_expression_operation new_op = ir_unop_logic_not; 43601e04c3fSmrg 43701e04c3fSmrg if (op_expr[0] == NULL) 43801e04c3fSmrg break; 43901e04c3fSmrg 44001e04c3fSmrg switch (op_expr[0]->operation) { 44101e04c3fSmrg case ir_binop_less: new_op = ir_binop_gequal; break; 44201e04c3fSmrg case ir_binop_gequal: new_op = ir_binop_less; break; 44301e04c3fSmrg case ir_binop_equal: new_op = ir_binop_nequal; break; 44401e04c3fSmrg case ir_binop_nequal: 
new_op = ir_binop_equal; break; 44501e04c3fSmrg case ir_binop_all_equal: new_op = ir_binop_any_nequal; break; 44601e04c3fSmrg case ir_binop_any_nequal: new_op = ir_binop_all_equal; break; 44701e04c3fSmrg 44801e04c3fSmrg default: 44901e04c3fSmrg /* The default case handler is here to silence a warning from GCC. 45001e04c3fSmrg */ 45101e04c3fSmrg break; 45201e04c3fSmrg } 45301e04c3fSmrg 45401e04c3fSmrg if (new_op != ir_unop_logic_not) { 45501e04c3fSmrg return new(mem_ctx) ir_expression(new_op, 45601e04c3fSmrg ir->type, 45701e04c3fSmrg op_expr[0]->operands[0], 45801e04c3fSmrg op_expr[0]->operands[1]); 45901e04c3fSmrg } 46001e04c3fSmrg 46101e04c3fSmrg break; 46201e04c3fSmrg } 46301e04c3fSmrg 46401e04c3fSmrg case ir_unop_saturate: 46501e04c3fSmrg if (op_expr[0] && op_expr[0]->operation == ir_binop_add) { 46601e04c3fSmrg ir_expression *b2f_0 = op_expr[0]->operands[0]->as_expression(); 46701e04c3fSmrg ir_expression *b2f_1 = op_expr[0]->operands[1]->as_expression(); 46801e04c3fSmrg 46901e04c3fSmrg if (b2f_0 && b2f_0->operation == ir_unop_b2f && 47001e04c3fSmrg b2f_1 && b2f_1->operation == ir_unop_b2f) { 47101e04c3fSmrg return b2f(logic_or(b2f_0->operands[0], b2f_1->operands[0])); 47201e04c3fSmrg } 47301e04c3fSmrg } 47401e04c3fSmrg break; 47501e04c3fSmrg 47601e04c3fSmrg /* This macro CANNOT use the do { } while(true) mechanism because 47701e04c3fSmrg * then the breaks apply to the loop instead of the switch! 
47801e04c3fSmrg */ 47901e04c3fSmrg#define HANDLE_PACK_UNPACK_INVERSE(inverse_operation) \ 48001e04c3fSmrg { \ 48101e04c3fSmrg ir_expression *const op = ir->operands[0]->as_expression(); \ 48201e04c3fSmrg if (op == NULL) \ 48301e04c3fSmrg break; \ 48401e04c3fSmrg if (op->operation == (inverse_operation)) \ 48501e04c3fSmrg return op->operands[0]; \ 48601e04c3fSmrg break; \ 48701e04c3fSmrg } 48801e04c3fSmrg 48901e04c3fSmrg case ir_unop_unpack_uint_2x32: 49001e04c3fSmrg HANDLE_PACK_UNPACK_INVERSE(ir_unop_pack_uint_2x32); 49101e04c3fSmrg case ir_unop_pack_uint_2x32: 49201e04c3fSmrg HANDLE_PACK_UNPACK_INVERSE(ir_unop_unpack_uint_2x32); 49301e04c3fSmrg case ir_unop_unpack_int_2x32: 49401e04c3fSmrg HANDLE_PACK_UNPACK_INVERSE(ir_unop_pack_int_2x32); 49501e04c3fSmrg case ir_unop_pack_int_2x32: 49601e04c3fSmrg HANDLE_PACK_UNPACK_INVERSE(ir_unop_unpack_int_2x32); 49701e04c3fSmrg case ir_unop_unpack_double_2x32: 49801e04c3fSmrg HANDLE_PACK_UNPACK_INVERSE(ir_unop_pack_double_2x32); 49901e04c3fSmrg case ir_unop_pack_double_2x32: 50001e04c3fSmrg HANDLE_PACK_UNPACK_INVERSE(ir_unop_unpack_double_2x32); 50101e04c3fSmrg 50201e04c3fSmrg#undef HANDLE_PACK_UNPACK_INVERSE 50301e04c3fSmrg 50401e04c3fSmrg case ir_binop_add: 50501e04c3fSmrg if (is_vec_zero(op_const[0])) 50601e04c3fSmrg return ir->operands[1]; 50701e04c3fSmrg if (is_vec_zero(op_const[1])) 50801e04c3fSmrg return ir->operands[0]; 50901e04c3fSmrg 5107e102996Smaya /* Replace (x + (-x)) with constant 0 */ 5117e102996Smaya for (int i = 0; i < 2; i++) { 5127e102996Smaya if (op_expr[i]) { 5137e102996Smaya if (op_expr[i]->operation == ir_unop_neg) { 5147e102996Smaya ir_rvalue *other = ir->operands[(i + 1) % 2]; 5157e102996Smaya if (other && op_expr[i]->operands[0]->equals(other)) { 5167e102996Smaya return ir_constant::zero(ir, ir->type); 5177e102996Smaya } 5187e102996Smaya } 5197e102996Smaya } 5207e102996Smaya } 5217e102996Smaya 52201e04c3fSmrg /* Reassociate addition of constants so that we can do constant 52301e04c3fSmrg * folding. 
52401e04c3fSmrg */ 52501e04c3fSmrg if (op_const[0] && !op_const[1]) 52601e04c3fSmrg reassociate_constant(ir, 0, op_const[0], op_expr[1]); 52701e04c3fSmrg if (op_const[1] && !op_const[0]) 52801e04c3fSmrg reassociate_constant(ir, 1, op_const[1], op_expr[0]); 52901e04c3fSmrg 53001e04c3fSmrg /* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */ 53101e04c3fSmrg if (options->OptimizeForAOS) { 53201e04c3fSmrg ir_expression *expr = try_replace_with_dot(op_expr[0], op_expr[1], 53301e04c3fSmrg mem_ctx); 53401e04c3fSmrg if (expr) 53501e04c3fSmrg return expr; 53601e04c3fSmrg } 53701e04c3fSmrg 53801e04c3fSmrg /* Replace (-x + y) * a + x and commutative variations with lrp(x, y, a). 53901e04c3fSmrg * 54001e04c3fSmrg * (-x + y) * a + x 54101e04c3fSmrg * (x * -a) + (y * a) + x 54201e04c3fSmrg * x + (x * -a) + (y * a) 54301e04c3fSmrg * x * (1 - a) + y * a 54401e04c3fSmrg * lrp(x, y, a) 54501e04c3fSmrg */ 54601e04c3fSmrg for (int mul_pos = 0; mul_pos < 2; mul_pos++) { 54701e04c3fSmrg ir_expression *mul = op_expr[mul_pos]; 54801e04c3fSmrg 54901e04c3fSmrg if (!mul || mul->operation != ir_binop_mul) 55001e04c3fSmrg continue; 55101e04c3fSmrg 55201e04c3fSmrg /* Multiply found on one of the operands. Now check for an 55301e04c3fSmrg * inner addition operation. 55401e04c3fSmrg */ 55501e04c3fSmrg for (int inner_add_pos = 0; inner_add_pos < 2; inner_add_pos++) { 55601e04c3fSmrg ir_expression *inner_add = 55701e04c3fSmrg mul->operands[inner_add_pos]->as_expression(); 55801e04c3fSmrg 55901e04c3fSmrg if (!inner_add || inner_add->operation != ir_binop_add) 56001e04c3fSmrg continue; 56101e04c3fSmrg 56201e04c3fSmrg /* Inner addition found on one of the operands. Now check for 56301e04c3fSmrg * one of the operands of the inner addition to be the negative 56401e04c3fSmrg * of x_operand. 
56501e04c3fSmrg */ 56601e04c3fSmrg for (int neg_pos = 0; neg_pos < 2; neg_pos++) { 56701e04c3fSmrg ir_expression *neg = 56801e04c3fSmrg inner_add->operands[neg_pos]->as_expression(); 56901e04c3fSmrg 57001e04c3fSmrg if (!neg || neg->operation != ir_unop_neg) 57101e04c3fSmrg continue; 57201e04c3fSmrg 57301e04c3fSmrg ir_rvalue *x_operand = ir->operands[1 - mul_pos]; 57401e04c3fSmrg 57501e04c3fSmrg if (!neg->operands[0]->equals(x_operand)) 57601e04c3fSmrg continue; 57701e04c3fSmrg 57801e04c3fSmrg ir_rvalue *y_operand = inner_add->operands[1 - neg_pos]; 57901e04c3fSmrg ir_rvalue *a_operand = mul->operands[1 - inner_add_pos]; 58001e04c3fSmrg 5817ec681f3Smrg if (!x_operand->type->is_float_16_32_64() || 5827ec681f3Smrg x_operand->type != y_operand->type || 58301e04c3fSmrg x_operand->type != a_operand->type) 58401e04c3fSmrg continue; 58501e04c3fSmrg 58601e04c3fSmrg return lrp(x_operand, y_operand, a_operand); 58701e04c3fSmrg } 58801e04c3fSmrg } 58901e04c3fSmrg } 59001e04c3fSmrg 59101e04c3fSmrg break; 59201e04c3fSmrg 59301e04c3fSmrg case ir_binop_sub: 59401e04c3fSmrg if (is_vec_zero(op_const[0])) 59501e04c3fSmrg return neg(ir->operands[1]); 59601e04c3fSmrg if (is_vec_zero(op_const[1])) 59701e04c3fSmrg return ir->operands[0]; 59801e04c3fSmrg break; 59901e04c3fSmrg 60001e04c3fSmrg case ir_binop_mul: 60101e04c3fSmrg if (is_vec_one(op_const[0])) 60201e04c3fSmrg return ir->operands[1]; 60301e04c3fSmrg if (is_vec_one(op_const[1])) 60401e04c3fSmrg return ir->operands[0]; 60501e04c3fSmrg 60601e04c3fSmrg if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) 60701e04c3fSmrg return ir_constant::zero(ir, ir->type); 60801e04c3fSmrg 60901e04c3fSmrg if (is_vec_negative_one(op_const[0])) 61001e04c3fSmrg return neg(ir->operands[1]); 61101e04c3fSmrg if (is_vec_negative_one(op_const[1])) 61201e04c3fSmrg return neg(ir->operands[0]); 61301e04c3fSmrg 61401e04c3fSmrg if (op_expr[0] && op_expr[0]->operation == ir_unop_b2f && 61501e04c3fSmrg op_expr[1] && op_expr[1]->operation == ir_unop_b2f) { 
61601e04c3fSmrg return b2f(logic_and(op_expr[0]->operands[0], op_expr[1]->operands[0])); 61701e04c3fSmrg } 61801e04c3fSmrg 61901e04c3fSmrg /* Reassociate multiplication of constants so that we can do 62001e04c3fSmrg * constant folding. 62101e04c3fSmrg */ 62201e04c3fSmrg if (op_const[0] && !op_const[1]) 62301e04c3fSmrg reassociate_constant(ir, 0, op_const[0], op_expr[1]); 62401e04c3fSmrg if (op_const[1] && !op_const[0]) 62501e04c3fSmrg reassociate_constant(ir, 1, op_const[1], op_expr[0]); 62601e04c3fSmrg 62701e04c3fSmrg /* Optimizes 62801e04c3fSmrg * 62901e04c3fSmrg * (mul (floor (add (abs x) 0.5) (sign x))) 63001e04c3fSmrg * 63101e04c3fSmrg * into 63201e04c3fSmrg * 63301e04c3fSmrg * (trunc (add x (mul (sign x) 0.5))) 63401e04c3fSmrg */ 63501e04c3fSmrg for (int i = 0; i < 2; i++) { 63601e04c3fSmrg ir_expression *sign_expr = ir->operands[i]->as_expression(); 63701e04c3fSmrg ir_expression *floor_expr = ir->operands[1 - i]->as_expression(); 63801e04c3fSmrg 63901e04c3fSmrg if (!sign_expr || sign_expr->operation != ir_unop_sign || 64001e04c3fSmrg !floor_expr || floor_expr->operation != ir_unop_floor) 64101e04c3fSmrg continue; 64201e04c3fSmrg 64301e04c3fSmrg ir_expression *add_expr = floor_expr->operands[0]->as_expression(); 64401e04c3fSmrg if (!add_expr || add_expr->operation != ir_binop_add) 64501e04c3fSmrg continue; 64601e04c3fSmrg 64701e04c3fSmrg for (int j = 0; j < 2; j++) { 64801e04c3fSmrg ir_expression *abs_expr = add_expr->operands[j]->as_expression(); 64901e04c3fSmrg if (!abs_expr || abs_expr->operation != ir_unop_abs) 65001e04c3fSmrg continue; 65101e04c3fSmrg 65201e04c3fSmrg ir_constant *point_five = add_expr->operands[1 - j]->as_constant(); 65301e04c3fSmrg if (!point_five || !point_five->is_value(0.5, 0)) 65401e04c3fSmrg continue; 65501e04c3fSmrg 65601e04c3fSmrg if (abs_expr->operands[0]->equals(sign_expr->operands[0])) { 65701e04c3fSmrg return trunc(add(abs_expr->operands[0], 65801e04c3fSmrg mul(sign_expr, point_five))); 65901e04c3fSmrg } 66001e04c3fSmrg } 
66101e04c3fSmrg } 66201e04c3fSmrg break; 66301e04c3fSmrg 66401e04c3fSmrg case ir_binop_div: 66501e04c3fSmrg if (is_vec_one(op_const[0]) && ( 66601e04c3fSmrg ir->type->is_float() || ir->type->is_double())) { 66701e04c3fSmrg return new(mem_ctx) ir_expression(ir_unop_rcp, 66801e04c3fSmrg ir->operands[1]->type, 66901e04c3fSmrg ir->operands[1], 67001e04c3fSmrg NULL); 67101e04c3fSmrg } 67201e04c3fSmrg if (is_vec_one(op_const[1])) 67301e04c3fSmrg return ir->operands[0]; 67401e04c3fSmrg break; 67501e04c3fSmrg 67601e04c3fSmrg case ir_binop_dot: 67701e04c3fSmrg if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) 67801e04c3fSmrg return ir_constant::zero(mem_ctx, ir->type); 67901e04c3fSmrg 68001e04c3fSmrg for (int i = 0; i < 2; i++) { 68101e04c3fSmrg if (!op_const[i]) 68201e04c3fSmrg continue; 68301e04c3fSmrg 68401e04c3fSmrg unsigned components[4] = { 0 }, count = 0; 68501e04c3fSmrg 68601e04c3fSmrg for (unsigned c = 0; c < op_const[i]->type->vector_elements; c++) { 68701e04c3fSmrg if (op_const[i]->is_zero()) 68801e04c3fSmrg continue; 68901e04c3fSmrg 69001e04c3fSmrg components[count] = c; 69101e04c3fSmrg count++; 69201e04c3fSmrg } 69301e04c3fSmrg 69401e04c3fSmrg /* No channels had zero values; bail. */ 69501e04c3fSmrg if (count >= op_const[i]->type->vector_elements) 69601e04c3fSmrg break; 69701e04c3fSmrg 69801e04c3fSmrg ir_expression_operation op = count == 1 ? 69901e04c3fSmrg ir_binop_mul : ir_binop_dot; 70001e04c3fSmrg 70101e04c3fSmrg /* Swizzle both operands to remove the channels that were zero. 
*/ 70201e04c3fSmrg return new(mem_ctx) 70301e04c3fSmrg ir_expression(op, ir->type, 70401e04c3fSmrg new(mem_ctx) ir_swizzle(ir->operands[0], 70501e04c3fSmrg components, count), 70601e04c3fSmrg new(mem_ctx) ir_swizzle(ir->operands[1], 70701e04c3fSmrg components, count)); 70801e04c3fSmrg } 70901e04c3fSmrg break; 71001e04c3fSmrg 71101e04c3fSmrg case ir_binop_equal: 71201e04c3fSmrg case ir_binop_nequal: 71301e04c3fSmrg for (int add_pos = 0; add_pos < 2; add_pos++) { 71401e04c3fSmrg ir_expression *add = op_expr[add_pos]; 71501e04c3fSmrg 71601e04c3fSmrg if (!add || add->operation != ir_binop_add) 71701e04c3fSmrg continue; 71801e04c3fSmrg 71901e04c3fSmrg ir_constant *zero = op_const[1 - add_pos]; 72001e04c3fSmrg if (!is_vec_zero(zero)) 72101e04c3fSmrg continue; 72201e04c3fSmrg 72301e04c3fSmrg /* We are allowed to add scalars with a vector or matrix. In that 72401e04c3fSmrg * case lets just exit early. 72501e04c3fSmrg */ 72601e04c3fSmrg if (add->operands[0]->type != add->operands[1]->type) 72701e04c3fSmrg continue; 72801e04c3fSmrg 72901e04c3fSmrg /* Depending of the zero position we want to optimize 73001e04c3fSmrg * (0 cmp x+y) into (-x cmp y) or (x+y cmp 0) into (x cmp -y) 73101e04c3fSmrg */ 73201e04c3fSmrg if (add_pos == 1) { 73301e04c3fSmrg return new(mem_ctx) ir_expression(ir->operation, 73401e04c3fSmrg neg(add->operands[0]), 73501e04c3fSmrg add->operands[1]); 73601e04c3fSmrg } else { 73701e04c3fSmrg return new(mem_ctx) ir_expression(ir->operation, 73801e04c3fSmrg add->operands[0], 73901e04c3fSmrg neg(add->operands[1])); 74001e04c3fSmrg } 74101e04c3fSmrg } 74201e04c3fSmrg break; 74301e04c3fSmrg 74401e04c3fSmrg case ir_binop_all_equal: 74501e04c3fSmrg case ir_binop_any_nequal: 74601e04c3fSmrg if (ir->operands[0]->type->is_scalar() && 74701e04c3fSmrg ir->operands[1]->type->is_scalar()) 74801e04c3fSmrg return new(mem_ctx) ir_expression(ir->operation == ir_binop_all_equal 74901e04c3fSmrg ? 
ir_binop_equal : ir_binop_nequal, 75001e04c3fSmrg ir->operands[0], 75101e04c3fSmrg ir->operands[1]); 75201e04c3fSmrg break; 75301e04c3fSmrg 75401e04c3fSmrg case ir_binop_rshift: 75501e04c3fSmrg case ir_binop_lshift: 75601e04c3fSmrg /* 0 >> x == 0 */ 75701e04c3fSmrg if (is_vec_zero(op_const[0])) 75801e04c3fSmrg return ir->operands[0]; 75901e04c3fSmrg /* x >> 0 == x */ 76001e04c3fSmrg if (is_vec_zero(op_const[1])) 76101e04c3fSmrg return ir->operands[0]; 76201e04c3fSmrg break; 76301e04c3fSmrg 76401e04c3fSmrg case ir_binop_logic_and: 76501e04c3fSmrg if (is_vec_one(op_const[0])) { 76601e04c3fSmrg return ir->operands[1]; 76701e04c3fSmrg } else if (is_vec_one(op_const[1])) { 76801e04c3fSmrg return ir->operands[0]; 76901e04c3fSmrg } else if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) { 77001e04c3fSmrg return ir_constant::zero(mem_ctx, ir->type); 77101e04c3fSmrg } else if (op_expr[0] && op_expr[0]->operation == ir_unop_logic_not && 77201e04c3fSmrg op_expr[1] && op_expr[1]->operation == ir_unop_logic_not) { 77301e04c3fSmrg /* De Morgan's Law: 77401e04c3fSmrg * (not A) and (not B) === not (A or B) 77501e04c3fSmrg */ 77601e04c3fSmrg return logic_not(logic_or(op_expr[0]->operands[0], 77701e04c3fSmrg op_expr[1]->operands[0])); 77801e04c3fSmrg } else if (ir->operands[0]->equals(ir->operands[1])) { 77901e04c3fSmrg /* (a && a) == a */ 78001e04c3fSmrg return ir->operands[0]; 78101e04c3fSmrg } 78201e04c3fSmrg break; 78301e04c3fSmrg 78401e04c3fSmrg case ir_binop_logic_xor: 78501e04c3fSmrg if (is_vec_zero(op_const[0])) { 78601e04c3fSmrg return ir->operands[1]; 78701e04c3fSmrg } else if (is_vec_zero(op_const[1])) { 78801e04c3fSmrg return ir->operands[0]; 78901e04c3fSmrg } else if (is_vec_one(op_const[0])) { 79001e04c3fSmrg return logic_not(ir->operands[1]); 79101e04c3fSmrg } else if (is_vec_one(op_const[1])) { 79201e04c3fSmrg return logic_not(ir->operands[0]); 79301e04c3fSmrg } else if (ir->operands[0]->equals(ir->operands[1])) { 79401e04c3fSmrg /* (a ^^ a) == false */ 
79501e04c3fSmrg return ir_constant::zero(mem_ctx, ir->type); 79601e04c3fSmrg } 79701e04c3fSmrg break; 79801e04c3fSmrg 79901e04c3fSmrg case ir_binop_logic_or: 80001e04c3fSmrg if (is_vec_zero(op_const[0])) { 80101e04c3fSmrg return ir->operands[1]; 80201e04c3fSmrg } else if (is_vec_zero(op_const[1])) { 80301e04c3fSmrg return ir->operands[0]; 80401e04c3fSmrg } else if (is_vec_one(op_const[0]) || is_vec_one(op_const[1])) { 80501e04c3fSmrg ir_constant_data data; 80601e04c3fSmrg 80701e04c3fSmrg for (unsigned i = 0; i < 16; i++) 80801e04c3fSmrg data.b[i] = true; 80901e04c3fSmrg 81001e04c3fSmrg return new(mem_ctx) ir_constant(ir->type, &data); 81101e04c3fSmrg } else if (op_expr[0] && op_expr[0]->operation == ir_unop_logic_not && 81201e04c3fSmrg op_expr[1] && op_expr[1]->operation == ir_unop_logic_not) { 81301e04c3fSmrg /* De Morgan's Law: 81401e04c3fSmrg * (not A) or (not B) === not (A and B) 81501e04c3fSmrg */ 81601e04c3fSmrg return logic_not(logic_and(op_expr[0]->operands[0], 81701e04c3fSmrg op_expr[1]->operands[0])); 81801e04c3fSmrg } else if (ir->operands[0]->equals(ir->operands[1])) { 81901e04c3fSmrg /* (a || a) == a */ 82001e04c3fSmrg return ir->operands[0]; 82101e04c3fSmrg } 82201e04c3fSmrg break; 82301e04c3fSmrg 82401e04c3fSmrg case ir_binop_pow: 82501e04c3fSmrg /* 1^x == 1 */ 82601e04c3fSmrg if (is_vec_one(op_const[0])) 82701e04c3fSmrg return op_const[0]; 82801e04c3fSmrg 82901e04c3fSmrg /* x^1 == x */ 83001e04c3fSmrg if (is_vec_one(op_const[1])) 83101e04c3fSmrg return ir->operands[0]; 83201e04c3fSmrg 83301e04c3fSmrg /* pow(2,x) == exp2(x) */ 83401e04c3fSmrg if (is_vec_two(op_const[0])) 83501e04c3fSmrg return expr(ir_unop_exp2, ir->operands[1]); 83601e04c3fSmrg 83701e04c3fSmrg if (is_vec_two(op_const[1])) { 83801e04c3fSmrg ir_variable *x = new(ir) ir_variable(ir->operands[1]->type, "x", 83901e04c3fSmrg ir_var_temporary); 84001e04c3fSmrg base_ir->insert_before(x); 84101e04c3fSmrg base_ir->insert_before(assign(x, ir->operands[0])); 84201e04c3fSmrg return mul(x, x); 
84301e04c3fSmrg } 84401e04c3fSmrg 84501e04c3fSmrg if (is_vec_four(op_const[1])) { 84601e04c3fSmrg ir_variable *x = new(ir) ir_variable(ir->operands[1]->type, "x", 84701e04c3fSmrg ir_var_temporary); 84801e04c3fSmrg base_ir->insert_before(x); 84901e04c3fSmrg base_ir->insert_before(assign(x, ir->operands[0])); 85001e04c3fSmrg 85101e04c3fSmrg ir_variable *squared = new(ir) ir_variable(ir->operands[1]->type, 85201e04c3fSmrg "squared", 85301e04c3fSmrg ir_var_temporary); 85401e04c3fSmrg base_ir->insert_before(squared); 85501e04c3fSmrg base_ir->insert_before(assign(squared, mul(x, x))); 85601e04c3fSmrg return mul(squared, squared); 85701e04c3fSmrg } 85801e04c3fSmrg 85901e04c3fSmrg break; 86001e04c3fSmrg 86101e04c3fSmrg case ir_binop_min: 86201e04c3fSmrg case ir_binop_max: 86301e04c3fSmrg if (!ir->type->is_float() || options->EmitNoSat) 86401e04c3fSmrg break; 86501e04c3fSmrg 86601e04c3fSmrg /* Replace min(max) operations and its commutative combinations with 86701e04c3fSmrg * a saturate operation 86801e04c3fSmrg */ 86901e04c3fSmrg for (int op = 0; op < 2; op++) { 87001e04c3fSmrg ir_expression *inner_expr = op_expr[op]; 87101e04c3fSmrg ir_constant *outer_const = op_const[1 - op]; 87201e04c3fSmrg ir_expression_operation op_cond = (ir->operation == ir_binop_max) ? 87301e04c3fSmrg ir_binop_min : ir_binop_max; 87401e04c3fSmrg 87501e04c3fSmrg if (!inner_expr || !outer_const || (inner_expr->operation != op_cond)) 87601e04c3fSmrg continue; 87701e04c3fSmrg 87801e04c3fSmrg /* One of these has to be a constant */ 87901e04c3fSmrg if (!inner_expr->operands[0]->as_constant() && 88001e04c3fSmrg !inner_expr->operands[1]->as_constant()) 88101e04c3fSmrg break; 88201e04c3fSmrg 88301e04c3fSmrg /* Found a min(max) combination. 
Now try to see if its operands 88401e04c3fSmrg * meet our conditions that we can do just a single saturate operation 88501e04c3fSmrg */ 88601e04c3fSmrg for (int minmax_op = 0; minmax_op < 2; minmax_op++) { 88701e04c3fSmrg ir_rvalue *x = inner_expr->operands[minmax_op]; 88801e04c3fSmrg ir_rvalue *y = inner_expr->operands[1 - minmax_op]; 88901e04c3fSmrg 89001e04c3fSmrg ir_constant *inner_const = y->as_constant(); 89101e04c3fSmrg if (!inner_const) 89201e04c3fSmrg continue; 89301e04c3fSmrg 89401e04c3fSmrg /* min(max(x, 0.0), 1.0) is sat(x) */ 89501e04c3fSmrg if (ir->operation == ir_binop_min && 89601e04c3fSmrg inner_const->is_zero() && 89701e04c3fSmrg outer_const->is_one()) 89801e04c3fSmrg return saturate(x); 89901e04c3fSmrg 90001e04c3fSmrg /* max(min(x, 1.0), 0.0) is sat(x) */ 90101e04c3fSmrg if (ir->operation == ir_binop_max && 90201e04c3fSmrg inner_const->is_one() && 90301e04c3fSmrg outer_const->is_zero()) 90401e04c3fSmrg return saturate(x); 90501e04c3fSmrg 90601e04c3fSmrg /* min(max(x, 0.0), b) where b < 1.0 is sat(min(x, b)) */ 90701e04c3fSmrg if (ir->operation == ir_binop_min && 90801e04c3fSmrg inner_const->is_zero() && 90901e04c3fSmrg is_less_than_one(outer_const)) 91001e04c3fSmrg return saturate(expr(ir_binop_min, x, outer_const)); 91101e04c3fSmrg 91201e04c3fSmrg /* max(min(x, b), 0.0) where b < 1.0 is sat(min(x, b)) */ 91301e04c3fSmrg if (ir->operation == ir_binop_max && 91401e04c3fSmrg is_less_than_one(inner_const) && 91501e04c3fSmrg outer_const->is_zero()) 91601e04c3fSmrg return saturate(expr(ir_binop_min, x, inner_const)); 91701e04c3fSmrg 91801e04c3fSmrg /* max(min(x, 1.0), b) where b > 0.0 is sat(max(x, b)) */ 91901e04c3fSmrg if (ir->operation == ir_binop_max && 92001e04c3fSmrg inner_const->is_one() && 92101e04c3fSmrg is_greater_than_zero(outer_const)) 92201e04c3fSmrg return saturate(expr(ir_binop_max, x, outer_const)); 92301e04c3fSmrg 92401e04c3fSmrg /* min(max(x, b), 1.0) where b > 0.0 is sat(max(x, b)) */ 92501e04c3fSmrg if (ir->operation == 
ir_binop_min && 92601e04c3fSmrg is_greater_than_zero(inner_const) && 92701e04c3fSmrg outer_const->is_one()) 92801e04c3fSmrg return saturate(expr(ir_binop_max, x, inner_const)); 92901e04c3fSmrg } 93001e04c3fSmrg } 93101e04c3fSmrg 93201e04c3fSmrg break; 93301e04c3fSmrg 93401e04c3fSmrg case ir_unop_rcp: 93501e04c3fSmrg if (op_expr[0] && op_expr[0]->operation == ir_unop_rcp) 93601e04c3fSmrg return op_expr[0]->operands[0]; 93701e04c3fSmrg 93801e04c3fSmrg if (op_expr[0] && (op_expr[0]->operation == ir_unop_exp2 || 93901e04c3fSmrg op_expr[0]->operation == ir_unop_exp)) { 94001e04c3fSmrg return new(mem_ctx) ir_expression(op_expr[0]->operation, ir->type, 94101e04c3fSmrg neg(op_expr[0]->operands[0])); 94201e04c3fSmrg } 94301e04c3fSmrg 94401e04c3fSmrg /* While ir_to_mesa.cpp will lower sqrt(x) to rcp(rsq(x)), it does so at 94501e04c3fSmrg * its IR level, so we can always apply this transformation. 94601e04c3fSmrg */ 94701e04c3fSmrg if (op_expr[0] && op_expr[0]->operation == ir_unop_rsq) 94801e04c3fSmrg return sqrt(op_expr[0]->operands[0]); 94901e04c3fSmrg 95001e04c3fSmrg /* As far as we know, all backends are OK with rsq. */ 95101e04c3fSmrg if (op_expr[0] && op_expr[0]->operation == ir_unop_sqrt) { 95201e04c3fSmrg return rsq(op_expr[0]->operands[0]); 95301e04c3fSmrg } 95401e04c3fSmrg 95501e04c3fSmrg break; 95601e04c3fSmrg 95701e04c3fSmrg case ir_triop_fma: 95801e04c3fSmrg /* Operands are op0 * op1 + op2. 
*/ 95901e04c3fSmrg if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) { 96001e04c3fSmrg return ir->operands[2]; 96101e04c3fSmrg } else if (is_vec_zero(op_const[2])) { 96201e04c3fSmrg return mul(ir->operands[0], ir->operands[1]); 96301e04c3fSmrg } else if (is_vec_one(op_const[0])) { 96401e04c3fSmrg return add(ir->operands[1], ir->operands[2]); 96501e04c3fSmrg } else if (is_vec_one(op_const[1])) { 96601e04c3fSmrg return add(ir->operands[0], ir->operands[2]); 96701e04c3fSmrg } 96801e04c3fSmrg break; 96901e04c3fSmrg 97001e04c3fSmrg case ir_triop_lrp: 97101e04c3fSmrg /* Operands are (x, y, a). */ 97201e04c3fSmrg if (is_vec_zero(op_const[2])) { 97301e04c3fSmrg return ir->operands[0]; 97401e04c3fSmrg } else if (is_vec_one(op_const[2])) { 97501e04c3fSmrg return ir->operands[1]; 97601e04c3fSmrg } else if (ir->operands[0]->equals(ir->operands[1])) { 97701e04c3fSmrg return ir->operands[0]; 97801e04c3fSmrg } else if (is_vec_zero(op_const[0])) { 97901e04c3fSmrg return mul(ir->operands[1], ir->operands[2]); 98001e04c3fSmrg } else if (is_vec_zero(op_const[1])) { 98101e04c3fSmrg unsigned op2_components = ir->operands[2]->type->vector_elements; 98201e04c3fSmrg ir_constant *one; 98301e04c3fSmrg 98401e04c3fSmrg switch (ir->type->base_type) { 9857ec681f3Smrg case GLSL_TYPE_FLOAT16: 9867ec681f3Smrg one = new(mem_ctx) ir_constant(float16_t::one(), op2_components); 9877ec681f3Smrg break; 98801e04c3fSmrg case GLSL_TYPE_FLOAT: 98901e04c3fSmrg one = new(mem_ctx) ir_constant(1.0f, op2_components); 99001e04c3fSmrg break; 99101e04c3fSmrg case GLSL_TYPE_DOUBLE: 99201e04c3fSmrg one = new(mem_ctx) ir_constant(1.0, op2_components); 99301e04c3fSmrg break; 99401e04c3fSmrg default: 99501e04c3fSmrg one = NULL; 99601e04c3fSmrg unreachable("unexpected type"); 99701e04c3fSmrg } 99801e04c3fSmrg 99901e04c3fSmrg return mul(ir->operands[0], add(one, neg(ir->operands[2]))); 100001e04c3fSmrg } 100101e04c3fSmrg break; 100201e04c3fSmrg 100301e04c3fSmrg case ir_triop_csel: 100401e04c3fSmrg if 
(is_vec_one(op_const[0])) 100501e04c3fSmrg return ir->operands[1]; 100601e04c3fSmrg if (is_vec_zero(op_const[0])) 100701e04c3fSmrg return ir->operands[2]; 100801e04c3fSmrg break; 100901e04c3fSmrg 101001e04c3fSmrg /* Remove interpolateAt* instructions for demoted inputs. They are 101101e04c3fSmrg * assigned a constant expression to facilitate this. 101201e04c3fSmrg */ 101301e04c3fSmrg case ir_unop_interpolate_at_centroid: 101401e04c3fSmrg case ir_binop_interpolate_at_offset: 101501e04c3fSmrg case ir_binop_interpolate_at_sample: 101601e04c3fSmrg if (op_const[0]) 101701e04c3fSmrg return ir->operands[0]; 101801e04c3fSmrg break; 101901e04c3fSmrg 102001e04c3fSmrg default: 102101e04c3fSmrg break; 102201e04c3fSmrg } 102301e04c3fSmrg 102401e04c3fSmrg return ir; 102501e04c3fSmrg} 102601e04c3fSmrg /* handle_rvalue: try to simplify the expression stored in the slot that rvalue points to. Nothing happens when the slot is NULL, when it does not hold an ir_expression, or when the expression's operation is ir_quadop_vector. Otherwise the expression is passed to handle_expression(); if a different rvalue comes back, it is stored into the slot (after swizzle_if_required()) and this->progress is set to true. */ 102701e04c3fSmrgvoid 102801e04c3fSmrgir_algebraic_visitor::handle_rvalue(ir_rvalue **rvalue) 102901e04c3fSmrg{ 103001e04c3fSmrg if (!*rvalue) 103101e04c3fSmrg return; 103201e04c3fSmrg 103301e04c3fSmrg ir_expression *expr = (*rvalue)->as_expression(); 103401e04c3fSmrg if (!expr || expr->operation == ir_quadop_vector) 103501e04c3fSmrg return; 103601e04c3fSmrg 103701e04c3fSmrg ir_rvalue *new_rvalue = handle_expression(expr); /* handle_expression() returns its own argument when no rule applied, so getting the same pointer back means there is nothing to install and progress must not be flagged. */ 103801e04c3fSmrg if (new_rvalue == *rvalue) 103901e04c3fSmrg return; 104001e04c3fSmrg 104101e04c3fSmrg /* If the expr used to be some vec OP scalar returning a vector, and the 104201e04c3fSmrg * optimization gave us back a scalar, we still need to turn it into a 104301e04c3fSmrg * vector. 
104401e04c3fSmrg */ 104501e04c3fSmrg *rvalue = swizzle_if_required(expr, new_rvalue); 104601e04c3fSmrg 104701e04c3fSmrg this->progress = true; 104801e04c3fSmrg} 104901e04c3fSmrg /* do_algebraic: entry point of the pass. Builds an ir_algebraic_visitor from native_integers and options, walks every element of instructions with visit_list_elements(), and returns the visitor's progress flag. The constructor initialises that flag to false and handle_rvalue() sets it whenever it replaces an rvalue, so a true result means the IR was changed. NOTE(review): visit_enter(ir_assignment *) is defined outside this view and may also set the flag; confirm there. */ 105001e04c3fSmrgbool 105101e04c3fSmrgdo_algebraic(exec_list *instructions, bool native_integers, 105201e04c3fSmrg const struct gl_shader_compiler_options *options) 105301e04c3fSmrg{ 105401e04c3fSmrg ir_algebraic_visitor v(native_integers, options); 105501e04c3fSmrg 105601e04c3fSmrg visit_list_elements(&v, instructions); 105701e04c3fSmrg 105801e04c3fSmrg return v.progress; 105901e04c3fSmrg} 1060