1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2010 Intel Corporation 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21b8e80941Smrg * DEALINGS IN THE SOFTWARE. 22b8e80941Smrg */ 23b8e80941Smrg 24b8e80941Smrg/** 25b8e80941Smrg * \file opt_algebraic.cpp 26b8e80941Smrg * 27b8e80941Smrg * Takes advantage of association, commutivity, and other algebraic 28b8e80941Smrg * properties to simplify expressions. 29b8e80941Smrg */ 30b8e80941Smrg 31b8e80941Smrg#include "ir.h" 32b8e80941Smrg#include "ir_visitor.h" 33b8e80941Smrg#include "ir_rvalue_visitor.h" 34b8e80941Smrg#include "ir_optimization.h" 35b8e80941Smrg#include "ir_builder.h" 36b8e80941Smrg#include "compiler/glsl_types.h" 37b8e80941Smrg#include "main/mtypes.h" 38b8e80941Smrg 39b8e80941Smrgusing namespace ir_builder; 40b8e80941Smrg 41b8e80941Smrgnamespace { 42b8e80941Smrg 43b8e80941Smrg/** 44b8e80941Smrg * Visitor class for replacing expressions with ir_constant values. 45b8e80941Smrg */ 46b8e80941Smrg 47b8e80941Smrgclass ir_algebraic_visitor : public ir_rvalue_visitor { 48b8e80941Smrgpublic: 49b8e80941Smrg ir_algebraic_visitor(bool native_integers, 50b8e80941Smrg const struct gl_shader_compiler_options *options) 51b8e80941Smrg : options(options) 52b8e80941Smrg { 53b8e80941Smrg this->progress = false; 54b8e80941Smrg this->mem_ctx = NULL; 55b8e80941Smrg this->native_integers = native_integers; 56b8e80941Smrg } 57b8e80941Smrg 58b8e80941Smrg virtual ~ir_algebraic_visitor() 59b8e80941Smrg { 60b8e80941Smrg } 61b8e80941Smrg 62b8e80941Smrg virtual ir_visitor_status visit_enter(ir_assignment *ir); 63b8e80941Smrg 64b8e80941Smrg ir_rvalue *handle_expression(ir_expression *ir); 65b8e80941Smrg void handle_rvalue(ir_rvalue **rvalue); 66b8e80941Smrg bool reassociate_constant(ir_expression *ir1, 67b8e80941Smrg int const_index, 68b8e80941Smrg ir_constant *constant, 69b8e80941Smrg ir_expression *ir2); 70b8e80941Smrg void reassociate_operands(ir_expression *ir1, 71b8e80941Smrg int op1, 72b8e80941Smrg ir_expression *ir2, 73b8e80941Smrg int op2); 74b8e80941Smrg ir_rvalue *swizzle_if_required(ir_expression *expr, 75b8e80941Smrg ir_rvalue *operand); 76b8e80941Smrg 77b8e80941Smrg const struct gl_shader_compiler_options *options; 78b8e80941Smrg void *mem_ctx; 79b8e80941Smrg 80b8e80941Smrg bool native_integers; 81b8e80941Smrg bool progress; 82b8e80941Smrg}; 83b8e80941Smrg 84b8e80941Smrg} /* unnamed namespace */ 85b8e80941Smrg 86b8e80941Smrgir_visitor_status 87b8e80941Smrgir_algebraic_visitor::visit_enter(ir_assignment *ir) 88b8e80941Smrg{ 89b8e80941Smrg ir_variable *var = ir->lhs->variable_referenced(); 90b8e80941Smrg if (var->data.invariant || var->data.precise) { 91b8e80941Smrg /* If we're assigning to an invariant or precise variable, just bail. 92b8e80941Smrg * Most of the algebraic optimizations aren't precision-safe. 93b8e80941Smrg * 94b8e80941Smrg * FINISHME: Find out which optimizations are precision-safe and enable 95b8e80941Smrg * then only for invariant or precise trees. 96b8e80941Smrg */ 97b8e80941Smrg return visit_continue_with_parent; 98b8e80941Smrg } else { 99b8e80941Smrg return visit_continue; 100b8e80941Smrg } 101b8e80941Smrg} 102b8e80941Smrg 103b8e80941Smrgstatic inline bool 104b8e80941Smrgis_vec_zero(ir_constant *ir) 105b8e80941Smrg{ 106b8e80941Smrg return (ir == NULL) ? false : ir->is_zero(); 107b8e80941Smrg} 108b8e80941Smrg 109b8e80941Smrgstatic inline bool 110b8e80941Smrgis_vec_one(ir_constant *ir) 111b8e80941Smrg{ 112b8e80941Smrg return (ir == NULL) ? false : ir->is_one(); 113b8e80941Smrg} 114b8e80941Smrg 115b8e80941Smrgstatic inline bool 116b8e80941Smrgis_vec_two(ir_constant *ir) 117b8e80941Smrg{ 118b8e80941Smrg return (ir == NULL) ? false : ir->is_value(2.0, 2); 119b8e80941Smrg} 120b8e80941Smrg 121b8e80941Smrgstatic inline bool 122b8e80941Smrgis_vec_four(ir_constant *ir) 123b8e80941Smrg{ 124b8e80941Smrg return (ir == NULL) ? false : ir->is_value(4.0, 4); 125b8e80941Smrg} 126b8e80941Smrg 127b8e80941Smrgstatic inline bool 128b8e80941Smrgis_vec_negative_one(ir_constant *ir) 129b8e80941Smrg{ 130b8e80941Smrg return (ir == NULL) ? false : ir->is_negative_one(); 131b8e80941Smrg} 132b8e80941Smrg 133b8e80941Smrgstatic inline bool 134b8e80941Smrgis_valid_vec_const(ir_constant *ir) 135b8e80941Smrg{ 136b8e80941Smrg if (ir == NULL) 137b8e80941Smrg return false; 138b8e80941Smrg 139b8e80941Smrg if (!ir->type->is_scalar() && !ir->type->is_vector()) 140b8e80941Smrg return false; 141b8e80941Smrg 142b8e80941Smrg return true; 143b8e80941Smrg} 144b8e80941Smrg 145b8e80941Smrgstatic inline bool 146b8e80941Smrgis_less_than_one(ir_constant *ir) 147b8e80941Smrg{ 148b8e80941Smrg assert(ir->type->is_float()); 149b8e80941Smrg 150b8e80941Smrg if (!is_valid_vec_const(ir)) 151b8e80941Smrg return false; 152b8e80941Smrg 153b8e80941Smrg unsigned component = 0; 154b8e80941Smrg for (int c = 0; c < ir->type->vector_elements; c++) { 155b8e80941Smrg if (ir->get_float_component(c) < 1.0f) 156b8e80941Smrg component++; 157b8e80941Smrg } 158b8e80941Smrg 159b8e80941Smrg return (component == ir->type->vector_elements); 160b8e80941Smrg} 161b8e80941Smrg 162b8e80941Smrgstatic inline bool 163b8e80941Smrgis_greater_than_zero(ir_constant *ir) 164b8e80941Smrg{ 165b8e80941Smrg assert(ir->type->is_float()); 166b8e80941Smrg 167b8e80941Smrg if (!is_valid_vec_const(ir)) 168b8e80941Smrg return false; 169b8e80941Smrg 170b8e80941Smrg unsigned component = 0; 171b8e80941Smrg for (int c = 0; c < ir->type->vector_elements; c++) { 172b8e80941Smrg if (ir->get_float_component(c) > 0.0f) 173b8e80941Smrg component++; 174b8e80941Smrg } 175b8e80941Smrg 176b8e80941Smrg return (component == ir->type->vector_elements); 177b8e80941Smrg} 178b8e80941Smrg 179b8e80941Smrgstatic void 180b8e80941Smrgupdate_type(ir_expression *ir) 181b8e80941Smrg{ 182b8e80941Smrg if (ir->operands[0]->type->is_vector()) 183b8e80941Smrg ir->type = ir->operands[0]->type; 184b8e80941Smrg else 185b8e80941Smrg ir->type = ir->operands[1]->type; 186b8e80941Smrg} 187b8e80941Smrg 188b8e80941Smrg/* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */ 189b8e80941Smrgstatic ir_expression * 190b8e80941Smrgtry_replace_with_dot(ir_expression *expr0, ir_expression *expr1, void *mem_ctx) 191b8e80941Smrg{ 192b8e80941Smrg if (expr0 && expr0->operation == ir_binop_add && 193b8e80941Smrg expr0->type->is_float() && 194b8e80941Smrg expr1 && expr1->operation == ir_binop_add && 195b8e80941Smrg expr1->type->is_float()) { 196b8e80941Smrg ir_swizzle *x = expr0->operands[0]->as_swizzle(); 197b8e80941Smrg ir_swizzle *y = expr0->operands[1]->as_swizzle(); 198b8e80941Smrg ir_swizzle *z = expr1->operands[0]->as_swizzle(); 199b8e80941Smrg ir_swizzle *w = expr1->operands[1]->as_swizzle(); 200b8e80941Smrg 201b8e80941Smrg if (!x || x->mask.num_components != 1 || 202b8e80941Smrg !y || y->mask.num_components != 1 || 203b8e80941Smrg !z || z->mask.num_components != 1 || 204b8e80941Smrg !w || w->mask.num_components != 1) { 205b8e80941Smrg return NULL; 206b8e80941Smrg } 207b8e80941Smrg 208b8e80941Smrg bool swiz_seen[4] = {false, false, false, false}; 209b8e80941Smrg swiz_seen[x->mask.x] = true; 210b8e80941Smrg swiz_seen[y->mask.x] = true; 211b8e80941Smrg swiz_seen[z->mask.x] = true; 212b8e80941Smrg swiz_seen[w->mask.x] = true; 213b8e80941Smrg 214b8e80941Smrg if (!swiz_seen[0] || !swiz_seen[1] || 215b8e80941Smrg !swiz_seen[2] || !swiz_seen[3]) { 216b8e80941Smrg return NULL; 217b8e80941Smrg } 218b8e80941Smrg 219b8e80941Smrg if (x->val->equals(y->val) && 220b8e80941Smrg x->val->equals(z->val) && 221b8e80941Smrg x->val->equals(w->val)) { 222b8e80941Smrg return dot(x->val, new(mem_ctx) ir_constant(1.0f, 4)); 223b8e80941Smrg } 224b8e80941Smrg } 225b8e80941Smrg return NULL; 226b8e80941Smrg} 227b8e80941Smrg 228b8e80941Smrgvoid 229b8e80941Smrgir_algebraic_visitor::reassociate_operands(ir_expression *ir1, 230b8e80941Smrg int op1, 231b8e80941Smrg ir_expression *ir2, 232b8e80941Smrg int op2) 233b8e80941Smrg{ 234b8e80941Smrg ir_rvalue *temp = ir2->operands[op2]; 235b8e80941Smrg ir2->operands[op2] = ir1->operands[op1]; 236b8e80941Smrg ir1->operands[op1] = temp; 237b8e80941Smrg 238b8e80941Smrg /* Update the type of ir2. The type of ir1 won't have changed -- 239b8e80941Smrg * base types matched, and at least one of the operands of the 2 240b8e80941Smrg * binops is still a vector if any of them were. 241b8e80941Smrg */ 242b8e80941Smrg update_type(ir2); 243b8e80941Smrg 244b8e80941Smrg this->progress = true; 245b8e80941Smrg} 246b8e80941Smrg 247b8e80941Smrg/** 248b8e80941Smrg * Reassociates a constant down a tree of adds or multiplies. 249b8e80941Smrg * 250b8e80941Smrg * Consider (2 * (a * (b * 0.5))). We want to end up with a * b. 251b8e80941Smrg */ 252b8e80941Smrgbool 253b8e80941Smrgir_algebraic_visitor::reassociate_constant(ir_expression *ir1, int const_index, 254b8e80941Smrg ir_constant *constant, 255b8e80941Smrg ir_expression *ir2) 256b8e80941Smrg{ 257b8e80941Smrg if (!ir2 || ir1->operation != ir2->operation) 258b8e80941Smrg return false; 259b8e80941Smrg 260b8e80941Smrg /* Don't want to even think about matrices. */ 261b8e80941Smrg if (ir1->operands[0]->type->is_matrix() || 262b8e80941Smrg ir1->operands[1]->type->is_matrix() || 263b8e80941Smrg ir2->operands[0]->type->is_matrix() || 264b8e80941Smrg ir2->operands[1]->type->is_matrix()) 265b8e80941Smrg return false; 266b8e80941Smrg 267b8e80941Smrg void *mem_ctx = ralloc_parent(ir2); 268b8e80941Smrg 269b8e80941Smrg ir_constant *ir2_const[2]; 270b8e80941Smrg ir2_const[0] = ir2->operands[0]->constant_expression_value(mem_ctx); 271b8e80941Smrg ir2_const[1] = ir2->operands[1]->constant_expression_value(mem_ctx); 272b8e80941Smrg 273b8e80941Smrg if (ir2_const[0] && ir2_const[1]) 274b8e80941Smrg return false; 275b8e80941Smrg 276b8e80941Smrg if (ir2_const[0]) { 277b8e80941Smrg reassociate_operands(ir1, const_index, ir2, 1); 278b8e80941Smrg return true; 279b8e80941Smrg } else if (ir2_const[1]) { 280b8e80941Smrg reassociate_operands(ir1, const_index, ir2, 0); 281b8e80941Smrg return true; 282b8e80941Smrg } 283b8e80941Smrg 284b8e80941Smrg if (reassociate_constant(ir1, const_index, constant, 285b8e80941Smrg ir2->operands[0]->as_expression())) { 286b8e80941Smrg update_type(ir2); 287b8e80941Smrg return true; 288b8e80941Smrg } 289b8e80941Smrg 290b8e80941Smrg if (reassociate_constant(ir1, const_index, constant, 291b8e80941Smrg ir2->operands[1]->as_expression())) { 292b8e80941Smrg update_type(ir2); 293b8e80941Smrg return true; 294b8e80941Smrg } 295b8e80941Smrg 296b8e80941Smrg return false; 297b8e80941Smrg} 298b8e80941Smrg 299b8e80941Smrg/* When eliminating an expression and just returning one of its operands, 300b8e80941Smrg * we may need to swizzle that operand out to a vector if the expression was 301b8e80941Smrg * vector type. 302b8e80941Smrg */ 303b8e80941Smrgir_rvalue * 304b8e80941Smrgir_algebraic_visitor::swizzle_if_required(ir_expression *expr, 305b8e80941Smrg ir_rvalue *operand) 306b8e80941Smrg{ 307b8e80941Smrg if (expr->type->is_vector() && operand->type->is_scalar()) { 308b8e80941Smrg return new(mem_ctx) ir_swizzle(operand, 0, 0, 0, 0, 309b8e80941Smrg expr->type->vector_elements); 310b8e80941Smrg } else 311b8e80941Smrg return operand; 312b8e80941Smrg} 313b8e80941Smrg 314b8e80941Smrgir_rvalue * 315b8e80941Smrgir_algebraic_visitor::handle_expression(ir_expression *ir) 316b8e80941Smrg{ 317b8e80941Smrg ir_constant *op_const[4] = {NULL, NULL, NULL, NULL}; 318b8e80941Smrg ir_expression *op_expr[4] = {NULL, NULL, NULL, NULL}; 319b8e80941Smrg 320b8e80941Smrg if (ir->operation == ir_binop_mul && 321b8e80941Smrg ir->operands[0]->type->is_matrix() && 322b8e80941Smrg ir->operands[1]->type->is_vector()) { 323b8e80941Smrg ir_expression *matrix_mul = ir->operands[0]->as_expression(); 324b8e80941Smrg 325b8e80941Smrg if (matrix_mul && matrix_mul->operation == ir_binop_mul && 326b8e80941Smrg matrix_mul->operands[0]->type->is_matrix() && 327b8e80941Smrg matrix_mul->operands[1]->type->is_matrix()) { 328b8e80941Smrg 329b8e80941Smrg return mul(matrix_mul->operands[0], 330b8e80941Smrg mul(matrix_mul->operands[1], ir->operands[1])); 331b8e80941Smrg } 332b8e80941Smrg } 333b8e80941Smrg 334b8e80941Smrg assert(ir->num_operands <= 4); 335b8e80941Smrg for (unsigned i = 0; i < ir->num_operands; i++) { 336b8e80941Smrg if (ir->operands[i]->type->is_matrix()) 337b8e80941Smrg return ir; 338b8e80941Smrg 339b8e80941Smrg op_const[i] = 340b8e80941Smrg ir->operands[i]->constant_expression_value(ralloc_parent(ir)); 341b8e80941Smrg op_expr[i] = ir->operands[i]->as_expression(); 342b8e80941Smrg } 343b8e80941Smrg 344b8e80941Smrg if (this->mem_ctx == NULL) 345b8e80941Smrg this->mem_ctx = ralloc_parent(ir); 346b8e80941Smrg 347b8e80941Smrg switch (ir->operation) { 348b8e80941Smrg case ir_unop_bit_not: 349b8e80941Smrg if (op_expr[0] && op_expr[0]->operation == ir_unop_bit_not) 350b8e80941Smrg return op_expr[0]->operands[0]; 351b8e80941Smrg break; 352b8e80941Smrg 353b8e80941Smrg case ir_unop_abs: 354b8e80941Smrg if (op_expr[0] == NULL) 355b8e80941Smrg break; 356b8e80941Smrg 357b8e80941Smrg switch (op_expr[0]->operation) { 358b8e80941Smrg case ir_unop_abs: 359b8e80941Smrg case ir_unop_neg: 360b8e80941Smrg return abs(op_expr[0]->operands[0]); 361b8e80941Smrg default: 362b8e80941Smrg break; 363b8e80941Smrg } 364b8e80941Smrg break; 365b8e80941Smrg 366b8e80941Smrg case ir_unop_neg: 367b8e80941Smrg if (op_expr[0] == NULL) 368b8e80941Smrg break; 369b8e80941Smrg 370b8e80941Smrg if (op_expr[0]->operation == ir_unop_neg) { 371b8e80941Smrg return op_expr[0]->operands[0]; 372b8e80941Smrg } 373b8e80941Smrg break; 374b8e80941Smrg 375b8e80941Smrg case ir_unop_exp: 376b8e80941Smrg if (op_expr[0] == NULL) 377b8e80941Smrg break; 378b8e80941Smrg 379b8e80941Smrg if (op_expr[0]->operation == ir_unop_log) { 380b8e80941Smrg return op_expr[0]->operands[0]; 381b8e80941Smrg } 382b8e80941Smrg break; 383b8e80941Smrg 384b8e80941Smrg case ir_unop_log: 385b8e80941Smrg if (op_expr[0] == NULL) 386b8e80941Smrg break; 387b8e80941Smrg 388b8e80941Smrg if (op_expr[0]->operation == ir_unop_exp) { 389b8e80941Smrg return op_expr[0]->operands[0]; 390b8e80941Smrg } 391b8e80941Smrg break; 392b8e80941Smrg 393b8e80941Smrg case ir_unop_exp2: 394b8e80941Smrg if (op_expr[0] == NULL) 395b8e80941Smrg break; 396b8e80941Smrg 397b8e80941Smrg if (op_expr[0]->operation == ir_unop_log2) { 398b8e80941Smrg return op_expr[0]->operands[0]; 399b8e80941Smrg } 400b8e80941Smrg 401b8e80941Smrg if (!options->EmitNoPow && op_expr[0]->operation == ir_binop_mul) { 402b8e80941Smrg for (int log2_pos = 0; log2_pos < 2; log2_pos++) { 403b8e80941Smrg ir_expression *log2_expr = 404b8e80941Smrg op_expr[0]->operands[log2_pos]->as_expression(); 405b8e80941Smrg 406b8e80941Smrg if (log2_expr && log2_expr->operation == ir_unop_log2) { 407b8e80941Smrg return new(mem_ctx) ir_expression(ir_binop_pow, 408b8e80941Smrg ir->type, 409b8e80941Smrg log2_expr->operands[0], 410b8e80941Smrg op_expr[0]->operands[1 - log2_pos]); 411b8e80941Smrg } 412b8e80941Smrg } 413b8e80941Smrg } 414b8e80941Smrg break; 415b8e80941Smrg 416b8e80941Smrg case ir_unop_log2: 417b8e80941Smrg if (op_expr[0] == NULL) 418b8e80941Smrg break; 419b8e80941Smrg 420b8e80941Smrg if (op_expr[0]->operation == ir_unop_exp2) { 421b8e80941Smrg return op_expr[0]->operands[0]; 422b8e80941Smrg } 423b8e80941Smrg break; 424b8e80941Smrg 425b8e80941Smrg case ir_unop_f2i: 426b8e80941Smrg case ir_unop_f2u: 427b8e80941Smrg if (op_expr[0] && op_expr[0]->operation == ir_unop_trunc) { 428b8e80941Smrg return new(mem_ctx) ir_expression(ir->operation, 429b8e80941Smrg ir->type, 430b8e80941Smrg op_expr[0]->operands[0]); 431b8e80941Smrg } 432b8e80941Smrg break; 433b8e80941Smrg 434b8e80941Smrg case ir_unop_logic_not: { 435b8e80941Smrg enum ir_expression_operation new_op = ir_unop_logic_not; 436b8e80941Smrg 437b8e80941Smrg if (op_expr[0] == NULL) 438b8e80941Smrg break; 439b8e80941Smrg 440b8e80941Smrg switch (op_expr[0]->operation) { 441b8e80941Smrg case ir_binop_less: new_op = ir_binop_gequal; break; 442b8e80941Smrg case ir_binop_gequal: new_op = ir_binop_less; break; 443b8e80941Smrg case ir_binop_equal: new_op = ir_binop_nequal; break; 444b8e80941Smrg case ir_binop_nequal: new_op = ir_binop_equal; break; 445b8e80941Smrg case ir_binop_all_equal: new_op = ir_binop_any_nequal; break; 446b8e80941Smrg case ir_binop_any_nequal: new_op = ir_binop_all_equal; break; 447b8e80941Smrg 448b8e80941Smrg default: 449b8e80941Smrg /* The default case handler is here to silence a warning from GCC. 450b8e80941Smrg */ 451b8e80941Smrg break; 452b8e80941Smrg } 453b8e80941Smrg 454b8e80941Smrg if (new_op != ir_unop_logic_not) { 455b8e80941Smrg return new(mem_ctx) ir_expression(new_op, 456b8e80941Smrg ir->type, 457b8e80941Smrg op_expr[0]->operands[0], 458b8e80941Smrg op_expr[0]->operands[1]); 459b8e80941Smrg } 460b8e80941Smrg 461b8e80941Smrg break; 462b8e80941Smrg } 463b8e80941Smrg 464b8e80941Smrg case ir_unop_saturate: 465b8e80941Smrg if (op_expr[0] && op_expr[0]->operation == ir_binop_add) { 466b8e80941Smrg ir_expression *b2f_0 = op_expr[0]->operands[0]->as_expression(); 467b8e80941Smrg ir_expression *b2f_1 = op_expr[0]->operands[1]->as_expression(); 468b8e80941Smrg 469b8e80941Smrg if (b2f_0 && b2f_0->operation == ir_unop_b2f && 470b8e80941Smrg b2f_1 && b2f_1->operation == ir_unop_b2f) { 471b8e80941Smrg return b2f(logic_or(b2f_0->operands[0], b2f_1->operands[0])); 472b8e80941Smrg } 473b8e80941Smrg } 474b8e80941Smrg break; 475b8e80941Smrg 476b8e80941Smrg /* This macro CANNOT use the do { } while(true) mechanism because 477b8e80941Smrg * then the breaks apply to the loop instead of the switch! 478b8e80941Smrg */ 479b8e80941Smrg#define HANDLE_PACK_UNPACK_INVERSE(inverse_operation) \ 480b8e80941Smrg { \ 481b8e80941Smrg ir_expression *const op = ir->operands[0]->as_expression(); \ 482b8e80941Smrg if (op == NULL) \ 483b8e80941Smrg break; \ 484b8e80941Smrg if (op->operation == (inverse_operation)) \ 485b8e80941Smrg return op->operands[0]; \ 486b8e80941Smrg break; \ 487b8e80941Smrg } 488b8e80941Smrg 489b8e80941Smrg case ir_unop_unpack_uint_2x32: 490b8e80941Smrg HANDLE_PACK_UNPACK_INVERSE(ir_unop_pack_uint_2x32); 491b8e80941Smrg case ir_unop_pack_uint_2x32: 492b8e80941Smrg HANDLE_PACK_UNPACK_INVERSE(ir_unop_unpack_uint_2x32); 493b8e80941Smrg case ir_unop_unpack_int_2x32: 494b8e80941Smrg HANDLE_PACK_UNPACK_INVERSE(ir_unop_pack_int_2x32); 495b8e80941Smrg case ir_unop_pack_int_2x32: 496b8e80941Smrg HANDLE_PACK_UNPACK_INVERSE(ir_unop_unpack_int_2x32); 497b8e80941Smrg case ir_unop_unpack_double_2x32: 498b8e80941Smrg HANDLE_PACK_UNPACK_INVERSE(ir_unop_pack_double_2x32); 499b8e80941Smrg case ir_unop_pack_double_2x32: 500b8e80941Smrg HANDLE_PACK_UNPACK_INVERSE(ir_unop_unpack_double_2x32); 501b8e80941Smrg 502b8e80941Smrg#undef HANDLE_PACK_UNPACK_INVERSE 503b8e80941Smrg 504b8e80941Smrg case ir_binop_add: 505b8e80941Smrg if (is_vec_zero(op_const[0])) 506b8e80941Smrg return ir->operands[1]; 507b8e80941Smrg if (is_vec_zero(op_const[1])) 508b8e80941Smrg return ir->operands[0]; 509b8e80941Smrg 510b8e80941Smrg /* Replace (x + (-x)) with constant 0 */ 511b8e80941Smrg for (int i = 0; i < 2; i++) { 512b8e80941Smrg if (op_expr[i]) { 513b8e80941Smrg if (op_expr[i]->operation == ir_unop_neg) { 514b8e80941Smrg ir_rvalue *other = ir->operands[(i + 1) % 2]; 515b8e80941Smrg if (other && op_expr[i]->operands[0]->equals(other)) { 516b8e80941Smrg return ir_constant::zero(ir, ir->type); 517b8e80941Smrg } 518b8e80941Smrg } 519b8e80941Smrg } 520b8e80941Smrg } 521b8e80941Smrg 522b8e80941Smrg /* Reassociate addition of constants so that we can do constant 523b8e80941Smrg * folding. 524b8e80941Smrg */ 525b8e80941Smrg if (op_const[0] && !op_const[1]) 526b8e80941Smrg reassociate_constant(ir, 0, op_const[0], op_expr[1]); 527b8e80941Smrg if (op_const[1] && !op_const[0]) 528b8e80941Smrg reassociate_constant(ir, 1, op_const[1], op_expr[0]); 529b8e80941Smrg 530b8e80941Smrg /* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */ 531b8e80941Smrg if (options->OptimizeForAOS) { 532b8e80941Smrg ir_expression *expr = try_replace_with_dot(op_expr[0], op_expr[1], 533b8e80941Smrg mem_ctx); 534b8e80941Smrg if (expr) 535b8e80941Smrg return expr; 536b8e80941Smrg } 537b8e80941Smrg 538b8e80941Smrg /* Replace (-x + y) * a + x and commutative variations with lrp(x, y, a). 539b8e80941Smrg * 540b8e80941Smrg * (-x + y) * a + x 541b8e80941Smrg * (x * -a) + (y * a) + x 542b8e80941Smrg * x + (x * -a) + (y * a) 543b8e80941Smrg * x * (1 - a) + y * a 544b8e80941Smrg * lrp(x, y, a) 545b8e80941Smrg */ 546b8e80941Smrg for (int mul_pos = 0; mul_pos < 2; mul_pos++) { 547b8e80941Smrg ir_expression *mul = op_expr[mul_pos]; 548b8e80941Smrg 549b8e80941Smrg if (!mul || mul->operation != ir_binop_mul) 550b8e80941Smrg continue; 551b8e80941Smrg 552b8e80941Smrg /* Multiply found on one of the operands. Now check for an 553b8e80941Smrg * inner addition operation. 554b8e80941Smrg */ 555b8e80941Smrg for (int inner_add_pos = 0; inner_add_pos < 2; inner_add_pos++) { 556b8e80941Smrg ir_expression *inner_add = 557b8e80941Smrg mul->operands[inner_add_pos]->as_expression(); 558b8e80941Smrg 559b8e80941Smrg if (!inner_add || inner_add->operation != ir_binop_add) 560b8e80941Smrg continue; 561b8e80941Smrg 562b8e80941Smrg /* Inner addition found on one of the operands. Now check for 563b8e80941Smrg * one of the operands of the inner addition to be the negative 564b8e80941Smrg * of x_operand. 565b8e80941Smrg */ 566b8e80941Smrg for (int neg_pos = 0; neg_pos < 2; neg_pos++) { 567b8e80941Smrg ir_expression *neg = 568b8e80941Smrg inner_add->operands[neg_pos]->as_expression(); 569b8e80941Smrg 570b8e80941Smrg if (!neg || neg->operation != ir_unop_neg) 571b8e80941Smrg continue; 572b8e80941Smrg 573b8e80941Smrg ir_rvalue *x_operand = ir->operands[1 - mul_pos]; 574b8e80941Smrg 575b8e80941Smrg if (!neg->operands[0]->equals(x_operand)) 576b8e80941Smrg continue; 577b8e80941Smrg 578b8e80941Smrg ir_rvalue *y_operand = inner_add->operands[1 - neg_pos]; 579b8e80941Smrg ir_rvalue *a_operand = mul->operands[1 - inner_add_pos]; 580b8e80941Smrg 581b8e80941Smrg if (x_operand->type != y_operand->type || 582b8e80941Smrg x_operand->type != a_operand->type) 583b8e80941Smrg continue; 584b8e80941Smrg 585b8e80941Smrg return lrp(x_operand, y_operand, a_operand); 586b8e80941Smrg } 587b8e80941Smrg } 588b8e80941Smrg } 589b8e80941Smrg 590b8e80941Smrg break; 591b8e80941Smrg 592b8e80941Smrg case ir_binop_sub: 593b8e80941Smrg if (is_vec_zero(op_const[0])) 594b8e80941Smrg return neg(ir->operands[1]); 595b8e80941Smrg if (is_vec_zero(op_const[1])) 596b8e80941Smrg return ir->operands[0]; 597b8e80941Smrg break; 598b8e80941Smrg 599b8e80941Smrg case ir_binop_mul: 600b8e80941Smrg if (is_vec_one(op_const[0])) 601b8e80941Smrg return ir->operands[1]; 602b8e80941Smrg if (is_vec_one(op_const[1])) 603b8e80941Smrg return ir->operands[0]; 604b8e80941Smrg 605b8e80941Smrg if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) 606b8e80941Smrg return ir_constant::zero(ir, ir->type); 607b8e80941Smrg 608b8e80941Smrg if (is_vec_negative_one(op_const[0])) 609b8e80941Smrg return neg(ir->operands[1]); 610b8e80941Smrg if (is_vec_negative_one(op_const[1])) 611b8e80941Smrg return neg(ir->operands[0]); 612b8e80941Smrg 613b8e80941Smrg if (op_expr[0] && op_expr[0]->operation == ir_unop_b2f && 614b8e80941Smrg op_expr[1] && op_expr[1]->operation == ir_unop_b2f) { 615b8e80941Smrg return b2f(logic_and(op_expr[0]->operands[0], op_expr[1]->operands[0])); 616b8e80941Smrg } 617b8e80941Smrg 618b8e80941Smrg /* Reassociate multiplication of constants so that we can do 619b8e80941Smrg * constant folding. 620b8e80941Smrg */ 621b8e80941Smrg if (op_const[0] && !op_const[1]) 622b8e80941Smrg reassociate_constant(ir, 0, op_const[0], op_expr[1]); 623b8e80941Smrg if (op_const[1] && !op_const[0]) 624b8e80941Smrg reassociate_constant(ir, 1, op_const[1], op_expr[0]); 625b8e80941Smrg 626b8e80941Smrg /* Optimizes 627b8e80941Smrg * 628b8e80941Smrg * (mul (floor (add (abs x) 0.5) (sign x))) 629b8e80941Smrg * 630b8e80941Smrg * into 631b8e80941Smrg * 632b8e80941Smrg * (trunc (add x (mul (sign x) 0.5))) 633b8e80941Smrg */ 634b8e80941Smrg for (int i = 0; i < 2; i++) { 635b8e80941Smrg ir_expression *sign_expr = ir->operands[i]->as_expression(); 636b8e80941Smrg ir_expression *floor_expr = ir->operands[1 - i]->as_expression(); 637b8e80941Smrg 638b8e80941Smrg if (!sign_expr || sign_expr->operation != ir_unop_sign || 639b8e80941Smrg !floor_expr || floor_expr->operation != ir_unop_floor) 640b8e80941Smrg continue; 641b8e80941Smrg 642b8e80941Smrg ir_expression *add_expr = floor_expr->operands[0]->as_expression(); 643b8e80941Smrg if (!add_expr || add_expr->operation != ir_binop_add) 644b8e80941Smrg continue; 645b8e80941Smrg 646b8e80941Smrg for (int j = 0; j < 2; j++) { 647b8e80941Smrg ir_expression *abs_expr = add_expr->operands[j]->as_expression(); 648b8e80941Smrg if (!abs_expr || abs_expr->operation != ir_unop_abs) 649b8e80941Smrg continue; 650b8e80941Smrg 651b8e80941Smrg ir_constant *point_five = add_expr->operands[1 - j]->as_constant(); 652b8e80941Smrg if (!point_five || !point_five->is_value(0.5, 0)) 653b8e80941Smrg continue; 654b8e80941Smrg 655b8e80941Smrg if (abs_expr->operands[0]->equals(sign_expr->operands[0])) { 656b8e80941Smrg return trunc(add(abs_expr->operands[0], 657b8e80941Smrg mul(sign_expr, point_five))); 658b8e80941Smrg } 659b8e80941Smrg } 660b8e80941Smrg } 661b8e80941Smrg break; 662b8e80941Smrg 663b8e80941Smrg case ir_binop_div: 664b8e80941Smrg if (is_vec_one(op_const[0]) && ( 665b8e80941Smrg ir->type->is_float() || ir->type->is_double())) { 666b8e80941Smrg return new(mem_ctx) ir_expression(ir_unop_rcp, 667b8e80941Smrg ir->operands[1]->type, 668b8e80941Smrg ir->operands[1], 669b8e80941Smrg NULL); 670b8e80941Smrg } 671b8e80941Smrg if (is_vec_one(op_const[1])) 672b8e80941Smrg return ir->operands[0]; 673b8e80941Smrg break; 674b8e80941Smrg 675b8e80941Smrg case ir_binop_dot: 676b8e80941Smrg if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) 677b8e80941Smrg return ir_constant::zero(mem_ctx, ir->type); 678b8e80941Smrg 679b8e80941Smrg for (int i = 0; i < 2; i++) { 680b8e80941Smrg if (!op_const[i]) 681b8e80941Smrg continue; 682b8e80941Smrg 683b8e80941Smrg unsigned components[4] = { 0 }, count = 0; 684b8e80941Smrg 685b8e80941Smrg for (unsigned c = 0; c < op_const[i]->type->vector_elements; c++) { 686b8e80941Smrg if (op_const[i]->is_zero()) 687b8e80941Smrg continue; 688b8e80941Smrg 689b8e80941Smrg components[count] = c; 690b8e80941Smrg count++; 691b8e80941Smrg } 692b8e80941Smrg 693b8e80941Smrg /* No channels had zero values; bail. */ 694b8e80941Smrg if (count >= op_const[i]->type->vector_elements) 695b8e80941Smrg break; 696b8e80941Smrg 697b8e80941Smrg ir_expression_operation op = count == 1 ? 698b8e80941Smrg ir_binop_mul : ir_binop_dot; 699b8e80941Smrg 700b8e80941Smrg /* Swizzle both operands to remove the channels that were zero. */ 701b8e80941Smrg return new(mem_ctx) 702b8e80941Smrg ir_expression(op, ir->type, 703b8e80941Smrg new(mem_ctx) ir_swizzle(ir->operands[0], 704b8e80941Smrg components, count), 705b8e80941Smrg new(mem_ctx) ir_swizzle(ir->operands[1], 706b8e80941Smrg components, count)); 707b8e80941Smrg } 708b8e80941Smrg break; 709b8e80941Smrg 710b8e80941Smrg case ir_binop_less: 711b8e80941Smrg case ir_binop_gequal: 712b8e80941Smrg case ir_binop_equal: 713b8e80941Smrg case ir_binop_nequal: 714b8e80941Smrg for (int add_pos = 0; add_pos < 2; add_pos++) { 715b8e80941Smrg ir_expression *add = op_expr[add_pos]; 716b8e80941Smrg 717b8e80941Smrg if (!add || add->operation != ir_binop_add) 718b8e80941Smrg continue; 719b8e80941Smrg 720b8e80941Smrg ir_constant *zero = op_const[1 - add_pos]; 721b8e80941Smrg if (!is_vec_zero(zero)) 722b8e80941Smrg continue; 723b8e80941Smrg 724b8e80941Smrg /* We are allowed to add scalars with a vector or matrix. In that 725b8e80941Smrg * case lets just exit early. 726b8e80941Smrg */ 727b8e80941Smrg if (add->operands[0]->type != add->operands[1]->type) 728b8e80941Smrg continue; 729b8e80941Smrg 730b8e80941Smrg /* Depending of the zero position we want to optimize 731b8e80941Smrg * (0 cmp x+y) into (-x cmp y) or (x+y cmp 0) into (x cmp -y) 732b8e80941Smrg */ 733b8e80941Smrg if (add_pos == 1) { 734b8e80941Smrg return new(mem_ctx) ir_expression(ir->operation, 735b8e80941Smrg neg(add->operands[0]), 736b8e80941Smrg add->operands[1]); 737b8e80941Smrg } else { 738b8e80941Smrg return new(mem_ctx) ir_expression(ir->operation, 739b8e80941Smrg add->operands[0], 740b8e80941Smrg neg(add->operands[1])); 741b8e80941Smrg } 742b8e80941Smrg } 743b8e80941Smrg break; 744b8e80941Smrg 745b8e80941Smrg case ir_binop_all_equal: 746b8e80941Smrg case ir_binop_any_nequal: 747b8e80941Smrg if (ir->operands[0]->type->is_scalar() && 748b8e80941Smrg ir->operands[1]->type->is_scalar()) 749b8e80941Smrg return new(mem_ctx) ir_expression(ir->operation == ir_binop_all_equal 750b8e80941Smrg ? ir_binop_equal : ir_binop_nequal, 751b8e80941Smrg ir->operands[0], 752b8e80941Smrg ir->operands[1]); 753b8e80941Smrg break; 754b8e80941Smrg 755b8e80941Smrg case ir_binop_rshift: 756b8e80941Smrg case ir_binop_lshift: 757b8e80941Smrg /* 0 >> x == 0 */ 758b8e80941Smrg if (is_vec_zero(op_const[0])) 759b8e80941Smrg return ir->operands[0]; 760b8e80941Smrg /* x >> 0 == x */ 761b8e80941Smrg if (is_vec_zero(op_const[1])) 762b8e80941Smrg return ir->operands[0]; 763b8e80941Smrg break; 764b8e80941Smrg 765b8e80941Smrg case ir_binop_logic_and: 766b8e80941Smrg if (is_vec_one(op_const[0])) { 767b8e80941Smrg return ir->operands[1]; 768b8e80941Smrg } else if (is_vec_one(op_const[1])) { 769b8e80941Smrg return ir->operands[0]; 770b8e80941Smrg } else if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) { 771b8e80941Smrg return ir_constant::zero(mem_ctx, ir->type); 772b8e80941Smrg } else if (op_expr[0] && op_expr[0]->operation == ir_unop_logic_not && 773b8e80941Smrg op_expr[1] && op_expr[1]->operation == ir_unop_logic_not) { 774b8e80941Smrg /* De Morgan's Law: 775b8e80941Smrg * (not A) and (not B) === not (A or B) 776b8e80941Smrg */ 777b8e80941Smrg return logic_not(logic_or(op_expr[0]->operands[0], 778b8e80941Smrg op_expr[1]->operands[0])); 779b8e80941Smrg } else if (ir->operands[0]->equals(ir->operands[1])) { 780b8e80941Smrg /* (a && a) == a */ 781b8e80941Smrg return ir->operands[0]; 782b8e80941Smrg } 783b8e80941Smrg break; 784b8e80941Smrg 785b8e80941Smrg case ir_binop_logic_xor: 786b8e80941Smrg if (is_vec_zero(op_const[0])) { 787b8e80941Smrg return ir->operands[1]; 788b8e80941Smrg } else if (is_vec_zero(op_const[1])) { 789b8e80941Smrg return ir->operands[0]; 790b8e80941Smrg } else if (is_vec_one(op_const[0])) { 791b8e80941Smrg return logic_not(ir->operands[1]); 792b8e80941Smrg } else if (is_vec_one(op_const[1])) { 793b8e80941Smrg return logic_not(ir->operands[0]); 794b8e80941Smrg } else if (ir->operands[0]->equals(ir->operands[1])) { 795b8e80941Smrg /* (a ^^ a) == false */ 796b8e80941Smrg return ir_constant::zero(mem_ctx, ir->type); 797b8e80941Smrg } 798b8e80941Smrg break; 799b8e80941Smrg 800b8e80941Smrg case ir_binop_logic_or: 801b8e80941Smrg if (is_vec_zero(op_const[0])) { 802b8e80941Smrg return ir->operands[1]; 803b8e80941Smrg } else if (is_vec_zero(op_const[1])) { 804b8e80941Smrg return ir->operands[0]; 805b8e80941Smrg } else if (is_vec_one(op_const[0]) || is_vec_one(op_const[1])) { 806b8e80941Smrg ir_constant_data data; 807b8e80941Smrg 808b8e80941Smrg for (unsigned i = 0; i < 16; i++) 809b8e80941Smrg data.b[i] = true; 810b8e80941Smrg 811b8e80941Smrg return new(mem_ctx) ir_constant(ir->type, &data); 812b8e80941Smrg } else if (op_expr[0] && op_expr[0]->operation == ir_unop_logic_not && 813b8e80941Smrg op_expr[1] && op_expr[1]->operation == ir_unop_logic_not) { 814b8e80941Smrg /* De Morgan's Law: 815b8e80941Smrg * (not A) or (not B) === not (A and B) 816b8e80941Smrg */ 817b8e80941Smrg return logic_not(logic_and(op_expr[0]->operands[0], 818b8e80941Smrg op_expr[1]->operands[0])); 819b8e80941Smrg } else if (ir->operands[0]->equals(ir->operands[1])) { 820b8e80941Smrg /* (a || a) == a */ 821b8e80941Smrg return ir->operands[0]; 822b8e80941Smrg } 823b8e80941Smrg break; 824b8e80941Smrg 825b8e80941Smrg case ir_binop_pow: 826b8e80941Smrg /* 1^x == 1 */ 827b8e80941Smrg if (is_vec_one(op_const[0])) 828b8e80941Smrg return op_const[0]; 829b8e80941Smrg 830b8e80941Smrg /* x^1 == x */ 831b8e80941Smrg if (is_vec_one(op_const[1])) 832b8e80941Smrg return ir->operands[0]; 833b8e80941Smrg 834b8e80941Smrg /* pow(2,x) == exp2(x) */ 835b8e80941Smrg if (is_vec_two(op_const[0])) 836b8e80941Smrg return expr(ir_unop_exp2, ir->operands[1]); 837b8e80941Smrg 838b8e80941Smrg if (is_vec_two(op_const[1])) { 839b8e80941Smrg ir_variable *x = new(ir) ir_variable(ir->operands[1]->type, "x", 840b8e80941Smrg ir_var_temporary); 841b8e80941Smrg base_ir->insert_before(x); 842b8e80941Smrg base_ir->insert_before(assign(x, ir->operands[0])); 843b8e80941Smrg return mul(x, x); 844b8e80941Smrg } 845b8e80941Smrg 846b8e80941Smrg if (is_vec_four(op_const[1])) { 847b8e80941Smrg ir_variable *x = new(ir) ir_variable(ir->operands[1]->type, "x", 848b8e80941Smrg ir_var_temporary); 849b8e80941Smrg base_ir->insert_before(x); 850b8e80941Smrg base_ir->insert_before(assign(x, ir->operands[0])); 851b8e80941Smrg 852b8e80941Smrg ir_variable *squared = new(ir) ir_variable(ir->operands[1]->type, 853b8e80941Smrg "squared", 854b8e80941Smrg ir_var_temporary); 855b8e80941Smrg base_ir->insert_before(squared); 856b8e80941Smrg base_ir->insert_before(assign(squared, mul(x, x))); 857b8e80941Smrg return mul(squared, squared); 858b8e80941Smrg } 859b8e80941Smrg 860b8e80941Smrg break; 861b8e80941Smrg 862b8e80941Smrg case ir_binop_min: 863b8e80941Smrg case ir_binop_max: 864b8e80941Smrg if (!ir->type->is_float() || options->EmitNoSat) 865b8e80941Smrg break; 866b8e80941Smrg 867b8e80941Smrg /* Replace min(max) operations and its commutative combinations with 868b8e80941Smrg * a saturate operation 869b8e80941Smrg */ 870b8e80941Smrg for (int op = 0; op < 2; op++) { 871b8e80941Smrg ir_expression *inner_expr = op_expr[op]; 872b8e80941Smrg ir_constant *outer_const = op_const[1 - op]; 873b8e80941Smrg ir_expression_operation op_cond = (ir->operation == ir_binop_max) ? 874b8e80941Smrg ir_binop_min : ir_binop_max; 875b8e80941Smrg 876b8e80941Smrg if (!inner_expr || !outer_const || (inner_expr->operation != op_cond)) 877b8e80941Smrg continue; 878b8e80941Smrg 879b8e80941Smrg /* One of these has to be a constant */ 880b8e80941Smrg if (!inner_expr->operands[0]->as_constant() && 881b8e80941Smrg !inner_expr->operands[1]->as_constant()) 882b8e80941Smrg break; 883b8e80941Smrg 884b8e80941Smrg /* Found a min(max) combination. Now try to see if its operands 885b8e80941Smrg * meet our conditions that we can do just a single saturate operation 886b8e80941Smrg */ 887b8e80941Smrg for (int minmax_op = 0; minmax_op < 2; minmax_op++) { 888b8e80941Smrg ir_rvalue *x = inner_expr->operands[minmax_op]; 889b8e80941Smrg ir_rvalue *y = inner_expr->operands[1 - minmax_op]; 890b8e80941Smrg 891b8e80941Smrg ir_constant *inner_const = y->as_constant(); 892b8e80941Smrg if (!inner_const) 893b8e80941Smrg continue; 894b8e80941Smrg 895b8e80941Smrg /* min(max(x, 0.0), 1.0) is sat(x) */ 896b8e80941Smrg if (ir->operation == ir_binop_min && 897b8e80941Smrg inner_const->is_zero() && 898b8e80941Smrg outer_const->is_one()) 899b8e80941Smrg return saturate(x); 900b8e80941Smrg 901b8e80941Smrg /* max(min(x, 1.0), 0.0) is sat(x) */ 902b8e80941Smrg if (ir->operation == ir_binop_max && 903b8e80941Smrg inner_const->is_one() && 904b8e80941Smrg outer_const->is_zero()) 905b8e80941Smrg return saturate(x); 906b8e80941Smrg 907b8e80941Smrg /* min(max(x, 0.0), b) where b < 1.0 is sat(min(x, b)) */ 908b8e80941Smrg if (ir->operation == ir_binop_min && 909b8e80941Smrg inner_const->is_zero() && 910b8e80941Smrg is_less_than_one(outer_const)) 911b8e80941Smrg return saturate(expr(ir_binop_min, x, outer_const)); 912b8e80941Smrg 913b8e80941Smrg /* max(min(x, b), 0.0) where b < 1.0 is sat(min(x, b)) */ 914b8e80941Smrg if (ir->operation == ir_binop_max && 915b8e80941Smrg is_less_than_one(inner_const) && 916b8e80941Smrg outer_const->is_zero()) 917b8e80941Smrg return saturate(expr(ir_binop_min, x, inner_const)); 918b8e80941Smrg 919b8e80941Smrg /* max(min(x, 1.0), b) where b > 0.0 is sat(max(x, b)) */ 920b8e80941Smrg if (ir->operation == ir_binop_max && 921b8e80941Smrg inner_const->is_one() && 922b8e80941Smrg is_greater_than_zero(outer_const)) 923b8e80941Smrg return saturate(expr(ir_binop_max, x, outer_const)); 924b8e80941Smrg 925b8e80941Smrg /* min(max(x, b), 1.0) where b > 0.0 is sat(max(x, b)) */ 926b8e80941Smrg if (ir->operation == ir_binop_min && 927b8e80941Smrg is_greater_than_zero(inner_const) && 928b8e80941Smrg outer_const->is_one()) 929b8e80941Smrg return saturate(expr(ir_binop_max, x, inner_const)); 930b8e80941Smrg } 931b8e80941Smrg } 932b8e80941Smrg 933b8e80941Smrg break; 934b8e80941Smrg 935b8e80941Smrg case ir_unop_rcp: 936b8e80941Smrg if (op_expr[0] && op_expr[0]->operation == ir_unop_rcp) 937b8e80941Smrg return op_expr[0]->operands[0]; 938b8e80941Smrg 939b8e80941Smrg if (op_expr[0] && (op_expr[0]->operation == ir_unop_exp2 || 940b8e80941Smrg op_expr[0]->operation == ir_unop_exp)) { 941b8e80941Smrg return new(mem_ctx) ir_expression(op_expr[0]->operation, ir->type, 942b8e80941Smrg neg(op_expr[0]->operands[0])); 943b8e80941Smrg } 944b8e80941Smrg 945b8e80941Smrg /* While ir_to_mesa.cpp will lower sqrt(x) to rcp(rsq(x)), it does so at 946b8e80941Smrg * its IR level, so we can always apply this transformation. 947b8e80941Smrg */ 948b8e80941Smrg if (op_expr[0] && op_expr[0]->operation == ir_unop_rsq) 949b8e80941Smrg return sqrt(op_expr[0]->operands[0]); 950b8e80941Smrg 951b8e80941Smrg /* As far as we know, all backends are OK with rsq. */ 952b8e80941Smrg if (op_expr[0] && op_expr[0]->operation == ir_unop_sqrt) { 953b8e80941Smrg return rsq(op_expr[0]->operands[0]); 954b8e80941Smrg } 955b8e80941Smrg 956b8e80941Smrg break; 957b8e80941Smrg 958b8e80941Smrg case ir_triop_fma: 959b8e80941Smrg /* Operands are op0 * op1 + op2. */ 960b8e80941Smrg if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) { 961b8e80941Smrg return ir->operands[2]; 962b8e80941Smrg } else if (is_vec_zero(op_const[2])) { 963b8e80941Smrg return mul(ir->operands[0], ir->operands[1]); 964b8e80941Smrg } else if (is_vec_one(op_const[0])) { 965b8e80941Smrg return add(ir->operands[1], ir->operands[2]); 966b8e80941Smrg } else if (is_vec_one(op_const[1])) { 967b8e80941Smrg return add(ir->operands[0], ir->operands[2]); 968b8e80941Smrg } 969b8e80941Smrg break; 970b8e80941Smrg 971b8e80941Smrg case ir_triop_lrp: 972b8e80941Smrg /* Operands are (x, y, a). */ 973b8e80941Smrg if (is_vec_zero(op_const[2])) { 974b8e80941Smrg return ir->operands[0]; 975b8e80941Smrg } else if (is_vec_one(op_const[2])) { 976b8e80941Smrg return ir->operands[1]; 977b8e80941Smrg } else if (ir->operands[0]->equals(ir->operands[1])) { 978b8e80941Smrg return ir->operands[0]; 979b8e80941Smrg } else if (is_vec_zero(op_const[0])) { 980b8e80941Smrg return mul(ir->operands[1], ir->operands[2]); 981b8e80941Smrg } else if (is_vec_zero(op_const[1])) { 982b8e80941Smrg unsigned op2_components = ir->operands[2]->type->vector_elements; 983b8e80941Smrg ir_constant *one; 984b8e80941Smrg 985b8e80941Smrg switch (ir->type->base_type) { 986b8e80941Smrg case GLSL_TYPE_FLOAT: 987b8e80941Smrg one = new(mem_ctx) ir_constant(1.0f, op2_components); 988b8e80941Smrg break; 989b8e80941Smrg case GLSL_TYPE_DOUBLE: 990b8e80941Smrg one = new(mem_ctx) ir_constant(1.0, op2_components); 991b8e80941Smrg break; 992b8e80941Smrg default: 993b8e80941Smrg one = NULL; 994b8e80941Smrg unreachable("unexpected type"); 995b8e80941Smrg } 996b8e80941Smrg 997b8e80941Smrg return mul(ir->operands[0], add(one, neg(ir->operands[2]))); 998b8e80941Smrg } 999b8e80941Smrg break; 1000b8e80941Smrg 1001b8e80941Smrg case ir_triop_csel: 1002b8e80941Smrg if (is_vec_one(op_const[0])) 1003b8e80941Smrg return ir->operands[1]; 1004b8e80941Smrg if (is_vec_zero(op_const[0])) 1005b8e80941Smrg return ir->operands[2]; 1006b8e80941Smrg break; 1007b8e80941Smrg 1008b8e80941Smrg /* Remove interpolateAt* instructions for demoted inputs. They are 1009b8e80941Smrg * assigned a constant expression to facilitate this. 1010b8e80941Smrg */ 1011b8e80941Smrg case ir_unop_interpolate_at_centroid: 1012b8e80941Smrg case ir_binop_interpolate_at_offset: 1013b8e80941Smrg case ir_binop_interpolate_at_sample: 1014b8e80941Smrg if (op_const[0]) 1015b8e80941Smrg return ir->operands[0]; 1016b8e80941Smrg break; 1017b8e80941Smrg 1018b8e80941Smrg default: 1019b8e80941Smrg break; 1020b8e80941Smrg } 1021b8e80941Smrg 1022b8e80941Smrg return ir; 1023b8e80941Smrg} 1024b8e80941Smrg 1025b8e80941Smrgvoid 1026b8e80941Smrgir_algebraic_visitor::handle_rvalue(ir_rvalue **rvalue) 1027b8e80941Smrg{ 1028b8e80941Smrg if (!*rvalue) 1029b8e80941Smrg return; 1030b8e80941Smrg 1031b8e80941Smrg ir_expression *expr = (*rvalue)->as_expression(); 1032b8e80941Smrg if (!expr || expr->operation == ir_quadop_vector) 1033b8e80941Smrg return; 1034b8e80941Smrg 1035b8e80941Smrg ir_rvalue *new_rvalue = handle_expression(expr); 1036b8e80941Smrg if (new_rvalue == *rvalue) 1037b8e80941Smrg return; 1038b8e80941Smrg 1039b8e80941Smrg /* If the expr used to be some vec OP scalar returning a vector, and the 1040b8e80941Smrg * optimization gave us back a scalar, we still need to turn it into a 1041b8e80941Smrg * vector. 1042b8e80941Smrg */ 1043b8e80941Smrg *rvalue = swizzle_if_required(expr, new_rvalue); 1044b8e80941Smrg 1045b8e80941Smrg this->progress = true; 1046b8e80941Smrg} 1047b8e80941Smrg 1048b8e80941Smrgbool 1049b8e80941Smrgdo_algebraic(exec_list *instructions, bool native_integers, 1050b8e80941Smrg const struct gl_shader_compiler_options *options) 1051b8e80941Smrg{ 1052b8e80941Smrg ir_algebraic_visitor v(native_integers, options); 1053b8e80941Smrg 1054b8e80941Smrg visit_list_elements(&v, instructions); 1055b8e80941Smrg 1056b8e80941Smrg return v.progress; 1057b8e80941Smrg} 1058