1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24/** 25 * \file opt_algebraic.cpp 26 * 27 * Takes advantage of association, commutivity, and other algebraic 28 * properties to simplify expressions. 29 */ 30 31#include "ir.h" 32#include "ir_visitor.h" 33#include "ir_rvalue_visitor.h" 34#include "ir_optimization.h" 35#include "ir_builder.h" 36#include "compiler/glsl_types.h" 37#include "main/mtypes.h" 38 39using namespace ir_builder; 40 41namespace { 42 43/** 44 * Visitor class for replacing expressions with ir_constant values. 45 */ 46 47class ir_algebraic_visitor : public ir_rvalue_visitor { 48public: 49 ir_algebraic_visitor(bool native_integers, 50 const struct gl_shader_compiler_options *options) 51 : options(options) 52 { 53 this->progress = false; 54 this->mem_ctx = NULL; 55 this->native_integers = native_integers; 56 } 57 58 virtual ~ir_algebraic_visitor() 59 { 60 } 61 62 virtual ir_visitor_status visit_enter(ir_assignment *ir); 63 64 ir_rvalue *handle_expression(ir_expression *ir); 65 void handle_rvalue(ir_rvalue **rvalue); 66 bool reassociate_constant(ir_expression *ir1, 67 int const_index, 68 ir_constant *constant, 69 ir_expression *ir2); 70 void reassociate_operands(ir_expression *ir1, 71 int op1, 72 ir_expression *ir2, 73 int op2); 74 ir_rvalue *swizzle_if_required(ir_expression *expr, 75 ir_rvalue *operand); 76 77 const struct gl_shader_compiler_options *options; 78 void *mem_ctx; 79 80 bool native_integers; 81 bool progress; 82}; 83 84} /* unnamed namespace */ 85 86ir_visitor_status 87ir_algebraic_visitor::visit_enter(ir_assignment *ir) 88{ 89 ir_variable *var = ir->lhs->variable_referenced(); 90 if (var->data.invariant || var->data.precise) { 91 /* If we're assigning to an invariant or precise variable, just bail. 92 * Most of the algebraic optimizations aren't precision-safe. 93 * 94 * FINISHME: Find out which optimizations are precision-safe and enable 95 * then only for invariant or precise trees. 96 */ 97 return visit_continue_with_parent; 98 } else { 99 return visit_continue; 100 } 101} 102 103static inline bool 104is_vec_zero(ir_constant *ir) 105{ 106 return (ir == NULL) ? false : ir->is_zero(); 107} 108 109static inline bool 110is_vec_one(ir_constant *ir) 111{ 112 return (ir == NULL) ? false : ir->is_one(); 113} 114 115static inline bool 116is_vec_two(ir_constant *ir) 117{ 118 return (ir == NULL) ? false : ir->is_value(2.0, 2); 119} 120 121static inline bool 122is_vec_four(ir_constant *ir) 123{ 124 return (ir == NULL) ? false : ir->is_value(4.0, 4); 125} 126 127static inline bool 128is_vec_negative_one(ir_constant *ir) 129{ 130 return (ir == NULL) ? false : ir->is_negative_one(); 131} 132 133static inline bool 134is_valid_vec_const(ir_constant *ir) 135{ 136 if (ir == NULL) 137 return false; 138 139 if (!ir->type->is_scalar() && !ir->type->is_vector()) 140 return false; 141 142 return true; 143} 144 145static inline bool 146is_less_than_one(ir_constant *ir) 147{ 148 assert(ir->type->is_float()); 149 150 if (!is_valid_vec_const(ir)) 151 return false; 152 153 unsigned component = 0; 154 for (int c = 0; c < ir->type->vector_elements; c++) { 155 if (ir->get_float_component(c) < 1.0f) 156 component++; 157 } 158 159 return (component == ir->type->vector_elements); 160} 161 162static inline bool 163is_greater_than_zero(ir_constant *ir) 164{ 165 assert(ir->type->is_float()); 166 167 if (!is_valid_vec_const(ir)) 168 return false; 169 170 unsigned component = 0; 171 for (int c = 0; c < ir->type->vector_elements; c++) { 172 if (ir->get_float_component(c) > 0.0f) 173 component++; 174 } 175 176 return (component == ir->type->vector_elements); 177} 178 179static void 180update_type(ir_expression *ir) 181{ 182 if (ir->operands[0]->type->is_vector()) 183 ir->type = ir->operands[0]->type; 184 else 185 ir->type = ir->operands[1]->type; 186} 187 188/* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */ 189static ir_expression * 190try_replace_with_dot(ir_expression *expr0, ir_expression *expr1, void *mem_ctx) 191{ 192 if (expr0 && expr0->operation == ir_binop_add && 193 expr0->type->is_float() && 194 expr1 && expr1->operation == ir_binop_add && 195 expr1->type->is_float()) { 196 ir_swizzle *x = expr0->operands[0]->as_swizzle(); 197 ir_swizzle *y = expr0->operands[1]->as_swizzle(); 198 ir_swizzle *z = expr1->operands[0]->as_swizzle(); 199 ir_swizzle *w = expr1->operands[1]->as_swizzle(); 200 201 if (!x || x->mask.num_components != 1 || 202 !y || y->mask.num_components != 1 || 203 !z || z->mask.num_components != 1 || 204 !w || w->mask.num_components != 1) { 205 return NULL; 206 } 207 208 bool swiz_seen[4] = {false, false, false, false}; 209 swiz_seen[x->mask.x] = true; 210 swiz_seen[y->mask.x] = true; 211 swiz_seen[z->mask.x] = true; 212 swiz_seen[w->mask.x] = true; 213 214 if (!swiz_seen[0] || !swiz_seen[1] || 215 !swiz_seen[2] || !swiz_seen[3]) { 216 return NULL; 217 } 218 219 if (x->val->equals(y->val) && 220 x->val->equals(z->val) && 221 x->val->equals(w->val)) { 222 return dot(x->val, new(mem_ctx) ir_constant(1.0f, 4)); 223 } 224 } 225 return NULL; 226} 227 228void 229ir_algebraic_visitor::reassociate_operands(ir_expression *ir1, 230 int op1, 231 ir_expression *ir2, 232 int op2) 233{ 234 ir_rvalue *temp = ir2->operands[op2]; 235 ir2->operands[op2] = ir1->operands[op1]; 236 ir1->operands[op1] = temp; 237 238 /* Update the type of ir2. The type of ir1 won't have changed -- 239 * base types matched, and at least one of the operands of the 2 240 * binops is still a vector if any of them were. 241 */ 242 update_type(ir2); 243 244 this->progress = true; 245} 246 247/** 248 * Reassociates a constant down a tree of adds or multiplies. 249 * 250 * Consider (2 * (a * (b * 0.5))). We want to end up with a * b. 251 */ 252bool 253ir_algebraic_visitor::reassociate_constant(ir_expression *ir1, int const_index, 254 ir_constant *constant, 255 ir_expression *ir2) 256{ 257 if (!ir2 || ir1->operation != ir2->operation) 258 return false; 259 260 /* Don't want to even think about matrices. */ 261 if (ir1->operands[0]->type->is_matrix() || 262 ir1->operands[1]->type->is_matrix() || 263 ir2->operands[0]->type->is_matrix() || 264 ir2->operands[1]->type->is_matrix()) 265 return false; 266 267 void *mem_ctx = ralloc_parent(ir2); 268 269 ir_constant *ir2_const[2]; 270 ir2_const[0] = ir2->operands[0]->constant_expression_value(mem_ctx); 271 ir2_const[1] = ir2->operands[1]->constant_expression_value(mem_ctx); 272 273 if (ir2_const[0] && ir2_const[1]) 274 return false; 275 276 if (ir2_const[0]) { 277 reassociate_operands(ir1, const_index, ir2, 1); 278 return true; 279 } else if (ir2_const[1]) { 280 reassociate_operands(ir1, const_index, ir2, 0); 281 return true; 282 } 283 284 if (reassociate_constant(ir1, const_index, constant, 285 ir2->operands[0]->as_expression())) { 286 update_type(ir2); 287 return true; 288 } 289 290 if (reassociate_constant(ir1, const_index, constant, 291 ir2->operands[1]->as_expression())) { 292 update_type(ir2); 293 return true; 294 } 295 296 return false; 297} 298 299/* When eliminating an expression and just returning one of its operands, 300 * we may need to swizzle that operand out to a vector if the expression was 301 * vector type. 302 */ 303ir_rvalue * 304ir_algebraic_visitor::swizzle_if_required(ir_expression *expr, 305 ir_rvalue *operand) 306{ 307 if (expr->type->is_vector() && operand->type->is_scalar()) { 308 return new(mem_ctx) ir_swizzle(operand, 0, 0, 0, 0, 309 expr->type->vector_elements); 310 } else 311 return operand; 312} 313 314ir_rvalue * 315ir_algebraic_visitor::handle_expression(ir_expression *ir) 316{ 317 ir_constant *op_const[4] = {NULL, NULL, NULL, NULL}; 318 ir_expression *op_expr[4] = {NULL, NULL, NULL, NULL}; 319 320 if (ir->operation == ir_binop_mul && 321 ir->operands[0]->type->is_matrix() && 322 ir->operands[1]->type->is_vector()) { 323 ir_expression *matrix_mul = ir->operands[0]->as_expression(); 324 325 if (matrix_mul && matrix_mul->operation == ir_binop_mul && 326 matrix_mul->operands[0]->type->is_matrix() && 327 matrix_mul->operands[1]->type->is_matrix()) { 328 329 return mul(matrix_mul->operands[0], 330 mul(matrix_mul->operands[1], ir->operands[1])); 331 } 332 } 333 334 assert(ir->num_operands <= 4); 335 for (unsigned i = 0; i < ir->num_operands; i++) { 336 if (ir->operands[i]->type->is_matrix()) 337 return ir; 338 339 op_const[i] = 340 ir->operands[i]->constant_expression_value(ralloc_parent(ir)); 341 op_expr[i] = ir->operands[i]->as_expression(); 342 } 343 344 if (this->mem_ctx == NULL) 345 this->mem_ctx = ralloc_parent(ir); 346 347 switch (ir->operation) { 348 case ir_unop_bit_not: 349 if (op_expr[0] && op_expr[0]->operation == ir_unop_bit_not) 350 return op_expr[0]->operands[0]; 351 break; 352 353 case ir_unop_abs: 354 if (op_expr[0] == NULL) 355 break; 356 357 switch (op_expr[0]->operation) { 358 case ir_unop_abs: 359 case ir_unop_neg: 360 return abs(op_expr[0]->operands[0]); 361 default: 362 break; 363 } 364 break; 365 366 case ir_unop_neg: 367 if (op_expr[0] == NULL) 368 break; 369 370 if (op_expr[0]->operation == ir_unop_neg) { 371 return op_expr[0]->operands[0]; 372 } 373 break; 374 375 case ir_unop_exp: 376 if (op_expr[0] == NULL) 377 break; 378 379 if (op_expr[0]->operation == ir_unop_log) { 380 return op_expr[0]->operands[0]; 381 } 382 break; 383 384 case ir_unop_log: 385 if (op_expr[0] == NULL) 386 break; 387 388 if (op_expr[0]->operation == ir_unop_exp) { 389 return op_expr[0]->operands[0]; 390 } 391 break; 392 393 case ir_unop_exp2: 394 if (op_expr[0] == NULL) 395 break; 396 397 if (op_expr[0]->operation == ir_unop_log2) { 398 return op_expr[0]->operands[0]; 399 } 400 401 if (!options->EmitNoPow && op_expr[0]->operation == ir_binop_mul) { 402 for (int log2_pos = 0; log2_pos < 2; log2_pos++) { 403 ir_expression *log2_expr = 404 op_expr[0]->operands[log2_pos]->as_expression(); 405 406 if (log2_expr && log2_expr->operation == ir_unop_log2) { 407 return new(mem_ctx) ir_expression(ir_binop_pow, 408 ir->type, 409 log2_expr->operands[0], 410 op_expr[0]->operands[1 - log2_pos]); 411 } 412 } 413 } 414 break; 415 416 case ir_unop_log2: 417 if (op_expr[0] == NULL) 418 break; 419 420 if (op_expr[0]->operation == ir_unop_exp2) { 421 return op_expr[0]->operands[0]; 422 } 423 break; 424 425 case ir_unop_f2i: 426 case ir_unop_f2u: 427 if (op_expr[0] && op_expr[0]->operation == ir_unop_trunc) { 428 return new(mem_ctx) ir_expression(ir->operation, 429 ir->type, 430 op_expr[0]->operands[0]); 431 } 432 break; 433 434 case ir_unop_logic_not: { 435 enum ir_expression_operation new_op = ir_unop_logic_not; 436 437 if (op_expr[0] == NULL) 438 break; 439 440 switch (op_expr[0]->operation) { 441 case ir_binop_less: new_op = ir_binop_gequal; break; 442 case ir_binop_gequal: new_op = ir_binop_less; break; 443 case ir_binop_equal: new_op = ir_binop_nequal; break; 444 case ir_binop_nequal: new_op = ir_binop_equal; break; 445 case ir_binop_all_equal: new_op = ir_binop_any_nequal; break; 446 case ir_binop_any_nequal: new_op = ir_binop_all_equal; break; 447 448 default: 449 /* The default case handler is here to silence a warning from GCC. 450 */ 451 break; 452 } 453 454 if (new_op != ir_unop_logic_not) { 455 return new(mem_ctx) ir_expression(new_op, 456 ir->type, 457 op_expr[0]->operands[0], 458 op_expr[0]->operands[1]); 459 } 460 461 break; 462 } 463 464 case ir_unop_saturate: 465 if (op_expr[0] && op_expr[0]->operation == ir_binop_add) { 466 ir_expression *b2f_0 = op_expr[0]->operands[0]->as_expression(); 467 ir_expression *b2f_1 = op_expr[0]->operands[1]->as_expression(); 468 469 if (b2f_0 && b2f_0->operation == ir_unop_b2f && 470 b2f_1 && b2f_1->operation == ir_unop_b2f) { 471 return b2f(logic_or(b2f_0->operands[0], b2f_1->operands[0])); 472 } 473 } 474 break; 475 476 /* This macro CANNOT use the do { } while(true) mechanism because 477 * then the breaks apply to the loop instead of the switch! 478 */ 479#define HANDLE_PACK_UNPACK_INVERSE(inverse_operation) \ 480 { \ 481 ir_expression *const op = ir->operands[0]->as_expression(); \ 482 if (op == NULL) \ 483 break; \ 484 if (op->operation == (inverse_operation)) \ 485 return op->operands[0]; \ 486 break; \ 487 } 488 489 case ir_unop_unpack_uint_2x32: 490 HANDLE_PACK_UNPACK_INVERSE(ir_unop_pack_uint_2x32); 491 case ir_unop_pack_uint_2x32: 492 HANDLE_PACK_UNPACK_INVERSE(ir_unop_unpack_uint_2x32); 493 case ir_unop_unpack_int_2x32: 494 HANDLE_PACK_UNPACK_INVERSE(ir_unop_pack_int_2x32); 495 case ir_unop_pack_int_2x32: 496 HANDLE_PACK_UNPACK_INVERSE(ir_unop_unpack_int_2x32); 497 case ir_unop_unpack_double_2x32: 498 HANDLE_PACK_UNPACK_INVERSE(ir_unop_pack_double_2x32); 499 case ir_unop_pack_double_2x32: 500 HANDLE_PACK_UNPACK_INVERSE(ir_unop_unpack_double_2x32); 501 502#undef HANDLE_PACK_UNPACK_INVERSE 503 504 case ir_binop_add: 505 if (is_vec_zero(op_const[0])) 506 return ir->operands[1]; 507 if (is_vec_zero(op_const[1])) 508 return ir->operands[0]; 509 510 /* Replace (x + (-x)) with constant 0 */ 511 for (int i = 0; i < 2; i++) { 512 if (op_expr[i]) { 513 if (op_expr[i]->operation == ir_unop_neg) { 514 ir_rvalue *other = ir->operands[(i + 1) % 2]; 515 if (other && op_expr[i]->operands[0]->equals(other)) { 516 return ir_constant::zero(ir, ir->type); 517 } 518 } 519 } 520 } 521 522 /* Reassociate addition of constants so that we can do constant 523 * folding. 524 */ 525 if (op_const[0] && !op_const[1]) 526 reassociate_constant(ir, 0, op_const[0], op_expr[1]); 527 if (op_const[1] && !op_const[0]) 528 reassociate_constant(ir, 1, op_const[1], op_expr[0]); 529 530 /* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */ 531 if (options->OptimizeForAOS) { 532 ir_expression *expr = try_replace_with_dot(op_expr[0], op_expr[1], 533 mem_ctx); 534 if (expr) 535 return expr; 536 } 537 538 /* Replace (-x + y) * a + x and commutative variations with lrp(x, y, a). 539 * 540 * (-x + y) * a + x 541 * (x * -a) + (y * a) + x 542 * x + (x * -a) + (y * a) 543 * x * (1 - a) + y * a 544 * lrp(x, y, a) 545 */ 546 for (int mul_pos = 0; mul_pos < 2; mul_pos++) { 547 ir_expression *mul = op_expr[mul_pos]; 548 549 if (!mul || mul->operation != ir_binop_mul) 550 continue; 551 552 /* Multiply found on one of the operands. Now check for an 553 * inner addition operation. 554 */ 555 for (int inner_add_pos = 0; inner_add_pos < 2; inner_add_pos++) { 556 ir_expression *inner_add = 557 mul->operands[inner_add_pos]->as_expression(); 558 559 if (!inner_add || inner_add->operation != ir_binop_add) 560 continue; 561 562 /* Inner addition found on one of the operands. Now check for 563 * one of the operands of the inner addition to be the negative 564 * of x_operand. 565 */ 566 for (int neg_pos = 0; neg_pos < 2; neg_pos++) { 567 ir_expression *neg = 568 inner_add->operands[neg_pos]->as_expression(); 569 570 if (!neg || neg->operation != ir_unop_neg) 571 continue; 572 573 ir_rvalue *x_operand = ir->operands[1 - mul_pos]; 574 575 if (!neg->operands[0]->equals(x_operand)) 576 continue; 577 578 ir_rvalue *y_operand = inner_add->operands[1 - neg_pos]; 579 ir_rvalue *a_operand = mul->operands[1 - inner_add_pos]; 580 581 if (x_operand->type != y_operand->type || 582 x_operand->type != a_operand->type) 583 continue; 584 585 return lrp(x_operand, y_operand, a_operand); 586 } 587 } 588 } 589 590 break; 591 592 case ir_binop_sub: 593 if (is_vec_zero(op_const[0])) 594 return neg(ir->operands[1]); 595 if (is_vec_zero(op_const[1])) 596 return ir->operands[0]; 597 break; 598 599 case ir_binop_mul: 600 if (is_vec_one(op_const[0])) 601 return ir->operands[1]; 602 if (is_vec_one(op_const[1])) 603 return ir->operands[0]; 604 605 if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) 606 return ir_constant::zero(ir, ir->type); 607 608 if (is_vec_negative_one(op_const[0])) 609 return neg(ir->operands[1]); 610 if (is_vec_negative_one(op_const[1])) 611 return neg(ir->operands[0]); 612 613 if (op_expr[0] && op_expr[0]->operation == ir_unop_b2f && 614 op_expr[1] && op_expr[1]->operation == ir_unop_b2f) { 615 return b2f(logic_and(op_expr[0]->operands[0], op_expr[1]->operands[0])); 616 } 617 618 /* Reassociate multiplication of constants so that we can do 619 * constant folding. 620 */ 621 if (op_const[0] && !op_const[1]) 622 reassociate_constant(ir, 0, op_const[0], op_expr[1]); 623 if (op_const[1] && !op_const[0]) 624 reassociate_constant(ir, 1, op_const[1], op_expr[0]); 625 626 /* Optimizes 627 * 628 * (mul (floor (add (abs x) 0.5) (sign x))) 629 * 630 * into 631 * 632 * (trunc (add x (mul (sign x) 0.5))) 633 */ 634 for (int i = 0; i < 2; i++) { 635 ir_expression *sign_expr = ir->operands[i]->as_expression(); 636 ir_expression *floor_expr = ir->operands[1 - i]->as_expression(); 637 638 if (!sign_expr || sign_expr->operation != ir_unop_sign || 639 !floor_expr || floor_expr->operation != ir_unop_floor) 640 continue; 641 642 ir_expression *add_expr = floor_expr->operands[0]->as_expression(); 643 if (!add_expr || add_expr->operation != ir_binop_add) 644 continue; 645 646 for (int j = 0; j < 2; j++) { 647 ir_expression *abs_expr = add_expr->operands[j]->as_expression(); 648 if (!abs_expr || abs_expr->operation != ir_unop_abs) 649 continue; 650 651 ir_constant *point_five = add_expr->operands[1 - j]->as_constant(); 652 if (!point_five || !point_five->is_value(0.5, 0)) 653 continue; 654 655 if (abs_expr->operands[0]->equals(sign_expr->operands[0])) { 656 return trunc(add(abs_expr->operands[0], 657 mul(sign_expr, point_five))); 658 } 659 } 660 } 661 break; 662 663 case ir_binop_div: 664 if (is_vec_one(op_const[0]) && ( 665 ir->type->is_float() || ir->type->is_double())) { 666 return new(mem_ctx) ir_expression(ir_unop_rcp, 667 ir->operands[1]->type, 668 ir->operands[1], 669 NULL); 670 } 671 if (is_vec_one(op_const[1])) 672 return ir->operands[0]; 673 break; 674 675 case ir_binop_dot: 676 if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) 677 return ir_constant::zero(mem_ctx, ir->type); 678 679 for (int i = 0; i < 2; i++) { 680 if (!op_const[i]) 681 continue; 682 683 unsigned components[4] = { 0 }, count = 0; 684 685 for (unsigned c = 0; c < op_const[i]->type->vector_elements; c++) { 686 if (op_const[i]->is_zero()) 687 continue; 688 689 components[count] = c; 690 count++; 691 } 692 693 /* No channels had zero values; bail. */ 694 if (count >= op_const[i]->type->vector_elements) 695 break; 696 697 ir_expression_operation op = count == 1 ? 698 ir_binop_mul : ir_binop_dot; 699 700 /* Swizzle both operands to remove the channels that were zero. */ 701 return new(mem_ctx) 702 ir_expression(op, ir->type, 703 new(mem_ctx) ir_swizzle(ir->operands[0], 704 components, count), 705 new(mem_ctx) ir_swizzle(ir->operands[1], 706 components, count)); 707 } 708 break; 709 710 case ir_binop_less: 711 case ir_binop_gequal: 712 case ir_binop_equal: 713 case ir_binop_nequal: 714 for (int add_pos = 0; add_pos < 2; add_pos++) { 715 ir_expression *add = op_expr[add_pos]; 716 717 if (!add || add->operation != ir_binop_add) 718 continue; 719 720 ir_constant *zero = op_const[1 - add_pos]; 721 if (!is_vec_zero(zero)) 722 continue; 723 724 /* We are allowed to add scalars with a vector or matrix. In that 725 * case lets just exit early. 726 */ 727 if (add->operands[0]->type != add->operands[1]->type) 728 continue; 729 730 /* Depending of the zero position we want to optimize 731 * (0 cmp x+y) into (-x cmp y) or (x+y cmp 0) into (x cmp -y) 732 */ 733 if (add_pos == 1) { 734 return new(mem_ctx) ir_expression(ir->operation, 735 neg(add->operands[0]), 736 add->operands[1]); 737 } else { 738 return new(mem_ctx) ir_expression(ir->operation, 739 add->operands[0], 740 neg(add->operands[1])); 741 } 742 } 743 break; 744 745 case ir_binop_all_equal: 746 case ir_binop_any_nequal: 747 if (ir->operands[0]->type->is_scalar() && 748 ir->operands[1]->type->is_scalar()) 749 return new(mem_ctx) ir_expression(ir->operation == ir_binop_all_equal 750 ? ir_binop_equal : ir_binop_nequal, 751 ir->operands[0], 752 ir->operands[1]); 753 break; 754 755 case ir_binop_rshift: 756 case ir_binop_lshift: 757 /* 0 >> x == 0 */ 758 if (is_vec_zero(op_const[0])) 759 return ir->operands[0]; 760 /* x >> 0 == x */ 761 if (is_vec_zero(op_const[1])) 762 return ir->operands[0]; 763 break; 764 765 case ir_binop_logic_and: 766 if (is_vec_one(op_const[0])) { 767 return ir->operands[1]; 768 } else if (is_vec_one(op_const[1])) { 769 return ir->operands[0]; 770 } else if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) { 771 return ir_constant::zero(mem_ctx, ir->type); 772 } else if (op_expr[0] && op_expr[0]->operation == ir_unop_logic_not && 773 op_expr[1] && op_expr[1]->operation == ir_unop_logic_not) { 774 /* De Morgan's Law: 775 * (not A) and (not B) === not (A or B) 776 */ 777 return logic_not(logic_or(op_expr[0]->operands[0], 778 op_expr[1]->operands[0])); 779 } else if (ir->operands[0]->equals(ir->operands[1])) { 780 /* (a && a) == a */ 781 return ir->operands[0]; 782 } 783 break; 784 785 case ir_binop_logic_xor: 786 if (is_vec_zero(op_const[0])) { 787 return ir->operands[1]; 788 } else if (is_vec_zero(op_const[1])) { 789 return ir->operands[0]; 790 } else if (is_vec_one(op_const[0])) { 791 return logic_not(ir->operands[1]); 792 } else if (is_vec_one(op_const[1])) { 793 return logic_not(ir->operands[0]); 794 } else if (ir->operands[0]->equals(ir->operands[1])) { 795 /* (a ^^ a) == false */ 796 return ir_constant::zero(mem_ctx, ir->type); 797 } 798 break; 799 800 case ir_binop_logic_or: 801 if (is_vec_zero(op_const[0])) { 802 return ir->operands[1]; 803 } else if (is_vec_zero(op_const[1])) { 804 return ir->operands[0]; 805 } else if (is_vec_one(op_const[0]) || is_vec_one(op_const[1])) { 806 ir_constant_data data; 807 808 for (unsigned i = 0; i < 16; i++) 809 data.b[i] = true; 810 811 return new(mem_ctx) ir_constant(ir->type, &data); 812 } else if (op_expr[0] && op_expr[0]->operation == ir_unop_logic_not && 813 op_expr[1] && op_expr[1]->operation == ir_unop_logic_not) { 814 /* De Morgan's Law: 815 * (not A) or (not B) === not (A and B) 816 */ 817 return logic_not(logic_and(op_expr[0]->operands[0], 818 op_expr[1]->operands[0])); 819 } else if (ir->operands[0]->equals(ir->operands[1])) { 820 /* (a || a) == a */ 821 return ir->operands[0]; 822 } 823 break; 824 825 case ir_binop_pow: 826 /* 1^x == 1 */ 827 if (is_vec_one(op_const[0])) 828 return op_const[0]; 829 830 /* x^1 == x */ 831 if (is_vec_one(op_const[1])) 832 return ir->operands[0]; 833 834 /* pow(2,x) == exp2(x) */ 835 if (is_vec_two(op_const[0])) 836 return expr(ir_unop_exp2, ir->operands[1]); 837 838 if (is_vec_two(op_const[1])) { 839 ir_variable *x = new(ir) ir_variable(ir->operands[1]->type, "x", 840 ir_var_temporary); 841 base_ir->insert_before(x); 842 base_ir->insert_before(assign(x, ir->operands[0])); 843 return mul(x, x); 844 } 845 846 if (is_vec_four(op_const[1])) { 847 ir_variable *x = new(ir) ir_variable(ir->operands[1]->type, "x", 848 ir_var_temporary); 849 base_ir->insert_before(x); 850 base_ir->insert_before(assign(x, ir->operands[0])); 851 852 ir_variable *squared = new(ir) ir_variable(ir->operands[1]->type, 853 "squared", 854 ir_var_temporary); 855 base_ir->insert_before(squared); 856 base_ir->insert_before(assign(squared, mul(x, x))); 857 return mul(squared, squared); 858 } 859 860 break; 861 862 case ir_binop_min: 863 case ir_binop_max: 864 if (!ir->type->is_float() || options->EmitNoSat) 865 break; 866 867 /* Replace min(max) operations and its commutative combinations with 868 * a saturate operation 869 */ 870 for (int op = 0; op < 2; op++) { 871 ir_expression *inner_expr = op_expr[op]; 872 ir_constant *outer_const = op_const[1 - op]; 873 ir_expression_operation op_cond = (ir->operation == ir_binop_max) ? 874 ir_binop_min : ir_binop_max; 875 876 if (!inner_expr || !outer_const || (inner_expr->operation != op_cond)) 877 continue; 878 879 /* One of these has to be a constant */ 880 if (!inner_expr->operands[0]->as_constant() && 881 !inner_expr->operands[1]->as_constant()) 882 break; 883 884 /* Found a min(max) combination. Now try to see if its operands 885 * meet our conditions that we can do just a single saturate operation 886 */ 887 for (int minmax_op = 0; minmax_op < 2; minmax_op++) { 888 ir_rvalue *x = inner_expr->operands[minmax_op]; 889 ir_rvalue *y = inner_expr->operands[1 - minmax_op]; 890 891 ir_constant *inner_const = y->as_constant(); 892 if (!inner_const) 893 continue; 894 895 /* min(max(x, 0.0), 1.0) is sat(x) */ 896 if (ir->operation == ir_binop_min && 897 inner_const->is_zero() && 898 outer_const->is_one()) 899 return saturate(x); 900 901 /* max(min(x, 1.0), 0.0) is sat(x) */ 902 if (ir->operation == ir_binop_max && 903 inner_const->is_one() && 904 outer_const->is_zero()) 905 return saturate(x); 906 907 /* min(max(x, 0.0), b) where b < 1.0 is sat(min(x, b)) */ 908 if (ir->operation == ir_binop_min && 909 inner_const->is_zero() && 910 is_less_than_one(outer_const)) 911 return saturate(expr(ir_binop_min, x, outer_const)); 912 913 /* max(min(x, b), 0.0) where b < 1.0 is sat(min(x, b)) */ 914 if (ir->operation == ir_binop_max && 915 is_less_than_one(inner_const) && 916 outer_const->is_zero()) 917 return saturate(expr(ir_binop_min, x, inner_const)); 918 919 /* max(min(x, 1.0), b) where b > 0.0 is sat(max(x, b)) */ 920 if (ir->operation == ir_binop_max && 921 inner_const->is_one() && 922 is_greater_than_zero(outer_const)) 923 return saturate(expr(ir_binop_max, x, outer_const)); 924 925 /* min(max(x, b), 1.0) where b > 0.0 is sat(max(x, b)) */ 926 if (ir->operation == ir_binop_min && 927 is_greater_than_zero(inner_const) && 928 outer_const->is_one()) 929 return saturate(expr(ir_binop_max, x, inner_const)); 930 } 931 } 932 933 break; 934 935 case ir_unop_rcp: 936 if (op_expr[0] && op_expr[0]->operation == ir_unop_rcp) 937 return op_expr[0]->operands[0]; 938 939 if (op_expr[0] && (op_expr[0]->operation == ir_unop_exp2 || 940 op_expr[0]->operation == ir_unop_exp)) { 941 return new(mem_ctx) ir_expression(op_expr[0]->operation, ir->type, 942 neg(op_expr[0]->operands[0])); 943 } 944 945 /* While ir_to_mesa.cpp will lower sqrt(x) to rcp(rsq(x)), it does so at 946 * its IR level, so we can always apply this transformation. 947 */ 948 if (op_expr[0] && op_expr[0]->operation == ir_unop_rsq) 949 return sqrt(op_expr[0]->operands[0]); 950 951 /* As far as we know, all backends are OK with rsq. */ 952 if (op_expr[0] && op_expr[0]->operation == ir_unop_sqrt) { 953 return rsq(op_expr[0]->operands[0]); 954 } 955 956 break; 957 958 case ir_triop_fma: 959 /* Operands are op0 * op1 + op2. */ 960 if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) { 961 return ir->operands[2]; 962 } else if (is_vec_zero(op_const[2])) { 963 return mul(ir->operands[0], ir->operands[1]); 964 } else if (is_vec_one(op_const[0])) { 965 return add(ir->operands[1], ir->operands[2]); 966 } else if (is_vec_one(op_const[1])) { 967 return add(ir->operands[0], ir->operands[2]); 968 } 969 break; 970 971 case ir_triop_lrp: 972 /* Operands are (x, y, a). */ 973 if (is_vec_zero(op_const[2])) { 974 return ir->operands[0]; 975 } else if (is_vec_one(op_const[2])) { 976 return ir->operands[1]; 977 } else if (ir->operands[0]->equals(ir->operands[1])) { 978 return ir->operands[0]; 979 } else if (is_vec_zero(op_const[0])) { 980 return mul(ir->operands[1], ir->operands[2]); 981 } else if (is_vec_zero(op_const[1])) { 982 unsigned op2_components = ir->operands[2]->type->vector_elements; 983 ir_constant *one; 984 985 switch (ir->type->base_type) { 986 case GLSL_TYPE_FLOAT: 987 one = new(mem_ctx) ir_constant(1.0f, op2_components); 988 break; 989 case GLSL_TYPE_DOUBLE: 990 one = new(mem_ctx) ir_constant(1.0, op2_components); 991 break; 992 default: 993 one = NULL; 994 unreachable("unexpected type"); 995 } 996 997 return mul(ir->operands[0], add(one, neg(ir->operands[2]))); 998 } 999 break; 1000 1001 case ir_triop_csel: 1002 if (is_vec_one(op_const[0])) 1003 return ir->operands[1]; 1004 if (is_vec_zero(op_const[0])) 1005 return ir->operands[2]; 1006 break; 1007 1008 /* Remove interpolateAt* instructions for demoted inputs. They are 1009 * assigned a constant expression to facilitate this. 1010 */ 1011 case ir_unop_interpolate_at_centroid: 1012 case ir_binop_interpolate_at_offset: 1013 case ir_binop_interpolate_at_sample: 1014 if (op_const[0]) 1015 return ir->operands[0]; 1016 break; 1017 1018 default: 1019 break; 1020 } 1021 1022 return ir; 1023} 1024 1025void 1026ir_algebraic_visitor::handle_rvalue(ir_rvalue **rvalue) 1027{ 1028 if (!*rvalue) 1029 return; 1030 1031 ir_expression *expr = (*rvalue)->as_expression(); 1032 if (!expr || expr->operation == ir_quadop_vector) 1033 return; 1034 1035 ir_rvalue *new_rvalue = handle_expression(expr); 1036 if (new_rvalue == *rvalue) 1037 return; 1038 1039 /* If the expr used to be some vec OP scalar returning a vector, and the 1040 * optimization gave us back a scalar, we still need to turn it into a 1041 * vector. 1042 */ 1043 *rvalue = swizzle_if_required(expr, new_rvalue); 1044 1045 this->progress = true; 1046} 1047 1048bool 1049do_algebraic(exec_list *instructions, bool native_integers, 1050 const struct gl_shader_compiler_options *options) 1051{ 1052 ir_algebraic_visitor v(native_integers, options); 1053 1054 visit_list_elements(&v, instructions); 1055 1056 return v.progress; 1057} 1058