101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2014 Intel Corporation 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 2101e04c3fSmrg * DEALINGS IN THE SOFTWARE. 2201e04c3fSmrg */ 2301e04c3fSmrg 2401e04c3fSmrg/** 2501e04c3fSmrg * \file opt_minmax.cpp 2601e04c3fSmrg * 2701e04c3fSmrg * Drop operands from an expression tree of only min/max operations if they 2801e04c3fSmrg * can be proven to not contribute to the final result. 2901e04c3fSmrg * 3001e04c3fSmrg * The algorithm is similar to alpha-beta pruning on a minmax search. 3101e04c3fSmrg */ 3201e04c3fSmrg 3301e04c3fSmrg#include "ir.h" 3401e04c3fSmrg#include "ir_visitor.h" 3501e04c3fSmrg#include "ir_rvalue_visitor.h" 3601e04c3fSmrg#include "ir_optimization.h" 3701e04c3fSmrg#include "ir_builder.h" 3801e04c3fSmrg#include "program/prog_instruction.h" 3901e04c3fSmrg#include "compiler/glsl_types.h" 4001e04c3fSmrg#include "main/macros.h" 417ec681f3Smrg#include "util/half_float.h" 4201e04c3fSmrg 4301e04c3fSmrgusing namespace ir_builder; 4401e04c3fSmrg 4501e04c3fSmrgnamespace { 4601e04c3fSmrg 4701e04c3fSmrgenum compare_components_result { 4801e04c3fSmrg LESS, 4901e04c3fSmrg LESS_OR_EQUAL, 5001e04c3fSmrg EQUAL, 5101e04c3fSmrg GREATER_OR_EQUAL, 5201e04c3fSmrg GREATER, 5301e04c3fSmrg MIXED 5401e04c3fSmrg}; 5501e04c3fSmrg 5601e04c3fSmrgclass minmax_range { 5701e04c3fSmrgpublic: 5801e04c3fSmrg minmax_range(ir_constant *low = NULL, ir_constant *high = NULL) 5901e04c3fSmrg { 6001e04c3fSmrg this->low = low; 6101e04c3fSmrg this->high = high; 6201e04c3fSmrg } 6301e04c3fSmrg 6401e04c3fSmrg /* low is the lower limit of the range, high is the higher limit. NULL on 6501e04c3fSmrg * low means negative infinity (unlimited) and on high positive infinity 6601e04c3fSmrg * (unlimited). Because of the two interpretations of the value NULL, 6701e04c3fSmrg * arbitrary comparison between ir_constants is impossible. 6801e04c3fSmrg */ 6901e04c3fSmrg ir_constant *low; 7001e04c3fSmrg ir_constant *high; 7101e04c3fSmrg}; 7201e04c3fSmrg 7301e04c3fSmrgclass ir_minmax_visitor : public ir_rvalue_enter_visitor { 7401e04c3fSmrgpublic: 7501e04c3fSmrg ir_minmax_visitor() 7601e04c3fSmrg : progress(false) 7701e04c3fSmrg { 7801e04c3fSmrg } 7901e04c3fSmrg 8001e04c3fSmrg ir_rvalue *prune_expression(ir_expression *expr, minmax_range baserange); 8101e04c3fSmrg 8201e04c3fSmrg void handle_rvalue(ir_rvalue **rvalue); 8301e04c3fSmrg 8401e04c3fSmrg bool progress; 8501e04c3fSmrg}; 8601e04c3fSmrg 8701e04c3fSmrg/* 8801e04c3fSmrg * Returns LESS if all vector components of `a' are strictly lower than of `b', 8901e04c3fSmrg * GREATER if all vector components of `a' are strictly greater than of `b', 9001e04c3fSmrg * MIXED if some vector components of `a' are strictly lower than of `b' while 9101e04c3fSmrg * others are strictly greater, or EQUAL otherwise. 9201e04c3fSmrg */ 9301e04c3fSmrgstatic enum compare_components_result 9401e04c3fSmrgcompare_components(ir_constant *a, ir_constant *b) 9501e04c3fSmrg{ 9601e04c3fSmrg assert(a != NULL); 9701e04c3fSmrg assert(b != NULL); 9801e04c3fSmrg 9901e04c3fSmrg assert(a->type->base_type == b->type->base_type); 10001e04c3fSmrg 10101e04c3fSmrg unsigned a_inc = a->type->is_scalar() ? 0 : 1; 10201e04c3fSmrg unsigned b_inc = b->type->is_scalar() ? 0 : 1; 10301e04c3fSmrg unsigned components = MAX2(a->type->components(), b->type->components()); 10401e04c3fSmrg 10501e04c3fSmrg bool foundless = false; 10601e04c3fSmrg bool foundgreater = false; 10701e04c3fSmrg bool foundequal = false; 10801e04c3fSmrg 10901e04c3fSmrg for (unsigned i = 0, c0 = 0, c1 = 0; 11001e04c3fSmrg i < components; 11101e04c3fSmrg c0 += a_inc, c1 += b_inc, ++i) { 11201e04c3fSmrg switch (a->type->base_type) { 1137ec681f3Smrg case GLSL_TYPE_UINT16: 1147ec681f3Smrg if (a->value.u16[c0] < b->value.u16[c1]) 1157ec681f3Smrg foundless = true; 1167ec681f3Smrg else if (a->value.u16[c0] > b->value.u16[c1]) 1177ec681f3Smrg foundgreater = true; 1187ec681f3Smrg else 1197ec681f3Smrg foundequal = true; 1207ec681f3Smrg break; 1217ec681f3Smrg case GLSL_TYPE_INT16: 1227ec681f3Smrg if (a->value.i16[c0] < b->value.i16[c1]) 1237ec681f3Smrg foundless = true; 1247ec681f3Smrg else if (a->value.i16[c0] > b->value.i16[c1]) 1257ec681f3Smrg foundgreater = true; 1267ec681f3Smrg else 1277ec681f3Smrg foundequal = true; 1287ec681f3Smrg break; 12901e04c3fSmrg case GLSL_TYPE_UINT: 13001e04c3fSmrg if (a->value.u[c0] < b->value.u[c1]) 13101e04c3fSmrg foundless = true; 13201e04c3fSmrg else if (a->value.u[c0] > b->value.u[c1]) 13301e04c3fSmrg foundgreater = true; 13401e04c3fSmrg else 13501e04c3fSmrg foundequal = true; 13601e04c3fSmrg break; 13701e04c3fSmrg case GLSL_TYPE_INT: 13801e04c3fSmrg if (a->value.i[c0] < b->value.i[c1]) 13901e04c3fSmrg foundless = true; 14001e04c3fSmrg else if (a->value.i[c0] > b->value.i[c1]) 14101e04c3fSmrg foundgreater = true; 14201e04c3fSmrg else 14301e04c3fSmrg foundequal = true; 14401e04c3fSmrg break; 1457ec681f3Smrg case GLSL_TYPE_FLOAT16: { 1467ec681f3Smrg float af = _mesa_half_to_float(a->value.f16[c0]); 1477ec681f3Smrg float bf = _mesa_half_to_float(b->value.f16[c1]); 1487ec681f3Smrg if (af < bf) 1497ec681f3Smrg foundless = true; 1507ec681f3Smrg else if (af > bf) 1517ec681f3Smrg foundgreater = true; 1527ec681f3Smrg else 1537ec681f3Smrg foundequal = true; 1547ec681f3Smrg break; 1557ec681f3Smrg } 15601e04c3fSmrg case GLSL_TYPE_FLOAT: 15701e04c3fSmrg if (a->value.f[c0] < b->value.f[c1]) 15801e04c3fSmrg foundless = true; 15901e04c3fSmrg else if (a->value.f[c0] > b->value.f[c1]) 16001e04c3fSmrg foundgreater = true; 16101e04c3fSmrg else 16201e04c3fSmrg foundequal = true; 16301e04c3fSmrg break; 16401e04c3fSmrg case GLSL_TYPE_DOUBLE: 16501e04c3fSmrg if (a->value.d[c0] < b->value.d[c1]) 16601e04c3fSmrg foundless = true; 16701e04c3fSmrg else if (a->value.d[c0] > b->value.d[c1]) 16801e04c3fSmrg foundgreater = true; 16901e04c3fSmrg else 17001e04c3fSmrg foundequal = true; 17101e04c3fSmrg break; 17201e04c3fSmrg default: 17301e04c3fSmrg unreachable("not reached"); 17401e04c3fSmrg } 17501e04c3fSmrg } 17601e04c3fSmrg 17701e04c3fSmrg if (foundless && foundgreater) { 17801e04c3fSmrg /* Some components are strictly lower, others are strictly greater */ 17901e04c3fSmrg return MIXED; 18001e04c3fSmrg } 18101e04c3fSmrg 18201e04c3fSmrg if (foundequal) { 18301e04c3fSmrg /* It is not mixed, but it is not strictly lower or greater */ 18401e04c3fSmrg if (foundless) 18501e04c3fSmrg return LESS_OR_EQUAL; 18601e04c3fSmrg if (foundgreater) 18701e04c3fSmrg return GREATER_OR_EQUAL; 18801e04c3fSmrg return EQUAL; 18901e04c3fSmrg } 19001e04c3fSmrg 19101e04c3fSmrg /* All components are strictly lower or strictly greater */ 19201e04c3fSmrg return foundless ? LESS : GREATER; 19301e04c3fSmrg} 19401e04c3fSmrg 19501e04c3fSmrgstatic ir_constant * 19601e04c3fSmrgcombine_constant(bool ismin, ir_constant *a, ir_constant *b) 19701e04c3fSmrg{ 19801e04c3fSmrg void *mem_ctx = ralloc_parent(a); 19901e04c3fSmrg ir_constant *c = a->clone(mem_ctx, NULL); 20001e04c3fSmrg for (unsigned i = 0; i < c->type->components(); i++) { 20101e04c3fSmrg switch (c->type->base_type) { 2027ec681f3Smrg case GLSL_TYPE_UINT16: 2037ec681f3Smrg if ((ismin && b->value.u16[i] < c->value.u16[i]) || 2047ec681f3Smrg (!ismin && b->value.u16[i] > c->value.u16[i])) 2057ec681f3Smrg c->value.u16[i] = b->value.u16[i]; 2067ec681f3Smrg break; 2077ec681f3Smrg case GLSL_TYPE_INT16: 2087ec681f3Smrg if ((ismin && b->value.i16[i] < c->value.i16[i]) || 2097ec681f3Smrg (!ismin && b->value.i16[i] > c->value.i16[i])) 2107ec681f3Smrg c->value.i16[i] = b->value.i16[i]; 2117ec681f3Smrg break; 21201e04c3fSmrg case GLSL_TYPE_UINT: 21301e04c3fSmrg if ((ismin && b->value.u[i] < c->value.u[i]) || 21401e04c3fSmrg (!ismin && b->value.u[i] > c->value.u[i])) 21501e04c3fSmrg c->value.u[i] = b->value.u[i]; 21601e04c3fSmrg break; 21701e04c3fSmrg case GLSL_TYPE_INT: 21801e04c3fSmrg if ((ismin && b->value.i[i] < c->value.i[i]) || 21901e04c3fSmrg (!ismin && b->value.i[i] > c->value.i[i])) 22001e04c3fSmrg c->value.i[i] = b->value.i[i]; 22101e04c3fSmrg break; 2227ec681f3Smrg case GLSL_TYPE_FLOAT16: { 2237ec681f3Smrg float bf = _mesa_half_to_float(b->value.f16[i]); 2247ec681f3Smrg float cf = _mesa_half_to_float(c->value.f16[i]); 2257ec681f3Smrg if ((ismin && bf < cf) || (!ismin && bf > cf)) 2267ec681f3Smrg c->value.f16[i] = b->value.f16[i]; 2277ec681f3Smrg break; 2287ec681f3Smrg } 22901e04c3fSmrg case GLSL_TYPE_FLOAT: 23001e04c3fSmrg if ((ismin && b->value.f[i] < c->value.f[i]) || 23101e04c3fSmrg (!ismin && b->value.f[i] > c->value.f[i])) 23201e04c3fSmrg c->value.f[i] = b->value.f[i]; 23301e04c3fSmrg break; 23401e04c3fSmrg case GLSL_TYPE_DOUBLE: 23501e04c3fSmrg if ((ismin && b->value.d[i] < c->value.d[i]) || 23601e04c3fSmrg (!ismin && b->value.d[i] > c->value.d[i])) 23701e04c3fSmrg c->value.d[i] = b->value.d[i]; 23801e04c3fSmrg break; 23901e04c3fSmrg default: 24001e04c3fSmrg assert(!"not reached"); 24101e04c3fSmrg } 24201e04c3fSmrg } 24301e04c3fSmrg return c; 24401e04c3fSmrg} 24501e04c3fSmrg 24601e04c3fSmrgstatic ir_constant * 24701e04c3fSmrgsmaller_constant(ir_constant *a, ir_constant *b) 24801e04c3fSmrg{ 24901e04c3fSmrg assert(a != NULL); 25001e04c3fSmrg assert(b != NULL); 25101e04c3fSmrg 25201e04c3fSmrg enum compare_components_result ret = compare_components(a, b); 25301e04c3fSmrg if (ret == MIXED) 25401e04c3fSmrg return combine_constant(true, a, b); 25501e04c3fSmrg else if (ret < EQUAL) 25601e04c3fSmrg return a; 25701e04c3fSmrg else 25801e04c3fSmrg return b; 25901e04c3fSmrg} 26001e04c3fSmrg 26101e04c3fSmrgstatic ir_constant * 26201e04c3fSmrglarger_constant(ir_constant *a, ir_constant *b) 26301e04c3fSmrg{ 26401e04c3fSmrg assert(a != NULL); 26501e04c3fSmrg assert(b != NULL); 26601e04c3fSmrg 26701e04c3fSmrg enum compare_components_result ret = compare_components(a, b); 26801e04c3fSmrg if (ret == MIXED) 26901e04c3fSmrg return combine_constant(false, a, b); 27001e04c3fSmrg else if (ret < EQUAL) 27101e04c3fSmrg return b; 27201e04c3fSmrg else 27301e04c3fSmrg return a; 27401e04c3fSmrg} 27501e04c3fSmrg 27601e04c3fSmrg/* Combines two ranges by doing an element-wise min() / max() depending on the 27701e04c3fSmrg * operation. 27801e04c3fSmrg */ 27901e04c3fSmrgstatic minmax_range 28001e04c3fSmrgcombine_range(minmax_range r0, minmax_range r1, bool ismin) 28101e04c3fSmrg{ 28201e04c3fSmrg minmax_range ret; 28301e04c3fSmrg 28401e04c3fSmrg if (!r0.low) { 28501e04c3fSmrg ret.low = ismin ? r0.low : r1.low; 28601e04c3fSmrg } else if (!r1.low) { 28701e04c3fSmrg ret.low = ismin ? r1.low : r0.low; 28801e04c3fSmrg } else { 28901e04c3fSmrg ret.low = ismin ? smaller_constant(r0.low, r1.low) : 29001e04c3fSmrg larger_constant(r0.low, r1.low); 29101e04c3fSmrg } 29201e04c3fSmrg 29301e04c3fSmrg if (!r0.high) { 29401e04c3fSmrg ret.high = ismin ? r1.high : r0.high; 29501e04c3fSmrg } else if (!r1.high) { 29601e04c3fSmrg ret.high = ismin ? r0.high : r1.high; 29701e04c3fSmrg } else { 29801e04c3fSmrg ret.high = ismin ? smaller_constant(r0.high, r1.high) : 29901e04c3fSmrg larger_constant(r0.high, r1.high); 30001e04c3fSmrg } 30101e04c3fSmrg 30201e04c3fSmrg return ret; 30301e04c3fSmrg} 30401e04c3fSmrg 30501e04c3fSmrg/* Returns a range so that lower limit is the larger of the two lower limits, 30601e04c3fSmrg * and higher limit is the smaller of the two higher limits. 30701e04c3fSmrg */ 30801e04c3fSmrgstatic minmax_range 30901e04c3fSmrgrange_intersection(minmax_range r0, minmax_range r1) 31001e04c3fSmrg{ 31101e04c3fSmrg minmax_range ret; 31201e04c3fSmrg 31301e04c3fSmrg if (!r0.low) 31401e04c3fSmrg ret.low = r1.low; 31501e04c3fSmrg else if (!r1.low) 31601e04c3fSmrg ret.low = r0.low; 31701e04c3fSmrg else 31801e04c3fSmrg ret.low = larger_constant(r0.low, r1.low); 31901e04c3fSmrg 32001e04c3fSmrg if (!r0.high) 32101e04c3fSmrg ret.high = r1.high; 32201e04c3fSmrg else if (!r1.high) 32301e04c3fSmrg ret.high = r0.high; 32401e04c3fSmrg else 32501e04c3fSmrg ret.high = smaller_constant(r0.high, r1.high); 32601e04c3fSmrg 32701e04c3fSmrg return ret; 32801e04c3fSmrg} 32901e04c3fSmrg 33001e04c3fSmrgstatic minmax_range 33101e04c3fSmrgget_range(ir_rvalue *rval) 33201e04c3fSmrg{ 33301e04c3fSmrg ir_expression *expr = rval->as_expression(); 33401e04c3fSmrg if (expr && (expr->operation == ir_binop_min || 33501e04c3fSmrg expr->operation == ir_binop_max)) { 33601e04c3fSmrg minmax_range r0 = get_range(expr->operands[0]); 33701e04c3fSmrg minmax_range r1 = get_range(expr->operands[1]); 33801e04c3fSmrg return combine_range(r0, r1, expr->operation == ir_binop_min); 33901e04c3fSmrg } 34001e04c3fSmrg 34101e04c3fSmrg ir_constant *c = rval->as_constant(); 34201e04c3fSmrg if (c) { 34301e04c3fSmrg return minmax_range(c, c); 34401e04c3fSmrg } 34501e04c3fSmrg 34601e04c3fSmrg return minmax_range(); 34701e04c3fSmrg} 34801e04c3fSmrg 34901e04c3fSmrg/** 35001e04c3fSmrg * Prunes a min/max expression considering the base range of the parent 35101e04c3fSmrg * min/max expression. 35201e04c3fSmrg * 35301e04c3fSmrg * @param baserange the range that the parents of this min/max expression 35401e04c3fSmrg * in the min/max tree will clamp its value to. 35501e04c3fSmrg */ 35601e04c3fSmrgir_rvalue * 35701e04c3fSmrgir_minmax_visitor::prune_expression(ir_expression *expr, minmax_range baserange) 35801e04c3fSmrg{ 35901e04c3fSmrg assert(expr->operation == ir_binop_min || 36001e04c3fSmrg expr->operation == ir_binop_max); 36101e04c3fSmrg 36201e04c3fSmrg bool ismin = expr->operation == ir_binop_min; 36301e04c3fSmrg minmax_range limits[2]; 36401e04c3fSmrg 36501e04c3fSmrg /* Recurse to get the ranges for each of the subtrees of this 36601e04c3fSmrg * expression. We need to do this as a separate step because we need to 36701e04c3fSmrg * know the ranges of each of the subtrees before we prune either one. 36801e04c3fSmrg * Consider something like this: 36901e04c3fSmrg * 37001e04c3fSmrg * max 37101e04c3fSmrg * / \ 37201e04c3fSmrg * max max 37301e04c3fSmrg * / \ / \ 37401e04c3fSmrg * 3 a b 2 37501e04c3fSmrg * 37601e04c3fSmrg * We would like to prune away the max on the bottom-right, but to do so 37701e04c3fSmrg * we need to know the range of the expression on the left beforehand, 37801e04c3fSmrg * and there's no guarantee that we will visit either subtree in a 37901e04c3fSmrg * particular order. 38001e04c3fSmrg */ 38101e04c3fSmrg for (unsigned i = 0; i < 2; ++i) 38201e04c3fSmrg limits[i] = get_range(expr->operands[i]); 38301e04c3fSmrg 38401e04c3fSmrg for (unsigned i = 0; i < 2; ++i) { 38501e04c3fSmrg bool is_redundant = false; 38601e04c3fSmrg 38701e04c3fSmrg enum compare_components_result cr = LESS; 38801e04c3fSmrg if (ismin) { 38901e04c3fSmrg /* If this operand will always be greater than the other one, it's 39001e04c3fSmrg * redundant. 39101e04c3fSmrg */ 39201e04c3fSmrg if (limits[i].low && limits[1 - i].high) { 39301e04c3fSmrg cr = compare_components(limits[i].low, limits[1 - i].high); 39401e04c3fSmrg if (cr >= EQUAL && cr != MIXED) 39501e04c3fSmrg is_redundant = true; 39601e04c3fSmrg } 39701e04c3fSmrg /* If this operand is always greater than baserange, then even if 39801e04c3fSmrg * it's smaller than the other one it'll get clamped, so it's 39901e04c3fSmrg * redundant. 40001e04c3fSmrg */ 40101e04c3fSmrg if (!is_redundant && limits[i].low && baserange.high) { 40201e04c3fSmrg cr = compare_components(limits[i].low, baserange.high); 40301e04c3fSmrg if (cr > EQUAL && cr != MIXED) 40401e04c3fSmrg is_redundant = true; 40501e04c3fSmrg } 40601e04c3fSmrg } else { 40701e04c3fSmrg /* If this operand will always be lower than the other one, it's 40801e04c3fSmrg * redundant. 40901e04c3fSmrg */ 41001e04c3fSmrg if (limits[i].high && limits[1 - i].low) { 41101e04c3fSmrg cr = compare_components(limits[i].high, limits[1 - i].low); 41201e04c3fSmrg if (cr <= EQUAL) 41301e04c3fSmrg is_redundant = true; 41401e04c3fSmrg } 41501e04c3fSmrg /* If this operand is always lower than baserange, then even if 41601e04c3fSmrg * it's greater than the other one it'll get clamped, so it's 41701e04c3fSmrg * redundant. 41801e04c3fSmrg */ 41901e04c3fSmrg if (!is_redundant && limits[i].high && baserange.low) { 42001e04c3fSmrg cr = compare_components(limits[i].high, baserange.low); 42101e04c3fSmrg if (cr < EQUAL) 42201e04c3fSmrg is_redundant = true; 42301e04c3fSmrg } 42401e04c3fSmrg } 42501e04c3fSmrg 42601e04c3fSmrg if (is_redundant) { 42701e04c3fSmrg progress = true; 42801e04c3fSmrg 42901e04c3fSmrg /* Recurse if necessary. */ 43001e04c3fSmrg ir_expression *op_expr = expr->operands[1 - i]->as_expression(); 43101e04c3fSmrg if (op_expr && (op_expr->operation == ir_binop_min || 43201e04c3fSmrg op_expr->operation == ir_binop_max)) { 43301e04c3fSmrg return prune_expression(op_expr, baserange); 43401e04c3fSmrg } 43501e04c3fSmrg 43601e04c3fSmrg return expr->operands[1 - i]; 43701e04c3fSmrg } else if (cr == MIXED) { 43801e04c3fSmrg /* If we have mixed vector operands, we can try to resolve the minmax 43901e04c3fSmrg * expression by doing a component-wise minmax: 44001e04c3fSmrg * 44101e04c3fSmrg * min min 44201e04c3fSmrg * / \ / \ 44301e04c3fSmrg * min a ===> [1,1] a 44401e04c3fSmrg * / \ 44501e04c3fSmrg * [1,3] [3,1] 44601e04c3fSmrg * 44701e04c3fSmrg */ 44801e04c3fSmrg ir_constant *a = expr->operands[0]->as_constant(); 44901e04c3fSmrg ir_constant *b = expr->operands[1]->as_constant(); 45001e04c3fSmrg if (a && b) 45101e04c3fSmrg return combine_constant(ismin, a, b); 45201e04c3fSmrg } 45301e04c3fSmrg } 45401e04c3fSmrg 45501e04c3fSmrg /* Now recurse to operands giving them the proper baserange. The baserange 45601e04c3fSmrg * to pass is the intersection of our baserange and the other operand's 45701e04c3fSmrg * limit with one of the ranges unlimited. If we can't compute a valid 45801e04c3fSmrg * intersection, we use the current baserange. 45901e04c3fSmrg */ 46001e04c3fSmrg for (unsigned i = 0; i < 2; ++i) { 46101e04c3fSmrg ir_expression *op_expr = expr->operands[i]->as_expression(); 46201e04c3fSmrg if (op_expr && (op_expr->operation == ir_binop_min || 46301e04c3fSmrg op_expr->operation == ir_binop_max)) { 46401e04c3fSmrg /* We can only compute a new baserange for this operand if we managed 46501e04c3fSmrg * to compute a valid range for the other operand. 46601e04c3fSmrg */ 46701e04c3fSmrg if (ismin) 46801e04c3fSmrg limits[1 - i].low = NULL; 46901e04c3fSmrg else 47001e04c3fSmrg limits[1 - i].high = NULL; 47101e04c3fSmrg minmax_range base = range_intersection(limits[1 - i], baserange); 47201e04c3fSmrg expr->operands[i] = prune_expression(op_expr, base); 47301e04c3fSmrg } 47401e04c3fSmrg } 47501e04c3fSmrg 47601e04c3fSmrg /* If we got here we could not discard any of the operands of the minmax 47701e04c3fSmrg * expression, but we can still try to resolve the expression if both 47801e04c3fSmrg * operands are constant. We do this after the loop above, to make sure 47901e04c3fSmrg * that if our operands are minmax expressions we have tried to prune them 48001e04c3fSmrg * first (hopefully reducing them to constants). 48101e04c3fSmrg */ 48201e04c3fSmrg ir_constant *a = expr->operands[0]->as_constant(); 48301e04c3fSmrg ir_constant *b = expr->operands[1]->as_constant(); 48401e04c3fSmrg if (a && b) 48501e04c3fSmrg return combine_constant(ismin, a, b); 48601e04c3fSmrg 48701e04c3fSmrg return expr; 48801e04c3fSmrg} 48901e04c3fSmrg 49001e04c3fSmrgstatic ir_rvalue * 49101e04c3fSmrgswizzle_if_required(ir_expression *expr, ir_rvalue *rval) 49201e04c3fSmrg{ 49301e04c3fSmrg if (expr->type->is_vector() && rval->type->is_scalar()) { 49401e04c3fSmrg return swizzle(rval, SWIZZLE_XXXX, expr->type->vector_elements); 49501e04c3fSmrg } else { 49601e04c3fSmrg return rval; 49701e04c3fSmrg } 49801e04c3fSmrg} 49901e04c3fSmrg 50001e04c3fSmrgvoid 50101e04c3fSmrgir_minmax_visitor::handle_rvalue(ir_rvalue **rvalue) 50201e04c3fSmrg{ 50301e04c3fSmrg if (!*rvalue) 50401e04c3fSmrg return; 50501e04c3fSmrg 50601e04c3fSmrg ir_expression *expr = (*rvalue)->as_expression(); 50701e04c3fSmrg if (!expr || (expr->operation != ir_binop_min && 50801e04c3fSmrg expr->operation != ir_binop_max)) 50901e04c3fSmrg return; 51001e04c3fSmrg 51101e04c3fSmrg ir_rvalue *new_rvalue = prune_expression(expr, minmax_range()); 51201e04c3fSmrg if (new_rvalue == *rvalue) 51301e04c3fSmrg return; 51401e04c3fSmrg 51501e04c3fSmrg /* If the expression type is a vector and the optimization leaves a scalar 51601e04c3fSmrg * as the result, we need to turn it into a vector. 51701e04c3fSmrg */ 51801e04c3fSmrg *rvalue = swizzle_if_required(expr, new_rvalue); 51901e04c3fSmrg 52001e04c3fSmrg progress = true; 52101e04c3fSmrg} 52201e04c3fSmrg 52301e04c3fSmrg} 52401e04c3fSmrg 52501e04c3fSmrgbool 52601e04c3fSmrgdo_minmax_prune(exec_list *instructions) 52701e04c3fSmrg{ 52801e04c3fSmrg ir_minmax_visitor v; 52901e04c3fSmrg 53001e04c3fSmrg visit_list_elements(&v, instructions); 53101e04c3fSmrg 53201e04c3fSmrg return v.progress; 53301e04c3fSmrg} 534