1b8e80941Smrg/*
2b8e80941Smrg * Copyright © 2010 Intel Corporation
3b8e80941Smrg *
4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
6b8e80941Smrg * to deal in the Software without restriction, including without limitation
7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
10b8e80941Smrg *
11b8e80941Smrg * The above copyright notice and this permission notice (including the next
12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
13b8e80941Smrg * Software.
14b8e80941Smrg *
15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21b8e80941Smrg * DEALINGS IN THE SOFTWARE.
22b8e80941Smrg */
23b8e80941Smrg
24b8e80941Smrg/**
25b8e80941Smrg * \file opt_algebraic.cpp
26b8e80941Smrg *
 * Takes advantage of associativity, commutativity, and other algebraic
28b8e80941Smrg * properties to simplify expressions.
29b8e80941Smrg */
30b8e80941Smrg
31b8e80941Smrg#include "ir.h"
32b8e80941Smrg#include "ir_visitor.h"
33b8e80941Smrg#include "ir_rvalue_visitor.h"
34b8e80941Smrg#include "ir_optimization.h"
35b8e80941Smrg#include "ir_builder.h"
36b8e80941Smrg#include "compiler/glsl_types.h"
37b8e80941Smrg#include "main/mtypes.h"
38b8e80941Smrg
39b8e80941Smrgusing namespace ir_builder;
40b8e80941Smrg
41b8e80941Smrgnamespace {
42b8e80941Smrg
/**
 * Visitor class that applies algebraic simplifications (identity elimination,
 * reassociation of constants, inverse-operation cancellation, etc.) to
 * expression trees.
 */
46b8e80941Smrg
class ir_algebraic_visitor : public ir_rvalue_visitor {
public:
   ir_algebraic_visitor(bool native_integers,
                        const struct gl_shader_compiler_options *options)
      : options(options)
   {
      this->progress = false;
      /* Lazily initialized from ralloc_parent() on first use in
       * handle_expression().
       */
      this->mem_ctx = NULL;
      this->native_integers = native_integers;
   }

   virtual ~ir_algebraic_visitor()
   {
   }

   /* Bails out of subtrees rooted at invariant/precise assignments. */
   virtual ir_visitor_status visit_enter(ir_assignment *ir);

   /* Returns a simplified replacement rvalue for \c ir, or \c ir itself if
    * no simplification applied.
    */
   ir_rvalue *handle_expression(ir_expression *ir);
   void handle_rvalue(ir_rvalue **rvalue);
   bool reassociate_constant(ir_expression *ir1,
			     int const_index,
			     ir_constant *constant,
			     ir_expression *ir2);
   void reassociate_operands(ir_expression *ir1,
			     int op1,
			     ir_expression *ir2,
			     int op2);
   /* Broadcasts a scalar \c operand to the vector width of \c expr when
    * needed so a replacement keeps the expression's type.
    */
   ir_rvalue *swizzle_if_required(ir_expression *expr,
				  ir_rvalue *operand);

   const struct gl_shader_compiler_options *options;
   void *mem_ctx;

   /* True when the backend has real integer types (affects which transforms
    * are valid).
    */
   bool native_integers;
   /* Set to true whenever any transform fires; read by the optimization
    * loop driver.
    */
   bool progress;
};
83b8e80941Smrg
84b8e80941Smrg} /* unnamed namespace */
85b8e80941Smrg
86b8e80941Smrgir_visitor_status
87b8e80941Smrgir_algebraic_visitor::visit_enter(ir_assignment *ir)
88b8e80941Smrg{
89b8e80941Smrg   ir_variable *var = ir->lhs->variable_referenced();
90b8e80941Smrg   if (var->data.invariant || var->data.precise) {
91b8e80941Smrg      /* If we're assigning to an invariant or precise variable, just bail.
92b8e80941Smrg       * Most of the algebraic optimizations aren't precision-safe.
93b8e80941Smrg       *
94b8e80941Smrg       * FINISHME: Find out which optimizations are precision-safe and enable
95b8e80941Smrg       * then only for invariant or precise trees.
96b8e80941Smrg       */
97b8e80941Smrg      return visit_continue_with_parent;
98b8e80941Smrg   } else {
99b8e80941Smrg      return visit_continue;
100b8e80941Smrg   }
101b8e80941Smrg}
102b8e80941Smrg
103b8e80941Smrgstatic inline bool
104b8e80941Smrgis_vec_zero(ir_constant *ir)
105b8e80941Smrg{
106b8e80941Smrg   return (ir == NULL) ? false : ir->is_zero();
107b8e80941Smrg}
108b8e80941Smrg
109b8e80941Smrgstatic inline bool
110b8e80941Smrgis_vec_one(ir_constant *ir)
111b8e80941Smrg{
112b8e80941Smrg   return (ir == NULL) ? false : ir->is_one();
113b8e80941Smrg}
114b8e80941Smrg
115b8e80941Smrgstatic inline bool
116b8e80941Smrgis_vec_two(ir_constant *ir)
117b8e80941Smrg{
118b8e80941Smrg   return (ir == NULL) ? false : ir->is_value(2.0, 2);
119b8e80941Smrg}
120b8e80941Smrg
121b8e80941Smrgstatic inline bool
122b8e80941Smrgis_vec_four(ir_constant *ir)
123b8e80941Smrg{
124b8e80941Smrg   return (ir == NULL) ? false : ir->is_value(4.0, 4);
125b8e80941Smrg}
126b8e80941Smrg
127b8e80941Smrgstatic inline bool
128b8e80941Smrgis_vec_negative_one(ir_constant *ir)
129b8e80941Smrg{
130b8e80941Smrg   return (ir == NULL) ? false : ir->is_negative_one();
131b8e80941Smrg}
132b8e80941Smrg
133b8e80941Smrgstatic inline bool
134b8e80941Smrgis_valid_vec_const(ir_constant *ir)
135b8e80941Smrg{
136b8e80941Smrg   if (ir == NULL)
137b8e80941Smrg      return false;
138b8e80941Smrg
139b8e80941Smrg   if (!ir->type->is_scalar() && !ir->type->is_vector())
140b8e80941Smrg      return false;
141b8e80941Smrg
142b8e80941Smrg   return true;
143b8e80941Smrg}
144b8e80941Smrg
145b8e80941Smrgstatic inline bool
146b8e80941Smrgis_less_than_one(ir_constant *ir)
147b8e80941Smrg{
148b8e80941Smrg   assert(ir->type->is_float());
149b8e80941Smrg
150b8e80941Smrg   if (!is_valid_vec_const(ir))
151b8e80941Smrg      return false;
152b8e80941Smrg
153b8e80941Smrg   unsigned component = 0;
154b8e80941Smrg   for (int c = 0; c < ir->type->vector_elements; c++) {
155b8e80941Smrg      if (ir->get_float_component(c) < 1.0f)
156b8e80941Smrg         component++;
157b8e80941Smrg   }
158b8e80941Smrg
159b8e80941Smrg   return (component == ir->type->vector_elements);
160b8e80941Smrg}
161b8e80941Smrg
162b8e80941Smrgstatic inline bool
163b8e80941Smrgis_greater_than_zero(ir_constant *ir)
164b8e80941Smrg{
165b8e80941Smrg   assert(ir->type->is_float());
166b8e80941Smrg
167b8e80941Smrg   if (!is_valid_vec_const(ir))
168b8e80941Smrg      return false;
169b8e80941Smrg
170b8e80941Smrg   unsigned component = 0;
171b8e80941Smrg   for (int c = 0; c < ir->type->vector_elements; c++) {
172b8e80941Smrg      if (ir->get_float_component(c) > 0.0f)
173b8e80941Smrg         component++;
174b8e80941Smrg   }
175b8e80941Smrg
176b8e80941Smrg   return (component == ir->type->vector_elements);
177b8e80941Smrg}
178b8e80941Smrg
179b8e80941Smrgstatic void
180b8e80941Smrgupdate_type(ir_expression *ir)
181b8e80941Smrg{
182b8e80941Smrg   if (ir->operands[0]->type->is_vector())
183b8e80941Smrg      ir->type = ir->operands[0]->type;
184b8e80941Smrg   else
185b8e80941Smrg      ir->type = ir->operands[1]->type;
186b8e80941Smrg}
187b8e80941Smrg
188b8e80941Smrg/* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */
189b8e80941Smrgstatic ir_expression *
190b8e80941Smrgtry_replace_with_dot(ir_expression *expr0, ir_expression *expr1, void *mem_ctx)
191b8e80941Smrg{
192b8e80941Smrg   if (expr0 && expr0->operation == ir_binop_add &&
193b8e80941Smrg       expr0->type->is_float() &&
194b8e80941Smrg       expr1 && expr1->operation == ir_binop_add &&
195b8e80941Smrg       expr1->type->is_float()) {
196b8e80941Smrg      ir_swizzle *x = expr0->operands[0]->as_swizzle();
197b8e80941Smrg      ir_swizzle *y = expr0->operands[1]->as_swizzle();
198b8e80941Smrg      ir_swizzle *z = expr1->operands[0]->as_swizzle();
199b8e80941Smrg      ir_swizzle *w = expr1->operands[1]->as_swizzle();
200b8e80941Smrg
201b8e80941Smrg      if (!x || x->mask.num_components != 1 ||
202b8e80941Smrg          !y || y->mask.num_components != 1 ||
203b8e80941Smrg          !z || z->mask.num_components != 1 ||
204b8e80941Smrg          !w || w->mask.num_components != 1) {
205b8e80941Smrg         return NULL;
206b8e80941Smrg      }
207b8e80941Smrg
208b8e80941Smrg      bool swiz_seen[4] = {false, false, false, false};
209b8e80941Smrg      swiz_seen[x->mask.x] = true;
210b8e80941Smrg      swiz_seen[y->mask.x] = true;
211b8e80941Smrg      swiz_seen[z->mask.x] = true;
212b8e80941Smrg      swiz_seen[w->mask.x] = true;
213b8e80941Smrg
214b8e80941Smrg      if (!swiz_seen[0] || !swiz_seen[1] ||
215b8e80941Smrg          !swiz_seen[2] || !swiz_seen[3]) {
216b8e80941Smrg         return NULL;
217b8e80941Smrg      }
218b8e80941Smrg
219b8e80941Smrg      if (x->val->equals(y->val) &&
220b8e80941Smrg          x->val->equals(z->val) &&
221b8e80941Smrg          x->val->equals(w->val)) {
222b8e80941Smrg         return dot(x->val, new(mem_ctx) ir_constant(1.0f, 4));
223b8e80941Smrg      }
224b8e80941Smrg   }
225b8e80941Smrg   return NULL;
226b8e80941Smrg}
227b8e80941Smrg
228b8e80941Smrgvoid
229b8e80941Smrgir_algebraic_visitor::reassociate_operands(ir_expression *ir1,
230b8e80941Smrg					   int op1,
231b8e80941Smrg					   ir_expression *ir2,
232b8e80941Smrg					   int op2)
233b8e80941Smrg{
234b8e80941Smrg   ir_rvalue *temp = ir2->operands[op2];
235b8e80941Smrg   ir2->operands[op2] = ir1->operands[op1];
236b8e80941Smrg   ir1->operands[op1] = temp;
237b8e80941Smrg
238b8e80941Smrg   /* Update the type of ir2.  The type of ir1 won't have changed --
239b8e80941Smrg    * base types matched, and at least one of the operands of the 2
240b8e80941Smrg    * binops is still a vector if any of them were.
241b8e80941Smrg    */
242b8e80941Smrg   update_type(ir2);
243b8e80941Smrg
244b8e80941Smrg   this->progress = true;
245b8e80941Smrg}
246b8e80941Smrg
247b8e80941Smrg/**
248b8e80941Smrg * Reassociates a constant down a tree of adds or multiplies.
249b8e80941Smrg *
250b8e80941Smrg * Consider (2 * (a * (b * 0.5))).  We want to end up with a * b.
251b8e80941Smrg */
252b8e80941Smrgbool
253b8e80941Smrgir_algebraic_visitor::reassociate_constant(ir_expression *ir1, int const_index,
254b8e80941Smrg					   ir_constant *constant,
255b8e80941Smrg					   ir_expression *ir2)
256b8e80941Smrg{
257b8e80941Smrg   if (!ir2 || ir1->operation != ir2->operation)
258b8e80941Smrg      return false;
259b8e80941Smrg
260b8e80941Smrg   /* Don't want to even think about matrices. */
261b8e80941Smrg   if (ir1->operands[0]->type->is_matrix() ||
262b8e80941Smrg       ir1->operands[1]->type->is_matrix() ||
263b8e80941Smrg       ir2->operands[0]->type->is_matrix() ||
264b8e80941Smrg       ir2->operands[1]->type->is_matrix())
265b8e80941Smrg      return false;
266b8e80941Smrg
267b8e80941Smrg   void *mem_ctx = ralloc_parent(ir2);
268b8e80941Smrg
269b8e80941Smrg   ir_constant *ir2_const[2];
270b8e80941Smrg   ir2_const[0] = ir2->operands[0]->constant_expression_value(mem_ctx);
271b8e80941Smrg   ir2_const[1] = ir2->operands[1]->constant_expression_value(mem_ctx);
272b8e80941Smrg
273b8e80941Smrg   if (ir2_const[0] && ir2_const[1])
274b8e80941Smrg      return false;
275b8e80941Smrg
276b8e80941Smrg   if (ir2_const[0]) {
277b8e80941Smrg      reassociate_operands(ir1, const_index, ir2, 1);
278b8e80941Smrg      return true;
279b8e80941Smrg   } else if (ir2_const[1]) {
280b8e80941Smrg      reassociate_operands(ir1, const_index, ir2, 0);
281b8e80941Smrg      return true;
282b8e80941Smrg   }
283b8e80941Smrg
284b8e80941Smrg   if (reassociate_constant(ir1, const_index, constant,
285b8e80941Smrg			    ir2->operands[0]->as_expression())) {
286b8e80941Smrg      update_type(ir2);
287b8e80941Smrg      return true;
288b8e80941Smrg   }
289b8e80941Smrg
290b8e80941Smrg   if (reassociate_constant(ir1, const_index, constant,
291b8e80941Smrg			    ir2->operands[1]->as_expression())) {
292b8e80941Smrg      update_type(ir2);
293b8e80941Smrg      return true;
294b8e80941Smrg   }
295b8e80941Smrg
296b8e80941Smrg   return false;
297b8e80941Smrg}
298b8e80941Smrg
299b8e80941Smrg/* When eliminating an expression and just returning one of its operands,
300b8e80941Smrg * we may need to swizzle that operand out to a vector if the expression was
301b8e80941Smrg * vector type.
302b8e80941Smrg */
303b8e80941Smrgir_rvalue *
304b8e80941Smrgir_algebraic_visitor::swizzle_if_required(ir_expression *expr,
305b8e80941Smrg					  ir_rvalue *operand)
306b8e80941Smrg{
307b8e80941Smrg   if (expr->type->is_vector() && operand->type->is_scalar()) {
308b8e80941Smrg      return new(mem_ctx) ir_swizzle(operand, 0, 0, 0, 0,
309b8e80941Smrg				     expr->type->vector_elements);
310b8e80941Smrg   } else
311b8e80941Smrg      return operand;
312b8e80941Smrg}
313b8e80941Smrg
314b8e80941Smrgir_rvalue *
315b8e80941Smrgir_algebraic_visitor::handle_expression(ir_expression *ir)
316b8e80941Smrg{
317b8e80941Smrg   ir_constant *op_const[4] = {NULL, NULL, NULL, NULL};
318b8e80941Smrg   ir_expression *op_expr[4] = {NULL, NULL, NULL, NULL};
319b8e80941Smrg
320b8e80941Smrg   if (ir->operation == ir_binop_mul &&
321b8e80941Smrg       ir->operands[0]->type->is_matrix() &&
322b8e80941Smrg       ir->operands[1]->type->is_vector()) {
323b8e80941Smrg      ir_expression *matrix_mul = ir->operands[0]->as_expression();
324b8e80941Smrg
325b8e80941Smrg      if (matrix_mul && matrix_mul->operation == ir_binop_mul &&
326b8e80941Smrg         matrix_mul->operands[0]->type->is_matrix() &&
327b8e80941Smrg         matrix_mul->operands[1]->type->is_matrix()) {
328b8e80941Smrg
329b8e80941Smrg         return mul(matrix_mul->operands[0],
330b8e80941Smrg                    mul(matrix_mul->operands[1], ir->operands[1]));
331b8e80941Smrg      }
332b8e80941Smrg   }
333b8e80941Smrg
334b8e80941Smrg   assert(ir->num_operands <= 4);
335b8e80941Smrg   for (unsigned i = 0; i < ir->num_operands; i++) {
336b8e80941Smrg      if (ir->operands[i]->type->is_matrix())
337b8e80941Smrg	 return ir;
338b8e80941Smrg
339b8e80941Smrg      op_const[i] =
340b8e80941Smrg         ir->operands[i]->constant_expression_value(ralloc_parent(ir));
341b8e80941Smrg      op_expr[i] = ir->operands[i]->as_expression();
342b8e80941Smrg   }
343b8e80941Smrg
344b8e80941Smrg   if (this->mem_ctx == NULL)
345b8e80941Smrg      this->mem_ctx = ralloc_parent(ir);
346b8e80941Smrg
347b8e80941Smrg   switch (ir->operation) {
348b8e80941Smrg   case ir_unop_bit_not:
349b8e80941Smrg      if (op_expr[0] && op_expr[0]->operation == ir_unop_bit_not)
350b8e80941Smrg         return op_expr[0]->operands[0];
351b8e80941Smrg      break;
352b8e80941Smrg
353b8e80941Smrg   case ir_unop_abs:
354b8e80941Smrg      if (op_expr[0] == NULL)
355b8e80941Smrg	 break;
356b8e80941Smrg
357b8e80941Smrg      switch (op_expr[0]->operation) {
358b8e80941Smrg      case ir_unop_abs:
359b8e80941Smrg      case ir_unop_neg:
360b8e80941Smrg         return abs(op_expr[0]->operands[0]);
361b8e80941Smrg      default:
362b8e80941Smrg         break;
363b8e80941Smrg      }
364b8e80941Smrg      break;
365b8e80941Smrg
366b8e80941Smrg   case ir_unop_neg:
367b8e80941Smrg      if (op_expr[0] == NULL)
368b8e80941Smrg	 break;
369b8e80941Smrg
370b8e80941Smrg      if (op_expr[0]->operation == ir_unop_neg) {
371b8e80941Smrg         return op_expr[0]->operands[0];
372b8e80941Smrg      }
373b8e80941Smrg      break;
374b8e80941Smrg
375b8e80941Smrg   case ir_unop_exp:
376b8e80941Smrg      if (op_expr[0] == NULL)
377b8e80941Smrg	 break;
378b8e80941Smrg
379b8e80941Smrg      if (op_expr[0]->operation == ir_unop_log) {
380b8e80941Smrg         return op_expr[0]->operands[0];
381b8e80941Smrg      }
382b8e80941Smrg      break;
383b8e80941Smrg
384b8e80941Smrg   case ir_unop_log:
385b8e80941Smrg      if (op_expr[0] == NULL)
386b8e80941Smrg	 break;
387b8e80941Smrg
388b8e80941Smrg      if (op_expr[0]->operation == ir_unop_exp) {
389b8e80941Smrg         return op_expr[0]->operands[0];
390b8e80941Smrg      }
391b8e80941Smrg      break;
392b8e80941Smrg
393b8e80941Smrg   case ir_unop_exp2:
394b8e80941Smrg      if (op_expr[0] == NULL)
395b8e80941Smrg	 break;
396b8e80941Smrg
397b8e80941Smrg      if (op_expr[0]->operation == ir_unop_log2) {
398b8e80941Smrg         return op_expr[0]->operands[0];
399b8e80941Smrg      }
400b8e80941Smrg
401b8e80941Smrg      if (!options->EmitNoPow && op_expr[0]->operation == ir_binop_mul) {
402b8e80941Smrg         for (int log2_pos = 0; log2_pos < 2; log2_pos++) {
403b8e80941Smrg            ir_expression *log2_expr =
404b8e80941Smrg               op_expr[0]->operands[log2_pos]->as_expression();
405b8e80941Smrg
406b8e80941Smrg            if (log2_expr && log2_expr->operation == ir_unop_log2) {
407b8e80941Smrg               return new(mem_ctx) ir_expression(ir_binop_pow,
408b8e80941Smrg                                                 ir->type,
409b8e80941Smrg                                                 log2_expr->operands[0],
410b8e80941Smrg                                                 op_expr[0]->operands[1 - log2_pos]);
411b8e80941Smrg            }
412b8e80941Smrg         }
413b8e80941Smrg      }
414b8e80941Smrg      break;
415b8e80941Smrg
416b8e80941Smrg   case ir_unop_log2:
417b8e80941Smrg      if (op_expr[0] == NULL)
418b8e80941Smrg	 break;
419b8e80941Smrg
420b8e80941Smrg      if (op_expr[0]->operation == ir_unop_exp2) {
421b8e80941Smrg         return op_expr[0]->operands[0];
422b8e80941Smrg      }
423b8e80941Smrg      break;
424b8e80941Smrg
425b8e80941Smrg   case ir_unop_f2i:
426b8e80941Smrg   case ir_unop_f2u:
427b8e80941Smrg      if (op_expr[0] && op_expr[0]->operation == ir_unop_trunc) {
428b8e80941Smrg         return new(mem_ctx) ir_expression(ir->operation,
429b8e80941Smrg                                           ir->type,
430b8e80941Smrg                                           op_expr[0]->operands[0]);
431b8e80941Smrg      }
432b8e80941Smrg      break;
433b8e80941Smrg
434b8e80941Smrg   case ir_unop_logic_not: {
435b8e80941Smrg      enum ir_expression_operation new_op = ir_unop_logic_not;
436b8e80941Smrg
437b8e80941Smrg      if (op_expr[0] == NULL)
438b8e80941Smrg	 break;
439b8e80941Smrg
440b8e80941Smrg      switch (op_expr[0]->operation) {
441b8e80941Smrg      case ir_binop_less:    new_op = ir_binop_gequal;  break;
442b8e80941Smrg      case ir_binop_gequal:  new_op = ir_binop_less;    break;
443b8e80941Smrg      case ir_binop_equal:   new_op = ir_binop_nequal;  break;
444b8e80941Smrg      case ir_binop_nequal:  new_op = ir_binop_equal;   break;
445b8e80941Smrg      case ir_binop_all_equal:   new_op = ir_binop_any_nequal;  break;
446b8e80941Smrg      case ir_binop_any_nequal:  new_op = ir_binop_all_equal;   break;
447b8e80941Smrg
448b8e80941Smrg      default:
449b8e80941Smrg	 /* The default case handler is here to silence a warning from GCC.
450b8e80941Smrg	  */
451b8e80941Smrg	 break;
452b8e80941Smrg      }
453b8e80941Smrg
454b8e80941Smrg      if (new_op != ir_unop_logic_not) {
455b8e80941Smrg	 return new(mem_ctx) ir_expression(new_op,
456b8e80941Smrg					   ir->type,
457b8e80941Smrg					   op_expr[0]->operands[0],
458b8e80941Smrg					   op_expr[0]->operands[1]);
459b8e80941Smrg      }
460b8e80941Smrg
461b8e80941Smrg      break;
462b8e80941Smrg   }
463b8e80941Smrg
464b8e80941Smrg   case ir_unop_saturate:
465b8e80941Smrg      if (op_expr[0] && op_expr[0]->operation == ir_binop_add) {
466b8e80941Smrg         ir_expression *b2f_0 = op_expr[0]->operands[0]->as_expression();
467b8e80941Smrg         ir_expression *b2f_1 = op_expr[0]->operands[1]->as_expression();
468b8e80941Smrg
469b8e80941Smrg         if (b2f_0 && b2f_0->operation == ir_unop_b2f &&
470b8e80941Smrg             b2f_1 && b2f_1->operation == ir_unop_b2f) {
471b8e80941Smrg            return b2f(logic_or(b2f_0->operands[0], b2f_1->operands[0]));
472b8e80941Smrg         }
473b8e80941Smrg      }
474b8e80941Smrg      break;
475b8e80941Smrg
476b8e80941Smrg      /* This macro CANNOT use the do { } while(true) mechanism because
477b8e80941Smrg       * then the breaks apply to the loop instead of the switch!
478b8e80941Smrg       */
479b8e80941Smrg#define HANDLE_PACK_UNPACK_INVERSE(inverse_operation)                   \
480b8e80941Smrg      {                                                                 \
481b8e80941Smrg         ir_expression *const op = ir->operands[0]->as_expression();    \
482b8e80941Smrg         if (op == NULL)                                                \
483b8e80941Smrg            break;                                                      \
484b8e80941Smrg         if (op->operation == (inverse_operation))                      \
485b8e80941Smrg            return op->operands[0];                                     \
486b8e80941Smrg         break;                                                         \
487b8e80941Smrg      }
488b8e80941Smrg
489b8e80941Smrg   case ir_unop_unpack_uint_2x32:
490b8e80941Smrg      HANDLE_PACK_UNPACK_INVERSE(ir_unop_pack_uint_2x32);
491b8e80941Smrg   case ir_unop_pack_uint_2x32:
492b8e80941Smrg      HANDLE_PACK_UNPACK_INVERSE(ir_unop_unpack_uint_2x32);
493b8e80941Smrg   case ir_unop_unpack_int_2x32:
494b8e80941Smrg      HANDLE_PACK_UNPACK_INVERSE(ir_unop_pack_int_2x32);
495b8e80941Smrg   case ir_unop_pack_int_2x32:
496b8e80941Smrg      HANDLE_PACK_UNPACK_INVERSE(ir_unop_unpack_int_2x32);
497b8e80941Smrg   case ir_unop_unpack_double_2x32:
498b8e80941Smrg      HANDLE_PACK_UNPACK_INVERSE(ir_unop_pack_double_2x32);
499b8e80941Smrg   case ir_unop_pack_double_2x32:
500b8e80941Smrg      HANDLE_PACK_UNPACK_INVERSE(ir_unop_unpack_double_2x32);
501b8e80941Smrg
502b8e80941Smrg#undef HANDLE_PACK_UNPACK_INVERSE
503b8e80941Smrg
504b8e80941Smrg   case ir_binop_add:
505b8e80941Smrg      if (is_vec_zero(op_const[0]))
506b8e80941Smrg	 return ir->operands[1];
507b8e80941Smrg      if (is_vec_zero(op_const[1]))
508b8e80941Smrg	 return ir->operands[0];
509b8e80941Smrg
510b8e80941Smrg      /* Replace (x + (-x)) with constant 0 */
511b8e80941Smrg      for (int i = 0; i < 2; i++) {
512b8e80941Smrg         if (op_expr[i]) {
513b8e80941Smrg            if (op_expr[i]->operation == ir_unop_neg) {
514b8e80941Smrg               ir_rvalue *other = ir->operands[(i + 1) % 2];
515b8e80941Smrg               if (other && op_expr[i]->operands[0]->equals(other)) {
516b8e80941Smrg                  return ir_constant::zero(ir, ir->type);
517b8e80941Smrg               }
518b8e80941Smrg            }
519b8e80941Smrg         }
520b8e80941Smrg      }
521b8e80941Smrg
522b8e80941Smrg      /* Reassociate addition of constants so that we can do constant
523b8e80941Smrg       * folding.
524b8e80941Smrg       */
525b8e80941Smrg      if (op_const[0] && !op_const[1])
526b8e80941Smrg	 reassociate_constant(ir, 0, op_const[0], op_expr[1]);
527b8e80941Smrg      if (op_const[1] && !op_const[0])
528b8e80941Smrg	 reassociate_constant(ir, 1, op_const[1], op_expr[0]);
529b8e80941Smrg
530b8e80941Smrg      /* Recognize (v.x + v.y) + (v.z + v.w) as dot(v, 1.0) */
531b8e80941Smrg      if (options->OptimizeForAOS) {
532b8e80941Smrg         ir_expression *expr = try_replace_with_dot(op_expr[0], op_expr[1],
533b8e80941Smrg                                                    mem_ctx);
534b8e80941Smrg         if (expr)
535b8e80941Smrg            return expr;
536b8e80941Smrg      }
537b8e80941Smrg
538b8e80941Smrg      /* Replace (-x + y) * a + x and commutative variations with lrp(x, y, a).
539b8e80941Smrg       *
540b8e80941Smrg       * (-x + y) * a + x
541b8e80941Smrg       * (x * -a) + (y * a) + x
542b8e80941Smrg       * x + (x * -a) + (y * a)
543b8e80941Smrg       * x * (1 - a) + y * a
544b8e80941Smrg       * lrp(x, y, a)
545b8e80941Smrg       */
546b8e80941Smrg      for (int mul_pos = 0; mul_pos < 2; mul_pos++) {
547b8e80941Smrg         ir_expression *mul = op_expr[mul_pos];
548b8e80941Smrg
549b8e80941Smrg         if (!mul || mul->operation != ir_binop_mul)
550b8e80941Smrg            continue;
551b8e80941Smrg
552b8e80941Smrg         /* Multiply found on one of the operands. Now check for an
553b8e80941Smrg          * inner addition operation.
554b8e80941Smrg          */
555b8e80941Smrg         for (int inner_add_pos = 0; inner_add_pos < 2; inner_add_pos++) {
556b8e80941Smrg            ir_expression *inner_add =
557b8e80941Smrg               mul->operands[inner_add_pos]->as_expression();
558b8e80941Smrg
559b8e80941Smrg            if (!inner_add || inner_add->operation != ir_binop_add)
560b8e80941Smrg               continue;
561b8e80941Smrg
562b8e80941Smrg            /* Inner addition found on one of the operands. Now check for
563b8e80941Smrg             * one of the operands of the inner addition to be the negative
564b8e80941Smrg             * of x_operand.
565b8e80941Smrg             */
566b8e80941Smrg            for (int neg_pos = 0; neg_pos < 2; neg_pos++) {
567b8e80941Smrg               ir_expression *neg =
568b8e80941Smrg                  inner_add->operands[neg_pos]->as_expression();
569b8e80941Smrg
570b8e80941Smrg               if (!neg || neg->operation != ir_unop_neg)
571b8e80941Smrg                  continue;
572b8e80941Smrg
573b8e80941Smrg               ir_rvalue *x_operand = ir->operands[1 - mul_pos];
574b8e80941Smrg
575b8e80941Smrg               if (!neg->operands[0]->equals(x_operand))
576b8e80941Smrg                  continue;
577b8e80941Smrg
578b8e80941Smrg               ir_rvalue *y_operand = inner_add->operands[1 - neg_pos];
579b8e80941Smrg               ir_rvalue *a_operand = mul->operands[1 - inner_add_pos];
580b8e80941Smrg
581b8e80941Smrg               if (x_operand->type != y_operand->type ||
582b8e80941Smrg                   x_operand->type != a_operand->type)
583b8e80941Smrg                  continue;
584b8e80941Smrg
585b8e80941Smrg               return lrp(x_operand, y_operand, a_operand);
586b8e80941Smrg            }
587b8e80941Smrg         }
588b8e80941Smrg      }
589b8e80941Smrg
590b8e80941Smrg      break;
591b8e80941Smrg
592b8e80941Smrg   case ir_binop_sub:
593b8e80941Smrg      if (is_vec_zero(op_const[0]))
594b8e80941Smrg	 return neg(ir->operands[1]);
595b8e80941Smrg      if (is_vec_zero(op_const[1]))
596b8e80941Smrg	 return ir->operands[0];
597b8e80941Smrg      break;
598b8e80941Smrg
599b8e80941Smrg   case ir_binop_mul:
600b8e80941Smrg      if (is_vec_one(op_const[0]))
601b8e80941Smrg	 return ir->operands[1];
602b8e80941Smrg      if (is_vec_one(op_const[1]))
603b8e80941Smrg	 return ir->operands[0];
604b8e80941Smrg
605b8e80941Smrg      if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1]))
606b8e80941Smrg	 return ir_constant::zero(ir, ir->type);
607b8e80941Smrg
608b8e80941Smrg      if (is_vec_negative_one(op_const[0]))
609b8e80941Smrg         return neg(ir->operands[1]);
610b8e80941Smrg      if (is_vec_negative_one(op_const[1]))
611b8e80941Smrg         return neg(ir->operands[0]);
612b8e80941Smrg
613b8e80941Smrg      if (op_expr[0] && op_expr[0]->operation == ir_unop_b2f &&
614b8e80941Smrg          op_expr[1] && op_expr[1]->operation == ir_unop_b2f) {
615b8e80941Smrg         return b2f(logic_and(op_expr[0]->operands[0], op_expr[1]->operands[0]));
616b8e80941Smrg      }
617b8e80941Smrg
618b8e80941Smrg      /* Reassociate multiplication of constants so that we can do
619b8e80941Smrg       * constant folding.
620b8e80941Smrg       */
621b8e80941Smrg      if (op_const[0] && !op_const[1])
622b8e80941Smrg	 reassociate_constant(ir, 0, op_const[0], op_expr[1]);
623b8e80941Smrg      if (op_const[1] && !op_const[0])
624b8e80941Smrg	 reassociate_constant(ir, 1, op_const[1], op_expr[0]);
625b8e80941Smrg
626b8e80941Smrg      /* Optimizes
627b8e80941Smrg       *
628b8e80941Smrg       *    (mul (floor (add (abs x) 0.5) (sign x)))
629b8e80941Smrg       *
630b8e80941Smrg       * into
631b8e80941Smrg       *
632b8e80941Smrg       *    (trunc (add x (mul (sign x) 0.5)))
633b8e80941Smrg       */
634b8e80941Smrg      for (int i = 0; i < 2; i++) {
635b8e80941Smrg         ir_expression *sign_expr = ir->operands[i]->as_expression();
636b8e80941Smrg         ir_expression *floor_expr = ir->operands[1 - i]->as_expression();
637b8e80941Smrg
638b8e80941Smrg         if (!sign_expr || sign_expr->operation != ir_unop_sign ||
639b8e80941Smrg             !floor_expr || floor_expr->operation != ir_unop_floor)
640b8e80941Smrg            continue;
641b8e80941Smrg
642b8e80941Smrg         ir_expression *add_expr = floor_expr->operands[0]->as_expression();
643b8e80941Smrg         if (!add_expr || add_expr->operation != ir_binop_add)
644b8e80941Smrg            continue;
645b8e80941Smrg
646b8e80941Smrg         for (int j = 0; j < 2; j++) {
647b8e80941Smrg            ir_expression *abs_expr = add_expr->operands[j]->as_expression();
648b8e80941Smrg            if (!abs_expr || abs_expr->operation != ir_unop_abs)
649b8e80941Smrg               continue;
650b8e80941Smrg
651b8e80941Smrg            ir_constant *point_five = add_expr->operands[1 - j]->as_constant();
652b8e80941Smrg            if (!point_five || !point_five->is_value(0.5, 0))
653b8e80941Smrg               continue;
654b8e80941Smrg
655b8e80941Smrg            if (abs_expr->operands[0]->equals(sign_expr->operands[0])) {
656b8e80941Smrg               return trunc(add(abs_expr->operands[0],
657b8e80941Smrg                                mul(sign_expr, point_five)));
658b8e80941Smrg            }
659b8e80941Smrg         }
660b8e80941Smrg      }
661b8e80941Smrg      break;
662b8e80941Smrg
663b8e80941Smrg   case ir_binop_div:
664b8e80941Smrg      if (is_vec_one(op_const[0]) && (
665b8e80941Smrg                ir->type->is_float() || ir->type->is_double())) {
666b8e80941Smrg	 return new(mem_ctx) ir_expression(ir_unop_rcp,
667b8e80941Smrg					   ir->operands[1]->type,
668b8e80941Smrg					   ir->operands[1],
669b8e80941Smrg					   NULL);
670b8e80941Smrg      }
671b8e80941Smrg      if (is_vec_one(op_const[1]))
672b8e80941Smrg	 return ir->operands[0];
673b8e80941Smrg      break;
674b8e80941Smrg
675b8e80941Smrg   case ir_binop_dot:
676b8e80941Smrg      if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1]))
677b8e80941Smrg	 return ir_constant::zero(mem_ctx, ir->type);
678b8e80941Smrg
679b8e80941Smrg      for (int i = 0; i < 2; i++) {
680b8e80941Smrg         if (!op_const[i])
681b8e80941Smrg            continue;
682b8e80941Smrg
683b8e80941Smrg         unsigned components[4] = { 0 }, count = 0;
684b8e80941Smrg
685b8e80941Smrg         for (unsigned c = 0; c < op_const[i]->type->vector_elements; c++) {
686b8e80941Smrg            if (op_const[i]->is_zero())
687b8e80941Smrg               continue;
688b8e80941Smrg
689b8e80941Smrg            components[count] = c;
690b8e80941Smrg            count++;
691b8e80941Smrg         }
692b8e80941Smrg
693b8e80941Smrg         /* No channels had zero values; bail. */
694b8e80941Smrg         if (count >= op_const[i]->type->vector_elements)
695b8e80941Smrg            break;
696b8e80941Smrg
697b8e80941Smrg         ir_expression_operation op = count == 1 ?
698b8e80941Smrg            ir_binop_mul : ir_binop_dot;
699b8e80941Smrg
700b8e80941Smrg         /* Swizzle both operands to remove the channels that were zero. */
701b8e80941Smrg         return new(mem_ctx)
702b8e80941Smrg            ir_expression(op, ir->type,
703b8e80941Smrg                          new(mem_ctx) ir_swizzle(ir->operands[0],
704b8e80941Smrg                                                  components, count),
705b8e80941Smrg                          new(mem_ctx) ir_swizzle(ir->operands[1],
706b8e80941Smrg                                                  components, count));
707b8e80941Smrg      }
708b8e80941Smrg      break;
709b8e80941Smrg
710b8e80941Smrg   case ir_binop_less:
711b8e80941Smrg   case ir_binop_gequal:
712b8e80941Smrg   case ir_binop_equal:
713b8e80941Smrg   case ir_binop_nequal:
714b8e80941Smrg      for (int add_pos = 0; add_pos < 2; add_pos++) {
715b8e80941Smrg         ir_expression *add = op_expr[add_pos];
716b8e80941Smrg
717b8e80941Smrg         if (!add || add->operation != ir_binop_add)
718b8e80941Smrg            continue;
719b8e80941Smrg
720b8e80941Smrg         ir_constant *zero = op_const[1 - add_pos];
721b8e80941Smrg         if (!is_vec_zero(zero))
722b8e80941Smrg            continue;
723b8e80941Smrg
724b8e80941Smrg         /* We are allowed to add scalars with a vector or matrix. In that
725b8e80941Smrg          * case lets just exit early.
726b8e80941Smrg          */
727b8e80941Smrg         if (add->operands[0]->type != add->operands[1]->type)
728b8e80941Smrg            continue;
729b8e80941Smrg
730b8e80941Smrg         /* Depending of the zero position we want to optimize
731b8e80941Smrg          * (0 cmp x+y) into (-x cmp y) or (x+y cmp 0) into (x cmp -y)
732b8e80941Smrg          */
733b8e80941Smrg         if (add_pos == 1) {
734b8e80941Smrg            return new(mem_ctx) ir_expression(ir->operation,
735b8e80941Smrg                                              neg(add->operands[0]),
736b8e80941Smrg                                              add->operands[1]);
737b8e80941Smrg         } else {
738b8e80941Smrg            return new(mem_ctx) ir_expression(ir->operation,
739b8e80941Smrg                                              add->operands[0],
740b8e80941Smrg                                              neg(add->operands[1]));
741b8e80941Smrg         }
742b8e80941Smrg      }
743b8e80941Smrg      break;
744b8e80941Smrg
745b8e80941Smrg   case ir_binop_all_equal:
746b8e80941Smrg   case ir_binop_any_nequal:
747b8e80941Smrg      if (ir->operands[0]->type->is_scalar() &&
748b8e80941Smrg          ir->operands[1]->type->is_scalar())
749b8e80941Smrg         return new(mem_ctx) ir_expression(ir->operation == ir_binop_all_equal
750b8e80941Smrg                                           ? ir_binop_equal : ir_binop_nequal,
751b8e80941Smrg                                           ir->operands[0],
752b8e80941Smrg                                           ir->operands[1]);
753b8e80941Smrg      break;
754b8e80941Smrg
755b8e80941Smrg   case ir_binop_rshift:
756b8e80941Smrg   case ir_binop_lshift:
757b8e80941Smrg      /* 0 >> x == 0 */
758b8e80941Smrg      if (is_vec_zero(op_const[0]))
759b8e80941Smrg         return ir->operands[0];
760b8e80941Smrg      /* x >> 0 == x */
761b8e80941Smrg      if (is_vec_zero(op_const[1]))
762b8e80941Smrg         return ir->operands[0];
763b8e80941Smrg      break;
764b8e80941Smrg
765b8e80941Smrg   case ir_binop_logic_and:
766b8e80941Smrg      if (is_vec_one(op_const[0])) {
767b8e80941Smrg	 return ir->operands[1];
768b8e80941Smrg      } else if (is_vec_one(op_const[1])) {
769b8e80941Smrg	 return ir->operands[0];
770b8e80941Smrg      } else if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) {
771b8e80941Smrg	 return ir_constant::zero(mem_ctx, ir->type);
772b8e80941Smrg      } else if (op_expr[0] && op_expr[0]->operation == ir_unop_logic_not &&
773b8e80941Smrg                 op_expr[1] && op_expr[1]->operation == ir_unop_logic_not) {
774b8e80941Smrg         /* De Morgan's Law:
775b8e80941Smrg          *    (not A) and (not B) === not (A or B)
776b8e80941Smrg          */
777b8e80941Smrg         return logic_not(logic_or(op_expr[0]->operands[0],
778b8e80941Smrg                                   op_expr[1]->operands[0]));
779b8e80941Smrg      } else if (ir->operands[0]->equals(ir->operands[1])) {
780b8e80941Smrg         /* (a && a) == a */
781b8e80941Smrg         return ir->operands[0];
782b8e80941Smrg      }
783b8e80941Smrg      break;
784b8e80941Smrg
785b8e80941Smrg   case ir_binop_logic_xor:
786b8e80941Smrg      if (is_vec_zero(op_const[0])) {
787b8e80941Smrg	 return ir->operands[1];
788b8e80941Smrg      } else if (is_vec_zero(op_const[1])) {
789b8e80941Smrg	 return ir->operands[0];
790b8e80941Smrg      } else if (is_vec_one(op_const[0])) {
791b8e80941Smrg	 return logic_not(ir->operands[1]);
792b8e80941Smrg      } else if (is_vec_one(op_const[1])) {
793b8e80941Smrg	 return logic_not(ir->operands[0]);
794b8e80941Smrg      } else if (ir->operands[0]->equals(ir->operands[1])) {
795b8e80941Smrg         /* (a ^^ a) == false */
796b8e80941Smrg	 return ir_constant::zero(mem_ctx, ir->type);
797b8e80941Smrg      }
798b8e80941Smrg      break;
799b8e80941Smrg
800b8e80941Smrg   case ir_binop_logic_or:
801b8e80941Smrg      if (is_vec_zero(op_const[0])) {
802b8e80941Smrg	 return ir->operands[1];
803b8e80941Smrg      } else if (is_vec_zero(op_const[1])) {
804b8e80941Smrg	 return ir->operands[0];
805b8e80941Smrg      } else if (is_vec_one(op_const[0]) || is_vec_one(op_const[1])) {
806b8e80941Smrg	 ir_constant_data data;
807b8e80941Smrg
808b8e80941Smrg	 for (unsigned i = 0; i < 16; i++)
809b8e80941Smrg	    data.b[i] = true;
810b8e80941Smrg
811b8e80941Smrg	 return new(mem_ctx) ir_constant(ir->type, &data);
812b8e80941Smrg      } else if (op_expr[0] && op_expr[0]->operation == ir_unop_logic_not &&
813b8e80941Smrg                 op_expr[1] && op_expr[1]->operation == ir_unop_logic_not) {
814b8e80941Smrg         /* De Morgan's Law:
815b8e80941Smrg          *    (not A) or (not B) === not (A and B)
816b8e80941Smrg          */
817b8e80941Smrg         return logic_not(logic_and(op_expr[0]->operands[0],
818b8e80941Smrg                                    op_expr[1]->operands[0]));
819b8e80941Smrg      } else if (ir->operands[0]->equals(ir->operands[1])) {
820b8e80941Smrg         /* (a || a) == a */
821b8e80941Smrg         return ir->operands[0];
822b8e80941Smrg      }
823b8e80941Smrg      break;
824b8e80941Smrg
825b8e80941Smrg   case ir_binop_pow:
826b8e80941Smrg      /* 1^x == 1 */
827b8e80941Smrg      if (is_vec_one(op_const[0]))
828b8e80941Smrg         return op_const[0];
829b8e80941Smrg
830b8e80941Smrg      /* x^1 == x */
831b8e80941Smrg      if (is_vec_one(op_const[1]))
832b8e80941Smrg         return ir->operands[0];
833b8e80941Smrg
834b8e80941Smrg      /* pow(2,x) == exp2(x) */
835b8e80941Smrg      if (is_vec_two(op_const[0]))
836b8e80941Smrg         return expr(ir_unop_exp2, ir->operands[1]);
837b8e80941Smrg
838b8e80941Smrg      if (is_vec_two(op_const[1])) {
839b8e80941Smrg         ir_variable *x = new(ir) ir_variable(ir->operands[1]->type, "x",
840b8e80941Smrg                                              ir_var_temporary);
841b8e80941Smrg         base_ir->insert_before(x);
842b8e80941Smrg         base_ir->insert_before(assign(x, ir->operands[0]));
843b8e80941Smrg         return mul(x, x);
844b8e80941Smrg      }
845b8e80941Smrg
846b8e80941Smrg      if (is_vec_four(op_const[1])) {
847b8e80941Smrg         ir_variable *x = new(ir) ir_variable(ir->operands[1]->type, "x",
848b8e80941Smrg                                              ir_var_temporary);
849b8e80941Smrg         base_ir->insert_before(x);
850b8e80941Smrg         base_ir->insert_before(assign(x, ir->operands[0]));
851b8e80941Smrg
852b8e80941Smrg         ir_variable *squared = new(ir) ir_variable(ir->operands[1]->type,
853b8e80941Smrg                                                    "squared",
854b8e80941Smrg                                                    ir_var_temporary);
855b8e80941Smrg         base_ir->insert_before(squared);
856b8e80941Smrg         base_ir->insert_before(assign(squared, mul(x, x)));
857b8e80941Smrg         return mul(squared, squared);
858b8e80941Smrg      }
859b8e80941Smrg
860b8e80941Smrg      break;
861b8e80941Smrg
862b8e80941Smrg   case ir_binop_min:
863b8e80941Smrg   case ir_binop_max:
864b8e80941Smrg      if (!ir->type->is_float() || options->EmitNoSat)
865b8e80941Smrg         break;
866b8e80941Smrg
867b8e80941Smrg      /* Replace min(max) operations and its commutative combinations with
868b8e80941Smrg       * a saturate operation
869b8e80941Smrg       */
870b8e80941Smrg      for (int op = 0; op < 2; op++) {
871b8e80941Smrg         ir_expression *inner_expr = op_expr[op];
872b8e80941Smrg         ir_constant *outer_const = op_const[1 - op];
873b8e80941Smrg         ir_expression_operation op_cond = (ir->operation == ir_binop_max) ?
874b8e80941Smrg            ir_binop_min : ir_binop_max;
875b8e80941Smrg
876b8e80941Smrg         if (!inner_expr || !outer_const || (inner_expr->operation != op_cond))
877b8e80941Smrg            continue;
878b8e80941Smrg
879b8e80941Smrg         /* One of these has to be a constant */
880b8e80941Smrg         if (!inner_expr->operands[0]->as_constant() &&
881b8e80941Smrg             !inner_expr->operands[1]->as_constant())
882b8e80941Smrg            break;
883b8e80941Smrg
884b8e80941Smrg         /* Found a min(max) combination. Now try to see if its operands
885b8e80941Smrg          * meet our conditions that we can do just a single saturate operation
886b8e80941Smrg          */
887b8e80941Smrg         for (int minmax_op = 0; minmax_op < 2; minmax_op++) {
888b8e80941Smrg            ir_rvalue *x = inner_expr->operands[minmax_op];
889b8e80941Smrg            ir_rvalue *y = inner_expr->operands[1 - minmax_op];
890b8e80941Smrg
891b8e80941Smrg            ir_constant *inner_const = y->as_constant();
892b8e80941Smrg            if (!inner_const)
893b8e80941Smrg               continue;
894b8e80941Smrg
895b8e80941Smrg            /* min(max(x, 0.0), 1.0) is sat(x) */
896b8e80941Smrg            if (ir->operation == ir_binop_min &&
897b8e80941Smrg                inner_const->is_zero() &&
898b8e80941Smrg                outer_const->is_one())
899b8e80941Smrg               return saturate(x);
900b8e80941Smrg
901b8e80941Smrg            /* max(min(x, 1.0), 0.0) is sat(x) */
902b8e80941Smrg            if (ir->operation == ir_binop_max &&
903b8e80941Smrg                inner_const->is_one() &&
904b8e80941Smrg                outer_const->is_zero())
905b8e80941Smrg               return saturate(x);
906b8e80941Smrg
907b8e80941Smrg            /* min(max(x, 0.0), b) where b < 1.0 is sat(min(x, b)) */
908b8e80941Smrg            if (ir->operation == ir_binop_min &&
909b8e80941Smrg                inner_const->is_zero() &&
910b8e80941Smrg                is_less_than_one(outer_const))
911b8e80941Smrg               return saturate(expr(ir_binop_min, x, outer_const));
912b8e80941Smrg
913b8e80941Smrg            /* max(min(x, b), 0.0) where b < 1.0 is sat(min(x, b)) */
914b8e80941Smrg            if (ir->operation == ir_binop_max &&
915b8e80941Smrg                is_less_than_one(inner_const) &&
916b8e80941Smrg                outer_const->is_zero())
917b8e80941Smrg               return saturate(expr(ir_binop_min, x, inner_const));
918b8e80941Smrg
919b8e80941Smrg            /* max(min(x, 1.0), b) where b > 0.0 is sat(max(x, b)) */
920b8e80941Smrg            if (ir->operation == ir_binop_max &&
921b8e80941Smrg                inner_const->is_one() &&
922b8e80941Smrg                is_greater_than_zero(outer_const))
923b8e80941Smrg               return saturate(expr(ir_binop_max, x, outer_const));
924b8e80941Smrg
925b8e80941Smrg            /* min(max(x, b), 1.0) where b > 0.0 is sat(max(x, b)) */
926b8e80941Smrg            if (ir->operation == ir_binop_min &&
927b8e80941Smrg                is_greater_than_zero(inner_const) &&
928b8e80941Smrg                outer_const->is_one())
929b8e80941Smrg               return saturate(expr(ir_binop_max, x, inner_const));
930b8e80941Smrg         }
931b8e80941Smrg      }
932b8e80941Smrg
933b8e80941Smrg      break;
934b8e80941Smrg
935b8e80941Smrg   case ir_unop_rcp:
936b8e80941Smrg      if (op_expr[0] && op_expr[0]->operation == ir_unop_rcp)
937b8e80941Smrg	 return op_expr[0]->operands[0];
938b8e80941Smrg
939b8e80941Smrg      if (op_expr[0] && (op_expr[0]->operation == ir_unop_exp2 ||
940b8e80941Smrg                         op_expr[0]->operation == ir_unop_exp)) {
941b8e80941Smrg         return new(mem_ctx) ir_expression(op_expr[0]->operation, ir->type,
942b8e80941Smrg                                           neg(op_expr[0]->operands[0]));
943b8e80941Smrg      }
944b8e80941Smrg
945b8e80941Smrg      /* While ir_to_mesa.cpp will lower sqrt(x) to rcp(rsq(x)), it does so at
946b8e80941Smrg       * its IR level, so we can always apply this transformation.
947b8e80941Smrg       */
948b8e80941Smrg      if (op_expr[0] && op_expr[0]->operation == ir_unop_rsq)
949b8e80941Smrg         return sqrt(op_expr[0]->operands[0]);
950b8e80941Smrg
951b8e80941Smrg      /* As far as we know, all backends are OK with rsq. */
952b8e80941Smrg      if (op_expr[0] && op_expr[0]->operation == ir_unop_sqrt) {
953b8e80941Smrg	 return rsq(op_expr[0]->operands[0]);
954b8e80941Smrg      }
955b8e80941Smrg
956b8e80941Smrg      break;
957b8e80941Smrg
958b8e80941Smrg   case ir_triop_fma:
959b8e80941Smrg      /* Operands are op0 * op1 + op2. */
960b8e80941Smrg      if (is_vec_zero(op_const[0]) || is_vec_zero(op_const[1])) {
961b8e80941Smrg         return ir->operands[2];
962b8e80941Smrg      } else if (is_vec_zero(op_const[2])) {
963b8e80941Smrg         return mul(ir->operands[0], ir->operands[1]);
964b8e80941Smrg      } else if (is_vec_one(op_const[0])) {
965b8e80941Smrg         return add(ir->operands[1], ir->operands[2]);
966b8e80941Smrg      } else if (is_vec_one(op_const[1])) {
967b8e80941Smrg         return add(ir->operands[0], ir->operands[2]);
968b8e80941Smrg      }
969b8e80941Smrg      break;
970b8e80941Smrg
971b8e80941Smrg   case ir_triop_lrp:
972b8e80941Smrg      /* Operands are (x, y, a). */
973b8e80941Smrg      if (is_vec_zero(op_const[2])) {
974b8e80941Smrg         return ir->operands[0];
975b8e80941Smrg      } else if (is_vec_one(op_const[2])) {
976b8e80941Smrg         return ir->operands[1];
977b8e80941Smrg      } else if (ir->operands[0]->equals(ir->operands[1])) {
978b8e80941Smrg         return ir->operands[0];
979b8e80941Smrg      } else if (is_vec_zero(op_const[0])) {
980b8e80941Smrg         return mul(ir->operands[1], ir->operands[2]);
981b8e80941Smrg      } else if (is_vec_zero(op_const[1])) {
982b8e80941Smrg         unsigned op2_components = ir->operands[2]->type->vector_elements;
983b8e80941Smrg         ir_constant *one;
984b8e80941Smrg
985b8e80941Smrg         switch (ir->type->base_type) {
986b8e80941Smrg         case GLSL_TYPE_FLOAT:
987b8e80941Smrg            one = new(mem_ctx) ir_constant(1.0f, op2_components);
988b8e80941Smrg            break;
989b8e80941Smrg         case GLSL_TYPE_DOUBLE:
990b8e80941Smrg            one = new(mem_ctx) ir_constant(1.0, op2_components);
991b8e80941Smrg            break;
992b8e80941Smrg         default:
993b8e80941Smrg            one = NULL;
994b8e80941Smrg            unreachable("unexpected type");
995b8e80941Smrg         }
996b8e80941Smrg
997b8e80941Smrg         return mul(ir->operands[0], add(one, neg(ir->operands[2])));
998b8e80941Smrg      }
999b8e80941Smrg      break;
1000b8e80941Smrg
1001b8e80941Smrg   case ir_triop_csel:
1002b8e80941Smrg      if (is_vec_one(op_const[0]))
1003b8e80941Smrg	 return ir->operands[1];
1004b8e80941Smrg      if (is_vec_zero(op_const[0]))
1005b8e80941Smrg	 return ir->operands[2];
1006b8e80941Smrg      break;
1007b8e80941Smrg
1008b8e80941Smrg   /* Remove interpolateAt* instructions for demoted inputs. They are
1009b8e80941Smrg    * assigned a constant expression to facilitate this.
1010b8e80941Smrg    */
1011b8e80941Smrg   case ir_unop_interpolate_at_centroid:
1012b8e80941Smrg   case ir_binop_interpolate_at_offset:
1013b8e80941Smrg   case ir_binop_interpolate_at_sample:
1014b8e80941Smrg      if (op_const[0])
1015b8e80941Smrg         return ir->operands[0];
1016b8e80941Smrg      break;
1017b8e80941Smrg
1018b8e80941Smrg   default:
1019b8e80941Smrg      break;
1020b8e80941Smrg   }
1021b8e80941Smrg
1022b8e80941Smrg   return ir;
1023b8e80941Smrg}
1024b8e80941Smrg
1025b8e80941Smrgvoid
1026b8e80941Smrgir_algebraic_visitor::handle_rvalue(ir_rvalue **rvalue)
1027b8e80941Smrg{
1028b8e80941Smrg   if (!*rvalue)
1029b8e80941Smrg      return;
1030b8e80941Smrg
1031b8e80941Smrg   ir_expression *expr = (*rvalue)->as_expression();
1032b8e80941Smrg   if (!expr || expr->operation == ir_quadop_vector)
1033b8e80941Smrg      return;
1034b8e80941Smrg
1035b8e80941Smrg   ir_rvalue *new_rvalue = handle_expression(expr);
1036b8e80941Smrg   if (new_rvalue == *rvalue)
1037b8e80941Smrg      return;
1038b8e80941Smrg
1039b8e80941Smrg   /* If the expr used to be some vec OP scalar returning a vector, and the
1040b8e80941Smrg    * optimization gave us back a scalar, we still need to turn it into a
1041b8e80941Smrg    * vector.
1042b8e80941Smrg    */
1043b8e80941Smrg   *rvalue = swizzle_if_required(expr, new_rvalue);
1044b8e80941Smrg
1045b8e80941Smrg   this->progress = true;
1046b8e80941Smrg}
1047b8e80941Smrg
1048b8e80941Smrgbool
1049b8e80941Smrgdo_algebraic(exec_list *instructions, bool native_integers,
1050b8e80941Smrg             const struct gl_shader_compiler_options *options)
1051b8e80941Smrg{
1052b8e80941Smrg   ir_algebraic_visitor v(native_integers, options);
1053b8e80941Smrg
1054b8e80941Smrg   visit_list_elements(&v, instructions);
1055b8e80941Smrg
1056b8e80941Smrg   return v.progress;
1057b8e80941Smrg}
1058