1b8e80941Smrg/*
2b8e80941Smrg * Copyright © 2010 Intel Corporation
3b8e80941Smrg *
4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
6b8e80941Smrg * to deal in the Software without restriction, including without limitation
7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
10b8e80941Smrg *
11b8e80941Smrg * The above copyright notice and this permission notice (including the next
12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
13b8e80941Smrg * Software.
14b8e80941Smrg *
15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21b8e80941Smrg * DEALINGS IN THE SOFTWARE.
22b8e80941Smrg */
23b8e80941Smrg
24b8e80941Smrg/**
25b8e80941Smrg * \file lower_vector.cpp
26b8e80941Smrg * IR lowering pass to remove some types of ir_quadop_vector
27b8e80941Smrg *
28b8e80941Smrg * \author Ian Romanick <ian.d.romanick@intel.com>
29b8e80941Smrg */
30b8e80941Smrg
31b8e80941Smrg#include "ir.h"
32b8e80941Smrg#include "ir_rvalue_visitor.h"
33b8e80941Smrg
34b8e80941Smrgnamespace {
35b8e80941Smrg
36b8e80941Smrgclass lower_vector_visitor : public ir_rvalue_visitor {
37b8e80941Smrgpublic:
38b8e80941Smrg   lower_vector_visitor() : dont_lower_swz(false), progress(false)
39b8e80941Smrg   {
40b8e80941Smrg      /* empty */
41b8e80941Smrg   }
42b8e80941Smrg
43b8e80941Smrg   void handle_rvalue(ir_rvalue **rvalue);
44b8e80941Smrg
45b8e80941Smrg   /**
46b8e80941Smrg    * Should SWZ-like expressions be lowered?
47b8e80941Smrg    */
48b8e80941Smrg   bool dont_lower_swz;
49b8e80941Smrg
50b8e80941Smrg   bool progress;
51b8e80941Smrg};
52b8e80941Smrg
53b8e80941Smrg} /* anonymous namespace */
54b8e80941Smrg
55b8e80941Smrg/**
56b8e80941Smrg * Determine if an IR expression tree looks like an extended swizzle
57b8e80941Smrg *
58b8e80941Smrg * Extended swizzles consist of access of a single vector source (with possible
59b8e80941Smrg * per component negation) and the constants -1, 0, or 1.
60b8e80941Smrg */
61b8e80941Smrgstatic bool
62b8e80941Smrgis_extended_swizzle(ir_expression *ir)
63b8e80941Smrg{
64b8e80941Smrg   /* Track any variables that are accessed by this expression.
65b8e80941Smrg    */
66b8e80941Smrg   ir_variable *var = NULL;
67b8e80941Smrg
68b8e80941Smrg   assert(ir->operation == ir_quadop_vector);
69b8e80941Smrg
70b8e80941Smrg   for (unsigned i = 0; i < ir->type->vector_elements; i++) {
71b8e80941Smrg      ir_rvalue *op = ir->operands[i];
72b8e80941Smrg
73b8e80941Smrg      while (op != NULL) {
74b8e80941Smrg	 switch (op->ir_type) {
75b8e80941Smrg	 case ir_type_constant: {
76b8e80941Smrg	    const ir_constant *const c = op->as_constant();
77b8e80941Smrg
78b8e80941Smrg	    if (!c->is_one() && !c->is_zero() && !c->is_negative_one())
79b8e80941Smrg	       return false;
80b8e80941Smrg
81b8e80941Smrg	    op = NULL;
82b8e80941Smrg	    break;
83b8e80941Smrg	 }
84b8e80941Smrg
85b8e80941Smrg	 case ir_type_dereference_variable: {
86b8e80941Smrg	    ir_dereference_variable *const d = (ir_dereference_variable *) op;
87b8e80941Smrg
88b8e80941Smrg	    if ((var != NULL) && (var != d->var))
89b8e80941Smrg	       return false;
90b8e80941Smrg
91b8e80941Smrg	    var = d->var;
92b8e80941Smrg	    op = NULL;
93b8e80941Smrg	    break;
94b8e80941Smrg	 }
95b8e80941Smrg
96b8e80941Smrg	 case ir_type_expression: {
97b8e80941Smrg	    ir_expression *const ex = (ir_expression *) op;
98b8e80941Smrg
99b8e80941Smrg	    if (ex->operation != ir_unop_neg)
100b8e80941Smrg	       return false;
101b8e80941Smrg
102b8e80941Smrg	    op = ex->operands[0];
103b8e80941Smrg	    break;
104b8e80941Smrg	 }
105b8e80941Smrg
106b8e80941Smrg	 case ir_type_swizzle:
107b8e80941Smrg	    op = ((ir_swizzle *) op)->val;
108b8e80941Smrg	    break;
109b8e80941Smrg
110b8e80941Smrg	 default:
111b8e80941Smrg	    return false;
112b8e80941Smrg	 }
113b8e80941Smrg      }
114b8e80941Smrg   }
115b8e80941Smrg
116b8e80941Smrg   return true;
117b8e80941Smrg}
118b8e80941Smrg
119b8e80941Smrgvoid
120b8e80941Smrglower_vector_visitor::handle_rvalue(ir_rvalue **rvalue)
121b8e80941Smrg{
122b8e80941Smrg   if (!*rvalue)
123b8e80941Smrg      return;
124b8e80941Smrg
125b8e80941Smrg   ir_expression *expr = (*rvalue)->as_expression();
126b8e80941Smrg   if ((expr == NULL) || (expr->operation != ir_quadop_vector))
127b8e80941Smrg      return;
128b8e80941Smrg
129b8e80941Smrg   if (this->dont_lower_swz && is_extended_swizzle(expr))
130b8e80941Smrg      return;
131b8e80941Smrg
132b8e80941Smrg   /* FINISHME: Is this the right thing to use for the ralloc context?
133b8e80941Smrg    */
134b8e80941Smrg   void *const mem_ctx = expr;
135b8e80941Smrg
136b8e80941Smrg   assert(expr->type->vector_elements == expr->num_operands);
137b8e80941Smrg
138b8e80941Smrg   /* Generate a temporary with the same type as the ir_quadop_operation.
139b8e80941Smrg    */
140b8e80941Smrg   ir_variable *const temp =
141b8e80941Smrg      new(mem_ctx) ir_variable(expr->type, "vecop_tmp", ir_var_temporary);
142b8e80941Smrg
143b8e80941Smrg   this->base_ir->insert_before(temp);
144b8e80941Smrg
145b8e80941Smrg   /* Counter of the number of components collected so far.
146b8e80941Smrg    */
147b8e80941Smrg   unsigned assigned;
148b8e80941Smrg
149b8e80941Smrg   /* Write-mask in the destination that receives counted by 'assigned'.
150b8e80941Smrg    */
151b8e80941Smrg   unsigned write_mask;
152b8e80941Smrg
153b8e80941Smrg
154b8e80941Smrg   /* Generate upto four assignments to that variable.  Try to group component
155b8e80941Smrg    * assignments together:
156b8e80941Smrg    *
157b8e80941Smrg    * - All constant components can be assigned at once.
158b8e80941Smrg    * - All assigments of components from a single variable with the same
159b8e80941Smrg    *   unary operator can be assigned at once.
160b8e80941Smrg    */
161b8e80941Smrg   ir_constant_data d = { { 0 } };
162b8e80941Smrg
163b8e80941Smrg   assigned = 0;
164b8e80941Smrg   write_mask = 0;
165b8e80941Smrg   for (unsigned i = 0; i < expr->type->vector_elements; i++) {
166b8e80941Smrg      const ir_constant *const c = expr->operands[i]->as_constant();
167b8e80941Smrg
168b8e80941Smrg      if (c == NULL)
169b8e80941Smrg	 continue;
170b8e80941Smrg
171b8e80941Smrg      switch (expr->type->base_type) {
172b8e80941Smrg      case GLSL_TYPE_UINT:  d.u[assigned] = c->value.u[0]; break;
173b8e80941Smrg      case GLSL_TYPE_INT:   d.i[assigned] = c->value.i[0]; break;
174b8e80941Smrg      case GLSL_TYPE_FLOAT: d.f[assigned] = c->value.f[0]; break;
175b8e80941Smrg      case GLSL_TYPE_BOOL:  d.b[assigned] = c->value.b[0]; break;
176b8e80941Smrg      default:              assert(!"Should not get here."); break;
177b8e80941Smrg      }
178b8e80941Smrg
179b8e80941Smrg      write_mask |= (1U << i);
180b8e80941Smrg      assigned++;
181b8e80941Smrg   }
182b8e80941Smrg
183b8e80941Smrg   assert((write_mask == 0) == (assigned == 0));
184b8e80941Smrg
185b8e80941Smrg   /* If there were constant values, generate an assignment.
186b8e80941Smrg    */
187b8e80941Smrg   if (assigned > 0) {
188b8e80941Smrg      ir_constant *const c =
189b8e80941Smrg	 new(mem_ctx) ir_constant(glsl_type::get_instance(expr->type->base_type,
190b8e80941Smrg							  assigned, 1),
191b8e80941Smrg				  &d);
192b8e80941Smrg      ir_dereference *const lhs = new(mem_ctx) ir_dereference_variable(temp);
193b8e80941Smrg      ir_assignment *const assign =
194b8e80941Smrg	 new(mem_ctx) ir_assignment(lhs, c, NULL, write_mask);
195b8e80941Smrg
196b8e80941Smrg      this->base_ir->insert_before(assign);
197b8e80941Smrg   }
198b8e80941Smrg
199b8e80941Smrg   /* FINISHME: This should try to coalesce assignments.
200b8e80941Smrg    */
201b8e80941Smrg   for (unsigned i = 0; i < expr->type->vector_elements; i++) {
202b8e80941Smrg      if (expr->operands[i]->ir_type == ir_type_constant)
203b8e80941Smrg	 continue;
204b8e80941Smrg
205b8e80941Smrg      ir_dereference *const lhs = new(mem_ctx) ir_dereference_variable(temp);
206b8e80941Smrg      ir_assignment *const assign =
207b8e80941Smrg	 new(mem_ctx) ir_assignment(lhs, expr->operands[i], NULL, (1U << i));
208b8e80941Smrg
209b8e80941Smrg      this->base_ir->insert_before(assign);
210b8e80941Smrg      assigned++;
211b8e80941Smrg   }
212b8e80941Smrg
213b8e80941Smrg   assert(assigned == expr->type->vector_elements);
214b8e80941Smrg
215b8e80941Smrg   *rvalue = new(mem_ctx) ir_dereference_variable(temp);
216b8e80941Smrg   this->progress = true;
217b8e80941Smrg}
218b8e80941Smrg
219b8e80941Smrgbool
220b8e80941Smrglower_quadop_vector(exec_list *instructions, bool dont_lower_swz)
221b8e80941Smrg{
222b8e80941Smrg   lower_vector_visitor v;
223b8e80941Smrg
224b8e80941Smrg   v.dont_lower_swz = dont_lower_swz;
225b8e80941Smrg   visit_list_elements(&v, instructions);
226b8e80941Smrg
227b8e80941Smrg   return v.progress;
228b8e80941Smrg}
229