1b8e80941Smrg/*
2b8e80941Smrg * Copyright © 2013 Intel Corporation
3b8e80941Smrg *
4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
6b8e80941Smrg * to deal in the Software without restriction, including without limitation
7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
10b8e80941Smrg *
11b8e80941Smrg * The above copyright notice and this permission notice (including the next
12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
13b8e80941Smrg * Software.
14b8e80941Smrg *
15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21b8e80941Smrg * DEALINGS IN THE SOFTWARE.
22b8e80941Smrg */
23b8e80941Smrg
24b8e80941Smrg/**
25b8e80941Smrg * \file opt_vectorize.cpp
26b8e80941Smrg *
27b8e80941Smrg * Combines scalar assignments of the same expression (modulo swizzle) to
28b8e80941Smrg * multiple channels of the same variable into a single vectorized expression
29b8e80941Smrg * and assignment.
30b8e80941Smrg *
31b8e80941Smrg * Many generated shaders contain scalarized code. That is, they contain
32b8e80941Smrg *
33b8e80941Smrg * r1.x = log2(v0.x);
34b8e80941Smrg * r1.y = log2(v0.y);
35b8e80941Smrg * r1.z = log2(v0.z);
36b8e80941Smrg *
37b8e80941Smrg * rather than
38b8e80941Smrg *
39b8e80941Smrg * r1.xyz = log2(v0.xyz);
40b8e80941Smrg *
41b8e80941Smrg * We look for consecutive assignments of the same expression (modulo swizzle)
42b8e80941Smrg * to each channel of the same variable.
43b8e80941Smrg *
44b8e80941Smrg * For instance, we want to convert these three scalar operations
45b8e80941Smrg *
46b8e80941Smrg * (assign (x) (var_ref r1) (expression float log2 (swiz x (var_ref v0))))
47b8e80941Smrg * (assign (y) (var_ref r1) (expression float log2 (swiz y (var_ref v0))))
48b8e80941Smrg * (assign (z) (var_ref r1) (expression float log2 (swiz z (var_ref v0))))
49b8e80941Smrg *
50b8e80941Smrg * into a single vector operation
51b8e80941Smrg *
52b8e80941Smrg * (assign (xyz) (var_ref r1) (expression vec3 log2 (swiz xyz (var_ref v0))))
53b8e80941Smrg */
54b8e80941Smrg
55b8e80941Smrg#include "ir.h"
56b8e80941Smrg#include "ir_visitor.h"
57b8e80941Smrg#include "ir_optimization.h"
58b8e80941Smrg#include "compiler/glsl_types.h"
59b8e80941Smrg#include "program/prog_instruction.h"
60b8e80941Smrg
61b8e80941Smrgnamespace {
62b8e80941Smrg
63b8e80941Smrgclass ir_vectorize_visitor : public ir_hierarchical_visitor {
64b8e80941Smrgpublic:
65b8e80941Smrg   void clear()
66b8e80941Smrg   {
67b8e80941Smrg      assignment[0] = NULL;
68b8e80941Smrg      assignment[1] = NULL;
69b8e80941Smrg      assignment[2] = NULL;
70b8e80941Smrg      assignment[3] = NULL;
71b8e80941Smrg      current_assignment = NULL;
72b8e80941Smrg      last_assignment = NULL;
73b8e80941Smrg      channels = 0;
74b8e80941Smrg      has_swizzle = false;
75b8e80941Smrg   }
76b8e80941Smrg
77b8e80941Smrg   ir_vectorize_visitor()
78b8e80941Smrg   {
79b8e80941Smrg      clear();
80b8e80941Smrg      progress = false;
81b8e80941Smrg   }
82b8e80941Smrg
83b8e80941Smrg   virtual ir_visitor_status visit_enter(ir_assignment *);
84b8e80941Smrg   virtual ir_visitor_status visit_enter(ir_swizzle *);
85b8e80941Smrg   virtual ir_visitor_status visit_enter(ir_dereference_array *);
86b8e80941Smrg   virtual ir_visitor_status visit_enter(ir_expression *);
87b8e80941Smrg   virtual ir_visitor_status visit_enter(ir_if *);
88b8e80941Smrg   virtual ir_visitor_status visit_enter(ir_loop *);
89b8e80941Smrg   virtual ir_visitor_status visit_enter(ir_texture *);
90b8e80941Smrg
91b8e80941Smrg   virtual ir_visitor_status visit_leave(ir_assignment *);
92b8e80941Smrg
93b8e80941Smrg   void try_vectorize();
94b8e80941Smrg
95b8e80941Smrg   ir_assignment *assignment[4];
96b8e80941Smrg   ir_assignment *current_assignment, *last_assignment;
97b8e80941Smrg   unsigned channels;
98b8e80941Smrg   bool has_swizzle;
99b8e80941Smrg
100b8e80941Smrg   bool progress;
101b8e80941Smrg};
102b8e80941Smrg
103b8e80941Smrg} /* unnamed namespace */
104b8e80941Smrg
105b8e80941Smrg/**
106b8e80941Smrg * Rewrites the swizzles and types of a right-hand side of an assignment.
107b8e80941Smrg *
108b8e80941Smrg * From the example above, this function would be called (by visit_tree()) on
109b8e80941Smrg * the nodes of the tree (expression float log2 (swiz z   (var_ref v0))),
110b8e80941Smrg * rewriting it into     (expression vec3  log2 (swiz xyz (var_ref v0))).
111b8e80941Smrg *
112b8e80941Smrg * The function operates on ir_expressions (and its operands) and ir_swizzles.
113b8e80941Smrg * For expressions it sets a new type and swizzles any non-expression and non-
114b8e80941Smrg * swizzle scalar operands into appropriately sized vector arguments. For
115b8e80941Smrg * example, if combining
116b8e80941Smrg *
117b8e80941Smrg * (assign (x) (var_ref r1) (expression float + (swiz x (var_ref v0) (var_ref v1))))
118b8e80941Smrg * (assign (y) (var_ref r1) (expression float + (swiz y (var_ref v0) (var_ref v1))))
119b8e80941Smrg *
120b8e80941Smrg * where v1 is a scalar, rewrite_swizzle() would insert a swizzle on
121b8e80941Smrg * (var_ref v1) such that the final result was
122b8e80941Smrg *
123b8e80941Smrg * (assign (xy) (var_ref r1) (expression vec2 + (swiz xy (var_ref v0))
124b8e80941Smrg *                                              (swiz xx (var_ref v1))))
125b8e80941Smrg *
126b8e80941Smrg * For swizzles, it sets a new type, and if the variable being swizzled is a
127b8e80941Smrg * vector it overwrites the swizzle mask with the ir_swizzle_mask passed as the
128b8e80941Smrg * data parameter. If the swizzled variable is scalar, then the swizzle was
129b8e80941Smrg * added by an earlier call to rewrite_swizzle() on an expression, so the
130b8e80941Smrg * mask should not be modified.
131b8e80941Smrg */
132b8e80941Smrgstatic void
133b8e80941Smrgrewrite_swizzle(ir_instruction *ir, void *data)
134b8e80941Smrg{
135b8e80941Smrg   ir_swizzle_mask *mask = (ir_swizzle_mask *)data;
136b8e80941Smrg
137b8e80941Smrg   switch (ir->ir_type) {
138b8e80941Smrg   case ir_type_swizzle: {
139b8e80941Smrg      ir_swizzle *swz = (ir_swizzle *)ir;
140b8e80941Smrg      if (swz->val->type->is_vector()) {
141b8e80941Smrg         swz->mask = *mask;
142b8e80941Smrg      }
143b8e80941Smrg      swz->type = glsl_type::get_instance(swz->type->base_type,
144b8e80941Smrg                                          mask->num_components, 1);
145b8e80941Smrg      break;
146b8e80941Smrg   }
147b8e80941Smrg   case ir_type_expression: {
148b8e80941Smrg      ir_expression *expr = (ir_expression *)ir;
149b8e80941Smrg      expr->type = glsl_type::get_instance(expr->type->base_type,
150b8e80941Smrg                                           mask->num_components, 1);
151b8e80941Smrg      for (unsigned i = 0; i < 4; i++) {
152b8e80941Smrg         if (expr->operands[i]) {
153b8e80941Smrg            ir_rvalue *rval = expr->operands[i]->as_rvalue();
154b8e80941Smrg            if (rval && rval->type->is_scalar() &&
155b8e80941Smrg                !rval->as_expression() && !rval->as_swizzle()) {
156b8e80941Smrg               expr->operands[i] = new(ir) ir_swizzle(rval, 0, 0, 0, 0,
157b8e80941Smrg                                                      mask->num_components);
158b8e80941Smrg            }
159b8e80941Smrg         }
160b8e80941Smrg      }
161b8e80941Smrg      break;
162b8e80941Smrg   }
163b8e80941Smrg   default:
164b8e80941Smrg      break;
165b8e80941Smrg   }
166b8e80941Smrg}
167b8e80941Smrg
168b8e80941Smrg/**
169b8e80941Smrg * Attempt to vectorize the previously saved assignments, and clear them from
170b8e80941Smrg * consideration.
171b8e80941Smrg *
172b8e80941Smrg * If the assignments are able to be combined, it modifies in-place the last
173b8e80941Smrg * assignment seen to be an equivalent vector form of the scalar assignments.
174b8e80941Smrg * It then removes the other now obsolete scalar assignments.
175b8e80941Smrg */
176b8e80941Smrgvoid
177b8e80941Smrgir_vectorize_visitor::try_vectorize()
178b8e80941Smrg{
179b8e80941Smrg   if (this->last_assignment && this->channels > 1) {
180b8e80941Smrg      ir_swizzle_mask mask = {0, 0, 0, 0, channels, 0};
181b8e80941Smrg
182b8e80941Smrg      this->last_assignment->write_mask = 0;
183b8e80941Smrg
184b8e80941Smrg      for (unsigned i = 0, j = 0; i < 4; i++) {
185b8e80941Smrg         if (this->assignment[i]) {
186b8e80941Smrg            this->last_assignment->write_mask |= 1 << i;
187b8e80941Smrg
188b8e80941Smrg            if (this->assignment[i] != this->last_assignment) {
189b8e80941Smrg               this->assignment[i]->remove();
190b8e80941Smrg            }
191b8e80941Smrg
192b8e80941Smrg            switch (j) {
193b8e80941Smrg            case 0: mask.x = i; break;
194b8e80941Smrg            case 1: mask.y = i; break;
195b8e80941Smrg            case 2: mask.z = i; break;
196b8e80941Smrg            case 3: mask.w = i; break;
197b8e80941Smrg            }
198b8e80941Smrg
199b8e80941Smrg            j++;
200b8e80941Smrg         }
201b8e80941Smrg      }
202b8e80941Smrg
203b8e80941Smrg      visit_tree(this->last_assignment->rhs, rewrite_swizzle, &mask);
204b8e80941Smrg
205b8e80941Smrg      this->progress = true;
206b8e80941Smrg   }
207b8e80941Smrg   clear();
208b8e80941Smrg}
209b8e80941Smrg
210b8e80941Smrg/**
211b8e80941Smrg * Returns whether the write mask is a single channel.
212b8e80941Smrg */
213b8e80941Smrgstatic bool
214b8e80941Smrgsingle_channel_write_mask(unsigned write_mask)
215b8e80941Smrg{
216b8e80941Smrg   return write_mask != 0 && (write_mask & (write_mask - 1)) == 0;
217b8e80941Smrg}
218b8e80941Smrg
219b8e80941Smrg/**
220b8e80941Smrg * Translates single-channeled write mask to single-channeled swizzle.
221b8e80941Smrg */
222b8e80941Smrgstatic unsigned
223b8e80941Smrgwrite_mask_to_swizzle(unsigned write_mask)
224b8e80941Smrg{
225b8e80941Smrg   switch (write_mask) {
226b8e80941Smrg   case WRITEMASK_X: return SWIZZLE_X;
227b8e80941Smrg   case WRITEMASK_Y: return SWIZZLE_Y;
228b8e80941Smrg   case WRITEMASK_Z: return SWIZZLE_Z;
229b8e80941Smrg   case WRITEMASK_W: return SWIZZLE_W;
230b8e80941Smrg   }
231b8e80941Smrg   unreachable("not reached");
232b8e80941Smrg}
233b8e80941Smrg
234b8e80941Smrg/**
235b8e80941Smrg * Returns whether a single-channeled write mask matches a swizzle.
236b8e80941Smrg */
237b8e80941Smrgstatic bool
238b8e80941Smrgwrite_mask_matches_swizzle(unsigned write_mask,
239b8e80941Smrg                           const ir_swizzle *swz)
240b8e80941Smrg{
241b8e80941Smrg   return ((write_mask == WRITEMASK_X && swz->mask.x == SWIZZLE_X) ||
242b8e80941Smrg           (write_mask == WRITEMASK_Y && swz->mask.x == SWIZZLE_Y) ||
243b8e80941Smrg           (write_mask == WRITEMASK_Z && swz->mask.x == SWIZZLE_Z) ||
244b8e80941Smrg           (write_mask == WRITEMASK_W && swz->mask.x == SWIZZLE_W));
245b8e80941Smrg}
246b8e80941Smrg
247b8e80941Smrg/**
248b8e80941Smrg * Upon entering an ir_assignment, attempt to vectorize the currently tracked
249b8e80941Smrg * assignments if the current assignment is not suitable. Keep a pointer to
250b8e80941Smrg * the current assignment.
251b8e80941Smrg */
252b8e80941Smrgir_visitor_status
253b8e80941Smrgir_vectorize_visitor::visit_enter(ir_assignment *ir)
254b8e80941Smrg{
255b8e80941Smrg   ir_dereference *lhs = this->last_assignment != NULL ?
256b8e80941Smrg                         this->last_assignment->lhs : NULL;
257b8e80941Smrg   ir_rvalue *rhs = this->last_assignment != NULL ?
258b8e80941Smrg                    this->last_assignment->rhs : NULL;
259b8e80941Smrg
260b8e80941Smrg   if (ir->condition ||
261b8e80941Smrg       this->channels >= 4 ||
262b8e80941Smrg       !single_channel_write_mask(ir->write_mask) ||
263b8e80941Smrg       this->assignment[write_mask_to_swizzle(ir->write_mask)] != NULL ||
264b8e80941Smrg       (lhs && !ir->lhs->equals(lhs)) ||
265b8e80941Smrg       (rhs && !ir->rhs->equals(rhs, ir_type_swizzle))) {
266b8e80941Smrg      try_vectorize();
267b8e80941Smrg   }
268b8e80941Smrg
269b8e80941Smrg   this->current_assignment = ir;
270b8e80941Smrg
271b8e80941Smrg   return visit_continue;
272b8e80941Smrg}
273b8e80941Smrg
274b8e80941Smrg/**
275b8e80941Smrg * Upon entering an ir_swizzle, set ::has_swizzle if we're visiting from an
276b8e80941Smrg * ir_assignment (i.e., that ::current_assignment is set) and the swizzle mask
277b8e80941Smrg * matches the current assignment's write mask.
278b8e80941Smrg *
279b8e80941Smrg * If the write mask doesn't match the swizzle mask, remove the current
280b8e80941Smrg * assignment from further consideration.
281b8e80941Smrg */
282b8e80941Smrgir_visitor_status
283b8e80941Smrgir_vectorize_visitor::visit_enter(ir_swizzle *ir)
284b8e80941Smrg{
285b8e80941Smrg   if (this->current_assignment) {
286b8e80941Smrg      if (write_mask_matches_swizzle(this->current_assignment->write_mask, ir)) {
287b8e80941Smrg         this->has_swizzle = true;
288b8e80941Smrg      } else {
289b8e80941Smrg         this->current_assignment = NULL;
290b8e80941Smrg      }
291b8e80941Smrg   }
292b8e80941Smrg   return visit_continue;
293b8e80941Smrg}
294b8e80941Smrg
295b8e80941Smrg/* Upon entering an ir_array_dereference, remove the current assignment from
296b8e80941Smrg * further consideration. Since the index of an array dereference must scalar,
297b8e80941Smrg * we are not able to vectorize it.
298b8e80941Smrg *
299b8e80941Smrg * FINISHME: If all of scalar indices are identical we could vectorize.
300b8e80941Smrg */
301b8e80941Smrgir_visitor_status
302b8e80941Smrgir_vectorize_visitor::visit_enter(ir_dereference_array *)
303b8e80941Smrg{
304b8e80941Smrg   this->current_assignment = NULL;
305b8e80941Smrg   return visit_continue_with_parent;
306b8e80941Smrg}
307b8e80941Smrg
308b8e80941Smrg/**
309b8e80941Smrg * Upon entering an ir_expression, remove the current assignment from further
310b8e80941Smrg * consideration if the expression operates horizontally on vectors.
311b8e80941Smrg */
312b8e80941Smrgir_visitor_status
313b8e80941Smrgir_vectorize_visitor::visit_enter(ir_expression *ir)
314b8e80941Smrg{
315b8e80941Smrg   if (ir->is_horizontal()) {
316b8e80941Smrg      this->current_assignment = NULL;
317b8e80941Smrg      return visit_continue_with_parent;
318b8e80941Smrg   }
319b8e80941Smrg   return visit_continue;
320b8e80941Smrg}
321b8e80941Smrg
322b8e80941Smrg/* Since there is no statement to visit between the "then" and "else"
323b8e80941Smrg * instructions try to vectorize before, in between, and after them to avoid
324b8e80941Smrg * combining statements from different basic blocks.
325b8e80941Smrg */
326b8e80941Smrgir_visitor_status
327b8e80941Smrgir_vectorize_visitor::visit_enter(ir_if *ir)
328b8e80941Smrg{
329b8e80941Smrg   try_vectorize();
330b8e80941Smrg
331b8e80941Smrg   visit_list_elements(this, &ir->then_instructions);
332b8e80941Smrg   try_vectorize();
333b8e80941Smrg
334b8e80941Smrg   visit_list_elements(this, &ir->else_instructions);
335b8e80941Smrg   try_vectorize();
336b8e80941Smrg
337b8e80941Smrg   return visit_continue_with_parent;
338b8e80941Smrg}
339b8e80941Smrg
340b8e80941Smrg/* Since there is no statement to visit between the instructions in the body of
341b8e80941Smrg * the loop and the instructions after it try to vectorize before and after the
342b8e80941Smrg * body to avoid combining statements from different basic blocks.
343b8e80941Smrg */
344b8e80941Smrgir_visitor_status
345b8e80941Smrgir_vectorize_visitor::visit_enter(ir_loop *ir)
346b8e80941Smrg{
347b8e80941Smrg   try_vectorize();
348b8e80941Smrg
349b8e80941Smrg   visit_list_elements(this, &ir->body_instructions);
350b8e80941Smrg   try_vectorize();
351b8e80941Smrg
352b8e80941Smrg   return visit_continue_with_parent;
353b8e80941Smrg}
354b8e80941Smrg
355b8e80941Smrg/**
356b8e80941Smrg * Upon entering an ir_texture, remove the current assignment from
357b8e80941Smrg * further consideration. Vectorizing multiple texture lookups into one
358b8e80941Smrg * is wrong.
359b8e80941Smrg */
360b8e80941Smrgir_visitor_status
361b8e80941Smrgir_vectorize_visitor::visit_enter(ir_texture *)
362b8e80941Smrg{
363b8e80941Smrg   this->current_assignment = NULL;
364b8e80941Smrg   return visit_continue_with_parent;
365b8e80941Smrg}
366b8e80941Smrg
367b8e80941Smrg/**
368b8e80941Smrg * Upon leaving an ir_assignment, save a pointer to it in ::assignment[] if
369b8e80941Smrg * the swizzle mask(s) found were appropriate. Also save a pointer in
370b8e80941Smrg * ::last_assignment so that we can compare future assignments with it.
371b8e80941Smrg *
372b8e80941Smrg * Finally, clear ::current_assignment and ::has_swizzle.
373b8e80941Smrg */
374b8e80941Smrgir_visitor_status
375b8e80941Smrgir_vectorize_visitor::visit_leave(ir_assignment *ir)
376b8e80941Smrg{
377b8e80941Smrg   if (this->has_swizzle && this->current_assignment) {
378b8e80941Smrg      assert(this->current_assignment == ir);
379b8e80941Smrg
380b8e80941Smrg      unsigned channel = write_mask_to_swizzle(this->current_assignment->write_mask);
381b8e80941Smrg      this->assignment[channel] = ir;
382b8e80941Smrg      this->channels++;
383b8e80941Smrg
384b8e80941Smrg      this->last_assignment = this->current_assignment;
385b8e80941Smrg   }
386b8e80941Smrg   this->current_assignment = NULL;
387b8e80941Smrg   this->has_swizzle = false;
388b8e80941Smrg   return visit_continue;
389b8e80941Smrg}
390b8e80941Smrg
391b8e80941Smrg/**
392b8e80941Smrg * Combines scalar assignments of the same expression (modulo swizzle) to
393b8e80941Smrg * multiple channels of the same variable into a single vectorized expression
394b8e80941Smrg * and assignment.
395b8e80941Smrg */
396b8e80941Smrgbool
397b8e80941Smrgdo_vectorize(exec_list *instructions)
398b8e80941Smrg{
399b8e80941Smrg   ir_vectorize_visitor v;
400b8e80941Smrg
401b8e80941Smrg   v.run(instructions);
402b8e80941Smrg
403b8e80941Smrg   /* Try to vectorize the last assignments seen. */
404b8e80941Smrg   v.try_vectorize();
405b8e80941Smrg
406b8e80941Smrg   return v.progress;
407b8e80941Smrg}
408