1/*
2 * Copyright © 2013 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24/**
25 * \file opt_vectorize.cpp
26 *
27 * Combines scalar assignments of the same expression (modulo swizzle) to
28 * multiple channels of the same variable into a single vectorized expression
29 * and assignment.
30 *
31 * Many generated shaders contain scalarized code. That is, they contain
32 *
33 * r1.x = log2(v0.x);
34 * r1.y = log2(v0.y);
35 * r1.z = log2(v0.z);
36 *
37 * rather than
38 *
39 * r1.xyz = log2(v0.xyz);
40 *
41 * We look for consecutive assignments of the same expression (modulo swizzle)
42 * to each channel of the same variable.
43 *
44 * For instance, we want to convert these three scalar operations
45 *
46 * (assign (x) (var_ref r1) (expression float log2 (swiz x (var_ref v0))))
47 * (assign (y) (var_ref r1) (expression float log2 (swiz y (var_ref v0))))
48 * (assign (z) (var_ref r1) (expression float log2 (swiz z (var_ref v0))))
49 *
50 * into a single vector operation
51 *
52 * (assign (xyz) (var_ref r1) (expression vec3 log2 (swiz xyz (var_ref v0))))
53 */
54
55#include "ir.h"
56#include "ir_visitor.h"
57#include "ir_optimization.h"
58#include "compiler/glsl_types.h"
59#include "program/prog_instruction.h"
60
61namespace {
62
63class ir_vectorize_visitor : public ir_hierarchical_visitor {
64public:
65   void clear()
66   {
67      assignment[0] = NULL;
68      assignment[1] = NULL;
69      assignment[2] = NULL;
70      assignment[3] = NULL;
71      current_assignment = NULL;
72      last_assignment = NULL;
73      channels = 0;
74      has_swizzle = false;
75   }
76
77   ir_vectorize_visitor()
78   {
79      clear();
80      progress = false;
81   }
82
83   virtual ir_visitor_status visit_enter(ir_assignment *);
84   virtual ir_visitor_status visit_enter(ir_swizzle *);
85   virtual ir_visitor_status visit_enter(ir_dereference_array *);
86   virtual ir_visitor_status visit_enter(ir_expression *);
87   virtual ir_visitor_status visit_enter(ir_if *);
88   virtual ir_visitor_status visit_enter(ir_loop *);
89   virtual ir_visitor_status visit_enter(ir_texture *);
90
91   virtual ir_visitor_status visit_leave(ir_assignment *);
92
93   void try_vectorize();
94
95   ir_assignment *assignment[4];
96   ir_assignment *current_assignment, *last_assignment;
97   unsigned channels;
98   bool has_swizzle;
99
100   bool progress;
101};
102
103} /* unnamed namespace */
104
105/**
106 * Rewrites the swizzles and types of a right-hand side of an assignment.
107 *
108 * From the example above, this function would be called (by visit_tree()) on
109 * the nodes of the tree (expression float log2 (swiz z   (var_ref v0))),
110 * rewriting it into     (expression vec3  log2 (swiz xyz (var_ref v0))).
111 *
112 * The function operates on ir_expressions (and its operands) and ir_swizzles.
113 * For expressions it sets a new type and swizzles any non-expression and non-
114 * swizzle scalar operands into appropriately sized vector arguments. For
115 * example, if combining
116 *
117 * (assign (x) (var_ref r1) (expression float + (swiz x (var_ref v0) (var_ref v1))))
118 * (assign (y) (var_ref r1) (expression float + (swiz y (var_ref v0) (var_ref v1))))
119 *
120 * where v1 is a scalar, rewrite_swizzle() would insert a swizzle on
121 * (var_ref v1) such that the final result was
122 *
123 * (assign (xy) (var_ref r1) (expression vec2 + (swiz xy (var_ref v0))
124 *                                              (swiz xx (var_ref v1))))
125 *
126 * For swizzles, it sets a new type, and if the variable being swizzled is a
127 * vector it overwrites the swizzle mask with the ir_swizzle_mask passed as the
128 * data parameter. If the swizzled variable is scalar, then the swizzle was
129 * added by an earlier call to rewrite_swizzle() on an expression, so the
130 * mask should not be modified.
131 */
132static void
133rewrite_swizzle(ir_instruction *ir, void *data)
134{
135   ir_swizzle_mask *mask = (ir_swizzle_mask *)data;
136
137   switch (ir->ir_type) {
138   case ir_type_swizzle: {
139      ir_swizzle *swz = (ir_swizzle *)ir;
140      if (swz->val->type->is_vector()) {
141         swz->mask = *mask;
142      }
143      swz->type = glsl_type::get_instance(swz->type->base_type,
144                                          mask->num_components, 1);
145      break;
146   }
147   case ir_type_expression: {
148      ir_expression *expr = (ir_expression *)ir;
149      expr->type = glsl_type::get_instance(expr->type->base_type,
150                                           mask->num_components, 1);
151      for (unsigned i = 0; i < 4; i++) {
152         if (expr->operands[i]) {
153            ir_rvalue *rval = expr->operands[i]->as_rvalue();
154            if (rval && rval->type->is_scalar() &&
155                !rval->as_expression() && !rval->as_swizzle()) {
156               expr->operands[i] = new(ir) ir_swizzle(rval, 0, 0, 0, 0,
157                                                      mask->num_components);
158            }
159         }
160      }
161      break;
162   }
163   default:
164      break;
165   }
166}
167
168/**
169 * Attempt to vectorize the previously saved assignments, and clear them from
170 * consideration.
171 *
172 * If the assignments are able to be combined, it modifies in-place the last
173 * assignment seen to be an equivalent vector form of the scalar assignments.
174 * It then removes the other now obsolete scalar assignments.
175 */
176void
177ir_vectorize_visitor::try_vectorize()
178{
179   if (this->last_assignment && this->channels > 1) {
180      ir_swizzle_mask mask = {0, 0, 0, 0, channels, 0};
181
182      this->last_assignment->write_mask = 0;
183
184      for (unsigned i = 0, j = 0; i < 4; i++) {
185         if (this->assignment[i]) {
186            this->last_assignment->write_mask |= 1 << i;
187
188            if (this->assignment[i] != this->last_assignment) {
189               this->assignment[i]->remove();
190            }
191
192            switch (j) {
193            case 0: mask.x = i; break;
194            case 1: mask.y = i; break;
195            case 2: mask.z = i; break;
196            case 3: mask.w = i; break;
197            }
198
199            j++;
200         }
201      }
202
203      visit_tree(this->last_assignment->rhs, rewrite_swizzle, &mask);
204
205      this->progress = true;
206   }
207   clear();
208}
209
210/**
211 * Returns whether the write mask is a single channel.
212 */
213static bool
214single_channel_write_mask(unsigned write_mask)
215{
216   return write_mask != 0 && (write_mask & (write_mask - 1)) == 0;
217}
218
219/**
220 * Translates single-channeled write mask to single-channeled swizzle.
221 */
222static unsigned
223write_mask_to_swizzle(unsigned write_mask)
224{
225   switch (write_mask) {
226   case WRITEMASK_X: return SWIZZLE_X;
227   case WRITEMASK_Y: return SWIZZLE_Y;
228   case WRITEMASK_Z: return SWIZZLE_Z;
229   case WRITEMASK_W: return SWIZZLE_W;
230   }
231   unreachable("not reached");
232}
233
234/**
235 * Returns whether a single-channeled write mask matches a swizzle.
236 */
237static bool
238write_mask_matches_swizzle(unsigned write_mask,
239                           const ir_swizzle *swz)
240{
241   return ((write_mask == WRITEMASK_X && swz->mask.x == SWIZZLE_X) ||
242           (write_mask == WRITEMASK_Y && swz->mask.x == SWIZZLE_Y) ||
243           (write_mask == WRITEMASK_Z && swz->mask.x == SWIZZLE_Z) ||
244           (write_mask == WRITEMASK_W && swz->mask.x == SWIZZLE_W));
245}
246
247/**
248 * Upon entering an ir_assignment, attempt to vectorize the currently tracked
249 * assignments if the current assignment is not suitable. Keep a pointer to
250 * the current assignment.
251 */
252ir_visitor_status
253ir_vectorize_visitor::visit_enter(ir_assignment *ir)
254{
255   ir_dereference *lhs = this->last_assignment != NULL ?
256                         this->last_assignment->lhs : NULL;
257   ir_rvalue *rhs = this->last_assignment != NULL ?
258                    this->last_assignment->rhs : NULL;
259
260   if (ir->condition ||
261       this->channels >= 4 ||
262       !single_channel_write_mask(ir->write_mask) ||
263       this->assignment[write_mask_to_swizzle(ir->write_mask)] != NULL ||
264       (lhs && !ir->lhs->equals(lhs)) ||
265       (rhs && !ir->rhs->equals(rhs, ir_type_swizzle))) {
266      try_vectorize();
267   }
268
269   this->current_assignment = ir;
270
271   return visit_continue;
272}
273
274/**
275 * Upon entering an ir_swizzle, set ::has_swizzle if we're visiting from an
276 * ir_assignment (i.e., that ::current_assignment is set) and the swizzle mask
277 * matches the current assignment's write mask.
278 *
279 * If the write mask doesn't match the swizzle mask, remove the current
280 * assignment from further consideration.
281 */
282ir_visitor_status
283ir_vectorize_visitor::visit_enter(ir_swizzle *ir)
284{
285   if (this->current_assignment) {
286      if (write_mask_matches_swizzle(this->current_assignment->write_mask, ir)) {
287         this->has_swizzle = true;
288      } else {
289         this->current_assignment = NULL;
290      }
291   }
292   return visit_continue;
293}
294
295/* Upon entering an ir_array_dereference, remove the current assignment from
296 * further consideration. Since the index of an array dereference must scalar,
297 * we are not able to vectorize it.
298 *
299 * FINISHME: If all of scalar indices are identical we could vectorize.
300 */
301ir_visitor_status
302ir_vectorize_visitor::visit_enter(ir_dereference_array *)
303{
304   this->current_assignment = NULL;
305   return visit_continue_with_parent;
306}
307
308/**
309 * Upon entering an ir_expression, remove the current assignment from further
310 * consideration if the expression operates horizontally on vectors.
311 */
312ir_visitor_status
313ir_vectorize_visitor::visit_enter(ir_expression *ir)
314{
315   if (ir->is_horizontal()) {
316      this->current_assignment = NULL;
317      return visit_continue_with_parent;
318   }
319   return visit_continue;
320}
321
322/* Since there is no statement to visit between the "then" and "else"
323 * instructions try to vectorize before, in between, and after them to avoid
324 * combining statements from different basic blocks.
325 */
326ir_visitor_status
327ir_vectorize_visitor::visit_enter(ir_if *ir)
328{
329   try_vectorize();
330
331   visit_list_elements(this, &ir->then_instructions);
332   try_vectorize();
333
334   visit_list_elements(this, &ir->else_instructions);
335   try_vectorize();
336
337   return visit_continue_with_parent;
338}
339
340/* Since there is no statement to visit between the instructions in the body of
341 * the loop and the instructions after it try to vectorize before and after the
342 * body to avoid combining statements from different basic blocks.
343 */
344ir_visitor_status
345ir_vectorize_visitor::visit_enter(ir_loop *ir)
346{
347   try_vectorize();
348
349   visit_list_elements(this, &ir->body_instructions);
350   try_vectorize();
351
352   return visit_continue_with_parent;
353}
354
355/**
356 * Upon entering an ir_texture, remove the current assignment from
357 * further consideration. Vectorizing multiple texture lookups into one
358 * is wrong.
359 */
360ir_visitor_status
361ir_vectorize_visitor::visit_enter(ir_texture *)
362{
363   this->current_assignment = NULL;
364   return visit_continue_with_parent;
365}
366
367/**
368 * Upon leaving an ir_assignment, save a pointer to it in ::assignment[] if
369 * the swizzle mask(s) found were appropriate. Also save a pointer in
370 * ::last_assignment so that we can compare future assignments with it.
371 *
372 * Finally, clear ::current_assignment and ::has_swizzle.
373 */
374ir_visitor_status
375ir_vectorize_visitor::visit_leave(ir_assignment *ir)
376{
377   if (this->has_swizzle && this->current_assignment) {
378      assert(this->current_assignment == ir);
379
380      unsigned channel = write_mask_to_swizzle(this->current_assignment->write_mask);
381      this->assignment[channel] = ir;
382      this->channels++;
383
384      this->last_assignment = this->current_assignment;
385   }
386   this->current_assignment = NULL;
387   this->has_swizzle = false;
388   return visit_continue;
389}
390
391/**
392 * Combines scalar assignments of the same expression (modulo swizzle) to
393 * multiple channels of the same variable into a single vectorized expression
394 * and assignment.
395 */
396bool
397do_vectorize(exec_list *instructions)
398{
399   ir_vectorize_visitor v;
400
401   v.run(instructions);
402
403   /* Try to vectorize the last assignments seen. */
404   v.try_vectorize();
405
406   return v.progress;
407}
408