101e04c3fSmrg/*
201e04c3fSmrg * Copyright © 2013 Intel Corporation
301e04c3fSmrg *
401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
501e04c3fSmrg * copy of this software and associated documentation files (the "Software"),
601e04c3fSmrg * to deal in the Software without restriction, including without limitation
701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the
901e04c3fSmrg * Software is furnished to do so, subject to the following conditions:
1001e04c3fSmrg *
1101e04c3fSmrg * The above copyright notice and this permission notice (including the next
1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the
1301e04c3fSmrg * Software.
1401e04c3fSmrg *
1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
2101e04c3fSmrg * DEALINGS IN THE SOFTWARE.
2201e04c3fSmrg */
2301e04c3fSmrg
2401e04c3fSmrg/**
2501e04c3fSmrg * \file opt_vectorize.cpp
2601e04c3fSmrg *
2701e04c3fSmrg * Combines scalar assignments of the same expression (modulo swizzle) to
2801e04c3fSmrg * multiple channels of the same variable into a single vectorized expression
2901e04c3fSmrg * and assignment.
3001e04c3fSmrg *
3101e04c3fSmrg * Many generated shaders contain scalarized code. That is, they contain
3201e04c3fSmrg *
3301e04c3fSmrg * r1.x = log2(v0.x);
3401e04c3fSmrg * r1.y = log2(v0.y);
3501e04c3fSmrg * r1.z = log2(v0.z);
3601e04c3fSmrg *
3701e04c3fSmrg * rather than
3801e04c3fSmrg *
3901e04c3fSmrg * r1.xyz = log2(v0.xyz);
4001e04c3fSmrg *
4101e04c3fSmrg * We look for consecutive assignments of the same expression (modulo swizzle)
4201e04c3fSmrg * to each channel of the same variable.
4301e04c3fSmrg *
4401e04c3fSmrg * For instance, we want to convert these three scalar operations
4501e04c3fSmrg *
4601e04c3fSmrg * (assign (x) (var_ref r1) (expression float log2 (swiz x (var_ref v0))))
4701e04c3fSmrg * (assign (y) (var_ref r1) (expression float log2 (swiz y (var_ref v0))))
4801e04c3fSmrg * (assign (z) (var_ref r1) (expression float log2 (swiz z (var_ref v0))))
4901e04c3fSmrg *
5001e04c3fSmrg * into a single vector operation
5101e04c3fSmrg *
5201e04c3fSmrg * (assign (xyz) (var_ref r1) (expression vec3 log2 (swiz xyz (var_ref v0))))
5301e04c3fSmrg */
5401e04c3fSmrg
5501e04c3fSmrg#include "ir.h"
5601e04c3fSmrg#include "ir_visitor.h"
5701e04c3fSmrg#include "ir_optimization.h"
5801e04c3fSmrg#include "compiler/glsl_types.h"
5901e04c3fSmrg#include "program/prog_instruction.h"
6001e04c3fSmrg
6101e04c3fSmrgnamespace {
6201e04c3fSmrg
6301e04c3fSmrgclass ir_vectorize_visitor : public ir_hierarchical_visitor {
6401e04c3fSmrgpublic:
6501e04c3fSmrg   void clear()
6601e04c3fSmrg   {
6701e04c3fSmrg      assignment[0] = NULL;
6801e04c3fSmrg      assignment[1] = NULL;
6901e04c3fSmrg      assignment[2] = NULL;
7001e04c3fSmrg      assignment[3] = NULL;
7101e04c3fSmrg      current_assignment = NULL;
7201e04c3fSmrg      last_assignment = NULL;
7301e04c3fSmrg      channels = 0;
7401e04c3fSmrg      has_swizzle = false;
7501e04c3fSmrg   }
7601e04c3fSmrg
7701e04c3fSmrg   ir_vectorize_visitor()
7801e04c3fSmrg   {
7901e04c3fSmrg      clear();
8001e04c3fSmrg      progress = false;
8101e04c3fSmrg   }
8201e04c3fSmrg
8301e04c3fSmrg   virtual ir_visitor_status visit_enter(ir_assignment *);
8401e04c3fSmrg   virtual ir_visitor_status visit_enter(ir_swizzle *);
8501e04c3fSmrg   virtual ir_visitor_status visit_enter(ir_dereference_array *);
8601e04c3fSmrg   virtual ir_visitor_status visit_enter(ir_expression *);
8701e04c3fSmrg   virtual ir_visitor_status visit_enter(ir_if *);
8801e04c3fSmrg   virtual ir_visitor_status visit_enter(ir_loop *);
8901e04c3fSmrg   virtual ir_visitor_status visit_enter(ir_texture *);
9001e04c3fSmrg
9101e04c3fSmrg   virtual ir_visitor_status visit_leave(ir_assignment *);
9201e04c3fSmrg
9301e04c3fSmrg   void try_vectorize();
9401e04c3fSmrg
9501e04c3fSmrg   ir_assignment *assignment[4];
9601e04c3fSmrg   ir_assignment *current_assignment, *last_assignment;
9701e04c3fSmrg   unsigned channels;
9801e04c3fSmrg   bool has_swizzle;
9901e04c3fSmrg
10001e04c3fSmrg   bool progress;
10101e04c3fSmrg};
10201e04c3fSmrg
10301e04c3fSmrg} /* unnamed namespace */
10401e04c3fSmrg
10501e04c3fSmrg/**
10601e04c3fSmrg * Rewrites the swizzles and types of a right-hand side of an assignment.
10701e04c3fSmrg *
10801e04c3fSmrg * From the example above, this function would be called (by visit_tree()) on
10901e04c3fSmrg * the nodes of the tree (expression float log2 (swiz z   (var_ref v0))),
11001e04c3fSmrg * rewriting it into     (expression vec3  log2 (swiz xyz (var_ref v0))).
11101e04c3fSmrg *
11201e04c3fSmrg * The function operates on ir_expressions (and its operands) and ir_swizzles.
11301e04c3fSmrg * For expressions it sets a new type and swizzles any non-expression and non-
11401e04c3fSmrg * swizzle scalar operands into appropriately sized vector arguments. For
11501e04c3fSmrg * example, if combining
11601e04c3fSmrg *
11701e04c3fSmrg * (assign (x) (var_ref r1) (expression float + (swiz x (var_ref v0) (var_ref v1))))
11801e04c3fSmrg * (assign (y) (var_ref r1) (expression float + (swiz y (var_ref v0) (var_ref v1))))
11901e04c3fSmrg *
12001e04c3fSmrg * where v1 is a scalar, rewrite_swizzle() would insert a swizzle on
12101e04c3fSmrg * (var_ref v1) such that the final result was
12201e04c3fSmrg *
12301e04c3fSmrg * (assign (xy) (var_ref r1) (expression vec2 + (swiz xy (var_ref v0))
12401e04c3fSmrg *                                              (swiz xx (var_ref v1))))
12501e04c3fSmrg *
12601e04c3fSmrg * For swizzles, it sets a new type, and if the variable being swizzled is a
12701e04c3fSmrg * vector it overwrites the swizzle mask with the ir_swizzle_mask passed as the
12801e04c3fSmrg * data parameter. If the swizzled variable is scalar, then the swizzle was
12901e04c3fSmrg * added by an earlier call to rewrite_swizzle() on an expression, so the
13001e04c3fSmrg * mask should not be modified.
13101e04c3fSmrg */
13201e04c3fSmrgstatic void
13301e04c3fSmrgrewrite_swizzle(ir_instruction *ir, void *data)
13401e04c3fSmrg{
13501e04c3fSmrg   ir_swizzle_mask *mask = (ir_swizzle_mask *)data;
13601e04c3fSmrg
13701e04c3fSmrg   switch (ir->ir_type) {
13801e04c3fSmrg   case ir_type_swizzle: {
13901e04c3fSmrg      ir_swizzle *swz = (ir_swizzle *)ir;
14001e04c3fSmrg      if (swz->val->type->is_vector()) {
14101e04c3fSmrg         swz->mask = *mask;
14201e04c3fSmrg      }
14301e04c3fSmrg      swz->type = glsl_type::get_instance(swz->type->base_type,
14401e04c3fSmrg                                          mask->num_components, 1);
14501e04c3fSmrg      break;
14601e04c3fSmrg   }
14701e04c3fSmrg   case ir_type_expression: {
14801e04c3fSmrg      ir_expression *expr = (ir_expression *)ir;
14901e04c3fSmrg      expr->type = glsl_type::get_instance(expr->type->base_type,
15001e04c3fSmrg                                           mask->num_components, 1);
15101e04c3fSmrg      for (unsigned i = 0; i < 4; i++) {
15201e04c3fSmrg         if (expr->operands[i]) {
15301e04c3fSmrg            ir_rvalue *rval = expr->operands[i]->as_rvalue();
15401e04c3fSmrg            if (rval && rval->type->is_scalar() &&
15501e04c3fSmrg                !rval->as_expression() && !rval->as_swizzle()) {
15601e04c3fSmrg               expr->operands[i] = new(ir) ir_swizzle(rval, 0, 0, 0, 0,
15701e04c3fSmrg                                                      mask->num_components);
15801e04c3fSmrg            }
15901e04c3fSmrg         }
16001e04c3fSmrg      }
16101e04c3fSmrg      break;
16201e04c3fSmrg   }
16301e04c3fSmrg   default:
16401e04c3fSmrg      break;
16501e04c3fSmrg   }
16601e04c3fSmrg}
16701e04c3fSmrg
16801e04c3fSmrg/**
16901e04c3fSmrg * Attempt to vectorize the previously saved assignments, and clear them from
17001e04c3fSmrg * consideration.
17101e04c3fSmrg *
17201e04c3fSmrg * If the assignments are able to be combined, it modifies in-place the last
17301e04c3fSmrg * assignment seen to be an equivalent vector form of the scalar assignments.
17401e04c3fSmrg * It then removes the other now obsolete scalar assignments.
17501e04c3fSmrg */
17601e04c3fSmrgvoid
17701e04c3fSmrgir_vectorize_visitor::try_vectorize()
17801e04c3fSmrg{
17901e04c3fSmrg   if (this->last_assignment && this->channels > 1) {
18001e04c3fSmrg      ir_swizzle_mask mask = {0, 0, 0, 0, channels, 0};
18101e04c3fSmrg
18201e04c3fSmrg      this->last_assignment->write_mask = 0;
18301e04c3fSmrg
18401e04c3fSmrg      for (unsigned i = 0, j = 0; i < 4; i++) {
18501e04c3fSmrg         if (this->assignment[i]) {
18601e04c3fSmrg            this->last_assignment->write_mask |= 1 << i;
18701e04c3fSmrg
18801e04c3fSmrg            if (this->assignment[i] != this->last_assignment) {
18901e04c3fSmrg               this->assignment[i]->remove();
19001e04c3fSmrg            }
19101e04c3fSmrg
19201e04c3fSmrg            switch (j) {
19301e04c3fSmrg            case 0: mask.x = i; break;
19401e04c3fSmrg            case 1: mask.y = i; break;
19501e04c3fSmrg            case 2: mask.z = i; break;
19601e04c3fSmrg            case 3: mask.w = i; break;
19701e04c3fSmrg            }
19801e04c3fSmrg
19901e04c3fSmrg            j++;
20001e04c3fSmrg         }
20101e04c3fSmrg      }
20201e04c3fSmrg
20301e04c3fSmrg      visit_tree(this->last_assignment->rhs, rewrite_swizzle, &mask);
20401e04c3fSmrg
20501e04c3fSmrg      this->progress = true;
20601e04c3fSmrg   }
20701e04c3fSmrg   clear();
20801e04c3fSmrg}
20901e04c3fSmrg
/**
 * Tells whether exactly one bit is set in \p write_mask, i.e. whether the
 * mask selects a single channel. An empty mask yields false.
 */
static bool
single_channel_write_mask(unsigned write_mask)
{
   if (write_mask == 0)
      return false;

   /* Clearing the lowest set bit leaves zero only if it was the sole bit. */
   const unsigned without_lowest_bit = write_mask & (write_mask - 1);
   return without_lowest_bit == 0;
}
21801e04c3fSmrg
21901e04c3fSmrg/**
22001e04c3fSmrg * Translates single-channeled write mask to single-channeled swizzle.
22101e04c3fSmrg */
22201e04c3fSmrgstatic unsigned
22301e04c3fSmrgwrite_mask_to_swizzle(unsigned write_mask)
22401e04c3fSmrg{
22501e04c3fSmrg   switch (write_mask) {
22601e04c3fSmrg   case WRITEMASK_X: return SWIZZLE_X;
22701e04c3fSmrg   case WRITEMASK_Y: return SWIZZLE_Y;
22801e04c3fSmrg   case WRITEMASK_Z: return SWIZZLE_Z;
22901e04c3fSmrg   case WRITEMASK_W: return SWIZZLE_W;
23001e04c3fSmrg   }
23101e04c3fSmrg   unreachable("not reached");
23201e04c3fSmrg}
23301e04c3fSmrg
23401e04c3fSmrg/**
23501e04c3fSmrg * Returns whether a single-channeled write mask matches a swizzle.
23601e04c3fSmrg */
23701e04c3fSmrgstatic bool
23801e04c3fSmrgwrite_mask_matches_swizzle(unsigned write_mask,
23901e04c3fSmrg                           const ir_swizzle *swz)
24001e04c3fSmrg{
24101e04c3fSmrg   return ((write_mask == WRITEMASK_X && swz->mask.x == SWIZZLE_X) ||
24201e04c3fSmrg           (write_mask == WRITEMASK_Y && swz->mask.x == SWIZZLE_Y) ||
24301e04c3fSmrg           (write_mask == WRITEMASK_Z && swz->mask.x == SWIZZLE_Z) ||
24401e04c3fSmrg           (write_mask == WRITEMASK_W && swz->mask.x == SWIZZLE_W));
24501e04c3fSmrg}
24601e04c3fSmrg
24701e04c3fSmrg/**
24801e04c3fSmrg * Upon entering an ir_assignment, attempt to vectorize the currently tracked
24901e04c3fSmrg * assignments if the current assignment is not suitable. Keep a pointer to
25001e04c3fSmrg * the current assignment.
25101e04c3fSmrg */
25201e04c3fSmrgir_visitor_status
25301e04c3fSmrgir_vectorize_visitor::visit_enter(ir_assignment *ir)
25401e04c3fSmrg{
25501e04c3fSmrg   ir_dereference *lhs = this->last_assignment != NULL ?
25601e04c3fSmrg                         this->last_assignment->lhs : NULL;
25701e04c3fSmrg   ir_rvalue *rhs = this->last_assignment != NULL ?
25801e04c3fSmrg                    this->last_assignment->rhs : NULL;
25901e04c3fSmrg
26001e04c3fSmrg   if (ir->condition ||
26101e04c3fSmrg       this->channels >= 4 ||
26201e04c3fSmrg       !single_channel_write_mask(ir->write_mask) ||
26301e04c3fSmrg       this->assignment[write_mask_to_swizzle(ir->write_mask)] != NULL ||
26401e04c3fSmrg       (lhs && !ir->lhs->equals(lhs)) ||
26501e04c3fSmrg       (rhs && !ir->rhs->equals(rhs, ir_type_swizzle))) {
26601e04c3fSmrg      try_vectorize();
26701e04c3fSmrg   }
26801e04c3fSmrg
26901e04c3fSmrg   this->current_assignment = ir;
27001e04c3fSmrg
27101e04c3fSmrg   return visit_continue;
27201e04c3fSmrg}
27301e04c3fSmrg
27401e04c3fSmrg/**
27501e04c3fSmrg * Upon entering an ir_swizzle, set ::has_swizzle if we're visiting from an
27601e04c3fSmrg * ir_assignment (i.e., that ::current_assignment is set) and the swizzle mask
27701e04c3fSmrg * matches the current assignment's write mask.
27801e04c3fSmrg *
27901e04c3fSmrg * If the write mask doesn't match the swizzle mask, remove the current
28001e04c3fSmrg * assignment from further consideration.
28101e04c3fSmrg */
28201e04c3fSmrgir_visitor_status
28301e04c3fSmrgir_vectorize_visitor::visit_enter(ir_swizzle *ir)
28401e04c3fSmrg{
28501e04c3fSmrg   if (this->current_assignment) {
28601e04c3fSmrg      if (write_mask_matches_swizzle(this->current_assignment->write_mask, ir)) {
28701e04c3fSmrg         this->has_swizzle = true;
28801e04c3fSmrg      } else {
28901e04c3fSmrg         this->current_assignment = NULL;
29001e04c3fSmrg      }
29101e04c3fSmrg   }
29201e04c3fSmrg   return visit_continue;
29301e04c3fSmrg}
29401e04c3fSmrg
29501e04c3fSmrg/* Upon entering an ir_array_dereference, remove the current assignment from
29601e04c3fSmrg * further consideration. Since the index of an array dereference must scalar,
29701e04c3fSmrg * we are not able to vectorize it.
29801e04c3fSmrg *
29901e04c3fSmrg * FINISHME: If all of scalar indices are identical we could vectorize.
30001e04c3fSmrg */
30101e04c3fSmrgir_visitor_status
30201e04c3fSmrgir_vectorize_visitor::visit_enter(ir_dereference_array *)
30301e04c3fSmrg{
30401e04c3fSmrg   this->current_assignment = NULL;
30501e04c3fSmrg   return visit_continue_with_parent;
30601e04c3fSmrg}
30701e04c3fSmrg
30801e04c3fSmrg/**
30901e04c3fSmrg * Upon entering an ir_expression, remove the current assignment from further
31001e04c3fSmrg * consideration if the expression operates horizontally on vectors.
31101e04c3fSmrg */
31201e04c3fSmrgir_visitor_status
31301e04c3fSmrgir_vectorize_visitor::visit_enter(ir_expression *ir)
31401e04c3fSmrg{
31501e04c3fSmrg   if (ir->is_horizontal()) {
31601e04c3fSmrg      this->current_assignment = NULL;
31701e04c3fSmrg      return visit_continue_with_parent;
31801e04c3fSmrg   }
31901e04c3fSmrg   return visit_continue;
32001e04c3fSmrg}
32101e04c3fSmrg
32201e04c3fSmrg/* Since there is no statement to visit between the "then" and "else"
32301e04c3fSmrg * instructions try to vectorize before, in between, and after them to avoid
32401e04c3fSmrg * combining statements from different basic blocks.
32501e04c3fSmrg */
32601e04c3fSmrgir_visitor_status
32701e04c3fSmrgir_vectorize_visitor::visit_enter(ir_if *ir)
32801e04c3fSmrg{
32901e04c3fSmrg   try_vectorize();
33001e04c3fSmrg
33101e04c3fSmrg   visit_list_elements(this, &ir->then_instructions);
33201e04c3fSmrg   try_vectorize();
33301e04c3fSmrg
33401e04c3fSmrg   visit_list_elements(this, &ir->else_instructions);
33501e04c3fSmrg   try_vectorize();
33601e04c3fSmrg
33701e04c3fSmrg   return visit_continue_with_parent;
33801e04c3fSmrg}
33901e04c3fSmrg
34001e04c3fSmrg/* Since there is no statement to visit between the instructions in the body of
34101e04c3fSmrg * the loop and the instructions after it try to vectorize before and after the
34201e04c3fSmrg * body to avoid combining statements from different basic blocks.
34301e04c3fSmrg */
34401e04c3fSmrgir_visitor_status
34501e04c3fSmrgir_vectorize_visitor::visit_enter(ir_loop *ir)
34601e04c3fSmrg{
34701e04c3fSmrg   try_vectorize();
34801e04c3fSmrg
34901e04c3fSmrg   visit_list_elements(this, &ir->body_instructions);
35001e04c3fSmrg   try_vectorize();
35101e04c3fSmrg
35201e04c3fSmrg   return visit_continue_with_parent;
35301e04c3fSmrg}
35401e04c3fSmrg
35501e04c3fSmrg/**
35601e04c3fSmrg * Upon entering an ir_texture, remove the current assignment from
35701e04c3fSmrg * further consideration. Vectorizing multiple texture lookups into one
35801e04c3fSmrg * is wrong.
35901e04c3fSmrg */
36001e04c3fSmrgir_visitor_status
36101e04c3fSmrgir_vectorize_visitor::visit_enter(ir_texture *)
36201e04c3fSmrg{
36301e04c3fSmrg   this->current_assignment = NULL;
36401e04c3fSmrg   return visit_continue_with_parent;
36501e04c3fSmrg}
36601e04c3fSmrg
36701e04c3fSmrg/**
36801e04c3fSmrg * Upon leaving an ir_assignment, save a pointer to it in ::assignment[] if
36901e04c3fSmrg * the swizzle mask(s) found were appropriate. Also save a pointer in
37001e04c3fSmrg * ::last_assignment so that we can compare future assignments with it.
37101e04c3fSmrg *
37201e04c3fSmrg * Finally, clear ::current_assignment and ::has_swizzle.
37301e04c3fSmrg */
37401e04c3fSmrgir_visitor_status
37501e04c3fSmrgir_vectorize_visitor::visit_leave(ir_assignment *ir)
37601e04c3fSmrg{
37701e04c3fSmrg   if (this->has_swizzle && this->current_assignment) {
37801e04c3fSmrg      assert(this->current_assignment == ir);
37901e04c3fSmrg
38001e04c3fSmrg      unsigned channel = write_mask_to_swizzle(this->current_assignment->write_mask);
38101e04c3fSmrg      this->assignment[channel] = ir;
38201e04c3fSmrg      this->channels++;
38301e04c3fSmrg
38401e04c3fSmrg      this->last_assignment = this->current_assignment;
38501e04c3fSmrg   }
38601e04c3fSmrg   this->current_assignment = NULL;
38701e04c3fSmrg   this->has_swizzle = false;
38801e04c3fSmrg   return visit_continue;
38901e04c3fSmrg}
39001e04c3fSmrg
39101e04c3fSmrg/**
39201e04c3fSmrg * Combines scalar assignments of the same expression (modulo swizzle) to
39301e04c3fSmrg * multiple channels of the same variable into a single vectorized expression
39401e04c3fSmrg * and assignment.
39501e04c3fSmrg */
39601e04c3fSmrgbool
39701e04c3fSmrgdo_vectorize(exec_list *instructions)
39801e04c3fSmrg{
39901e04c3fSmrg   ir_vectorize_visitor v;
40001e04c3fSmrg
40101e04c3fSmrg   v.run(instructions);
40201e04c3fSmrg
40301e04c3fSmrg   /* Try to vectorize the last assignments seen. */
40401e04c3fSmrg   v.try_vectorize();
40501e04c3fSmrg
40601e04c3fSmrg   return v.progress;
40701e04c3fSmrg}
408