101e04c3fSmrg/*
201e04c3fSmrg * Copyright © 2010 Intel Corporation
301e04c3fSmrg *
401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
501e04c3fSmrg * copy of this software and associated documentation files (the "Software"),
601e04c3fSmrg * to deal in the Software without restriction, including without limitation
701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the
901e04c3fSmrg * Software is furnished to do so, subject to the following conditions:
1001e04c3fSmrg *
1101e04c3fSmrg * The above copyright notice and this permission notice (including the next
1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the
1301e04c3fSmrg * Software.
1401e04c3fSmrg *
1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
2101e04c3fSmrg * DEALINGS IN THE SOFTWARE.
2201e04c3fSmrg */
2301e04c3fSmrg
2401e04c3fSmrg/**
2501e04c3fSmrg * \file lower_vector.cpp
2601e04c3fSmrg * IR lowering pass to remove some types of ir_quadop_vector
2701e04c3fSmrg *
2801e04c3fSmrg * \author Ian Romanick <ian.d.romanick@intel.com>
2901e04c3fSmrg */
3001e04c3fSmrg
3101e04c3fSmrg#include "ir.h"
3201e04c3fSmrg#include "ir_rvalue_visitor.h"
3301e04c3fSmrg
3401e04c3fSmrgnamespace {
3501e04c3fSmrg
3601e04c3fSmrgclass lower_vector_visitor : public ir_rvalue_visitor {
3701e04c3fSmrgpublic:
3801e04c3fSmrg   lower_vector_visitor() : dont_lower_swz(false), progress(false)
3901e04c3fSmrg   {
4001e04c3fSmrg      /* empty */
4101e04c3fSmrg   }
4201e04c3fSmrg
4301e04c3fSmrg   void handle_rvalue(ir_rvalue **rvalue);
4401e04c3fSmrg
4501e04c3fSmrg   /**
4601e04c3fSmrg    * Should SWZ-like expressions be lowered?
4701e04c3fSmrg    */
4801e04c3fSmrg   bool dont_lower_swz;
4901e04c3fSmrg
5001e04c3fSmrg   bool progress;
5101e04c3fSmrg};
5201e04c3fSmrg
5301e04c3fSmrg} /* anonymous namespace */
5401e04c3fSmrg
5501e04c3fSmrg/**
5601e04c3fSmrg * Determine if an IR expression tree looks like an extended swizzle
5701e04c3fSmrg *
5801e04c3fSmrg * Extended swizzles consist of access of a single vector source (with possible
5901e04c3fSmrg * per component negation) and the constants -1, 0, or 1.
6001e04c3fSmrg */
6101e04c3fSmrgstatic bool
6201e04c3fSmrgis_extended_swizzle(ir_expression *ir)
6301e04c3fSmrg{
6401e04c3fSmrg   /* Track any variables that are accessed by this expression.
6501e04c3fSmrg    */
6601e04c3fSmrg   ir_variable *var = NULL;
6701e04c3fSmrg
6801e04c3fSmrg   assert(ir->operation == ir_quadop_vector);
6901e04c3fSmrg
7001e04c3fSmrg   for (unsigned i = 0; i < ir->type->vector_elements; i++) {
7101e04c3fSmrg      ir_rvalue *op = ir->operands[i];
7201e04c3fSmrg
7301e04c3fSmrg      while (op != NULL) {
7401e04c3fSmrg	 switch (op->ir_type) {
7501e04c3fSmrg	 case ir_type_constant: {
7601e04c3fSmrg	    const ir_constant *const c = op->as_constant();
7701e04c3fSmrg
7801e04c3fSmrg	    if (!c->is_one() && !c->is_zero() && !c->is_negative_one())
7901e04c3fSmrg	       return false;
8001e04c3fSmrg
8101e04c3fSmrg	    op = NULL;
8201e04c3fSmrg	    break;
8301e04c3fSmrg	 }
8401e04c3fSmrg
8501e04c3fSmrg	 case ir_type_dereference_variable: {
8601e04c3fSmrg	    ir_dereference_variable *const d = (ir_dereference_variable *) op;
8701e04c3fSmrg
8801e04c3fSmrg	    if ((var != NULL) && (var != d->var))
8901e04c3fSmrg	       return false;
9001e04c3fSmrg
9101e04c3fSmrg	    var = d->var;
9201e04c3fSmrg	    op = NULL;
9301e04c3fSmrg	    break;
9401e04c3fSmrg	 }
9501e04c3fSmrg
9601e04c3fSmrg	 case ir_type_expression: {
9701e04c3fSmrg	    ir_expression *const ex = (ir_expression *) op;
9801e04c3fSmrg
9901e04c3fSmrg	    if (ex->operation != ir_unop_neg)
10001e04c3fSmrg	       return false;
10101e04c3fSmrg
10201e04c3fSmrg	    op = ex->operands[0];
10301e04c3fSmrg	    break;
10401e04c3fSmrg	 }
10501e04c3fSmrg
10601e04c3fSmrg	 case ir_type_swizzle:
10701e04c3fSmrg	    op = ((ir_swizzle *) op)->val;
10801e04c3fSmrg	    break;
10901e04c3fSmrg
11001e04c3fSmrg	 default:
11101e04c3fSmrg	    return false;
11201e04c3fSmrg	 }
11301e04c3fSmrg      }
11401e04c3fSmrg   }
11501e04c3fSmrg
11601e04c3fSmrg   return true;
11701e04c3fSmrg}
11801e04c3fSmrg
11901e04c3fSmrgvoid
12001e04c3fSmrglower_vector_visitor::handle_rvalue(ir_rvalue **rvalue)
12101e04c3fSmrg{
12201e04c3fSmrg   if (!*rvalue)
12301e04c3fSmrg      return;
12401e04c3fSmrg
12501e04c3fSmrg   ir_expression *expr = (*rvalue)->as_expression();
12601e04c3fSmrg   if ((expr == NULL) || (expr->operation != ir_quadop_vector))
12701e04c3fSmrg      return;
12801e04c3fSmrg
12901e04c3fSmrg   if (this->dont_lower_swz && is_extended_swizzle(expr))
13001e04c3fSmrg      return;
13101e04c3fSmrg
13201e04c3fSmrg   /* FINISHME: Is this the right thing to use for the ralloc context?
13301e04c3fSmrg    */
13401e04c3fSmrg   void *const mem_ctx = expr;
13501e04c3fSmrg
13601e04c3fSmrg   assert(expr->type->vector_elements == expr->num_operands);
13701e04c3fSmrg
13801e04c3fSmrg   /* Generate a temporary with the same type as the ir_quadop_operation.
13901e04c3fSmrg    */
14001e04c3fSmrg   ir_variable *const temp =
14101e04c3fSmrg      new(mem_ctx) ir_variable(expr->type, "vecop_tmp", ir_var_temporary);
14201e04c3fSmrg
14301e04c3fSmrg   this->base_ir->insert_before(temp);
14401e04c3fSmrg
14501e04c3fSmrg   /* Counter of the number of components collected so far.
14601e04c3fSmrg    */
14701e04c3fSmrg   unsigned assigned;
14801e04c3fSmrg
14901e04c3fSmrg   /* Write-mask in the destination that receives counted by 'assigned'.
15001e04c3fSmrg    */
15101e04c3fSmrg   unsigned write_mask;
15201e04c3fSmrg
15301e04c3fSmrg
15401e04c3fSmrg   /* Generate upto four assignments to that variable.  Try to group component
15501e04c3fSmrg    * assignments together:
15601e04c3fSmrg    *
15701e04c3fSmrg    * - All constant components can be assigned at once.
15801e04c3fSmrg    * - All assigments of components from a single variable with the same
15901e04c3fSmrg    *   unary operator can be assigned at once.
16001e04c3fSmrg    */
16101e04c3fSmrg   ir_constant_data d = { { 0 } };
16201e04c3fSmrg
16301e04c3fSmrg   assigned = 0;
16401e04c3fSmrg   write_mask = 0;
16501e04c3fSmrg   for (unsigned i = 0; i < expr->type->vector_elements; i++) {
16601e04c3fSmrg      const ir_constant *const c = expr->operands[i]->as_constant();
16701e04c3fSmrg
16801e04c3fSmrg      if (c == NULL)
16901e04c3fSmrg	 continue;
17001e04c3fSmrg
17101e04c3fSmrg      switch (expr->type->base_type) {
17201e04c3fSmrg      case GLSL_TYPE_UINT:  d.u[assigned] = c->value.u[0]; break;
17301e04c3fSmrg      case GLSL_TYPE_INT:   d.i[assigned] = c->value.i[0]; break;
17401e04c3fSmrg      case GLSL_TYPE_FLOAT: d.f[assigned] = c->value.f[0]; break;
17501e04c3fSmrg      case GLSL_TYPE_BOOL:  d.b[assigned] = c->value.b[0]; break;
17601e04c3fSmrg      default:              assert(!"Should not get here."); break;
17701e04c3fSmrg      }
17801e04c3fSmrg
17901e04c3fSmrg      write_mask |= (1U << i);
18001e04c3fSmrg      assigned++;
18101e04c3fSmrg   }
18201e04c3fSmrg
18301e04c3fSmrg   assert((write_mask == 0) == (assigned == 0));
18401e04c3fSmrg
18501e04c3fSmrg   /* If there were constant values, generate an assignment.
18601e04c3fSmrg    */
18701e04c3fSmrg   if (assigned > 0) {
18801e04c3fSmrg      ir_constant *const c =
18901e04c3fSmrg	 new(mem_ctx) ir_constant(glsl_type::get_instance(expr->type->base_type,
19001e04c3fSmrg							  assigned, 1),
19101e04c3fSmrg				  &d);
19201e04c3fSmrg      ir_dereference *const lhs = new(mem_ctx) ir_dereference_variable(temp);
19301e04c3fSmrg      ir_assignment *const assign =
19401e04c3fSmrg	 new(mem_ctx) ir_assignment(lhs, c, NULL, write_mask);
19501e04c3fSmrg
19601e04c3fSmrg      this->base_ir->insert_before(assign);
19701e04c3fSmrg   }
19801e04c3fSmrg
19901e04c3fSmrg   /* FINISHME: This should try to coalesce assignments.
20001e04c3fSmrg    */
20101e04c3fSmrg   for (unsigned i = 0; i < expr->type->vector_elements; i++) {
20201e04c3fSmrg      if (expr->operands[i]->ir_type == ir_type_constant)
20301e04c3fSmrg	 continue;
20401e04c3fSmrg
20501e04c3fSmrg      ir_dereference *const lhs = new(mem_ctx) ir_dereference_variable(temp);
20601e04c3fSmrg      ir_assignment *const assign =
20701e04c3fSmrg	 new(mem_ctx) ir_assignment(lhs, expr->operands[i], NULL, (1U << i));
20801e04c3fSmrg
20901e04c3fSmrg      this->base_ir->insert_before(assign);
21001e04c3fSmrg      assigned++;
21101e04c3fSmrg   }
21201e04c3fSmrg
21301e04c3fSmrg   assert(assigned == expr->type->vector_elements);
21401e04c3fSmrg
21501e04c3fSmrg   *rvalue = new(mem_ctx) ir_dereference_variable(temp);
21601e04c3fSmrg   this->progress = true;
21701e04c3fSmrg}
21801e04c3fSmrg
21901e04c3fSmrgbool
22001e04c3fSmrglower_quadop_vector(exec_list *instructions, bool dont_lower_swz)
22101e04c3fSmrg{
22201e04c3fSmrg   lower_vector_visitor v;
22301e04c3fSmrg
22401e04c3fSmrg   v.dont_lower_swz = dont_lower_swz;
22501e04c3fSmrg   visit_list_elements(&v, instructions);
22601e04c3fSmrg
22701e04c3fSmrg   return v.progress;
22801e04c3fSmrg}
229