101e04c3fSmrg/*
201e04c3fSmrg * Copyright © 2010 Intel Corporation
301e04c3fSmrg *
401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
501e04c3fSmrg * copy of this software and associated documentation files (the "Software"),
601e04c3fSmrg * to deal in the Software without restriction, including without limitation
701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the
901e04c3fSmrg * Software is furnished to do so, subject to the following conditions:
1001e04c3fSmrg *
1101e04c3fSmrg * The above copyright notice and this permission notice (including the next
1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the
1301e04c3fSmrg * Software.
1401e04c3fSmrg *
1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
2101e04c3fSmrg * DEALINGS IN THE SOFTWARE.
2201e04c3fSmrg */
2301e04c3fSmrg
2401e04c3fSmrg/**
2501e04c3fSmrg * \file opt_structure_splitting.cpp
2601e04c3fSmrg *
2701e04c3fSmrg * If a structure is only ever referenced by its components, then
2801e04c3fSmrg * split those components out to individual variables so they can be
2901e04c3fSmrg * handled normally by other optimization passes.
3001e04c3fSmrg *
3101e04c3fSmrg * This skips structures like uniforms, which need to be accessible as
3201e04c3fSmrg * structures for their access by the GL.
3301e04c3fSmrg */
3401e04c3fSmrg
3501e04c3fSmrg#include "ir.h"
3601e04c3fSmrg#include "ir_visitor.h"
3701e04c3fSmrg#include "ir_rvalue_visitor.h"
3801e04c3fSmrg#include "compiler/glsl_types.h"
3901e04c3fSmrg
4001e04c3fSmrgnamespace {
4101e04c3fSmrg
/* When true, do_structure_splitting() prints one line of statistics for
 * every structure variable it considered.
 */
static bool debug = false;
4301e04c3fSmrg
4401e04c3fSmrgclass variable_entry : public exec_node
4501e04c3fSmrg{
4601e04c3fSmrgpublic:
4701e04c3fSmrg   variable_entry(ir_variable *var)
4801e04c3fSmrg   {
4901e04c3fSmrg      this->var = var;
5001e04c3fSmrg      this->whole_structure_access = 0;
5101e04c3fSmrg      this->declaration = false;
5201e04c3fSmrg      this->components = NULL;
5301e04c3fSmrg      this->mem_ctx = NULL;
5401e04c3fSmrg   }
5501e04c3fSmrg
5601e04c3fSmrg   ir_variable *var; /* The key: the variable's pointer. */
5701e04c3fSmrg
5801e04c3fSmrg   /** Number of times the variable is referenced, including assignments. */
5901e04c3fSmrg   unsigned whole_structure_access;
6001e04c3fSmrg
6101e04c3fSmrg   /* If the variable had a decl we can work with in the instruction
6201e04c3fSmrg    * stream.  We can't do splitting on function arguments, which
6301e04c3fSmrg    * don't get this variable set.
6401e04c3fSmrg    */
6501e04c3fSmrg   bool declaration;
6601e04c3fSmrg
6701e04c3fSmrg   ir_variable **components;
6801e04c3fSmrg
6901e04c3fSmrg   /** ralloc_parent(this->var) -- the shader's ralloc context. */
7001e04c3fSmrg   void *mem_ctx;
7101e04c3fSmrg};
7201e04c3fSmrg
7301e04c3fSmrg
7401e04c3fSmrgclass ir_structure_reference_visitor : public ir_hierarchical_visitor {
7501e04c3fSmrgpublic:
7601e04c3fSmrg   ir_structure_reference_visitor(void)
7701e04c3fSmrg   {
7801e04c3fSmrg      this->mem_ctx = ralloc_context(NULL);
7901e04c3fSmrg      this->variable_list.make_empty();
8001e04c3fSmrg   }
8101e04c3fSmrg
8201e04c3fSmrg   ~ir_structure_reference_visitor(void)
8301e04c3fSmrg   {
8401e04c3fSmrg      ralloc_free(mem_ctx);
8501e04c3fSmrg   }
8601e04c3fSmrg
8701e04c3fSmrg   virtual ir_visitor_status visit(ir_variable *);
8801e04c3fSmrg   virtual ir_visitor_status visit(ir_dereference_variable *);
8901e04c3fSmrg   virtual ir_visitor_status visit_enter(ir_dereference_record *);
9001e04c3fSmrg   virtual ir_visitor_status visit_enter(ir_assignment *);
9101e04c3fSmrg   virtual ir_visitor_status visit_enter(ir_function_signature *);
9201e04c3fSmrg
9301e04c3fSmrg   variable_entry *get_variable_entry(ir_variable *var);
9401e04c3fSmrg
9501e04c3fSmrg   /* List of variable_entry */
9601e04c3fSmrg   exec_list variable_list;
9701e04c3fSmrg
9801e04c3fSmrg   void *mem_ctx;
9901e04c3fSmrg};
10001e04c3fSmrg
10101e04c3fSmrgvariable_entry *
10201e04c3fSmrgir_structure_reference_visitor::get_variable_entry(ir_variable *var)
10301e04c3fSmrg{
10401e04c3fSmrg   assert(var);
10501e04c3fSmrg
1067e102996Smaya   if (!var->type->is_struct() ||
10701e04c3fSmrg       var->data.mode == ir_var_uniform || var->data.mode == ir_var_shader_storage ||
10801e04c3fSmrg       var->data.mode == ir_var_shader_in || var->data.mode == ir_var_shader_out)
10901e04c3fSmrg      return NULL;
11001e04c3fSmrg
11101e04c3fSmrg   foreach_in_list(variable_entry, entry, &this->variable_list) {
11201e04c3fSmrg      if (entry->var == var)
11301e04c3fSmrg	 return entry;
11401e04c3fSmrg   }
11501e04c3fSmrg
11601e04c3fSmrg   variable_entry *entry = new(mem_ctx) variable_entry(var);
11701e04c3fSmrg   this->variable_list.push_tail(entry);
11801e04c3fSmrg   return entry;
11901e04c3fSmrg}
12001e04c3fSmrg
12101e04c3fSmrg
12201e04c3fSmrgir_visitor_status
12301e04c3fSmrgir_structure_reference_visitor::visit(ir_variable *ir)
12401e04c3fSmrg{
12501e04c3fSmrg   variable_entry *entry = this->get_variable_entry(ir);
12601e04c3fSmrg
12701e04c3fSmrg   if (entry)
12801e04c3fSmrg      entry->declaration = true;
12901e04c3fSmrg
13001e04c3fSmrg   return visit_continue;
13101e04c3fSmrg}
13201e04c3fSmrg
13301e04c3fSmrgir_visitor_status
13401e04c3fSmrgir_structure_reference_visitor::visit(ir_dereference_variable *ir)
13501e04c3fSmrg{
13601e04c3fSmrg   ir_variable *const var = ir->variable_referenced();
13701e04c3fSmrg   variable_entry *entry = this->get_variable_entry(var);
13801e04c3fSmrg
13901e04c3fSmrg   if (entry)
14001e04c3fSmrg      entry->whole_structure_access++;
14101e04c3fSmrg
14201e04c3fSmrg   return visit_continue;
14301e04c3fSmrg}
14401e04c3fSmrg
14501e04c3fSmrgir_visitor_status
14601e04c3fSmrgir_structure_reference_visitor::visit_enter(ir_dereference_record *ir)
14701e04c3fSmrg{
14801e04c3fSmrg   (void) ir;
14901e04c3fSmrg   /* Don't descend into the ir_dereference_variable below. */
15001e04c3fSmrg   return visit_continue_with_parent;
15101e04c3fSmrg}
15201e04c3fSmrg
15301e04c3fSmrgir_visitor_status
15401e04c3fSmrgir_structure_reference_visitor::visit_enter(ir_assignment *ir)
15501e04c3fSmrg{
15601e04c3fSmrg   /* If there are no structure references yet, no need to bother with
15701e04c3fSmrg    * processing the expression tree.
15801e04c3fSmrg    */
15901e04c3fSmrg   if (this->variable_list.is_empty())
16001e04c3fSmrg      return visit_continue_with_parent;
16101e04c3fSmrg
16201e04c3fSmrg   if (ir->lhs->as_dereference_variable() &&
16301e04c3fSmrg       ir->rhs->as_dereference_variable() &&
16401e04c3fSmrg       !ir->condition) {
16501e04c3fSmrg      /* We'll split copies of a structure to copies of components, so don't
16601e04c3fSmrg       * descend to the ir_dereference_variables.
16701e04c3fSmrg       */
16801e04c3fSmrg      return visit_continue_with_parent;
16901e04c3fSmrg   }
17001e04c3fSmrg   return visit_continue;
17101e04c3fSmrg}
17201e04c3fSmrg
17301e04c3fSmrgir_visitor_status
17401e04c3fSmrgir_structure_reference_visitor::visit_enter(ir_function_signature *ir)
17501e04c3fSmrg{
17601e04c3fSmrg   /* We don't have logic for structure-splitting function arguments,
17701e04c3fSmrg    * so just look at the body instructions and not the parameter
17801e04c3fSmrg    * declarations.
17901e04c3fSmrg    */
18001e04c3fSmrg   visit_list_elements(this, &ir->body);
18101e04c3fSmrg   return visit_continue_with_parent;
18201e04c3fSmrg}
18301e04c3fSmrg
18401e04c3fSmrgclass ir_structure_splitting_visitor : public ir_rvalue_visitor {
18501e04c3fSmrgpublic:
18601e04c3fSmrg   ir_structure_splitting_visitor(exec_list *vars)
18701e04c3fSmrg   {
18801e04c3fSmrg      this->variable_list = vars;
18901e04c3fSmrg   }
19001e04c3fSmrg
19101e04c3fSmrg   virtual ~ir_structure_splitting_visitor()
19201e04c3fSmrg   {
19301e04c3fSmrg   }
19401e04c3fSmrg
19501e04c3fSmrg   virtual ir_visitor_status visit_leave(ir_assignment *);
19601e04c3fSmrg
19701e04c3fSmrg   void split_deref(ir_dereference **deref);
19801e04c3fSmrg   void handle_rvalue(ir_rvalue **rvalue);
19901e04c3fSmrg   variable_entry *get_splitting_entry(ir_variable *var);
20001e04c3fSmrg
20101e04c3fSmrg   exec_list *variable_list;
20201e04c3fSmrg};
20301e04c3fSmrg
20401e04c3fSmrgvariable_entry *
20501e04c3fSmrgir_structure_splitting_visitor::get_splitting_entry(ir_variable *var)
20601e04c3fSmrg{
20701e04c3fSmrg   assert(var);
20801e04c3fSmrg
2097e102996Smaya   if (!var->type->is_struct())
21001e04c3fSmrg      return NULL;
21101e04c3fSmrg
21201e04c3fSmrg   foreach_in_list(variable_entry, entry, this->variable_list) {
21301e04c3fSmrg      if (entry->var == var) {
21401e04c3fSmrg	 return entry;
21501e04c3fSmrg      }
21601e04c3fSmrg   }
21701e04c3fSmrg
21801e04c3fSmrg   return NULL;
21901e04c3fSmrg}
22001e04c3fSmrg
22101e04c3fSmrgvoid
22201e04c3fSmrgir_structure_splitting_visitor::split_deref(ir_dereference **deref)
22301e04c3fSmrg{
22401e04c3fSmrg   if ((*deref)->ir_type != ir_type_dereference_record)
22501e04c3fSmrg      return;
22601e04c3fSmrg
22701e04c3fSmrg   ir_dereference_record *deref_record = (ir_dereference_record *)*deref;
22801e04c3fSmrg   ir_dereference_variable *deref_var = deref_record->record->as_dereference_variable();
22901e04c3fSmrg   if (!deref_var)
23001e04c3fSmrg      return;
23101e04c3fSmrg
23201e04c3fSmrg   variable_entry *entry = get_splitting_entry(deref_var->var);
23301e04c3fSmrg   if (!entry)
23401e04c3fSmrg      return;
23501e04c3fSmrg
23601e04c3fSmrg   int i = deref_record->field_idx;
23701e04c3fSmrg   assert(i >= 0);
23801e04c3fSmrg   assert((unsigned) i < entry->var->type->length);
23901e04c3fSmrg
24001e04c3fSmrg   *deref = new(entry->mem_ctx) ir_dereference_variable(entry->components[i]);
24101e04c3fSmrg}
24201e04c3fSmrg
24301e04c3fSmrgvoid
24401e04c3fSmrgir_structure_splitting_visitor::handle_rvalue(ir_rvalue **rvalue)
24501e04c3fSmrg{
24601e04c3fSmrg   if (!*rvalue)
24701e04c3fSmrg      return;
24801e04c3fSmrg
24901e04c3fSmrg   ir_dereference *deref = (*rvalue)->as_dereference();
25001e04c3fSmrg
25101e04c3fSmrg   if (!deref)
25201e04c3fSmrg      return;
25301e04c3fSmrg
25401e04c3fSmrg   split_deref(&deref);
25501e04c3fSmrg   *rvalue = deref;
25601e04c3fSmrg}
25701e04c3fSmrg
25801e04c3fSmrgir_visitor_status
25901e04c3fSmrgir_structure_splitting_visitor::visit_leave(ir_assignment *ir)
26001e04c3fSmrg{
26101e04c3fSmrg   ir_dereference_variable *lhs_deref = ir->lhs->as_dereference_variable();
26201e04c3fSmrg   ir_dereference_variable *rhs_deref = ir->rhs->as_dereference_variable();
26301e04c3fSmrg   variable_entry *lhs_entry = lhs_deref ? get_splitting_entry(lhs_deref->var) : NULL;
26401e04c3fSmrg   variable_entry *rhs_entry = rhs_deref ? get_splitting_entry(rhs_deref->var) : NULL;
26501e04c3fSmrg   const glsl_type *type = ir->rhs->type;
26601e04c3fSmrg
26701e04c3fSmrg   if ((lhs_entry || rhs_entry) && !ir->condition) {
26801e04c3fSmrg      for (unsigned int i = 0; i < type->length; i++) {
26901e04c3fSmrg	 ir_dereference *new_lhs, *new_rhs;
27001e04c3fSmrg	 void *mem_ctx = lhs_entry ? lhs_entry->mem_ctx : rhs_entry->mem_ctx;
27101e04c3fSmrg
27201e04c3fSmrg	 if (lhs_entry) {
27301e04c3fSmrg	    new_lhs = new(mem_ctx) ir_dereference_variable(lhs_entry->components[i]);
27401e04c3fSmrg	 } else {
27501e04c3fSmrg	    new_lhs = new(mem_ctx)
27601e04c3fSmrg	       ir_dereference_record(ir->lhs->clone(mem_ctx, NULL),
27701e04c3fSmrg				     type->fields.structure[i].name);
27801e04c3fSmrg	 }
27901e04c3fSmrg
28001e04c3fSmrg	 if (rhs_entry) {
28101e04c3fSmrg	    new_rhs = new(mem_ctx) ir_dereference_variable(rhs_entry->components[i]);
28201e04c3fSmrg	 } else {
28301e04c3fSmrg	    new_rhs = new(mem_ctx)
28401e04c3fSmrg	       ir_dereference_record(ir->rhs->clone(mem_ctx, NULL),
28501e04c3fSmrg				     type->fields.structure[i].name);
28601e04c3fSmrg	 }
28701e04c3fSmrg
28801e04c3fSmrg         ir->insert_before(new(mem_ctx) ir_assignment(new_lhs, new_rhs));
28901e04c3fSmrg      }
29001e04c3fSmrg      ir->remove();
29101e04c3fSmrg   } else {
29201e04c3fSmrg      handle_rvalue(&ir->rhs);
29301e04c3fSmrg      split_deref(&ir->lhs);
29401e04c3fSmrg   }
29501e04c3fSmrg
29601e04c3fSmrg   handle_rvalue(&ir->condition);
29701e04c3fSmrg
29801e04c3fSmrg   return visit_continue;
29901e04c3fSmrg}
30001e04c3fSmrg
30101e04c3fSmrg} /* unnamed namespace */
30201e04c3fSmrg
30301e04c3fSmrgbool
30401e04c3fSmrgdo_structure_splitting(exec_list *instructions)
30501e04c3fSmrg{
30601e04c3fSmrg   ir_structure_reference_visitor refs;
30701e04c3fSmrg
30801e04c3fSmrg   visit_list_elements(&refs, instructions);
30901e04c3fSmrg
31001e04c3fSmrg   /* Trim out variables we can't split. */
31101e04c3fSmrg   foreach_in_list_safe(variable_entry, entry, &refs.variable_list) {
31201e04c3fSmrg      if (debug) {
31301e04c3fSmrg         printf("structure %s@%p: decl %d, whole_access %d\n",
31401e04c3fSmrg                entry->var->name, (void *) entry->var, entry->declaration,
31501e04c3fSmrg                entry->whole_structure_access);
31601e04c3fSmrg      }
31701e04c3fSmrg
31801e04c3fSmrg      if (!entry->declaration || entry->whole_structure_access) {
31901e04c3fSmrg         entry->remove();
32001e04c3fSmrg      }
32101e04c3fSmrg   }
32201e04c3fSmrg
32301e04c3fSmrg   if (refs.variable_list.is_empty())
32401e04c3fSmrg      return false;
32501e04c3fSmrg
32601e04c3fSmrg   void *mem_ctx = ralloc_context(NULL);
32701e04c3fSmrg
32801e04c3fSmrg   /* Replace the decls of the structures to be split with their split
32901e04c3fSmrg    * components.
33001e04c3fSmrg    */
33101e04c3fSmrg   foreach_in_list_safe(variable_entry, entry, &refs.variable_list) {
33201e04c3fSmrg      const struct glsl_type *type = entry->var->type;
33301e04c3fSmrg
33401e04c3fSmrg      entry->mem_ctx = ralloc_parent(entry->var);
33501e04c3fSmrg
33601e04c3fSmrg      entry->components = ralloc_array(mem_ctx, ir_variable *, type->length);
33701e04c3fSmrg
33801e04c3fSmrg      for (unsigned int i = 0; i < entry->var->type->length; i++) {
33901e04c3fSmrg         const char *name = ralloc_asprintf(mem_ctx, "%s_%s", entry->var->name,
34001e04c3fSmrg                                            type->fields.structure[i].name);
34101e04c3fSmrg         ir_variable *new_var =
34201e04c3fSmrg            new(entry->mem_ctx) ir_variable(type->fields.structure[i].type,
34301e04c3fSmrg                                            name,
34401e04c3fSmrg                                            (ir_variable_mode) entry->var->data.mode);
34501e04c3fSmrg
34601e04c3fSmrg         if (type->fields.structure[i].type->without_array()->is_image()) {
34701e04c3fSmrg            /* Do not lose memory/format qualifiers for images declared inside
34801e04c3fSmrg             * structures as allowed by ARB_bindless_texture.
34901e04c3fSmrg             */
35001e04c3fSmrg            new_var->data.memory_read_only =
35101e04c3fSmrg               type->fields.structure[i].memory_read_only;
35201e04c3fSmrg            new_var->data.memory_write_only =
35301e04c3fSmrg               type->fields.structure[i].memory_write_only;
35401e04c3fSmrg            new_var->data.memory_coherent =
35501e04c3fSmrg               type->fields.structure[i].memory_coherent;
35601e04c3fSmrg            new_var->data.memory_volatile =
35701e04c3fSmrg               type->fields.structure[i].memory_volatile;
35801e04c3fSmrg            new_var->data.memory_restrict =
35901e04c3fSmrg               type->fields.structure[i].memory_restrict;
36001e04c3fSmrg            new_var->data.image_format =
36101e04c3fSmrg               type->fields.structure[i].image_format;
36201e04c3fSmrg         }
36301e04c3fSmrg
36401e04c3fSmrg         entry->components[i] = new_var;
36501e04c3fSmrg         entry->var->insert_before(entry->components[i]);
36601e04c3fSmrg      }
36701e04c3fSmrg
36801e04c3fSmrg      entry->var->remove();
36901e04c3fSmrg   }
37001e04c3fSmrg
37101e04c3fSmrg   ir_structure_splitting_visitor split(&refs.variable_list);
37201e04c3fSmrg   visit_list_elements(&split, instructions);
37301e04c3fSmrg
37401e04c3fSmrg   ralloc_free(mem_ctx);
37501e04c3fSmrg
37601e04c3fSmrg   return true;
37701e04c3fSmrg}
378