101e04c3fSmrg/*
201e04c3fSmrg * Copyright © 2012 Vincent Lejeune
301e04c3fSmrg * Copyright © 2012 Intel Corporation
401e04c3fSmrg *
501e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
601e04c3fSmrg * copy of this software and associated documentation files (the "Software"),
701e04c3fSmrg * to deal in the Software without restriction, including without limitation
801e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
901e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the
1001e04c3fSmrg * Software is furnished to do so, subject to the following conditions:
1101e04c3fSmrg *
1201e04c3fSmrg * The above copyright notice and this permission notice (including the next
1301e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the
1401e04c3fSmrg * Software.
1501e04c3fSmrg *
1601e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1701e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1801e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1901e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
2001e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2101e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
2201e04c3fSmrg * DEALINGS IN THE SOFTWARE.
2301e04c3fSmrg */
2401e04c3fSmrg
2501e04c3fSmrg#include "ir.h"
2601e04c3fSmrg#include "util/hash_table.h"
2701e04c3fSmrg
2801e04c3fSmrg/**
2901e04c3fSmrg * \file lower_output_reads.cpp
3001e04c3fSmrg *
3101e04c3fSmrg * In GLSL, shader output variables (such as varyings) can be both read and
3201e04c3fSmrg * written.  However, on some hardware, reading an output register causes
3301e04c3fSmrg * trouble.
3401e04c3fSmrg *
3501e04c3fSmrg * This pass creates temporary shadow copies of every (used) shader output,
3601e04c3fSmrg * and replaces all accesses to use those instead.  It also adds code to the
3701e04c3fSmrg * main() function to copy the final values to the actual shader outputs.
3801e04c3fSmrg */
3901e04c3fSmrg
4001e04c3fSmrgnamespace {
4101e04c3fSmrg
4201e04c3fSmrgclass output_read_remover : public ir_hierarchical_visitor {
4301e04c3fSmrgprotected:
4401e04c3fSmrg   /**
4501e04c3fSmrg    * A hash table mapping from the original ir_variable shader outputs
4601e04c3fSmrg    * (ir_var_shader_out mode) to the new temporaries to be used instead.
4701e04c3fSmrg    */
4801e04c3fSmrg   hash_table *replacements;
4901e04c3fSmrg
5001e04c3fSmrg   unsigned stage;
5101e04c3fSmrgpublic:
5201e04c3fSmrg   output_read_remover(unsigned stage);
5301e04c3fSmrg   ~output_read_remover();
5401e04c3fSmrg   virtual ir_visitor_status visit(class ir_dereference_variable *);
5501e04c3fSmrg   virtual ir_visitor_status visit_leave(class ir_emit_vertex *);
5601e04c3fSmrg   virtual ir_visitor_status visit_leave(class ir_return *);
5701e04c3fSmrg   virtual ir_visitor_status visit_leave(class ir_function_signature *);
5801e04c3fSmrg};
5901e04c3fSmrg
6001e04c3fSmrg} /* anonymous namespace */
6101e04c3fSmrg
6201e04c3fSmrg/**
6301e04c3fSmrg * Hash function for the output variables - computes the hash of the name.
6401e04c3fSmrg * NOTE: We're using the name string to ensure that the hash doesn't depend
6501e04c3fSmrg * on any random factors, otherwise the output_read_remover could produce
6601e04c3fSmrg * the random order of the assignments.
6701e04c3fSmrg *
6801e04c3fSmrg * NOTE: If you want to reuse this function please take into account that
6901e04c3fSmrg * generally the names of the variables are non-unique.
7001e04c3fSmrg */
7101e04c3fSmrgstatic unsigned
7201e04c3fSmrghash_table_var_hash(const void *key)
7301e04c3fSmrg{
7401e04c3fSmrg   const ir_variable * var = static_cast<const ir_variable *>(key);
757ec681f3Smrg   return _mesa_hash_string(var->name);
7601e04c3fSmrg}
7701e04c3fSmrg
7801e04c3fSmrgoutput_read_remover::output_read_remover(unsigned stage)
7901e04c3fSmrg{
8001e04c3fSmrg   this->stage = stage;
8101e04c3fSmrg   replacements = _mesa_hash_table_create(NULL, hash_table_var_hash,
8201e04c3fSmrg                                          _mesa_key_pointer_equal);
8301e04c3fSmrg}
8401e04c3fSmrg
8501e04c3fSmrgoutput_read_remover::~output_read_remover()
8601e04c3fSmrg{
8701e04c3fSmrg   _mesa_hash_table_destroy(replacements, NULL);
8801e04c3fSmrg}
8901e04c3fSmrg
9001e04c3fSmrgir_visitor_status
9101e04c3fSmrgoutput_read_remover::visit(ir_dereference_variable *ir)
9201e04c3fSmrg{
9301e04c3fSmrg   if (ir->var->data.mode != ir_var_shader_out || ir->var->data.fb_fetch_output)
9401e04c3fSmrg      return visit_continue;
9501e04c3fSmrg
9601e04c3fSmrg   hash_entry *entry = _mesa_hash_table_search(replacements, ir->var);
9701e04c3fSmrg   ir_variable *temp = entry ? (ir_variable *) entry->data : NULL;
9801e04c3fSmrg
9901e04c3fSmrg   /* If we don't have an existing temporary, create one. */
10001e04c3fSmrg   if (temp == NULL) {
10101e04c3fSmrg      void *var_ctx = ralloc_parent(ir->var);
10201e04c3fSmrg      temp = new(var_ctx) ir_variable(ir->var->type, ir->var->name,
10301e04c3fSmrg                                      ir_var_temporary);
10401e04c3fSmrg      /* copy flags which affect arithematical precision */
10501e04c3fSmrg      temp->data.invariant = ir->var->data.invariant;
10601e04c3fSmrg      temp->data.precise = ir->var->data.precise;
10701e04c3fSmrg      temp->data.precision = ir->var->data.precision;
10801e04c3fSmrg      _mesa_hash_table_insert(replacements, ir->var, temp);
10901e04c3fSmrg      ir->var->insert_after(temp);
11001e04c3fSmrg   }
11101e04c3fSmrg
11201e04c3fSmrg   /* Update the dereference to use the temporary */
11301e04c3fSmrg   ir->var = temp;
11401e04c3fSmrg
11501e04c3fSmrg   return visit_continue;
11601e04c3fSmrg}
11701e04c3fSmrg
11801e04c3fSmrg/**
11901e04c3fSmrg * Create an assignment to copy a temporary value back to the actual output.
12001e04c3fSmrg */
12101e04c3fSmrgstatic ir_assignment *
12201e04c3fSmrgcopy(void *ctx, ir_variable *output, ir_variable *temp)
12301e04c3fSmrg{
12401e04c3fSmrg   ir_dereference_variable *lhs = new(ctx) ir_dereference_variable(output);
12501e04c3fSmrg   ir_dereference_variable *rhs = new(ctx) ir_dereference_variable(temp);
12601e04c3fSmrg   return new(ctx) ir_assignment(lhs, rhs);
12701e04c3fSmrg}
12801e04c3fSmrg
12901e04c3fSmrg/** Insert a copy-back assignment before a "return" statement or a call to
13001e04c3fSmrg * EmitVertex().
13101e04c3fSmrg */
13201e04c3fSmrgstatic void
13301e04c3fSmrgemit_return_copy(const void *key, void *data, void *closure)
13401e04c3fSmrg{
13501e04c3fSmrg   ir_return *ir = (ir_return *) closure;
13601e04c3fSmrg   ir->insert_before(copy(ir, (ir_variable *) key, (ir_variable *) data));
13701e04c3fSmrg}
13801e04c3fSmrg
13901e04c3fSmrg/** Insert a copy-back assignment at the end of the main() function */
14001e04c3fSmrgstatic void
14101e04c3fSmrgemit_main_copy(const void *key, void *data, void *closure)
14201e04c3fSmrg{
14301e04c3fSmrg   ir_function_signature *sig = (ir_function_signature *) closure;
14401e04c3fSmrg   sig->body.push_tail(copy(sig, (ir_variable *) key, (ir_variable *) data));
14501e04c3fSmrg}
14601e04c3fSmrg
14701e04c3fSmrgir_visitor_status
14801e04c3fSmrgoutput_read_remover::visit_leave(ir_return *ir)
14901e04c3fSmrg{
15001e04c3fSmrg   hash_table_call_foreach(replacements, emit_return_copy, ir);
15101e04c3fSmrg   return visit_continue;
15201e04c3fSmrg}
15301e04c3fSmrg
15401e04c3fSmrgir_visitor_status
15501e04c3fSmrgoutput_read_remover::visit_leave(ir_emit_vertex *ir)
15601e04c3fSmrg{
15701e04c3fSmrg   hash_table_call_foreach(replacements, emit_return_copy, ir);
15801e04c3fSmrg   return visit_continue;
15901e04c3fSmrg}
16001e04c3fSmrg
16101e04c3fSmrgir_visitor_status
16201e04c3fSmrgoutput_read_remover::visit_leave(ir_function_signature *sig)
16301e04c3fSmrg{
16401e04c3fSmrg   if (strcmp(sig->function_name(), "main") != 0)
16501e04c3fSmrg      return visit_continue;
16601e04c3fSmrg
16701e04c3fSmrg   hash_table_call_foreach(replacements, emit_main_copy, sig);
16801e04c3fSmrg   return visit_continue;
16901e04c3fSmrg}
17001e04c3fSmrg
17101e04c3fSmrgvoid
17201e04c3fSmrglower_output_reads(unsigned stage, exec_list *instructions)
17301e04c3fSmrg{
17401e04c3fSmrg   /* Due to the possible interactions between multiple tessellation control
17501e04c3fSmrg    * shader invocations, we leave output variables as-is.
17601e04c3fSmrg    */
17701e04c3fSmrg   if (stage == MESA_SHADER_TESS_CTRL)
17801e04c3fSmrg      return;
17901e04c3fSmrg
18001e04c3fSmrg   output_read_remover v(stage);
18101e04c3fSmrg   visit_list_elements(&v, instructions);
18201e04c3fSmrg}
183