101e04c3fSmrg/*
201e04c3fSmrg * Copyright © 2017 Ilia Mirkin
301e04c3fSmrg *
401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
501e04c3fSmrg * copy of this software and associated documentation files (the "Software"),
601e04c3fSmrg * to deal in the Software without restriction, including without limitation
701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the
901e04c3fSmrg * Software is furnished to do so, subject to the following conditions:
1001e04c3fSmrg *
1101e04c3fSmrg * The above copyright notice and this permission notice (including the next
1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the
1301e04c3fSmrg * Software.
1401e04c3fSmrg *
1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
2101e04c3fSmrg * DEALINGS IN THE SOFTWARE.
2201e04c3fSmrg */
2301e04c3fSmrg
2401e04c3fSmrg/**
2501e04c3fSmrg * \file lower_cs_derived.cpp
2601e04c3fSmrg *
2701e04c3fSmrg * For hardware that does not support the gl_GlobalInvocationID and
2801e04c3fSmrg * gl_LocalInvocationIndex system values, replace them with fresh
2901e04c3fSmrg * globals. Note that we can't rely on gl_WorkGroupSize or
3001e04c3fSmrg * gl_LocalGroupSizeARB being available, since they may only have been defined
3101e04c3fSmrg * in a non-main shader.
3201e04c3fSmrg *
3301e04c3fSmrg * [ This can happen if only a secondary shader has the layout(local_size_*)
3401e04c3fSmrg *   declaration. ]
3501e04c3fSmrg *
3601e04c3fSmrg * This is meant to be run post-linking.
3701e04c3fSmrg */
3801e04c3fSmrg
3901e04c3fSmrg#include "glsl_symbol_table.h"
4001e04c3fSmrg#include "ir_hierarchical_visitor.h"
4101e04c3fSmrg#include "ir.h"
4201e04c3fSmrg#include "ir_builder.h"
4301e04c3fSmrg#include "linker.h"
4401e04c3fSmrg#include "program/prog_statevars.h"
4501e04c3fSmrg#include "builtin_functions.h"
4601e04c3fSmrg#include "main/mtypes.h"
4701e04c3fSmrg
4801e04c3fSmrgusing namespace ir_builder;
4901e04c3fSmrg
5001e04c3fSmrgnamespace {
5101e04c3fSmrg
5201e04c3fSmrgclass lower_cs_derived_visitor : public ir_hierarchical_visitor {
5301e04c3fSmrgpublic:
5401e04c3fSmrg   explicit lower_cs_derived_visitor(gl_linked_shader *shader)
5501e04c3fSmrg      : progress(false),
5601e04c3fSmrg        shader(shader),
577ec681f3Smrg        local_size_variable(shader->Program->info.workgroup_size_variable),
5801e04c3fSmrg        gl_WorkGroupSize(NULL),
5901e04c3fSmrg        gl_WorkGroupID(NULL),
6001e04c3fSmrg        gl_LocalInvocationID(NULL),
6101e04c3fSmrg        gl_GlobalInvocationID(NULL),
6201e04c3fSmrg        gl_LocalInvocationIndex(NULL)
6301e04c3fSmrg   {
6401e04c3fSmrg      main_sig = _mesa_get_main_function_signature(shader->symbols);
6501e04c3fSmrg      assert(main_sig);
6601e04c3fSmrg   }
6701e04c3fSmrg
6801e04c3fSmrg   virtual ir_visitor_status visit(ir_dereference_variable *);
6901e04c3fSmrg
7001e04c3fSmrg   ir_variable *add_system_value(
7101e04c3fSmrg         int slot, const glsl_type *type, const char *name);
7201e04c3fSmrg   void find_sysvals();
7301e04c3fSmrg   void make_gl_GlobalInvocationID();
7401e04c3fSmrg   void make_gl_LocalInvocationIndex();
7501e04c3fSmrg
7601e04c3fSmrg   bool progress;
7701e04c3fSmrg
7801e04c3fSmrgprivate:
7901e04c3fSmrg   gl_linked_shader *shader;
8001e04c3fSmrg   bool local_size_variable;
8101e04c3fSmrg   ir_function_signature *main_sig;
8201e04c3fSmrg
8301e04c3fSmrg   ir_rvalue *gl_WorkGroupSize;
8401e04c3fSmrg   ir_variable *gl_WorkGroupID;
8501e04c3fSmrg   ir_variable *gl_LocalInvocationID;
8601e04c3fSmrg
8701e04c3fSmrg   ir_variable *gl_GlobalInvocationID;
8801e04c3fSmrg   ir_variable *gl_LocalInvocationIndex;
8901e04c3fSmrg};
9001e04c3fSmrg
9101e04c3fSmrg} /* anonymous namespace */
9201e04c3fSmrg
9301e04c3fSmrgir_variable *
9401e04c3fSmrglower_cs_derived_visitor::add_system_value(
9501e04c3fSmrg      int slot, const glsl_type *type, const char *name)
9601e04c3fSmrg{
9701e04c3fSmrg   ir_variable *var = new(shader) ir_variable(type, name, ir_var_system_value);
9801e04c3fSmrg   var->data.how_declared = ir_var_declared_implicitly;
9901e04c3fSmrg   var->data.read_only = true;
10001e04c3fSmrg   var->data.location = slot;
10101e04c3fSmrg   var->data.explicit_location = true;
10201e04c3fSmrg   var->data.explicit_index = 0;
10301e04c3fSmrg   shader->ir->push_head(var);
10401e04c3fSmrg
10501e04c3fSmrg   return var;
10601e04c3fSmrg}
10701e04c3fSmrg
10801e04c3fSmrgvoid
10901e04c3fSmrglower_cs_derived_visitor::find_sysvals()
11001e04c3fSmrg{
11101e04c3fSmrg   if (gl_WorkGroupSize != NULL)
11201e04c3fSmrg      return;
11301e04c3fSmrg
11401e04c3fSmrg   ir_variable *WorkGroupSize;
11501e04c3fSmrg   if (local_size_variable)
11601e04c3fSmrg      WorkGroupSize = shader->symbols->get_variable("gl_LocalGroupSizeARB");
11701e04c3fSmrg   else
11801e04c3fSmrg      WorkGroupSize = shader->symbols->get_variable("gl_WorkGroupSize");
11901e04c3fSmrg   if (WorkGroupSize)
12001e04c3fSmrg      gl_WorkGroupSize = new(shader) ir_dereference_variable(WorkGroupSize);
12101e04c3fSmrg   gl_WorkGroupID = shader->symbols->get_variable("gl_WorkGroupID");
12201e04c3fSmrg   gl_LocalInvocationID = shader->symbols->get_variable("gl_LocalInvocationID");
12301e04c3fSmrg
12401e04c3fSmrg   /*
12501e04c3fSmrg    * These may be missing due to either dead code elimination, or, in the
12601e04c3fSmrg    * case of the group size, due to the layout being declared in a non-main
12701e04c3fSmrg    * shader. Re-create them.
12801e04c3fSmrg    */
12901e04c3fSmrg
13001e04c3fSmrg   if (!gl_WorkGroupID)
13101e04c3fSmrg      gl_WorkGroupID = add_system_value(
1327ec681f3Smrg            SYSTEM_VALUE_WORKGROUP_ID, glsl_type::uvec3_type, "gl_WorkGroupID");
13301e04c3fSmrg   if (!gl_LocalInvocationID)
13401e04c3fSmrg      gl_LocalInvocationID = add_system_value(
13501e04c3fSmrg            SYSTEM_VALUE_LOCAL_INVOCATION_ID, glsl_type::uvec3_type,
13601e04c3fSmrg            "gl_LocalInvocationID");
13701e04c3fSmrg   if (!WorkGroupSize) {
13801e04c3fSmrg      if (local_size_variable) {
13901e04c3fSmrg         gl_WorkGroupSize = new(shader) ir_dereference_variable(
14001e04c3fSmrg               add_system_value(
1417ec681f3Smrg                     SYSTEM_VALUE_WORKGROUP_SIZE, glsl_type::uvec3_type,
14201e04c3fSmrg                     "gl_LocalGroupSizeARB"));
14301e04c3fSmrg      } else {
14401e04c3fSmrg         ir_constant_data data;
14501e04c3fSmrg         memset(&data, 0, sizeof(data));
14601e04c3fSmrg         for (int i = 0; i < 3; i++)
1477ec681f3Smrg            data.u[i] = shader->Program->info.workgroup_size[i];
14801e04c3fSmrg         gl_WorkGroupSize = new(shader) ir_constant(glsl_type::uvec3_type, &data);
14901e04c3fSmrg      }
15001e04c3fSmrg   }
15101e04c3fSmrg}
15201e04c3fSmrg
15301e04c3fSmrgvoid
15401e04c3fSmrglower_cs_derived_visitor::make_gl_GlobalInvocationID()
15501e04c3fSmrg{
15601e04c3fSmrg   if (gl_GlobalInvocationID != NULL)
15701e04c3fSmrg      return;
15801e04c3fSmrg
15901e04c3fSmrg   find_sysvals();
16001e04c3fSmrg
16101e04c3fSmrg   /* gl_GlobalInvocationID =
16201e04c3fSmrg    *    gl_WorkGroupID * gl_WorkGroupSize + gl_LocalInvocationID
16301e04c3fSmrg    */
16401e04c3fSmrg   gl_GlobalInvocationID = new(shader) ir_variable(
16501e04c3fSmrg         glsl_type::uvec3_type, "__GlobalInvocationID", ir_var_temporary);
16601e04c3fSmrg   shader->ir->push_head(gl_GlobalInvocationID);
16701e04c3fSmrg
16801e04c3fSmrg   ir_instruction *inst =
16901e04c3fSmrg      assign(gl_GlobalInvocationID,
17001e04c3fSmrg             add(mul(gl_WorkGroupID, gl_WorkGroupSize->clone(shader, NULL)),
17101e04c3fSmrg                 gl_LocalInvocationID));
17201e04c3fSmrg   main_sig->body.push_head(inst);
17301e04c3fSmrg}
17401e04c3fSmrg
17501e04c3fSmrgvoid
17601e04c3fSmrglower_cs_derived_visitor::make_gl_LocalInvocationIndex()
17701e04c3fSmrg{
17801e04c3fSmrg   if (gl_LocalInvocationIndex != NULL)
17901e04c3fSmrg      return;
18001e04c3fSmrg
18101e04c3fSmrg   find_sysvals();
18201e04c3fSmrg
18301e04c3fSmrg   /* gl_LocalInvocationIndex =
18401e04c3fSmrg    *    gl_LocalInvocationID.z * gl_WorkGroupSize.x * gl_WorkGroupSize.y +
18501e04c3fSmrg    *    gl_LocalInvocationID.y * gl_WorkGroupSize.x +
18601e04c3fSmrg    *    gl_LocalInvocationID.x;
18701e04c3fSmrg    */
18801e04c3fSmrg   gl_LocalInvocationIndex = new(shader)
18901e04c3fSmrg      ir_variable(glsl_type::uint_type, "__LocalInvocationIndex", ir_var_temporary);
19001e04c3fSmrg   shader->ir->push_head(gl_LocalInvocationIndex);
19101e04c3fSmrg
19201e04c3fSmrg   ir_expression *index_z =
19301e04c3fSmrg      mul(mul(swizzle_z(gl_LocalInvocationID), swizzle_x(gl_WorkGroupSize->clone(shader, NULL))),
19401e04c3fSmrg          swizzle_y(gl_WorkGroupSize->clone(shader, NULL)));
19501e04c3fSmrg   ir_expression *index_y =
19601e04c3fSmrg      mul(swizzle_y(gl_LocalInvocationID), swizzle_x(gl_WorkGroupSize->clone(shader, NULL)));
19701e04c3fSmrg   ir_expression *index_y_plus_z = add(index_y, index_z);
19801e04c3fSmrg   operand index_x(swizzle_x(gl_LocalInvocationID));
19901e04c3fSmrg   ir_expression *index_x_plus_y_plus_z = add(index_y_plus_z, index_x);
20001e04c3fSmrg   ir_instruction *inst =
20101e04c3fSmrg      assign(gl_LocalInvocationIndex, index_x_plus_y_plus_z);
20201e04c3fSmrg   main_sig->body.push_head(inst);
20301e04c3fSmrg}
20401e04c3fSmrg
20501e04c3fSmrgir_visitor_status
20601e04c3fSmrglower_cs_derived_visitor::visit(ir_dereference_variable *ir)
20701e04c3fSmrg{
20801e04c3fSmrg   if (ir->var->data.mode == ir_var_system_value &&
20901e04c3fSmrg       ir->var->data.location == SYSTEM_VALUE_GLOBAL_INVOCATION_ID) {
21001e04c3fSmrg      make_gl_GlobalInvocationID();
21101e04c3fSmrg      ir->var = gl_GlobalInvocationID;
21201e04c3fSmrg      progress = true;
21301e04c3fSmrg   }
21401e04c3fSmrg
21501e04c3fSmrg   if (ir->var->data.mode == ir_var_system_value &&
21601e04c3fSmrg       ir->var->data.location == SYSTEM_VALUE_LOCAL_INVOCATION_INDEX) {
21701e04c3fSmrg      make_gl_LocalInvocationIndex();
21801e04c3fSmrg      ir->var = gl_LocalInvocationIndex;
21901e04c3fSmrg      progress = true;
22001e04c3fSmrg   }
22101e04c3fSmrg
22201e04c3fSmrg   return visit_continue;
22301e04c3fSmrg}
22401e04c3fSmrg
22501e04c3fSmrgbool
22601e04c3fSmrglower_cs_derived(gl_linked_shader *shader)
22701e04c3fSmrg{
22801e04c3fSmrg   if (shader->Stage != MESA_SHADER_COMPUTE)
22901e04c3fSmrg      return false;
23001e04c3fSmrg
23101e04c3fSmrg   lower_cs_derived_visitor v(shader);
23201e04c3fSmrg   v.run(shader->ir);
23301e04c3fSmrg
23401e04c3fSmrg   return v.progress;
23501e04c3fSmrg}
236