101e04c3fSmrg/*
201e04c3fSmrg * Copyright © 2018 Intel Corporation
301e04c3fSmrg *
401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
501e04c3fSmrg * copy of this software and associated documentation files (the "Software"),
601e04c3fSmrg * to deal in the Software without restriction, including without limitation
701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the
901e04c3fSmrg * Software is furnished to do so, subject to the following conditions:
1001e04c3fSmrg *
1101e04c3fSmrg * The above copyright notice and this permission notice (including the next
1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the
1301e04c3fSmrg * Software.
1401e04c3fSmrg *
1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
2101e04c3fSmrg * IN THE SOFTWARE.
2201e04c3fSmrg *
2301e04c3fSmrg */
2401e04c3fSmrg#include "main/mtypes.h"
257ec681f3Smrg#include "glsl_types.h"
2601e04c3fSmrg#include "linker_util.h"
277ec681f3Smrg#include "util/bitscan.h"
2801e04c3fSmrg#include "util/set.h"
2901e04c3fSmrg#include "ir_uniform.h" /* for gl_uniform_storage */
3001e04c3fSmrg
3101e04c3fSmrg/* Utility methods shared between the GLSL IR and the NIR */
3201e04c3fSmrg
337ec681f3Smrg/* From the OpenGL 4.6 specification, 7.3.1.1 Naming Active Resources:
347ec681f3Smrg *
357ec681f3Smrg *    "For an active shader storage block member declared as an array of an
367ec681f3Smrg *     aggregate type, an entry will be generated only for the first array
377ec681f3Smrg *     element, regardless of its type. Such block members are referred to as
387ec681f3Smrg *     top-level arrays. If the block member is an aggregate type, the
397ec681f3Smrg *     enumeration rules are then applied recursively."
407ec681f3Smrg */
417ec681f3Smrgbool
427ec681f3Smrglink_util_should_add_buffer_variable(struct gl_shader_program *prog,
437ec681f3Smrg                                     struct gl_uniform_storage *uniform,
447ec681f3Smrg                                     int top_level_array_base_offset,
457ec681f3Smrg                                     int top_level_array_size_in_bytes,
467ec681f3Smrg                                     int second_element_offset,
477ec681f3Smrg                                     int block_index)
487ec681f3Smrg{
497ec681f3Smrg   /* If the uniform is not a shader storage buffer or is not an array return
507ec681f3Smrg    * true.
517ec681f3Smrg    */
527ec681f3Smrg   if (!uniform->is_shader_storage || top_level_array_size_in_bytes == 0)
537ec681f3Smrg      return true;
547ec681f3Smrg
557ec681f3Smrg   int after_top_level_array = top_level_array_base_offset +
567ec681f3Smrg      top_level_array_size_in_bytes;
577ec681f3Smrg
587ec681f3Smrg   /* Check for a new block, or that we are not dealing with array elements of
597ec681f3Smrg    * a top member array other than the first element.
607ec681f3Smrg    */
617ec681f3Smrg   if (block_index != uniform->block_index ||
627ec681f3Smrg       uniform->offset >= after_top_level_array ||
637ec681f3Smrg       uniform->offset < second_element_offset) {
647ec681f3Smrg      return true;
657ec681f3Smrg   }
667ec681f3Smrg
677ec681f3Smrg   return false;
687ec681f3Smrg}
697ec681f3Smrg
7001e04c3fSmrgbool
7101e04c3fSmrglink_util_add_program_resource(struct gl_shader_program *prog,
7201e04c3fSmrg                               struct set *resource_set,
7301e04c3fSmrg                               GLenum type, const void *data, uint8_t stages)
7401e04c3fSmrg{
7501e04c3fSmrg   assert(data);
7601e04c3fSmrg
7701e04c3fSmrg   /* If resource already exists, do not add it again. */
7801e04c3fSmrg   if (_mesa_set_search(resource_set, data))
7901e04c3fSmrg      return true;
8001e04c3fSmrg
8101e04c3fSmrg   prog->data->ProgramResourceList =
8201e04c3fSmrg      reralloc(prog->data,
8301e04c3fSmrg               prog->data->ProgramResourceList,
8401e04c3fSmrg               gl_program_resource,
8501e04c3fSmrg               prog->data->NumProgramResourceList + 1);
8601e04c3fSmrg
8701e04c3fSmrg   if (!prog->data->ProgramResourceList) {
8801e04c3fSmrg      linker_error(prog, "Out of memory during linking.\n");
8901e04c3fSmrg      return false;
9001e04c3fSmrg   }
9101e04c3fSmrg
9201e04c3fSmrg   struct gl_program_resource *res =
9301e04c3fSmrg      &prog->data->ProgramResourceList[prog->data->NumProgramResourceList];
9401e04c3fSmrg
9501e04c3fSmrg   res->Type = type;
9601e04c3fSmrg   res->Data = data;
9701e04c3fSmrg   res->StageReferences = stages;
9801e04c3fSmrg
9901e04c3fSmrg   prog->data->NumProgramResourceList++;
10001e04c3fSmrg
10101e04c3fSmrg   _mesa_set_add(resource_set, data);
10201e04c3fSmrg
10301e04c3fSmrg   return true;
10401e04c3fSmrg}
10501e04c3fSmrg
10601e04c3fSmrg/**
10701e04c3fSmrg * Search through the list of empty blocks to find one that fits the current
10801e04c3fSmrg * uniform.
10901e04c3fSmrg */
11001e04c3fSmrgint
11101e04c3fSmrglink_util_find_empty_block(struct gl_shader_program *prog,
11201e04c3fSmrg                           struct gl_uniform_storage *uniform)
11301e04c3fSmrg{
11401e04c3fSmrg   const unsigned entries = MAX2(1, uniform->array_elements);
11501e04c3fSmrg
11601e04c3fSmrg   foreach_list_typed(struct empty_uniform_block, block, link,
11701e04c3fSmrg                      &prog->EmptyUniformLocations) {
11801e04c3fSmrg      /* Found a block with enough slots to fit the uniform */
11901e04c3fSmrg      if (block->slots == entries) {
12001e04c3fSmrg         unsigned start = block->start;
12101e04c3fSmrg         exec_node_remove(&block->link);
12201e04c3fSmrg         ralloc_free(block);
12301e04c3fSmrg
12401e04c3fSmrg         return start;
12501e04c3fSmrg      /* Found a block with more slots than needed. It can still be used. */
12601e04c3fSmrg      } else if (block->slots > entries) {
12701e04c3fSmrg         unsigned start = block->start;
12801e04c3fSmrg         block->start += entries;
12901e04c3fSmrg         block->slots -= entries;
13001e04c3fSmrg
13101e04c3fSmrg         return start;
13201e04c3fSmrg      }
13301e04c3fSmrg   }
13401e04c3fSmrg
13501e04c3fSmrg   return -1;
13601e04c3fSmrg}
13701e04c3fSmrg
13801e04c3fSmrgvoid
13901e04c3fSmrglink_util_update_empty_uniform_locations(struct gl_shader_program *prog)
14001e04c3fSmrg{
14101e04c3fSmrg   struct empty_uniform_block *current_block = NULL;
14201e04c3fSmrg
14301e04c3fSmrg   for (unsigned i = 0; i < prog->NumUniformRemapTable; i++) {
14401e04c3fSmrg      /* We found empty space in UniformRemapTable. */
14501e04c3fSmrg      if (prog->UniformRemapTable[i] == NULL) {
14601e04c3fSmrg         /* We've found the beginning of a new continous block of empty slots */
14701e04c3fSmrg         if (!current_block || current_block->start + current_block->slots != i) {
14801e04c3fSmrg            current_block = rzalloc(prog, struct empty_uniform_block);
14901e04c3fSmrg            current_block->start = i;
15001e04c3fSmrg            exec_list_push_tail(&prog->EmptyUniformLocations,
15101e04c3fSmrg                                &current_block->link);
15201e04c3fSmrg         }
15301e04c3fSmrg
15401e04c3fSmrg         /* The current block continues, so we simply increment its slots */
15501e04c3fSmrg         current_block->slots++;
15601e04c3fSmrg      }
15701e04c3fSmrg   }
15801e04c3fSmrg}
1597ec681f3Smrg
1607ec681f3Smrgvoid
1617ec681f3Smrglink_util_check_subroutine_resources(struct gl_shader_program *prog)
1627ec681f3Smrg{
1637ec681f3Smrg   unsigned mask = prog->data->linked_stages;
1647ec681f3Smrg   while (mask) {
1657ec681f3Smrg      const int i = u_bit_scan(&mask);
1667ec681f3Smrg      struct gl_program *p = prog->_LinkedShaders[i]->Program;
1677ec681f3Smrg
1687ec681f3Smrg      if (p->sh.NumSubroutineUniformRemapTable > MAX_SUBROUTINE_UNIFORM_LOCATIONS) {
1697ec681f3Smrg         linker_error(prog, "Too many %s shader subroutine uniforms\n",
1707ec681f3Smrg                      _mesa_shader_stage_to_string(i));
1717ec681f3Smrg      }
1727ec681f3Smrg   }
1737ec681f3Smrg}
1747ec681f3Smrg
1757ec681f3Smrg/**
1767ec681f3Smrg * Validate uniform resources used by a program versus the implementation limits
1777ec681f3Smrg */
1787ec681f3Smrgvoid
1797ec681f3Smrglink_util_check_uniform_resources(struct gl_context *ctx,
1807ec681f3Smrg                                  struct gl_shader_program *prog)
1817ec681f3Smrg{
1827ec681f3Smrg   unsigned total_uniform_blocks = 0;
1837ec681f3Smrg   unsigned total_shader_storage_blocks = 0;
1847ec681f3Smrg
1857ec681f3Smrg   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
1867ec681f3Smrg      struct gl_linked_shader *sh = prog->_LinkedShaders[i];
1877ec681f3Smrg
1887ec681f3Smrg      if (sh == NULL)
1897ec681f3Smrg         continue;
1907ec681f3Smrg
1917ec681f3Smrg      if (sh->num_uniform_components >
1927ec681f3Smrg          ctx->Const.Program[i].MaxUniformComponents) {
1937ec681f3Smrg         if (ctx->Const.GLSLSkipStrictMaxUniformLimitCheck) {
1947ec681f3Smrg            linker_warning(prog, "Too many %s shader default uniform block "
1957ec681f3Smrg                           "components, but the driver will try to optimize "
1967ec681f3Smrg                           "them out; this is non-portable out-of-spec "
1977ec681f3Smrg                           "behavior\n",
1987ec681f3Smrg                           _mesa_shader_stage_to_string(i));
1997ec681f3Smrg         } else {
2007ec681f3Smrg            linker_error(prog, "Too many %s shader default uniform block "
2017ec681f3Smrg                         "components\n",
2027ec681f3Smrg                         _mesa_shader_stage_to_string(i));
2037ec681f3Smrg         }
2047ec681f3Smrg      }
2057ec681f3Smrg
2067ec681f3Smrg      if (sh->num_combined_uniform_components >
2077ec681f3Smrg          ctx->Const.Program[i].MaxCombinedUniformComponents) {
2087ec681f3Smrg         if (ctx->Const.GLSLSkipStrictMaxUniformLimitCheck) {
2097ec681f3Smrg            linker_warning(prog, "Too many %s shader uniform components, "
2107ec681f3Smrg                           "but the driver will try to optimize them out; "
2117ec681f3Smrg                           "this is non-portable out-of-spec behavior\n",
2127ec681f3Smrg                           _mesa_shader_stage_to_string(i));
2137ec681f3Smrg         } else {
2147ec681f3Smrg            linker_error(prog, "Too many %s shader uniform components\n",
2157ec681f3Smrg                         _mesa_shader_stage_to_string(i));
2167ec681f3Smrg         }
2177ec681f3Smrg      }
2187ec681f3Smrg
2197ec681f3Smrg      total_shader_storage_blocks += sh->Program->info.num_ssbos;
2207ec681f3Smrg      total_uniform_blocks += sh->Program->info.num_ubos;
2217ec681f3Smrg   }
2227ec681f3Smrg
2237ec681f3Smrg   if (total_uniform_blocks > ctx->Const.MaxCombinedUniformBlocks) {
2247ec681f3Smrg      linker_error(prog, "Too many combined uniform blocks (%d/%d)\n",
2257ec681f3Smrg                   total_uniform_blocks, ctx->Const.MaxCombinedUniformBlocks);
2267ec681f3Smrg   }
2277ec681f3Smrg
2287ec681f3Smrg   if (total_shader_storage_blocks > ctx->Const.MaxCombinedShaderStorageBlocks) {
2297ec681f3Smrg      linker_error(prog, "Too many combined shader storage blocks (%d/%d)\n",
2307ec681f3Smrg                   total_shader_storage_blocks,
2317ec681f3Smrg                   ctx->Const.MaxCombinedShaderStorageBlocks);
2327ec681f3Smrg   }
2337ec681f3Smrg
2347ec681f3Smrg   for (unsigned i = 0; i < prog->data->NumUniformBlocks; i++) {
2357ec681f3Smrg      if (prog->data->UniformBlocks[i].UniformBufferSize >
2367ec681f3Smrg          ctx->Const.MaxUniformBlockSize) {
2377ec681f3Smrg         linker_error(prog, "Uniform block %s too big (%d/%d)\n",
2387ec681f3Smrg                      prog->data->UniformBlocks[i].Name,
2397ec681f3Smrg                      prog->data->UniformBlocks[i].UniformBufferSize,
2407ec681f3Smrg                      ctx->Const.MaxUniformBlockSize);
2417ec681f3Smrg      }
2427ec681f3Smrg   }
2437ec681f3Smrg
2447ec681f3Smrg   for (unsigned i = 0; i < prog->data->NumShaderStorageBlocks; i++) {
2457ec681f3Smrg      if (prog->data->ShaderStorageBlocks[i].UniformBufferSize >
2467ec681f3Smrg          ctx->Const.MaxShaderStorageBlockSize) {
2477ec681f3Smrg         linker_error(prog, "Shader storage block %s too big (%d/%d)\n",
2487ec681f3Smrg                      prog->data->ShaderStorageBlocks[i].Name,
2497ec681f3Smrg                      prog->data->ShaderStorageBlocks[i].UniformBufferSize,
2507ec681f3Smrg                      ctx->Const.MaxShaderStorageBlockSize);
2517ec681f3Smrg      }
2527ec681f3Smrg   }
2537ec681f3Smrg}
2547ec681f3Smrg
2557ec681f3Smrgvoid
2567ec681f3Smrglink_util_calculate_subroutine_compat(struct gl_shader_program *prog)
2577ec681f3Smrg{
2587ec681f3Smrg   unsigned mask = prog->data->linked_stages;
2597ec681f3Smrg   while (mask) {
2607ec681f3Smrg      const int i = u_bit_scan(&mask);
2617ec681f3Smrg      struct gl_program *p = prog->_LinkedShaders[i]->Program;
2627ec681f3Smrg
2637ec681f3Smrg      for (unsigned j = 0; j < p->sh.NumSubroutineUniformRemapTable; j++) {
2647ec681f3Smrg         if (p->sh.SubroutineUniformRemapTable[j] == INACTIVE_UNIFORM_EXPLICIT_LOCATION)
2657ec681f3Smrg            continue;
2667ec681f3Smrg
2677ec681f3Smrg         struct gl_uniform_storage *uni = p->sh.SubroutineUniformRemapTable[j];
2687ec681f3Smrg
2697ec681f3Smrg         if (!uni)
2707ec681f3Smrg            continue;
2717ec681f3Smrg
2727ec681f3Smrg         int count = 0;
2737ec681f3Smrg         if (p->sh.NumSubroutineFunctions == 0) {
2747ec681f3Smrg            linker_error(prog, "subroutine uniform %s defined but no valid functions found\n", uni->type->name);
2757ec681f3Smrg            continue;
2767ec681f3Smrg         }
2777ec681f3Smrg         for (unsigned f = 0; f < p->sh.NumSubroutineFunctions; f++) {
2787ec681f3Smrg            struct gl_subroutine_function *fn = &p->sh.SubroutineFunctions[f];
2797ec681f3Smrg            for (int k = 0; k < fn->num_compat_types; k++) {
2807ec681f3Smrg               if (fn->types[k] == uni->type) {
2817ec681f3Smrg                  count++;
2827ec681f3Smrg                  break;
2837ec681f3Smrg               }
2847ec681f3Smrg            }
2857ec681f3Smrg         }
2867ec681f3Smrg         uni->num_compatible_subroutines = count;
2877ec681f3Smrg      }
2887ec681f3Smrg   }
2897ec681f3Smrg}
2907ec681f3Smrg
2917ec681f3Smrg/**
2927ec681f3Smrg * Recursive part of the public mark_array_elements_referenced function.
2937ec681f3Smrg *
2947ec681f3Smrg * The recursion occurs when an entire array-of- is accessed.  See the
2957ec681f3Smrg * implementation for more details.
2967ec681f3Smrg *
2977ec681f3Smrg * \param dr                List of array_deref_range elements to be
2987ec681f3Smrg *                          processed.
2997ec681f3Smrg * \param count             Number of array_deref_range elements to be
3007ec681f3Smrg *                          processed.
3017ec681f3Smrg * \param scale             Current offset scale.
3027ec681f3Smrg * \param linearized_index  Current accumulated linearized array index.
3037ec681f3Smrg */
3047ec681f3Smrgvoid
3057ec681f3Smrg_mark_array_elements_referenced(const struct array_deref_range *dr,
3067ec681f3Smrg                                unsigned count, unsigned scale,
3077ec681f3Smrg                                unsigned linearized_index,
3087ec681f3Smrg                                BITSET_WORD *bits)
3097ec681f3Smrg{
3107ec681f3Smrg   /* Walk through the list of array dereferences in least- to
3117ec681f3Smrg    * most-significant order.  Along the way, accumulate the current
3127ec681f3Smrg    * linearized offset and the scale factor for each array-of-.
3137ec681f3Smrg    */
3147ec681f3Smrg   for (unsigned i = 0; i < count; i++) {
3157ec681f3Smrg      if (dr[i].index < dr[i].size) {
3167ec681f3Smrg         linearized_index += dr[i].index * scale;
3177ec681f3Smrg         scale *= dr[i].size;
3187ec681f3Smrg      } else {
3197ec681f3Smrg         /* For each element in the current array, update the count and
3207ec681f3Smrg          * offset, then recurse to process the remaining arrays.
3217ec681f3Smrg          *
3227ec681f3Smrg          * There is some inefficency here if the last eBITSET_WORD *bitslement in the
3237ec681f3Smrg          * array_deref_range list specifies the entire array.  In that case,
3247ec681f3Smrg          * the loop will make recursive calls with count == 0.  In the call,
3257ec681f3Smrg          * all that will happen is the bit will be set.
3267ec681f3Smrg          */
3277ec681f3Smrg         for (unsigned j = 0; j < dr[i].size; j++) {
3287ec681f3Smrg            _mark_array_elements_referenced(&dr[i + 1],
3297ec681f3Smrg                                            count - (i + 1),
3307ec681f3Smrg                                            scale * dr[i].size,
3317ec681f3Smrg                                            linearized_index + (j * scale),
3327ec681f3Smrg                                            bits);
3337ec681f3Smrg         }
3347ec681f3Smrg
3357ec681f3Smrg         return;
3367ec681f3Smrg      }
3377ec681f3Smrg   }
3387ec681f3Smrg
3397ec681f3Smrg   BITSET_SET(bits, linearized_index);
3407ec681f3Smrg}
3417ec681f3Smrg
3427ec681f3Smrg/**
3437ec681f3Smrg * Mark a set of array elements as accessed.
3447ec681f3Smrg *
3457ec681f3Smrg * If every \c array_deref_range is for a single index, only a single
3467ec681f3Smrg * element will be marked.  If any \c array_deref_range is for an entire
3477ec681f3Smrg * array-of-, then multiple elements will be marked.
3487ec681f3Smrg *
3497ec681f3Smrg * Items in the \c array_deref_range list appear in least- to
3507ec681f3Smrg * most-significant order.  This is the \b opposite order the indices
3517ec681f3Smrg * appear in the GLSL shader text.  An array access like
3527ec681f3Smrg *
3537ec681f3Smrg *     x = y[1][i][3];
3547ec681f3Smrg *
3557ec681f3Smrg * would appear as
3567ec681f3Smrg *
3577ec681f3Smrg *     { { 3, n }, { m, m }, { 1, p } }
3587ec681f3Smrg *
3597ec681f3Smrg * where n, m, and p are the sizes of the arrays-of-arrays.
3607ec681f3Smrg *
3617ec681f3Smrg * The set of marked array elements can later be queried by
3627ec681f3Smrg * \c ::is_linearized_index_referenced.
3637ec681f3Smrg *
3647ec681f3Smrg * \param dr     List of array_deref_range elements to be processed.
3657ec681f3Smrg * \param count  Number of array_deref_range elements to be processed.
3667ec681f3Smrg */
3677ec681f3Smrgvoid
3687ec681f3Smrglink_util_mark_array_elements_referenced(const struct array_deref_range *dr,
3697ec681f3Smrg                                         unsigned count, unsigned array_depth,
3707ec681f3Smrg                                         BITSET_WORD *bits)
3717ec681f3Smrg{
3727ec681f3Smrg   if (count != array_depth)
3737ec681f3Smrg      return;
3747ec681f3Smrg
3757ec681f3Smrg   _mark_array_elements_referenced(dr, count, 1, 0, bits);
3767ec681f3Smrg}
377