101e04c3fSmrg/*
201e04c3fSmrg * Copyright © 2015 Intel Corporation
301e04c3fSmrg *
401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
501e04c3fSmrg * copy of this software and associated documentation files (the "Software"),
601e04c3fSmrg * to deal in the Software without restriction, including without limitation
701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the
901e04c3fSmrg * Software is furnished to do so, subject to the following conditions:
1001e04c3fSmrg *
1101e04c3fSmrg * The above copyright notice and this permission notice (including the next
1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the
1301e04c3fSmrg * Software.
1401e04c3fSmrg *
1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
2101e04c3fSmrg * IN THE SOFTWARE.
2201e04c3fSmrg */
2301e04c3fSmrg
2401e04c3fSmrg#include "nir.h"
257e102996Smaya#include "nir_builder.h"
2601e04c3fSmrg#include "util/set.h"
2701e04c3fSmrg#include "util/hash_table.h"
2801e04c3fSmrg
2901e04c3fSmrg/* This file contains various little helpers for doing simple linking in
3001e04c3fSmrg * NIR.  Eventually, we'll probably want a full-blown varying packing
3101e04c3fSmrg * implementation in here.  Right now, it just deletes unused things.
3201e04c3fSmrg */
3301e04c3fSmrg
3401e04c3fSmrg/**
357ec681f3Smrg * Returns the bits in the inputs_read, or outputs_written
367ec681f3Smrg * bitfield corresponding to this variable.
3701e04c3fSmrg */
3801e04c3fSmrgstatic uint64_t
3901e04c3fSmrgget_variable_io_mask(nir_variable *var, gl_shader_stage stage)
4001e04c3fSmrg{
4101e04c3fSmrg   if (var->data.location < 0)
4201e04c3fSmrg      return 0;
4301e04c3fSmrg
4401e04c3fSmrg   unsigned location = var->data.patch ?
4501e04c3fSmrg      var->data.location - VARYING_SLOT_PATCH0 : var->data.location;
4601e04c3fSmrg
4701e04c3fSmrg   assert(var->data.mode == nir_var_shader_in ||
487ec681f3Smrg          var->data.mode == nir_var_shader_out);
4901e04c3fSmrg   assert(var->data.location >= 0);
5001e04c3fSmrg
5101e04c3fSmrg   const struct glsl_type *type = var->type;
527ec681f3Smrg   if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
5301e04c3fSmrg      assert(glsl_type_is_array(type));
5401e04c3fSmrg      type = glsl_get_array_element(type);
5501e04c3fSmrg   }
5601e04c3fSmrg
5701e04c3fSmrg   unsigned slots = glsl_count_attribute_slots(type, false);
5801e04c3fSmrg   return ((1ull << slots) - 1) << location;
5901e04c3fSmrg}
6001e04c3fSmrg
617ec681f3Smrgstatic bool
627ec681f3Smrgis_non_generic_patch_var(nir_variable *var)
637ec681f3Smrg{
647ec681f3Smrg   return var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
657ec681f3Smrg          var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER ||
667ec681f3Smrg          var->data.location == VARYING_SLOT_BOUNDING_BOX0 ||
677ec681f3Smrg          var->data.location == VARYING_SLOT_BOUNDING_BOX1;
687ec681f3Smrg}
697ec681f3Smrg
707e102996Smayastatic uint8_t
717e102996Smayaget_num_components(nir_variable *var)
727e102996Smaya{
737e102996Smaya   if (glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
747e102996Smaya      return 4;
757e102996Smaya
767e102996Smaya   return glsl_get_vector_elements(glsl_without_array(var->type));
777e102996Smaya}
787e102996Smaya
7901e04c3fSmrgstatic void
8001e04c3fSmrgtcs_add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read)
8101e04c3fSmrg{
8201e04c3fSmrg   nir_foreach_function(function, shader) {
8301e04c3fSmrg      if (!function->impl)
8401e04c3fSmrg         continue;
8501e04c3fSmrg
8601e04c3fSmrg      nir_foreach_block(block, function->impl) {
8701e04c3fSmrg         nir_foreach_instr(instr, block) {
8801e04c3fSmrg            if (instr->type != nir_instr_type_intrinsic)
8901e04c3fSmrg               continue;
9001e04c3fSmrg
9101e04c3fSmrg            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
9201e04c3fSmrg            if (intrin->intrinsic != nir_intrinsic_load_deref)
9301e04c3fSmrg               continue;
9401e04c3fSmrg
957e102996Smaya            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
967ec681f3Smrg            if (!nir_deref_mode_is(deref, nir_var_shader_out))
9701e04c3fSmrg               continue;
9801e04c3fSmrg
997e102996Smaya            nir_variable *var = nir_deref_instr_get_variable(deref);
1007e102996Smaya            for (unsigned i = 0; i < get_num_components(var); i++) {
1017e102996Smaya               if (var->data.patch) {
1027ec681f3Smrg                  if (is_non_generic_patch_var(var))
1037ec681f3Smrg                     continue;
1047ec681f3Smrg
1057e102996Smaya                  patches_read[var->data.location_frac + i] |=
1067e102996Smaya                     get_variable_io_mask(var, shader->info.stage);
1077e102996Smaya               } else {
1087e102996Smaya                  read[var->data.location_frac + i] |=
1097e102996Smaya                     get_variable_io_mask(var, shader->info.stage);
1107e102996Smaya               }
11101e04c3fSmrg            }
11201e04c3fSmrg         }
11301e04c3fSmrg      }
11401e04c3fSmrg   }
11501e04c3fSmrg}
11601e04c3fSmrg
11701e04c3fSmrg/**
11801e04c3fSmrg * Helper for removing unused shader I/O variables, by demoting them to global
11901e04c3fSmrg * variables (which may then by dead code eliminated).
12001e04c3fSmrg *
12101e04c3fSmrg * Example usage is:
12201e04c3fSmrg *
1237ec681f3Smrg * progress = nir_remove_unused_io_vars(producer, nir_var_shader_out,
12401e04c3fSmrg *                                      read, patches_read) ||
12501e04c3fSmrg *                                      progress;
12601e04c3fSmrg *
12701e04c3fSmrg * The "used" should be an array of 4 uint64_ts (probably of VARYING_BIT_*)
12801e04c3fSmrg * representing each .location_frac used.  Note that for vector variables,
12901e04c3fSmrg * only the first channel (.location_frac) is examined for deciding if the
13001e04c3fSmrg * variable is used!
13101e04c3fSmrg */
13201e04c3fSmrgbool
1337ec681f3Smrgnir_remove_unused_io_vars(nir_shader *shader,
1347ec681f3Smrg                          nir_variable_mode mode,
13501e04c3fSmrg                          uint64_t *used_by_other_stage,
13601e04c3fSmrg                          uint64_t *used_by_other_stage_patches)
13701e04c3fSmrg{
13801e04c3fSmrg   bool progress = false;
13901e04c3fSmrg   uint64_t *used;
14001e04c3fSmrg
1417ec681f3Smrg   assert(mode == nir_var_shader_in || mode == nir_var_shader_out);
1427ec681f3Smrg
1437ec681f3Smrg   nir_foreach_variable_with_modes_safe(var, shader, mode) {
14401e04c3fSmrg      if (var->data.patch)
14501e04c3fSmrg         used = used_by_other_stage_patches;
14601e04c3fSmrg      else
14701e04c3fSmrg         used = used_by_other_stage;
14801e04c3fSmrg
14901e04c3fSmrg      if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0)
15001e04c3fSmrg         continue;
15101e04c3fSmrg
15201e04c3fSmrg      if (var->data.always_active_io)
15301e04c3fSmrg         continue;
15401e04c3fSmrg
1557e102996Smaya      if (var->data.explicit_xfb_buffer)
1567e102996Smaya         continue;
1577e102996Smaya
15801e04c3fSmrg      uint64_t other_stage = used[var->data.location_frac];
15901e04c3fSmrg
16001e04c3fSmrg      if (!(other_stage & get_variable_io_mask(var, shader->info.stage))) {
16101e04c3fSmrg         /* This one is invalid, make it a global variable instead */
16201e04c3fSmrg         var->data.location = 0;
1637e102996Smaya         var->data.mode = nir_var_shader_temp;
16401e04c3fSmrg
16501e04c3fSmrg         progress = true;
16601e04c3fSmrg      }
16701e04c3fSmrg   }
16801e04c3fSmrg
16901e04c3fSmrg   if (progress)
17001e04c3fSmrg      nir_fixup_deref_modes(shader);
17101e04c3fSmrg
17201e04c3fSmrg   return progress;
17301e04c3fSmrg}
17401e04c3fSmrg
17501e04c3fSmrgbool
17601e04c3fSmrgnir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
17701e04c3fSmrg{
17801e04c3fSmrg   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
17901e04c3fSmrg   assert(consumer->info.stage != MESA_SHADER_VERTEX);
18001e04c3fSmrg
18101e04c3fSmrg   uint64_t read[4] = { 0 }, written[4] = { 0 };
18201e04c3fSmrg   uint64_t patches_read[4] = { 0 }, patches_written[4] = { 0 };
18301e04c3fSmrg
1847ec681f3Smrg   nir_foreach_shader_out_variable(var, producer) {
1857e102996Smaya      for (unsigned i = 0; i < get_num_components(var); i++) {
1867e102996Smaya         if (var->data.patch) {
1877ec681f3Smrg            if (is_non_generic_patch_var(var))
1887ec681f3Smrg               continue;
1897ec681f3Smrg
1907e102996Smaya            patches_written[var->data.location_frac + i] |=
1917e102996Smaya               get_variable_io_mask(var, producer->info.stage);
1927e102996Smaya         } else {
1937e102996Smaya            written[var->data.location_frac + i] |=
1947e102996Smaya               get_variable_io_mask(var, producer->info.stage);
1957e102996Smaya         }
19601e04c3fSmrg      }
19701e04c3fSmrg   }
19801e04c3fSmrg
1997ec681f3Smrg   nir_foreach_shader_in_variable(var, consumer) {
2007e102996Smaya      for (unsigned i = 0; i < get_num_components(var); i++) {
2017e102996Smaya         if (var->data.patch) {
2027ec681f3Smrg            if (is_non_generic_patch_var(var))
2037ec681f3Smrg               continue;
2047ec681f3Smrg
2057e102996Smaya            patches_read[var->data.location_frac + i] |=
2067e102996Smaya               get_variable_io_mask(var, consumer->info.stage);
2077e102996Smaya         } else {
2087e102996Smaya            read[var->data.location_frac + i] |=
2097e102996Smaya               get_variable_io_mask(var, consumer->info.stage);
2107e102996Smaya         }
21101e04c3fSmrg      }
21201e04c3fSmrg   }
21301e04c3fSmrg
21401e04c3fSmrg   /* Each TCS invocation can read data written by other TCS invocations,
21501e04c3fSmrg    * so even if the outputs are not used by the TES we must also make
21601e04c3fSmrg    * sure they are not read by the TCS before demoting them to globals.
21701e04c3fSmrg    */
21801e04c3fSmrg   if (producer->info.stage == MESA_SHADER_TESS_CTRL)
21901e04c3fSmrg      tcs_add_output_reads(producer, read, patches_read);
22001e04c3fSmrg
22101e04c3fSmrg   bool progress = false;
2227ec681f3Smrg   progress = nir_remove_unused_io_vars(producer, nir_var_shader_out, read,
22301e04c3fSmrg                                        patches_read);
22401e04c3fSmrg
2257ec681f3Smrg   progress = nir_remove_unused_io_vars(consumer, nir_var_shader_in, written,
22601e04c3fSmrg                                        patches_written) || progress;
22701e04c3fSmrg
22801e04c3fSmrg   return progress;
22901e04c3fSmrg}
23001e04c3fSmrg
23101e04c3fSmrgstatic uint8_t
23201e04c3fSmrgget_interp_type(nir_variable *var, const struct glsl_type *type,
23301e04c3fSmrg                bool default_to_smooth_interp)
23401e04c3fSmrg{
23501e04c3fSmrg   if (glsl_type_is_integer(type))
23601e04c3fSmrg      return INTERP_MODE_FLAT;
23701e04c3fSmrg   else if (var->data.interpolation != INTERP_MODE_NONE)
23801e04c3fSmrg      return var->data.interpolation;
23901e04c3fSmrg   else if (default_to_smooth_interp)
24001e04c3fSmrg      return INTERP_MODE_SMOOTH;
24101e04c3fSmrg   else
24201e04c3fSmrg      return INTERP_MODE_NONE;
24301e04c3fSmrg}
24401e04c3fSmrg
24501e04c3fSmrg#define INTERPOLATE_LOC_SAMPLE 0
24601e04c3fSmrg#define INTERPOLATE_LOC_CENTROID 1
24701e04c3fSmrg#define INTERPOLATE_LOC_CENTER 2
24801e04c3fSmrg
24901e04c3fSmrgstatic uint8_t
25001e04c3fSmrgget_interp_loc(nir_variable *var)
25101e04c3fSmrg{
25201e04c3fSmrg   if (var->data.sample)
25301e04c3fSmrg      return INTERPOLATE_LOC_SAMPLE;
25401e04c3fSmrg   else if (var->data.centroid)
25501e04c3fSmrg      return INTERPOLATE_LOC_CENTROID;
25601e04c3fSmrg   else
25701e04c3fSmrg      return INTERPOLATE_LOC_CENTER;
25801e04c3fSmrg}
25901e04c3fSmrg
/* Returns true for the only kind of type the packing code handles:
 * 32-bit scalars.
 */
static bool
is_packing_supported_for_type(const struct glsl_type *type)
{
   /* We ignore complex types such as arrays, matrices, structs and bitsizes
    * other than 32bit. All other vector types should have been split into
    * scalar variables by the lower_io_to_scalar pass. The only exception
    * should be OpenGL xfb varyings.
    * TODO: add support for more complex types?
    */
   if (!glsl_type_is_scalar(type))
      return false;

   return glsl_type_is_32bit(type);
}
2717e102996Smaya
/* Per-slot record of what has already been placed in a varying slot. */
struct assigned_comps {
   /* Bitmask of the components of the slot that are taken. */
   uint8_t comps;
   /* Interpolation mode of the slot's contents (see get_interp_type()). */
   uint8_t interp_type;
   /* One of the INTERPOLATE_LOC_* values (see get_interp_loc()). */
   uint8_t interp_loc;
   /* Whether the slot's contents are of a 32-bit type. */
   bool is_32bit;
   /* Whether the slot's contents have medium or low precision. */
   bool is_mediump;
};
2807e102996Smaya
2817e102996Smaya/* Packing arrays and dual slot varyings is difficult so to avoid complex
2827e102996Smaya * algorithms this function just assigns them their existing location for now.
2837e102996Smaya * TODO: allow better packing of complex types.
2847e102996Smaya */
28501e04c3fSmrgstatic void
2867ec681f3Smrgget_unmoveable_components_masks(nir_shader *shader,
2877ec681f3Smrg                                nir_variable_mode mode,
2887e102996Smaya                                struct assigned_comps *comps,
2897e102996Smaya                                gl_shader_stage stage,
2907e102996Smaya                                bool default_to_smooth_interp)
29101e04c3fSmrg{
2927ec681f3Smrg   nir_foreach_variable_with_modes_safe(var, shader, mode) {
29301e04c3fSmrg      assert(var->data.location >= 0);
29401e04c3fSmrg
2957e102996Smaya      /* Only remap things that aren't built-ins. */
29601e04c3fSmrg      if (var->data.location >= VARYING_SLOT_VAR0 &&
2977e102996Smaya          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
29801e04c3fSmrg
29901e04c3fSmrg         const struct glsl_type *type = var->type;
3007ec681f3Smrg         if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
30101e04c3fSmrg            assert(glsl_type_is_array(type));
30201e04c3fSmrg            type = glsl_get_array_element(type);
30301e04c3fSmrg         }
30401e04c3fSmrg
3057e102996Smaya         /* If we can pack this varying then don't mark the components as
3067e102996Smaya          * used.
3077e102996Smaya          */
3087e102996Smaya         if (is_packing_supported_for_type(type))
3097e102996Smaya            continue;
3107e102996Smaya
31101e04c3fSmrg         unsigned location = var->data.location - VARYING_SLOT_VAR0;
3127e102996Smaya
31301e04c3fSmrg         unsigned elements =
3147e102996Smaya            glsl_type_is_vector_or_scalar(glsl_without_array(type)) ?
3157e102996Smaya            glsl_get_vector_elements(glsl_without_array(type)) : 4;
31601e04c3fSmrg
31701e04c3fSmrg         bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type));
31801e04c3fSmrg         unsigned slots = glsl_count_attribute_slots(type, false);
3197e102996Smaya         unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
32001e04c3fSmrg         unsigned comps_slot2 = 0;
32101e04c3fSmrg         for (unsigned i = 0; i < slots; i++) {
32201e04c3fSmrg            if (dual_slot) {
32301e04c3fSmrg               if (i & 1) {
3247e102996Smaya                  comps[location + i].comps |= ((1 << comps_slot2) - 1);
32501e04c3fSmrg               } else {
32601e04c3fSmrg                  unsigned num_comps = 4 - var->data.location_frac;
3277e102996Smaya                  comps_slot2 = (elements * dmul) - num_comps;
32801e04c3fSmrg
32901e04c3fSmrg                  /* Assume ARB_enhanced_layouts packing rules for doubles */
33001e04c3fSmrg                  assert(var->data.location_frac == 0 ||
33101e04c3fSmrg                         var->data.location_frac == 2);
33201e04c3fSmrg                  assert(comps_slot2 <= 4);
33301e04c3fSmrg
3347e102996Smaya                  comps[location + i].comps |=
33501e04c3fSmrg                     ((1 << num_comps) - 1) << var->data.location_frac;
33601e04c3fSmrg               }
33701e04c3fSmrg            } else {
3387e102996Smaya               comps[location + i].comps |=
3397e102996Smaya                  ((1 << (elements * dmul)) - 1) << var->data.location_frac;
34001e04c3fSmrg            }
3417e102996Smaya
3427e102996Smaya            comps[location + i].interp_type =
3437e102996Smaya               get_interp_type(var, type, default_to_smooth_interp);
3447e102996Smaya            comps[location + i].interp_loc = get_interp_loc(var);
3457e102996Smaya            comps[location + i].is_32bit =
3467e102996Smaya               glsl_type_is_32bit(glsl_without_array(type));
3477ec681f3Smrg            comps[location + i].is_mediump =
3487ec681f3Smrg               var->data.precision == GLSL_PRECISION_MEDIUM ||
3497ec681f3Smrg               var->data.precision == GLSL_PRECISION_LOW;
35001e04c3fSmrg         }
35101e04c3fSmrg      }
35201e04c3fSmrg   }
35301e04c3fSmrg}
35401e04c3fSmrg
/* Destination of a remapped varying component.  A location of 0 means that
 * no new position was assigned.
 */
struct varying_loc {
   /* New component (location_frac) of the varying. */
   uint8_t component;
   /* New location of the varying. */
   uint32_t location;
};
36001e04c3fSmrg
3617e102996Smayastatic void
3627e102996Smayamark_all_used_slots(nir_variable *var, uint64_t *slots_used,
3637e102996Smaya                    uint64_t slots_used_mask, unsigned num_slots)
3647e102996Smaya{
3657e102996Smaya   unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
3667e102996Smaya
3677e102996Smaya   slots_used[var->data.patch ? 1 : 0] |= slots_used_mask &
3687e102996Smaya      BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
3697e102996Smaya}
3707e102996Smaya
3717e102996Smayastatic void
3727e102996Smayamark_used_slot(nir_variable *var, uint64_t *slots_used, unsigned offset)
3737e102996Smaya{
3747e102996Smaya   unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
3757e102996Smaya
3767e102996Smaya   slots_used[var->data.patch ? 1 : 0] |=
3777e102996Smaya      BITFIELD64_BIT(var->data.location - loc_offset + offset);
3787e102996Smaya}
3797e102996Smaya
38001e04c3fSmrgstatic void
3817ec681f3Smrgremap_slots_and_components(nir_shader *shader, nir_variable_mode mode,
38201e04c3fSmrg                           struct varying_loc (*remap)[4],
3837e102996Smaya                           uint64_t *slots_used, uint64_t *out_slots_read,
3847e102996Smaya                           uint32_t *p_slots_used, uint32_t *p_out_slots_read)
38501e04c3fSmrg {
3867ec681f3Smrg   const gl_shader_stage stage = shader->info.stage;
3877e102996Smaya   uint64_t out_slots_read_tmp[2] = {0};
3887e102996Smaya   uint64_t slots_used_tmp[2] = {0};
38901e04c3fSmrg
39001e04c3fSmrg   /* We don't touch builtins so just copy the bitmask */
3917e102996Smaya   slots_used_tmp[0] = *slots_used & BITFIELD64_RANGE(0, VARYING_SLOT_VAR0);
39201e04c3fSmrg
3937ec681f3Smrg   nir_foreach_variable_with_modes(var, shader, mode) {
39401e04c3fSmrg      assert(var->data.location >= 0);
39501e04c3fSmrg
39601e04c3fSmrg      /* Only remap things that aren't built-ins */
39701e04c3fSmrg      if (var->data.location >= VARYING_SLOT_VAR0 &&
3987e102996Smaya          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
39901e04c3fSmrg
40001e04c3fSmrg         const struct glsl_type *type = var->type;
4017ec681f3Smrg         if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
40201e04c3fSmrg            assert(glsl_type_is_array(type));
40301e04c3fSmrg            type = glsl_get_array_element(type);
40401e04c3fSmrg         }
40501e04c3fSmrg
40601e04c3fSmrg         unsigned num_slots = glsl_count_attribute_slots(type, false);
40701e04c3fSmrg         bool used_across_stages = false;
40801e04c3fSmrg         bool outputs_read = false;
40901e04c3fSmrg
41001e04c3fSmrg         unsigned location = var->data.location - VARYING_SLOT_VAR0;
41101e04c3fSmrg         struct varying_loc *new_loc = &remap[location][var->data.location_frac];
41201e04c3fSmrg
4137e102996Smaya         unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
4147e102996Smaya         uint64_t used = var->data.patch ? *p_slots_used : *slots_used;
4157e102996Smaya         uint64_t outs_used =
4167e102996Smaya            var->data.patch ? *p_out_slots_read : *out_slots_read;
4177e102996Smaya         uint64_t slots =
4187e102996Smaya            BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
4197e102996Smaya
4207e102996Smaya         if (slots & used)
42101e04c3fSmrg            used_across_stages = true;
42201e04c3fSmrg
4237e102996Smaya         if (slots & outs_used)
42401e04c3fSmrg            outputs_read = true;
42501e04c3fSmrg
42601e04c3fSmrg         if (new_loc->location) {
42701e04c3fSmrg            var->data.location = new_loc->location;
42801e04c3fSmrg            var->data.location_frac = new_loc->component;
42901e04c3fSmrg         }
43001e04c3fSmrg
43101e04c3fSmrg         if (var->data.always_active_io) {
43201e04c3fSmrg            /* We can't apply link time optimisations (specifically array
43301e04c3fSmrg             * splitting) to these so we need to copy the existing mask
43401e04c3fSmrg             * otherwise we will mess up the mask for things like partially
43501e04c3fSmrg             * marked arrays.
43601e04c3fSmrg             */
4377e102996Smaya            if (used_across_stages)
4387e102996Smaya               mark_all_used_slots(var, slots_used_tmp, used, num_slots);
43901e04c3fSmrg
44001e04c3fSmrg            if (outputs_read) {
4417e102996Smaya               mark_all_used_slots(var, out_slots_read_tmp, outs_used,
4427e102996Smaya                                   num_slots);
44301e04c3fSmrg            }
44401e04c3fSmrg         } else {
44501e04c3fSmrg            for (unsigned i = 0; i < num_slots; i++) {
44601e04c3fSmrg               if (used_across_stages)
4477e102996Smaya                  mark_used_slot(var, slots_used_tmp, i);
44801e04c3fSmrg
44901e04c3fSmrg               if (outputs_read)
4507e102996Smaya                  mark_used_slot(var, out_slots_read_tmp, i);
45101e04c3fSmrg            }
45201e04c3fSmrg         }
45301e04c3fSmrg      }
45401e04c3fSmrg   }
45501e04c3fSmrg
4567e102996Smaya   *slots_used = slots_used_tmp[0];
4577e102996Smaya   *out_slots_read = out_slots_read_tmp[0];
4587e102996Smaya   *p_slots_used = slots_used_tmp[1];
4597e102996Smaya   *p_out_slots_read = out_slots_read_tmp[1];
46001e04c3fSmrg}
46101e04c3fSmrg
4627e102996Smayastruct varying_component {
4637e102996Smaya   nir_variable *var;
4647e102996Smaya   uint8_t interp_type;
4657e102996Smaya   uint8_t interp_loc;
4667e102996Smaya   bool is_32bit;
4677e102996Smaya   bool is_patch;
4687ec681f3Smrg   bool is_mediump;
4697ec681f3Smrg   bool is_intra_stage_only;
4707e102996Smaya   bool initialised;
4717e102996Smaya};
4727e102996Smaya
4737e102996Smayastatic int
4747e102996Smayacmp_varying_component(const void *comp1_v, const void *comp2_v)
47501e04c3fSmrg{
4767e102996Smaya   struct varying_component *comp1 = (struct varying_component *) comp1_v;
4777e102996Smaya   struct varying_component *comp2 = (struct varying_component *) comp2_v;
4787e102996Smaya
4797e102996Smaya   /* We want patches to be order at the end of the array */
4807e102996Smaya   if (comp1->is_patch != comp2->is_patch)
4817e102996Smaya      return comp1->is_patch ? 1 : -1;
4827e102996Smaya
4837ec681f3Smrg   /* We want to try to group together TCS outputs that are only read by other
4847ec681f3Smrg    * TCS invocations and not consumed by the follow stage.
4857ec681f3Smrg    */
4867ec681f3Smrg   if (comp1->is_intra_stage_only != comp2->is_intra_stage_only)
4877ec681f3Smrg      return comp1->is_intra_stage_only ? 1 : -1;
4887ec681f3Smrg
4897ec681f3Smrg   /* Group mediump varyings together. */
4907ec681f3Smrg   if (comp1->is_mediump != comp2->is_mediump)
4917ec681f3Smrg      return comp1->is_mediump ? 1 : -1;
4927ec681f3Smrg
4937e102996Smaya   /* We can only pack varyings with matching interpolation types so group
4947e102996Smaya    * them together.
4957e102996Smaya    */
4967e102996Smaya   if (comp1->interp_type != comp2->interp_type)
4977e102996Smaya      return comp1->interp_type - comp2->interp_type;
4987e102996Smaya
4997e102996Smaya   /* Interpolation loc must match also. */
5007e102996Smaya   if (comp1->interp_loc != comp2->interp_loc)
5017e102996Smaya      return comp1->interp_loc - comp2->interp_loc;
5027e102996Smaya
5037e102996Smaya   /* If everything else matches just use the original location to sort */
5047ec681f3Smrg   const struct nir_variable_data *const data1 = &comp1->var->data;
5057ec681f3Smrg   const struct nir_variable_data *const data2 = &comp2->var->data;
5067ec681f3Smrg   if (data1->location != data2->location)
5077ec681f3Smrg      return data1->location - data2->location;
5087ec681f3Smrg   return (int)data1->location_frac - (int)data2->location_frac;
5097e102996Smaya}
51001e04c3fSmrg
5117e102996Smayastatic void
5127ec681f3Smrggather_varying_component_info(nir_shader *producer, nir_shader *consumer,
5137e102996Smaya                              struct varying_component **varying_comp_info,
5147e102996Smaya                              unsigned *varying_comp_info_size,
5157e102996Smaya                              bool default_to_smooth_interp)
5167e102996Smaya{
5177e102996Smaya   unsigned store_varying_info_idx[MAX_VARYINGS_INCL_PATCH][4] = {{0}};
5187e102996Smaya   unsigned num_of_comps_to_pack = 0;
51901e04c3fSmrg
5207e102996Smaya   /* Count the number of varying that can be packed and create a mapping
5217e102996Smaya    * of those varyings to the array we will pass to qsort.
52201e04c3fSmrg    */
5237ec681f3Smrg   nir_foreach_shader_out_variable(var, producer) {
52401e04c3fSmrg
5257e102996Smaya      /* Only remap things that aren't builtins. */
52601e04c3fSmrg      if (var->data.location >= VARYING_SLOT_VAR0 &&
5277e102996Smaya          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
52801e04c3fSmrg
52901e04c3fSmrg         /* We can't repack xfb varyings. */
53001e04c3fSmrg         if (var->data.always_active_io)
53101e04c3fSmrg            continue;
53201e04c3fSmrg
53301e04c3fSmrg         const struct glsl_type *type = var->type;
5347ec681f3Smrg         if (nir_is_arrayed_io(var, producer->info.stage) || var->data.per_view) {
53501e04c3fSmrg            assert(glsl_type_is_array(type));
53601e04c3fSmrg            type = glsl_get_array_element(type);
53701e04c3fSmrg         }
53801e04c3fSmrg
5397e102996Smaya         if (!is_packing_supported_for_type(type))
54001e04c3fSmrg            continue;
54101e04c3fSmrg
5427e102996Smaya         unsigned loc = var->data.location - VARYING_SLOT_VAR0;
5437e102996Smaya         store_varying_info_idx[loc][var->data.location_frac] =
5447e102996Smaya            ++num_of_comps_to_pack;
5457e102996Smaya      }
5467e102996Smaya   }
5477e102996Smaya
5487e102996Smaya   *varying_comp_info_size = num_of_comps_to_pack;
5497e102996Smaya   *varying_comp_info = rzalloc_array(NULL, struct varying_component,
5507e102996Smaya                                      num_of_comps_to_pack);
5517e102996Smaya
5527e102996Smaya   nir_function_impl *impl = nir_shader_get_entrypoint(consumer);
5537e102996Smaya
5547e102996Smaya   /* Walk over the shader and populate the varying component info array */
5557e102996Smaya   nir_foreach_block(block, impl) {
5567e102996Smaya      nir_foreach_instr(instr, block) {
5577e102996Smaya         if (instr->type != nir_instr_type_intrinsic)
55801e04c3fSmrg            continue;
55901e04c3fSmrg
5607e102996Smaya         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
5617e102996Smaya         if (intr->intrinsic != nir_intrinsic_load_deref &&
5627e102996Smaya             intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
5637e102996Smaya             intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
5647ec681f3Smrg             intr->intrinsic != nir_intrinsic_interp_deref_at_offset &&
5657ec681f3Smrg             intr->intrinsic != nir_intrinsic_interp_deref_at_vertex)
5667e102996Smaya            continue;
5677e102996Smaya
5687e102996Smaya         nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
5697ec681f3Smrg         if (!nir_deref_mode_is(deref, nir_var_shader_in))
5707e102996Smaya            continue;
5717e102996Smaya
5727e102996Smaya         /* We only remap things that aren't builtins. */
5737e102996Smaya         nir_variable *in_var = nir_deref_instr_get_variable(deref);
5747e102996Smaya         if (in_var->data.location < VARYING_SLOT_VAR0)
5757e102996Smaya            continue;
5767e102996Smaya
5777e102996Smaya         unsigned location = in_var->data.location - VARYING_SLOT_VAR0;
5787e102996Smaya         if (location >= MAX_VARYINGS_INCL_PATCH)
5797e102996Smaya            continue;
5807e102996Smaya
5817e102996Smaya         unsigned var_info_idx =
5827e102996Smaya            store_varying_info_idx[location][in_var->data.location_frac];
5837e102996Smaya         if (!var_info_idx)
5847e102996Smaya            continue;
5857e102996Smaya
5867e102996Smaya         struct varying_component *vc_info =
5877e102996Smaya            &(*varying_comp_info)[var_info_idx-1];
5887e102996Smaya
5897e102996Smaya         if (!vc_info->initialised) {
5907e102996Smaya            const struct glsl_type *type = in_var->type;
5917ec681f3Smrg            if (nir_is_arrayed_io(in_var, consumer->info.stage) ||
5927ec681f3Smrg                in_var->data.per_view) {
5937e102996Smaya               assert(glsl_type_is_array(type));
5947e102996Smaya               type = glsl_get_array_element(type);
5957e102996Smaya            }
5967e102996Smaya
5977e102996Smaya            vc_info->var = in_var;
5987e102996Smaya            vc_info->interp_type =
5997e102996Smaya               get_interp_type(in_var, type, default_to_smooth_interp);
6007e102996Smaya            vc_info->interp_loc = get_interp_loc(in_var);
6017e102996Smaya            vc_info->is_32bit = glsl_type_is_32bit(type);
6027e102996Smaya            vc_info->is_patch = in_var->data.patch;
6037ec681f3Smrg            vc_info->is_mediump = !producer->options->linker_ignore_precision &&
6047ec681f3Smrg               (in_var->data.precision == GLSL_PRECISION_MEDIUM ||
6057ec681f3Smrg                in_var->data.precision == GLSL_PRECISION_LOW);
6067ec681f3Smrg            vc_info->is_intra_stage_only = false;
6077ec681f3Smrg            vc_info->initialised = true;
6087e102996Smaya         }
6097e102996Smaya      }
6107e102996Smaya   }
6117ec681f3Smrg
6127ec681f3Smrg   /* Walk over the shader and populate the varying component info array
6137ec681f3Smrg    * for varyings which are read by other TCS instances but are not consumed
6147ec681f3Smrg    * by the TES.
6157ec681f3Smrg    */
6167ec681f3Smrg   if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
6177ec681f3Smrg      impl = nir_shader_get_entrypoint(producer);
6187ec681f3Smrg
6197ec681f3Smrg      nir_foreach_block(block, impl) {
6207ec681f3Smrg         nir_foreach_instr(instr, block) {
6217ec681f3Smrg            if (instr->type != nir_instr_type_intrinsic)
6227ec681f3Smrg               continue;
6237ec681f3Smrg
6247ec681f3Smrg            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
6257ec681f3Smrg            if (intr->intrinsic != nir_intrinsic_load_deref)
6267ec681f3Smrg               continue;
6277ec681f3Smrg
6287ec681f3Smrg            nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
6297ec681f3Smrg            if (!nir_deref_mode_is(deref, nir_var_shader_out))
6307ec681f3Smrg               continue;
6317ec681f3Smrg
6327ec681f3Smrg            /* We only remap things that aren't builtins. */
6337ec681f3Smrg            nir_variable *out_var = nir_deref_instr_get_variable(deref);
6347ec681f3Smrg            if (out_var->data.location < VARYING_SLOT_VAR0)
6357ec681f3Smrg               continue;
6367ec681f3Smrg
6377ec681f3Smrg            unsigned location = out_var->data.location - VARYING_SLOT_VAR0;
6387ec681f3Smrg            if (location >= MAX_VARYINGS_INCL_PATCH)
6397ec681f3Smrg               continue;
6407ec681f3Smrg
6417ec681f3Smrg            unsigned var_info_idx =
6427ec681f3Smrg               store_varying_info_idx[location][out_var->data.location_frac];
6437ec681f3Smrg            if (!var_info_idx) {
6447ec681f3Smrg               /* Something went wrong, the shader interfaces didn't match, so
6457ec681f3Smrg                * abandon packing. This can happen for example when the
6467ec681f3Smrg                * inputs are scalars but the outputs are struct members.
6477ec681f3Smrg                */
6487ec681f3Smrg               *varying_comp_info_size = 0;
6497ec681f3Smrg               break;
6507ec681f3Smrg            }
6517ec681f3Smrg
6527ec681f3Smrg            struct varying_component *vc_info =
6537ec681f3Smrg               &(*varying_comp_info)[var_info_idx-1];
6547ec681f3Smrg
6557ec681f3Smrg            if (!vc_info->initialised) {
6567ec681f3Smrg               const struct glsl_type *type = out_var->type;
6577ec681f3Smrg               if (nir_is_arrayed_io(out_var, producer->info.stage)) {
6587ec681f3Smrg                  assert(glsl_type_is_array(type));
6597ec681f3Smrg                  type = glsl_get_array_element(type);
6607ec681f3Smrg               }
6617ec681f3Smrg
6627ec681f3Smrg               vc_info->var = out_var;
6637ec681f3Smrg               vc_info->interp_type =
6647ec681f3Smrg                  get_interp_type(out_var, type, default_to_smooth_interp);
6657ec681f3Smrg               vc_info->interp_loc = get_interp_loc(out_var);
6667ec681f3Smrg               vc_info->is_32bit = glsl_type_is_32bit(type);
6677ec681f3Smrg               vc_info->is_patch = out_var->data.patch;
6687ec681f3Smrg               vc_info->is_mediump = !producer->options->linker_ignore_precision &&
6697ec681f3Smrg                  (out_var->data.precision == GLSL_PRECISION_MEDIUM ||
6707ec681f3Smrg                   out_var->data.precision == GLSL_PRECISION_LOW);
6717ec681f3Smrg               vc_info->is_intra_stage_only = true;
6727ec681f3Smrg               vc_info->initialised = true;
6737ec681f3Smrg            }
6747ec681f3Smrg         }
6757ec681f3Smrg      }
6767ec681f3Smrg   }
6777ec681f3Smrg
6787ec681f3Smrg   for (unsigned i = 0; i < *varying_comp_info_size; i++ ) {
6797ec681f3Smrg      struct varying_component *vc_info = &(*varying_comp_info)[i];
6807ec681f3Smrg      if (!vc_info->initialised) {
6817ec681f3Smrg         /* Something went wrong, the shader interfaces didn't match, so
6827ec681f3Smrg          * abandon packing. This can happen for example when the outputs are
6837ec681f3Smrg          * scalars but the inputs are struct members.
6847ec681f3Smrg          */
6857ec681f3Smrg         *varying_comp_info_size = 0;
6867ec681f3Smrg         break;
6877ec681f3Smrg      }
6887ec681f3Smrg   }
6897ec681f3Smrg}
6907ec681f3Smrg
6917ec681f3Smrgstatic bool
6927ec681f3Smrgallow_pack_interp_type(nir_pack_varying_options options, int type)
6937ec681f3Smrg{
6947ec681f3Smrg   int sel;
6957ec681f3Smrg
6967ec681f3Smrg   switch (type) {
6977ec681f3Smrg   case INTERP_MODE_NONE:
6987ec681f3Smrg      sel = nir_pack_varying_interp_mode_none;
6997ec681f3Smrg      break;
7007ec681f3Smrg   case INTERP_MODE_SMOOTH:
7017ec681f3Smrg      sel = nir_pack_varying_interp_mode_smooth;
7027ec681f3Smrg      break;
7037ec681f3Smrg   case INTERP_MODE_FLAT:
7047ec681f3Smrg      sel = nir_pack_varying_interp_mode_flat;
7057ec681f3Smrg      break;
7067ec681f3Smrg   case INTERP_MODE_NOPERSPECTIVE:
7077ec681f3Smrg      sel = nir_pack_varying_interp_mode_noperspective;
7087ec681f3Smrg      break;
7097ec681f3Smrg   default:
7107ec681f3Smrg      return false;
7117ec681f3Smrg   }
7127ec681f3Smrg
7137ec681f3Smrg   return options & sel;
7147ec681f3Smrg}
7157ec681f3Smrg
7167ec681f3Smrgstatic bool
7177ec681f3Smrgallow_pack_interp_loc(nir_pack_varying_options options, int loc)
7187ec681f3Smrg{
7197ec681f3Smrg   int sel;
7207ec681f3Smrg
7217ec681f3Smrg   switch (loc) {
7227ec681f3Smrg   case INTERPOLATE_LOC_SAMPLE:
7237ec681f3Smrg      sel = nir_pack_varying_interp_loc_sample;
7247ec681f3Smrg      break;
7257ec681f3Smrg   case INTERPOLATE_LOC_CENTROID:
7267ec681f3Smrg      sel = nir_pack_varying_interp_loc_centroid;
7277ec681f3Smrg      break;
7287ec681f3Smrg   case INTERPOLATE_LOC_CENTER:
7297ec681f3Smrg      sel = nir_pack_varying_interp_loc_center;
7307ec681f3Smrg      break;
7317ec681f3Smrg   default:
7327ec681f3Smrg      return false;
7337ec681f3Smrg   }
7347ec681f3Smrg
7357ec681f3Smrg   return options & sel;
7367e102996Smaya}
73701e04c3fSmrg
7387e102996Smayastatic void
7397e102996Smayaassign_remap_locations(struct varying_loc (*remap)[4],
7407e102996Smaya                       struct assigned_comps *assigned_comps,
7417e102996Smaya                       struct varying_component *info,
7427e102996Smaya                       unsigned *cursor, unsigned *comp,
7437ec681f3Smrg                       unsigned max_location,
7447ec681f3Smrg                       nir_pack_varying_options options)
7457e102996Smaya{
7467e102996Smaya   unsigned tmp_cursor = *cursor;
7477e102996Smaya   unsigned tmp_comp = *comp;
7487e102996Smaya
7497e102996Smaya   for (; tmp_cursor < max_location; tmp_cursor++) {
7507e102996Smaya
7517e102996Smaya      if (assigned_comps[tmp_cursor].comps) {
7527ec681f3Smrg         /* We can only pack varyings with matching precision. */
7537ec681f3Smrg         if (assigned_comps[tmp_cursor].is_mediump != info->is_mediump) {
7547ec681f3Smrg            tmp_comp = 0;
7557ec681f3Smrg            continue;
7567ec681f3Smrg         }
7577ec681f3Smrg
7587ec681f3Smrg         /* We can only pack varyings with matching interpolation type
7597ec681f3Smrg          * if driver does not support it.
7607ec681f3Smrg          */
7617ec681f3Smrg         if (assigned_comps[tmp_cursor].interp_type != info->interp_type &&
7627ec681f3Smrg             (!allow_pack_interp_type(options, assigned_comps[tmp_cursor].interp_type) ||
7637ec681f3Smrg              !allow_pack_interp_type(options, info->interp_type))) {
7647ec681f3Smrg            tmp_comp = 0;
7657ec681f3Smrg            continue;
7667ec681f3Smrg         }
7677ec681f3Smrg
7687ec681f3Smrg         /* We can only pack varyings with matching interpolation location
7697ec681f3Smrg          * if driver does not support it.
77001e04c3fSmrg          */
7717ec681f3Smrg         if (assigned_comps[tmp_cursor].interp_loc != info->interp_loc &&
7727ec681f3Smrg             (!allow_pack_interp_loc(options, assigned_comps[tmp_cursor].interp_loc) ||
7737ec681f3Smrg              !allow_pack_interp_loc(options, info->interp_loc))) {
7747e102996Smaya            tmp_comp = 0;
77501e04c3fSmrg            continue;
7767e102996Smaya         }
77701e04c3fSmrg
7787e102996Smaya         /* We can only pack varyings with matching types, and the current
7797e102996Smaya          * algorithm only supports packing 32-bit.
7807e102996Smaya          */
7817e102996Smaya         if (!assigned_comps[tmp_cursor].is_32bit) {
7827e102996Smaya            tmp_comp = 0;
7837e102996Smaya            continue;
7847e102996Smaya         }
78501e04c3fSmrg
7867e102996Smaya         while (tmp_comp < 4 &&
7877e102996Smaya                (assigned_comps[tmp_cursor].comps & (1 << tmp_comp))) {
7887e102996Smaya            tmp_comp++;
7897e102996Smaya         }
7907e102996Smaya      }
79101e04c3fSmrg
7927e102996Smaya      if (tmp_comp == 4) {
7937e102996Smaya         tmp_comp = 0;
7947e102996Smaya         continue;
7957e102996Smaya      }
79601e04c3fSmrg
7977e102996Smaya      unsigned location = info->var->data.location - VARYING_SLOT_VAR0;
79801e04c3fSmrg
7997e102996Smaya      /* Once we have assigned a location mark it as used */
8007e102996Smaya      assigned_comps[tmp_cursor].comps |= (1 << tmp_comp);
8017e102996Smaya      assigned_comps[tmp_cursor].interp_type = info->interp_type;
8027e102996Smaya      assigned_comps[tmp_cursor].interp_loc = info->interp_loc;
8037e102996Smaya      assigned_comps[tmp_cursor].is_32bit = info->is_32bit;
8047ec681f3Smrg      assigned_comps[tmp_cursor].is_mediump = info->is_mediump;
80501e04c3fSmrg
8067e102996Smaya      /* Assign remap location */
8077e102996Smaya      remap[location][info->var->data.location_frac].component = tmp_comp++;
8087e102996Smaya      remap[location][info->var->data.location_frac].location =
8097e102996Smaya         tmp_cursor + VARYING_SLOT_VAR0;
81001e04c3fSmrg
8117e102996Smaya      break;
8127e102996Smaya   }
81301e04c3fSmrg
8147e102996Smaya   *cursor = tmp_cursor;
8157e102996Smaya   *comp = tmp_comp;
8167e102996Smaya}
81701e04c3fSmrg
8187e102996Smaya/* If there are empty components in the slot compact the remaining components
8197e102996Smaya * as close to component 0 as possible. This will make it easier to fill the
8207e102996Smaya * empty components with components from a different slot in a following pass.
8217e102996Smaya */
8227e102996Smayastatic void
8237e102996Smayacompact_components(nir_shader *producer, nir_shader *consumer,
8247e102996Smaya                   struct assigned_comps *assigned_comps,
8257e102996Smaya                   bool default_to_smooth_interp)
8267e102996Smaya{
8277e102996Smaya   struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = {{{0}, {0}}};
8287e102996Smaya   struct varying_component *varying_comp_info;
8297e102996Smaya   unsigned varying_comp_info_size;
8307e102996Smaya
8317e102996Smaya   /* Gather varying component info */
8327ec681f3Smrg   gather_varying_component_info(producer, consumer, &varying_comp_info,
8337e102996Smaya                                 &varying_comp_info_size,
8347e102996Smaya                                 default_to_smooth_interp);
8357e102996Smaya
8367e102996Smaya   /* Sort varying components. */
8377e102996Smaya   qsort(varying_comp_info, varying_comp_info_size,
8387e102996Smaya         sizeof(struct varying_component), cmp_varying_component);
8397e102996Smaya
8407ec681f3Smrg   nir_pack_varying_options options = consumer->options->pack_varying_options;
8417ec681f3Smrg
8427e102996Smaya   unsigned cursor = 0;
8437e102996Smaya   unsigned comp = 0;
8447e102996Smaya
8457e102996Smaya   /* Set the remap array based on the sorted components */
8467e102996Smaya   for (unsigned i = 0; i < varying_comp_info_size; i++ ) {
8477e102996Smaya      struct varying_component *info = &varying_comp_info[i];
8487e102996Smaya
8497e102996Smaya      assert(info->is_patch || cursor < MAX_VARYING);
8507e102996Smaya      if (info->is_patch) {
8517e102996Smaya         /* The list should be sorted with all non-patch inputs first followed
8527e102996Smaya          * by patch inputs.  When we hit our first patch input, we need to
8537e102996Smaya          * reset the cursor to MAX_VARYING so we put them in the right slot.
8547e102996Smaya          */
8557e102996Smaya         if (cursor < MAX_VARYING) {
8567e102996Smaya            cursor = MAX_VARYING;
8577e102996Smaya            comp = 0;
8587e102996Smaya         }
85901e04c3fSmrg
8607e102996Smaya         assign_remap_locations(remap, assigned_comps, info,
8617ec681f3Smrg                                &cursor, &comp, MAX_VARYINGS_INCL_PATCH,
8627ec681f3Smrg                                options);
8637e102996Smaya      } else {
8647e102996Smaya         assign_remap_locations(remap, assigned_comps, info,
8657ec681f3Smrg                                &cursor, &comp, MAX_VARYING,
8667ec681f3Smrg                                options);
8677e102996Smaya
8687e102996Smaya         /* Check if we failed to assign a remap location. This can happen if
8697e102996Smaya          * for example there are a bunch of unmovable components with
8707e102996Smaya          * mismatching interpolation types causing us to skip over locations
8717e102996Smaya          * that would have been useful for packing later components.
8727e102996Smaya          * The solution is to iterate over the locations again (this should
8737e102996Smaya          * happen very rarely in practice).
8747e102996Smaya          */
8757e102996Smaya         if (cursor == MAX_VARYING) {
8767e102996Smaya            cursor = 0;
8777e102996Smaya            comp = 0;
8787e102996Smaya            assign_remap_locations(remap, assigned_comps, info,
8797ec681f3Smrg                                   &cursor, &comp, MAX_VARYING,
8807ec681f3Smrg                                   options);
88101e04c3fSmrg         }
88201e04c3fSmrg      }
88301e04c3fSmrg   }
88401e04c3fSmrg
8857e102996Smaya   ralloc_free(varying_comp_info);
8867e102996Smaya
88701e04c3fSmrg   uint64_t zero = 0;
8887e102996Smaya   uint32_t zero32 = 0;
8897ec681f3Smrg   remap_slots_and_components(consumer, nir_var_shader_in, remap,
8907e102996Smaya                              &consumer->info.inputs_read, &zero,
8917e102996Smaya                              &consumer->info.patch_inputs_read, &zero32);
8927ec681f3Smrg   remap_slots_and_components(producer, nir_var_shader_out, remap,
89301e04c3fSmrg                              &producer->info.outputs_written,
8947e102996Smaya                              &producer->info.outputs_read,
8957e102996Smaya                              &producer->info.patch_outputs_written,
8967e102996Smaya                              &producer->info.patch_outputs_read);
89701e04c3fSmrg}
89801e04c3fSmrg
89901e04c3fSmrg/* We assume that this has been called more-or-less directly after
90001e04c3fSmrg * remove_unused_varyings.  At this point, all of the varyings that we
90101e04c3fSmrg * aren't going to be using have been completely removed and the
90201e04c3fSmrg * inputs_read and outputs_written fields in nir_shader_info reflect
90301e04c3fSmrg * this.  Therefore, the total set of valid slots is the OR of the two
90401e04c3fSmrg * sets of varyings;  this accounts for varyings which one side may need
90501e04c3fSmrg * to read/write even if the other doesn't.  This can happen if, for
90601e04c3fSmrg * instance, an array is used indirectly from one side causing it to be
90701e04c3fSmrg * unsplittable but directly from the other.
90801e04c3fSmrg */
90901e04c3fSmrgvoid
91001e04c3fSmrgnir_compact_varyings(nir_shader *producer, nir_shader *consumer,
91101e04c3fSmrg                     bool default_to_smooth_interp)
91201e04c3fSmrg{
91301e04c3fSmrg   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
91401e04c3fSmrg   assert(consumer->info.stage != MESA_SHADER_VERTEX);
91501e04c3fSmrg
9167e102996Smaya   struct assigned_comps assigned_comps[MAX_VARYINGS_INCL_PATCH] = {{0}};
91701e04c3fSmrg
9187ec681f3Smrg   get_unmoveable_components_masks(producer, nir_var_shader_out,
9197ec681f3Smrg                                   assigned_comps,
9207e102996Smaya                                   producer->info.stage,
9217e102996Smaya                                   default_to_smooth_interp);
9227ec681f3Smrg   get_unmoveable_components_masks(consumer, nir_var_shader_in,
9237ec681f3Smrg                                   assigned_comps,
9247e102996Smaya                                   consumer->info.stage,
9257e102996Smaya                                   default_to_smooth_interp);
92601e04c3fSmrg
9277e102996Smaya   compact_components(producer, consumer, assigned_comps,
92801e04c3fSmrg                      default_to_smooth_interp);
92901e04c3fSmrg}
93001e04c3fSmrg
93101e04c3fSmrg/*
93201e04c3fSmrg * Mark XFB varyings as always_active_io in the consumer so the linking opts
93301e04c3fSmrg * don't touch them.
93401e04c3fSmrg */
93501e04c3fSmrgvoid
93601e04c3fSmrgnir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer)
93701e04c3fSmrg{
93801e04c3fSmrg   nir_variable *input_vars[MAX_VARYING] = { 0 };
93901e04c3fSmrg
9407ec681f3Smrg   nir_foreach_shader_in_variable(var, consumer) {
94101e04c3fSmrg      if (var->data.location >= VARYING_SLOT_VAR0 &&
94201e04c3fSmrg          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
94301e04c3fSmrg
94401e04c3fSmrg         unsigned location = var->data.location - VARYING_SLOT_VAR0;
94501e04c3fSmrg         input_vars[location] = var;
94601e04c3fSmrg      }
94701e04c3fSmrg   }
94801e04c3fSmrg
9497ec681f3Smrg   nir_foreach_shader_out_variable(var, producer) {
95001e04c3fSmrg      if (var->data.location >= VARYING_SLOT_VAR0 &&
95101e04c3fSmrg          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
95201e04c3fSmrg
95301e04c3fSmrg         if (!var->data.always_active_io)
95401e04c3fSmrg            continue;
95501e04c3fSmrg
95601e04c3fSmrg         unsigned location = var->data.location - VARYING_SLOT_VAR0;
95701e04c3fSmrg         if (input_vars[location]) {
95801e04c3fSmrg            input_vars[location]->data.always_active_io = true;
95901e04c3fSmrg         }
96001e04c3fSmrg      }
96101e04c3fSmrg   }
96201e04c3fSmrg}
9637e102996Smaya
9647e102996Smayastatic bool
9657e102996Smayadoes_varying_match(nir_variable *out_var, nir_variable *in_var)
9667e102996Smaya{
9677e102996Smaya   return in_var->data.location == out_var->data.location &&
9687e102996Smaya          in_var->data.location_frac == out_var->data.location_frac;
9697e102996Smaya}
9707e102996Smaya
9717e102996Smayastatic nir_variable *
9727e102996Smayaget_matching_input_var(nir_shader *consumer, nir_variable *out_var)
9737e102996Smaya{
9747ec681f3Smrg   nir_foreach_shader_in_variable(var, consumer) {
9757e102996Smaya      if (does_varying_match(out_var, var))
9767e102996Smaya         return var;
9777e102996Smaya   }
9787e102996Smaya
9797e102996Smaya   return NULL;
9807e102996Smaya}
9817e102996Smaya
9827e102996Smayastatic bool
9837e102996Smayacan_replace_varying(nir_variable *out_var)
9847e102996Smaya{
9857e102996Smaya   /* Skip types that require more complex handling.
9867e102996Smaya    * TODO: add support for these types.
9877e102996Smaya    */
9887e102996Smaya   if (glsl_type_is_array(out_var->type) ||
9897e102996Smaya       glsl_type_is_dual_slot(out_var->type) ||
9907e102996Smaya       glsl_type_is_matrix(out_var->type) ||
9917e102996Smaya       glsl_type_is_struct_or_ifc(out_var->type))
9927e102996Smaya      return false;
9937e102996Smaya
9947e102996Smaya   /* Limit this pass to scalars for now to keep things simple. Most varyings
9957e102996Smaya    * should have been lowered to scalars at this point anyway.
9967e102996Smaya    */
9977e102996Smaya   if (!glsl_type_is_scalar(out_var->type))
9987e102996Smaya      return false;
9997e102996Smaya
10007e102996Smaya   if (out_var->data.location < VARYING_SLOT_VAR0 ||
10017e102996Smaya       out_var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING)
10027e102996Smaya      return false;
10037e102996Smaya
10047e102996Smaya   return true;
10057e102996Smaya}
10067e102996Smaya
10077e102996Smayastatic bool
10087ec681f3Smrgreplace_varying_input_by_constant_load(nir_shader *shader,
10097ec681f3Smrg                                       nir_intrinsic_instr *store_intr)
10107e102996Smaya{
10117e102996Smaya   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
10127e102996Smaya
10137e102996Smaya   nir_builder b;
10147e102996Smaya   nir_builder_init(&b, impl);
10157e102996Smaya
10167e102996Smaya   nir_variable *out_var =
10177e102996Smaya      nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));
10187e102996Smaya
10197e102996Smaya   bool progress = false;
10207e102996Smaya   nir_foreach_block(block, impl) {
10217e102996Smaya      nir_foreach_instr(instr, block) {
10227e102996Smaya         if (instr->type != nir_instr_type_intrinsic)
10237e102996Smaya            continue;
10247e102996Smaya
10257e102996Smaya         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
10267e102996Smaya         if (intr->intrinsic != nir_intrinsic_load_deref)
10277e102996Smaya            continue;
10287e102996Smaya
10297e102996Smaya         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
10307ec681f3Smrg         if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
10317e102996Smaya            continue;
10327e102996Smaya
10337e102996Smaya         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
10347e102996Smaya
10357e102996Smaya         if (!does_varying_match(out_var, in_var))
10367e102996Smaya            continue;
10377e102996Smaya
10387e102996Smaya         b.cursor = nir_before_instr(instr);
10397e102996Smaya
10407e102996Smaya         nir_load_const_instr *out_const =
10417e102996Smaya            nir_instr_as_load_const(store_intr->src[1].ssa->parent_instr);
10427e102996Smaya
10437e102996Smaya         /* Add new const to replace the input */
10447e102996Smaya         nir_ssa_def *nconst = nir_build_imm(&b, store_intr->num_components,
10457e102996Smaya                                             intr->dest.ssa.bit_size,
10467e102996Smaya                                             out_const->value);
10477e102996Smaya
10487ec681f3Smrg         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nconst);
10497e102996Smaya
10507e102996Smaya         progress = true;
10517e102996Smaya      }
10527e102996Smaya   }
10537e102996Smaya
10547e102996Smaya   return progress;
10557e102996Smaya}
10567e102996Smaya
10577e102996Smayastatic bool
10587e102996Smayareplace_duplicate_input(nir_shader *shader, nir_variable *input_var,
10597e102996Smaya                         nir_intrinsic_instr *dup_store_intr)
10607e102996Smaya{
10617e102996Smaya   assert(input_var);
10627e102996Smaya
10637e102996Smaya   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
10647e102996Smaya
10657e102996Smaya   nir_builder b;
10667e102996Smaya   nir_builder_init(&b, impl);
10677e102996Smaya
10687e102996Smaya   nir_variable *dup_out_var =
10697e102996Smaya      nir_deref_instr_get_variable(nir_src_as_deref(dup_store_intr->src[0]));
10707e102996Smaya
10717e102996Smaya   bool progress = false;
10727e102996Smaya   nir_foreach_block(block, impl) {
10737e102996Smaya      nir_foreach_instr(instr, block) {
10747e102996Smaya         if (instr->type != nir_instr_type_intrinsic)
10757e102996Smaya            continue;
10767e102996Smaya
10777e102996Smaya         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
10787e102996Smaya         if (intr->intrinsic != nir_intrinsic_load_deref)
10797e102996Smaya            continue;
10807e102996Smaya
10817e102996Smaya         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
10827ec681f3Smrg         if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
10837e102996Smaya            continue;
10847e102996Smaya
10857e102996Smaya         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
10867e102996Smaya
10877e102996Smaya         if (!does_varying_match(dup_out_var, in_var) ||
10887e102996Smaya             in_var->data.interpolation != input_var->data.interpolation ||
10897e102996Smaya             get_interp_loc(in_var) != get_interp_loc(input_var))
10907e102996Smaya            continue;
10917e102996Smaya
10927e102996Smaya         b.cursor = nir_before_instr(instr);
10937e102996Smaya
10947e102996Smaya         nir_ssa_def *load = nir_load_var(&b, input_var);
10957ec681f3Smrg         nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
10967e102996Smaya
10977e102996Smaya         progress = true;
10987e102996Smaya      }
10997e102996Smaya   }
11007e102996Smaya
11017e102996Smaya   return progress;
11027e102996Smaya}
11037e102996Smaya
11047ec681f3Smrgstatic bool
11057ec681f3Smrgis_direct_uniform_load(nir_ssa_def *def, nir_ssa_scalar *s)
11067ec681f3Smrg{
11077ec681f3Smrg   /* def is sure to be scalar as can_replace_varying() filter out vector case. */
11087ec681f3Smrg   assert(def->num_components == 1);
11097ec681f3Smrg
11107ec681f3Smrg   /* Uniform load may hide behind some move instruction for converting
11117ec681f3Smrg    * vector to scalar:
11127ec681f3Smrg    *
11137ec681f3Smrg    *     vec1 32 ssa_1 = deref_var &color (uniform vec3)
11147ec681f3Smrg    *     vec3 32 ssa_2 = intrinsic load_deref (ssa_1) (0)
11157ec681f3Smrg    *     vec1 32 ssa_3 = mov ssa_2.x
11167ec681f3Smrg    *     vec1 32 ssa_4 = deref_var &color_out (shader_out float)
11177ec681f3Smrg    *     intrinsic store_deref (ssa_4, ssa_3) (1, 0)
11187ec681f3Smrg    */
11197ec681f3Smrg   *s = nir_ssa_scalar_resolved(def, 0);
11207ec681f3Smrg
11217ec681f3Smrg   nir_ssa_def *ssa = s->def;
11227ec681f3Smrg   if (ssa->parent_instr->type != nir_instr_type_intrinsic)
11237ec681f3Smrg      return false;
11247ec681f3Smrg
11257ec681f3Smrg   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(ssa->parent_instr);
11267ec681f3Smrg   if (intr->intrinsic != nir_intrinsic_load_deref)
11277ec681f3Smrg      return false;
11287ec681f3Smrg
11297ec681f3Smrg   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
11307ec681f3Smrg   /* TODO: support nir_var_mem_ubo. */
11317ec681f3Smrg   if (!nir_deref_mode_is(deref, nir_var_uniform))
11327ec681f3Smrg      return false;
11337ec681f3Smrg
11347ec681f3Smrg   /* Does not support indirect uniform load. */
11357ec681f3Smrg   return !nir_deref_instr_has_indirect(deref);
11367ec681f3Smrg}
11377ec681f3Smrg
11387ec681f3Smrgstatic nir_variable *
11397ec681f3Smrgget_uniform_var_in_consumer(nir_shader *consumer,
11407ec681f3Smrg                            nir_variable *var_in_producer)
11417ec681f3Smrg{
11427ec681f3Smrg   /* Find if uniform already exists in consumer. */
11437ec681f3Smrg   nir_variable *new_var = NULL;
11447ec681f3Smrg   nir_foreach_uniform_variable(v, consumer) {
11457ec681f3Smrg      if (!strcmp(var_in_producer->name, v->name)) {
11467ec681f3Smrg         new_var = v;
11477ec681f3Smrg         break;
11487ec681f3Smrg      }
11497ec681f3Smrg   }
11507ec681f3Smrg
11517ec681f3Smrg   /* Create a variable if not exist. */
11527ec681f3Smrg   if (!new_var) {
11537ec681f3Smrg      new_var = nir_variable_clone(var_in_producer, consumer);
11547ec681f3Smrg      nir_shader_add_variable(consumer, new_var);
11557ec681f3Smrg   }
11567ec681f3Smrg
11577ec681f3Smrg   return new_var;
11587ec681f3Smrg}
11597ec681f3Smrg
11607ec681f3Smrgstatic nir_deref_instr *
11617ec681f3Smrgclone_deref_instr(nir_builder *b, nir_variable *var, nir_deref_instr *deref)
11627ec681f3Smrg{
11637ec681f3Smrg   if (deref->deref_type == nir_deref_type_var)
11647ec681f3Smrg       return nir_build_deref_var(b, var);
11657ec681f3Smrg
11667ec681f3Smrg   nir_deref_instr *parent_deref = nir_deref_instr_parent(deref);
11677ec681f3Smrg   nir_deref_instr *parent = clone_deref_instr(b, var, parent_deref);
11687ec681f3Smrg
11697ec681f3Smrg   /* Build array and struct deref instruction.
11707ec681f3Smrg    * "deref" instr is sure to be direct (see is_direct_uniform_load()).
11717ec681f3Smrg    */
11727ec681f3Smrg   switch (deref->deref_type) {
11737ec681f3Smrg   case nir_deref_type_array: {
11747ec681f3Smrg      nir_load_const_instr *index =
11757ec681f3Smrg         nir_instr_as_load_const(deref->arr.index.ssa->parent_instr);
11767ec681f3Smrg      return nir_build_deref_array_imm(b, parent, index->value->i64);
11777ec681f3Smrg   }
11787ec681f3Smrg   case nir_deref_type_ptr_as_array: {
11797ec681f3Smrg      nir_load_const_instr *index =
11807ec681f3Smrg         nir_instr_as_load_const(deref->arr.index.ssa->parent_instr);
11817ec681f3Smrg      nir_ssa_def *ssa = nir_imm_intN_t(b, index->value->i64,
11827ec681f3Smrg                                        parent->dest.ssa.bit_size);
11837ec681f3Smrg      return nir_build_deref_ptr_as_array(b, parent, ssa);
11847ec681f3Smrg   }
11857ec681f3Smrg   case nir_deref_type_struct:
11867ec681f3Smrg      return nir_build_deref_struct(b, parent, deref->strct.index);
11877ec681f3Smrg   default:
11887ec681f3Smrg      unreachable("invalid type");
11897ec681f3Smrg      return NULL;
11907ec681f3Smrg   }
11917ec681f3Smrg}
11927ec681f3Smrg
11937ec681f3Smrgstatic bool
11947ec681f3Smrgreplace_varying_input_by_uniform_load(nir_shader *shader,
11957ec681f3Smrg                                      nir_intrinsic_instr *store_intr,
11967ec681f3Smrg                                      nir_ssa_scalar *scalar)
11977ec681f3Smrg{
11987ec681f3Smrg   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
11997ec681f3Smrg
12007ec681f3Smrg   nir_builder b;
12017ec681f3Smrg   nir_builder_init(&b, impl);
12027ec681f3Smrg
12037ec681f3Smrg   nir_variable *out_var =
12047ec681f3Smrg      nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));
12057ec681f3Smrg
12067ec681f3Smrg   nir_intrinsic_instr *load = nir_instr_as_intrinsic(scalar->def->parent_instr);
12077ec681f3Smrg   nir_deref_instr *deref = nir_src_as_deref(load->src[0]);
12087ec681f3Smrg   nir_variable *uni_var = nir_deref_instr_get_variable(deref);
12097ec681f3Smrg   uni_var = get_uniform_var_in_consumer(shader, uni_var);
12107ec681f3Smrg
12117ec681f3Smrg   bool progress = false;
12127ec681f3Smrg   nir_foreach_block(block, impl) {
12137ec681f3Smrg      nir_foreach_instr(instr, block) {
12147ec681f3Smrg         if (instr->type != nir_instr_type_intrinsic)
12157ec681f3Smrg            continue;
12167ec681f3Smrg
12177ec681f3Smrg         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
12187ec681f3Smrg         if (intr->intrinsic != nir_intrinsic_load_deref)
12197ec681f3Smrg            continue;
12207ec681f3Smrg
12217ec681f3Smrg         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
12227ec681f3Smrg         if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
12237ec681f3Smrg            continue;
12247ec681f3Smrg
12257ec681f3Smrg         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
12267ec681f3Smrg
12277ec681f3Smrg         if (!does_varying_match(out_var, in_var))
12287ec681f3Smrg            continue;
12297ec681f3Smrg
12307ec681f3Smrg         b.cursor = nir_before_instr(instr);
12317ec681f3Smrg
12327ec681f3Smrg         /* Clone instructions start from deref load to variable deref. */
12337ec681f3Smrg         nir_deref_instr *uni_deref = clone_deref_instr(&b, uni_var, deref);
12347ec681f3Smrg         nir_ssa_def *uni_def = nir_load_deref(&b, uni_deref);
12357ec681f3Smrg
12367ec681f3Smrg         /* Add a vector to scalar move if uniform is a vector. */
12377ec681f3Smrg         if (uni_def->num_components > 1) {
12387ec681f3Smrg            nir_alu_src src = {0};
12397ec681f3Smrg            src.src = nir_src_for_ssa(uni_def);
12407ec681f3Smrg            src.swizzle[0] = scalar->comp;
12417ec681f3Smrg            uni_def = nir_mov_alu(&b, src, 1);
12427ec681f3Smrg         }
12437ec681f3Smrg
12447ec681f3Smrg         /* Replace load input with load uniform. */
12457ec681f3Smrg         nir_ssa_def_rewrite_uses(&intr->dest.ssa, uni_def);
12467ec681f3Smrg
12477ec681f3Smrg         progress = true;
12487ec681f3Smrg      }
12497ec681f3Smrg   }
12507ec681f3Smrg
12517ec681f3Smrg   return progress;
12527ec681f3Smrg}
12537ec681f3Smrg
12547ec681f3Smrg/* The GLSL ES 3.20 spec says:
12557ec681f3Smrg *
12567ec681f3Smrg * "The precision of a vertex output does not need to match the precision of
12577ec681f3Smrg * the corresponding fragment input. The minimum precision at which vertex
12587ec681f3Smrg * outputs are interpolated is the minimum of the vertex output precision and
12597ec681f3Smrg * the fragment input precision, with the exception that for highp,
12607ec681f3Smrg * implementations do not have to support full IEEE 754 precision." (9.1 "Input
12617ec681f3Smrg * Output Matching by Name in Linked Programs")
12627ec681f3Smrg *
12637ec681f3Smrg * To implement this, when linking shaders we will take the minimum precision
12647ec681f3Smrg * qualifier (allowing drivers to interpolate at lower precision). For
12657ec681f3Smrg * input/output between non-fragment stages (e.g. VERTEX to GEOMETRY), the spec
12667ec681f3Smrg * requires we use the *last* specified precision if there is a conflict.
12677ec681f3Smrg *
12687ec681f3Smrg * Precisions are ordered as (NONE, HIGH, MEDIUM, LOW). If either precision is
12697ec681f3Smrg * NONE, we'll return the other precision, since there is no conflict.
12707ec681f3Smrg * Otherwise for fragment interpolation, we'll pick the smallest of (HIGH,
12717ec681f3Smrg * MEDIUM, LOW) by picking the maximum of the raw values - note the ordering is
12727ec681f3Smrg * "backwards". For non-fragment stages, we'll pick the latter precision to
12737ec681f3Smrg * comply with the spec. (Note that the order matters.)
12747ec681f3Smrg *
12757ec681f3Smrg * For streamout, "Variables declared with lowp or mediump precision are
12767ec681f3Smrg * promoted to highp before being written." (12.2 "Transform Feedback", p. 341
 * of OpenGL ES 3.2 specification). So drivers should promote them for
 * the transform feedback memory store, but not for the output store.
12797ec681f3Smrg */
12807ec681f3Smrg
12817ec681f3Smrgstatic unsigned
12827ec681f3Smrgnir_link_precision(unsigned producer, unsigned consumer, bool fs)
12837ec681f3Smrg{
12847ec681f3Smrg   if (producer == GLSL_PRECISION_NONE)
12857ec681f3Smrg      return consumer;
12867ec681f3Smrg   else if (consumer == GLSL_PRECISION_NONE)
12877ec681f3Smrg      return producer;
12887ec681f3Smrg   else
12897ec681f3Smrg      return fs ? MAX2(producer, consumer) : consumer;
12907ec681f3Smrg}
12917ec681f3Smrg
12927ec681f3Smrgvoid
12937ec681f3Smrgnir_link_varying_precision(nir_shader *producer, nir_shader *consumer)
12947ec681f3Smrg{
12957ec681f3Smrg   bool frag = consumer->info.stage == MESA_SHADER_FRAGMENT;
12967ec681f3Smrg
12977ec681f3Smrg   nir_foreach_shader_out_variable(producer_var, producer) {
12987ec681f3Smrg      /* Skip if the slot is not assigned */
12997ec681f3Smrg      if (producer_var->data.location < 0)
13007ec681f3Smrg         continue;
13017ec681f3Smrg
13027ec681f3Smrg      nir_variable *consumer_var = nir_find_variable_with_location(consumer,
13037ec681f3Smrg            nir_var_shader_in, producer_var->data.location);
13047ec681f3Smrg
13057ec681f3Smrg      /* Skip if the variable will be eliminated */
13067ec681f3Smrg      if (!consumer_var)
13077ec681f3Smrg         continue;
13087ec681f3Smrg
13097ec681f3Smrg      /* Now we have a pair of variables. Let's pick the smaller precision. */
13107ec681f3Smrg      unsigned precision_1 = producer_var->data.precision;
13117ec681f3Smrg      unsigned precision_2 = consumer_var->data.precision;
13127ec681f3Smrg      unsigned minimum = nir_link_precision(precision_1, precision_2, frag);
13137ec681f3Smrg
13147ec681f3Smrg      /* Propagate the new precision */
13157ec681f3Smrg      producer_var->data.precision = consumer_var->data.precision = minimum;
13167ec681f3Smrg   }
13177ec681f3Smrg}
13187ec681f3Smrg
13197e102996Smayabool
13207e102996Smayanir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
13217e102996Smaya{
13227e102996Smaya   /* TODO: Add support for more shader stage combinations */
13237e102996Smaya   if (consumer->info.stage != MESA_SHADER_FRAGMENT ||
13247e102996Smaya       (producer->info.stage != MESA_SHADER_VERTEX &&
13257e102996Smaya        producer->info.stage != MESA_SHADER_TESS_EVAL))
13267e102996Smaya      return false;
13277e102996Smaya
13287e102996Smaya   bool progress = false;
13297e102996Smaya
13307e102996Smaya   nir_function_impl *impl = nir_shader_get_entrypoint(producer);
13317e102996Smaya
13327e102996Smaya   struct hash_table *varying_values = _mesa_pointer_hash_table_create(NULL);
13337e102996Smaya
13347e102996Smaya   /* If we find a store in the last block of the producer we can be sure this
13357e102996Smaya    * is the only possible value for this output.
13367e102996Smaya    */
13377e102996Smaya   nir_block *last_block = nir_impl_last_block(impl);
13387e102996Smaya   nir_foreach_instr_reverse(instr, last_block) {
13397e102996Smaya      if (instr->type != nir_instr_type_intrinsic)
13407e102996Smaya         continue;
13417e102996Smaya
13427e102996Smaya      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
13437e102996Smaya
13447e102996Smaya      if (intr->intrinsic != nir_intrinsic_store_deref)
13457e102996Smaya         continue;
13467e102996Smaya
13477e102996Smaya      nir_deref_instr *out_deref = nir_src_as_deref(intr->src[0]);
13487ec681f3Smrg      if (!nir_deref_mode_is(out_deref, nir_var_shader_out))
13497e102996Smaya         continue;
13507e102996Smaya
13517e102996Smaya      nir_variable *out_var = nir_deref_instr_get_variable(out_deref);
13527e102996Smaya      if (!can_replace_varying(out_var))
13537e102996Smaya         continue;
13547e102996Smaya
13557ec681f3Smrg      nir_ssa_scalar uni_scalar;
13567ec681f3Smrg      nir_ssa_def *ssa = intr->src[1].ssa;
13577ec681f3Smrg      if (ssa->parent_instr->type == nir_instr_type_load_const) {
13587ec681f3Smrg         progress |= replace_varying_input_by_constant_load(consumer, intr);
13597ec681f3Smrg      } else if (is_direct_uniform_load(ssa, &uni_scalar)) {
13607ec681f3Smrg         progress |= replace_varying_input_by_uniform_load(consumer, intr,
13617ec681f3Smrg                                                           &uni_scalar);
13627e102996Smaya      } else {
13637e102996Smaya         struct hash_entry *entry =
13647ec681f3Smrg               _mesa_hash_table_search(varying_values, ssa);
13657e102996Smaya         if (entry) {
13667e102996Smaya            progress |= replace_duplicate_input(consumer,
13677e102996Smaya                                                (nir_variable *) entry->data,
13687e102996Smaya                                                intr);
13697e102996Smaya         } else {
13707e102996Smaya            nir_variable *in_var = get_matching_input_var(consumer, out_var);
13717e102996Smaya            if (in_var) {
13727ec681f3Smrg               _mesa_hash_table_insert(varying_values, ssa, in_var);
13737e102996Smaya            }
13747e102996Smaya         }
13757e102996Smaya      }
13767e102996Smaya   }
13777e102996Smaya
13787e102996Smaya   _mesa_hash_table_destroy(varying_values, NULL);
13797e102996Smaya
13807e102996Smaya   return progress;
13817e102996Smaya}
13827ec681f3Smrg
13837ec681f3Smrg/* TODO any better helper somewhere to sort a list? */
13847ec681f3Smrg
13857ec681f3Smrgstatic void
13867ec681f3Smrginsert_sorted(struct exec_list *var_list, nir_variable *new_var)
13877ec681f3Smrg{
13887ec681f3Smrg   nir_foreach_variable_in_list(var, var_list) {
13897ec681f3Smrg      if (var->data.location > new_var->data.location) {
13907ec681f3Smrg         exec_node_insert_node_before(&var->node, &new_var->node);
13917ec681f3Smrg         return;
13927ec681f3Smrg      }
13937ec681f3Smrg   }
13947ec681f3Smrg   exec_list_push_tail(var_list, &new_var->node);
13957ec681f3Smrg}
13967ec681f3Smrg
13977ec681f3Smrgstatic void
13987ec681f3Smrgsort_varyings(nir_shader *shader, nir_variable_mode mode,
13997ec681f3Smrg              struct exec_list *sorted_list)
14007ec681f3Smrg{
14017ec681f3Smrg   exec_list_make_empty(sorted_list);
14027ec681f3Smrg   nir_foreach_variable_with_modes_safe(var, shader, mode) {
14037ec681f3Smrg      exec_node_remove(&var->node);
14047ec681f3Smrg      insert_sorted(sorted_list, var);
14057ec681f3Smrg   }
14067ec681f3Smrg}
14077ec681f3Smrg
14087ec681f3Smrgvoid
14097ec681f3Smrgnir_assign_io_var_locations(nir_shader *shader, nir_variable_mode mode,
14107ec681f3Smrg                            unsigned *size, gl_shader_stage stage)
14117ec681f3Smrg{
14127ec681f3Smrg   unsigned location = 0;
14137ec681f3Smrg   unsigned assigned_locations[VARYING_SLOT_TESS_MAX];
14147ec681f3Smrg   uint64_t processed_locs[2] = {0};
14157ec681f3Smrg
14167ec681f3Smrg   struct exec_list io_vars;
14177ec681f3Smrg   sort_varyings(shader, mode, &io_vars);
14187ec681f3Smrg
14197ec681f3Smrg   int UNUSED last_loc = 0;
14207ec681f3Smrg   bool last_partial = false;
14217ec681f3Smrg   nir_foreach_variable_in_list(var, &io_vars) {
14227ec681f3Smrg      const struct glsl_type *type = var->type;
14237ec681f3Smrg      if (nir_is_arrayed_io(var, stage)) {
14247ec681f3Smrg         assert(glsl_type_is_array(type));
14257ec681f3Smrg         type = glsl_get_array_element(type);
14267ec681f3Smrg      }
14277ec681f3Smrg
14287ec681f3Smrg      int base;
14297ec681f3Smrg      if (var->data.mode == nir_var_shader_in && stage == MESA_SHADER_VERTEX)
14307ec681f3Smrg         base = VERT_ATTRIB_GENERIC0;
14317ec681f3Smrg      else if (var->data.mode == nir_var_shader_out &&
14327ec681f3Smrg               stage == MESA_SHADER_FRAGMENT)
14337ec681f3Smrg         base = FRAG_RESULT_DATA0;
14347ec681f3Smrg      else
14357ec681f3Smrg         base = VARYING_SLOT_VAR0;
14367ec681f3Smrg
14377ec681f3Smrg      unsigned var_size, driver_size;
14387ec681f3Smrg      if (var->data.compact) {
14397ec681f3Smrg         /* If we are inside a partial compact,
14407ec681f3Smrg          * don't allow another compact to be in this slot
14417ec681f3Smrg          * if it starts at component 0.
14427ec681f3Smrg          */
14437ec681f3Smrg         if (last_partial && var->data.location_frac == 0) {
14447ec681f3Smrg            location++;
14457ec681f3Smrg         }
14467ec681f3Smrg
14477ec681f3Smrg         /* compact variables must be arrays of scalars */
14487ec681f3Smrg         assert(!var->data.per_view);
14497ec681f3Smrg         assert(glsl_type_is_array(type));
14507ec681f3Smrg         assert(glsl_type_is_scalar(glsl_get_array_element(type)));
14517ec681f3Smrg         unsigned start = 4 * location + var->data.location_frac;
14527ec681f3Smrg         unsigned end = start + glsl_get_length(type);
14537ec681f3Smrg         var_size = driver_size = end / 4 - location;
14547ec681f3Smrg         last_partial = end % 4 != 0;
14557ec681f3Smrg      } else {
14567ec681f3Smrg         /* Compact variables bypass the normal varying compacting pass,
14577ec681f3Smrg          * which means they cannot be in the same vec4 slot as a normal
14587ec681f3Smrg          * variable. If part of the current slot is taken up by a compact
14597ec681f3Smrg          * variable, we need to go to the next one.
14607ec681f3Smrg          */
14617ec681f3Smrg         if (last_partial) {
14627ec681f3Smrg            location++;
14637ec681f3Smrg            last_partial = false;
14647ec681f3Smrg         }
14657ec681f3Smrg
14667ec681f3Smrg         /* per-view variables have an extra array dimension, which is ignored
14677ec681f3Smrg          * when counting user-facing slots (var->data.location), but *not*
14687ec681f3Smrg          * with driver slots (var->data.driver_location). That is, each user
14697ec681f3Smrg          * slot maps to multiple driver slots.
14707ec681f3Smrg          */
14717ec681f3Smrg         driver_size = glsl_count_attribute_slots(type, false);
14727ec681f3Smrg         if (var->data.per_view) {
14737ec681f3Smrg            assert(glsl_type_is_array(type));
14747ec681f3Smrg            var_size =
14757ec681f3Smrg               glsl_count_attribute_slots(glsl_get_array_element(type), false);
14767ec681f3Smrg         } else {
14777ec681f3Smrg            var_size = driver_size;
14787ec681f3Smrg         }
14797ec681f3Smrg      }
14807ec681f3Smrg
14817ec681f3Smrg      /* Builtins don't allow component packing so we only need to worry about
14827ec681f3Smrg       * user defined varyings sharing the same location.
14837ec681f3Smrg       */
14847ec681f3Smrg      bool processed = false;
14857ec681f3Smrg      if (var->data.location >= base) {
14867ec681f3Smrg         unsigned glsl_location = var->data.location - base;
14877ec681f3Smrg
14887ec681f3Smrg         for (unsigned i = 0; i < var_size; i++) {
14897ec681f3Smrg            if (processed_locs[var->data.index] &
14907ec681f3Smrg                ((uint64_t)1 << (glsl_location + i)))
14917ec681f3Smrg               processed = true;
14927ec681f3Smrg            else
14937ec681f3Smrg               processed_locs[var->data.index] |=
14947ec681f3Smrg                  ((uint64_t)1 << (glsl_location + i));
14957ec681f3Smrg         }
14967ec681f3Smrg      }
14977ec681f3Smrg
14987ec681f3Smrg      /* Because component packing allows varyings to share the same location
14997ec681f3Smrg       * we may have already have processed this location.
15007ec681f3Smrg       */
15017ec681f3Smrg      if (processed) {
15027ec681f3Smrg         /* TODO handle overlapping per-view variables */
15037ec681f3Smrg         assert(!var->data.per_view);
15047ec681f3Smrg         unsigned driver_location = assigned_locations[var->data.location];
15057ec681f3Smrg         var->data.driver_location = driver_location;
15067ec681f3Smrg
15077ec681f3Smrg         /* An array may be packed such that is crosses multiple other arrays
15087ec681f3Smrg          * or variables, we need to make sure we have allocated the elements
15097ec681f3Smrg          * consecutively if the previously proccessed var was shorter than
15107ec681f3Smrg          * the current array we are processing.
15117ec681f3Smrg          *
15127ec681f3Smrg          * NOTE: The code below assumes the var list is ordered in ascending
15137ec681f3Smrg          * location order.
15147ec681f3Smrg          */
15157ec681f3Smrg         assert(last_loc <= var->data.location);
15167ec681f3Smrg         last_loc = var->data.location;
15177ec681f3Smrg         unsigned last_slot_location = driver_location + var_size;
15187ec681f3Smrg         if (last_slot_location > location) {
15197ec681f3Smrg            unsigned num_unallocated_slots = last_slot_location - location;
15207ec681f3Smrg            unsigned first_unallocated_slot = var_size - num_unallocated_slots;
15217ec681f3Smrg            for (unsigned i = first_unallocated_slot; i < var_size; i++) {
15227ec681f3Smrg               assigned_locations[var->data.location + i] = location;
15237ec681f3Smrg               location++;
15247ec681f3Smrg            }
15257ec681f3Smrg         }
15267ec681f3Smrg         continue;
15277ec681f3Smrg      }
15287ec681f3Smrg
15297ec681f3Smrg      for (unsigned i = 0; i < var_size; i++) {
15307ec681f3Smrg         assigned_locations[var->data.location + i] = location + i;
15317ec681f3Smrg      }
15327ec681f3Smrg
15337ec681f3Smrg      var->data.driver_location = location;
15347ec681f3Smrg      location += driver_size;
15357ec681f3Smrg   }
15367ec681f3Smrg
15377ec681f3Smrg   if (last_partial)
15387ec681f3Smrg      location++;
15397ec681f3Smrg
15407ec681f3Smrg   exec_list_append(&shader->variables, &io_vars);
15417ec681f3Smrg   *size = location;
15427ec681f3Smrg}
15437ec681f3Smrg
15447ec681f3Smrgstatic uint64_t
15457ec681f3Smrgget_linked_variable_location(unsigned location, bool patch)
15467ec681f3Smrg{
15477ec681f3Smrg   if (!patch)
15487ec681f3Smrg      return location;
15497ec681f3Smrg
15507ec681f3Smrg   /* Reserve locations 0...3 for special patch variables
15517ec681f3Smrg    * like tess factors and bounding boxes, and the generic patch
15527ec681f3Smrg    * variables will come after them.
15537ec681f3Smrg    */
15547ec681f3Smrg   if (location >= VARYING_SLOT_PATCH0)
15557ec681f3Smrg      return location - VARYING_SLOT_PATCH0 + 4;
15567ec681f3Smrg   else if (location >= VARYING_SLOT_TESS_LEVEL_OUTER &&
15577ec681f3Smrg            location <= VARYING_SLOT_BOUNDING_BOX1)
15587ec681f3Smrg      return location - VARYING_SLOT_TESS_LEVEL_OUTER;
15597ec681f3Smrg   else
15607ec681f3Smrg      unreachable("Unsupported variable in get_linked_variable_location.");
15617ec681f3Smrg}
15627ec681f3Smrg
15637ec681f3Smrgstatic uint64_t
15647ec681f3Smrgget_linked_variable_io_mask(nir_variable *variable, gl_shader_stage stage)
15657ec681f3Smrg{
15667ec681f3Smrg   const struct glsl_type *type = variable->type;
15677ec681f3Smrg
15687ec681f3Smrg   if (nir_is_arrayed_io(variable, stage)) {
15697ec681f3Smrg      assert(glsl_type_is_array(type));
15707ec681f3Smrg      type = glsl_get_array_element(type);
15717ec681f3Smrg   }
15727ec681f3Smrg
15737ec681f3Smrg   unsigned slots = glsl_count_attribute_slots(type, false);
15747ec681f3Smrg   if (variable->data.compact) {
15757ec681f3Smrg      unsigned component_count = variable->data.location_frac + glsl_get_length(type);
15767ec681f3Smrg      slots = DIV_ROUND_UP(component_count, 4);
15777ec681f3Smrg   }
15787ec681f3Smrg
15797ec681f3Smrg   uint64_t mask = u_bit_consecutive64(0, slots);
15807ec681f3Smrg   return mask;
15817ec681f3Smrg}
15827ec681f3Smrg
15837ec681f3Smrgnir_linked_io_var_info
15847ec681f3Smrgnir_assign_linked_io_var_locations(nir_shader *producer, nir_shader *consumer)
15857ec681f3Smrg{
15867ec681f3Smrg   assert(producer);
15877ec681f3Smrg   assert(consumer);
15887ec681f3Smrg
15897ec681f3Smrg   uint64_t producer_output_mask = 0;
15907ec681f3Smrg   uint64_t producer_patch_output_mask = 0;
15917ec681f3Smrg
15927ec681f3Smrg   nir_foreach_shader_out_variable(variable, producer) {
15937ec681f3Smrg      uint64_t mask = get_linked_variable_io_mask(variable, producer->info.stage);
15947ec681f3Smrg      uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
15957ec681f3Smrg
15967ec681f3Smrg      if (variable->data.patch)
15977ec681f3Smrg         producer_patch_output_mask |= mask << loc;
15987ec681f3Smrg      else
15997ec681f3Smrg         producer_output_mask |= mask << loc;
16007ec681f3Smrg   }
16017ec681f3Smrg
16027ec681f3Smrg   uint64_t consumer_input_mask = 0;
16037ec681f3Smrg   uint64_t consumer_patch_input_mask = 0;
16047ec681f3Smrg
16057ec681f3Smrg   nir_foreach_shader_in_variable(variable, consumer) {
16067ec681f3Smrg      uint64_t mask = get_linked_variable_io_mask(variable, consumer->info.stage);
16077ec681f3Smrg      uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
16087ec681f3Smrg
16097ec681f3Smrg      if (variable->data.patch)
16107ec681f3Smrg         consumer_patch_input_mask |= mask << loc;
16117ec681f3Smrg      else
16127ec681f3Smrg         consumer_input_mask |= mask << loc;
16137ec681f3Smrg   }
16147ec681f3Smrg
16157ec681f3Smrg   uint64_t io_mask = producer_output_mask | consumer_input_mask;
16167ec681f3Smrg   uint64_t patch_io_mask = producer_patch_output_mask | consumer_patch_input_mask;
16177ec681f3Smrg
16187ec681f3Smrg   nir_foreach_shader_out_variable(variable, producer) {
16197ec681f3Smrg      uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
16207ec681f3Smrg
16217ec681f3Smrg      if (variable->data.patch)
16227ec681f3Smrg         variable->data.driver_location = util_bitcount64(patch_io_mask & u_bit_consecutive64(0, loc));
16237ec681f3Smrg      else
16247ec681f3Smrg         variable->data.driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, loc));
16257ec681f3Smrg   }
16267ec681f3Smrg
16277ec681f3Smrg   nir_foreach_shader_in_variable(variable, consumer) {
16287ec681f3Smrg      uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
16297ec681f3Smrg
16307ec681f3Smrg      if (variable->data.patch)
16317ec681f3Smrg         variable->data.driver_location = util_bitcount64(patch_io_mask & u_bit_consecutive64(0, loc));
16327ec681f3Smrg      else
16337ec681f3Smrg         variable->data.driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, loc));
16347ec681f3Smrg   }
16357ec681f3Smrg
16367ec681f3Smrg   nir_linked_io_var_info result = {
16377ec681f3Smrg      .num_linked_io_vars = util_bitcount64(io_mask),
16387ec681f3Smrg      .num_linked_patch_io_vars = util_bitcount64(patch_io_mask),
16397ec681f3Smrg   };
16407ec681f3Smrg
16417ec681f3Smrg   return result;
16427ec681f3Smrg}
1643