/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *    Jason Ekstrand (jason@jlekstrand.net)
 *
 */

/*
 * This lowering pass converts references to input/output variables with
 * loads/stores to actual input/output intrinsics.
 */
3301e04c3fSmrg
3401e04c3fSmrg#include "nir.h"
3501e04c3fSmrg#include "nir_builder.h"
3601e04c3fSmrg#include "nir_deref.h"
3701e04c3fSmrg
387ec681f3Smrg#include "util/u_math.h"
397ec681f3Smrg
/* Per-run state shared by the helpers of the nir_lower_io pass. */
struct lower_io_state {
   /* Memory context for throwaway allocations made while lowering.
    * NOTE(review): not referenced in this chunk of the file. */
   void *dead_ctx;
   /* Builder used to emit the replacement load/store intrinsics */
   nir_builder builder;
   /* Driver callback returning the size (in slots) of a GLSL type; the
    * bool selects "bindless" sizing (see nir_assign_var_locations for
    * how that flag is derived from the variable). */
   int (*type_size)(const struct glsl_type *type, bool);
   /* Only variables whose mode is in this mask get lowered */
   nir_variable_mode modes;
   /* Option flags controlling e.g. 64-bit splitting and forced
    * sample interpolation */
   nir_lower_io_options options;
};
4701e04c3fSmrg
487e102996Smayastatic nir_intrinsic_op
497e102996Smayassbo_atomic_for_deref(nir_intrinsic_op deref_op)
507e102996Smaya{
517e102996Smaya   switch (deref_op) {
527e102996Smaya#define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_ssbo_##O;
537e102996Smaya   OP(atomic_exchange)
547e102996Smaya   OP(atomic_comp_swap)
557e102996Smaya   OP(atomic_add)
567e102996Smaya   OP(atomic_imin)
577e102996Smaya   OP(atomic_umin)
587e102996Smaya   OP(atomic_imax)
597e102996Smaya   OP(atomic_umax)
607e102996Smaya   OP(atomic_and)
617e102996Smaya   OP(atomic_or)
627e102996Smaya   OP(atomic_xor)
637e102996Smaya   OP(atomic_fadd)
647e102996Smaya   OP(atomic_fmin)
657e102996Smaya   OP(atomic_fmax)
667e102996Smaya   OP(atomic_fcomp_swap)
677e102996Smaya#undef OP
687e102996Smaya   default:
697e102996Smaya      unreachable("Invalid SSBO atomic");
707e102996Smaya   }
717e102996Smaya}
727e102996Smaya
737e102996Smayastatic nir_intrinsic_op
747e102996Smayaglobal_atomic_for_deref(nir_intrinsic_op deref_op)
757e102996Smaya{
767e102996Smaya   switch (deref_op) {
777e102996Smaya#define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_global_##O;
787e102996Smaya   OP(atomic_exchange)
797e102996Smaya   OP(atomic_comp_swap)
807e102996Smaya   OP(atomic_add)
817e102996Smaya   OP(atomic_imin)
827e102996Smaya   OP(atomic_umin)
837e102996Smaya   OP(atomic_imax)
847e102996Smaya   OP(atomic_umax)
857e102996Smaya   OP(atomic_and)
867e102996Smaya   OP(atomic_or)
877e102996Smaya   OP(atomic_xor)
887e102996Smaya   OP(atomic_fadd)
897e102996Smaya   OP(atomic_fmin)
907e102996Smaya   OP(atomic_fmax)
917e102996Smaya   OP(atomic_fcomp_swap)
927e102996Smaya#undef OP
937e102996Smaya   default:
947e102996Smaya      unreachable("Invalid SSBO atomic");
957e102996Smaya   }
967e102996Smaya}
977e102996Smaya
987ec681f3Smrgstatic nir_intrinsic_op
997ec681f3Smrgshared_atomic_for_deref(nir_intrinsic_op deref_op)
1007ec681f3Smrg{
1017ec681f3Smrg   switch (deref_op) {
1027ec681f3Smrg#define OP(O) case nir_intrinsic_deref_##O: return nir_intrinsic_shared_##O;
1037ec681f3Smrg   OP(atomic_exchange)
1047ec681f3Smrg   OP(atomic_comp_swap)
1057ec681f3Smrg   OP(atomic_add)
1067ec681f3Smrg   OP(atomic_imin)
1077ec681f3Smrg   OP(atomic_umin)
1087ec681f3Smrg   OP(atomic_imax)
1097ec681f3Smrg   OP(atomic_umax)
1107ec681f3Smrg   OP(atomic_and)
1117ec681f3Smrg   OP(atomic_or)
1127ec681f3Smrg   OP(atomic_xor)
1137ec681f3Smrg   OP(atomic_fadd)
1147ec681f3Smrg   OP(atomic_fmin)
1157ec681f3Smrg   OP(atomic_fmax)
1167ec681f3Smrg   OP(atomic_fcomp_swap)
1177ec681f3Smrg#undef OP
1187ec681f3Smrg   default:
1197ec681f3Smrg      unreachable("Invalid shared atomic");
1207ec681f3Smrg   }
1217ec681f3Smrg}
1227ec681f3Smrg
12301e04c3fSmrgvoid
1247ec681f3Smrgnir_assign_var_locations(nir_shader *shader, nir_variable_mode mode,
1257ec681f3Smrg                         unsigned *size,
1267e102996Smaya                         int (*type_size)(const struct glsl_type *, bool))
12701e04c3fSmrg{
12801e04c3fSmrg   unsigned location = 0;
12901e04c3fSmrg
1307ec681f3Smrg   nir_foreach_variable_with_modes(var, shader, mode) {
13101e04c3fSmrg      var->data.driver_location = location;
1327e102996Smaya      bool bindless_type_size = var->data.mode == nir_var_shader_in ||
1337e102996Smaya                                var->data.mode == nir_var_shader_out ||
1347e102996Smaya                                var->data.bindless;
1357e102996Smaya      location += type_size(var->type, bindless_type_size);
13601e04c3fSmrg   }
13701e04c3fSmrg
13801e04c3fSmrg   *size = location;
13901e04c3fSmrg}
14001e04c3fSmrg
14101e04c3fSmrg/**
1427ec681f3Smrg * Some inputs and outputs are arrayed, meaning that there is an extra level
1437ec681f3Smrg * of array indexing to handle mismatches between the shader interface and the
1447ec681f3Smrg * dispatch pattern of the shader.  For instance, geometry shaders are
1457ec681f3Smrg * executed per-primitive while their inputs and outputs are specified
1467ec681f3Smrg * per-vertex so all inputs and outputs have to be additionally indexed with
1477ec681f3Smrg * the vertex index within the primitive.
14801e04c3fSmrg */
14901e04c3fSmrgbool
1507ec681f3Smrgnir_is_arrayed_io(const nir_variable *var, gl_shader_stage stage)
15101e04c3fSmrg{
15201e04c3fSmrg   if (var->data.patch || !glsl_type_is_array(var->type))
15301e04c3fSmrg      return false;
15401e04c3fSmrg
15501e04c3fSmrg   if (var->data.mode == nir_var_shader_in)
15601e04c3fSmrg      return stage == MESA_SHADER_GEOMETRY ||
15701e04c3fSmrg             stage == MESA_SHADER_TESS_CTRL ||
15801e04c3fSmrg             stage == MESA_SHADER_TESS_EVAL;
15901e04c3fSmrg
16001e04c3fSmrg   if (var->data.mode == nir_var_shader_out)
1617ec681f3Smrg      return stage == MESA_SHADER_TESS_CTRL ||
1627ec681f3Smrg             stage == MESA_SHADER_MESH;
16301e04c3fSmrg
16401e04c3fSmrg   return false;
16501e04c3fSmrg}
16601e04c3fSmrg
1677ec681f3Smrgstatic unsigned get_number_of_slots(struct lower_io_state *state,
1687ec681f3Smrg                                    const nir_variable *var)
1697ec681f3Smrg{
1707ec681f3Smrg   const struct glsl_type *type = var->type;
1717ec681f3Smrg
1727ec681f3Smrg   if (nir_is_arrayed_io(var, state->builder.shader->info.stage)) {
1737ec681f3Smrg      assert(glsl_type_is_array(type));
1747ec681f3Smrg      type = glsl_get_array_element(type);
1757ec681f3Smrg   }
1767ec681f3Smrg
1777ec681f3Smrg   return state->type_size(type, var->data.bindless);
1787ec681f3Smrg}
1797ec681f3Smrg
/* Walk a deref chain rooted at an I/O variable and build the SSA offset (in
 * type_size units) of the accessed location relative to the variable's base.
 *
 * \param array_index  If non-NULL, the deref is arrayed I/O: the outermost
 *                     array index is returned here instead of being folded
 *                     into the offset.
 * \param component    In/out: on compact variables, updated to the component
 *                     within the vec4 slot actually addressed.
 * \param bts          "bindless type size" flag forwarded to type_size.
 */
static nir_ssa_def *
get_io_offset(nir_builder *b, nir_deref_instr *deref,
              nir_ssa_def **array_index,
              int (*type_size)(const struct glsl_type *, bool),
              unsigned *component, bool bts)
{
   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);

   /* path[0] is the variable itself; the chain proper starts at path[1] */
   assert(path.path[0]->deref_type == nir_deref_type_var);
   nir_deref_instr **p = &path.path[1];

   /* For arrayed I/O (e.g., per-vertex input arrays in geometry shader
    * inputs), skip the outermost array index.  Process the rest normally.
    */
   if (array_index != NULL) {
      assert((*p)->deref_type == nir_deref_type_array);
      *array_index = nir_ssa_for_src(b, (*p)->arr.index, 1);
      p++;
   }

   if (path.path[0]->var->data.compact) {
      /* Compact vars are a flat scalar array packed into vec4 slots:
       * fold the constant index into a slot offset plus component.
       */
      assert((*p)->deref_type == nir_deref_type_array);
      assert(glsl_type_is_scalar((*p)->type));

      /* We always lower indirect dereferences for "compact" array vars. */
      const unsigned index = nir_src_as_uint((*p)->arr.index);
      const unsigned total_offset = *component + index;
      const unsigned slot_offset = total_offset / 4;
      *component = total_offset % 4;
      return nir_imm_int(b, type_size(glsl_vec4_type(), bts) * slot_offset);
   }

   /* Just emit code and let constant-folding go to town */
   nir_ssa_def *offset = nir_imm_int(b, 0);

   for (; *p; p++) {
      if ((*p)->deref_type == nir_deref_type_array) {
         /* offset += index * element_size */
         unsigned size = type_size((*p)->type, bts);

         nir_ssa_def *mul =
            nir_amul_imm(b, nir_ssa_for_src(b, (*p)->arr.index, 1), size);

         offset = nir_iadd(b, offset, mul);
      } else if ((*p)->deref_type == nir_deref_type_struct) {
         /* p starts at path[1], so this is safe */
         nir_deref_instr *parent = *(p - 1);

         /* offset += sizes of all fields preceding the accessed one */
         unsigned field_offset = 0;
         for (unsigned i = 0; i < (*p)->strct.index; i++) {
            field_offset += type_size(glsl_get_struct_field(parent->type, i), bts);
         }
         offset = nir_iadd_imm(b, offset, field_offset);
      } else {
         unreachable("Unsupported deref type");
      }
   }

   nir_deref_path_finish(&path);

   return offset;
}
24201e04c3fSmrg
/* Emit a single lowered load intrinsic for an input/output/uniform variable
 * and return its SSA destination.
 *
 * The intrinsic opcode is chosen from the variable's mode, the shader stage,
 * the interpolation mode, and whether an (outer) array index is present.
 * \param array_index  Vertex/primitive index for arrayed I/O, or NULL.
 * \param offset       SSA byte/slot offset as built by get_io_offset().
 */
static nir_ssa_def *
emit_load(struct lower_io_state *state,
          nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset,
          unsigned component, unsigned num_components, unsigned bit_size,
          nir_alu_type dest_type)
{
   nir_builder *b = &state->builder;
   const nir_shader *nir = b->shader;
   nir_variable_mode mode = var->data.mode;
   nir_ssa_def *barycentric = NULL;

   nir_intrinsic_op op;
   switch (mode) {
   case nir_var_shader_in:
      /* FS inputs may become interpolated loads, except flat and
       * per-primitive inputs.
       */
      if (nir->info.stage == MESA_SHADER_FRAGMENT &&
          nir->options->use_interpolated_input_intrinsics &&
          var->data.interpolation != INTERP_MODE_FLAT &&
          !var->data.per_primitive) {
         if (var->data.interpolation == INTERP_MODE_EXPLICIT) {
            assert(array_index != NULL);
            op = nir_intrinsic_load_input_vertex;
         } else {
            assert(array_index == NULL);

            /* Pick the barycentric source matching the qualifiers (or the
             * force-sample option).
             */
            nir_intrinsic_op bary_op;
            if (var->data.sample ||
                (state->options & nir_lower_io_force_sample_interpolation))
               bary_op = nir_intrinsic_load_barycentric_sample;
            else if (var->data.centroid)
               bary_op = nir_intrinsic_load_barycentric_centroid;
            else
               bary_op = nir_intrinsic_load_barycentric_pixel;

            barycentric = nir_load_barycentric(&state->builder, bary_op,
                                               var->data.interpolation);
            op = nir_intrinsic_load_interpolated_input;
         }
      } else {
         op = array_index ? nir_intrinsic_load_per_vertex_input :
                            nir_intrinsic_load_input;
      }
      break;
   case nir_var_shader_out:
      op = !array_index            ? nir_intrinsic_load_output :
           var->data.per_primitive ? nir_intrinsic_load_per_primitive_output :
                                     nir_intrinsic_load_per_vertex_output;
      break;
   case nir_var_uniform:
      op = nir_intrinsic_load_uniform;
      break;
   default:
      unreachable("Unknown variable mode");
   }

   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(state->builder.shader, op);
   load->num_components = num_components;

   nir_intrinsic_set_base(load, var->data.driver_location);
   if (mode == nir_var_shader_in || mode == nir_var_shader_out)
      nir_intrinsic_set_component(load, component);

   /* Uniform loads carry a range instead of I/O semantics */
   if (load->intrinsic == nir_intrinsic_load_uniform)
      nir_intrinsic_set_range(load,
                              state->type_size(var->type, var->data.bindless));

   if (nir_intrinsic_has_access(load))
      nir_intrinsic_set_access(load, var->data.access);

   nir_intrinsic_set_dest_type(load, dest_type);

   if (load->intrinsic != nir_intrinsic_load_uniform) {
      nir_io_semantics semantics = {0};
      semantics.location = var->data.location;
      semantics.num_slots = get_number_of_slots(state, var);
      semantics.fb_fetch_output = var->data.fb_fetch_output;
      semantics.medium_precision =
         var->data.precision == GLSL_PRECISION_MEDIUM ||
         var->data.precision == GLSL_PRECISION_LOW;
      nir_intrinsic_set_io_semantics(load, semantics);
   }

   /* Source layout: optional vertex index or barycentrics first, then the
    * offset.
    */
   if (array_index) {
      load->src[0] = nir_src_for_ssa(array_index);
      load->src[1] = nir_src_for_ssa(offset);
   } else if (barycentric) {
      load->src[0] = nir_src_for_ssa(barycentric);
      load->src[1] = nir_src_for_ssa(offset);
   } else {
      load->src[0] = nir_src_for_ssa(offset);
   }

   nir_ssa_dest_init(&load->instr, &load->dest,
                     num_components, bit_size, NULL);
   nir_builder_instr_insert(b, &load->instr);

   return &load->dest.ssa;
}
34101e04c3fSmrg
/* Lower a load_deref (or an interpolateAt on a flat/explicit input) into one
 * or more emit_load() intrinsics and return the replacement value.
 *
 * 64-bit loads are optionally split into pairs of 32-bit loads and repacked;
 * 1-bit booleans are loaded as 32-bit and converted back.
 */
static nir_ssa_def *
lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
           nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset,
           unsigned component, const struct glsl_type *type)
{
   assert(intrin->dest.is_ssa);
   if (intrin->dest.ssa.bit_size == 64 &&
       (state->options & nir_lower_io_lower_64bit_to_32)) {
      nir_builder *b = &state->builder;

      /* Size of one vec4 slot holding two 64-bit components */
      const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);

      nir_ssa_def *comp64[4];
      assert(component == 0 || component == 2);
      unsigned dest_comp = 0;
      while (dest_comp < intrin->dest.ssa.num_components) {
         /* Each 64-bit component takes two 32-bit channels; load as many
          * as still fit in the current slot.
          */
         const unsigned num_comps =
            MIN2(intrin->dest.ssa.num_components - dest_comp,
                 (4 - component) / 2);

         nir_ssa_def *data32 =
            emit_load(state, array_index, var, offset, component,
                      num_comps * 2, 32, nir_type_uint32);
         for (unsigned i = 0; i < num_comps; i++) {
            /* Repack each adjacent 32-bit pair into one 64-bit value */
            comp64[dest_comp + i] =
               nir_pack_64_2x32(b, nir_channels(b, data32, 3 << (i * 2)));
         }

         /* Only the first store has a component offset */
         component = 0;
         dest_comp += num_comps;
         offset = nir_iadd_imm(b, offset, slot_size);
      }

      return nir_vec(b, comp64, intrin->dest.ssa.num_components);
   } else if (intrin->dest.ssa.bit_size == 1) {
      /* Booleans are 32-bit */
      assert(glsl_type_is_boolean(type));
      return nir_b2b1(&state->builder,
                      emit_load(state, array_index, var, offset, component,
                                intrin->dest.ssa.num_components, 32,
                                nir_type_bool32));
   } else {
      return emit_load(state, array_index, var, offset, component,
                       intrin->dest.ssa.num_components,
                       intrin->dest.ssa.bit_size,
                       nir_get_nir_type_for_glsl_type(type));
   }
}
3917ec681f3Smrg
/* Emit a single lowered store intrinsic for a shader output.
 *
 * \param data         SSA value to store.
 * \param array_index  Vertex/primitive index for arrayed outputs, or NULL.
 * \param write_mask   Per-component write mask for the store.
 */
static void
emit_store(struct lower_io_state *state, nir_ssa_def *data,
           nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset,
           unsigned component, unsigned num_components,
           nir_component_mask_t write_mask, nir_alu_type src_type)
{
   nir_builder *b = &state->builder;

   /* Only outputs can be stored */
   assert(var->data.mode == nir_var_shader_out);
   nir_intrinsic_op op =
      !array_index            ? nir_intrinsic_store_output :
      var->data.per_primitive ? nir_intrinsic_store_per_primitive_output :
                                nir_intrinsic_store_per_vertex_output;

   nir_intrinsic_instr *store =
      nir_intrinsic_instr_create(state->builder.shader, op);
   store->num_components = num_components;

   store->src[0] = nir_src_for_ssa(data);

   nir_intrinsic_set_base(store, var->data.driver_location);
   nir_intrinsic_set_component(store, component);
   nir_intrinsic_set_src_type(store, src_type);

   nir_intrinsic_set_write_mask(store, write_mask);

   if (nir_intrinsic_has_access(store))
      nir_intrinsic_set_access(store, var->data.access);

   /* Source layout: data, then optional vertex index, then offset */
   if (array_index)
      store->src[1] = nir_src_for_ssa(array_index);

   store->src[array_index ? 2 : 1] = nir_src_for_ssa(offset);

   /* Build the per-component GS stream mask (2 bits per component) */
   unsigned gs_streams = 0;
   if (state->builder.shader->info.stage == MESA_SHADER_GEOMETRY) {
      if (var->data.stream & NIR_STREAM_PACKED) {
         /* Already packed per-component; just strip the flag */
         gs_streams = var->data.stream & ~NIR_STREAM_PACKED;
      } else {
         assert(var->data.stream < 4);
         gs_streams = 0;
         for (unsigned i = 0; i < num_components; ++i)
            gs_streams |= var->data.stream << (2 * i);
      }
   }

   nir_io_semantics semantics = {0};
   semantics.location = var->data.location;
   semantics.num_slots = get_number_of_slots(state, var);
   semantics.dual_source_blend_index = var->data.index;
   semantics.gs_streams = gs_streams;
   semantics.medium_precision =
      var->data.precision == GLSL_PRECISION_MEDIUM ||
      var->data.precision == GLSL_PRECISION_LOW;
   semantics.per_view = var->data.per_view;
   nir_intrinsic_set_io_semantics(store, semantics);

   nir_builder_instr_insert(b, &store->instr);
}
45101e04c3fSmrg
4527ec681f3Smrgstatic void
4537ec681f3Smrglower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
4547ec681f3Smrg            nir_ssa_def *array_index, nir_variable *var, nir_ssa_def *offset,
4557ec681f3Smrg            unsigned component, const struct glsl_type *type)
45601e04c3fSmrg{
4577ec681f3Smrg   assert(intrin->src[1].is_ssa);
4587ec681f3Smrg   if (intrin->src[1].ssa->bit_size == 64 &&
4597ec681f3Smrg       (state->options & nir_lower_io_lower_64bit_to_32)) {
4607ec681f3Smrg      nir_builder *b = &state->builder;
46101e04c3fSmrg
4627ec681f3Smrg      const unsigned slot_size = state->type_size(glsl_dvec_type(2), false);
46301e04c3fSmrg
4647ec681f3Smrg      assert(component == 0 || component == 2);
4657ec681f3Smrg      unsigned src_comp = 0;
4667ec681f3Smrg      nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
4677ec681f3Smrg      while (src_comp < intrin->num_components) {
4687ec681f3Smrg         const unsigned num_comps =
4697ec681f3Smrg            MIN2(intrin->num_components - src_comp,
4707ec681f3Smrg                 (4 - component) / 2);
4717ec681f3Smrg
4727ec681f3Smrg         if (write_mask & BITFIELD_MASK(num_comps)) {
4737ec681f3Smrg            nir_ssa_def *data =
4747ec681f3Smrg               nir_channels(b, intrin->src[1].ssa,
4757ec681f3Smrg                            BITFIELD_RANGE(src_comp, num_comps));
4767ec681f3Smrg            nir_ssa_def *data32 = nir_bitcast_vector(b, data, 32);
4777ec681f3Smrg
4787ec681f3Smrg            nir_component_mask_t write_mask32 = 0;
4797ec681f3Smrg            for (unsigned i = 0; i < num_comps; i++) {
4807ec681f3Smrg               if (write_mask & BITFIELD_MASK(num_comps) & (1 << i))
4817ec681f3Smrg                  write_mask32 |= 3 << (i * 2);
4827ec681f3Smrg            }
48301e04c3fSmrg
4847ec681f3Smrg            emit_store(state, data32, array_index, var, offset,
4857ec681f3Smrg                       component, data32->num_components, write_mask32,
4867ec681f3Smrg                       nir_type_uint32);
4877ec681f3Smrg         }
48801e04c3fSmrg
4897ec681f3Smrg         /* Only the first store has a component offset */
4907ec681f3Smrg         component = 0;
4917ec681f3Smrg         src_comp += num_comps;
4927ec681f3Smrg         write_mask >>= num_comps;
4937ec681f3Smrg         offset = nir_iadd_imm(b, offset, slot_size);
4947ec681f3Smrg      }
4957ec681f3Smrg   } else if (intrin->dest.ssa.bit_size == 1) {
4967ec681f3Smrg      /* Booleans are 32-bit */
4977ec681f3Smrg      assert(glsl_type_is_boolean(type));
4987ec681f3Smrg      nir_ssa_def *b32_val = nir_b2b32(&state->builder, intrin->src[1].ssa);
4997ec681f3Smrg      emit_store(state, b32_val, array_index, var, offset,
5007ec681f3Smrg                 component, intrin->num_components,
5017ec681f3Smrg                 nir_intrinsic_write_mask(intrin),
5027ec681f3Smrg                 nir_type_bool32);
5037ec681f3Smrg   } else {
5047ec681f3Smrg      emit_store(state, intrin->src[1].ssa, array_index, var, offset,
5057ec681f3Smrg                 component, intrin->num_components,
5067ec681f3Smrg                 nir_intrinsic_write_mask(intrin),
5077ec681f3Smrg                 nir_get_nir_type_for_glsl_type(type));
50801e04c3fSmrg   }
50901e04c3fSmrg}
51001e04c3fSmrg
/* Lower an interpolateAt*() deref intrinsic on a fragment input into a
 * barycentric-setup intrinsic plus a load_interpolated_input, and return
 * the replacement value.
 *
 * Flat and explicit-interpolation inputs are handled as plain loads instead
 * (explicit mode routes through load_input_vertex via lower_load).
 */
static nir_ssa_def *
lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
                     nir_variable *var, nir_ssa_def *offset, unsigned component,
                     const struct glsl_type *type)
{
   nir_builder *b = &state->builder;
   assert(var->data.mode == nir_var_shader_in);

   /* Ignore interpolateAt() for flat variables - flat is flat. Lower
    * interpolateAtVertex() for explicit variables.
    */
   if (var->data.interpolation == INTERP_MODE_FLAT ||
       var->data.interpolation == INTERP_MODE_EXPLICIT) {
      nir_ssa_def *vertex_index = NULL;

      if (var->data.interpolation == INTERP_MODE_EXPLICIT) {
         assert(intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex);
         vertex_index = intrin->src[1].ssa;
      }

      return lower_load(intrin, state, vertex_index, var, offset, component, type);
   }

   /* None of the supported APIs allow interpolation on 64-bit things */
   assert(intrin->dest.is_ssa && intrin->dest.ssa.bit_size <= 32);

   /* Choose the barycentric-setup opcode matching the interpolateAt flavor */
   nir_intrinsic_op bary_op;
   switch (intrin->intrinsic) {
   case nir_intrinsic_interp_deref_at_centroid:
      bary_op = (state->options & nir_lower_io_force_sample_interpolation) ?
                nir_intrinsic_load_barycentric_sample :
                nir_intrinsic_load_barycentric_centroid;
      break;
   case nir_intrinsic_interp_deref_at_sample:
      bary_op = nir_intrinsic_load_barycentric_at_sample;
      break;
   case nir_intrinsic_interp_deref_at_offset:
      bary_op = nir_intrinsic_load_barycentric_at_offset;
      break;
   default:
      unreachable("Bogus interpolateAt() intrinsic.");
   }

   nir_intrinsic_instr *bary_setup =
      nir_intrinsic_instr_create(state->builder.shader, bary_op);

   nir_ssa_dest_init(&bary_setup->instr, &bary_setup->dest, 2, 32, NULL);
   nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation);

   /* at_sample/at_offset/at_vertex carry an extra argument (sample id,
    * offset vector, or vertex id) that feeds the barycentric setup.
    */
   if (intrin->intrinsic == nir_intrinsic_interp_deref_at_sample ||
       intrin->intrinsic == nir_intrinsic_interp_deref_at_offset ||
       intrin->intrinsic == nir_intrinsic_interp_deref_at_vertex)
      nir_src_copy(&bary_setup->src[0], &intrin->src[1]);

   nir_builder_instr_insert(b, &bary_setup->instr);

   nir_io_semantics semantics = {0};
   semantics.location = var->data.location;
   semantics.num_slots = get_number_of_slots(state, var);
   semantics.medium_precision =
      var->data.precision == GLSL_PRECISION_MEDIUM ||
      var->data.precision == GLSL_PRECISION_LOW;

   assert(intrin->dest.is_ssa);
   nir_ssa_def *load =
      nir_load_interpolated_input(&state->builder,
                                  intrin->dest.ssa.num_components,
                                  intrin->dest.ssa.bit_size,
                                  &bary_setup->dest.ssa,
                                  offset,
                                  .base = var->data.driver_location,
                                  .component = component,
                                  .io_semantics = semantics);

   return load;
}
58701e04c3fSmrg
58801e04c3fSmrgstatic bool
58901e04c3fSmrgnir_lower_io_block(nir_block *block,
59001e04c3fSmrg                   struct lower_io_state *state)
59101e04c3fSmrg{
59201e04c3fSmrg   nir_builder *b = &state->builder;
59301e04c3fSmrg   const nir_shader_compiler_options *options = b->shader->options;
59401e04c3fSmrg   bool progress = false;
59501e04c3fSmrg
59601e04c3fSmrg   nir_foreach_instr_safe(instr, block) {
59701e04c3fSmrg      if (instr->type != nir_instr_type_intrinsic)
59801e04c3fSmrg         continue;
59901e04c3fSmrg
60001e04c3fSmrg      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
60101e04c3fSmrg
60201e04c3fSmrg      switch (intrin->intrinsic) {
60301e04c3fSmrg      case nir_intrinsic_load_deref:
60401e04c3fSmrg      case nir_intrinsic_store_deref:
60501e04c3fSmrg         /* We can lower the io for this nir instrinsic */
60601e04c3fSmrg         break;
60701e04c3fSmrg      case nir_intrinsic_interp_deref_at_centroid:
60801e04c3fSmrg      case nir_intrinsic_interp_deref_at_sample:
60901e04c3fSmrg      case nir_intrinsic_interp_deref_at_offset:
6107ec681f3Smrg      case nir_intrinsic_interp_deref_at_vertex:
61101e04c3fSmrg         /* We can optionally lower these to load_interpolated_input */
6127ec681f3Smrg         if (options->use_interpolated_input_intrinsics ||
6137ec681f3Smrg             options->lower_interpolate_at)
61401e04c3fSmrg            break;
6157ec681f3Smrg         FALLTHROUGH;
61601e04c3fSmrg      default:
61701e04c3fSmrg         /* We can't lower the io for this nir instrinsic, so skip it */
61801e04c3fSmrg         continue;
61901e04c3fSmrg      }
62001e04c3fSmrg
62101e04c3fSmrg      nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
6227ec681f3Smrg      if (!nir_deref_mode_is_one_of(deref, state->modes))
62301e04c3fSmrg         continue;
62401e04c3fSmrg
6257ec681f3Smrg      nir_variable *var = nir_deref_instr_get_variable(deref);
62601e04c3fSmrg
62701e04c3fSmrg      b->cursor = nir_before_instr(instr);
62801e04c3fSmrg
6297ec681f3Smrg      const bool is_arrayed = nir_is_arrayed_io(var, b->shader->info.stage);
63001e04c3fSmrg
63101e04c3fSmrg      nir_ssa_def *offset;
6327ec681f3Smrg      nir_ssa_def *array_index = NULL;
63301e04c3fSmrg      unsigned component_offset = var->data.location_frac;
6347ec681f3Smrg      bool bindless_type_size = var->data.mode == nir_var_shader_in ||
6357ec681f3Smrg                                var->data.mode == nir_var_shader_out ||
6367e102996Smaya                                var->data.bindless;
63701e04c3fSmrg
6387ec681f3Smrg     if (nir_deref_instr_is_known_out_of_bounds(deref)) {
6397ec681f3Smrg        /* Section 5.11 (Out-of-Bounds Accesses) of the GLSL 4.60 spec says:
6407ec681f3Smrg         *
6417ec681f3Smrg         *    In the subsections described above for array, vector, matrix and
6427ec681f3Smrg         *    structure accesses, any out-of-bounds access produced undefined
6437ec681f3Smrg         *    behavior....
6447ec681f3Smrg         *    Out-of-bounds reads return undefined values, which
6457ec681f3Smrg         *    include values from other variables of the active program or zero.
6467ec681f3Smrg         *    Out-of-bounds writes may be discarded or overwrite
6477ec681f3Smrg         *    other variables of the active program.
6487ec681f3Smrg         *
6497ec681f3Smrg         * GL_KHR_robustness and GL_ARB_robustness encourage us to return zero
6507ec681f3Smrg         * for reads.
6517ec681f3Smrg         *
6527ec681f3Smrg         * Otherwise get_io_offset would return out-of-bound offset which may
6537ec681f3Smrg         * result in out-of-bound loading/storing of inputs/outputs,
6547ec681f3Smrg         * that could cause issues in drivers down the line.
6557ec681f3Smrg         */
6567ec681f3Smrg         if (intrin->intrinsic != nir_intrinsic_store_deref) {
6577ec681f3Smrg            nir_ssa_def *zero =
6587ec681f3Smrg               nir_imm_zero(b, intrin->dest.ssa.num_components,
6597ec681f3Smrg                             intrin->dest.ssa.bit_size);
6607ec681f3Smrg            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
6617ec681f3Smrg                                  zero);
6627ec681f3Smrg         }
6637ec681f3Smrg
6647ec681f3Smrg         nir_instr_remove(&intrin->instr);
6657ec681f3Smrg         progress = true;
6667ec681f3Smrg         continue;
6677ec681f3Smrg      }
6687ec681f3Smrg
6697ec681f3Smrg      offset = get_io_offset(b, deref, is_arrayed ? &array_index : NULL,
6707e102996Smaya                             state->type_size, &component_offset,
6717e102996Smaya                             bindless_type_size);
67201e04c3fSmrg
6737ec681f3Smrg      nir_ssa_def *replacement = NULL;
67401e04c3fSmrg
67501e04c3fSmrg      switch (intrin->intrinsic) {
67601e04c3fSmrg      case nir_intrinsic_load_deref:
6777ec681f3Smrg         replacement = lower_load(intrin, state, array_index, var, offset,
6787ec681f3Smrg                                  component_offset, deref->type);
67901e04c3fSmrg         break;
68001e04c3fSmrg
68101e04c3fSmrg      case nir_intrinsic_store_deref:
6827ec681f3Smrg         lower_store(intrin, state, array_index, var, offset,
6837ec681f3Smrg                     component_offset, deref->type);
68401e04c3fSmrg         break;
68501e04c3fSmrg
68601e04c3fSmrg      case nir_intrinsic_interp_deref_at_centroid:
68701e04c3fSmrg      case nir_intrinsic_interp_deref_at_sample:
68801e04c3fSmrg      case nir_intrinsic_interp_deref_at_offset:
6897ec681f3Smrg      case nir_intrinsic_interp_deref_at_vertex:
6907ec681f3Smrg         assert(array_index == NULL);
69101e04c3fSmrg         replacement = lower_interpolate_at(intrin, state, var, offset,
6927ec681f3Smrg                                            component_offset, deref->type);
69301e04c3fSmrg         break;
69401e04c3fSmrg
69501e04c3fSmrg      default:
69601e04c3fSmrg         continue;
69701e04c3fSmrg      }
69801e04c3fSmrg
6997ec681f3Smrg      if (replacement) {
7007ec681f3Smrg         nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
7017ec681f3Smrg                                  replacement);
70201e04c3fSmrg      }
70301e04c3fSmrg      nir_instr_remove(&intrin->instr);
70401e04c3fSmrg      progress = true;
70501e04c3fSmrg   }
70601e04c3fSmrg
70701e04c3fSmrg   return progress;
70801e04c3fSmrg}
70901e04c3fSmrg
71001e04c3fSmrgstatic bool
71101e04c3fSmrgnir_lower_io_impl(nir_function_impl *impl,
71201e04c3fSmrg                  nir_variable_mode modes,
7137e102996Smaya                  int (*type_size)(const struct glsl_type *, bool),
71401e04c3fSmrg                  nir_lower_io_options options)
71501e04c3fSmrg{
71601e04c3fSmrg   struct lower_io_state state;
71701e04c3fSmrg   bool progress = false;
71801e04c3fSmrg
71901e04c3fSmrg   nir_builder_init(&state.builder, impl);
72001e04c3fSmrg   state.dead_ctx = ralloc_context(NULL);
72101e04c3fSmrg   state.modes = modes;
72201e04c3fSmrg   state.type_size = type_size;
72301e04c3fSmrg   state.options = options;
72401e04c3fSmrg
7257ec681f3Smrg   ASSERTED nir_variable_mode supported_modes =
7267ec681f3Smrg      nir_var_shader_in | nir_var_shader_out | nir_var_uniform;
7277ec681f3Smrg   assert(!(modes & ~supported_modes));
7287ec681f3Smrg
72901e04c3fSmrg   nir_foreach_block(block, impl) {
73001e04c3fSmrg      progress |= nir_lower_io_block(block, &state);
73101e04c3fSmrg   }
73201e04c3fSmrg
73301e04c3fSmrg   ralloc_free(state.dead_ctx);
73401e04c3fSmrg
7357ec681f3Smrg   nir_metadata_preserve(impl, nir_metadata_none);
7367ec681f3Smrg
73701e04c3fSmrg   return progress;
73801e04c3fSmrg}
73901e04c3fSmrg
7407ec681f3Smrg/** Lower load/store_deref intrinsics on I/O variables to offset-based intrinsics
7417ec681f3Smrg *
7427ec681f3Smrg * This pass is intended to be used for cross-stage shader I/O and driver-
7437ec681f3Smrg * managed uniforms to turn deref-based access into a simpler model using
7447ec681f3Smrg * locations or offsets.  For fragment shader inputs, it can optionally turn
7457ec681f3Smrg * load_deref into an explicit interpolation using barycentrics coming from
7467ec681f3Smrg * one of the load_barycentric_* intrinsics.  This pass requires that all
7477ec681f3Smrg * deref chains are complete and contain no casts.
7487ec681f3Smrg */
74901e04c3fSmrgbool
75001e04c3fSmrgnir_lower_io(nir_shader *shader, nir_variable_mode modes,
7517e102996Smaya             int (*type_size)(const struct glsl_type *, bool),
75201e04c3fSmrg             nir_lower_io_options options)
75301e04c3fSmrg{
75401e04c3fSmrg   bool progress = false;
75501e04c3fSmrg
75601e04c3fSmrg   nir_foreach_function(function, shader) {
75701e04c3fSmrg      if (function->impl) {
75801e04c3fSmrg         progress |= nir_lower_io_impl(function->impl, modes,
75901e04c3fSmrg                                       type_size, options);
76001e04c3fSmrg      }
76101e04c3fSmrg   }
76201e04c3fSmrg
76301e04c3fSmrg   return progress;
76401e04c3fSmrg}
76501e04c3fSmrg
7667e102996Smayastatic unsigned
7677e102996Smayatype_scalar_size_bytes(const struct glsl_type *type)
7687e102996Smaya{
7697e102996Smaya   assert(glsl_type_is_vector_or_scalar(type) ||
7707e102996Smaya          glsl_type_is_matrix(type));
7717e102996Smaya   return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
7727e102996Smaya}
7737e102996Smaya
7747e102996Smayastatic nir_ssa_def *
7757e102996Smayabuild_addr_iadd(nir_builder *b, nir_ssa_def *addr,
7767ec681f3Smrg                nir_address_format addr_format,
7777ec681f3Smrg                nir_variable_mode modes,
7787ec681f3Smrg                nir_ssa_def *offset)
7797e102996Smaya{
7807e102996Smaya   assert(offset->num_components == 1);
7817e102996Smaya
7827e102996Smaya   switch (addr_format) {
7837e102996Smaya   case nir_address_format_32bit_global:
7847e102996Smaya   case nir_address_format_64bit_global:
7857ec681f3Smrg   case nir_address_format_32bit_offset:
7867ec681f3Smrg      assert(addr->bit_size == offset->bit_size);
7877e102996Smaya      assert(addr->num_components == 1);
7887e102996Smaya      return nir_iadd(b, addr, offset);
7897e102996Smaya
7907ec681f3Smrg   case nir_address_format_32bit_offset_as_64bit:
7917ec681f3Smrg      assert(addr->num_components == 1);
7927ec681f3Smrg      assert(offset->bit_size == 32);
7937ec681f3Smrg      return nir_u2u64(b, nir_iadd(b, nir_u2u32(b, addr), offset));
7947ec681f3Smrg
7957ec681f3Smrg   case nir_address_format_64bit_global_32bit_offset:
7967e102996Smaya   case nir_address_format_64bit_bounded_global:
7977e102996Smaya      assert(addr->num_components == 4);
7987ec681f3Smrg      assert(addr->bit_size == offset->bit_size);
7997ec681f3Smrg      return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 3), offset), 3);
8007e102996Smaya
8017e102996Smaya   case nir_address_format_32bit_index_offset:
8027e102996Smaya      assert(addr->num_components == 2);
8037ec681f3Smrg      assert(addr->bit_size == offset->bit_size);
8047ec681f3Smrg      return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 1), offset), 1);
8057ec681f3Smrg
8067ec681f3Smrg   case nir_address_format_32bit_index_offset_pack64:
8077ec681f3Smrg      assert(addr->num_components == 1);
8087ec681f3Smrg      assert(offset->bit_size == 32);
8097ec681f3Smrg      return nir_pack_64_2x32_split(b,
8107ec681f3Smrg                                    nir_iadd(b, nir_unpack_64_2x32_split_x(b, addr), offset),
8117ec681f3Smrg                                    nir_unpack_64_2x32_split_y(b, addr));
8127ec681f3Smrg
8137ec681f3Smrg   case nir_address_format_vec2_index_32bit_offset:
8147ec681f3Smrg      assert(addr->num_components == 3);
8157ec681f3Smrg      assert(offset->bit_size == 32);
8167ec681f3Smrg      return nir_vector_insert_imm(b, addr, nir_iadd(b, nir_channel(b, addr, 2), offset), 2);
8177ec681f3Smrg
8187ec681f3Smrg   case nir_address_format_62bit_generic:
8197ec681f3Smrg      assert(addr->num_components == 1);
8207ec681f3Smrg      assert(addr->bit_size == 64);
8217ec681f3Smrg      assert(offset->bit_size == 64);
8227ec681f3Smrg      if (!(modes & ~(nir_var_function_temp |
8237ec681f3Smrg                      nir_var_shader_temp |
8247ec681f3Smrg                      nir_var_mem_shared))) {
8257ec681f3Smrg         /* If we're sure it's one of these modes, we can do an easy 32-bit
8267ec681f3Smrg          * addition and don't need to bother with 64-bit math.
8277ec681f3Smrg          */
8287ec681f3Smrg         nir_ssa_def *addr32 = nir_unpack_64_2x32_split_x(b, addr);
8297ec681f3Smrg         nir_ssa_def *type = nir_unpack_64_2x32_split_y(b, addr);
8307ec681f3Smrg         addr32 = nir_iadd(b, addr32, nir_u2u32(b, offset));
8317ec681f3Smrg         return nir_pack_64_2x32_split(b, addr32, type);
8327ec681f3Smrg      } else {
8337ec681f3Smrg         return nir_iadd(b, addr, offset);
8347ec681f3Smrg      }
8357ec681f3Smrg
8367ec681f3Smrg   case nir_address_format_logical:
8377ec681f3Smrg      unreachable("Unsupported address format");
8387e102996Smaya   }
8397e102996Smaya   unreachable("Invalid address format");
8407e102996Smaya}
8417e102996Smaya
8427ec681f3Smrgstatic unsigned
8437ec681f3Smrgaddr_get_offset_bit_size(nir_ssa_def *addr, nir_address_format addr_format)
8447ec681f3Smrg{
8457ec681f3Smrg   if (addr_format == nir_address_format_32bit_offset_as_64bit ||
8467ec681f3Smrg       addr_format == nir_address_format_32bit_index_offset_pack64)
8477ec681f3Smrg      return 32;
8487ec681f3Smrg   return addr->bit_size;
8497ec681f3Smrg}
8507ec681f3Smrg
8517e102996Smayastatic nir_ssa_def *
8527e102996Smayabuild_addr_iadd_imm(nir_builder *b, nir_ssa_def *addr,
8537ec681f3Smrg                    nir_address_format addr_format,
8547ec681f3Smrg                    nir_variable_mode modes,
8557ec681f3Smrg                    int64_t offset)
8567ec681f3Smrg{
8577ec681f3Smrg   return build_addr_iadd(b, addr, addr_format, modes,
8587ec681f3Smrg                             nir_imm_intN_t(b, offset,
8597ec681f3Smrg                                            addr_get_offset_bit_size(addr, addr_format)));
8607ec681f3Smrg}
8617ec681f3Smrg
8627ec681f3Smrgstatic nir_ssa_def *
8637ec681f3Smrgbuild_addr_for_var(nir_builder *b, nir_variable *var,
8647ec681f3Smrg                   nir_address_format addr_format)
8657ec681f3Smrg{
8667ec681f3Smrg   assert(var->data.mode & (nir_var_uniform | nir_var_mem_shared |
8677ec681f3Smrg                            nir_var_shader_temp | nir_var_function_temp |
8687ec681f3Smrg                            nir_var_mem_push_const | nir_var_mem_constant));
8697ec681f3Smrg
8707ec681f3Smrg   const unsigned num_comps = nir_address_format_num_components(addr_format);
8717ec681f3Smrg   const unsigned bit_size = nir_address_format_bit_size(addr_format);
8727ec681f3Smrg
8737ec681f3Smrg   switch (addr_format) {
8747ec681f3Smrg   case nir_address_format_32bit_global:
8757ec681f3Smrg   case nir_address_format_64bit_global: {
8767ec681f3Smrg      nir_ssa_def *base_addr;
8777ec681f3Smrg      switch (var->data.mode) {
8787ec681f3Smrg      case nir_var_shader_temp:
8797ec681f3Smrg         base_addr = nir_load_scratch_base_ptr(b, num_comps, bit_size, 0);
8807ec681f3Smrg         break;
8817ec681f3Smrg
8827ec681f3Smrg      case nir_var_function_temp:
8837ec681f3Smrg         base_addr = nir_load_scratch_base_ptr(b, num_comps, bit_size, 1);
8847ec681f3Smrg         break;
8857ec681f3Smrg
8867ec681f3Smrg      case nir_var_mem_constant:
8877ec681f3Smrg         base_addr = nir_load_constant_base_ptr(b, num_comps, bit_size);
8887ec681f3Smrg         break;
8897ec681f3Smrg
8907ec681f3Smrg      case nir_var_mem_shared:
8917ec681f3Smrg         base_addr = nir_load_shared_base_ptr(b, num_comps, bit_size);
8927ec681f3Smrg         break;
8937ec681f3Smrg
8947ec681f3Smrg      default:
8957ec681f3Smrg         unreachable("Unsupported variable mode");
8967ec681f3Smrg      }
8977ec681f3Smrg
8987ec681f3Smrg      return build_addr_iadd_imm(b, base_addr, addr_format, var->data.mode,
8997ec681f3Smrg                                    var->data.driver_location);
9007ec681f3Smrg   }
9017ec681f3Smrg
9027ec681f3Smrg   case nir_address_format_32bit_offset:
9037ec681f3Smrg      assert(var->data.driver_location <= UINT32_MAX);
9047ec681f3Smrg      return nir_imm_int(b, var->data.driver_location);
9057ec681f3Smrg
9067ec681f3Smrg   case nir_address_format_32bit_offset_as_64bit:
9077ec681f3Smrg      assert(var->data.driver_location <= UINT32_MAX);
9087ec681f3Smrg      return nir_imm_int64(b, var->data.driver_location);
9097ec681f3Smrg
9107ec681f3Smrg   case nir_address_format_62bit_generic:
9117ec681f3Smrg      switch (var->data.mode) {
9127ec681f3Smrg      case nir_var_shader_temp:
9137ec681f3Smrg      case nir_var_function_temp:
9147ec681f3Smrg         assert(var->data.driver_location <= UINT32_MAX);
9157ec681f3Smrg         return nir_imm_intN_t(b, var->data.driver_location | 2ull << 62, 64);
9167ec681f3Smrg
9177ec681f3Smrg      case nir_var_mem_shared:
9187ec681f3Smrg         assert(var->data.driver_location <= UINT32_MAX);
9197ec681f3Smrg         return nir_imm_intN_t(b, var->data.driver_location | 1ull << 62, 64);
9207ec681f3Smrg
9217ec681f3Smrg      default:
9227ec681f3Smrg         unreachable("Unsupported variable mode");
9237ec681f3Smrg      }
9247ec681f3Smrg
9257ec681f3Smrg   default:
9267ec681f3Smrg      unreachable("Unsupported address format");
9277ec681f3Smrg   }
9287ec681f3Smrg}
9297ec681f3Smrg
9307ec681f3Smrgstatic nir_ssa_def *
9317ec681f3Smrgbuild_runtime_addr_mode_check(nir_builder *b, nir_ssa_def *addr,
9327ec681f3Smrg                              nir_address_format addr_format,
9337ec681f3Smrg                              nir_variable_mode mode)
9347e102996Smaya{
9357ec681f3Smrg   /* The compile-time check failed; do a run-time check */
9367ec681f3Smrg   switch (addr_format) {
9377ec681f3Smrg   case nir_address_format_62bit_generic: {
9387ec681f3Smrg      assert(addr->num_components == 1);
9397ec681f3Smrg      assert(addr->bit_size == 64);
9407ec681f3Smrg      nir_ssa_def *mode_enum = nir_ushr(b, addr, nir_imm_int(b, 62));
9417ec681f3Smrg      switch (mode) {
9427ec681f3Smrg      case nir_var_function_temp:
9437ec681f3Smrg      case nir_var_shader_temp:
9447ec681f3Smrg         return nir_ieq_imm(b, mode_enum, 0x2);
9457ec681f3Smrg
9467ec681f3Smrg      case nir_var_mem_shared:
9477ec681f3Smrg         return nir_ieq_imm(b, mode_enum, 0x1);
9487ec681f3Smrg
9497ec681f3Smrg      case nir_var_mem_global:
9507ec681f3Smrg         return nir_ior(b, nir_ieq_imm(b, mode_enum, 0x0),
9517ec681f3Smrg                           nir_ieq_imm(b, mode_enum, 0x3));
9527ec681f3Smrg
9537ec681f3Smrg      default:
9547ec681f3Smrg         unreachable("Invalid mode check intrinsic");
9557ec681f3Smrg      }
9567ec681f3Smrg   }
9577ec681f3Smrg
9587ec681f3Smrg   default:
9597ec681f3Smrg      unreachable("Unsupported address mode");
9607ec681f3Smrg   }
9617e102996Smaya}
9627e102996Smaya
9637e102996Smayastatic nir_ssa_def *
9647e102996Smayaaddr_to_index(nir_builder *b, nir_ssa_def *addr,
9657e102996Smaya              nir_address_format addr_format)
9667e102996Smaya{
9677ec681f3Smrg   switch (addr_format) {
9687ec681f3Smrg   case nir_address_format_32bit_index_offset:
9697ec681f3Smrg      assert(addr->num_components == 2);
9707ec681f3Smrg      return nir_channel(b, addr, 0);
9717ec681f3Smrg   case nir_address_format_32bit_index_offset_pack64:
9727ec681f3Smrg      return nir_unpack_64_2x32_split_y(b, addr);
9737ec681f3Smrg   case nir_address_format_vec2_index_32bit_offset:
9747ec681f3Smrg      assert(addr->num_components == 3);
9757ec681f3Smrg      return nir_channels(b, addr, 0x3);
9767ec681f3Smrg   default: unreachable("Invalid address format");
9777ec681f3Smrg   }
9787e102996Smaya}
9797e102996Smaya
9807e102996Smayastatic nir_ssa_def *
9817e102996Smayaaddr_to_offset(nir_builder *b, nir_ssa_def *addr,
9827e102996Smaya               nir_address_format addr_format)
9837e102996Smaya{
9847ec681f3Smrg   switch (addr_format) {
9857ec681f3Smrg   case nir_address_format_32bit_index_offset:
9867ec681f3Smrg      assert(addr->num_components == 2);
9877ec681f3Smrg      return nir_channel(b, addr, 1);
9887ec681f3Smrg   case nir_address_format_32bit_index_offset_pack64:
9897ec681f3Smrg      return nir_unpack_64_2x32_split_x(b, addr);
9907ec681f3Smrg   case nir_address_format_vec2_index_32bit_offset:
9917ec681f3Smrg      assert(addr->num_components == 3);
9927ec681f3Smrg      return nir_channel(b, addr, 2);
9937ec681f3Smrg   case nir_address_format_32bit_offset:
9947ec681f3Smrg      return addr;
9957ec681f3Smrg   case nir_address_format_32bit_offset_as_64bit:
9967ec681f3Smrg   case nir_address_format_62bit_generic:
9977ec681f3Smrg      return nir_u2u32(b, addr);
9987ec681f3Smrg   default:
9997ec681f3Smrg      unreachable("Invalid address format");
10007ec681f3Smrg   }
10017e102996Smaya}
10027e102996Smaya
10037e102996Smaya/** Returns true if the given address format resolves to a global address */
10047e102996Smayastatic bool
10057ec681f3Smrgaddr_format_is_global(nir_address_format addr_format,
10067ec681f3Smrg                      nir_variable_mode mode)
10077e102996Smaya{
10087ec681f3Smrg   if (addr_format == nir_address_format_62bit_generic)
10097ec681f3Smrg      return mode == nir_var_mem_global;
10107ec681f3Smrg
10117e102996Smaya   return addr_format == nir_address_format_32bit_global ||
10127e102996Smaya          addr_format == nir_address_format_64bit_global ||
10137ec681f3Smrg          addr_format == nir_address_format_64bit_global_32bit_offset ||
10147e102996Smaya          addr_format == nir_address_format_64bit_bounded_global;
10157e102996Smaya}
10167e102996Smaya
10177ec681f3Smrgstatic bool
10187ec681f3Smrgaddr_format_is_offset(nir_address_format addr_format,
10197ec681f3Smrg                      nir_variable_mode mode)
10207ec681f3Smrg{
10217ec681f3Smrg   if (addr_format == nir_address_format_62bit_generic)
10227ec681f3Smrg      return mode != nir_var_mem_global;
10237ec681f3Smrg
10247ec681f3Smrg   return addr_format == nir_address_format_32bit_offset ||
10257ec681f3Smrg          addr_format == nir_address_format_32bit_offset_as_64bit;
10267ec681f3Smrg}
10277ec681f3Smrg
10287e102996Smayastatic nir_ssa_def *
10297e102996Smayaaddr_to_global(nir_builder *b, nir_ssa_def *addr,
10307e102996Smaya               nir_address_format addr_format)
10317e102996Smaya{
10327e102996Smaya   switch (addr_format) {
10337e102996Smaya   case nir_address_format_32bit_global:
10347e102996Smaya   case nir_address_format_64bit_global:
10357ec681f3Smrg   case nir_address_format_62bit_generic:
10367e102996Smaya      assert(addr->num_components == 1);
10377e102996Smaya      return addr;
10387e102996Smaya
10397ec681f3Smrg   case nir_address_format_64bit_global_32bit_offset:
10407e102996Smaya   case nir_address_format_64bit_bounded_global:
10417e102996Smaya      assert(addr->num_components == 4);
10427e102996Smaya      return nir_iadd(b, nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)),
10437e102996Smaya                         nir_u2u64(b, nir_channel(b, addr, 3)));
10447e102996Smaya
10457e102996Smaya   case nir_address_format_32bit_index_offset:
10467ec681f3Smrg   case nir_address_format_32bit_index_offset_pack64:
10477ec681f3Smrg   case nir_address_format_vec2_index_32bit_offset:
10487ec681f3Smrg   case nir_address_format_32bit_offset:
10497ec681f3Smrg   case nir_address_format_32bit_offset_as_64bit:
10507ec681f3Smrg   case nir_address_format_logical:
10517e102996Smaya      unreachable("Cannot get a 64-bit address with this address format");
10527e102996Smaya   }
10537e102996Smaya
10547e102996Smaya   unreachable("Invalid address format");
10557e102996Smaya}
10567e102996Smaya
10577e102996Smayastatic bool
10587e102996Smayaaddr_format_needs_bounds_check(nir_address_format addr_format)
10597e102996Smaya{
10607e102996Smaya   return addr_format == nir_address_format_64bit_bounded_global;
10617e102996Smaya}
10627e102996Smaya
10637e102996Smayastatic nir_ssa_def *
10647e102996Smayaaddr_is_in_bounds(nir_builder *b, nir_ssa_def *addr,
10657e102996Smaya                  nir_address_format addr_format, unsigned size)
10667e102996Smaya{
10677e102996Smaya   assert(addr_format == nir_address_format_64bit_bounded_global);
10687e102996Smaya   assert(addr->num_components == 4);
10697e102996Smaya   return nir_ige(b, nir_channel(b, addr, 2),
10707e102996Smaya                     nir_iadd_imm(b, nir_channel(b, addr, 3), size));
10717e102996Smaya}
10727e102996Smaya
10737ec681f3Smrgstatic void
10747ec681f3Smrgnir_get_explicit_deref_range(nir_deref_instr *deref,
10757ec681f3Smrg                             nir_address_format addr_format,
10767ec681f3Smrg                             uint32_t *out_base,
10777ec681f3Smrg                             uint32_t *out_range)
10787e102996Smaya{
10797ec681f3Smrg   uint32_t base = 0;
10807ec681f3Smrg   uint32_t range = glsl_get_explicit_size(deref->type, false);
10817e102996Smaya
10827ec681f3Smrg   while (true) {
10837ec681f3Smrg      nir_deref_instr *parent = nir_deref_instr_parent(deref);
10847e102996Smaya
10857ec681f3Smrg      switch (deref->deref_type) {
10867ec681f3Smrg      case nir_deref_type_array:
10877ec681f3Smrg      case nir_deref_type_array_wildcard:
10887ec681f3Smrg      case nir_deref_type_ptr_as_array: {
10897ec681f3Smrg         const unsigned stride = nir_deref_instr_array_stride(deref);
10907ec681f3Smrg         if (stride == 0)
10917ec681f3Smrg            goto fail;
10927e102996Smaya
10937ec681f3Smrg         if (!parent)
10947ec681f3Smrg            goto fail;
10957e102996Smaya
10967ec681f3Smrg         if (deref->deref_type != nir_deref_type_array_wildcard &&
10977ec681f3Smrg             nir_src_is_const(deref->arr.index)) {
10987ec681f3Smrg            base += stride * nir_src_as_uint(deref->arr.index);
10997ec681f3Smrg         } else {
11007ec681f3Smrg            if (glsl_get_length(parent->type) == 0)
11017ec681f3Smrg               goto fail;
11027ec681f3Smrg            range += stride * (glsl_get_length(parent->type) - 1);
11037ec681f3Smrg         }
11047ec681f3Smrg         break;
11057ec681f3Smrg      }
11067e102996Smaya
11077ec681f3Smrg      case nir_deref_type_struct: {
11087ec681f3Smrg         if (!parent)
11097ec681f3Smrg            goto fail;
11107e102996Smaya
11117ec681f3Smrg         base += glsl_get_struct_field_offset(parent->type, deref->strct.index);
11127ec681f3Smrg         break;
11137ec681f3Smrg      }
11147e102996Smaya
11157ec681f3Smrg      case nir_deref_type_cast: {
11167ec681f3Smrg         nir_instr *parent_instr = deref->parent.ssa->parent_instr;
11177e102996Smaya
11187ec681f3Smrg         switch (parent_instr->type) {
11197ec681f3Smrg         case nir_instr_type_load_const: {
11207ec681f3Smrg            nir_load_const_instr *load = nir_instr_as_load_const(parent_instr);
11217e102996Smaya
11227ec681f3Smrg            switch (addr_format) {
11237ec681f3Smrg            case nir_address_format_32bit_offset:
11247ec681f3Smrg               base += load->value[1].u32;
11257ec681f3Smrg               break;
11267ec681f3Smrg            case nir_address_format_32bit_index_offset:
11277ec681f3Smrg               base += load->value[1].u32;
11287ec681f3Smrg               break;
11297ec681f3Smrg            case nir_address_format_vec2_index_32bit_offset:
11307ec681f3Smrg               base += load->value[2].u32;
11317ec681f3Smrg               break;
11327ec681f3Smrg            default:
11337ec681f3Smrg               goto fail;
11347ec681f3Smrg            }
11357ec681f3Smrg
11367ec681f3Smrg            *out_base = base;
11377ec681f3Smrg            *out_range = range;
11387ec681f3Smrg            return;
11397ec681f3Smrg         }
11407ec681f3Smrg
11417ec681f3Smrg         case nir_instr_type_intrinsic: {
11427ec681f3Smrg            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(parent_instr);
11437ec681f3Smrg            switch (intr->intrinsic) {
11447ec681f3Smrg            case nir_intrinsic_load_vulkan_descriptor:
11457ec681f3Smrg               /* Assume that a load_vulkan_descriptor won't contribute to an
11467ec681f3Smrg                * offset within the resource.
11477ec681f3Smrg                */
11487ec681f3Smrg               break;
11497ec681f3Smrg            default:
11507ec681f3Smrg               goto fail;
11517ec681f3Smrg            }
11527ec681f3Smrg
11537ec681f3Smrg            *out_base = base;
11547ec681f3Smrg            *out_range = range;
11557ec681f3Smrg            return;
11567ec681f3Smrg         }
11577ec681f3Smrg
11587ec681f3Smrg         default:
11597ec681f3Smrg            goto fail;
11607ec681f3Smrg         }
11617ec681f3Smrg      }
11627ec681f3Smrg
11637ec681f3Smrg      default:
11647ec681f3Smrg         goto fail;
11657ec681f3Smrg      }
11667ec681f3Smrg
11677ec681f3Smrg      deref = parent;
11687ec681f3Smrg   }
11697ec681f3Smrg
11707ec681f3Smrgfail:
11717ec681f3Smrg   *out_base = 0;
11727ec681f3Smrg   *out_range = ~0;
11737ec681f3Smrg}
11747ec681f3Smrg
11757ec681f3Smrgstatic nir_variable_mode
11767ec681f3Smrgcanonicalize_generic_modes(nir_variable_mode modes)
11777ec681f3Smrg{
11787ec681f3Smrg   assert(modes != 0);
11797ec681f3Smrg   if (util_bitcount(modes) == 1)
11807ec681f3Smrg      return modes;
11817ec681f3Smrg
11827ec681f3Smrg   assert(!(modes & ~(nir_var_function_temp | nir_var_shader_temp |
11837ec681f3Smrg                      nir_var_mem_shared | nir_var_mem_global)));
11847ec681f3Smrg
11857ec681f3Smrg   /* Canonicalize by converting shader_temp to function_temp */
11867ec681f3Smrg   if (modes & nir_var_shader_temp) {
11877ec681f3Smrg      modes &= ~nir_var_shader_temp;
11887ec681f3Smrg      modes |= nir_var_function_temp;
11897ec681f3Smrg   }
11907ec681f3Smrg
11917ec681f3Smrg   return modes;
11927ec681f3Smrg}
11937ec681f3Smrg
11947ec681f3Smrgstatic nir_ssa_def *
11957ec681f3Smrgbuild_explicit_io_load(nir_builder *b, nir_intrinsic_instr *intrin,
11967ec681f3Smrg                       nir_ssa_def *addr, nir_address_format addr_format,
11977ec681f3Smrg                       nir_variable_mode modes,
11987ec681f3Smrg                       uint32_t align_mul, uint32_t align_offset,
11997ec681f3Smrg                       unsigned num_components)
12007ec681f3Smrg{
12017ec681f3Smrg   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
12027ec681f3Smrg   modes = canonicalize_generic_modes(modes);
12037ec681f3Smrg
12047ec681f3Smrg   if (util_bitcount(modes) > 1) {
12057ec681f3Smrg      if (addr_format_is_global(addr_format, modes)) {
12067ec681f3Smrg         return build_explicit_io_load(b, intrin, addr, addr_format,
12077ec681f3Smrg                                       nir_var_mem_global,
12087ec681f3Smrg                                       align_mul, align_offset,
12097ec681f3Smrg                                       num_components);
12107ec681f3Smrg      } else if (modes & nir_var_function_temp) {
12117ec681f3Smrg         nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
12127ec681f3Smrg                                                      nir_var_function_temp));
12137ec681f3Smrg         nir_ssa_def *res1 =
12147ec681f3Smrg            build_explicit_io_load(b, intrin, addr, addr_format,
12157ec681f3Smrg                                   nir_var_function_temp,
12167ec681f3Smrg                                   align_mul, align_offset,
12177ec681f3Smrg                                   num_components);
12187ec681f3Smrg         nir_push_else(b, NULL);
12197ec681f3Smrg         nir_ssa_def *res2 =
12207ec681f3Smrg            build_explicit_io_load(b, intrin, addr, addr_format,
12217ec681f3Smrg                                   modes & ~nir_var_function_temp,
12227ec681f3Smrg                                   align_mul, align_offset,
12237ec681f3Smrg                                   num_components);
12247ec681f3Smrg         nir_pop_if(b, NULL);
12257ec681f3Smrg         return nir_if_phi(b, res1, res2);
12267ec681f3Smrg      } else {
12277ec681f3Smrg         nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
12287ec681f3Smrg                                                      nir_var_mem_shared));
12297ec681f3Smrg         assert(modes & nir_var_mem_shared);
12307ec681f3Smrg         nir_ssa_def *res1 =
12317ec681f3Smrg            build_explicit_io_load(b, intrin, addr, addr_format,
12327ec681f3Smrg                                   nir_var_mem_shared,
12337ec681f3Smrg                                   align_mul, align_offset,
12347ec681f3Smrg                                   num_components);
12357ec681f3Smrg         nir_push_else(b, NULL);
12367ec681f3Smrg         assert(modes & nir_var_mem_global);
12377ec681f3Smrg         nir_ssa_def *res2 =
12387ec681f3Smrg            build_explicit_io_load(b, intrin, addr, addr_format,
12397ec681f3Smrg                                   nir_var_mem_global,
12407ec681f3Smrg                                   align_mul, align_offset,
12417ec681f3Smrg                                   num_components);
12427ec681f3Smrg         nir_pop_if(b, NULL);
12437ec681f3Smrg         return nir_if_phi(b, res1, res2);
12447ec681f3Smrg      }
12457ec681f3Smrg   }
12467ec681f3Smrg
12477ec681f3Smrg   assert(util_bitcount(modes) == 1);
12487ec681f3Smrg   const nir_variable_mode mode = modes;
12497ec681f3Smrg
12507ec681f3Smrg   nir_intrinsic_op op;
12517ec681f3Smrg   switch (intrin->intrinsic) {
12527ec681f3Smrg   case nir_intrinsic_load_deref:
12537ec681f3Smrg      switch (mode) {
12547ec681f3Smrg      case nir_var_mem_ubo:
12557ec681f3Smrg         if (addr_format == nir_address_format_64bit_global_32bit_offset)
12567ec681f3Smrg            op = nir_intrinsic_load_global_constant_offset;
12577ec681f3Smrg         else if (addr_format == nir_address_format_64bit_bounded_global)
12587ec681f3Smrg            op = nir_intrinsic_load_global_constant_bounded;
12597ec681f3Smrg         else if (addr_format_is_global(addr_format, mode))
12607ec681f3Smrg            op = nir_intrinsic_load_global_constant;
12617ec681f3Smrg         else
12627ec681f3Smrg            op = nir_intrinsic_load_ubo;
12637ec681f3Smrg         break;
12647ec681f3Smrg      case nir_var_mem_ssbo:
12657ec681f3Smrg         if (addr_format_is_global(addr_format, mode))
12667ec681f3Smrg            op = nir_intrinsic_load_global;
12677ec681f3Smrg         else
12687ec681f3Smrg            op = nir_intrinsic_load_ssbo;
12697ec681f3Smrg         break;
12707ec681f3Smrg      case nir_var_mem_global:
12717ec681f3Smrg         assert(addr_format_is_global(addr_format, mode));
12727ec681f3Smrg         op = nir_intrinsic_load_global;
12737ec681f3Smrg         break;
12747ec681f3Smrg      case nir_var_uniform:
12757ec681f3Smrg         assert(addr_format_is_offset(addr_format, mode));
12767ec681f3Smrg         assert(b->shader->info.stage == MESA_SHADER_KERNEL);
12777ec681f3Smrg         op = nir_intrinsic_load_kernel_input;
12787ec681f3Smrg         break;
12797ec681f3Smrg      case nir_var_mem_shared:
12807ec681f3Smrg         assert(addr_format_is_offset(addr_format, mode));
12817ec681f3Smrg         op = nir_intrinsic_load_shared;
12827ec681f3Smrg         break;
12837ec681f3Smrg      case nir_var_shader_temp:
12847ec681f3Smrg      case nir_var_function_temp:
12857ec681f3Smrg         if (addr_format_is_offset(addr_format, mode)) {
12867ec681f3Smrg            op = nir_intrinsic_load_scratch;
12877ec681f3Smrg         } else {
12887ec681f3Smrg            assert(addr_format_is_global(addr_format, mode));
12897ec681f3Smrg            op = nir_intrinsic_load_global;
12907ec681f3Smrg         }
12917ec681f3Smrg         break;
12927ec681f3Smrg      case nir_var_mem_push_const:
12937ec681f3Smrg         assert(addr_format == nir_address_format_32bit_offset);
12947ec681f3Smrg         op = nir_intrinsic_load_push_constant;
12957ec681f3Smrg         break;
12967ec681f3Smrg      case nir_var_mem_constant:
12977ec681f3Smrg         if (addr_format_is_offset(addr_format, mode)) {
12987ec681f3Smrg            op = nir_intrinsic_load_constant;
12997ec681f3Smrg         } else {
13007ec681f3Smrg            assert(addr_format_is_global(addr_format, mode));
13017ec681f3Smrg            op = nir_intrinsic_load_global_constant;
13027ec681f3Smrg         }
13037ec681f3Smrg         break;
13047ec681f3Smrg      default:
13057ec681f3Smrg         unreachable("Unsupported explicit IO variable mode");
13067ec681f3Smrg      }
13077ec681f3Smrg      break;
13087ec681f3Smrg
13097ec681f3Smrg   case nir_intrinsic_load_deref_block_intel:
13107ec681f3Smrg      switch (mode) {
13117ec681f3Smrg      case nir_var_mem_ssbo:
13127ec681f3Smrg         if (addr_format_is_global(addr_format, mode))
13137ec681f3Smrg            op = nir_intrinsic_load_global_block_intel;
13147ec681f3Smrg         else
13157ec681f3Smrg            op = nir_intrinsic_load_ssbo_block_intel;
13167ec681f3Smrg         break;
13177ec681f3Smrg      case nir_var_mem_global:
13187ec681f3Smrg         op = nir_intrinsic_load_global_block_intel;
13197ec681f3Smrg         break;
13207ec681f3Smrg      case nir_var_mem_shared:
13217ec681f3Smrg         op = nir_intrinsic_load_shared_block_intel;
13227ec681f3Smrg         break;
13237ec681f3Smrg      default:
13247ec681f3Smrg         unreachable("Unsupported explicit IO variable mode");
13257ec681f3Smrg      }
13267ec681f3Smrg      break;
13277ec681f3Smrg
13287ec681f3Smrg   default:
13297ec681f3Smrg      unreachable("Invalid intrinsic");
13307ec681f3Smrg   }
13317ec681f3Smrg
13327ec681f3Smrg   nir_intrinsic_instr *load = nir_intrinsic_instr_create(b->shader, op);
13337ec681f3Smrg
13347ec681f3Smrg   if (op == nir_intrinsic_load_global_constant_offset) {
13357ec681f3Smrg      assert(addr_format == nir_address_format_64bit_global_32bit_offset);
13367ec681f3Smrg      load->src[0] = nir_src_for_ssa(
13377ec681f3Smrg         nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)));
13387ec681f3Smrg      load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3));
13397ec681f3Smrg   } else if (op == nir_intrinsic_load_global_constant_bounded) {
13407ec681f3Smrg      assert(addr_format == nir_address_format_64bit_bounded_global);
13417ec681f3Smrg      load->src[0] = nir_src_for_ssa(
13427ec681f3Smrg         nir_pack_64_2x32(b, nir_channels(b, addr, 0x3)));
13437ec681f3Smrg      load->src[1] = nir_src_for_ssa(nir_channel(b, addr, 3));
13447ec681f3Smrg      load->src[2] = nir_src_for_ssa(nir_channel(b, addr, 2));
13457ec681f3Smrg   } else if (addr_format_is_global(addr_format, mode)) {
13467ec681f3Smrg      load->src[0] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
13477ec681f3Smrg   } else if (addr_format_is_offset(addr_format, mode)) {
13487ec681f3Smrg      assert(addr->num_components == 1);
13497ec681f3Smrg      load->src[0] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
13507ec681f3Smrg   } else {
13517ec681f3Smrg      load->src[0] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
13527ec681f3Smrg      load->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
13537ec681f3Smrg   }
13547ec681f3Smrg
13557ec681f3Smrg   if (nir_intrinsic_has_access(load))
13567ec681f3Smrg      nir_intrinsic_set_access(load, nir_intrinsic_access(intrin));
13577ec681f3Smrg
13587ec681f3Smrg   if (op == nir_intrinsic_load_constant) {
13597ec681f3Smrg      nir_intrinsic_set_base(load, 0);
13607ec681f3Smrg      nir_intrinsic_set_range(load, b->shader->constant_data_size);
13617ec681f3Smrg   } else if (mode == nir_var_mem_push_const) {
13627ec681f3Smrg      /* Push constants are required to be able to be chased back to the
13637ec681f3Smrg       * variable so we can provide a base/range.
13647ec681f3Smrg       */
13657ec681f3Smrg      nir_variable *var = nir_deref_instr_get_variable(deref);
13667ec681f3Smrg      nir_intrinsic_set_base(load, 0);
13677ec681f3Smrg      nir_intrinsic_set_range(load, glsl_get_explicit_size(var->type, false));
13687ec681f3Smrg   }
13697ec681f3Smrg
13707ec681f3Smrg   unsigned bit_size = intrin->dest.ssa.bit_size;
13717ec681f3Smrg   if (bit_size == 1) {
13727ec681f3Smrg      /* TODO: Make the native bool bit_size an option. */
13737ec681f3Smrg      bit_size = 32;
13747ec681f3Smrg   }
13757ec681f3Smrg
13767ec681f3Smrg   if (nir_intrinsic_has_align(load))
13777ec681f3Smrg      nir_intrinsic_set_align(load, align_mul, align_offset);
13787ec681f3Smrg
13797ec681f3Smrg   if (nir_intrinsic_has_range_base(load)) {
13807ec681f3Smrg      unsigned base, range;
13817ec681f3Smrg      nir_get_explicit_deref_range(deref, addr_format, &base, &range);
13827ec681f3Smrg      nir_intrinsic_set_range_base(load, base);
13837ec681f3Smrg      nir_intrinsic_set_range(load, range);
13847ec681f3Smrg   }
13857ec681f3Smrg
13867ec681f3Smrg   assert(intrin->dest.is_ssa);
13877ec681f3Smrg   load->num_components = num_components;
13887ec681f3Smrg   nir_ssa_dest_init(&load->instr, &load->dest, num_components,
13897ec681f3Smrg                     bit_size, NULL);
13907ec681f3Smrg
13917ec681f3Smrg   assert(bit_size % 8 == 0);
13927ec681f3Smrg
13937ec681f3Smrg   nir_ssa_def *result;
13947ec681f3Smrg   if (addr_format_needs_bounds_check(addr_format) &&
13957ec681f3Smrg       op != nir_intrinsic_load_global_constant_bounded) {
13967ec681f3Smrg      /* We don't need to bounds-check global_constant_bounded because bounds
13977ec681f3Smrg       * checking is handled by the intrinsic itself.
13987ec681f3Smrg       *
13997ec681f3Smrg       * The Vulkan spec for robustBufferAccess gives us quite a few options
14007ec681f3Smrg       * as to what we can do with an OOB read.  Unfortunately, returning
14017ec681f3Smrg       * undefined values isn't one of them so we return an actual zero.
14027ec681f3Smrg       */
14037ec681f3Smrg      nir_ssa_def *zero = nir_imm_zero(b, load->num_components, bit_size);
14047ec681f3Smrg
14057ec681f3Smrg      /* TODO: Better handle block_intel. */
14067ec681f3Smrg      const unsigned load_size = (bit_size / 8) * load->num_components;
14077ec681f3Smrg      nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, load_size));
14087e102996Smaya
14097e102996Smaya      nir_builder_instr_insert(b, &load->instr);
14107e102996Smaya
14117e102996Smaya      nir_pop_if(b, NULL);
14127e102996Smaya
14137ec681f3Smrg      result = nir_if_phi(b, &load->dest.ssa, zero);
14147e102996Smaya   } else {
14157e102996Smaya      nir_builder_instr_insert(b, &load->instr);
14167ec681f3Smrg      result = &load->dest.ssa;
14177ec681f3Smrg   }
14187ec681f3Smrg
14197ec681f3Smrg   if (intrin->dest.ssa.bit_size == 1) {
14207ec681f3Smrg      /* For shared, we can go ahead and use NIR's and/or the back-end's
14217ec681f3Smrg       * standard encoding for booleans rather than forcing a 0/1 boolean.
14227ec681f3Smrg       * This should save an instruction or two.
14237ec681f3Smrg       */
14247ec681f3Smrg      if (mode == nir_var_mem_shared ||
14257ec681f3Smrg          mode == nir_var_shader_temp ||
14267ec681f3Smrg          mode == nir_var_function_temp)
14277ec681f3Smrg         result = nir_b2b1(b, result);
14287ec681f3Smrg      else
14297ec681f3Smrg         result = nir_i2b(b, result);
14307e102996Smaya   }
14317ec681f3Smrg
14327ec681f3Smrg   return result;
14337e102996Smaya}
14347e102996Smaya
/* Lower a store_deref (or store_deref_block_intel) whose explicit address
 * has already been computed to the matching explicit store intrinsic.
 *
 * b:            builder positioned where the store should be emitted
 * intrin:       the original deref-based store intrinsic (source of access
 *               flags and component count)
 * addr:         explicit address in addr_format
 * modes:        set of possible variable modes for the address; if more than
 *               one mode remains after canonicalization, a runtime mode check
 *               is emitted and we recurse with a single mode on each branch
 * align_mul/align_offset: alignment of the access (A*mul + offset)
 * value:        the SSA value to store
 * write_mask:   component mask; must be non-zero for store_deref and zero
 *               for store_deref_block_intel
 */
static void
build_explicit_io_store(nir_builder *b, nir_intrinsic_instr *intrin,
                        nir_ssa_def *addr, nir_address_format addr_format,
                        nir_variable_mode modes,
                        uint32_t align_mul, uint32_t align_offset,
                        nir_ssa_def *value, nir_component_mask_t write_mask)
{
   modes = canonicalize_generic_modes(modes);

   if (util_bitcount(modes) > 1) {
      if (addr_format_is_global(addr_format, modes)) {
         /* All remaining modes share the global address space, so one
          * global store covers every possibility.
          */
         build_explicit_io_store(b, intrin, addr, addr_format,
                                 nir_var_mem_global,
                                 align_mul, align_offset,
                                 value, write_mask);
      } else if (modes & nir_var_function_temp) {
         /* Split off function_temp with a runtime check, then handle the
          * remaining modes on the else branch.
          */
         nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
                                                      nir_var_function_temp));
         build_explicit_io_store(b, intrin, addr, addr_format,
                                 nir_var_function_temp,
                                 align_mul, align_offset,
                                 value, write_mask);
         nir_push_else(b, NULL);
         build_explicit_io_store(b, intrin, addr, addr_format,
                                 modes & ~nir_var_function_temp,
                                 align_mul, align_offset,
                                 value, write_mask);
         nir_pop_if(b, NULL);
      } else {
         /* After canonicalization, the only remaining two-mode combination
          * here is shared | global: pick at runtime.
          */
         nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
                                                      nir_var_mem_shared));
         assert(modes & nir_var_mem_shared);
         build_explicit_io_store(b, intrin, addr, addr_format,
                                 nir_var_mem_shared,
                                 align_mul, align_offset,
                                 value, write_mask);
         nir_push_else(b, NULL);
         assert(modes & nir_var_mem_global);
         build_explicit_io_store(b, intrin, addr, addr_format,
                                 nir_var_mem_global,
                                 align_mul, align_offset,
                                 value, write_mask);
         nir_pop_if(b, NULL);
      }
      return;
   }

   /* From here on we are lowering for exactly one mode. */
   assert(util_bitcount(modes) == 1);
   const nir_variable_mode mode = modes;

   /* Pick the target intrinsic from (source intrinsic, mode, addr format). */
   nir_intrinsic_op op;
   switch (intrin->intrinsic) {
   case nir_intrinsic_store_deref:
      assert(write_mask != 0);

      switch (mode) {
      case nir_var_mem_ssbo:
         if (addr_format_is_global(addr_format, mode))
            op = nir_intrinsic_store_global;
         else
            op = nir_intrinsic_store_ssbo;
         break;
      case nir_var_mem_global:
         assert(addr_format_is_global(addr_format, mode));
         op = nir_intrinsic_store_global;
         break;
      case nir_var_mem_shared:
         assert(addr_format_is_offset(addr_format, mode));
         op = nir_intrinsic_store_shared;
         break;
      case nir_var_shader_temp:
      case nir_var_function_temp:
         if (addr_format_is_offset(addr_format, mode)) {
            op = nir_intrinsic_store_scratch;
         } else {
            assert(addr_format_is_global(addr_format, mode));
            op = nir_intrinsic_store_global;
         }
         break;
      default:
         unreachable("Unsupported explicit IO variable mode");
      }
      break;

   case nir_intrinsic_store_deref_block_intel:
      /* Block stores have no per-component mask. */
      assert(write_mask == 0);

      switch (mode) {
      case nir_var_mem_ssbo:
         if (addr_format_is_global(addr_format, mode))
            op = nir_intrinsic_store_global_block_intel;
         else
            op = nir_intrinsic_store_ssbo_block_intel;
         break;
      case nir_var_mem_global:
         op = nir_intrinsic_store_global_block_intel;
         break;
      case nir_var_mem_shared:
         op = nir_intrinsic_store_shared_block_intel;
         break;
      default:
         unreachable("Unsupported explicit IO variable mode");
      }
      break;

   default:
      unreachable("Invalid intrinsic");
   }

   nir_intrinsic_instr *store = nir_intrinsic_instr_create(b->shader, op);

   if (value->bit_size == 1) {
      /* For shared, we can go ahead and use NIR's and/or the back-end's
       * standard encoding for booleans rather than forcing a 0/1 boolean.
       * This should save an instruction or two.
       *
       * TODO: Make the native bool bit_size an option.
       */
      if (mode == nir_var_mem_shared ||
          mode == nir_var_shader_temp ||
          mode == nir_var_function_temp)
         value = nir_b2b32(b, value);
      else
         value = nir_b2i(b, value, 32);
   }

   /* Source layout depends on the address format: global takes a single
    * address source, offset formats take a single offset, and everything
    * else takes a (block index, offset) pair.
    */
   store->src[0] = nir_src_for_ssa(value);
   if (addr_format_is_global(addr_format, mode)) {
      store->src[1] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
   } else if (addr_format_is_offset(addr_format, mode)) {
      assert(addr->num_components == 1);
      store->src[1] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
   } else {
      store->src[1] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
      store->src[2] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
   }

   nir_intrinsic_set_write_mask(store, write_mask);

   /* Propagate access qualifiers from the original deref intrinsic. */
   if (nir_intrinsic_has_access(store))
      nir_intrinsic_set_access(store, nir_intrinsic_access(intrin));

   nir_intrinsic_set_align(store, align_mul, align_offset);

   assert(value->num_components == 1 ||
          value->num_components == intrin->num_components);
   store->num_components = value->num_components;

   assert(value->bit_size % 8 == 0);

   if (addr_format_needs_bounds_check(addr_format)) {
      /* TODO: Better handle block_intel. */
      /* Guard the store so out-of-bounds writes are simply dropped. */
      const unsigned store_size = (value->bit_size / 8) * store->num_components;
      nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, store_size));

      nir_builder_instr_insert(b, &store->instr);

      nir_pop_if(b, NULL);
   } else {
      nir_builder_instr_insert(b, &store->instr);
   }
}
15977e102996Smaya
/* Lower a deref-based atomic whose explicit address has already been
 * computed to the matching explicit atomic intrinsic and return its
 * result value.
 *
 * Like build_explicit_io_store, if more than one variable mode is possible
 * (generic pointers), a runtime mode check is emitted and we recurse with a
 * single mode on each branch, joining the results with a phi.
 */
static nir_ssa_def *
build_explicit_io_atomic(nir_builder *b, nir_intrinsic_instr *intrin,
                         nir_ssa_def *addr, nir_address_format addr_format,
                         nir_variable_mode modes)
{
   modes = canonicalize_generic_modes(modes);

   if (util_bitcount(modes) > 1) {
      if (addr_format_is_global(addr_format, modes)) {
         /* All remaining modes share the global address space. */
         return build_explicit_io_atomic(b, intrin, addr, addr_format,
                                         nir_var_mem_global);
      } else if (modes & nir_var_function_temp) {
         /* Split off function_temp at runtime, handle the rest on else. */
         nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
                                                      nir_var_function_temp));
         nir_ssa_def *res1 =
            build_explicit_io_atomic(b, intrin, addr, addr_format,
                                     nir_var_function_temp);
         nir_push_else(b, NULL);
         nir_ssa_def *res2 =
            build_explicit_io_atomic(b, intrin, addr, addr_format,
                                     modes & ~nir_var_function_temp);
         nir_pop_if(b, NULL);
         return nir_if_phi(b, res1, res2);
      } else {
         /* Remaining two-mode case: shared vs. global, chosen at runtime. */
         nir_push_if(b, build_runtime_addr_mode_check(b, addr, addr_format,
                                                      nir_var_mem_shared));
         assert(modes & nir_var_mem_shared);
         nir_ssa_def *res1 =
            build_explicit_io_atomic(b, intrin, addr, addr_format,
                                     nir_var_mem_shared);
         nir_push_else(b, NULL);
         assert(modes & nir_var_mem_global);
         nir_ssa_def *res2 =
            build_explicit_io_atomic(b, intrin, addr, addr_format,
                                     nir_var_mem_global);
         nir_pop_if(b, NULL);
         return nir_if_phi(b, res1, res2);
      }
   }

   /* From here on we are lowering for exactly one mode. */
   assert(util_bitcount(modes) == 1);
   const nir_variable_mode mode = modes;

   /* Every atomic source after the deref is a data operand to forward. */
   const unsigned num_data_srcs =
      nir_intrinsic_infos[intrin->intrinsic].num_srcs - 1;

   nir_intrinsic_op op;
   switch (mode) {
   case nir_var_mem_ssbo:
      if (addr_format_is_global(addr_format, mode))
         op = global_atomic_for_deref(intrin->intrinsic);
      else
         op = ssbo_atomic_for_deref(intrin->intrinsic);
      break;
   case nir_var_mem_global:
      assert(addr_format_is_global(addr_format, mode));
      op = global_atomic_for_deref(intrin->intrinsic);
      break;
   case nir_var_mem_shared:
      assert(addr_format_is_offset(addr_format, mode));
      op = shared_atomic_for_deref(intrin->intrinsic);
      break;
   default:
      unreachable("Unsupported explicit IO variable mode");
   }

   nir_intrinsic_instr *atomic = nir_intrinsic_instr_create(b->shader, op);

   /* Address sources first (one or two depending on format), then data. */
   unsigned src = 0;
   if (addr_format_is_global(addr_format, mode)) {
      atomic->src[src++] = nir_src_for_ssa(addr_to_global(b, addr, addr_format));
   } else if (addr_format_is_offset(addr_format, mode)) {
      assert(addr->num_components == 1);
      atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
   } else {
      atomic->src[src++] = nir_src_for_ssa(addr_to_index(b, addr, addr_format));
      atomic->src[src++] = nir_src_for_ssa(addr_to_offset(b, addr, addr_format));
   }
   for (unsigned i = 0; i < num_data_srcs; i++) {
      atomic->src[src++] = nir_src_for_ssa(intrin->src[1 + i].ssa);
   }

   /* Global atomics don't have access flags because they assume that the
    * address may be non-uniform.
    */
   if (nir_intrinsic_has_access(atomic))
      nir_intrinsic_set_access(atomic, nir_intrinsic_access(intrin));

   assert(intrin->dest.ssa.num_components == 1);
   nir_ssa_dest_init(&atomic->instr, &atomic->dest,
                     1, intrin->dest.ssa.bit_size, NULL);

   assert(atomic->dest.ssa.bit_size % 8 == 0);

   if (addr_format_needs_bounds_check(addr_format)) {
      /* Guard the atomic; an out-of-bounds atomic yields an undef value
       * (unlike loads, which return zero).
       */
      const unsigned atomic_size = atomic->dest.ssa.bit_size / 8;
      nir_push_if(b, addr_is_in_bounds(b, addr, addr_format, atomic_size));

      nir_builder_instr_insert(b, &atomic->instr);

      nir_pop_if(b, NULL);
      return nir_if_phi(b, &atomic->dest.ssa,
                           nir_ssa_undef(b, 1, atomic->dest.ssa.bit_size));
   } else {
      nir_builder_instr_insert(b, &atomic->instr);
      return &atomic->dest.ssa;
   }
}
17067e102996Smaya
17077e102996Smayanir_ssa_def *
17087e102996Smayanir_explicit_io_address_from_deref(nir_builder *b, nir_deref_instr *deref,
17097e102996Smaya                                   nir_ssa_def *base_addr,
17107e102996Smaya                                   nir_address_format addr_format)
17117e102996Smaya{
17127e102996Smaya   assert(deref->dest.is_ssa);
17137e102996Smaya   switch (deref->deref_type) {
17147e102996Smaya   case nir_deref_type_var:
17157ec681f3Smrg      return build_addr_for_var(b, deref->var, addr_format);
17167e102996Smaya
17177e102996Smaya   case nir_deref_type_array: {
17187ec681f3Smrg      unsigned stride = nir_deref_instr_array_stride(deref);
17197e102996Smaya      assert(stride > 0);
17207e102996Smaya
17217e102996Smaya      nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
17227ec681f3Smrg      index = nir_i2i(b, index, addr_get_offset_bit_size(base_addr, addr_format));
17237ec681f3Smrg      return build_addr_iadd(b, base_addr, addr_format, deref->modes,
17247ec681f3Smrg                                nir_amul_imm(b, index, stride));
17257e102996Smaya   }
17267e102996Smaya
17277e102996Smaya   case nir_deref_type_ptr_as_array: {
17287e102996Smaya      nir_ssa_def *index = nir_ssa_for_src(b, deref->arr.index, 1);
17297ec681f3Smrg      index = nir_i2i(b, index, addr_get_offset_bit_size(base_addr, addr_format));
17307ec681f3Smrg      unsigned stride = nir_deref_instr_array_stride(deref);
17317ec681f3Smrg      return build_addr_iadd(b, base_addr, addr_format, deref->modes,
17327ec681f3Smrg                                nir_amul_imm(b, index, stride));
17337e102996Smaya   }
17347e102996Smaya
17357e102996Smaya   case nir_deref_type_array_wildcard:
17367e102996Smaya      unreachable("Wildcards should be lowered by now");
17377e102996Smaya      break;
17387e102996Smaya
17397e102996Smaya   case nir_deref_type_struct: {
17407e102996Smaya      nir_deref_instr *parent = nir_deref_instr_parent(deref);
17417e102996Smaya      int offset = glsl_get_struct_field_offset(parent->type,
17427e102996Smaya                                                deref->strct.index);
17437e102996Smaya      assert(offset >= 0);
17447ec681f3Smrg      return build_addr_iadd_imm(b, base_addr, addr_format,
17457ec681f3Smrg                                 deref->modes, offset);
17467e102996Smaya   }
17477e102996Smaya
17487e102996Smaya   case nir_deref_type_cast:
17497e102996Smaya      /* Nothing to do here */
17507e102996Smaya      return base_addr;
17517e102996Smaya   }
17527e102996Smaya
17537e102996Smaya   unreachable("Invalid NIR deref type");
17547e102996Smaya}
17557e102996Smaya
/* Lower a single deref-based I/O intrinsic (load, store, block load/store,
 * or atomic) to explicit intrinsics, given the pre-computed explicit address
 * of its deref.  The original intrinsic is removed; for value-producing
 * intrinsics, all uses are rewritten to the lowered result.
 *
 * Vectors whose explicit stride exceeds the scalar size (e.g. std140-style
 * layouts) cannot be accessed as a single contiguous vector, so loads and
 * stores of them are split into one scalar access per component.
 */
void
nir_lower_explicit_io_instr(nir_builder *b,
                            nir_intrinsic_instr *intrin,
                            nir_ssa_def *addr,
                            nir_address_format addr_format)
{
   b->cursor = nir_after_instr(&intrin->instr);

   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
   unsigned vec_stride = glsl_get_explicit_stride(deref->type);
   unsigned scalar_size = type_scalar_size_bytes(deref->type);
   /* A non-zero stride only makes sense on vectors and must cover at least
    * one scalar.
    */
   assert(vec_stride == 0 || glsl_type_is_vector(deref->type));
   assert(vec_stride == 0 || vec_stride >= scalar_size);

   uint32_t align_mul, align_offset;
   if (!nir_get_explicit_deref_align(deref, true, &align_mul, &align_offset)) {
      /* If we don't have an alignment from the deref, assume scalar */
      align_mul = scalar_size;
      align_offset = 0;
   }

   switch (intrin->intrinsic) {
   case nir_intrinsic_load_deref: {
      nir_ssa_def *value;
      if (vec_stride > scalar_size) {
         /* Strided vector: load each component separately at its own
          * offset, tracking per-component alignment, then re-assemble.
          */
         nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS] = { NULL, };
         for (unsigned i = 0; i < intrin->num_components; i++) {
            unsigned comp_offset = i * vec_stride;
            nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format,
                                                         deref->modes,
                                                         comp_offset);
            comps[i] = build_explicit_io_load(b, intrin, comp_addr,
                                              addr_format, deref->modes,
                                              align_mul,
                                              (align_offset + comp_offset) %
                                                 align_mul,
                                              1);
         }
         value = nir_vec(b, comps, intrin->num_components);
      } else {
         value = build_explicit_io_load(b, intrin, addr, addr_format,
                                        deref->modes, align_mul, align_offset,
                                        intrin->num_components);
      }
      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, value);
      break;
   }

   case nir_intrinsic_store_deref: {
      assert(intrin->src[1].is_ssa);
      nir_ssa_def *value = intrin->src[1].ssa;
      nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
      if (vec_stride > scalar_size) {
         /* Strided vector: store only the components selected by the
          * write mask, one scalar store each.
          */
         for (unsigned i = 0; i < intrin->num_components; i++) {
            if (!(write_mask & (1 << i)))
               continue;

            unsigned comp_offset = i * vec_stride;
            nir_ssa_def *comp_addr = build_addr_iadd_imm(b, addr, addr_format,
                                                         deref->modes,
                                                         comp_offset);
            build_explicit_io_store(b, intrin, comp_addr, addr_format,
                                    deref->modes, align_mul,
                                    (align_offset + comp_offset) % align_mul,
                                    nir_channel(b, value, i), 1);
         }
      } else {
         build_explicit_io_store(b, intrin, addr, addr_format,
                                 deref->modes, align_mul, align_offset,
                                 value, write_mask);
      }
      break;
   }

   case nir_intrinsic_load_deref_block_intel: {
      /* Block loads are never split; the whole block goes at once. */
      nir_ssa_def *value = build_explicit_io_load(b, intrin, addr, addr_format,
                                                  deref->modes,
                                                  align_mul, align_offset,
                                                  intrin->num_components);
      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, value);
      break;
   }

   case nir_intrinsic_store_deref_block_intel: {
      assert(intrin->src[1].is_ssa);
      nir_ssa_def *value = intrin->src[1].ssa;
      /* Block stores carry no per-component write mask. */
      const nir_component_mask_t write_mask = 0;
      build_explicit_io_store(b, intrin, addr, addr_format,
                              deref->modes, align_mul, align_offset,
                              value, write_mask);
      break;
   }

   default: {
      /* Everything else reaching this lowering is a deref atomic. */
      nir_ssa_def *value =
         build_explicit_io_atomic(b, intrin, addr, addr_format, deref->modes);
      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, value);
      break;
   }
   }

   nir_instr_remove(&intrin->instr);
}
18597e102996Smaya
/** Compute what is known about the alignment of a deref's address.
 *
 * On success, the address A produced for \p deref is known to satisfy
 * A % *align_mul == *align_offset (so *align_offset < *align_mul).
 * Returns false when no alignment information can be derived.  Walks the
 * deref chain recursively toward the root, tightening the parent's
 * alignment at each level.
 */
bool
nir_get_explicit_deref_align(nir_deref_instr *deref,
                             bool default_to_type_align,
                             uint32_t *align_mul,
                             uint32_t *align_offset)
{
   if (deref->deref_type == nir_deref_type_var) {
      /* If we see a variable, align_mul is effectively infinite because we
       * know the offset exactly (up to the offset of the base pointer for the
       * given variable mode).   We have to pick something so we choose 256B
       * as an arbitrary alignment which seems high enough for any reasonable
       * wide-load use-case.  Back-ends should clamp alignments down if 256B
       * is too large for some reason.
       */
      *align_mul = 256;
      *align_offset = deref->var->data.driver_location % 256;
      return true;
   }

   /* If we're a cast deref that has an alignment, use that. */
   if (deref->deref_type == nir_deref_type_cast && deref->cast.align_mul > 0) {
      *align_mul = deref->cast.align_mul;
      *align_offset = deref->cast.align_offset;
      return true;
   }

   /* Otherwise, we need to compute the alignment based on the parent */
   nir_deref_instr *parent = nir_deref_instr_parent(deref);
   if (parent == NULL) {
      /* Only casts can legitimately start a chain without a parent. */
      assert(deref->deref_type == nir_deref_type_cast);
      if (default_to_type_align) {
         /* If we don't have a parent, assume the type's alignment, if any. */
         unsigned type_align = glsl_get_explicit_alignment(deref->type);
         if (type_align == 0)
            return false;

         *align_mul = type_align;
         *align_offset = 0;
         return true;
      } else {
         return false;
      }
   }

   uint32_t parent_mul, parent_offset;
   if (!nir_get_explicit_deref_align(parent, default_to_type_align,
                                     &parent_mul, &parent_offset))
      return false;

   switch (deref->deref_type) {
   case nir_deref_type_var:
      unreachable("Handled above");

   case nir_deref_type_array:
   case nir_deref_type_array_wildcard:
   case nir_deref_type_ptr_as_array: {
      const unsigned stride = nir_deref_instr_array_stride(deref);
      if (stride == 0)
         return false;

      if (deref->deref_type != nir_deref_type_array_wildcard &&
          nir_src_is_const(deref->arr.index)) {
         /* Constant index: the byte offset is exact, so only the offset
          * within the parent's alignment period changes.
          */
         unsigned offset = nir_src_as_uint(deref->arr.index) * stride;
         *align_mul = parent_mul;
         *align_offset = (parent_offset + offset) % parent_mul;
      } else {
         /* If this is a wildcard or an indirect deref, we have to go with the
          * power-of-two gcd.
          */
         *align_mul = MIN2(parent_mul, 1 << (ffs(stride) - 1));
         *align_offset = parent_offset % *align_mul;
      }
      return true;
   }

   case nir_deref_type_struct: {
      /* Struct members have fixed offsets, so just shift the parent's
       * offset by the member's explicit offset.
       */
      const int offset = glsl_get_struct_field_offset(parent->type,
                                                      deref->strct.index);
      if (offset < 0)
         return false;

      *align_mul = parent_mul;
      *align_offset = (parent_offset + offset) % parent_mul;
      return true;
   }

   case nir_deref_type_cast:
      /* We handled the explicit alignment case above. */
      assert(deref->cast.align_mul == 0);
      *align_mul = parent_mul;
      *align_offset = parent_offset;
      return true;
   }

   unreachable("Invalid deref_instr_type");
}
19567ec681f3Smrg
19577e102996Smayastatic void
19587e102996Smayalower_explicit_io_deref(nir_builder *b, nir_deref_instr *deref,
19597e102996Smaya                        nir_address_format addr_format)
19607e102996Smaya{
19617e102996Smaya   /* Just delete the deref if it's not used.  We can't use
19627e102996Smaya    * nir_deref_instr_remove_if_unused here because it may remove more than
19637e102996Smaya    * one deref which could break our list walking since we walk the list
19647e102996Smaya    * backwards.
19657e102996Smaya    */
19667ec681f3Smrg   assert(list_is_empty(&deref->dest.ssa.if_uses));
19677ec681f3Smrg   if (list_is_empty(&deref->dest.ssa.uses)) {
19687e102996Smaya      nir_instr_remove(&deref->instr);
19697e102996Smaya      return;
19707e102996Smaya   }
19717e102996Smaya
19727e102996Smaya   b->cursor = nir_after_instr(&deref->instr);
19737e102996Smaya
19747e102996Smaya   nir_ssa_def *base_addr = NULL;
19757e102996Smaya   if (deref->deref_type != nir_deref_type_var) {
19767e102996Smaya      assert(deref->parent.is_ssa);
19777e102996Smaya      base_addr = deref->parent.ssa;
19787e102996Smaya   }
19797e102996Smaya
19807e102996Smaya   nir_ssa_def *addr = nir_explicit_io_address_from_deref(b, deref, base_addr,
19817e102996Smaya                                                          addr_format);
19827ec681f3Smrg   assert(addr->bit_size == deref->dest.ssa.bit_size);
19837ec681f3Smrg   assert(addr->num_components == deref->dest.ssa.num_components);
19847e102996Smaya
19857e102996Smaya   nir_instr_remove(&deref->instr);
19867ec681f3Smrg   nir_ssa_def_rewrite_uses(&deref->dest.ssa, addr);
19877e102996Smaya}
19887e102996Smaya
19897e102996Smayastatic void
19907e102996Smayalower_explicit_io_access(nir_builder *b, nir_intrinsic_instr *intrin,
19917e102996Smaya                         nir_address_format addr_format)
19927e102996Smaya{
19937e102996Smaya   assert(intrin->src[0].is_ssa);
19947e102996Smaya   nir_lower_explicit_io_instr(b, intrin, intrin->src[0].ssa, addr_format);
19957e102996Smaya}
19967e102996Smaya
19977e102996Smayastatic void
19987e102996Smayalower_explicit_io_array_length(nir_builder *b, nir_intrinsic_instr *intrin,
19997e102996Smaya                               nir_address_format addr_format)
20007e102996Smaya{
20017e102996Smaya   b->cursor = nir_after_instr(&intrin->instr);
20027e102996Smaya
20037e102996Smaya   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
20047e102996Smaya
20057e102996Smaya   assert(glsl_type_is_array(deref->type));
20067e102996Smaya   assert(glsl_get_length(deref->type) == 0);
20077ec681f3Smrg   assert(nir_deref_mode_is(deref, nir_var_mem_ssbo));
20087e102996Smaya   unsigned stride = glsl_get_explicit_stride(deref->type);
20097e102996Smaya   assert(stride > 0);
20107e102996Smaya
20117e102996Smaya   nir_ssa_def *addr = &deref->dest.ssa;
20127e102996Smaya   nir_ssa_def *index = addr_to_index(b, addr, addr_format);
20137e102996Smaya   nir_ssa_def *offset = addr_to_offset(b, addr, addr_format);
20147ec681f3Smrg   unsigned access = nir_intrinsic_access(intrin);
20157e102996Smaya
20167ec681f3Smrg   nir_ssa_def *arr_size = nir_get_ssbo_size(b, index, .access=access);
20177ec681f3Smrg   arr_size = nir_imax(b, nir_isub(b, arr_size, offset), nir_imm_int(b, 0u));
20187ec681f3Smrg   arr_size = nir_idiv(b, arr_size, nir_imm_int(b, stride));
20197e102996Smaya
20207ec681f3Smrg   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, arr_size);
20217e102996Smaya   nir_instr_remove(&intrin->instr);
20227e102996Smaya}
20237e102996Smaya
20247ec681f3Smrgstatic void
20257ec681f3Smrglower_explicit_io_mode_check(nir_builder *b, nir_intrinsic_instr *intrin,
20267ec681f3Smrg                             nir_address_format addr_format)
20277ec681f3Smrg{
20287ec681f3Smrg   if (addr_format_is_global(addr_format, 0)) {
20297ec681f3Smrg      /* If the address format is always global, then the driver can use
20307ec681f3Smrg       * global addresses regardless of the mode.  In that case, don't create
20317ec681f3Smrg       * a check, just whack the intrinsic to addr_mode_is and delegate to the
20327ec681f3Smrg       * driver lowering.
20337ec681f3Smrg       */
20347ec681f3Smrg      intrin->intrinsic = nir_intrinsic_addr_mode_is;
20357ec681f3Smrg      return;
20367ec681f3Smrg   }
20377ec681f3Smrg
20387ec681f3Smrg   assert(intrin->src[0].is_ssa);
20397ec681f3Smrg   nir_ssa_def *addr = intrin->src[0].ssa;
20407ec681f3Smrg
20417ec681f3Smrg   b->cursor = nir_instr_remove(&intrin->instr);
20427ec681f3Smrg
20437ec681f3Smrg   nir_ssa_def *is_mode =
20447ec681f3Smrg      build_runtime_addr_mode_check(b, addr, addr_format,
20457ec681f3Smrg                                    nir_intrinsic_memory_modes(intrin));
20467ec681f3Smrg
20477ec681f3Smrg   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, is_mode);
20487ec681f3Smrg}
20497ec681f3Smrg
/* Per-function worker for nir_lower_explicit_io(): rewrites derefs of the
 * requested modes into address arithmetic and deref-based access intrinsics
 * into address-based ones.
 */
static bool
nir_lower_explicit_io_impl(nir_function_impl *impl, nir_variable_mode modes,
                           nir_address_format addr_format)
{
   bool progress = false;

   nir_builder b;
   nir_builder_init(&b, impl);

   /* Walk in reverse order so that we can see the full deref chain when we
    * lower the access operations.  We lower them assuming that the derefs
    * will be turned into address calculations later.
    */
   nir_foreach_block_reverse(block, impl) {
      nir_foreach_instr_reverse_safe(instr, block) {
         switch (instr->type) {
         case nir_instr_type_deref: {
            /* The deref itself becomes an explicit address computation. */
            nir_deref_instr *deref = nir_instr_as_deref(instr);
            if (nir_deref_mode_is_in_set(deref, modes)) {
               lower_explicit_io_deref(&b, deref, addr_format);
               progress = true;
            }
            break;
         }

         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            switch (intrin->intrinsic) {
            /* All deref-based loads, stores, and atomics. */
            case nir_intrinsic_load_deref:
            case nir_intrinsic_store_deref:
            case nir_intrinsic_load_deref_block_intel:
            case nir_intrinsic_store_deref_block_intel:
            case nir_intrinsic_deref_atomic_add:
            case nir_intrinsic_deref_atomic_imin:
            case nir_intrinsic_deref_atomic_umin:
            case nir_intrinsic_deref_atomic_imax:
            case nir_intrinsic_deref_atomic_umax:
            case nir_intrinsic_deref_atomic_and:
            case nir_intrinsic_deref_atomic_or:
            case nir_intrinsic_deref_atomic_xor:
            case nir_intrinsic_deref_atomic_exchange:
            case nir_intrinsic_deref_atomic_comp_swap:
            case nir_intrinsic_deref_atomic_fadd:
            case nir_intrinsic_deref_atomic_fmin:
            case nir_intrinsic_deref_atomic_fmax:
            case nir_intrinsic_deref_atomic_fcomp_swap: {
               nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
               if (nir_deref_mode_is_in_set(deref, modes)) {
                  lower_explicit_io_access(&b, intrin, addr_format);
                  progress = true;
               }
               break;
            }

            case nir_intrinsic_deref_buffer_array_length: {
               nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
               if (nir_deref_mode_is_in_set(deref, modes)) {
                  lower_explicit_io_array_length(&b, intrin, addr_format);
                  progress = true;
               }
               break;
            }

            case nir_intrinsic_deref_mode_is: {
               nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
               if (nir_deref_mode_is_in_set(deref, modes)) {
                  lower_explicit_io_mode_check(&b, intrin, addr_format);
                  progress = true;
               }
               break;
            }

            default:
               break;
            }
            break;
         }

         default:
            /* Nothing to do */
            break;
         }
      }
   }

   /* We only remove/replace instructions; the CFG shape is untouched. */
   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
   } else {
      nir_metadata_preserve(impl, nir_metadata_all);
   }

   return progress;
}
21447e102996Smaya
21457ec681f3Smrg/** Lower explicitly laid out I/O access to byte offset/address intrinsics
21467ec681f3Smrg *
21477ec681f3Smrg * This pass is intended to be used for any I/O which touches memory external
21487ec681f3Smrg * to the shader or which is directly visible to the client.  It requires that
21497ec681f3Smrg * all data types in the given modes have a explicit stride/offset decorations
21507ec681f3Smrg * to tell it exactly how to calculate the offset/address for the given load,
21517ec681f3Smrg * store, or atomic operation.  If the offset/stride information does not come
21527ec681f3Smrg * from the client explicitly (as with shared variables in GL or Vulkan),
21537ec681f3Smrg * nir_lower_vars_to_explicit_types() can be used to add them.
21547ec681f3Smrg *
21557ec681f3Smrg * Unlike nir_lower_io, this pass is fully capable of handling incomplete
21567ec681f3Smrg * pointer chains which may contain cast derefs.  It does so by walking the
21577ec681f3Smrg * deref chain backwards and simply replacing each deref, one at a time, with
21587ec681f3Smrg * the appropriate address calculation.  The pass takes a nir_address_format
21597ec681f3Smrg * parameter which describes how the offset or address is to be represented
21607ec681f3Smrg * during calculations.  By ensuring that the address is always in a
21617ec681f3Smrg * consistent format, pointers can safely be conjured from thin air by the
21627ec681f3Smrg * driver, stored to variables, passed through phis, etc.
21637ec681f3Smrg *
21647ec681f3Smrg * The one exception to the simple algorithm described above is for handling
21657ec681f3Smrg * row-major matrices in which case we may look down one additional level of
21667ec681f3Smrg * the deref chain.
21677ec681f3Smrg *
21687ec681f3Smrg * This pass is also capable of handling OpenCL generic pointers.  If the
21697ec681f3Smrg * address mode is global, it will lower any ambiguous (more than one mode)
21707ec681f3Smrg * access to global and pass through the deref_mode_is run-time checks as
21717ec681f3Smrg * addr_mode_is.  This assumes the driver has somehow mapped shared and
21727ec681f3Smrg * scratch memory to the global address space.  For other modes such as
21737ec681f3Smrg * 62bit_generic, there is an enum embedded in the address and we lower
21747ec681f3Smrg * ambiguous access to an if-ladder and deref_mode_is to a check against the
21757ec681f3Smrg * embedded enum.  If nir_lower_explicit_io is called on any shader that
21767ec681f3Smrg * contains generic pointers, it must either be used on all of the generic
21777ec681f3Smrg * modes or none.
21787ec681f3Smrg */
21797e102996Smayabool
21807e102996Smayanir_lower_explicit_io(nir_shader *shader, nir_variable_mode modes,
21817e102996Smaya                      nir_address_format addr_format)
21827e102996Smaya{
21837e102996Smaya   bool progress = false;
21847e102996Smaya
21857e102996Smaya   nir_foreach_function(function, shader) {
21867e102996Smaya      if (function->impl &&
21877e102996Smaya          nir_lower_explicit_io_impl(function->impl, modes, addr_format))
21887e102996Smaya         progress = true;
21897e102996Smaya   }
21907e102996Smaya
21917e102996Smaya   return progress;
21927e102996Smaya}
21937e102996Smaya
21947ec681f3Smrgstatic bool
21957ec681f3Smrgnir_lower_vars_to_explicit_types_impl(nir_function_impl *impl,
21967ec681f3Smrg                                      nir_variable_mode modes,
21977ec681f3Smrg                                      glsl_type_size_align_func type_info)
21987ec681f3Smrg{
21997ec681f3Smrg   bool progress = false;
22007ec681f3Smrg
22017ec681f3Smrg   nir_foreach_block(block, impl) {
22027ec681f3Smrg      nir_foreach_instr(instr, block) {
22037ec681f3Smrg         if (instr->type != nir_instr_type_deref)
22047ec681f3Smrg            continue;
22057ec681f3Smrg
22067ec681f3Smrg         nir_deref_instr *deref = nir_instr_as_deref(instr);
22077ec681f3Smrg         if (!nir_deref_mode_is_in_set(deref, modes))
22087ec681f3Smrg            continue;
22097ec681f3Smrg
22107ec681f3Smrg         unsigned size, alignment;
22117ec681f3Smrg         const struct glsl_type *new_type =
22127ec681f3Smrg            glsl_get_explicit_type_for_size_align(deref->type, type_info, &size, &alignment);
22137ec681f3Smrg         if (new_type != deref->type) {
22147ec681f3Smrg            progress = true;
22157ec681f3Smrg            deref->type = new_type;
22167ec681f3Smrg         }
22177ec681f3Smrg         if (deref->deref_type == nir_deref_type_cast) {
22187ec681f3Smrg            /* See also glsl_type::get_explicit_type_for_size_align() */
22197ec681f3Smrg            unsigned new_stride = align(size, alignment);
22207ec681f3Smrg            if (new_stride != deref->cast.ptr_stride) {
22217ec681f3Smrg               deref->cast.ptr_stride = new_stride;
22227ec681f3Smrg               progress = true;
22237ec681f3Smrg            }
22247ec681f3Smrg         }
22257ec681f3Smrg      }
22267ec681f3Smrg   }
22277ec681f3Smrg
22287ec681f3Smrg   if (progress) {
22297ec681f3Smrg      nir_metadata_preserve(impl, nir_metadata_block_index |
22307ec681f3Smrg                                  nir_metadata_dominance |
22317ec681f3Smrg                                  nir_metadata_live_ssa_defs |
22327ec681f3Smrg                                  nir_metadata_loop_analysis);
22337ec681f3Smrg   } else {
22347ec681f3Smrg      nir_metadata_preserve(impl, nir_metadata_all);
22357ec681f3Smrg   }
22367ec681f3Smrg
22377ec681f3Smrg   return progress;
22387ec681f3Smrg}
22397ec681f3Smrg
/* Assign an explicit driver_location (byte offset) to every variable of
 * the given mode in \p vars, packing them with the size/alignment reported
 * by \p type_info, and grow the shader's corresponding size counter.
 */
static bool
lower_vars_to_explicit(nir_shader *shader,
                       struct exec_list *vars, nir_variable_mode mode,
                       glsl_type_size_align_func type_info)
{
   bool progress = false;
   unsigned offset;
   /* Start appending after anything already accounted for in the shader's
    * running totals for this mode.
    */
   switch (mode) {
   case nir_var_uniform:
      assert(shader->info.stage == MESA_SHADER_KERNEL);
      offset = 0;
      break;
   case nir_var_function_temp:
   case nir_var_shader_temp:
      offset = shader->scratch_size;
      break;
   case nir_var_mem_shared:
      offset = shader->info.shared_size;
      break;
   case nir_var_mem_constant:
      offset = shader->constant_data_size;
      break;
   case nir_var_shader_call_data:
   case nir_var_ray_hit_attrib:
      offset = 0;
      break;
   default:
      unreachable("Unsupported mode");
   }
   nir_foreach_variable_in_list(var, vars) {
      if (var->data.mode != mode)
         continue;

      /* Rewrite the variable's type to one with explicit layout. */
      unsigned size, align;
      const struct glsl_type *explicit_type =
         glsl_get_explicit_type_for_size_align(var->type, type_info, &size, &align);

      if (explicit_type != var->type)
         var->type = explicit_type;

      /* Empty structs legitimately report a zero (non-power-of-two)
       * alignment; allow them through the assert below.
       */
      UNUSED bool is_empty_struct =
         glsl_type_is_struct_or_ifc(explicit_type) &&
         glsl_get_length(explicit_type) == 0;

      assert(util_is_power_of_two_nonzero(align) || is_empty_struct);
      var->data.driver_location = ALIGN_POT(offset, align);
      offset = var->data.driver_location + size;
      progress = true;
   }

   /* Write the new total back to the matching per-mode size counter.
    * This switch must stay in lock-step with the one above.
    */
   switch (mode) {
   case nir_var_uniform:
      assert(shader->info.stage == MESA_SHADER_KERNEL);
      shader->num_uniforms = offset;
      break;
   case nir_var_shader_temp:
   case nir_var_function_temp:
      shader->scratch_size = offset;
      break;
   case nir_var_mem_shared:
      shader->info.shared_size = offset;
      break;
   case nir_var_mem_constant:
      shader->constant_data_size = offset;
      break;
   case nir_var_shader_call_data:
   case nir_var_ray_hit_attrib:
      /* No shader-level size counter for these modes. */
      break;
   default:
      unreachable("Unsupported mode");
   }

   return progress;
}
23147ec681f3Smrg
23157ec681f3Smrg/* If nir_lower_vars_to_explicit_types is called on any shader that contains
23167ec681f3Smrg * generic pointers, it must either be used on all of the generic modes or
23177ec681f3Smrg * none.
23187ec681f3Smrg */
23197ec681f3Smrgbool
23207ec681f3Smrgnir_lower_vars_to_explicit_types(nir_shader *shader,
23217ec681f3Smrg                                 nir_variable_mode modes,
23227ec681f3Smrg                                 glsl_type_size_align_func type_info)
23237ec681f3Smrg{
23247ec681f3Smrg   /* TODO: Situations which need to be handled to support more modes:
23257ec681f3Smrg    * - row-major matrices
23267ec681f3Smrg    * - compact shader inputs/outputs
23277ec681f3Smrg    * - interface types
23287ec681f3Smrg    */
23297ec681f3Smrg   ASSERTED nir_variable_mode supported =
23307ec681f3Smrg      nir_var_mem_shared | nir_var_mem_global | nir_var_mem_constant |
23317ec681f3Smrg      nir_var_shader_temp | nir_var_function_temp | nir_var_uniform |
23327ec681f3Smrg      nir_var_shader_call_data | nir_var_ray_hit_attrib;
23337ec681f3Smrg   assert(!(modes & ~supported) && "unsupported");
23347ec681f3Smrg
23357ec681f3Smrg   bool progress = false;
23367ec681f3Smrg
23377ec681f3Smrg   if (modes & nir_var_uniform)
23387ec681f3Smrg      progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_uniform, type_info);
23397ec681f3Smrg
23407ec681f3Smrg   if (modes & nir_var_mem_shared) {
23417ec681f3Smrg      assert(!shader->info.shared_memory_explicit_layout);
23427ec681f3Smrg      progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_shared, type_info);
23437ec681f3Smrg   }
23447ec681f3Smrg
23457ec681f3Smrg   if (modes & nir_var_shader_temp)
23467ec681f3Smrg      progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_temp, type_info);
23477ec681f3Smrg   if (modes & nir_var_mem_constant)
23487ec681f3Smrg      progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_mem_constant, type_info);
23497ec681f3Smrg   if (modes & nir_var_shader_call_data)
23507ec681f3Smrg      progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_shader_call_data, type_info);
23517ec681f3Smrg   if (modes & nir_var_ray_hit_attrib)
23527ec681f3Smrg      progress |= lower_vars_to_explicit(shader, &shader->variables, nir_var_ray_hit_attrib, type_info);
23537ec681f3Smrg
23547ec681f3Smrg   nir_foreach_function(function, shader) {
23557ec681f3Smrg      if (function->impl) {
23567ec681f3Smrg         if (modes & nir_var_function_temp)
23577ec681f3Smrg            progress |= lower_vars_to_explicit(shader, &function->impl->locals, nir_var_function_temp, type_info);
23587ec681f3Smrg
23597ec681f3Smrg         progress |= nir_lower_vars_to_explicit_types_impl(function->impl, modes, type_info);
23607ec681f3Smrg      }
23617ec681f3Smrg   }
23627ec681f3Smrg
23637ec681f3Smrg   return progress;
23647ec681f3Smrg}
23657ec681f3Smrg
/* Serialize the constant \p c into \p dst following the explicit layout
 * (offsets and strides) baked into \p type.  \p dst_size is used only for
 * bounds asserts.  NOTE(review): assumes \p c is fully populated for the
 * given type (non-NULL elements at every level) — callers must guarantee
 * this.
 */
static void
write_constant(void *dst, size_t dst_size,
               const nir_constant *c, const struct glsl_type *type)
{
   if (glsl_type_is_vector_or_scalar(type)) {
      const unsigned num_components = glsl_get_vector_elements(type);
      const unsigned bit_size = glsl_get_bit_size(type);
      if (bit_size == 1) {
         /* Booleans are special-cased to be 32-bit
          *
          * TODO: Make the native bool bit_size an option.
          */
         assert(num_components * 4 <= dst_size);
         for (unsigned i = 0; i < num_components; i++) {
            /* true -> 0xffffffff, false -> 0 via negation of 0/1. */
            int32_t b32 = -(int)c->values[i].b;
            memcpy((char *)dst + i * 4, &b32, 4);
         }
      } else {
         assert(bit_size >= 8 && bit_size % 8 == 0);
         const unsigned byte_size = bit_size / 8;
         assert(num_components * byte_size <= dst_size);
         for (unsigned i = 0; i < num_components; i++) {
            /* Annoyingly, thanks to packed structs, we can't make any
             * assumptions about the alignment of dst.  To avoid any strange
             * issues with unaligned writes, we always use memcpy.
             */
            memcpy((char *)dst + i * byte_size, &c->values[i], byte_size);
         }
      }
   } else if (glsl_type_is_array_or_matrix(type)) {
      /* Recurse per element at the type's explicit array stride. */
      const unsigned array_len = glsl_get_length(type);
      const unsigned stride = glsl_get_explicit_stride(type);
      assert(stride > 0);
      const struct glsl_type *elem_type = glsl_get_array_element(type);
      for (unsigned i = 0; i < array_len; i++) {
         unsigned elem_offset = i * stride;
         assert(elem_offset < dst_size);
         write_constant((char *)dst + elem_offset, dst_size - elem_offset,
                        c->elements[i], elem_type);
      }
   } else {
      /* Structs/interfaces: recurse per field at its explicit offset. */
      assert(glsl_type_is_struct_or_ifc(type));
      const unsigned num_fields = glsl_get_length(type);
      for (unsigned i = 0; i < num_fields; i++) {
         const int field_offset = glsl_get_struct_field_offset(type, i);
         assert(field_offset >= 0 && field_offset < dst_size);
         const struct glsl_type *field_type = glsl_get_struct_field(type, i);
         write_constant((char *)dst + field_offset, dst_size - field_offset,
                        c->elements[i], field_type);
      }
   }
}
24187ec681f3Smrg
24197ec681f3Smrgvoid
24207ec681f3Smrgnir_gather_explicit_io_initializers(nir_shader *shader,
24217ec681f3Smrg                                    void *dst, size_t dst_size,
24227ec681f3Smrg                                    nir_variable_mode mode)
24237ec681f3Smrg{
24247ec681f3Smrg   /* It doesn't really make sense to gather initializers for more than one
24257ec681f3Smrg    * mode at a time.  If this ever becomes well-defined, we can drop the
24267ec681f3Smrg    * assert then.
24277ec681f3Smrg    */
24287ec681f3Smrg   assert(util_bitcount(mode) == 1);
24297ec681f3Smrg
24307ec681f3Smrg   nir_foreach_variable_with_modes(var, shader, mode) {
24317ec681f3Smrg      assert(var->data.driver_location < dst_size);
24327ec681f3Smrg      write_constant((char *)dst + var->data.driver_location,
24337ec681f3Smrg                     dst_size - var->data.driver_location,
24347ec681f3Smrg                     var->constant_initializer, var->type);
24357ec681f3Smrg   }
24367ec681f3Smrg}
24377ec681f3Smrg
243801e04c3fSmrg/**
243901e04c3fSmrg * Return the offset source for a load/store intrinsic.
244001e04c3fSmrg */
244101e04c3fSmrgnir_src *
244201e04c3fSmrgnir_get_io_offset_src(nir_intrinsic_instr *instr)
244301e04c3fSmrg{
244401e04c3fSmrg   switch (instr->intrinsic) {
244501e04c3fSmrg   case nir_intrinsic_load_input:
244601e04c3fSmrg   case nir_intrinsic_load_output:
24477e102996Smaya   case nir_intrinsic_load_shared:
244801e04c3fSmrg   case nir_intrinsic_load_uniform:
24497ec681f3Smrg   case nir_intrinsic_load_kernel_input:
24507e102996Smaya   case nir_intrinsic_load_global:
24517ec681f3Smrg   case nir_intrinsic_load_global_constant:
24527e102996Smaya   case nir_intrinsic_load_scratch:
24537ec681f3Smrg   case nir_intrinsic_load_fs_input_interp_deltas:
24547ec681f3Smrg   case nir_intrinsic_shared_atomic_add:
24557ec681f3Smrg   case nir_intrinsic_shared_atomic_and:
24567ec681f3Smrg   case nir_intrinsic_shared_atomic_comp_swap:
24577ec681f3Smrg   case nir_intrinsic_shared_atomic_exchange:
24587ec681f3Smrg   case nir_intrinsic_shared_atomic_fadd:
24597ec681f3Smrg   case nir_intrinsic_shared_atomic_fcomp_swap:
24607ec681f3Smrg   case nir_intrinsic_shared_atomic_fmax:
24617ec681f3Smrg   case nir_intrinsic_shared_atomic_fmin:
24627ec681f3Smrg   case nir_intrinsic_shared_atomic_imax:
24637ec681f3Smrg   case nir_intrinsic_shared_atomic_imin:
24647ec681f3Smrg   case nir_intrinsic_shared_atomic_or:
24657ec681f3Smrg   case nir_intrinsic_shared_atomic_umax:
24667ec681f3Smrg   case nir_intrinsic_shared_atomic_umin:
24677ec681f3Smrg   case nir_intrinsic_shared_atomic_xor:
24687ec681f3Smrg   case nir_intrinsic_global_atomic_add:
24697ec681f3Smrg   case nir_intrinsic_global_atomic_and:
24707ec681f3Smrg   case nir_intrinsic_global_atomic_comp_swap:
24717ec681f3Smrg   case nir_intrinsic_global_atomic_exchange:
24727ec681f3Smrg   case nir_intrinsic_global_atomic_fadd:
24737ec681f3Smrg   case nir_intrinsic_global_atomic_fcomp_swap:
24747ec681f3Smrg   case nir_intrinsic_global_atomic_fmax:
24757ec681f3Smrg   case nir_intrinsic_global_atomic_fmin:
24767ec681f3Smrg   case nir_intrinsic_global_atomic_imax:
24777ec681f3Smrg   case nir_intrinsic_global_atomic_imin:
24787ec681f3Smrg   case nir_intrinsic_global_atomic_or:
24797ec681f3Smrg   case nir_intrinsic_global_atomic_umax:
24807ec681f3Smrg   case nir_intrinsic_global_atomic_umin:
24817ec681f3Smrg   case nir_intrinsic_global_atomic_xor:
248201e04c3fSmrg      return &instr->src[0];
248301e04c3fSmrg   case nir_intrinsic_load_ubo:
248401e04c3fSmrg   case nir_intrinsic_load_ssbo:
24857ec681f3Smrg   case nir_intrinsic_load_input_vertex:
248601e04c3fSmrg   case nir_intrinsic_load_per_vertex_input:
248701e04c3fSmrg   case nir_intrinsic_load_per_vertex_output:
24887ec681f3Smrg   case nir_intrinsic_load_per_primitive_output:
248901e04c3fSmrg   case nir_intrinsic_load_interpolated_input:
249001e04c3fSmrg   case nir_intrinsic_store_output:
24917e102996Smaya   case nir_intrinsic_store_shared:
24927e102996Smaya   case nir_intrinsic_store_global:
24937e102996Smaya   case nir_intrinsic_store_scratch:
24947ec681f3Smrg   case nir_intrinsic_ssbo_atomic_add:
24957ec681f3Smrg   case nir_intrinsic_ssbo_atomic_imin:
24967ec681f3Smrg   case nir_intrinsic_ssbo_atomic_umin:
24977ec681f3Smrg   case nir_intrinsic_ssbo_atomic_imax:
24987ec681f3Smrg   case nir_intrinsic_ssbo_atomic_umax:
24997ec681f3Smrg   case nir_intrinsic_ssbo_atomic_and:
25007ec681f3Smrg   case nir_intrinsic_ssbo_atomic_or:
25017ec681f3Smrg   case nir_intrinsic_ssbo_atomic_xor:
25027ec681f3Smrg   case nir_intrinsic_ssbo_atomic_exchange:
25037ec681f3Smrg   case nir_intrinsic_ssbo_atomic_comp_swap:
25047ec681f3Smrg   case nir_intrinsic_ssbo_atomic_fadd:
25057ec681f3Smrg   case nir_intrinsic_ssbo_atomic_fmin:
25067ec681f3Smrg   case nir_intrinsic_ssbo_atomic_fmax:
25077ec681f3Smrg   case nir_intrinsic_ssbo_atomic_fcomp_swap:
250801e04c3fSmrg      return &instr->src[1];
250901e04c3fSmrg   case nir_intrinsic_store_ssbo:
251001e04c3fSmrg   case nir_intrinsic_store_per_vertex_output:
25117ec681f3Smrg   case nir_intrinsic_store_per_primitive_output:
251201e04c3fSmrg      return &instr->src[2];
251301e04c3fSmrg   default:
251401e04c3fSmrg      return NULL;
251501e04c3fSmrg   }
251601e04c3fSmrg}
251701e04c3fSmrg
251801e04c3fSmrg/**
251901e04c3fSmrg * Return the vertex index source for a load/store per_vertex intrinsic.
252001e04c3fSmrg */
252101e04c3fSmrgnir_src *
252201e04c3fSmrgnir_get_io_vertex_index_src(nir_intrinsic_instr *instr)
252301e04c3fSmrg{
252401e04c3fSmrg   switch (instr->intrinsic) {
252501e04c3fSmrg   case nir_intrinsic_load_per_vertex_input:
252601e04c3fSmrg   case nir_intrinsic_load_per_vertex_output:
252701e04c3fSmrg      return &instr->src[0];
252801e04c3fSmrg   case nir_intrinsic_store_per_vertex_output:
252901e04c3fSmrg      return &instr->src[1];
253001e04c3fSmrg   default:
253101e04c3fSmrg      return NULL;
253201e04c3fSmrg   }
253301e04c3fSmrg}
25347ec681f3Smrg
25357ec681f3Smrg/**
25367ec681f3Smrg * Return the numeric constant that identify a NULL pointer for each address
25377ec681f3Smrg * format.
25387ec681f3Smrg */
25397ec681f3Smrgconst nir_const_value *
25407ec681f3Smrgnir_address_format_null_value(nir_address_format addr_format)
25417ec681f3Smrg{
25427ec681f3Smrg   const static nir_const_value null_values[][NIR_MAX_VEC_COMPONENTS] = {
25437ec681f3Smrg      [nir_address_format_32bit_global] = {{0}},
25447ec681f3Smrg      [nir_address_format_64bit_global] = {{0}},
25457ec681f3Smrg      [nir_address_format_64bit_global_32bit_offset] = {{0}},
25467ec681f3Smrg      [nir_address_format_64bit_bounded_global] = {{0}},
25477ec681f3Smrg      [nir_address_format_32bit_index_offset] = {{.u32 = ~0}, {.u32 = ~0}},
25487ec681f3Smrg      [nir_address_format_32bit_index_offset_pack64] = {{.u64 = ~0ull}},
25497ec681f3Smrg      [nir_address_format_vec2_index_32bit_offset] = {{.u32 = ~0}, {.u32 = ~0}, {.u32 = ~0}},
25507ec681f3Smrg      [nir_address_format_32bit_offset] = {{.u32 = ~0}},
25517ec681f3Smrg      [nir_address_format_32bit_offset_as_64bit] = {{.u64 = ~0ull}},
25527ec681f3Smrg      [nir_address_format_62bit_generic] = {{.u64 = 0}},
25537ec681f3Smrg      [nir_address_format_logical] = {{.u32 = ~0}},
25547ec681f3Smrg   };
25557ec681f3Smrg
25567ec681f3Smrg   assert(addr_format < ARRAY_SIZE(null_values));
25577ec681f3Smrg   return null_values[addr_format];
25587ec681f3Smrg}
25597ec681f3Smrg
25607ec681f3Smrgnir_ssa_def *
25617ec681f3Smrgnir_build_addr_ieq(nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
25627ec681f3Smrg                   nir_address_format addr_format)
25637ec681f3Smrg{
25647ec681f3Smrg   switch (addr_format) {
25657ec681f3Smrg   case nir_address_format_32bit_global:
25667ec681f3Smrg   case nir_address_format_64bit_global:
25677ec681f3Smrg   case nir_address_format_64bit_bounded_global:
25687ec681f3Smrg   case nir_address_format_32bit_index_offset:
25697ec681f3Smrg   case nir_address_format_vec2_index_32bit_offset:
25707ec681f3Smrg   case nir_address_format_32bit_offset:
25717ec681f3Smrg   case nir_address_format_62bit_generic:
25727ec681f3Smrg      return nir_ball_iequal(b, addr0, addr1);
25737ec681f3Smrg
25747ec681f3Smrg   case nir_address_format_64bit_global_32bit_offset:
25757ec681f3Smrg      return nir_ball_iequal(b, nir_channels(b, addr0, 0xb),
25767ec681f3Smrg                                nir_channels(b, addr1, 0xb));
25777ec681f3Smrg
25787ec681f3Smrg   case nir_address_format_32bit_offset_as_64bit:
25797ec681f3Smrg      assert(addr0->num_components == 1 && addr1->num_components == 1);
25807ec681f3Smrg      return nir_ieq(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1));
25817ec681f3Smrg
25827ec681f3Smrg   case nir_address_format_32bit_index_offset_pack64:
25837ec681f3Smrg      assert(addr0->num_components == 1 && addr1->num_components == 1);
25847ec681f3Smrg      return nir_ball_iequal(b, nir_unpack_64_2x32(b, addr0), nir_unpack_64_2x32(b, addr1));
25857ec681f3Smrg
25867ec681f3Smrg   case nir_address_format_logical:
25877ec681f3Smrg      unreachable("Unsupported address format");
25887ec681f3Smrg   }
25897ec681f3Smrg
25907ec681f3Smrg   unreachable("Invalid address format");
25917ec681f3Smrg}
25927ec681f3Smrg
25937ec681f3Smrgnir_ssa_def *
25947ec681f3Smrgnir_build_addr_isub(nir_builder *b, nir_ssa_def *addr0, nir_ssa_def *addr1,
25957ec681f3Smrg                    nir_address_format addr_format)
25967ec681f3Smrg{
25977ec681f3Smrg   switch (addr_format) {
25987ec681f3Smrg   case nir_address_format_32bit_global:
25997ec681f3Smrg   case nir_address_format_64bit_global:
26007ec681f3Smrg   case nir_address_format_32bit_offset:
26017ec681f3Smrg   case nir_address_format_32bit_index_offset_pack64:
26027ec681f3Smrg   case nir_address_format_62bit_generic:
26037ec681f3Smrg      assert(addr0->num_components == 1);
26047ec681f3Smrg      assert(addr1->num_components == 1);
26057ec681f3Smrg      return nir_isub(b, addr0, addr1);
26067ec681f3Smrg
26077ec681f3Smrg   case nir_address_format_32bit_offset_as_64bit:
26087ec681f3Smrg      assert(addr0->num_components == 1);
26097ec681f3Smrg      assert(addr1->num_components == 1);
26107ec681f3Smrg      return nir_u2u64(b, nir_isub(b, nir_u2u32(b, addr0), nir_u2u32(b, addr1)));
26117ec681f3Smrg
26127ec681f3Smrg   case nir_address_format_64bit_global_32bit_offset:
26137ec681f3Smrg   case nir_address_format_64bit_bounded_global:
26147ec681f3Smrg      return nir_isub(b, addr_to_global(b, addr0, addr_format),
26157ec681f3Smrg                         addr_to_global(b, addr1, addr_format));
26167ec681f3Smrg
26177ec681f3Smrg   case nir_address_format_32bit_index_offset:
26187ec681f3Smrg      assert(addr0->num_components == 2);
26197ec681f3Smrg      assert(addr1->num_components == 2);
26207ec681f3Smrg      /* Assume the same buffer index. */
26217ec681f3Smrg      return nir_isub(b, nir_channel(b, addr0, 1), nir_channel(b, addr1, 1));
26227ec681f3Smrg
26237ec681f3Smrg   case nir_address_format_vec2_index_32bit_offset:
26247ec681f3Smrg      assert(addr0->num_components == 3);
26257ec681f3Smrg      assert(addr1->num_components == 3);
26267ec681f3Smrg      /* Assume the same buffer index. */
26277ec681f3Smrg      return nir_isub(b, nir_channel(b, addr0, 2), nir_channel(b, addr1, 2));
26287ec681f3Smrg
26297ec681f3Smrg   case nir_address_format_logical:
26307ec681f3Smrg      unreachable("Unsupported address format");
26317ec681f3Smrg   }
26327ec681f3Smrg
26337ec681f3Smrg   unreachable("Invalid address format");
26347ec681f3Smrg}
26357ec681f3Smrg
26367ec681f3Smrgstatic bool
26377ec681f3Smrgis_input(nir_intrinsic_instr *intrin)
26387ec681f3Smrg{
26397ec681f3Smrg   return intrin->intrinsic == nir_intrinsic_load_input ||
26407ec681f3Smrg          intrin->intrinsic == nir_intrinsic_load_per_vertex_input ||
26417ec681f3Smrg          intrin->intrinsic == nir_intrinsic_load_interpolated_input ||
26427ec681f3Smrg          intrin->intrinsic == nir_intrinsic_load_fs_input_interp_deltas;
26437ec681f3Smrg}
26447ec681f3Smrg
26457ec681f3Smrgstatic bool
26467ec681f3Smrgis_output(nir_intrinsic_instr *intrin)
26477ec681f3Smrg{
26487ec681f3Smrg   return intrin->intrinsic == nir_intrinsic_load_output ||
26497ec681f3Smrg          intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
26507ec681f3Smrg          intrin->intrinsic == nir_intrinsic_load_per_primitive_output ||
26517ec681f3Smrg          intrin->intrinsic == nir_intrinsic_store_output ||
26527ec681f3Smrg          intrin->intrinsic == nir_intrinsic_store_per_vertex_output ||
26537ec681f3Smrg          intrin->intrinsic == nir_intrinsic_store_per_primitive_output;
26547ec681f3Smrg}
26557ec681f3Smrg
26567ec681f3Smrgstatic bool is_dual_slot(nir_intrinsic_instr *intrin)
26577ec681f3Smrg{
26587ec681f3Smrg   if (intrin->intrinsic == nir_intrinsic_store_output ||
26597ec681f3Smrg       intrin->intrinsic == nir_intrinsic_store_per_vertex_output) {
26607ec681f3Smrg      return nir_src_bit_size(intrin->src[0]) == 64 &&
26617ec681f3Smrg             nir_src_num_components(intrin->src[0]) >= 3;
26627ec681f3Smrg   }
26637ec681f3Smrg
26647ec681f3Smrg   return nir_dest_bit_size(intrin->dest) == 64 &&
26657ec681f3Smrg          nir_dest_num_components(intrin->dest) >= 3;
26667ec681f3Smrg}
26677ec681f3Smrg
26687ec681f3Smrg/**
26697ec681f3Smrg * This pass adds constant offsets to instr->const_index[0] for input/output
26707ec681f3Smrg * intrinsics, and resets the offset source to 0.  Non-constant offsets remain
26717ec681f3Smrg * unchanged - since we don't know what part of a compound variable is
26727ec681f3Smrg * accessed, we allocate storage for the entire thing. For drivers that use
26737ec681f3Smrg * nir_lower_io_to_temporaries() before nir_lower_io(), this guarantees that
26747ec681f3Smrg * the offset source will be 0, so that they don't have to add it in manually.
26757ec681f3Smrg */
26767ec681f3Smrg
/* Fold constant I/O offsets into the intrinsics of one block.
 *
 * For each input/output intrinsic selected by `modes` whose offset source
 * is a constant, add that constant to the intrinsic's base and location
 * and reset the offset source to 0.  Returns true if anything changed.
 */
static bool
add_const_offset_to_base_block(nir_block *block, nir_builder *b,
                               nir_variable_mode modes)
{
   bool progress = false;
   /* _safe iteration: nir_instr_rewrite_src below mutates the block's
    * instructions while we walk them.
    */
   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      if (((modes & nir_var_shader_in) && is_input(intrin)) ||
          ((modes & nir_var_shader_out) && is_output(intrin))) {
         nir_src *offset = nir_get_io_offset_src(intrin);

         /* TODO: Better handling of per-view variables here */
         if (nir_src_is_const(*offset) &&
             !nir_intrinsic_io_semantics(intrin).per_view) {
            unsigned off = nir_src_as_uint(*offset);

            /* Fold the constant offset into the base index... */
            nir_intrinsic_set_base(intrin, nir_intrinsic_base(intrin) + off);

            /* ...and keep the semantic location in sync with it. */
            nir_io_semantics sem = nir_intrinsic_io_semantics(intrin);
            sem.location += off;
            /* non-indirect indexing should reduce num_slots */
            sem.num_slots = is_dual_slot(intrin) ? 2 : 1;
            nir_intrinsic_set_io_semantics(intrin, sem);

            /* The offset is now accounted for in base; zero the source. */
            b->cursor = nir_before_instr(&intrin->instr);
            nir_instr_rewrite_src(&intrin->instr, offset,
                                  nir_src_for_ssa(nir_imm_int(b, 0)));
            progress = true;
         }
      }
   }

   return progress;
}
27157ec681f3Smrg
27167ec681f3Smrgbool
27177ec681f3Smrgnir_io_add_const_offset_to_base(nir_shader *nir, nir_variable_mode modes)
27187ec681f3Smrg{
27197ec681f3Smrg   bool progress = false;
27207ec681f3Smrg
27217ec681f3Smrg   nir_foreach_function(f, nir) {
27227ec681f3Smrg      if (f->impl) {
27237ec681f3Smrg         nir_builder b;
27247ec681f3Smrg         nir_builder_init(&b, f->impl);
27257ec681f3Smrg         nir_foreach_block(block, f->impl) {
27267ec681f3Smrg            progress |= add_const_offset_to_base_block(block, &b, modes);
27277ec681f3Smrg         }
27287ec681f3Smrg      }
27297ec681f3Smrg   }
27307ec681f3Smrg
27317ec681f3Smrg   return progress;
27327ec681f3Smrg}
27337ec681f3Smrg
2734