101e04c3fSmrg/*
201e04c3fSmrg * Copyright © 2018 Intel Corporation
301e04c3fSmrg *
401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
501e04c3fSmrg * copy of this software and associated documentation files (the "Software"),
601e04c3fSmrg * to deal in the Software without restriction, including without limitation
701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the
901e04c3fSmrg * Software is furnished to do so, subject to the following conditions:
1001e04c3fSmrg *
1101e04c3fSmrg * The above copyright notice and this permission notice (including the next
1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the
1301e04c3fSmrg * Software.
1401e04c3fSmrg *
1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
2101e04c3fSmrg * IN THE SOFTWARE.
2201e04c3fSmrg */
2301e04c3fSmrg
2401e04c3fSmrg#include "nir.h"
2501e04c3fSmrg#include "nir_builder.h"
2601e04c3fSmrg#include "nir_deref.h"
2701e04c3fSmrg#include "util/hash_table.h"
2801e04c3fSmrg
297ec681f3Smrgstatic bool
307ec681f3Smrgis_trivial_deref_cast(nir_deref_instr *cast)
317ec681f3Smrg{
327ec681f3Smrg   nir_deref_instr *parent = nir_src_as_deref(cast->parent);
337ec681f3Smrg   if (!parent)
347ec681f3Smrg      return false;
357ec681f3Smrg
367ec681f3Smrg   return cast->modes == parent->modes &&
377ec681f3Smrg          cast->type == parent->type &&
387ec681f3Smrg          cast->dest.ssa.num_components == parent->dest.ssa.num_components &&
397ec681f3Smrg          cast->dest.ssa.bit_size == parent->dest.ssa.bit_size;
407ec681f3Smrg}
417ec681f3Smrg
4201e04c3fSmrgvoid
4301e04c3fSmrgnir_deref_path_init(nir_deref_path *path,
4401e04c3fSmrg                    nir_deref_instr *deref, void *mem_ctx)
4501e04c3fSmrg{
4601e04c3fSmrg   assert(deref != NULL);
4701e04c3fSmrg
4801e04c3fSmrg   /* The length of the short path is at most ARRAY_SIZE - 1 because we need
4901e04c3fSmrg    * room for the NULL terminator.
5001e04c3fSmrg    */
5101e04c3fSmrg   static const int max_short_path_len = ARRAY_SIZE(path->_short_path) - 1;
5201e04c3fSmrg
5301e04c3fSmrg   int count = 0;
5401e04c3fSmrg
5501e04c3fSmrg   nir_deref_instr **tail = &path->_short_path[max_short_path_len];
5601e04c3fSmrg   nir_deref_instr **head = tail;
5701e04c3fSmrg
5801e04c3fSmrg   *tail = NULL;
5901e04c3fSmrg   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
607ec681f3Smrg      if (d->deref_type == nir_deref_type_cast && is_trivial_deref_cast(d))
617ec681f3Smrg         continue;
6201e04c3fSmrg      count++;
6301e04c3fSmrg      if (count <= max_short_path_len)
6401e04c3fSmrg         *(--head) = d;
6501e04c3fSmrg   }
6601e04c3fSmrg
6701e04c3fSmrg   if (count <= max_short_path_len) {
6801e04c3fSmrg      /* If we're under max_short_path_len, just use the short path. */
6901e04c3fSmrg      path->path = head;
7001e04c3fSmrg      goto done;
7101e04c3fSmrg   }
7201e04c3fSmrg
7301e04c3fSmrg#ifndef NDEBUG
7401e04c3fSmrg   /* Just in case someone uses short_path by accident */
7501e04c3fSmrg   for (unsigned i = 0; i < ARRAY_SIZE(path->_short_path); i++)
767ec681f3Smrg      path->_short_path[i] = (void *)(uintptr_t)0xdeadbeef;
7701e04c3fSmrg#endif
7801e04c3fSmrg
7901e04c3fSmrg   path->path = ralloc_array(mem_ctx, nir_deref_instr *, count + 1);
8001e04c3fSmrg   head = tail = path->path + count;
8101e04c3fSmrg   *tail = NULL;
827ec681f3Smrg   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
837ec681f3Smrg      if (d->deref_type == nir_deref_type_cast && is_trivial_deref_cast(d))
847ec681f3Smrg         continue;
8501e04c3fSmrg      *(--head) = d;
867ec681f3Smrg   }
8701e04c3fSmrg
8801e04c3fSmrgdone:
8901e04c3fSmrg   assert(head == path->path);
9001e04c3fSmrg   assert(tail == head + count);
9101e04c3fSmrg   assert(*tail == NULL);
9201e04c3fSmrg}
9301e04c3fSmrg
9401e04c3fSmrgvoid
9501e04c3fSmrgnir_deref_path_finish(nir_deref_path *path)
9601e04c3fSmrg{
9701e04c3fSmrg   if (path->path < &path->_short_path[0] ||
9801e04c3fSmrg       path->path > &path->_short_path[ARRAY_SIZE(path->_short_path) - 1])
9901e04c3fSmrg      ralloc_free(path->path);
10001e04c3fSmrg}
10101e04c3fSmrg
10201e04c3fSmrg/**
10301e04c3fSmrg * Recursively removes unused deref instructions
10401e04c3fSmrg */
10501e04c3fSmrgbool
10601e04c3fSmrgnir_deref_instr_remove_if_unused(nir_deref_instr *instr)
10701e04c3fSmrg{
10801e04c3fSmrg   bool progress = false;
10901e04c3fSmrg
11001e04c3fSmrg   for (nir_deref_instr *d = instr; d; d = nir_deref_instr_parent(d)) {
11101e04c3fSmrg      /* If anyone is using this deref, leave it alone */
11201e04c3fSmrg      assert(d->dest.is_ssa);
1137ec681f3Smrg      if (!nir_ssa_def_is_unused(&d->dest.ssa))
11401e04c3fSmrg         break;
11501e04c3fSmrg
11601e04c3fSmrg      nir_instr_remove(&d->instr);
11701e04c3fSmrg      progress = true;
11801e04c3fSmrg   }
11901e04c3fSmrg
12001e04c3fSmrg   return progress;
12101e04c3fSmrg}
12201e04c3fSmrg
12301e04c3fSmrgbool
12401e04c3fSmrgnir_deref_instr_has_indirect(nir_deref_instr *instr)
12501e04c3fSmrg{
12601e04c3fSmrg   while (instr->deref_type != nir_deref_type_var) {
12701e04c3fSmrg      /* Consider casts to be indirects */
12801e04c3fSmrg      if (instr->deref_type == nir_deref_type_cast)
12901e04c3fSmrg         return true;
13001e04c3fSmrg
1317e102996Smaya      if ((instr->deref_type == nir_deref_type_array ||
1327e102996Smaya           instr->deref_type == nir_deref_type_ptr_as_array) &&
13301e04c3fSmrg          !nir_src_is_const(instr->arr.index))
13401e04c3fSmrg         return true;
13501e04c3fSmrg
13601e04c3fSmrg      instr = nir_deref_instr_parent(instr);
13701e04c3fSmrg   }
13801e04c3fSmrg
13901e04c3fSmrg   return false;
14001e04c3fSmrg}
14101e04c3fSmrg
1427ec681f3Smrgbool
1437ec681f3Smrgnir_deref_instr_is_known_out_of_bounds(nir_deref_instr *instr)
1447ec681f3Smrg{
1457ec681f3Smrg   for (; instr; instr = nir_deref_instr_parent(instr)) {
1467ec681f3Smrg      if (instr->deref_type == nir_deref_type_array &&
1477ec681f3Smrg          nir_src_is_const(instr->arr.index) &&
1487ec681f3Smrg           nir_src_as_uint(instr->arr.index) >=
1497ec681f3Smrg           glsl_get_length(nir_deref_instr_parent(instr)->type))
1507ec681f3Smrg         return true;
1517ec681f3Smrg   }
1527ec681f3Smrg
1537ec681f3Smrg   return false;
1547ec681f3Smrg}
1557ec681f3Smrg
1567ec681f3Smrgbool
1577ec681f3Smrgnir_deref_instr_has_complex_use(nir_deref_instr *deref)
1587ec681f3Smrg{
1597ec681f3Smrg   nir_foreach_use(use_src, &deref->dest.ssa) {
1607ec681f3Smrg      nir_instr *use_instr = use_src->parent_instr;
1617ec681f3Smrg
1627ec681f3Smrg      switch (use_instr->type) {
1637ec681f3Smrg      case nir_instr_type_deref: {
1647ec681f3Smrg         nir_deref_instr *use_deref = nir_instr_as_deref(use_instr);
1657ec681f3Smrg
1667ec681f3Smrg         /* A var deref has no sources */
1677ec681f3Smrg         assert(use_deref->deref_type != nir_deref_type_var);
1687ec681f3Smrg
1697ec681f3Smrg         /* If a deref shows up in an array index or something like that, it's
1707ec681f3Smrg          * a complex use.
1717ec681f3Smrg          */
1727ec681f3Smrg         if (use_src != &use_deref->parent)
1737ec681f3Smrg            return true;
1747ec681f3Smrg
1757ec681f3Smrg         /* Anything that isn't a basic struct or array deref is considered to
1767ec681f3Smrg          * be a "complex" use.  In particular, we don't allow ptr_as_array
1777ec681f3Smrg          * because we assume that opt_deref will turn any non-complex
1787ec681f3Smrg          * ptr_as_array derefs into regular array derefs eventually so passes
1797ec681f3Smrg          * which only want to handle simple derefs will pick them up in a
1807ec681f3Smrg          * later pass.
1817ec681f3Smrg          */
1827ec681f3Smrg         if (use_deref->deref_type != nir_deref_type_struct &&
1837ec681f3Smrg             use_deref->deref_type != nir_deref_type_array_wildcard &&
1847ec681f3Smrg             use_deref->deref_type != nir_deref_type_array)
1857ec681f3Smrg            return true;
1867ec681f3Smrg
1877ec681f3Smrg         if (nir_deref_instr_has_complex_use(use_deref))
1887ec681f3Smrg            return true;
1897ec681f3Smrg
1907ec681f3Smrg         continue;
1917ec681f3Smrg      }
1927ec681f3Smrg
1937ec681f3Smrg      case nir_instr_type_intrinsic: {
1947ec681f3Smrg         nir_intrinsic_instr *use_intrin = nir_instr_as_intrinsic(use_instr);
1957ec681f3Smrg         switch (use_intrin->intrinsic) {
1967ec681f3Smrg         case nir_intrinsic_load_deref:
1977ec681f3Smrg            assert(use_src == &use_intrin->src[0]);
1987ec681f3Smrg            continue;
1997ec681f3Smrg
2007ec681f3Smrg         case nir_intrinsic_copy_deref:
2017ec681f3Smrg            assert(use_src == &use_intrin->src[0] ||
2027ec681f3Smrg                   use_src == &use_intrin->src[1]);
2037ec681f3Smrg            continue;
2047ec681f3Smrg
2057ec681f3Smrg         case nir_intrinsic_store_deref:
2067ec681f3Smrg            /* A use in src[1] of a store means we're taking that pointer and
2077ec681f3Smrg             * writing it to a variable.  Because we have no idea who will
2087ec681f3Smrg             * read that variable and what they will do with the pointer, it's
2097ec681f3Smrg             * considered a "complex" use.  A use in src[0], on the other
2107ec681f3Smrg             * hand, is a simple use because we're just going to dereference
2117ec681f3Smrg             * it and write a value there.
2127ec681f3Smrg             */
2137ec681f3Smrg            if (use_src == &use_intrin->src[0])
2147ec681f3Smrg               continue;
2157ec681f3Smrg            return true;
2167ec681f3Smrg
2177ec681f3Smrg         default:
2187ec681f3Smrg            return true;
2197ec681f3Smrg         }
2207ec681f3Smrg         unreachable("Switch default failed");
2217ec681f3Smrg      }
2227ec681f3Smrg
2237ec681f3Smrg      default:
2247ec681f3Smrg         return true;
2257ec681f3Smrg      }
2267ec681f3Smrg   }
2277ec681f3Smrg
2287ec681f3Smrg   nir_foreach_if_use(use, &deref->dest.ssa)
2297ec681f3Smrg      return true;
2307ec681f3Smrg
2317ec681f3Smrg   return false;
2327ec681f3Smrg}
2337ec681f3Smrg
/**
 * Returns the size in bytes of one scalar component of \p type, which must
 * be a scalar, vector or matrix type.  Boolean types report 4 bytes; all
 * other types report their bit size divided by 8.
 */
static unsigned
type_scalar_size_bytes(const struct glsl_type *type)
{
   assert(glsl_type_is_vector_or_scalar(type) ||
          glsl_type_is_matrix(type));

   if (glsl_type_is_boolean(type))
      return 4;

   return glsl_get_bit_size(type) / 8;
}
2417ec681f3Smrg
2427e102996Smayaunsigned
2437ec681f3Smrgnir_deref_instr_array_stride(nir_deref_instr *deref)
2447e102996Smaya{
2457e102996Smaya   switch (deref->deref_type) {
2467e102996Smaya   case nir_deref_type_array:
2477ec681f3Smrg   case nir_deref_type_array_wildcard: {
2487ec681f3Smrg      const struct glsl_type *arr_type = nir_deref_instr_parent(deref)->type;
2497ec681f3Smrg      unsigned stride = glsl_get_explicit_stride(arr_type);
2507ec681f3Smrg
2517ec681f3Smrg      if ((glsl_type_is_matrix(arr_type) &&
2527ec681f3Smrg           glsl_matrix_type_is_row_major(arr_type)) ||
2537ec681f3Smrg          (glsl_type_is_vector(arr_type) && stride == 0))
2547ec681f3Smrg         stride = type_scalar_size_bytes(arr_type);
2557ec681f3Smrg
2567ec681f3Smrg      return stride;
2577ec681f3Smrg   }
2587e102996Smaya   case nir_deref_type_ptr_as_array:
2597ec681f3Smrg      return nir_deref_instr_array_stride(nir_deref_instr_parent(deref));
2607e102996Smaya   case nir_deref_type_cast:
2617e102996Smaya      return deref->cast.ptr_stride;
2627e102996Smaya   default:
2637e102996Smaya      return 0;
2647e102996Smaya   }
2657e102996Smaya}
2667e102996Smaya
26701e04c3fSmrgstatic unsigned
26801e04c3fSmrgtype_get_array_stride(const struct glsl_type *elem_type,
26901e04c3fSmrg                      glsl_type_size_align_func size_align)
27001e04c3fSmrg{
27101e04c3fSmrg   unsigned elem_size, elem_align;
2727e102996Smaya   size_align(elem_type, &elem_size, &elem_align);
27301e04c3fSmrg   return ALIGN_POT(elem_size, elem_align);
27401e04c3fSmrg}
27501e04c3fSmrg
27601e04c3fSmrgstatic unsigned
27701e04c3fSmrgstruct_type_get_field_offset(const struct glsl_type *struct_type,
27801e04c3fSmrg                             glsl_type_size_align_func size_align,
27901e04c3fSmrg                             unsigned field_idx)
28001e04c3fSmrg{
2817e102996Smaya   assert(glsl_type_is_struct_or_ifc(struct_type));
28201e04c3fSmrg   unsigned offset = 0;
28301e04c3fSmrg   for (unsigned i = 0; i <= field_idx; i++) {
28401e04c3fSmrg      unsigned elem_size, elem_align;
2857e102996Smaya      size_align(glsl_get_struct_field(struct_type, i), &elem_size, &elem_align);
28601e04c3fSmrg      offset = ALIGN_POT(offset, elem_align);
28701e04c3fSmrg      if (i < field_idx)
28801e04c3fSmrg         offset += elem_size;
28901e04c3fSmrg   }
29001e04c3fSmrg   return offset;
29101e04c3fSmrg}
29201e04c3fSmrg
29301e04c3fSmrgunsigned
29401e04c3fSmrgnir_deref_instr_get_const_offset(nir_deref_instr *deref,
29501e04c3fSmrg                                 glsl_type_size_align_func size_align)
29601e04c3fSmrg{
29701e04c3fSmrg   nir_deref_path path;
29801e04c3fSmrg   nir_deref_path_init(&path, deref, NULL);
29901e04c3fSmrg
30001e04c3fSmrg   unsigned offset = 0;
30101e04c3fSmrg   for (nir_deref_instr **p = &path.path[1]; *p; p++) {
3027ec681f3Smrg      switch ((*p)->deref_type) {
3037ec681f3Smrg      case nir_deref_type_array:
30401e04c3fSmrg         offset += nir_src_as_uint((*p)->arr.index) *
30501e04c3fSmrg                   type_get_array_stride((*p)->type, size_align);
3067ec681f3Smrg	 break;
3077ec681f3Smrg      case nir_deref_type_struct: {
30801e04c3fSmrg         /* p starts at path[1], so this is safe */
30901e04c3fSmrg         nir_deref_instr *parent = *(p - 1);
31001e04c3fSmrg         offset += struct_type_get_field_offset(parent->type, size_align,
31101e04c3fSmrg                                                (*p)->strct.index);
3127ec681f3Smrg	 break;
3137ec681f3Smrg      }
3147ec681f3Smrg      case nir_deref_type_cast:
3157ec681f3Smrg         /* A cast doesn't contribute to the offset */
3167ec681f3Smrg         break;
3177ec681f3Smrg      default:
31801e04c3fSmrg         unreachable("Unsupported deref type");
31901e04c3fSmrg      }
32001e04c3fSmrg   }
32101e04c3fSmrg
32201e04c3fSmrg   nir_deref_path_finish(&path);
32301e04c3fSmrg
32401e04c3fSmrg   return offset;
32501e04c3fSmrg}
32601e04c3fSmrg
32701e04c3fSmrgnir_ssa_def *
32801e04c3fSmrgnir_build_deref_offset(nir_builder *b, nir_deref_instr *deref,
32901e04c3fSmrg                       glsl_type_size_align_func size_align)
33001e04c3fSmrg{
33101e04c3fSmrg   nir_deref_path path;
33201e04c3fSmrg   nir_deref_path_init(&path, deref, NULL);
33301e04c3fSmrg
3347ec681f3Smrg   nir_ssa_def *offset = nir_imm_intN_t(b, 0, deref->dest.ssa.bit_size);
33501e04c3fSmrg   for (nir_deref_instr **p = &path.path[1]; *p; p++) {
3367ec681f3Smrg      switch ((*p)->deref_type) {
3377ec681f3Smrg      case nir_deref_type_array:
3387ec681f3Smrg      case nir_deref_type_ptr_as_array: {
33901e04c3fSmrg         nir_ssa_def *index = nir_ssa_for_src(b, (*p)->arr.index, 1);
3407e102996Smaya         int stride = type_get_array_stride((*p)->type, size_align);
3417ec681f3Smrg         offset = nir_iadd(b, offset, nir_amul_imm(b, index, stride));
3427ec681f3Smrg         break;
3437ec681f3Smrg      }
3447ec681f3Smrg      case nir_deref_type_struct: {
34501e04c3fSmrg         /* p starts at path[1], so this is safe */
34601e04c3fSmrg         nir_deref_instr *parent = *(p - 1);
34701e04c3fSmrg         unsigned field_offset =
34801e04c3fSmrg            struct_type_get_field_offset(parent->type, size_align,
34901e04c3fSmrg                                         (*p)->strct.index);
3507e102996Smaya         offset = nir_iadd_imm(b, offset, field_offset);
3517ec681f3Smrg         break;
3527ec681f3Smrg      }
3537ec681f3Smrg      case nir_deref_type_cast:
3547ec681f3Smrg         /* A cast doesn't contribute to the offset */
3557ec681f3Smrg         break;
3567ec681f3Smrg      default:
35701e04c3fSmrg         unreachable("Unsupported deref type");
35801e04c3fSmrg      }
35901e04c3fSmrg   }
36001e04c3fSmrg
36101e04c3fSmrg   nir_deref_path_finish(&path);
36201e04c3fSmrg
36301e04c3fSmrg   return offset;
36401e04c3fSmrg}
36501e04c3fSmrg
36601e04c3fSmrgbool
36701e04c3fSmrgnir_remove_dead_derefs_impl(nir_function_impl *impl)
36801e04c3fSmrg{
36901e04c3fSmrg   bool progress = false;
37001e04c3fSmrg
37101e04c3fSmrg   nir_foreach_block(block, impl) {
37201e04c3fSmrg      nir_foreach_instr_safe(instr, block) {
37301e04c3fSmrg         if (instr->type == nir_instr_type_deref &&
37401e04c3fSmrg             nir_deref_instr_remove_if_unused(nir_instr_as_deref(instr)))
37501e04c3fSmrg            progress = true;
37601e04c3fSmrg      }
37701e04c3fSmrg   }
37801e04c3fSmrg
37901e04c3fSmrg   if (progress)
38001e04c3fSmrg      nir_metadata_preserve(impl, nir_metadata_block_index |
38101e04c3fSmrg                                  nir_metadata_dominance);
38201e04c3fSmrg
38301e04c3fSmrg   return progress;
38401e04c3fSmrg}
38501e04c3fSmrg
38601e04c3fSmrgbool
38701e04c3fSmrgnir_remove_dead_derefs(nir_shader *shader)
38801e04c3fSmrg{
38901e04c3fSmrg   bool progress = false;
39001e04c3fSmrg   nir_foreach_function(function, shader) {
39101e04c3fSmrg      if (function->impl && nir_remove_dead_derefs_impl(function->impl))
39201e04c3fSmrg         progress = true;
39301e04c3fSmrg   }
39401e04c3fSmrg
39501e04c3fSmrg   return progress;
39601e04c3fSmrg}
39701e04c3fSmrg
39801e04c3fSmrgvoid
39901e04c3fSmrgnir_fixup_deref_modes(nir_shader *shader)
40001e04c3fSmrg{
40101e04c3fSmrg   nir_foreach_function(function, shader) {
40201e04c3fSmrg      if (!function->impl)
40301e04c3fSmrg         continue;
40401e04c3fSmrg
40501e04c3fSmrg      nir_foreach_block(block, function->impl) {
40601e04c3fSmrg         nir_foreach_instr(instr, block) {
40701e04c3fSmrg            if (instr->type != nir_instr_type_deref)
40801e04c3fSmrg               continue;
40901e04c3fSmrg
41001e04c3fSmrg            nir_deref_instr *deref = nir_instr_as_deref(instr);
4117e102996Smaya            if (deref->deref_type == nir_deref_type_cast)
4127e102996Smaya               continue;
41301e04c3fSmrg
4147ec681f3Smrg            nir_variable_mode parent_modes;
41501e04c3fSmrg            if (deref->deref_type == nir_deref_type_var) {
4167ec681f3Smrg               parent_modes = deref->var->data.mode;
41701e04c3fSmrg            } else {
41801e04c3fSmrg               assert(deref->parent.is_ssa);
41901e04c3fSmrg               nir_deref_instr *parent =
42001e04c3fSmrg                  nir_instr_as_deref(deref->parent.ssa->parent_instr);
4217ec681f3Smrg               parent_modes = parent->modes;
42201e04c3fSmrg            }
42301e04c3fSmrg
4247ec681f3Smrg            deref->modes = parent_modes;
42501e04c3fSmrg         }
42601e04c3fSmrg      }
42701e04c3fSmrg   }
42801e04c3fSmrg}
42901e04c3fSmrg
4307e102996Smayastatic bool
4317e102996Smayamodes_may_alias(nir_variable_mode a, nir_variable_mode b)
4327e102996Smaya{
4337e102996Smaya   /* Generic pointers can alias with SSBOs */
4347ec681f3Smrg   if ((a & (nir_var_mem_ssbo | nir_var_mem_global)) &&
4357ec681f3Smrg       (b & (nir_var_mem_ssbo | nir_var_mem_global)))
4367e102996Smaya      return true;
4377e102996Smaya
4387ec681f3Smrg   /* Pointers can only alias if they share a mode. */
4397ec681f3Smrg   return a & b;
4407e102996Smaya}
4417e102996Smaya
4427e102996Smayastatic bool
4437e102996Smayaderef_path_contains_coherent_decoration(nir_deref_path *path)
4447e102996Smaya{
4457e102996Smaya   assert(path->path[0]->deref_type == nir_deref_type_var);
4467e102996Smaya
4477ec681f3Smrg   if (path->path[0]->var->data.access & ACCESS_COHERENT)
4487e102996Smaya      return true;
4497e102996Smaya
4507e102996Smaya   for (nir_deref_instr **p = &path->path[1]; *p; p++) {
4517e102996Smaya      if ((*p)->deref_type != nir_deref_type_struct)
4527e102996Smaya         continue;
4537e102996Smaya
4547e102996Smaya      const struct glsl_type *struct_type = (*(p - 1))->type;
4557e102996Smaya      const struct glsl_struct_field *field =
4567e102996Smaya         glsl_get_struct_field_data(struct_type, (*p)->strct.index);
4577e102996Smaya      if (field->memory_coherent)
4587e102996Smaya         return true;
4597e102996Smaya   }
4607e102996Smaya
4617e102996Smaya   return false;
4627e102996Smaya}
4637e102996Smaya
46401e04c3fSmrgnir_deref_compare_result
46501e04c3fSmrgnir_compare_deref_paths(nir_deref_path *a_path,
46601e04c3fSmrg                        nir_deref_path *b_path)
46701e04c3fSmrg{
4687ec681f3Smrg   if (!modes_may_alias(b_path->path[0]->modes, a_path->path[0]->modes))
4697e102996Smaya      return nir_derefs_do_not_alias;
4707e102996Smaya
4717e102996Smaya   if (a_path->path[0]->deref_type != b_path->path[0]->deref_type)
4727e102996Smaya      return nir_derefs_may_alias_bit;
4737e102996Smaya
4747e102996Smaya   if (a_path->path[0]->deref_type == nir_deref_type_var) {
4757e102996Smaya      if (a_path->path[0]->var != b_path->path[0]->var) {
4767e102996Smaya         /* Shader and function temporaries aren't backed by memory so two
4777e102996Smaya          * distinct variables never alias.
4787e102996Smaya          */
4797e102996Smaya         static const nir_variable_mode temp_var_modes =
4807e102996Smaya            nir_var_shader_temp | nir_var_function_temp;
4817ec681f3Smrg         if (!(a_path->path[0]->modes & ~temp_var_modes) ||
4827ec681f3Smrg             !(b_path->path[0]->modes & ~temp_var_modes))
4837e102996Smaya            return nir_derefs_do_not_alias;
4847e102996Smaya
4857e102996Smaya         /* If they are both declared coherent or have coherent somewhere in
4867e102996Smaya          * their path (due to a member of an interface being declared
4877e102996Smaya          * coherent), we have to assume we that we could have any kind of
4887e102996Smaya          * aliasing.  Otherwise, they could still alias but the client didn't
4897e102996Smaya          * tell us and that's their fault.
4907e102996Smaya          */
4917e102996Smaya         if (deref_path_contains_coherent_decoration(a_path) &&
4927e102996Smaya             deref_path_contains_coherent_decoration(b_path))
4937e102996Smaya            return nir_derefs_may_alias_bit;
4947e102996Smaya
4957ec681f3Smrg         /* Per SPV_KHR_workgroup_memory_explicit_layout and GL_EXT_shared_memory_block,
4967ec681f3Smrg          * shared blocks alias each other.
4977ec681f3Smrg          */
4987ec681f3Smrg         if (a_path->path[0]->modes & nir_var_mem_shared &&
4997ec681f3Smrg             b_path->path[0]->modes & nir_var_mem_shared &&
5007ec681f3Smrg             (glsl_type_is_interface(a_path->path[0]->var->type) ||
5017ec681f3Smrg              glsl_type_is_interface(b_path->path[0]->var->type))) {
5027ec681f3Smrg            assert(glsl_type_is_interface(a_path->path[0]->var->type) &&
5037ec681f3Smrg                   glsl_type_is_interface(b_path->path[0]->var->type));
5047ec681f3Smrg            return nir_derefs_may_alias_bit;
5057ec681f3Smrg         }
5067ec681f3Smrg
5077e102996Smaya         /* If we can chase the deref all the way back to the variable and
5087e102996Smaya          * they're not the same variable and at least one is not declared
5097e102996Smaya          * coherent, we know they can't possibly alias.
5107e102996Smaya          */
5117e102996Smaya         return nir_derefs_do_not_alias;
5127e102996Smaya      }
5137e102996Smaya   } else {
5147e102996Smaya      assert(a_path->path[0]->deref_type == nir_deref_type_cast);
5157e102996Smaya      /* If they're not exactly the same cast, it's hard to compare them so we
5167e102996Smaya       * just assume they alias.  Comparing casts is tricky as there are lots
5177e102996Smaya       * of things such as mode, type, etc. to make sure work out; for now, we
5187e102996Smaya       * just assume nit_opt_deref will combine them and compare the deref
5197e102996Smaya       * instructions.
5207e102996Smaya       *
5217e102996Smaya       * TODO: At some point in the future, we could be clever and understand
5227e102996Smaya       * that a float[] and int[] have the same layout and aliasing structure
5237e102996Smaya       * but double[] and vec3[] do not and we could potentially be a bit
5247e102996Smaya       * smarter here.
5257e102996Smaya       */
5267e102996Smaya      if (a_path->path[0] != b_path->path[0])
5277e102996Smaya         return nir_derefs_may_alias_bit;
5287e102996Smaya   }
52901e04c3fSmrg
53001e04c3fSmrg   /* Start off assuming they fully compare.  We ignore equality for now.  In
53101e04c3fSmrg    * the end, we'll determine that by containment.
53201e04c3fSmrg    */
53301e04c3fSmrg   nir_deref_compare_result result = nir_derefs_may_alias_bit |
53401e04c3fSmrg                                     nir_derefs_a_contains_b_bit |
53501e04c3fSmrg                                     nir_derefs_b_contains_a_bit;
53601e04c3fSmrg
53701e04c3fSmrg   nir_deref_instr **a_p = &a_path->path[1];
53801e04c3fSmrg   nir_deref_instr **b_p = &b_path->path[1];
5397e102996Smaya   while (*a_p != NULL && *a_p == *b_p) {
5407e102996Smaya      a_p++;
5417e102996Smaya      b_p++;
5427e102996Smaya   }
5437e102996Smaya
5447e102996Smaya   /* We're at either the tail or the divergence point between the two deref
5457ec681f3Smrg    * paths.  Look to see if either contains cast or a ptr_as_array deref.  If
5467ec681f3Smrg    * it does we don't know how to safely make any inferences.  Hopefully,
5477e102996Smaya    * nir_opt_deref will clean most of these up and we can start inferring
5487e102996Smaya    * things again.
5497e102996Smaya    *
5507e102996Smaya    * In theory, we could do a bit better.  For instance, we could detect the
5517e102996Smaya    * case where we have exactly one ptr_as_array deref in the chain after the
5527e102996Smaya    * divergence point and it's matched in both chains and the two chains have
5537e102996Smaya    * different constant indices.
5547e102996Smaya    */
5557e102996Smaya   for (nir_deref_instr **t_p = a_p; *t_p; t_p++) {
5567ec681f3Smrg      if ((*t_p)->deref_type == nir_deref_type_cast ||
5577ec681f3Smrg          (*t_p)->deref_type == nir_deref_type_ptr_as_array)
5587e102996Smaya         return nir_derefs_may_alias_bit;
5597e102996Smaya   }
5607e102996Smaya   for (nir_deref_instr **t_p = b_p; *t_p; t_p++) {
5617ec681f3Smrg      if ((*t_p)->deref_type == nir_deref_type_cast ||
5627ec681f3Smrg          (*t_p)->deref_type == nir_deref_type_ptr_as_array)
5637e102996Smaya         return nir_derefs_may_alias_bit;
5647e102996Smaya   }
5657e102996Smaya
56601e04c3fSmrg   while (*a_p != NULL && *b_p != NULL) {
56701e04c3fSmrg      nir_deref_instr *a_tail = *(a_p++);
56801e04c3fSmrg      nir_deref_instr *b_tail = *(b_p++);
56901e04c3fSmrg
57001e04c3fSmrg      switch (a_tail->deref_type) {
57101e04c3fSmrg      case nir_deref_type_array:
57201e04c3fSmrg      case nir_deref_type_array_wildcard: {
57301e04c3fSmrg         assert(b_tail->deref_type == nir_deref_type_array ||
57401e04c3fSmrg                b_tail->deref_type == nir_deref_type_array_wildcard);
57501e04c3fSmrg
57601e04c3fSmrg         if (a_tail->deref_type == nir_deref_type_array_wildcard) {
57701e04c3fSmrg            if (b_tail->deref_type != nir_deref_type_array_wildcard)
57801e04c3fSmrg               result &= ~nir_derefs_b_contains_a_bit;
57901e04c3fSmrg         } else if (b_tail->deref_type == nir_deref_type_array_wildcard) {
58001e04c3fSmrg            if (a_tail->deref_type != nir_deref_type_array_wildcard)
58101e04c3fSmrg               result &= ~nir_derefs_a_contains_b_bit;
58201e04c3fSmrg         } else {
58301e04c3fSmrg            assert(a_tail->deref_type == nir_deref_type_array &&
58401e04c3fSmrg                   b_tail->deref_type == nir_deref_type_array);
58501e04c3fSmrg            assert(a_tail->arr.index.is_ssa && b_tail->arr.index.is_ssa);
58601e04c3fSmrg
58701e04c3fSmrg            if (nir_src_is_const(a_tail->arr.index) &&
58801e04c3fSmrg                nir_src_is_const(b_tail->arr.index)) {
58901e04c3fSmrg               /* If they're both direct and have different offsets, they
59001e04c3fSmrg                * don't even alias much less anything else.
59101e04c3fSmrg                */
59201e04c3fSmrg               if (nir_src_as_uint(a_tail->arr.index) !=
59301e04c3fSmrg                   nir_src_as_uint(b_tail->arr.index))
5947e102996Smaya                  return nir_derefs_do_not_alias;
59501e04c3fSmrg            } else if (a_tail->arr.index.ssa == b_tail->arr.index.ssa) {
59601e04c3fSmrg               /* They're the same indirect, continue on */
59701e04c3fSmrg            } else {
59801e04c3fSmrg               /* They're not the same index so we can't prove anything about
59901e04c3fSmrg                * containment.
60001e04c3fSmrg                */
60101e04c3fSmrg               result &= ~(nir_derefs_a_contains_b_bit | nir_derefs_b_contains_a_bit);
60201e04c3fSmrg            }
60301e04c3fSmrg         }
60401e04c3fSmrg         break;
60501e04c3fSmrg      }
60601e04c3fSmrg
60701e04c3fSmrg      case nir_deref_type_struct: {
60801e04c3fSmrg         /* If they're different struct members, they don't even alias */
60901e04c3fSmrg         if (a_tail->strct.index != b_tail->strct.index)
6107e102996Smaya            return nir_derefs_do_not_alias;
61101e04c3fSmrg         break;
61201e04c3fSmrg      }
61301e04c3fSmrg
61401e04c3fSmrg      default:
61501e04c3fSmrg         unreachable("Invalid deref type");
61601e04c3fSmrg      }
61701e04c3fSmrg   }
61801e04c3fSmrg
61901e04c3fSmrg   /* If a is longer than b, then it can't contain b */
62001e04c3fSmrg   if (*a_p != NULL)
62101e04c3fSmrg      result &= ~nir_derefs_a_contains_b_bit;
62201e04c3fSmrg   if (*b_p != NULL)
62301e04c3fSmrg      result &= ~nir_derefs_b_contains_a_bit;
62401e04c3fSmrg
62501e04c3fSmrg   /* If a contains b and b contains a they must be equal. */
62601e04c3fSmrg   if ((result & nir_derefs_a_contains_b_bit) && (result & nir_derefs_b_contains_a_bit))
62701e04c3fSmrg      result |= nir_derefs_equal_bit;
62801e04c3fSmrg
62901e04c3fSmrg   return result;
63001e04c3fSmrg}
63101e04c3fSmrg
63201e04c3fSmrgnir_deref_compare_result
63301e04c3fSmrgnir_compare_derefs(nir_deref_instr *a, nir_deref_instr *b)
63401e04c3fSmrg{
63501e04c3fSmrg   if (a == b) {
63601e04c3fSmrg      return nir_derefs_equal_bit | nir_derefs_may_alias_bit |
63701e04c3fSmrg             nir_derefs_a_contains_b_bit | nir_derefs_b_contains_a_bit;
63801e04c3fSmrg   }
63901e04c3fSmrg
64001e04c3fSmrg   nir_deref_path a_path, b_path;
64101e04c3fSmrg   nir_deref_path_init(&a_path, a, NULL);
64201e04c3fSmrg   nir_deref_path_init(&b_path, b, NULL);
6437e102996Smaya   assert(a_path.path[0]->deref_type == nir_deref_type_var ||
6447e102996Smaya          a_path.path[0]->deref_type == nir_deref_type_cast);
6457e102996Smaya   assert(b_path.path[0]->deref_type == nir_deref_type_var ||
6467e102996Smaya          b_path.path[0]->deref_type == nir_deref_type_cast);
64701e04c3fSmrg
64801e04c3fSmrg   nir_deref_compare_result result = nir_compare_deref_paths(&a_path, &b_path);
64901e04c3fSmrg
65001e04c3fSmrg   nir_deref_path_finish(&a_path);
65101e04c3fSmrg   nir_deref_path_finish(&b_path);
65201e04c3fSmrg
65301e04c3fSmrg   return result;
65401e04c3fSmrg}
65501e04c3fSmrg
6567ec681f3Smrgnir_deref_path *nir_get_deref_path(void *mem_ctx, nir_deref_and_path *deref)
6577ec681f3Smrg{
6587ec681f3Smrg   if (!deref->_path) {
6597ec681f3Smrg      deref->_path = ralloc(mem_ctx, nir_deref_path);
6607ec681f3Smrg      nir_deref_path_init(deref->_path, deref->instr, mem_ctx);
6617ec681f3Smrg   }
6627ec681f3Smrg   return deref->_path;
6637ec681f3Smrg}
6647ec681f3Smrg
6657ec681f3Smrgnir_deref_compare_result nir_compare_derefs_and_paths(void *mem_ctx,
6667ec681f3Smrg                                                      nir_deref_and_path *a,
6677ec681f3Smrg                                                      nir_deref_and_path *b)
6687ec681f3Smrg{
6697ec681f3Smrg   if (a->instr == b->instr) /* nir_compare_derefs has a fast path if a == b */
6707ec681f3Smrg      return nir_compare_derefs(a->instr, b->instr);
6717ec681f3Smrg
6727ec681f3Smrg   return nir_compare_deref_paths(nir_get_deref_path(mem_ctx, a),
6737ec681f3Smrg                                  nir_get_deref_path(mem_ctx, b));
6747ec681f3Smrg}
6757ec681f3Smrg
67601e04c3fSmrgstruct rematerialize_deref_state {
67701e04c3fSmrg   bool progress;
67801e04c3fSmrg   nir_builder builder;
67901e04c3fSmrg   nir_block *block;
68001e04c3fSmrg   struct hash_table *cache;
68101e04c3fSmrg};
68201e04c3fSmrg
68301e04c3fSmrgstatic nir_deref_instr *
68401e04c3fSmrgrematerialize_deref_in_block(nir_deref_instr *deref,
68501e04c3fSmrg                             struct rematerialize_deref_state *state)
68601e04c3fSmrg{
68701e04c3fSmrg   if (deref->instr.block == state->block)
68801e04c3fSmrg      return deref;
68901e04c3fSmrg
69001e04c3fSmrg   if (!state->cache) {
6917e102996Smaya      state->cache = _mesa_pointer_hash_table_create(NULL);
69201e04c3fSmrg   }
69301e04c3fSmrg
69401e04c3fSmrg   struct hash_entry *cached = _mesa_hash_table_search(state->cache, deref);
69501e04c3fSmrg   if (cached)
69601e04c3fSmrg      return cached->data;
69701e04c3fSmrg
69801e04c3fSmrg   nir_builder *b = &state->builder;
69901e04c3fSmrg   nir_deref_instr *new_deref =
70001e04c3fSmrg      nir_deref_instr_create(b->shader, deref->deref_type);
7017ec681f3Smrg   new_deref->modes = deref->modes;
70201e04c3fSmrg   new_deref->type = deref->type;
70301e04c3fSmrg
70401e04c3fSmrg   if (deref->deref_type == nir_deref_type_var) {
70501e04c3fSmrg      new_deref->var = deref->var;
70601e04c3fSmrg   } else {
70701e04c3fSmrg      nir_deref_instr *parent = nir_src_as_deref(deref->parent);
70801e04c3fSmrg      if (parent) {
70901e04c3fSmrg         parent = rematerialize_deref_in_block(parent, state);
71001e04c3fSmrg         new_deref->parent = nir_src_for_ssa(&parent->dest.ssa);
71101e04c3fSmrg      } else {
7127ec681f3Smrg         nir_src_copy(&new_deref->parent, &deref->parent);
71301e04c3fSmrg      }
71401e04c3fSmrg   }
71501e04c3fSmrg
71601e04c3fSmrg   switch (deref->deref_type) {
71701e04c3fSmrg   case nir_deref_type_var:
71801e04c3fSmrg   case nir_deref_type_array_wildcard:
71901e04c3fSmrg      /* Nothing more to do */
72001e04c3fSmrg      break;
72101e04c3fSmrg
7227ec681f3Smrg   case nir_deref_type_cast:
7237ec681f3Smrg      new_deref->cast.ptr_stride = deref->cast.ptr_stride;
7247ec681f3Smrg      break;
7257ec681f3Smrg
72601e04c3fSmrg   case nir_deref_type_array:
7277ec681f3Smrg   case nir_deref_type_ptr_as_array:
72801e04c3fSmrg      assert(!nir_src_as_deref(deref->arr.index));
7297ec681f3Smrg      nir_src_copy(&new_deref->arr.index, &deref->arr.index);
73001e04c3fSmrg      break;
73101e04c3fSmrg
73201e04c3fSmrg   case nir_deref_type_struct:
73301e04c3fSmrg      new_deref->strct.index = deref->strct.index;
73401e04c3fSmrg      break;
73501e04c3fSmrg
73601e04c3fSmrg   default:
73701e04c3fSmrg      unreachable("Invalid deref instruction type");
73801e04c3fSmrg   }
73901e04c3fSmrg
74001e04c3fSmrg   nir_ssa_dest_init(&new_deref->instr, &new_deref->dest,
74101e04c3fSmrg                     deref->dest.ssa.num_components,
74201e04c3fSmrg                     deref->dest.ssa.bit_size,
7437ec681f3Smrg                     NULL);
74401e04c3fSmrg   nir_builder_instr_insert(b, &new_deref->instr);
74501e04c3fSmrg
74601e04c3fSmrg   return new_deref;
74701e04c3fSmrg}
74801e04c3fSmrg
74901e04c3fSmrgstatic bool
75001e04c3fSmrgrematerialize_deref_src(nir_src *src, void *_state)
75101e04c3fSmrg{
75201e04c3fSmrg   struct rematerialize_deref_state *state = _state;
75301e04c3fSmrg
75401e04c3fSmrg   nir_deref_instr *deref = nir_src_as_deref(*src);
75501e04c3fSmrg   if (!deref)
75601e04c3fSmrg      return true;
75701e04c3fSmrg
75801e04c3fSmrg   nir_deref_instr *block_deref = rematerialize_deref_in_block(deref, state);
75901e04c3fSmrg   if (block_deref != deref) {
76001e04c3fSmrg      nir_instr_rewrite_src(src->parent_instr, src,
76101e04c3fSmrg                            nir_src_for_ssa(&block_deref->dest.ssa));
76201e04c3fSmrg      nir_deref_instr_remove_if_unused(deref);
76301e04c3fSmrg      state->progress = true;
76401e04c3fSmrg   }
76501e04c3fSmrg
76601e04c3fSmrg   return true;
76701e04c3fSmrg}
76801e04c3fSmrg
76901e04c3fSmrg/** Re-materialize derefs in every block
77001e04c3fSmrg *
77101e04c3fSmrg * This pass re-materializes deref instructions in every block in which it is
77201e04c3fSmrg * used.  After this pass has been run, every use of a deref will be of a
77301e04c3fSmrg * deref in the same block as the use.  Also, all unused derefs will be
77401e04c3fSmrg * deleted as a side-effect.
7757ec681f3Smrg *
7767ec681f3Smrg * Derefs used as sources of phi instructions are not rematerialized.
77701e04c3fSmrg */
77801e04c3fSmrgbool
77901e04c3fSmrgnir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl)
78001e04c3fSmrg{
78101e04c3fSmrg   struct rematerialize_deref_state state = { 0 };
78201e04c3fSmrg   nir_builder_init(&state.builder, impl);
78301e04c3fSmrg
7847ec681f3Smrg   nir_foreach_block_unstructured(block, impl) {
78501e04c3fSmrg      state.block = block;
78601e04c3fSmrg
78701e04c3fSmrg      /* Start each block with a fresh cache */
78801e04c3fSmrg      if (state.cache)
78901e04c3fSmrg         _mesa_hash_table_clear(state.cache, NULL);
79001e04c3fSmrg
79101e04c3fSmrg      nir_foreach_instr_safe(instr, block) {
79201e04c3fSmrg         if (instr->type == nir_instr_type_deref &&
79301e04c3fSmrg             nir_deref_instr_remove_if_unused(nir_instr_as_deref(instr)))
79401e04c3fSmrg            continue;
79501e04c3fSmrg
7967ec681f3Smrg         /* If a deref is used in a phi, we can't rematerialize it, as the new
7977ec681f3Smrg          * derefs would appear before the phi, which is not valid.
7987ec681f3Smrg          */
7997ec681f3Smrg         if (instr->type == nir_instr_type_phi)
8007ec681f3Smrg            continue;
8017ec681f3Smrg
80201e04c3fSmrg         state.builder.cursor = nir_before_instr(instr);
80301e04c3fSmrg         nir_foreach_src(instr, rematerialize_deref_src, &state);
80401e04c3fSmrg      }
80501e04c3fSmrg
80601e04c3fSmrg#ifndef NDEBUG
80701e04c3fSmrg      nir_if *following_if = nir_block_get_following_if(block);
80801e04c3fSmrg      if (following_if)
80901e04c3fSmrg         assert(!nir_src_as_deref(following_if->condition));
81001e04c3fSmrg#endif
81101e04c3fSmrg   }
81201e04c3fSmrg
81301e04c3fSmrg   _mesa_hash_table_destroy(state.cache, NULL);
81401e04c3fSmrg
81501e04c3fSmrg   return state.progress;
81601e04c3fSmrg}
8177e102996Smaya
8187ec681f3Smrgstatic void
8197ec681f3Smrgnir_deref_instr_fixup_child_types(nir_deref_instr *parent)
8207e102996Smaya{
8217ec681f3Smrg   nir_foreach_use(use, &parent->dest.ssa) {
8227ec681f3Smrg      if (use->parent_instr->type != nir_instr_type_deref)
8237ec681f3Smrg         continue;
8247e102996Smaya
8257ec681f3Smrg      nir_deref_instr *child = nir_instr_as_deref(use->parent_instr);
8267ec681f3Smrg      switch (child->deref_type) {
8277ec681f3Smrg      case nir_deref_type_var:
8287ec681f3Smrg         unreachable("nir_deref_type_var cannot be a child");
8297ec681f3Smrg
8307ec681f3Smrg      case nir_deref_type_array:
8317ec681f3Smrg      case nir_deref_type_array_wildcard:
8327ec681f3Smrg         child->type = glsl_get_array_element(parent->type);
8337ec681f3Smrg         break;
8347ec681f3Smrg
8357ec681f3Smrg      case nir_deref_type_ptr_as_array:
8367ec681f3Smrg         child->type = parent->type;
8377ec681f3Smrg         break;
8387ec681f3Smrg
8397ec681f3Smrg      case nir_deref_type_struct:
8407ec681f3Smrg         child->type = glsl_get_struct_field(parent->type,
8417ec681f3Smrg                                             child->strct.index);
8427ec681f3Smrg         break;
8437ec681f3Smrg
8447ec681f3Smrg      case nir_deref_type_cast:
8457ec681f3Smrg         /* We stop the recursion here */
8467ec681f3Smrg         continue;
8477ec681f3Smrg      }
8487ec681f3Smrg
8497ec681f3Smrg      /* Recurse into children */
8507ec681f3Smrg      nir_deref_instr_fixup_child_types(child);
8517ec681f3Smrg   }
8527e102996Smaya}
8537e102996Smaya
8547e102996Smayastatic bool
8557e102996Smayais_trivial_array_deref_cast(nir_deref_instr *cast)
8567e102996Smaya{
8577e102996Smaya   assert(is_trivial_deref_cast(cast));
8587e102996Smaya
8597e102996Smaya   nir_deref_instr *parent = nir_src_as_deref(cast->parent);
8607e102996Smaya
8617e102996Smaya   if (parent->deref_type == nir_deref_type_array) {
8627e102996Smaya      return cast->cast.ptr_stride ==
8637e102996Smaya             glsl_get_explicit_stride(nir_deref_instr_parent(parent)->type);
8647e102996Smaya   } else if (parent->deref_type == nir_deref_type_ptr_as_array) {
8657e102996Smaya      return cast->cast.ptr_stride ==
8667ec681f3Smrg             nir_deref_instr_array_stride(parent);
8677e102996Smaya   } else {
8687e102996Smaya      return false;
8697e102996Smaya   }
8707e102996Smaya}
8717e102996Smaya
8727e102996Smayastatic bool
8737e102996Smayais_deref_ptr_as_array(nir_instr *instr)
8747e102996Smaya{
8757e102996Smaya   return instr->type == nir_instr_type_deref &&
8767e102996Smaya          nir_instr_as_deref(instr)->deref_type == nir_deref_type_ptr_as_array;
8777e102996Smaya}
8787e102996Smaya
8797ec681f3Smrgstatic bool
8807ec681f3Smrgopt_remove_restricting_cast_alignments(nir_deref_instr *cast)
8817ec681f3Smrg{
8827ec681f3Smrg   assert(cast->deref_type == nir_deref_type_cast);
8837ec681f3Smrg   if (cast->cast.align_mul == 0)
8847ec681f3Smrg      return false;
8857ec681f3Smrg
8867ec681f3Smrg   nir_deref_instr *parent = nir_src_as_deref(cast->parent);
8877ec681f3Smrg   if (parent == NULL)
8887ec681f3Smrg      return false;
8897ec681f3Smrg
8907ec681f3Smrg   /* Don't use any default alignment for this check.  We don't want to fall
8917ec681f3Smrg    * back to type alignment too early in case we find out later that we're
8927ec681f3Smrg    * somehow a child of a packed struct.
8937ec681f3Smrg    */
8947ec681f3Smrg   uint32_t parent_mul, parent_offset;
8957ec681f3Smrg   if (!nir_get_explicit_deref_align(parent, false /* default_to_type_align */,
8967ec681f3Smrg                                     &parent_mul, &parent_offset))
8977ec681f3Smrg      return false;
8987ec681f3Smrg
8997ec681f3Smrg   /* If this cast increases the alignment, we want to keep it.
9007ec681f3Smrg    *
9017ec681f3Smrg    * There is a possibility that the larger alignment provided by this cast
9027ec681f3Smrg    * somehow disagrees with the smaller alignment further up the deref chain.
9037ec681f3Smrg    * In that case, we choose to favor the alignment closer to the actual
9047ec681f3Smrg    * memory operation which, in this case, is the cast and not its parent so
9057ec681f3Smrg    * keeping the cast alignment is the right thing to do.
9067ec681f3Smrg    */
9077ec681f3Smrg   if (parent_mul < cast->cast.align_mul)
9087ec681f3Smrg      return false;
9097ec681f3Smrg
9107ec681f3Smrg   /* If we've gotten here, we have a parent deref with an align_mul at least
9117ec681f3Smrg    * as large as ours so we can potentially throw away the alignment
9127ec681f3Smrg    * information on this deref.  There are two cases to consider here:
9137ec681f3Smrg    *
9147ec681f3Smrg    *  1. We can chase the deref all the way back to the variable.  In this
9157ec681f3Smrg    *     case, we have "perfect" knowledge, modulo indirect array derefs.
9167ec681f3Smrg    *     Unless we've done something wrong in our indirect/wildcard stride
9177ec681f3Smrg    *     calculations, our knowledge from the deref walk is better than the
9187ec681f3Smrg    *     client's.
9197ec681f3Smrg    *
9207ec681f3Smrg    *  2. We can't chase it all the way back to the variable.  In this case,
9217ec681f3Smrg    *     because our call to nir_get_explicit_deref_align(parent, ...) above
9227ec681f3Smrg    *     above passes default_to_type_align=false, the only way we can even
9237ec681f3Smrg    *     get here is if something further up the deref chain has a cast with
9247ec681f3Smrg    *     an alignment which can only happen if we get an alignment from the
9257ec681f3Smrg    *     client (most likely a decoration in the SPIR-V).  If the client has
9267ec681f3Smrg    *     provided us with two conflicting alignments in the deref chain,
9277ec681f3Smrg    *     that's their fault and we can do whatever we want.
9287ec681f3Smrg    *
9297ec681f3Smrg    * In either case, we should be without our rights, at this point, to throw
9307ec681f3Smrg    * away the alignment information on this deref.  However, to be "nice" to
9317ec681f3Smrg    * weird clients, we do one more check.  It really shouldn't happen but
9327ec681f3Smrg    * it's possible that the parent's alignment offset disagrees with the
9337ec681f3Smrg    * cast's alignment offset.  In this case, we consider the cast as
9347ec681f3Smrg    * providing more information (or at least more valid information) and keep
9357ec681f3Smrg    * it even if the align_mul from the parent is larger.
9367ec681f3Smrg    */
9377ec681f3Smrg   assert(cast->cast.align_mul <= parent_mul);
9387ec681f3Smrg   if (parent_offset % cast->cast.align_mul != cast->cast.align_offset)
9397ec681f3Smrg      return false;
9407ec681f3Smrg
9417ec681f3Smrg   /* If we got here, the parent has better alignment information than the
9427ec681f3Smrg    * child and we can get rid of the child alignment information.
9437ec681f3Smrg    */
9447ec681f3Smrg   cast->cast.align_mul = 0;
9457ec681f3Smrg   cast->cast.align_offset = 0;
9467ec681f3Smrg   return true;
9477ec681f3Smrg}
9487ec681f3Smrg
9497e102996Smaya/**
9507e102996Smaya * Remove casts that just wrap other casts.
9517e102996Smaya */
9527e102996Smayastatic bool
9537e102996Smayaopt_remove_cast_cast(nir_deref_instr *cast)
9547e102996Smaya{
9557e102996Smaya   nir_deref_instr *first_cast = cast;
9567e102996Smaya
9577e102996Smaya   while (true) {
9587e102996Smaya      nir_deref_instr *parent = nir_deref_instr_parent(first_cast);
9597e102996Smaya      if (parent == NULL || parent->deref_type != nir_deref_type_cast)
9607e102996Smaya         break;
9617e102996Smaya      first_cast = parent;
9627e102996Smaya   }
9637e102996Smaya   if (cast == first_cast)
9647e102996Smaya      return false;
9657e102996Smaya
9667e102996Smaya   nir_instr_rewrite_src(&cast->instr, &cast->parent,
9677e102996Smaya                         nir_src_for_ssa(first_cast->parent.ssa));
9687e102996Smaya   return true;
9697e102996Smaya}
9707e102996Smaya
9717ec681f3Smrg/* Restrict variable modes in casts.
9727ec681f3Smrg *
9737ec681f3Smrg * If we know from something higher up the deref chain that the deref has a
9747ec681f3Smrg * specific mode, we can cast to more general and back but we can never cast
9757ec681f3Smrg * across modes.  For non-cast derefs, we should only ever do anything here if
9767ec681f3Smrg * the parent eventually comes from a cast that we restricted earlier.
9777ec681f3Smrg */
9787ec681f3Smrgstatic bool
9797ec681f3Smrgopt_restrict_deref_modes(nir_deref_instr *deref)
9807ec681f3Smrg{
9817ec681f3Smrg   if (deref->deref_type == nir_deref_type_var) {
9827ec681f3Smrg      assert(deref->modes == deref->var->data.mode);
9837ec681f3Smrg      return false;
9847ec681f3Smrg   }
9857ec681f3Smrg
9867ec681f3Smrg   nir_deref_instr *parent = nir_src_as_deref(deref->parent);
9877ec681f3Smrg   if (parent == NULL || parent->modes == deref->modes)
9887ec681f3Smrg      return false;
9897ec681f3Smrg
9907ec681f3Smrg   assert(parent->modes & deref->modes);
9917ec681f3Smrg   deref->modes &= parent->modes;
9927ec681f3Smrg   return true;
9937ec681f3Smrg}
9947ec681f3Smrg
9957ec681f3Smrgstatic bool
9967ec681f3Smrgopt_remove_sampler_cast(nir_deref_instr *cast)
9977ec681f3Smrg{
9987ec681f3Smrg   assert(cast->deref_type == nir_deref_type_cast);
9997ec681f3Smrg   nir_deref_instr *parent = nir_src_as_deref(cast->parent);
10007ec681f3Smrg   if (parent == NULL)
10017ec681f3Smrg      return false;
10027ec681f3Smrg
10037ec681f3Smrg   /* Strip both types down to their non-array type and bail if there are any
10047ec681f3Smrg    * discrepancies in array lengths.
10057ec681f3Smrg    */
10067ec681f3Smrg   const struct glsl_type *parent_type = parent->type;
10077ec681f3Smrg   const struct glsl_type *cast_type = cast->type;
10087ec681f3Smrg   while (glsl_type_is_array(parent_type) && glsl_type_is_array(cast_type)) {
10097ec681f3Smrg      if (glsl_get_length(parent_type) != glsl_get_length(cast_type))
10107ec681f3Smrg         return false;
10117ec681f3Smrg      parent_type = glsl_get_array_element(parent_type);
10127ec681f3Smrg      cast_type = glsl_get_array_element(cast_type);
10137ec681f3Smrg   }
10147ec681f3Smrg
10157ec681f3Smrg   if (glsl_type_is_array(parent_type) || glsl_type_is_array(cast_type))
10167ec681f3Smrg      return false;
10177ec681f3Smrg
10187ec681f3Smrg   if (!glsl_type_is_sampler(parent_type) ||
10197ec681f3Smrg       cast_type != glsl_bare_sampler_type())
10207ec681f3Smrg      return false;
10217ec681f3Smrg
10227ec681f3Smrg   /* We're a cast from a more detailed sampler type to a bare sampler */
10237ec681f3Smrg   nir_ssa_def_rewrite_uses(&cast->dest.ssa,
10247ec681f3Smrg                            &parent->dest.ssa);
10257ec681f3Smrg   nir_instr_remove(&cast->instr);
10267ec681f3Smrg
10277ec681f3Smrg   /* Recursively crawl the deref tree and clean up types */
10287ec681f3Smrg   nir_deref_instr_fixup_child_types(parent);
10297ec681f3Smrg
10307ec681f3Smrg   return true;
10317ec681f3Smrg}
10327ec681f3Smrg
10337e102996Smaya/**
10347e102996Smaya * Is this casting a struct to a contained struct.
10357e102996Smaya * struct a { struct b field0 };
10367e102996Smaya * ssa_5 is structa;
10377e102996Smaya * deref_cast (structb *)ssa_5 (function_temp structb);
10387e102996Smaya * converts to
10397e102996Smaya * deref_struct &ssa_5->field0 (function_temp structb);
10407e102996Smaya * This allows subsequent copy propagation to work.
10417e102996Smaya */
10427e102996Smayastatic bool
10437e102996Smayaopt_replace_struct_wrapper_cast(nir_builder *b, nir_deref_instr *cast)
10447e102996Smaya{
10457e102996Smaya   nir_deref_instr *parent = nir_src_as_deref(cast->parent);
10467e102996Smaya   if (!parent)
10477e102996Smaya      return false;
10487e102996Smaya
10497ec681f3Smrg   if (cast->cast.align_mul > 0)
10507ec681f3Smrg      return false;
10517ec681f3Smrg
10527e102996Smaya   if (!glsl_type_is_struct(parent->type))
10537e102996Smaya      return false;
10547e102996Smaya
10557ec681f3Smrg   /* Empty struct */
10567ec681f3Smrg   if (glsl_get_length(parent->type) < 1)
10577ec681f3Smrg      return false;
10587ec681f3Smrg
10597e102996Smaya   if (glsl_get_struct_field_offset(parent->type, 0) != 0)
10607e102996Smaya      return false;
10617e102996Smaya
10627e102996Smaya   if (cast->type != glsl_get_struct_field(parent->type, 0))
10637e102996Smaya      return false;
10647e102996Smaya
10657e102996Smaya   nir_deref_instr *replace = nir_build_deref_struct(b, parent, 0);
10667ec681f3Smrg   nir_ssa_def_rewrite_uses(&cast->dest.ssa, &replace->dest.ssa);
10677e102996Smaya   nir_deref_instr_remove_if_unused(cast);
10687e102996Smaya   return true;
10697e102996Smaya}
10707e102996Smaya
10717e102996Smayastatic bool
10727e102996Smayaopt_deref_cast(nir_builder *b, nir_deref_instr *cast)
10737e102996Smaya{
10747ec681f3Smrg   bool progress = false;
10757ec681f3Smrg
10767ec681f3Smrg   progress |= opt_remove_restricting_cast_alignments(cast);
10777e102996Smaya
10787e102996Smaya   if (opt_replace_struct_wrapper_cast(b, cast))
10797e102996Smaya      return true;
10807e102996Smaya
10817ec681f3Smrg   if (opt_remove_sampler_cast(cast))
10827ec681f3Smrg      return true;
10837ec681f3Smrg
10847ec681f3Smrg   progress |= opt_remove_cast_cast(cast);
10857e102996Smaya   if (!is_trivial_deref_cast(cast))
10867e102996Smaya      return progress;
10877e102996Smaya
10887ec681f3Smrg   /* If this deref still contains useful alignment information, we don't want
10897ec681f3Smrg    * to delete it.
10907ec681f3Smrg    */
10917ec681f3Smrg   if (cast->cast.align_mul > 0)
10927ec681f3Smrg      return progress;
10937ec681f3Smrg
10947e102996Smaya   bool trivial_array_cast = is_trivial_array_deref_cast(cast);
10957e102996Smaya
10967e102996Smaya   assert(cast->dest.is_ssa);
10977e102996Smaya   assert(cast->parent.is_ssa);
10987e102996Smaya
10997e102996Smaya   nir_foreach_use_safe(use_src, &cast->dest.ssa) {
11007e102996Smaya      /* If this isn't a trivial array cast, we can't propagate into
11017e102996Smaya       * ptr_as_array derefs.
11027e102996Smaya       */
11037e102996Smaya      if (is_deref_ptr_as_array(use_src->parent_instr) &&
11047e102996Smaya          !trivial_array_cast)
11057e102996Smaya         continue;
11067e102996Smaya
11077e102996Smaya      nir_instr_rewrite_src(use_src->parent_instr, use_src, cast->parent);
11087e102996Smaya      progress = true;
11097e102996Smaya   }
11107e102996Smaya
11117e102996Smaya   /* If uses would be a bit crazy */
11127ec681f3Smrg   assert(list_is_empty(&cast->dest.ssa.if_uses));
11137ec681f3Smrg
11147ec681f3Smrg   if (nir_deref_instr_remove_if_unused(cast))
11157ec681f3Smrg      progress = true;
11167e102996Smaya
11177e102996Smaya   return progress;
11187e102996Smaya}
11197e102996Smaya
11207e102996Smayastatic bool
11217e102996Smayaopt_deref_ptr_as_array(nir_builder *b, nir_deref_instr *deref)
11227e102996Smaya{
11237e102996Smaya   assert(deref->deref_type == nir_deref_type_ptr_as_array);
11247e102996Smaya
11257e102996Smaya   nir_deref_instr *parent = nir_deref_instr_parent(deref);
11267e102996Smaya
11277e102996Smaya   if (nir_src_is_const(deref->arr.index) &&
11287e102996Smaya       nir_src_as_int(deref->arr.index) == 0) {
11297e102996Smaya      /* If it's a ptr_as_array deref with an index of 0, it does nothing
11307ec681f3Smrg       * and we can just replace its uses with its parent, unless it has
11317ec681f3Smrg       * alignment information.
11327e102996Smaya       *
11337e102996Smaya       * The source of a ptr_as_array deref always has a deref_type of
11347e102996Smaya       * nir_deref_type_array or nir_deref_type_cast.  If it's a cast, it
11357e102996Smaya       * may be trivial and we may be able to get rid of that too.  Any
11367e102996Smaya       * trivial cast of trivial cast cases should be handled already by
11377e102996Smaya       * opt_deref_cast() above.
11387e102996Smaya       */
11397e102996Smaya      if (parent->deref_type == nir_deref_type_cast &&
11407ec681f3Smrg          parent->cast.align_mul == 0 &&
11417e102996Smaya          is_trivial_deref_cast(parent))
11427e102996Smaya         parent = nir_deref_instr_parent(parent);
11437e102996Smaya      nir_ssa_def_rewrite_uses(&deref->dest.ssa,
11447ec681f3Smrg                               &parent->dest.ssa);
11457e102996Smaya      nir_instr_remove(&deref->instr);
11467e102996Smaya      return true;
11477e102996Smaya   }
11487e102996Smaya
11497e102996Smaya   if (parent->deref_type != nir_deref_type_array &&
11507e102996Smaya       parent->deref_type != nir_deref_type_ptr_as_array)
11517e102996Smaya      return false;
11527e102996Smaya
11537e102996Smaya   assert(parent->parent.is_ssa);
11547e102996Smaya   assert(parent->arr.index.is_ssa);
11557e102996Smaya   assert(deref->arr.index.is_ssa);
11567e102996Smaya
11577e102996Smaya   nir_ssa_def *new_idx = nir_iadd(b, parent->arr.index.ssa,
11587e102996Smaya                                      deref->arr.index.ssa);
11597e102996Smaya
11607e102996Smaya   deref->deref_type = parent->deref_type;
11617e102996Smaya   nir_instr_rewrite_src(&deref->instr, &deref->parent, parent->parent);
11627e102996Smaya   nir_instr_rewrite_src(&deref->instr, &deref->arr.index,
11637e102996Smaya                         nir_src_for_ssa(new_idx));
11647e102996Smaya   return true;
11657e102996Smaya}
11667e102996Smaya
11677ec681f3Smrgstatic bool
11687ec681f3Smrgis_vector_bitcast_deref(nir_deref_instr *cast,
11697ec681f3Smrg                        nir_component_mask_t mask,
11707ec681f3Smrg                        bool is_write)
11717ec681f3Smrg{
11727ec681f3Smrg   if (cast->deref_type != nir_deref_type_cast)
11737ec681f3Smrg      return false;
11747ec681f3Smrg
11757ec681f3Smrg   /* Don't throw away useful alignment information */
11767ec681f3Smrg   if (cast->cast.align_mul > 0)
11777ec681f3Smrg      return false;
11787ec681f3Smrg
11797ec681f3Smrg   /* It has to be a cast of another deref */
11807ec681f3Smrg   nir_deref_instr *parent = nir_src_as_deref(cast->parent);
11817ec681f3Smrg   if (parent == NULL)
11827ec681f3Smrg      return false;
11837ec681f3Smrg
11847ec681f3Smrg   /* The parent has to be a vector or scalar */
11857ec681f3Smrg   if (!glsl_type_is_vector_or_scalar(parent->type))
11867ec681f3Smrg      return false;
11877ec681f3Smrg
11887ec681f3Smrg   /* Don't bother with 1-bit types */
11897ec681f3Smrg   unsigned cast_bit_size = glsl_get_bit_size(cast->type);
11907ec681f3Smrg   unsigned parent_bit_size = glsl_get_bit_size(parent->type);
11917ec681f3Smrg   if (cast_bit_size == 1 || parent_bit_size == 1)
11927ec681f3Smrg      return false;
11937ec681f3Smrg
11947ec681f3Smrg   /* A strided vector type means it's not tightly packed */
11957ec681f3Smrg   if (glsl_get_explicit_stride(cast->type) ||
11967ec681f3Smrg       glsl_get_explicit_stride(parent->type))
11977ec681f3Smrg      return false;
11987ec681f3Smrg
11997ec681f3Smrg   assert(cast_bit_size > 0 && cast_bit_size % 8 == 0);
12007ec681f3Smrg   assert(parent_bit_size > 0 && parent_bit_size % 8 == 0);
12017ec681f3Smrg   unsigned bytes_used = util_last_bit(mask) * (cast_bit_size / 8);
12027ec681f3Smrg   unsigned parent_bytes = glsl_get_vector_elements(parent->type) *
12037ec681f3Smrg                           (parent_bit_size / 8);
12047ec681f3Smrg   if (bytes_used > parent_bytes)
12057ec681f3Smrg      return false;
12067ec681f3Smrg
12077ec681f3Smrg   if (is_write && !nir_component_mask_can_reinterpret(mask, cast_bit_size,
12087ec681f3Smrg                                                       parent_bit_size))
12097ec681f3Smrg      return false;
12107ec681f3Smrg
12117ec681f3Smrg   return true;
12127ec681f3Smrg}
12137ec681f3Smrg
12147ec681f3Smrgstatic nir_ssa_def *
12157ec681f3Smrgresize_vector(nir_builder *b, nir_ssa_def *data, unsigned num_components)
12167ec681f3Smrg{
12177ec681f3Smrg   if (num_components == data->num_components)
12187ec681f3Smrg      return data;
12197ec681f3Smrg
12207ec681f3Smrg   unsigned swiz[NIR_MAX_VEC_COMPONENTS] = { 0, };
12217ec681f3Smrg   for (unsigned i = 0; i < MIN2(num_components, data->num_components); i++)
12227ec681f3Smrg      swiz[i] = i;
12237ec681f3Smrg
12247ec681f3Smrg   return nir_swizzle(b, data, swiz, num_components);
12257ec681f3Smrg}
12267ec681f3Smrg
12277ec681f3Smrgstatic bool
12287ec681f3Smrgopt_load_vec_deref(nir_builder *b, nir_intrinsic_instr *load)
12297ec681f3Smrg{
12307ec681f3Smrg   nir_deref_instr *deref = nir_src_as_deref(load->src[0]);
12317ec681f3Smrg   nir_component_mask_t read_mask =
12327ec681f3Smrg      nir_ssa_def_components_read(&load->dest.ssa);
12337ec681f3Smrg
12347ec681f3Smrg   /* LLVM loves take advantage of the fact that vec3s in OpenCL are
12357ec681f3Smrg    * vec4-aligned and so it can just read/write them as vec4s.  This
12367ec681f3Smrg    * results in a LOT of vec4->vec3 casts on loads and stores.
12377ec681f3Smrg    */
12387ec681f3Smrg   if (is_vector_bitcast_deref(deref, read_mask, false)) {
12397ec681f3Smrg      const unsigned old_num_comps = load->dest.ssa.num_components;
12407ec681f3Smrg      const unsigned old_bit_size = load->dest.ssa.bit_size;
12417ec681f3Smrg
12427ec681f3Smrg      nir_deref_instr *parent = nir_src_as_deref(deref->parent);
12437ec681f3Smrg      const unsigned new_num_comps = glsl_get_vector_elements(parent->type);
12447ec681f3Smrg      const unsigned new_bit_size = glsl_get_bit_size(parent->type);
12457ec681f3Smrg
12467ec681f3Smrg      /* Stomp it to reference the parent */
12477ec681f3Smrg      nir_instr_rewrite_src(&load->instr, &load->src[0],
12487ec681f3Smrg                            nir_src_for_ssa(&parent->dest.ssa));
12497ec681f3Smrg      assert(load->dest.is_ssa);
12507ec681f3Smrg      load->dest.ssa.bit_size = new_bit_size;
12517ec681f3Smrg      load->dest.ssa.num_components = new_num_comps;
12527ec681f3Smrg      load->num_components = new_num_comps;
12537ec681f3Smrg
12547ec681f3Smrg      b->cursor = nir_after_instr(&load->instr);
12557ec681f3Smrg      nir_ssa_def *data = &load->dest.ssa;
12567ec681f3Smrg      if (old_bit_size != new_bit_size)
12577ec681f3Smrg         data = nir_bitcast_vector(b, &load->dest.ssa, old_bit_size);
12587ec681f3Smrg      data = resize_vector(b, data, old_num_comps);
12597ec681f3Smrg
12607ec681f3Smrg      nir_ssa_def_rewrite_uses_after(&load->dest.ssa, data,
12617ec681f3Smrg                                     data->parent_instr);
12627ec681f3Smrg      return true;
12637ec681f3Smrg   }
12647ec681f3Smrg
12657ec681f3Smrg   return false;
12667ec681f3Smrg}
12677ec681f3Smrg
12687ec681f3Smrgstatic bool
12697ec681f3Smrgopt_store_vec_deref(nir_builder *b, nir_intrinsic_instr *store)
12707ec681f3Smrg{
12717ec681f3Smrg   nir_deref_instr *deref = nir_src_as_deref(store->src[0]);
12727ec681f3Smrg   nir_component_mask_t write_mask = nir_intrinsic_write_mask(store);
12737ec681f3Smrg
12747ec681f3Smrg   /* LLVM loves take advantage of the fact that vec3s in OpenCL are
12757ec681f3Smrg    * vec4-aligned and so it can just read/write them as vec4s.  This
12767ec681f3Smrg    * results in a LOT of vec4->vec3 casts on loads and stores.
12777ec681f3Smrg    */
12787ec681f3Smrg   if (is_vector_bitcast_deref(deref, write_mask, true)) {
12797ec681f3Smrg      assert(store->src[1].is_ssa);
12807ec681f3Smrg      nir_ssa_def *data = store->src[1].ssa;
12817ec681f3Smrg
12827ec681f3Smrg      const unsigned old_bit_size = data->bit_size;
12837ec681f3Smrg
12847ec681f3Smrg      nir_deref_instr *parent = nir_src_as_deref(deref->parent);
12857ec681f3Smrg      const unsigned new_num_comps = glsl_get_vector_elements(parent->type);
12867ec681f3Smrg      const unsigned new_bit_size = glsl_get_bit_size(parent->type);
12877ec681f3Smrg
12887ec681f3Smrg      nir_instr_rewrite_src(&store->instr, &store->src[0],
12897ec681f3Smrg                            nir_src_for_ssa(&parent->dest.ssa));
12907ec681f3Smrg
12917ec681f3Smrg      /* Restrict things down as needed so the bitcast doesn't fail */
12927ec681f3Smrg      data = nir_channels(b, data, (1 << util_last_bit(write_mask)) - 1);
12937ec681f3Smrg      if (old_bit_size != new_bit_size)
12947ec681f3Smrg         data = nir_bitcast_vector(b, data, new_bit_size);
12957ec681f3Smrg      data = resize_vector(b, data, new_num_comps);
12967ec681f3Smrg      nir_instr_rewrite_src(&store->instr, &store->src[1],
12977ec681f3Smrg                            nir_src_for_ssa(data));
12987ec681f3Smrg      store->num_components = new_num_comps;
12997ec681f3Smrg
13007ec681f3Smrg      /* Adjust the write mask */
13017ec681f3Smrg      write_mask = nir_component_mask_reinterpret(write_mask, old_bit_size,
13027ec681f3Smrg                                                  new_bit_size);
13037ec681f3Smrg      nir_intrinsic_set_write_mask(store, write_mask);
13047ec681f3Smrg      return true;
13057ec681f3Smrg   }
13067ec681f3Smrg
13077ec681f3Smrg   return false;
13087ec681f3Smrg}
13097ec681f3Smrg
13107ec681f3Smrgstatic bool
13117ec681f3Smrgopt_known_deref_mode_is(nir_builder *b, nir_intrinsic_instr *intrin)
13127ec681f3Smrg{
13137ec681f3Smrg   nir_variable_mode modes = nir_intrinsic_memory_modes(intrin);
13147ec681f3Smrg   nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
13157ec681f3Smrg   if (deref == NULL)
13167ec681f3Smrg      return false;
13177ec681f3Smrg
13187ec681f3Smrg   nir_ssa_def *deref_is = NULL;
13197ec681f3Smrg
13207ec681f3Smrg   if (nir_deref_mode_must_be(deref, modes))
13217ec681f3Smrg      deref_is = nir_imm_true(b);
13227ec681f3Smrg
13237ec681f3Smrg   if (!nir_deref_mode_may_be(deref, modes))
13247ec681f3Smrg      deref_is = nir_imm_false(b);
13257ec681f3Smrg
13267ec681f3Smrg   if (deref_is == NULL)
13277ec681f3Smrg      return false;
13287ec681f3Smrg
13297ec681f3Smrg   nir_ssa_def_rewrite_uses(&intrin->dest.ssa, deref_is);
13307ec681f3Smrg   nir_instr_remove(&intrin->instr);
13317ec681f3Smrg   return true;
13327ec681f3Smrg}
13337ec681f3Smrg
13347e102996Smayabool
13357e102996Smayanir_opt_deref_impl(nir_function_impl *impl)
13367e102996Smaya{
13377e102996Smaya   bool progress = false;
13387e102996Smaya
13397e102996Smaya   nir_builder b;
13407e102996Smaya   nir_builder_init(&b, impl);
13417e102996Smaya
13427e102996Smaya   nir_foreach_block(block, impl) {
13437e102996Smaya      nir_foreach_instr_safe(instr, block) {
13447e102996Smaya         b.cursor = nir_before_instr(instr);
13457e102996Smaya
13467ec681f3Smrg         switch (instr->type) {
13477ec681f3Smrg         case nir_instr_type_deref: {
13487ec681f3Smrg            nir_deref_instr *deref = nir_instr_as_deref(instr);
13497ec681f3Smrg
13507ec681f3Smrg            if (opt_restrict_deref_modes(deref))
13517e102996Smaya               progress = true;
13527ec681f3Smrg
13537ec681f3Smrg            switch (deref->deref_type) {
13547ec681f3Smrg            case nir_deref_type_ptr_as_array:
13557ec681f3Smrg               if (opt_deref_ptr_as_array(&b, deref))
13567ec681f3Smrg                  progress = true;
13577ec681f3Smrg               break;
13587ec681f3Smrg
13597ec681f3Smrg            case nir_deref_type_cast:
13607ec681f3Smrg               if (opt_deref_cast(&b, deref))
13617ec681f3Smrg                  progress = true;
13627ec681f3Smrg               break;
13637ec681f3Smrg
13647ec681f3Smrg            default:
13657ec681f3Smrg               /* Do nothing */
13667ec681f3Smrg               break;
13677ec681f3Smrg            }
13687e102996Smaya            break;
13697ec681f3Smrg         }
13707e102996Smaya
13717ec681f3Smrg         case nir_instr_type_intrinsic: {
13727ec681f3Smrg            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
13737ec681f3Smrg            switch (intrin->intrinsic) {
13747ec681f3Smrg            case nir_intrinsic_load_deref:
13757ec681f3Smrg               if (opt_load_vec_deref(&b, intrin))
13767ec681f3Smrg                  progress = true;
13777ec681f3Smrg               break;
13787ec681f3Smrg
13797ec681f3Smrg            case nir_intrinsic_store_deref:
13807ec681f3Smrg               if (opt_store_vec_deref(&b, intrin))
13817ec681f3Smrg                  progress = true;
13827ec681f3Smrg               break;
13837ec681f3Smrg
13847ec681f3Smrg            case nir_intrinsic_deref_mode_is:
13857ec681f3Smrg               if (opt_known_deref_mode_is(&b, intrin))
13867ec681f3Smrg                  progress = true;
13877ec681f3Smrg               break;
13887ec681f3Smrg
13897ec681f3Smrg            default:
13907ec681f3Smrg               /* Do nothing */
13917ec681f3Smrg               break;
13927ec681f3Smrg            }
13937e102996Smaya            break;
13947ec681f3Smrg         }
13957e102996Smaya
13967e102996Smaya         default:
13977e102996Smaya            /* Do nothing */
13987e102996Smaya            break;
13997e102996Smaya         }
14007e102996Smaya      }
14017e102996Smaya   }
14027e102996Smaya
14037e102996Smaya   if (progress) {
14047e102996Smaya      nir_metadata_preserve(impl, nir_metadata_block_index |
14057e102996Smaya                                  nir_metadata_dominance);
14067e102996Smaya   } else {
14077ec681f3Smrg      nir_metadata_preserve(impl, nir_metadata_all);
14087e102996Smaya   }
14097e102996Smaya
14107e102996Smaya   return progress;
14117e102996Smaya}
14127e102996Smaya
14137e102996Smayabool
14147e102996Smayanir_opt_deref(nir_shader *shader)
14157e102996Smaya{
14167e102996Smaya   bool progress = false;
14177e102996Smaya
14187e102996Smaya   nir_foreach_function(func, shader) {
14197e102996Smaya      if (func->impl && nir_opt_deref_impl(func->impl))
14207e102996Smaya         progress = true;
14217e102996Smaya   }
14227e102996Smaya
14237e102996Smaya   return progress;
14247e102996Smaya}
1425