/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
23b8e80941Smrg
24b8e80941Smrg#include "nir.h"
25b8e80941Smrg#include "nir_builder.h"
26b8e80941Smrg#include "nir_deref.h"
27b8e80941Smrg#include "nir_vla.h"
28b8e80941Smrg
29b8e80941Smrg#include "util/u_math.h"
30b8e80941Smrg
31b8e80941Smrg
32b8e80941Smrgstruct split_var_state {
33b8e80941Smrg   void *mem_ctx;
34b8e80941Smrg
35b8e80941Smrg   nir_shader *shader;
36b8e80941Smrg   nir_function_impl *impl;
37b8e80941Smrg
38b8e80941Smrg   nir_variable *base_var;
39b8e80941Smrg};
40b8e80941Smrg
41b8e80941Smrgstruct field {
42b8e80941Smrg   struct field *parent;
43b8e80941Smrg
44b8e80941Smrg   const struct glsl_type *type;
45b8e80941Smrg
46b8e80941Smrg   unsigned num_fields;
47b8e80941Smrg   struct field *fields;
48b8e80941Smrg
49b8e80941Smrg   nir_variable *var;
50b8e80941Smrg};
51b8e80941Smrg
/* Re-creates the array nesting of array_type around type.
 *
 * When array_type is not an array, type is returned untouched.  Otherwise
 * the result has one array level (with matching length and no explicit
 * stride) for every array level of array_type, with type as the innermost
 * element.  Array types with an explicit stride are not expected here.
 */
static const struct glsl_type *
wrap_type_in_array(const struct glsl_type *type,
                   const struct glsl_type *array_type)
{
   if (glsl_type_is_array(array_type)) {
      /* Build the inner levels first so the outermost level stays outermost */
      const struct glsl_type *inner =
         wrap_type_in_array(type, glsl_get_array_element(array_type));

      assert(glsl_get_explicit_stride(array_type) == 0);
      const unsigned length = glsl_get_length(array_type);
      return glsl_array_type(inner, length, 0);
   }

   return type;
}
64b8e80941Smrg
/* Counts the array/matrix levels wrapped around a vector or scalar.
 *
 * Returns the number of levels peeled off (0 for a bare vector or scalar),
 * or -1 when whatever is left after peeling every array/matrix level is not
 * a vector or scalar.
 */
static int
num_array_levels_in_array_of_vector_type(const struct glsl_type *type)
{
   int depth = 0;

   for (; glsl_type_is_array_or_matrix(type);
        type = glsl_get_array_element(type))
      depth++;

   /* Anything but a vector/scalar at the bottom is not an array of vectors */
   return glsl_type_is_vector_or_scalar(type) ? depth : -1;
}
81b8e80941Smrg
82b8e80941Smrgstatic void
83b8e80941Smrginit_field_for_type(struct field *field, struct field *parent,
84b8e80941Smrg                    const struct glsl_type *type,
85b8e80941Smrg                    const char *name,
86b8e80941Smrg                    struct split_var_state *state)
87b8e80941Smrg{
88b8e80941Smrg   *field = (struct field) {
89b8e80941Smrg      .parent = parent,
90b8e80941Smrg      .type = type,
91b8e80941Smrg   };
92b8e80941Smrg
93b8e80941Smrg   const struct glsl_type *struct_type = glsl_without_array(type);
94b8e80941Smrg   if (glsl_type_is_struct_or_ifc(struct_type)) {
95b8e80941Smrg      field->num_fields = glsl_get_length(struct_type),
96b8e80941Smrg      field->fields = ralloc_array(state->mem_ctx, struct field,
97b8e80941Smrg                                   field->num_fields);
98b8e80941Smrg      for (unsigned i = 0; i < field->num_fields; i++) {
99b8e80941Smrg         char *field_name = NULL;
100b8e80941Smrg         if (name) {
101b8e80941Smrg            field_name = ralloc_asprintf(state->mem_ctx, "%s_%s", name,
102b8e80941Smrg                                         glsl_get_struct_elem_name(struct_type, i));
103b8e80941Smrg         } else {
104b8e80941Smrg            field_name = ralloc_asprintf(state->mem_ctx, "{unnamed %s}_%s",
105b8e80941Smrg                                         glsl_get_type_name(struct_type),
106b8e80941Smrg                                         glsl_get_struct_elem_name(struct_type, i));
107b8e80941Smrg         }
108b8e80941Smrg         init_field_for_type(&field->fields[i], field,
109b8e80941Smrg                             glsl_get_struct_field(struct_type, i),
110b8e80941Smrg                             field_name, state);
111b8e80941Smrg      }
112b8e80941Smrg   } else {
113b8e80941Smrg      const struct glsl_type *var_type = type;
114b8e80941Smrg      for (struct field *f = field->parent; f; f = f->parent)
115b8e80941Smrg         var_type = wrap_type_in_array(var_type, f->type);
116b8e80941Smrg
117b8e80941Smrg      nir_variable_mode mode = state->base_var->data.mode;
118b8e80941Smrg      if (mode == nir_var_function_temp) {
119b8e80941Smrg         field->var = nir_local_variable_create(state->impl, var_type, name);
120b8e80941Smrg      } else {
121b8e80941Smrg         field->var = nir_variable_create(state->shader, mode, var_type, name);
122b8e80941Smrg      }
123b8e80941Smrg   }
124b8e80941Smrg}
125b8e80941Smrg
126b8e80941Smrgstatic bool
127b8e80941Smrgsplit_var_list_structs(nir_shader *shader,
128b8e80941Smrg                       nir_function_impl *impl,
129b8e80941Smrg                       struct exec_list *vars,
130b8e80941Smrg                       struct hash_table *var_field_map,
131b8e80941Smrg                       void *mem_ctx)
132b8e80941Smrg{
133b8e80941Smrg   struct split_var_state state = {
134b8e80941Smrg      .mem_ctx = mem_ctx,
135b8e80941Smrg      .shader = shader,
136b8e80941Smrg      .impl = impl,
137b8e80941Smrg   };
138b8e80941Smrg
139b8e80941Smrg   struct exec_list split_vars;
140b8e80941Smrg   exec_list_make_empty(&split_vars);
141b8e80941Smrg
142b8e80941Smrg   /* To avoid list confusion (we'll be adding things as we split variables),
143b8e80941Smrg    * pull all of the variables we plan to split off of the list
144b8e80941Smrg    */
145b8e80941Smrg   nir_foreach_variable_safe(var, vars) {
146b8e80941Smrg      if (!glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
147b8e80941Smrg         continue;
148b8e80941Smrg
149b8e80941Smrg      exec_node_remove(&var->node);
150b8e80941Smrg      exec_list_push_tail(&split_vars, &var->node);
151b8e80941Smrg   }
152b8e80941Smrg
153b8e80941Smrg   nir_foreach_variable(var, &split_vars) {
154b8e80941Smrg      state.base_var = var;
155b8e80941Smrg
156b8e80941Smrg      struct field *root_field = ralloc(mem_ctx, struct field);
157b8e80941Smrg      init_field_for_type(root_field, NULL, var->type, var->name, &state);
158b8e80941Smrg      _mesa_hash_table_insert(var_field_map, var, root_field);
159b8e80941Smrg   }
160b8e80941Smrg
161b8e80941Smrg   return !exec_list_is_empty(&split_vars);
162b8e80941Smrg}
163b8e80941Smrg
164b8e80941Smrgstatic void
165b8e80941Smrgsplit_struct_derefs_impl(nir_function_impl *impl,
166b8e80941Smrg                         struct hash_table *var_field_map,
167b8e80941Smrg                         nir_variable_mode modes,
168b8e80941Smrg                         void *mem_ctx)
169b8e80941Smrg{
170b8e80941Smrg   nir_builder b;
171b8e80941Smrg   nir_builder_init(&b, impl);
172b8e80941Smrg
173b8e80941Smrg   nir_foreach_block(block, impl) {
174b8e80941Smrg      nir_foreach_instr_safe(instr, block) {
175b8e80941Smrg         if (instr->type != nir_instr_type_deref)
176b8e80941Smrg            continue;
177b8e80941Smrg
178b8e80941Smrg         nir_deref_instr *deref = nir_instr_as_deref(instr);
179b8e80941Smrg         if (!(deref->mode & modes))
180b8e80941Smrg            continue;
181b8e80941Smrg
182b8e80941Smrg         /* Clean up any dead derefs we find lying around.  They may refer to
183b8e80941Smrg          * variables we're planning to split.
184b8e80941Smrg          */
185b8e80941Smrg         if (nir_deref_instr_remove_if_unused(deref))
186b8e80941Smrg            continue;
187b8e80941Smrg
188b8e80941Smrg         if (!glsl_type_is_vector_or_scalar(deref->type))
189b8e80941Smrg            continue;
190b8e80941Smrg
191b8e80941Smrg         nir_variable *base_var = nir_deref_instr_get_variable(deref);
192b8e80941Smrg         struct hash_entry *entry =
193b8e80941Smrg            _mesa_hash_table_search(var_field_map, base_var);
194b8e80941Smrg         if (!entry)
195b8e80941Smrg            continue;
196b8e80941Smrg
197b8e80941Smrg         struct field *root_field = entry->data;
198b8e80941Smrg
199b8e80941Smrg         nir_deref_path path;
200b8e80941Smrg         nir_deref_path_init(&path, deref, mem_ctx);
201b8e80941Smrg
202b8e80941Smrg         struct field *tail_field = root_field;
203b8e80941Smrg         for (unsigned i = 0; path.path[i]; i++) {
204b8e80941Smrg            if (path.path[i]->deref_type != nir_deref_type_struct)
205b8e80941Smrg               continue;
206b8e80941Smrg
207b8e80941Smrg            assert(i > 0);
208b8e80941Smrg            assert(glsl_type_is_struct_or_ifc(path.path[i - 1]->type));
209b8e80941Smrg            assert(path.path[i - 1]->type ==
210b8e80941Smrg                   glsl_without_array(tail_field->type));
211b8e80941Smrg
212b8e80941Smrg            tail_field = &tail_field->fields[path.path[i]->strct.index];
213b8e80941Smrg         }
214b8e80941Smrg         nir_variable *split_var = tail_field->var;
215b8e80941Smrg
216b8e80941Smrg         nir_deref_instr *new_deref = NULL;
217b8e80941Smrg         for (unsigned i = 0; path.path[i]; i++) {
218b8e80941Smrg            nir_deref_instr *p = path.path[i];
219b8e80941Smrg            b.cursor = nir_after_instr(&p->instr);
220b8e80941Smrg
221b8e80941Smrg            switch (p->deref_type) {
222b8e80941Smrg            case nir_deref_type_var:
223b8e80941Smrg               assert(new_deref == NULL);
224b8e80941Smrg               new_deref = nir_build_deref_var(&b, split_var);
225b8e80941Smrg               break;
226b8e80941Smrg
227b8e80941Smrg            case nir_deref_type_array:
228b8e80941Smrg            case nir_deref_type_array_wildcard:
229b8e80941Smrg               new_deref = nir_build_deref_follower(&b, new_deref, p);
230b8e80941Smrg               break;
231b8e80941Smrg
232b8e80941Smrg            case nir_deref_type_struct:
233b8e80941Smrg               /* Nothing to do; we're splitting structs */
234b8e80941Smrg               break;
235b8e80941Smrg
236b8e80941Smrg            default:
237b8e80941Smrg               unreachable("Invalid deref type in path");
238b8e80941Smrg            }
239b8e80941Smrg         }
240b8e80941Smrg
241b8e80941Smrg         assert(new_deref->type == deref->type);
242b8e80941Smrg         nir_ssa_def_rewrite_uses(&deref->dest.ssa,
243b8e80941Smrg                                  nir_src_for_ssa(&new_deref->dest.ssa));
244b8e80941Smrg         nir_deref_instr_remove_if_unused(deref);
245b8e80941Smrg      }
246b8e80941Smrg   }
247b8e80941Smrg}
248b8e80941Smrg
249b8e80941Smrg/** A pass for splitting structs into multiple variables
250b8e80941Smrg *
251b8e80941Smrg * This pass splits arrays of structs into multiple variables, one for each
252b8e80941Smrg * (possibly nested) structure member.  After this pass completes, no
253b8e80941Smrg * variables of the given mode will contain a struct type.
254b8e80941Smrg */
255b8e80941Smrgbool
256b8e80941Smrgnir_split_struct_vars(nir_shader *shader, nir_variable_mode modes)
257b8e80941Smrg{
258b8e80941Smrg   void *mem_ctx = ralloc_context(NULL);
259b8e80941Smrg   struct hash_table *var_field_map =
260b8e80941Smrg      _mesa_pointer_hash_table_create(mem_ctx);
261b8e80941Smrg
262b8e80941Smrg   assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes);
263b8e80941Smrg
264b8e80941Smrg   bool has_global_splits = false;
265b8e80941Smrg   if (modes & nir_var_shader_temp) {
266b8e80941Smrg      has_global_splits = split_var_list_structs(shader, NULL,
267b8e80941Smrg                                                 &shader->globals,
268b8e80941Smrg                                                 var_field_map, mem_ctx);
269b8e80941Smrg   }
270b8e80941Smrg
271b8e80941Smrg   bool progress = false;
272b8e80941Smrg   nir_foreach_function(function, shader) {
273b8e80941Smrg      if (!function->impl)
274b8e80941Smrg         continue;
275b8e80941Smrg
276b8e80941Smrg      bool has_local_splits = false;
277b8e80941Smrg      if (modes & nir_var_function_temp) {
278b8e80941Smrg         has_local_splits = split_var_list_structs(shader, function->impl,
279b8e80941Smrg                                                   &function->impl->locals,
280b8e80941Smrg                                                   var_field_map, mem_ctx);
281b8e80941Smrg      }
282b8e80941Smrg
283b8e80941Smrg      if (has_global_splits || has_local_splits) {
284b8e80941Smrg         split_struct_derefs_impl(function->impl, var_field_map,
285b8e80941Smrg                                  modes, mem_ctx);
286b8e80941Smrg
287b8e80941Smrg         nir_metadata_preserve(function->impl, nir_metadata_block_index |
288b8e80941Smrg                                               nir_metadata_dominance);
289b8e80941Smrg         progress = true;
290b8e80941Smrg      }
291b8e80941Smrg   }
292b8e80941Smrg
293b8e80941Smrg   ralloc_free(mem_ctx);
294b8e80941Smrg
295b8e80941Smrg   return progress;
296b8e80941Smrg}
297b8e80941Smrg
/* Per-level bookkeeping for one array (or matrix) level of a variable. */
struct array_level_info {
   /* Number of elements at this level */
   unsigned array_len;
   /* Whether this level is to be broken up into separate variables */
   bool split;
};
302b8e80941Smrg
303b8e80941Smrgstruct array_split {
304b8e80941Smrg   /* Only set if this is the tail end of the splitting */
305b8e80941Smrg   nir_variable *var;
306b8e80941Smrg
307b8e80941Smrg   unsigned num_splits;
308b8e80941Smrg   struct array_split *splits;
309b8e80941Smrg};
310b8e80941Smrg
311b8e80941Smrgstruct array_var_info {
312b8e80941Smrg   nir_variable *base_var;
313b8e80941Smrg
314b8e80941Smrg   const struct glsl_type *split_var_type;
315b8e80941Smrg
316b8e80941Smrg   bool split_var;
317b8e80941Smrg   struct array_split root_split;
318b8e80941Smrg
319b8e80941Smrg   unsigned num_levels;
320b8e80941Smrg   struct array_level_info levels[0];
321b8e80941Smrg};
322b8e80941Smrg
323b8e80941Smrgstatic bool
324b8e80941Smrginit_var_list_array_infos(struct exec_list *vars,
325b8e80941Smrg                          struct hash_table *var_info_map,
326b8e80941Smrg                          void *mem_ctx)
327b8e80941Smrg{
328b8e80941Smrg   bool has_array = false;
329b8e80941Smrg
330b8e80941Smrg   nir_foreach_variable(var, vars) {
331b8e80941Smrg      int num_levels = num_array_levels_in_array_of_vector_type(var->type);
332b8e80941Smrg      if (num_levels <= 0)
333b8e80941Smrg         continue;
334b8e80941Smrg
335b8e80941Smrg      struct array_var_info *info =
336b8e80941Smrg         rzalloc_size(mem_ctx, sizeof(*info) +
337b8e80941Smrg                               num_levels * sizeof(info->levels[0]));
338b8e80941Smrg
339b8e80941Smrg      info->base_var = var;
340b8e80941Smrg      info->num_levels = num_levels;
341b8e80941Smrg
342b8e80941Smrg      const struct glsl_type *type = var->type;
343b8e80941Smrg      for (int i = 0; i < num_levels; i++) {
344b8e80941Smrg         info->levels[i].array_len = glsl_get_length(type);
345b8e80941Smrg         type = glsl_get_array_element(type);
346b8e80941Smrg
347b8e80941Smrg         /* All levels start out initially as split */
348b8e80941Smrg         info->levels[i].split = true;
349b8e80941Smrg      }
350b8e80941Smrg
351b8e80941Smrg      _mesa_hash_table_insert(var_info_map, var, info);
352b8e80941Smrg      has_array = true;
353b8e80941Smrg   }
354b8e80941Smrg
355b8e80941Smrg   return has_array;
356b8e80941Smrg}
357b8e80941Smrg
358b8e80941Smrgstatic struct array_var_info *
359b8e80941Smrgget_array_var_info(nir_variable *var,
360b8e80941Smrg                   struct hash_table *var_info_map)
361b8e80941Smrg{
362b8e80941Smrg   struct hash_entry *entry =
363b8e80941Smrg      _mesa_hash_table_search(var_info_map, var);
364b8e80941Smrg   return entry ? entry->data : NULL;
365b8e80941Smrg}
366b8e80941Smrg
367b8e80941Smrgstatic struct array_var_info *
368b8e80941Smrgget_array_deref_info(nir_deref_instr *deref,
369b8e80941Smrg                     struct hash_table *var_info_map,
370b8e80941Smrg                     nir_variable_mode modes)
371b8e80941Smrg{
372b8e80941Smrg   if (!(deref->mode & modes))
373b8e80941Smrg      return NULL;
374b8e80941Smrg
375b8e80941Smrg   return get_array_var_info(nir_deref_instr_get_variable(deref),
376b8e80941Smrg                             var_info_map);
377b8e80941Smrg}
378b8e80941Smrg
379b8e80941Smrgstatic void
380b8e80941Smrgmark_array_deref_used(nir_deref_instr *deref,
381b8e80941Smrg                      struct hash_table *var_info_map,
382b8e80941Smrg                      nir_variable_mode modes,
383b8e80941Smrg                      void *mem_ctx)
384b8e80941Smrg{
385b8e80941Smrg   struct array_var_info *info =
386b8e80941Smrg      get_array_deref_info(deref, var_info_map, modes);
387b8e80941Smrg   if (!info)
388b8e80941Smrg      return;
389b8e80941Smrg
390b8e80941Smrg   nir_deref_path path;
391b8e80941Smrg   nir_deref_path_init(&path, deref, mem_ctx);
392b8e80941Smrg
393b8e80941Smrg   /* Walk the path and look for indirects.  If we have an array deref with an
394b8e80941Smrg    * indirect, mark the given level as not being split.
395b8e80941Smrg    */
396b8e80941Smrg   for (unsigned i = 0; i < info->num_levels; i++) {
397b8e80941Smrg      nir_deref_instr *p = path.path[i + 1];
398b8e80941Smrg      if (p->deref_type == nir_deref_type_array &&
399b8e80941Smrg          !nir_src_is_const(p->arr.index))
400b8e80941Smrg         info->levels[i].split = false;
401b8e80941Smrg   }
402b8e80941Smrg}
403b8e80941Smrg
404b8e80941Smrgstatic void
405b8e80941Smrgmark_array_usage_impl(nir_function_impl *impl,
406b8e80941Smrg                      struct hash_table *var_info_map,
407b8e80941Smrg                      nir_variable_mode modes,
408b8e80941Smrg                      void *mem_ctx)
409b8e80941Smrg{
410b8e80941Smrg   nir_foreach_block(block, impl) {
411b8e80941Smrg      nir_foreach_instr(instr, block) {
412b8e80941Smrg         if (instr->type != nir_instr_type_intrinsic)
413b8e80941Smrg            continue;
414b8e80941Smrg
415b8e80941Smrg         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
416b8e80941Smrg         switch (intrin->intrinsic) {
417b8e80941Smrg         case nir_intrinsic_copy_deref:
418b8e80941Smrg            mark_array_deref_used(nir_src_as_deref(intrin->src[1]),
419b8e80941Smrg                                  var_info_map, modes, mem_ctx);
420b8e80941Smrg            /* Fall Through */
421b8e80941Smrg
422b8e80941Smrg         case nir_intrinsic_load_deref:
423b8e80941Smrg         case nir_intrinsic_store_deref:
424b8e80941Smrg            mark_array_deref_used(nir_src_as_deref(intrin->src[0]),
425b8e80941Smrg                                  var_info_map, modes, mem_ctx);
426b8e80941Smrg            break;
427b8e80941Smrg
428b8e80941Smrg         default:
429b8e80941Smrg            break;
430b8e80941Smrg         }
431b8e80941Smrg      }
432b8e80941Smrg   }
433b8e80941Smrg}
434b8e80941Smrg
435b8e80941Smrgstatic void
436b8e80941Smrgcreate_split_array_vars(struct array_var_info *var_info,
437b8e80941Smrg                        unsigned level,
438b8e80941Smrg                        struct array_split *split,
439b8e80941Smrg                        const char *name,
440b8e80941Smrg                        nir_shader *shader,
441b8e80941Smrg                        nir_function_impl *impl,
442b8e80941Smrg                        void *mem_ctx)
443b8e80941Smrg{
444b8e80941Smrg   while (level < var_info->num_levels && !var_info->levels[level].split) {
445b8e80941Smrg      name = ralloc_asprintf(mem_ctx, "%s[*]", name);
446b8e80941Smrg      level++;
447b8e80941Smrg   }
448b8e80941Smrg
449b8e80941Smrg   if (level == var_info->num_levels) {
450b8e80941Smrg      /* We add parens to the variable name so it looks like "(foo[2][*])" so
451b8e80941Smrg       * that further derefs will look like "(foo[2][*])[ssa_6]"
452b8e80941Smrg       */
453b8e80941Smrg      name = ralloc_asprintf(mem_ctx, "(%s)", name);
454b8e80941Smrg
455b8e80941Smrg      nir_variable_mode mode = var_info->base_var->data.mode;
456b8e80941Smrg      if (mode == nir_var_function_temp) {
457b8e80941Smrg         split->var = nir_local_variable_create(impl,
458b8e80941Smrg                                                var_info->split_var_type, name);
459b8e80941Smrg      } else {
460b8e80941Smrg         split->var = nir_variable_create(shader, mode,
461b8e80941Smrg                                          var_info->split_var_type, name);
462b8e80941Smrg      }
463b8e80941Smrg   } else {
464b8e80941Smrg      assert(var_info->levels[level].split);
465b8e80941Smrg      split->num_splits = var_info->levels[level].array_len;
466b8e80941Smrg      split->splits = rzalloc_array(mem_ctx, struct array_split,
467b8e80941Smrg                                    split->num_splits);
468b8e80941Smrg      for (unsigned i = 0; i < split->num_splits; i++) {
469b8e80941Smrg         create_split_array_vars(var_info, level + 1, &split->splits[i],
470b8e80941Smrg                                 ralloc_asprintf(mem_ctx, "%s[%d]", name, i),
471b8e80941Smrg                                 shader, impl, mem_ctx);
472b8e80941Smrg      }
473b8e80941Smrg   }
474b8e80941Smrg}
475b8e80941Smrg
476b8e80941Smrgstatic bool
477b8e80941Smrgsplit_var_list_arrays(nir_shader *shader,
478b8e80941Smrg                      nir_function_impl *impl,
479b8e80941Smrg                      struct exec_list *vars,
480b8e80941Smrg                      struct hash_table *var_info_map,
481b8e80941Smrg                      void *mem_ctx)
482b8e80941Smrg{
483b8e80941Smrg   struct exec_list split_vars;
484b8e80941Smrg   exec_list_make_empty(&split_vars);
485b8e80941Smrg
486b8e80941Smrg   nir_foreach_variable_safe(var, vars) {
487b8e80941Smrg      struct array_var_info *info = get_array_var_info(var, var_info_map);
488b8e80941Smrg      if (!info)
489b8e80941Smrg         continue;
490b8e80941Smrg
491b8e80941Smrg      bool has_split = false;
492b8e80941Smrg      const struct glsl_type *split_type =
493b8e80941Smrg         glsl_without_array_or_matrix(var->type);
494b8e80941Smrg      for (int i = info->num_levels - 1; i >= 0; i--) {
495b8e80941Smrg         if (info->levels[i].split) {
496b8e80941Smrg            has_split = true;
497b8e80941Smrg            continue;
498b8e80941Smrg         }
499b8e80941Smrg
500b8e80941Smrg         /* If the original type was a matrix type, we'd like to keep that so
501b8e80941Smrg          * we don't convert matrices into arrays.
502b8e80941Smrg          */
503b8e80941Smrg         if (i == info->num_levels - 1 &&
504b8e80941Smrg             glsl_type_is_matrix(glsl_without_array(var->type))) {
505b8e80941Smrg            split_type = glsl_matrix_type(glsl_get_base_type(split_type),
506b8e80941Smrg                                          glsl_get_components(split_type),
507b8e80941Smrg                                          info->levels[i].array_len);
508b8e80941Smrg         } else {
509b8e80941Smrg            split_type = glsl_array_type(split_type, info->levels[i].array_len, 0);
510b8e80941Smrg         }
511b8e80941Smrg      }
512b8e80941Smrg
513b8e80941Smrg      if (has_split) {
514b8e80941Smrg         info->split_var_type = split_type;
515b8e80941Smrg         /* To avoid list confusion (we'll be adding things as we split
516b8e80941Smrg          * variables), pull all of the variables we plan to split off of the
517b8e80941Smrg          * main variable list.
518b8e80941Smrg          */
519b8e80941Smrg         exec_node_remove(&var->node);
520b8e80941Smrg         exec_list_push_tail(&split_vars, &var->node);
521b8e80941Smrg      } else {
522b8e80941Smrg         assert(split_type == glsl_get_bare_type(var->type));
523b8e80941Smrg         /* If we're not modifying this variable, delete the info so we skip
524b8e80941Smrg          * it faster in later passes.
525b8e80941Smrg          */
526b8e80941Smrg         _mesa_hash_table_remove_key(var_info_map, var);
527b8e80941Smrg      }
528b8e80941Smrg   }
529b8e80941Smrg
530b8e80941Smrg   nir_foreach_variable(var, &split_vars) {
531b8e80941Smrg      struct array_var_info *info = get_array_var_info(var, var_info_map);
532b8e80941Smrg      create_split_array_vars(info, 0, &info->root_split, var->name,
533b8e80941Smrg                              shader, impl, mem_ctx);
534b8e80941Smrg   }
535b8e80941Smrg
536b8e80941Smrg   return !exec_list_is_empty(&split_vars);
537b8e80941Smrg}
538b8e80941Smrg
539b8e80941Smrgstatic bool
540b8e80941Smrgderef_has_split_wildcard(nir_deref_path *path,
541b8e80941Smrg                         struct array_var_info *info)
542b8e80941Smrg{
543b8e80941Smrg   if (info == NULL)
544b8e80941Smrg      return false;
545b8e80941Smrg
546b8e80941Smrg   assert(path->path[0]->var == info->base_var);
547b8e80941Smrg   for (unsigned i = 0; i < info->num_levels; i++) {
548b8e80941Smrg      if (path->path[i + 1]->deref_type == nir_deref_type_array_wildcard &&
549b8e80941Smrg          info->levels[i].split)
550b8e80941Smrg         return true;
551b8e80941Smrg   }
552b8e80941Smrg
553b8e80941Smrg   return false;
554b8e80941Smrg}
555b8e80941Smrg
556b8e80941Smrgstatic bool
557b8e80941Smrgarray_path_is_out_of_bounds(nir_deref_path *path,
558b8e80941Smrg                            struct array_var_info *info)
559b8e80941Smrg{
560b8e80941Smrg   if (info == NULL)
561b8e80941Smrg      return false;
562b8e80941Smrg
563b8e80941Smrg   assert(path->path[0]->var == info->base_var);
564b8e80941Smrg   for (unsigned i = 0; i < info->num_levels; i++) {
565b8e80941Smrg      nir_deref_instr *p = path->path[i + 1];
566b8e80941Smrg      if (p->deref_type == nir_deref_type_array_wildcard)
567b8e80941Smrg         continue;
568b8e80941Smrg
569b8e80941Smrg      if (nir_src_is_const(p->arr.index) &&
570b8e80941Smrg          nir_src_as_uint(p->arr.index) >= info->levels[i].array_len)
571b8e80941Smrg         return true;
572b8e80941Smrg   }
573b8e80941Smrg
574b8e80941Smrg   return false;
575b8e80941Smrg}
576b8e80941Smrg
577b8e80941Smrgstatic void
578b8e80941Smrgemit_split_copies(nir_builder *b,
579b8e80941Smrg                  struct array_var_info *dst_info, nir_deref_path *dst_path,
580b8e80941Smrg                  unsigned dst_level, nir_deref_instr *dst,
581b8e80941Smrg                  struct array_var_info *src_info, nir_deref_path *src_path,
582b8e80941Smrg                  unsigned src_level, nir_deref_instr *src)
583b8e80941Smrg{
584b8e80941Smrg   nir_deref_instr *dst_p, *src_p;
585b8e80941Smrg
586b8e80941Smrg   while ((dst_p = dst_path->path[dst_level + 1])) {
587b8e80941Smrg      if (dst_p->deref_type == nir_deref_type_array_wildcard)
588b8e80941Smrg         break;
589b8e80941Smrg
590b8e80941Smrg      dst = nir_build_deref_follower(b, dst, dst_p);
591b8e80941Smrg      dst_level++;
592b8e80941Smrg   }
593b8e80941Smrg
594b8e80941Smrg   while ((src_p = src_path->path[src_level + 1])) {
595b8e80941Smrg      if (src_p->deref_type == nir_deref_type_array_wildcard)
596b8e80941Smrg         break;
597b8e80941Smrg
598b8e80941Smrg      src = nir_build_deref_follower(b, src, src_p);
599b8e80941Smrg      src_level++;
600b8e80941Smrg   }
601b8e80941Smrg
602b8e80941Smrg   if (src_p == NULL || dst_p == NULL) {
603b8e80941Smrg      assert(src_p == NULL && dst_p == NULL);
604b8e80941Smrg      nir_copy_deref(b, dst, src);
605b8e80941Smrg   } else {
606b8e80941Smrg      assert(dst_p->deref_type == nir_deref_type_array_wildcard &&
607b8e80941Smrg             src_p->deref_type == nir_deref_type_array_wildcard);
608b8e80941Smrg
609b8e80941Smrg      if ((dst_info && dst_info->levels[dst_level].split) ||
610b8e80941Smrg          (src_info && src_info->levels[src_level].split)) {
611b8e80941Smrg         /* There are no indirects at this level on one of the source or the
612b8e80941Smrg          * destination so we are lowering it.
613b8e80941Smrg          */
614b8e80941Smrg         assert(glsl_get_length(dst_path->path[dst_level]->type) ==
615b8e80941Smrg                glsl_get_length(src_path->path[src_level]->type));
616b8e80941Smrg         unsigned len = glsl_get_length(dst_path->path[dst_level]->type);
617b8e80941Smrg         for (unsigned i = 0; i < len; i++) {
618b8e80941Smrg            emit_split_copies(b, dst_info, dst_path, dst_level + 1,
619b8e80941Smrg                              nir_build_deref_array_imm(b, dst, i),
620b8e80941Smrg                              src_info, src_path, src_level + 1,
621b8e80941Smrg                              nir_build_deref_array_imm(b, src, i));
622b8e80941Smrg         }
623b8e80941Smrg      } else {
624b8e80941Smrg         /* Neither side is being split so we just keep going */
625b8e80941Smrg         emit_split_copies(b, dst_info, dst_path, dst_level + 1,
626b8e80941Smrg                           nir_build_deref_array_wildcard(b, dst),
627b8e80941Smrg                           src_info, src_path, src_level + 1,
628b8e80941Smrg                           nir_build_deref_array_wildcard(b, src));
629b8e80941Smrg      }
630b8e80941Smrg   }
631b8e80941Smrg}
632b8e80941Smrg
633b8e80941Smrgstatic void
634b8e80941Smrgsplit_array_copies_impl(nir_function_impl *impl,
635b8e80941Smrg                        struct hash_table *var_info_map,
636b8e80941Smrg                        nir_variable_mode modes,
637b8e80941Smrg                        void *mem_ctx)
638b8e80941Smrg{
639b8e80941Smrg   nir_builder b;
640b8e80941Smrg   nir_builder_init(&b, impl);
641b8e80941Smrg
642b8e80941Smrg   nir_foreach_block(block, impl) {
643b8e80941Smrg      nir_foreach_instr_safe(instr, block) {
644b8e80941Smrg         if (instr->type != nir_instr_type_intrinsic)
645b8e80941Smrg            continue;
646b8e80941Smrg
647b8e80941Smrg         nir_intrinsic_instr *copy = nir_instr_as_intrinsic(instr);
648b8e80941Smrg         if (copy->intrinsic != nir_intrinsic_copy_deref)
649b8e80941Smrg            continue;
650b8e80941Smrg
651b8e80941Smrg         nir_deref_instr *dst_deref = nir_src_as_deref(copy->src[0]);
652b8e80941Smrg         nir_deref_instr *src_deref = nir_src_as_deref(copy->src[1]);
653b8e80941Smrg
654b8e80941Smrg         struct array_var_info *dst_info =
655b8e80941Smrg            get_array_deref_info(dst_deref, var_info_map, modes);
656b8e80941Smrg         struct array_var_info *src_info =
657b8e80941Smrg            get_array_deref_info(src_deref, var_info_map, modes);
658b8e80941Smrg
659b8e80941Smrg         if (!src_info && !dst_info)
660b8e80941Smrg            continue;
661b8e80941Smrg
662b8e80941Smrg         nir_deref_path dst_path, src_path;
663b8e80941Smrg         nir_deref_path_init(&dst_path, dst_deref, mem_ctx);
664b8e80941Smrg         nir_deref_path_init(&src_path, src_deref, mem_ctx);
665b8e80941Smrg
666b8e80941Smrg         if (!deref_has_split_wildcard(&dst_path, dst_info) &&
667b8e80941Smrg             !deref_has_split_wildcard(&src_path, src_info))
668b8e80941Smrg            continue;
669b8e80941Smrg
670b8e80941Smrg         b.cursor = nir_instr_remove(&copy->instr);
671b8e80941Smrg
672b8e80941Smrg         emit_split_copies(&b, dst_info, &dst_path, 0, dst_path.path[0],
673b8e80941Smrg                               src_info, &src_path, 0, src_path.path[0]);
674b8e80941Smrg      }
675b8e80941Smrg   }
676b8e80941Smrg}
677b8e80941Smrg
678b8e80941Smrgstatic void
679b8e80941Smrgsplit_array_access_impl(nir_function_impl *impl,
680b8e80941Smrg                        struct hash_table *var_info_map,
681b8e80941Smrg                        nir_variable_mode modes,
682b8e80941Smrg                        void *mem_ctx)
683b8e80941Smrg{
684b8e80941Smrg   nir_builder b;
685b8e80941Smrg   nir_builder_init(&b, impl);
686b8e80941Smrg
687b8e80941Smrg   nir_foreach_block(block, impl) {
688b8e80941Smrg      nir_foreach_instr_safe(instr, block) {
689b8e80941Smrg         if (instr->type == nir_instr_type_deref) {
690b8e80941Smrg            /* Clean up any dead derefs we find lying around.  They may refer
691b8e80941Smrg             * to variables we're planning to split.
692b8e80941Smrg             */
693b8e80941Smrg            nir_deref_instr *deref = nir_instr_as_deref(instr);
694b8e80941Smrg            if (deref->mode & modes)
695b8e80941Smrg               nir_deref_instr_remove_if_unused(deref);
696b8e80941Smrg            continue;
697b8e80941Smrg         }
698b8e80941Smrg
699b8e80941Smrg         if (instr->type != nir_instr_type_intrinsic)
700b8e80941Smrg            continue;
701b8e80941Smrg
702b8e80941Smrg         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
703b8e80941Smrg         if (intrin->intrinsic != nir_intrinsic_load_deref &&
704b8e80941Smrg             intrin->intrinsic != nir_intrinsic_store_deref &&
705b8e80941Smrg             intrin->intrinsic != nir_intrinsic_copy_deref)
706b8e80941Smrg            continue;
707b8e80941Smrg
708b8e80941Smrg         const unsigned num_derefs =
709b8e80941Smrg            intrin->intrinsic == nir_intrinsic_copy_deref ? 2 : 1;
710b8e80941Smrg
711b8e80941Smrg         for (unsigned d = 0; d < num_derefs; d++) {
712b8e80941Smrg            nir_deref_instr *deref = nir_src_as_deref(intrin->src[d]);
713b8e80941Smrg
714b8e80941Smrg            struct array_var_info *info =
715b8e80941Smrg               get_array_deref_info(deref, var_info_map, modes);
716b8e80941Smrg            if (!info)
717b8e80941Smrg               continue;
718b8e80941Smrg
719b8e80941Smrg            nir_deref_path path;
720b8e80941Smrg            nir_deref_path_init(&path, deref, mem_ctx);
721b8e80941Smrg
722b8e80941Smrg            b.cursor = nir_before_instr(&intrin->instr);
723b8e80941Smrg
724b8e80941Smrg            if (array_path_is_out_of_bounds(&path, info)) {
725b8e80941Smrg               /* If one of the derefs is out-of-bounds, we just delete the
726b8e80941Smrg                * instruction.  If a destination is out of bounds, then it may
727b8e80941Smrg                * have been in-bounds prior to shrinking so we don't want to
728b8e80941Smrg                * accidentally stomp something.  However, we've already proven
729b8e80941Smrg                * that it will never be read so it's safe to delete.  If a
730b8e80941Smrg                * source is out of bounds then it is loading random garbage.
731b8e80941Smrg                * For loads, we replace their uses with an undef instruction
732b8e80941Smrg                * and for copies we just delete the copy since it was writing
733b8e80941Smrg                * undefined garbage anyway and we may as well leave the random
734b8e80941Smrg                * garbage in the destination alone.
735b8e80941Smrg                */
736b8e80941Smrg               if (intrin->intrinsic == nir_intrinsic_load_deref) {
737b8e80941Smrg                  nir_ssa_def *u =
738b8e80941Smrg                     nir_ssa_undef(&b, intrin->dest.ssa.num_components,
739b8e80941Smrg                                       intrin->dest.ssa.bit_size);
740b8e80941Smrg                  nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
741b8e80941Smrg                                           nir_src_for_ssa(u));
742b8e80941Smrg               }
743b8e80941Smrg               nir_instr_remove(&intrin->instr);
744b8e80941Smrg               for (unsigned i = 0; i < num_derefs; i++)
745b8e80941Smrg                  nir_deref_instr_remove_if_unused(nir_src_as_deref(intrin->src[i]));
746b8e80941Smrg               break;
747b8e80941Smrg            }
748b8e80941Smrg
749b8e80941Smrg            struct array_split *split = &info->root_split;
750b8e80941Smrg            for (unsigned i = 0; i < info->num_levels; i++) {
751b8e80941Smrg               if (info->levels[i].split) {
752b8e80941Smrg                  nir_deref_instr *p = path.path[i + 1];
753b8e80941Smrg                  unsigned index = nir_src_as_uint(p->arr.index);
754b8e80941Smrg                  assert(index < info->levels[i].array_len);
755b8e80941Smrg                  split = &split->splits[index];
756b8e80941Smrg               }
757b8e80941Smrg            }
758b8e80941Smrg            assert(!split->splits && split->var);
759b8e80941Smrg
760b8e80941Smrg            nir_deref_instr *new_deref = nir_build_deref_var(&b, split->var);
761b8e80941Smrg            for (unsigned i = 0; i < info->num_levels; i++) {
762b8e80941Smrg               if (!info->levels[i].split) {
763b8e80941Smrg                  new_deref = nir_build_deref_follower(&b, new_deref,
764b8e80941Smrg                                                       path.path[i + 1]);
765b8e80941Smrg               }
766b8e80941Smrg            }
767b8e80941Smrg            assert(new_deref->type == deref->type);
768b8e80941Smrg
769b8e80941Smrg            /* Rewrite the deref source to point to the split one */
770b8e80941Smrg            nir_instr_rewrite_src(&intrin->instr, &intrin->src[d],
771b8e80941Smrg                                  nir_src_for_ssa(&new_deref->dest.ssa));
772b8e80941Smrg            nir_deref_instr_remove_if_unused(deref);
773b8e80941Smrg         }
774b8e80941Smrg      }
775b8e80941Smrg   }
776b8e80941Smrg}
777b8e80941Smrg
778b8e80941Smrg/** A pass for splitting arrays of vectors into multiple variables
779b8e80941Smrg *
780b8e80941Smrg * This pass looks at arrays (possibly multiple levels) of vectors (not
781b8e80941Smrg * structures or other types) and tries to split them into piles of variables,
782b8e80941Smrg * one for each array element.  The heuristic used is simple: If a given array
783b8e80941Smrg * level is never used with an indirect, that array level will get split.
784b8e80941Smrg *
785b8e80941Smrg * This pass probably could handles structures easily enough but making a pass
786b8e80941Smrg * that could see through an array of structures of arrays would be difficult
787b8e80941Smrg * so it's best to just run nir_split_struct_vars first.
788b8e80941Smrg */
789b8e80941Smrgbool
790b8e80941Smrgnir_split_array_vars(nir_shader *shader, nir_variable_mode modes)
791b8e80941Smrg{
792b8e80941Smrg   void *mem_ctx = ralloc_context(NULL);
793b8e80941Smrg   struct hash_table *var_info_map = _mesa_pointer_hash_table_create(mem_ctx);
794b8e80941Smrg
795b8e80941Smrg   assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes);
796b8e80941Smrg
797b8e80941Smrg   bool has_global_array = false;
798b8e80941Smrg   if (modes & nir_var_shader_temp) {
799b8e80941Smrg      has_global_array = init_var_list_array_infos(&shader->globals,
800b8e80941Smrg                                                   var_info_map, mem_ctx);
801b8e80941Smrg   }
802b8e80941Smrg
803b8e80941Smrg   bool has_any_array = false;
804b8e80941Smrg   nir_foreach_function(function, shader) {
805b8e80941Smrg      if (!function->impl)
806b8e80941Smrg         continue;
807b8e80941Smrg
808b8e80941Smrg      bool has_local_array = false;
809b8e80941Smrg      if (modes & nir_var_function_temp) {
810b8e80941Smrg         has_local_array = init_var_list_array_infos(&function->impl->locals,
811b8e80941Smrg                                                     var_info_map, mem_ctx);
812b8e80941Smrg      }
813b8e80941Smrg
814b8e80941Smrg      if (has_global_array || has_local_array) {
815b8e80941Smrg         has_any_array = true;
816b8e80941Smrg         mark_array_usage_impl(function->impl, var_info_map, modes, mem_ctx);
817b8e80941Smrg      }
818b8e80941Smrg   }
819b8e80941Smrg
820b8e80941Smrg   /* If we failed to find any arrays of arrays, bail early. */
821b8e80941Smrg   if (!has_any_array) {
822b8e80941Smrg      ralloc_free(mem_ctx);
823b8e80941Smrg      return false;
824b8e80941Smrg   }
825b8e80941Smrg
826b8e80941Smrg   bool has_global_splits = false;
827b8e80941Smrg   if (modes & nir_var_shader_temp) {
828b8e80941Smrg      has_global_splits = split_var_list_arrays(shader, NULL,
829b8e80941Smrg                                                &shader->globals,
830b8e80941Smrg                                                var_info_map, mem_ctx);
831b8e80941Smrg   }
832b8e80941Smrg
833b8e80941Smrg   bool progress = false;
834b8e80941Smrg   nir_foreach_function(function, shader) {
835b8e80941Smrg      if (!function->impl)
836b8e80941Smrg         continue;
837b8e80941Smrg
838b8e80941Smrg      bool has_local_splits = false;
839b8e80941Smrg      if (modes & nir_var_function_temp) {
840b8e80941Smrg         has_local_splits = split_var_list_arrays(shader, function->impl,
841b8e80941Smrg                                                  &function->impl->locals,
842b8e80941Smrg                                                  var_info_map, mem_ctx);
843b8e80941Smrg      }
844b8e80941Smrg
845b8e80941Smrg      if (has_global_splits || has_local_splits) {
846b8e80941Smrg         split_array_copies_impl(function->impl, var_info_map, modes, mem_ctx);
847b8e80941Smrg         split_array_access_impl(function->impl, var_info_map, modes, mem_ctx);
848b8e80941Smrg
849b8e80941Smrg         nir_metadata_preserve(function->impl, nir_metadata_block_index |
850b8e80941Smrg                                               nir_metadata_dominance);
851b8e80941Smrg         progress = true;
852b8e80941Smrg      }
853b8e80941Smrg   }
854b8e80941Smrg
855b8e80941Smrg   ralloc_free(mem_ctx);
856b8e80941Smrg
857b8e80941Smrg   return progress;
858b8e80941Smrg}
859b8e80941Smrg
/* Usage bookkeeping for a single array level of a tracked variable. */
struct array_level_usage {
   /* Length of this array level; reduced when the level is shrunk. */
   unsigned array_len;

   /* Largest index seen in a read and in a write, respectively.  UINT_MAX
    * stands for an indirect (non-constant) index.
    */
   unsigned max_read;
   unsigned max_written;

   /* Set when a wildcard copy pairs this level with an array that is not
    * itself being tracked for shrinking.
    */
   bool has_external_copy;

   /* Levels of other tracked arrays that are wildcard-copied to or from this
    * one (keys are struct array_level_usage pointers); created on first use.
    */
   struct set *levels_copied;
};
871b8e80941Smrg
872b8e80941Smrgstruct vec_var_usage {
873b8e80941Smrg   /* Convenience set of all components this variable has */
874b8e80941Smrg   nir_component_mask_t all_comps;
875b8e80941Smrg
876b8e80941Smrg   nir_component_mask_t comps_read;
877b8e80941Smrg   nir_component_mask_t comps_written;
878b8e80941Smrg
879b8e80941Smrg   nir_component_mask_t comps_kept;
880b8e80941Smrg
881b8e80941Smrg   /* True if there is a copy that isn't to/from a shrinkable vector */
882b8e80941Smrg   bool has_external_copy;
883b8e80941Smrg   struct set *vars_copied;
884b8e80941Smrg
885b8e80941Smrg   unsigned num_levels;
886b8e80941Smrg   struct array_level_usage levels[0];
887b8e80941Smrg};
888b8e80941Smrg
889b8e80941Smrgstatic struct vec_var_usage *
890b8e80941Smrgget_vec_var_usage(nir_variable *var,
891b8e80941Smrg                  struct hash_table *var_usage_map,
892b8e80941Smrg                  bool add_usage_entry, void *mem_ctx)
893b8e80941Smrg{
894b8e80941Smrg   struct hash_entry *entry = _mesa_hash_table_search(var_usage_map, var);
895b8e80941Smrg   if (entry)
896b8e80941Smrg      return entry->data;
897b8e80941Smrg
898b8e80941Smrg   if (!add_usage_entry)
899b8e80941Smrg      return NULL;
900b8e80941Smrg
901b8e80941Smrg   /* Check to make sure that we are working with an array of vectors.  We
902b8e80941Smrg    * don't bother to shrink single vectors because we figure that we can
903b8e80941Smrg    * clean it up better with SSA than by inserting piles of vecN instructions
904b8e80941Smrg    * to compact results.
905b8e80941Smrg    */
906b8e80941Smrg   int num_levels = num_array_levels_in_array_of_vector_type(var->type);
907b8e80941Smrg   if (num_levels < 1)
908b8e80941Smrg      return NULL; /* Not an array of vectors */
909b8e80941Smrg
910b8e80941Smrg   struct vec_var_usage *usage =
911b8e80941Smrg      rzalloc_size(mem_ctx, sizeof(*usage) +
912b8e80941Smrg                            num_levels * sizeof(usage->levels[0]));
913b8e80941Smrg
914b8e80941Smrg   usage->num_levels = num_levels;
915b8e80941Smrg   const struct glsl_type *type = var->type;
916b8e80941Smrg   for (unsigned i = 0; i < num_levels; i++) {
917b8e80941Smrg      usage->levels[i].array_len = glsl_get_length(type);
918b8e80941Smrg      type = glsl_get_array_element(type);
919b8e80941Smrg   }
920b8e80941Smrg   assert(glsl_type_is_vector_or_scalar(type));
921b8e80941Smrg
922b8e80941Smrg   usage->all_comps = (1 << glsl_get_components(type)) - 1;
923b8e80941Smrg
924b8e80941Smrg   _mesa_hash_table_insert(var_usage_map, var, usage);
925b8e80941Smrg
926b8e80941Smrg   return usage;
927b8e80941Smrg}
928b8e80941Smrg
929b8e80941Smrgstatic struct vec_var_usage *
930b8e80941Smrgget_vec_deref_usage(nir_deref_instr *deref,
931b8e80941Smrg                    struct hash_table *var_usage_map,
932b8e80941Smrg                    nir_variable_mode modes,
933b8e80941Smrg                    bool add_usage_entry, void *mem_ctx)
934b8e80941Smrg{
935b8e80941Smrg   if (!(deref->mode & modes))
936b8e80941Smrg      return NULL;
937b8e80941Smrg
938b8e80941Smrg   return get_vec_var_usage(nir_deref_instr_get_variable(deref),
939b8e80941Smrg                            var_usage_map, add_usage_entry, mem_ctx);
940b8e80941Smrg}
941b8e80941Smrg
942b8e80941Smrgstatic void
943b8e80941Smrgmark_deref_used(nir_deref_instr *deref,
944b8e80941Smrg                nir_component_mask_t comps_read,
945b8e80941Smrg                nir_component_mask_t comps_written,
946b8e80941Smrg                nir_deref_instr *copy_deref,
947b8e80941Smrg                struct hash_table *var_usage_map,
948b8e80941Smrg                nir_variable_mode modes,
949b8e80941Smrg                void *mem_ctx)
950b8e80941Smrg{
951b8e80941Smrg   if (!(deref->mode & modes))
952b8e80941Smrg      return;
953b8e80941Smrg
954b8e80941Smrg   nir_variable *var = nir_deref_instr_get_variable(deref);
955b8e80941Smrg
956b8e80941Smrg   struct vec_var_usage *usage =
957b8e80941Smrg      get_vec_var_usage(var, var_usage_map, true, mem_ctx);
958b8e80941Smrg   if (!usage)
959b8e80941Smrg      return;
960b8e80941Smrg
961b8e80941Smrg   usage->comps_read |= comps_read & usage->all_comps;
962b8e80941Smrg   usage->comps_written |= comps_written & usage->all_comps;
963b8e80941Smrg
964b8e80941Smrg   struct vec_var_usage *copy_usage = NULL;
965b8e80941Smrg   if (copy_deref) {
966b8e80941Smrg      copy_usage = get_vec_deref_usage(copy_deref, var_usage_map, modes,
967b8e80941Smrg                                       true, mem_ctx);
968b8e80941Smrg      if (copy_usage) {
969b8e80941Smrg         if (usage->vars_copied == NULL) {
970b8e80941Smrg            usage->vars_copied = _mesa_pointer_set_create(mem_ctx);
971b8e80941Smrg         }
972b8e80941Smrg         _mesa_set_add(usage->vars_copied, copy_usage);
973b8e80941Smrg      } else {
974b8e80941Smrg         usage->has_external_copy = true;
975b8e80941Smrg      }
976b8e80941Smrg   }
977b8e80941Smrg
978b8e80941Smrg   nir_deref_path path;
979b8e80941Smrg   nir_deref_path_init(&path, deref, mem_ctx);
980b8e80941Smrg
981b8e80941Smrg   nir_deref_path copy_path;
982b8e80941Smrg   if (copy_usage)
983b8e80941Smrg      nir_deref_path_init(&copy_path, copy_deref, mem_ctx);
984b8e80941Smrg
985b8e80941Smrg   unsigned copy_i = 0;
986b8e80941Smrg   for (unsigned i = 0; i < usage->num_levels; i++) {
987b8e80941Smrg      struct array_level_usage *level = &usage->levels[i];
988b8e80941Smrg      nir_deref_instr *deref = path.path[i + 1];
989b8e80941Smrg      assert(deref->deref_type == nir_deref_type_array ||
990b8e80941Smrg             deref->deref_type == nir_deref_type_array_wildcard);
991b8e80941Smrg
992b8e80941Smrg      unsigned max_used;
993b8e80941Smrg      if (deref->deref_type == nir_deref_type_array) {
994b8e80941Smrg         max_used = nir_src_is_const(deref->arr.index) ?
995b8e80941Smrg                    nir_src_as_uint(deref->arr.index) : UINT_MAX;
996b8e80941Smrg      } else {
997b8e80941Smrg         /* For wildcards, we read or wrote the whole thing. */
998b8e80941Smrg         assert(deref->deref_type == nir_deref_type_array_wildcard);
999b8e80941Smrg         max_used = level->array_len - 1;
1000b8e80941Smrg
1001b8e80941Smrg         if (copy_usage) {
1002b8e80941Smrg            /* Match each wildcard level with the level on copy_usage */
1003b8e80941Smrg            for (; copy_path.path[copy_i + 1]; copy_i++) {
1004b8e80941Smrg               if (copy_path.path[copy_i + 1]->deref_type ==
1005b8e80941Smrg                   nir_deref_type_array_wildcard)
1006b8e80941Smrg                  break;
1007b8e80941Smrg            }
1008b8e80941Smrg            struct array_level_usage *copy_level =
1009b8e80941Smrg               &copy_usage->levels[copy_i++];
1010b8e80941Smrg
1011b8e80941Smrg            if (level->levels_copied == NULL) {
1012b8e80941Smrg               level->levels_copied = _mesa_pointer_set_create(mem_ctx);
1013b8e80941Smrg            }
1014b8e80941Smrg            _mesa_set_add(level->levels_copied, copy_level);
1015b8e80941Smrg         } else {
1016b8e80941Smrg            /* We have a wildcard and it comes from a variable we aren't
1017b8e80941Smrg             * tracking; flag it and we'll know to not shorten this array.
1018b8e80941Smrg             */
1019b8e80941Smrg            level->has_external_copy = true;
1020b8e80941Smrg         }
1021b8e80941Smrg      }
1022b8e80941Smrg
1023b8e80941Smrg      if (comps_written)
1024b8e80941Smrg         level->max_written = MAX2(level->max_written, max_used);
1025b8e80941Smrg      if (comps_read)
1026b8e80941Smrg         level->max_read = MAX2(level->max_read, max_used);
1027b8e80941Smrg   }
1028b8e80941Smrg}
1029b8e80941Smrg
1030b8e80941Smrgstatic bool
1031b8e80941Smrgsrc_is_load_deref(nir_src src, nir_src deref_src)
1032b8e80941Smrg{
1033b8e80941Smrg   nir_intrinsic_instr *load = nir_src_as_intrinsic(src);
1034b8e80941Smrg   if (load == NULL || load->intrinsic != nir_intrinsic_load_deref)
1035b8e80941Smrg      return false;
1036b8e80941Smrg
1037b8e80941Smrg   assert(load->src[0].is_ssa);
1038b8e80941Smrg
1039b8e80941Smrg   return load->src[0].ssa == deref_src.ssa;
1040b8e80941Smrg}
1041b8e80941Smrg
1042b8e80941Smrg/* Returns all non-self-referential components of a store instruction.  A
1043b8e80941Smrg * component is self-referential if it comes from the same component of a load
1044b8e80941Smrg * instruction on the same deref.  If the only data in a particular component
1045b8e80941Smrg * of a variable came directly from that component then it's undefined.  The
1046b8e80941Smrg * only way to get defined data into a component of a variable is for it to
1047b8e80941Smrg * get written there by something outside or from a different component.
1048b8e80941Smrg *
1049b8e80941Smrg * This is a fairly common pattern in shaders that come from either GLSL IR or
1050b8e80941Smrg * GLSLang because both glsl_to_nir and GLSLang implement write-masking with
1051b8e80941Smrg * load-vec-store.
1052b8e80941Smrg */
1053b8e80941Smrgstatic nir_component_mask_t
1054b8e80941Smrgget_non_self_referential_store_comps(nir_intrinsic_instr *store)
1055b8e80941Smrg{
1056b8e80941Smrg   nir_component_mask_t comps = nir_intrinsic_write_mask(store);
1057b8e80941Smrg
1058b8e80941Smrg   assert(store->src[1].is_ssa);
1059b8e80941Smrg   nir_instr *src_instr = store->src[1].ssa->parent_instr;
1060b8e80941Smrg   if (src_instr->type != nir_instr_type_alu)
1061b8e80941Smrg      return comps;
1062b8e80941Smrg
1063b8e80941Smrg   nir_alu_instr *src_alu = nir_instr_as_alu(src_instr);
1064b8e80941Smrg
1065b8e80941Smrg   if (src_alu->op == nir_op_imov ||
1066b8e80941Smrg       src_alu->op == nir_op_fmov) {
1067b8e80941Smrg      /* If it's just a swizzle of a load from the same deref, discount any
1068b8e80941Smrg       * channels that don't move in the swizzle.
1069b8e80941Smrg       */
1070b8e80941Smrg      if (src_is_load_deref(src_alu->src[0].src, store->src[0])) {
1071b8e80941Smrg         for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) {
1072b8e80941Smrg            if (src_alu->src[0].swizzle[i] == i)
1073b8e80941Smrg               comps &= ~(1u << i);
1074b8e80941Smrg         }
1075b8e80941Smrg      }
1076b8e80941Smrg   } else if (src_alu->op == nir_op_vec2 ||
1077b8e80941Smrg              src_alu->op == nir_op_vec3 ||
1078b8e80941Smrg              src_alu->op == nir_op_vec4) {
1079b8e80941Smrg      /* If it's a vec, discount any channels that are just loads from the
1080b8e80941Smrg       * same deref put in the same spot.
1081b8e80941Smrg       */
1082b8e80941Smrg      for (unsigned i = 0; i < nir_op_infos[src_alu->op].num_inputs; i++) {
1083b8e80941Smrg         if (src_is_load_deref(src_alu->src[i].src, store->src[0]) &&
1084b8e80941Smrg             src_alu->src[i].swizzle[0] == i)
1085b8e80941Smrg            comps &= ~(1u << i);
1086b8e80941Smrg      }
1087b8e80941Smrg   }
1088b8e80941Smrg
1089b8e80941Smrg   return comps;
1090b8e80941Smrg}
1091b8e80941Smrg
1092b8e80941Smrgstatic void
1093b8e80941Smrgfind_used_components_impl(nir_function_impl *impl,
1094b8e80941Smrg                          struct hash_table *var_usage_map,
1095b8e80941Smrg                          nir_variable_mode modes,
1096b8e80941Smrg                          void *mem_ctx)
1097b8e80941Smrg{
1098b8e80941Smrg   nir_foreach_block(block, impl) {
1099b8e80941Smrg      nir_foreach_instr(instr, block) {
1100b8e80941Smrg         if (instr->type != nir_instr_type_intrinsic)
1101b8e80941Smrg            continue;
1102b8e80941Smrg
1103b8e80941Smrg         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1104b8e80941Smrg         switch (intrin->intrinsic) {
1105b8e80941Smrg         case nir_intrinsic_load_deref:
1106b8e80941Smrg            mark_deref_used(nir_src_as_deref(intrin->src[0]),
1107b8e80941Smrg                            nir_ssa_def_components_read(&intrin->dest.ssa), 0,
1108b8e80941Smrg                            NULL, var_usage_map, modes, mem_ctx);
1109b8e80941Smrg            break;
1110b8e80941Smrg
1111b8e80941Smrg         case nir_intrinsic_store_deref:
1112b8e80941Smrg            mark_deref_used(nir_src_as_deref(intrin->src[0]),
1113b8e80941Smrg                            0, get_non_self_referential_store_comps(intrin),
1114b8e80941Smrg                            NULL, var_usage_map, modes, mem_ctx);
1115b8e80941Smrg            break;
1116b8e80941Smrg
1117b8e80941Smrg         case nir_intrinsic_copy_deref: {
1118b8e80941Smrg            /* Just mark everything used for copies. */
1119b8e80941Smrg            nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
1120b8e80941Smrg            nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
1121b8e80941Smrg            mark_deref_used(dst, 0, ~0, src, var_usage_map, modes, mem_ctx);
1122b8e80941Smrg            mark_deref_used(src, ~0, 0, dst, var_usage_map, modes, mem_ctx);
1123b8e80941Smrg            break;
1124b8e80941Smrg         }
1125b8e80941Smrg
1126b8e80941Smrg         default:
1127b8e80941Smrg            break;
1128b8e80941Smrg         }
1129b8e80941Smrg      }
1130b8e80941Smrg   }
1131b8e80941Smrg}
1132b8e80941Smrg
1133b8e80941Smrgstatic bool
1134b8e80941Smrgshrink_vec_var_list(struct exec_list *vars,
1135b8e80941Smrg                    struct hash_table *var_usage_map)
1136b8e80941Smrg{
1137b8e80941Smrg   /* Initialize the components kept field of each variable.  This is the
1138b8e80941Smrg    * AND of the components written and components read.  If a component is
1139b8e80941Smrg    * written but never read, it's dead.  If it is read but never written,
1140b8e80941Smrg    * then all values read are undefined garbage and we may as well not read
1141b8e80941Smrg    * them.
1142b8e80941Smrg    *
1143b8e80941Smrg    * The same logic applies to the array length.  We make the array length
1144b8e80941Smrg    * the minimum needed required length between read and write and plan to
1145b8e80941Smrg    * discard any OOB access.  The one exception here is indirect writes
1146b8e80941Smrg    * because we don't know where they will land and we can't shrink an array
1147b8e80941Smrg    * with indirect writes because previously in-bounds writes may become
1148b8e80941Smrg    * out-of-bounds and have undefined behavior.
1149b8e80941Smrg    *
1150b8e80941Smrg    * Also, if we have a copy that to/from something we can't shrink, we need
1151b8e80941Smrg    * to leave components and array_len of any wildcards alone.
1152b8e80941Smrg    */
1153b8e80941Smrg   nir_foreach_variable(var, vars) {
1154b8e80941Smrg      struct vec_var_usage *usage =
1155b8e80941Smrg         get_vec_var_usage(var, var_usage_map, false, NULL);
1156b8e80941Smrg      if (!usage)
1157b8e80941Smrg         continue;
1158b8e80941Smrg
1159b8e80941Smrg      assert(usage->comps_kept == 0);
1160b8e80941Smrg      if (usage->has_external_copy)
1161b8e80941Smrg         usage->comps_kept = usage->all_comps;
1162b8e80941Smrg      else
1163b8e80941Smrg         usage->comps_kept = usage->comps_read & usage->comps_written;
1164b8e80941Smrg
1165b8e80941Smrg      for (unsigned i = 0; i < usage->num_levels; i++) {
1166b8e80941Smrg         struct array_level_usage *level = &usage->levels[i];
1167b8e80941Smrg         assert(level->array_len > 0);
1168b8e80941Smrg
1169b8e80941Smrg         if (level->max_written == UINT_MAX || level->has_external_copy)
1170b8e80941Smrg            continue; /* Can't shrink */
1171b8e80941Smrg
1172b8e80941Smrg         unsigned max_used = MIN2(level->max_read, level->max_written);
1173b8e80941Smrg         level->array_len = MIN2(max_used, level->array_len - 1) + 1;
1174b8e80941Smrg      }
1175b8e80941Smrg   }
1176b8e80941Smrg
1177b8e80941Smrg   /* In order for variable copies to work, we have to have the same data type
1178b8e80941Smrg    * on the source and the destination.  In order to satisfy this, we run a
1179b8e80941Smrg    * little fixed-point algorithm to transitively ensure that we get enough
1180b8e80941Smrg    * components and array elements for this to hold for all copies.
1181b8e80941Smrg    */
1182b8e80941Smrg   bool fp_progress;
1183b8e80941Smrg   do {
1184b8e80941Smrg      fp_progress = false;
1185b8e80941Smrg      nir_foreach_variable(var, vars) {
1186b8e80941Smrg         struct vec_var_usage *var_usage =
1187b8e80941Smrg            get_vec_var_usage(var, var_usage_map, false, NULL);
1188b8e80941Smrg         if (!var_usage || !var_usage->vars_copied)
1189b8e80941Smrg            continue;
1190b8e80941Smrg
1191b8e80941Smrg         set_foreach(var_usage->vars_copied, copy_entry) {
1192b8e80941Smrg            struct vec_var_usage *copy_usage = (void *)copy_entry->key;
1193b8e80941Smrg            if (copy_usage->comps_kept != var_usage->comps_kept) {
1194b8e80941Smrg               nir_component_mask_t comps_kept =
1195b8e80941Smrg                  (var_usage->comps_kept | copy_usage->comps_kept);
1196b8e80941Smrg               var_usage->comps_kept = comps_kept;
1197b8e80941Smrg               copy_usage->comps_kept = comps_kept;
1198b8e80941Smrg               fp_progress = true;
1199b8e80941Smrg            }
1200b8e80941Smrg         }
1201b8e80941Smrg
1202b8e80941Smrg         for (unsigned i = 0; i < var_usage->num_levels; i++) {
1203b8e80941Smrg            struct array_level_usage *var_level = &var_usage->levels[i];
1204b8e80941Smrg            if (!var_level->levels_copied)
1205b8e80941Smrg               continue;
1206b8e80941Smrg
1207b8e80941Smrg            set_foreach(var_level->levels_copied, copy_entry) {
1208b8e80941Smrg               struct array_level_usage *copy_level = (void *)copy_entry->key;
1209b8e80941Smrg               if (var_level->array_len != copy_level->array_len) {
1210b8e80941Smrg                  unsigned array_len =
1211b8e80941Smrg                     MAX2(var_level->array_len, copy_level->array_len);
1212b8e80941Smrg                  var_level->array_len = array_len;
1213b8e80941Smrg                  copy_level->array_len = array_len;
1214b8e80941Smrg                  fp_progress = true;
1215b8e80941Smrg               }
1216b8e80941Smrg            }
1217b8e80941Smrg         }
1218b8e80941Smrg      }
1219b8e80941Smrg   } while (fp_progress);
1220b8e80941Smrg
1221b8e80941Smrg   bool vars_shrunk = false;
1222b8e80941Smrg   nir_foreach_variable_safe(var, vars) {
1223b8e80941Smrg      struct vec_var_usage *usage =
1224b8e80941Smrg         get_vec_var_usage(var, var_usage_map, false, NULL);
1225b8e80941Smrg      if (!usage)
1226b8e80941Smrg         continue;
1227b8e80941Smrg
1228b8e80941Smrg      bool shrunk = false;
1229b8e80941Smrg      const struct glsl_type *vec_type = var->type;
1230b8e80941Smrg      for (unsigned i = 0; i < usage->num_levels; i++) {
1231b8e80941Smrg         /* If we've reduced the array to zero elements at some level, just
1232b8e80941Smrg          * set comps_kept to 0 and delete the variable.
1233b8e80941Smrg          */
1234b8e80941Smrg         if (usage->levels[i].array_len == 0) {
1235b8e80941Smrg            usage->comps_kept = 0;
1236b8e80941Smrg            break;
1237b8e80941Smrg         }
1238b8e80941Smrg
1239b8e80941Smrg         assert(usage->levels[i].array_len <= glsl_get_length(vec_type));
1240b8e80941Smrg         if (usage->levels[i].array_len < glsl_get_length(vec_type))
1241b8e80941Smrg            shrunk = true;
1242b8e80941Smrg         vec_type = glsl_get_array_element(vec_type);
1243b8e80941Smrg      }
1244b8e80941Smrg      assert(glsl_type_is_vector_or_scalar(vec_type));
1245b8e80941Smrg
1246b8e80941Smrg      assert(usage->comps_kept == (usage->comps_kept & usage->all_comps));
1247b8e80941Smrg      if (usage->comps_kept != usage->all_comps)
1248b8e80941Smrg         shrunk = true;
1249b8e80941Smrg
1250b8e80941Smrg      if (usage->comps_kept == 0) {
1251b8e80941Smrg         /* This variable is dead, remove it */
1252b8e80941Smrg         vars_shrunk = true;
1253b8e80941Smrg         exec_node_remove(&var->node);
1254b8e80941Smrg         continue;
1255b8e80941Smrg      }
1256b8e80941Smrg
1257b8e80941Smrg      if (!shrunk) {
1258b8e80941Smrg         /* This variable doesn't need to be shrunk.  Remove it from the
1259b8e80941Smrg          * hash table so later steps will ignore it.
1260b8e80941Smrg          */
1261b8e80941Smrg         _mesa_hash_table_remove_key(var_usage_map, var);
1262b8e80941Smrg         continue;
1263b8e80941Smrg      }
1264b8e80941Smrg
1265b8e80941Smrg      /* Build the new var type */
1266b8e80941Smrg      unsigned new_num_comps = util_bitcount(usage->comps_kept);
1267b8e80941Smrg      const struct glsl_type *new_type =
1268b8e80941Smrg         glsl_vector_type(glsl_get_base_type(vec_type), new_num_comps);
1269b8e80941Smrg      for (int i = usage->num_levels - 1; i >= 0; i--) {
1270b8e80941Smrg         assert(usage->levels[i].array_len > 0);
1271b8e80941Smrg         /* If the original type was a matrix type, we'd like to keep that so
1272b8e80941Smrg          * we don't convert matrices into arrays.
1273b8e80941Smrg          */
1274b8e80941Smrg         if (i == usage->num_levels - 1 &&
1275b8e80941Smrg             glsl_type_is_matrix(glsl_without_array(var->type)) &&
1276b8e80941Smrg             new_num_comps > 1 && usage->levels[i].array_len > 1) {
1277b8e80941Smrg            new_type = glsl_matrix_type(glsl_get_base_type(new_type),
1278b8e80941Smrg                                        new_num_comps,
1279b8e80941Smrg                                        usage->levels[i].array_len);
1280b8e80941Smrg         } else {
1281b8e80941Smrg            new_type = glsl_array_type(new_type, usage->levels[i].array_len, 0);
1282b8e80941Smrg         }
1283b8e80941Smrg      }
1284b8e80941Smrg      var->type = new_type;
1285b8e80941Smrg
1286b8e80941Smrg      vars_shrunk = true;
1287b8e80941Smrg   }
1288b8e80941Smrg
1289b8e80941Smrg   return vars_shrunk;
1290b8e80941Smrg}
1291b8e80941Smrg
1292b8e80941Smrgstatic bool
1293b8e80941Smrgvec_deref_is_oob(nir_deref_instr *deref,
1294b8e80941Smrg                 struct vec_var_usage *usage)
1295b8e80941Smrg{
1296b8e80941Smrg   nir_deref_path path;
1297b8e80941Smrg   nir_deref_path_init(&path, deref, NULL);
1298b8e80941Smrg
1299b8e80941Smrg   bool oob = false;
1300b8e80941Smrg   for (unsigned i = 0; i < usage->num_levels; i++) {
1301b8e80941Smrg      nir_deref_instr *p = path.path[i + 1];
1302b8e80941Smrg      if (p->deref_type == nir_deref_type_array_wildcard)
1303b8e80941Smrg         continue;
1304b8e80941Smrg
1305b8e80941Smrg      if (nir_src_is_const(p->arr.index) &&
1306b8e80941Smrg          nir_src_as_uint(p->arr.index) >= usage->levels[i].array_len) {
1307b8e80941Smrg         oob = true;
1308b8e80941Smrg         break;
1309b8e80941Smrg      }
1310b8e80941Smrg   }
1311b8e80941Smrg
1312b8e80941Smrg   nir_deref_path_finish(&path);
1313b8e80941Smrg
1314b8e80941Smrg   return oob;
1315b8e80941Smrg}
1316b8e80941Smrg
1317b8e80941Smrgstatic bool
1318b8e80941Smrgvec_deref_is_dead_or_oob(nir_deref_instr *deref,
1319b8e80941Smrg                         struct hash_table *var_usage_map,
1320b8e80941Smrg                         nir_variable_mode modes)
1321b8e80941Smrg{
1322b8e80941Smrg   struct vec_var_usage *usage =
1323b8e80941Smrg      get_vec_deref_usage(deref, var_usage_map, modes, false, NULL);
1324b8e80941Smrg   if (!usage)
1325b8e80941Smrg      return false;
1326b8e80941Smrg
1327b8e80941Smrg   return usage->comps_kept == 0 || vec_deref_is_oob(deref, usage);
1328b8e80941Smrg}
1329b8e80941Smrg
1330b8e80941Smrgstatic void
1331b8e80941Smrgshrink_vec_var_access_impl(nir_function_impl *impl,
1332b8e80941Smrg                           struct hash_table *var_usage_map,
1333b8e80941Smrg                           nir_variable_mode modes)
1334b8e80941Smrg{
1335b8e80941Smrg   nir_builder b;
1336b8e80941Smrg   nir_builder_init(&b, impl);
1337b8e80941Smrg
1338b8e80941Smrg   nir_foreach_block(block, impl) {
1339b8e80941Smrg      nir_foreach_instr_safe(instr, block) {
1340b8e80941Smrg         switch (instr->type) {
1341b8e80941Smrg         case nir_instr_type_deref: {
1342b8e80941Smrg            nir_deref_instr *deref = nir_instr_as_deref(instr);
1343b8e80941Smrg            if (!(deref->mode & modes))
1344b8e80941Smrg               break;
1345b8e80941Smrg
1346b8e80941Smrg            /* Clean up any dead derefs we find lying around.  They may refer
1347b8e80941Smrg             * to variables we've deleted.
1348b8e80941Smrg             */
1349b8e80941Smrg            if (nir_deref_instr_remove_if_unused(deref))
1350b8e80941Smrg               break;
1351b8e80941Smrg
1352b8e80941Smrg            /* Update the type in the deref to keep the types consistent as
1353b8e80941Smrg             * you walk down the chain.  We don't need to check if this is one
1354b8e80941Smrg             * of the derefs we're shrinking because this is a no-op if it
1355b8e80941Smrg             * isn't.  The worst that could happen is that we accidentally fix
1356b8e80941Smrg             * an invalid deref.
1357b8e80941Smrg             */
1358b8e80941Smrg            if (deref->deref_type == nir_deref_type_var) {
1359b8e80941Smrg               deref->type = deref->var->type;
1360b8e80941Smrg            } else if (deref->deref_type == nir_deref_type_array ||
1361b8e80941Smrg                       deref->deref_type == nir_deref_type_array_wildcard) {
1362b8e80941Smrg               nir_deref_instr *parent = nir_deref_instr_parent(deref);
1363b8e80941Smrg               assert(glsl_type_is_array(parent->type) ||
1364b8e80941Smrg                      glsl_type_is_matrix(parent->type));
1365b8e80941Smrg               deref->type = glsl_get_array_element(parent->type);
1366b8e80941Smrg            }
1367b8e80941Smrg            break;
1368b8e80941Smrg         }
1369b8e80941Smrg
1370b8e80941Smrg         case nir_instr_type_intrinsic: {
1371b8e80941Smrg            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1372b8e80941Smrg
1373b8e80941Smrg            /* If we have a copy whose source or destination has been deleted
1374b8e80941Smrg             * because we determined the variable was dead, then we just
1375b8e80941Smrg             * delete the copy instruction.  If the source variable was dead
1376b8e80941Smrg             * then it was writing undefined garbage anyway and if it's the
1377b8e80941Smrg             * destination variable that's dead then the write isn't needed.
1378b8e80941Smrg             */
1379b8e80941Smrg            if (intrin->intrinsic == nir_intrinsic_copy_deref) {
1380b8e80941Smrg               nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
1381b8e80941Smrg               nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
1382b8e80941Smrg               if (vec_deref_is_dead_or_oob(dst, var_usage_map, modes) ||
1383b8e80941Smrg                   vec_deref_is_dead_or_oob(src, var_usage_map, modes)) {
1384b8e80941Smrg                  nir_instr_remove(&intrin->instr);
1385b8e80941Smrg                  nir_deref_instr_remove_if_unused(dst);
1386b8e80941Smrg                  nir_deref_instr_remove_if_unused(src);
1387b8e80941Smrg               }
1388b8e80941Smrg               continue;
1389b8e80941Smrg            }
1390b8e80941Smrg
1391b8e80941Smrg            if (intrin->intrinsic != nir_intrinsic_load_deref &&
1392b8e80941Smrg                intrin->intrinsic != nir_intrinsic_store_deref)
1393b8e80941Smrg               continue;
1394b8e80941Smrg
1395b8e80941Smrg            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
1396b8e80941Smrg            if (!(deref->mode & modes))
1397b8e80941Smrg               continue;
1398b8e80941Smrg
1399b8e80941Smrg            struct vec_var_usage *usage =
1400b8e80941Smrg               get_vec_deref_usage(deref, var_usage_map, modes, false, NULL);
1401b8e80941Smrg            if (!usage)
1402b8e80941Smrg               continue;
1403b8e80941Smrg
1404b8e80941Smrg            if (usage->comps_kept == 0 || vec_deref_is_oob(deref, usage)) {
1405b8e80941Smrg               if (intrin->intrinsic == nir_intrinsic_load_deref) {
1406b8e80941Smrg                  nir_ssa_def *u =
1407b8e80941Smrg                     nir_ssa_undef(&b, intrin->dest.ssa.num_components,
1408b8e80941Smrg                                       intrin->dest.ssa.bit_size);
1409b8e80941Smrg                  nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
1410b8e80941Smrg                                           nir_src_for_ssa(u));
1411b8e80941Smrg               }
1412b8e80941Smrg               nir_instr_remove(&intrin->instr);
1413b8e80941Smrg               nir_deref_instr_remove_if_unused(deref);
1414b8e80941Smrg               continue;
1415b8e80941Smrg            }
1416b8e80941Smrg
1417b8e80941Smrg            /* If we're not dropping any components, there's no need to
1418b8e80941Smrg             * compact vectors.
1419b8e80941Smrg             */
1420b8e80941Smrg            if (usage->comps_kept == usage->all_comps)
1421b8e80941Smrg               continue;
1422b8e80941Smrg
1423b8e80941Smrg            if (intrin->intrinsic == nir_intrinsic_load_deref) {
1424b8e80941Smrg               b.cursor = nir_after_instr(&intrin->instr);
1425b8e80941Smrg
1426b8e80941Smrg               nir_ssa_def *undef =
1427b8e80941Smrg                  nir_ssa_undef(&b, 1, intrin->dest.ssa.bit_size);
1428b8e80941Smrg               nir_ssa_def *vec_srcs[NIR_MAX_VEC_COMPONENTS];
1429b8e80941Smrg               unsigned c = 0;
1430b8e80941Smrg               for (unsigned i = 0; i < intrin->num_components; i++) {
1431b8e80941Smrg                  if (usage->comps_kept & (1u << i))
1432b8e80941Smrg                     vec_srcs[i] = nir_channel(&b, &intrin->dest.ssa, c++);
1433b8e80941Smrg                  else
1434b8e80941Smrg                     vec_srcs[i] = undef;
1435b8e80941Smrg               }
1436b8e80941Smrg               nir_ssa_def *vec = nir_vec(&b, vec_srcs, intrin->num_components);
1437b8e80941Smrg
1438b8e80941Smrg               nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa,
1439b8e80941Smrg                                              nir_src_for_ssa(vec),
1440b8e80941Smrg                                              vec->parent_instr);
1441b8e80941Smrg
1442b8e80941Smrg               /* The SSA def is now only used by the swizzle.  It's safe to
1443b8e80941Smrg                * shrink the number of components.
1444b8e80941Smrg                */
1445b8e80941Smrg               assert(list_length(&intrin->dest.ssa.uses) == c);
1446b8e80941Smrg               intrin->num_components = c;
1447b8e80941Smrg               intrin->dest.ssa.num_components = c;
1448b8e80941Smrg            } else {
1449b8e80941Smrg               nir_component_mask_t write_mask =
1450b8e80941Smrg                  nir_intrinsic_write_mask(intrin);
1451b8e80941Smrg
1452b8e80941Smrg               unsigned swizzle[NIR_MAX_VEC_COMPONENTS];
1453b8e80941Smrg               nir_component_mask_t new_write_mask = 0;
1454b8e80941Smrg               unsigned c = 0;
1455b8e80941Smrg               for (unsigned i = 0; i < intrin->num_components; i++) {
1456b8e80941Smrg                  if (usage->comps_kept & (1u << i)) {
1457b8e80941Smrg                     swizzle[c] = i;
1458b8e80941Smrg                     if (write_mask & (1u << i))
1459b8e80941Smrg                        new_write_mask |= 1u << c;
1460b8e80941Smrg                     c++;
1461b8e80941Smrg                  }
1462b8e80941Smrg               }
1463b8e80941Smrg
1464b8e80941Smrg               b.cursor = nir_before_instr(&intrin->instr);
1465b8e80941Smrg
1466b8e80941Smrg               nir_ssa_def *swizzled =
1467b8e80941Smrg                  nir_swizzle(&b, intrin->src[1].ssa, swizzle, c, false);
1468b8e80941Smrg
1469b8e80941Smrg               /* Rewrite to use the compacted source */
1470b8e80941Smrg               nir_instr_rewrite_src(&intrin->instr, &intrin->src[1],
1471b8e80941Smrg                                     nir_src_for_ssa(swizzled));
1472b8e80941Smrg               nir_intrinsic_set_write_mask(intrin, new_write_mask);
1473b8e80941Smrg               intrin->num_components = c;
1474b8e80941Smrg            }
1475b8e80941Smrg            break;
1476b8e80941Smrg         }
1477b8e80941Smrg
1478b8e80941Smrg         default:
1479b8e80941Smrg            break;
1480b8e80941Smrg         }
1481b8e80941Smrg      }
1482b8e80941Smrg   }
1483b8e80941Smrg}
1484b8e80941Smrg
1485b8e80941Smrgstatic bool
1486b8e80941Smrgfunction_impl_has_vars_with_modes(nir_function_impl *impl,
1487b8e80941Smrg                                  nir_variable_mode modes)
1488b8e80941Smrg{
1489b8e80941Smrg   nir_shader *shader = impl->function->shader;
1490b8e80941Smrg
1491b8e80941Smrg   if ((modes & nir_var_shader_temp) && !exec_list_is_empty(&shader->globals))
1492b8e80941Smrg      return true;
1493b8e80941Smrg
1494b8e80941Smrg   if ((modes & nir_var_function_temp) && !exec_list_is_empty(&impl->locals))
1495b8e80941Smrg      return true;
1496b8e80941Smrg
1497b8e80941Smrg   return false;
1498b8e80941Smrg}
1499b8e80941Smrg
1500b8e80941Smrg/** Attempt to shrink arrays of vectors
1501b8e80941Smrg *
1502b8e80941Smrg * This pass looks at variables which contain a vector or an array (possibly
1503b8e80941Smrg * multiple dimensions) of vectors and attempts to lower to a smaller vector
1504b8e80941Smrg * or array.  If the pass can prove that a component of a vector (or array of
1505b8e80941Smrg * vectors) is never really used, then that component will be removed.
1506b8e80941Smrg * Similarly, the pass attempts to shorten arrays based on what elements it
1507b8e80941Smrg * can prove are never read or never contain valid data.
1508b8e80941Smrg */
1509b8e80941Smrgbool
1510b8e80941Smrgnir_shrink_vec_array_vars(nir_shader *shader, nir_variable_mode modes)
1511b8e80941Smrg{
1512b8e80941Smrg   assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes);
1513b8e80941Smrg
1514b8e80941Smrg   void *mem_ctx = ralloc_context(NULL);
1515b8e80941Smrg
1516b8e80941Smrg   struct hash_table *var_usage_map =
1517b8e80941Smrg      _mesa_pointer_hash_table_create(mem_ctx);
1518b8e80941Smrg
1519b8e80941Smrg   bool has_vars_to_shrink = false;
1520b8e80941Smrg   nir_foreach_function(function, shader) {
1521b8e80941Smrg      if (!function->impl)
1522b8e80941Smrg         continue;
1523b8e80941Smrg
1524b8e80941Smrg      /* Don't even bother crawling the IR if we don't have any variables.
1525b8e80941Smrg       * Given that this pass deletes any unused variables, it's likely that
1526b8e80941Smrg       * we will be in this scenario eventually.
1527b8e80941Smrg       */
1528b8e80941Smrg      if (function_impl_has_vars_with_modes(function->impl, modes)) {
1529b8e80941Smrg         has_vars_to_shrink = true;
1530b8e80941Smrg         find_used_components_impl(function->impl, var_usage_map,
1531b8e80941Smrg                                   modes, mem_ctx);
1532b8e80941Smrg      }
1533b8e80941Smrg   }
1534b8e80941Smrg   if (!has_vars_to_shrink) {
1535b8e80941Smrg      ralloc_free(mem_ctx);
1536b8e80941Smrg      return false;
1537b8e80941Smrg   }
1538b8e80941Smrg
1539b8e80941Smrg   bool globals_shrunk = false;
1540b8e80941Smrg   if (modes & nir_var_shader_temp)
1541b8e80941Smrg      globals_shrunk = shrink_vec_var_list(&shader->globals, var_usage_map);
1542b8e80941Smrg
1543b8e80941Smrg   bool progress = false;
1544b8e80941Smrg   nir_foreach_function(function, shader) {
1545b8e80941Smrg      if (!function->impl)
1546b8e80941Smrg         continue;
1547b8e80941Smrg
1548b8e80941Smrg      bool locals_shrunk = false;
1549b8e80941Smrg      if (modes & nir_var_function_temp) {
1550b8e80941Smrg         locals_shrunk = shrink_vec_var_list(&function->impl->locals,
1551b8e80941Smrg                                             var_usage_map);
1552b8e80941Smrg      }
1553b8e80941Smrg
1554b8e80941Smrg      if (globals_shrunk || locals_shrunk) {
1555b8e80941Smrg         shrink_vec_var_access_impl(function->impl, var_usage_map, modes);
1556b8e80941Smrg
1557b8e80941Smrg         nir_metadata_preserve(function->impl, nir_metadata_block_index |
1558b8e80941Smrg                                               nir_metadata_dominance);
1559b8e80941Smrg         progress = true;
1560b8e80941Smrg      }
1561b8e80941Smrg   }
1562b8e80941Smrg
1563b8e80941Smrg   ralloc_free(mem_ctx);
1564b8e80941Smrg
1565b8e80941Smrg   return progress;
1566b8e80941Smrg}
1567