1/*
2 * Copyright © 2018 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "nir.h"
25#include "nir_builder.h"
26#include "nir_deref.h"
27#include "nir_vla.h"
28
29#include "util/u_math.h"
30
31
32struct split_var_state {
33   void *mem_ctx;
34
35   nir_shader *shader;
36   nir_function_impl *impl;
37
38   nir_variable *base_var;
39};
40
41struct field {
42   struct field *parent;
43
44   const struct glsl_type *type;
45
46   unsigned num_fields;
47   struct field *fields;
48
49   nir_variable *var;
50};
51
/* Re-creates the array nesting of `array_type` around `type`.
 *
 * If `array_type` is not an array, `type` is returned untouched.  Otherwise
 * the result has one array level (with matching length and no explicit
 * stride) for every array level of `array_type`, with `type` innermost.
 */
static const struct glsl_type *
wrap_type_in_array(const struct glsl_type *type,
                   const struct glsl_type *array_type)
{
   if (glsl_type_is_array(array_type)) {
      /* Wrap the inner levels first, then add this level on the outside. */
      const struct glsl_type *inner =
         wrap_type_in_array(type, glsl_get_array_element(array_type));

      /* Explicitly strided arrays are not expected here. */
      assert(glsl_get_explicit_stride(array_type) == 0);

      const unsigned length = glsl_get_length(array_type);
      return glsl_array_type(inner, length, 0);
   }

   return type;
}
64
/* Counts the array/matrix levels wrapped around a vector or scalar.
 *
 * Returns the number of levels peeled off before reaching a vector or scalar
 * (0 for a bare vector/scalar), or -1 if the innermost type is anything else.
 */
static int
num_array_levels_in_array_of_vector_type(const struct glsl_type *type)
{
   int depth = 0;

   /* Strip array and matrix levels one at a time. */
   for (; glsl_type_is_array_or_matrix(type);
        type = glsl_get_array_element(type))
      depth++;

   /* Whatever is left must be a vector or scalar to qualify. */
   return glsl_type_is_vector_or_scalar(type) ? depth : -1;
}
81
82static void
83init_field_for_type(struct field *field, struct field *parent,
84                    const struct glsl_type *type,
85                    const char *name,
86                    struct split_var_state *state)
87{
88   *field = (struct field) {
89      .parent = parent,
90      .type = type,
91   };
92
93   const struct glsl_type *struct_type = glsl_without_array(type);
94   if (glsl_type_is_struct_or_ifc(struct_type)) {
95      field->num_fields = glsl_get_length(struct_type),
96      field->fields = ralloc_array(state->mem_ctx, struct field,
97                                   field->num_fields);
98      for (unsigned i = 0; i < field->num_fields; i++) {
99         char *field_name = NULL;
100         if (name) {
101            field_name = ralloc_asprintf(state->mem_ctx, "%s_%s", name,
102                                         glsl_get_struct_elem_name(struct_type, i));
103         } else {
104            field_name = ralloc_asprintf(state->mem_ctx, "{unnamed %s}_%s",
105                                         glsl_get_type_name(struct_type),
106                                         glsl_get_struct_elem_name(struct_type, i));
107         }
108         init_field_for_type(&field->fields[i], field,
109                             glsl_get_struct_field(struct_type, i),
110                             field_name, state);
111      }
112   } else {
113      const struct glsl_type *var_type = type;
114      for (struct field *f = field->parent; f; f = f->parent)
115         var_type = wrap_type_in_array(var_type, f->type);
116
117      nir_variable_mode mode = state->base_var->data.mode;
118      if (mode == nir_var_function_temp) {
119         field->var = nir_local_variable_create(state->impl, var_type, name);
120      } else {
121         field->var = nir_variable_create(state->shader, mode, var_type, name);
122      }
123   }
124}
125
126static bool
127split_var_list_structs(nir_shader *shader,
128                       nir_function_impl *impl,
129                       struct exec_list *vars,
130                       struct hash_table *var_field_map,
131                       void *mem_ctx)
132{
133   struct split_var_state state = {
134      .mem_ctx = mem_ctx,
135      .shader = shader,
136      .impl = impl,
137   };
138
139   struct exec_list split_vars;
140   exec_list_make_empty(&split_vars);
141
142   /* To avoid list confusion (we'll be adding things as we split variables),
143    * pull all of the variables we plan to split off of the list
144    */
145   nir_foreach_variable_safe(var, vars) {
146      if (!glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
147         continue;
148
149      exec_node_remove(&var->node);
150      exec_list_push_tail(&split_vars, &var->node);
151   }
152
153   nir_foreach_variable(var, &split_vars) {
154      state.base_var = var;
155
156      struct field *root_field = ralloc(mem_ctx, struct field);
157      init_field_for_type(root_field, NULL, var->type, var->name, &state);
158      _mesa_hash_table_insert(var_field_map, var, root_field);
159   }
160
161   return !exec_list_is_empty(&split_vars);
162}
163
164static void
165split_struct_derefs_impl(nir_function_impl *impl,
166                         struct hash_table *var_field_map,
167                         nir_variable_mode modes,
168                         void *mem_ctx)
169{
170   nir_builder b;
171   nir_builder_init(&b, impl);
172
173   nir_foreach_block(block, impl) {
174      nir_foreach_instr_safe(instr, block) {
175         if (instr->type != nir_instr_type_deref)
176            continue;
177
178         nir_deref_instr *deref = nir_instr_as_deref(instr);
179         if (!(deref->mode & modes))
180            continue;
181
182         /* Clean up any dead derefs we find lying around.  They may refer to
183          * variables we're planning to split.
184          */
185         if (nir_deref_instr_remove_if_unused(deref))
186            continue;
187
188         if (!glsl_type_is_vector_or_scalar(deref->type))
189            continue;
190
191         nir_variable *base_var = nir_deref_instr_get_variable(deref);
192         struct hash_entry *entry =
193            _mesa_hash_table_search(var_field_map, base_var);
194         if (!entry)
195            continue;
196
197         struct field *root_field = entry->data;
198
199         nir_deref_path path;
200         nir_deref_path_init(&path, deref, mem_ctx);
201
202         struct field *tail_field = root_field;
203         for (unsigned i = 0; path.path[i]; i++) {
204            if (path.path[i]->deref_type != nir_deref_type_struct)
205               continue;
206
207            assert(i > 0);
208            assert(glsl_type_is_struct_or_ifc(path.path[i - 1]->type));
209            assert(path.path[i - 1]->type ==
210                   glsl_without_array(tail_field->type));
211
212            tail_field = &tail_field->fields[path.path[i]->strct.index];
213         }
214         nir_variable *split_var = tail_field->var;
215
216         nir_deref_instr *new_deref = NULL;
217         for (unsigned i = 0; path.path[i]; i++) {
218            nir_deref_instr *p = path.path[i];
219            b.cursor = nir_after_instr(&p->instr);
220
221            switch (p->deref_type) {
222            case nir_deref_type_var:
223               assert(new_deref == NULL);
224               new_deref = nir_build_deref_var(&b, split_var);
225               break;
226
227            case nir_deref_type_array:
228            case nir_deref_type_array_wildcard:
229               new_deref = nir_build_deref_follower(&b, new_deref, p);
230               break;
231
232            case nir_deref_type_struct:
233               /* Nothing to do; we're splitting structs */
234               break;
235
236            default:
237               unreachable("Invalid deref type in path");
238            }
239         }
240
241         assert(new_deref->type == deref->type);
242         nir_ssa_def_rewrite_uses(&deref->dest.ssa,
243                                  nir_src_for_ssa(&new_deref->dest.ssa));
244         nir_deref_instr_remove_if_unused(deref);
245      }
246   }
247}
248
249/** A pass for splitting structs into multiple variables
250 *
251 * This pass splits arrays of structs into multiple variables, one for each
252 * (possibly nested) structure member.  After this pass completes, no
253 * variables of the given mode will contain a struct type.
254 */
255bool
256nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes)
257{
258   void *mem_ctx = ralloc_context(NULL);
259   struct hash_table *var_field_map =
260      _mesa_pointer_hash_table_create(mem_ctx);
261
262   assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes);
263
264   bool has_global_splits = false;
265   if (modes & nir_var_shader_temp) {
266      has_global_splits = split_var_list_structs(shader, NULL,
267                                                 &shader->globals,
268                                                 var_field_map, mem_ctx);
269   }
270
271   bool progress = false;
272   nir_foreach_function(function, shader) {
273      if (!function->impl)
274         continue;
275
276      bool has_local_splits = false;
277      if (modes & nir_var_function_temp) {
278         has_local_splits = split_var_list_structs(shader, function->impl,
279                                                   &function->impl->locals,
280                                                   var_field_map, mem_ctx);
281      }
282
283      if (has_global_splits || has_local_splits) {
284         split_struct_derefs_impl(function->impl, var_field_map,
285                                  modes, mem_ctx);
286
287         nir_metadata_preserve(function->impl, nir_metadata_block_index |
288                                               nir_metadata_dominance);
289         progress = true;
290      }
291   }
292
293   ralloc_free(mem_ctx);
294
295   return progress;
296}
297
/* Per-array-level bookkeeping for the array splitting pass. */
struct array_level_info {
   /* Number of elements at this level */
   unsigned array_len;

   /* Whether this level gets broken into separate variables */
   bool split;
};
302
303struct array_split {
304   /* Only set if this is the tail end of the splitting */
305   nir_variable *var;
306
307   unsigned num_splits;
308   struct array_split *splits;
309};
310
311struct array_var_info {
312   nir_variable *base_var;
313
314   const struct glsl_type *split_var_type;
315
316   bool split_var;
317   struct array_split root_split;
318
319   unsigned num_levels;
320   struct array_level_info levels[0];
321};
322
323static bool
324init_var_list_array_infos(struct exec_list *vars,
325                          struct hash_table *var_info_map,
326                          void *mem_ctx)
327{
328   bool has_array = false;
329
330   nir_foreach_variable(var, vars) {
331      int num_levels = num_array_levels_in_array_of_vector_type(var->type);
332      if (num_levels <= 0)
333         continue;
334
335      struct array_var_info *info =
336         rzalloc_size(mem_ctx, sizeof(*info) +
337                               num_levels * sizeof(info->levels[0]));
338
339      info->base_var = var;
340      info->num_levels = num_levels;
341
342      const struct glsl_type *type = var->type;
343      for (int i = 0; i < num_levels; i++) {
344         info->levels[i].array_len = glsl_get_length(type);
345         type = glsl_get_array_element(type);
346
347         /* All levels start out initially as split */
348         info->levels[i].split = true;
349      }
350
351      _mesa_hash_table_insert(var_info_map, var, info);
352      has_array = true;
353   }
354
355   return has_array;
356}
357
358static struct array_var_info *
359get_array_var_info(nir_variable *var,
360                   struct hash_table *var_info_map)
361{
362   struct hash_entry *entry =
363      _mesa_hash_table_search(var_info_map, var);
364   return entry ? entry->data : NULL;
365}
366
367static struct array_var_info *
368get_array_deref_info(nir_deref_instr *deref,
369                     struct hash_table *var_info_map,
370                     nir_variable_mode modes)
371{
372   if (!(deref->mode & modes))
373      return NULL;
374
375   return get_array_var_info(nir_deref_instr_get_variable(deref),
376                             var_info_map);
377}
378
379static void
380mark_array_deref_used(nir_deref_instr *deref,
381                      struct hash_table *var_info_map,
382                      nir_variable_mode modes,
383                      void *mem_ctx)
384{
385   struct array_var_info *info =
386      get_array_deref_info(deref, var_info_map, modes);
387   if (!info)
388      return;
389
390   nir_deref_path path;
391   nir_deref_path_init(&path, deref, mem_ctx);
392
393   /* Walk the path and look for indirects.  If we have an array deref with an
394    * indirect, mark the given level as not being split.
395    */
396   for (unsigned i = 0; i < info->num_levels; i++) {
397      nir_deref_instr *p = path.path[i + 1];
398      if (p->deref_type == nir_deref_type_array &&
399          !nir_src_is_const(p->arr.index))
400         info->levels[i].split = false;
401   }
402}
403
404static void
405mark_array_usage_impl(nir_function_impl *impl,
406                      struct hash_table *var_info_map,
407                      nir_variable_mode modes,
408                      void *mem_ctx)
409{
410   nir_foreach_block(block, impl) {
411      nir_foreach_instr(instr, block) {
412         if (instr->type != nir_instr_type_intrinsic)
413            continue;
414
415         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
416         switch (intrin->intrinsic) {
417         case nir_intrinsic_copy_deref:
418            mark_array_deref_used(nir_src_as_deref(intrin->src[1]),
419                                  var_info_map, modes, mem_ctx);
420            /* Fall Through */
421
422         case nir_intrinsic_load_deref:
423         case nir_intrinsic_store_deref:
424            mark_array_deref_used(nir_src_as_deref(intrin->src[0]),
425                                  var_info_map, modes, mem_ctx);
426            break;
427
428         default:
429            break;
430         }
431      }
432   }
433}
434
435static void
436create_split_array_vars(struct array_var_info *var_info,
437                        unsigned level,
438                        struct array_split *split,
439                        const char *name,
440                        nir_shader *shader,
441                        nir_function_impl *impl,
442                        void *mem_ctx)
443{
444   while (level < var_info->num_levels && !var_info->levels[level].split) {
445      name = ralloc_asprintf(mem_ctx, "%s[*]", name);
446      level++;
447   }
448
449   if (level == var_info->num_levels) {
450      /* We add parens to the variable name so it looks like "(foo[2][*])" so
451       * that further derefs will look like "(foo[2][*])[ssa_6]"
452       */
453      name = ralloc_asprintf(mem_ctx, "(%s)", name);
454
455      nir_variable_mode mode = var_info->base_var->data.mode;
456      if (mode == nir_var_function_temp) {
457         split->var = nir_local_variable_create(impl,
458                                                var_info->split_var_type, name);
459      } else {
460         split->var = nir_variable_create(shader, mode,
461                                          var_info->split_var_type, name);
462      }
463   } else {
464      assert(var_info->levels[level].split);
465      split->num_splits = var_info->levels[level].array_len;
466      split->splits = rzalloc_array(mem_ctx, struct array_split,
467                                    split->num_splits);
468      for (unsigned i = 0; i < split->num_splits; i++) {
469         create_split_array_vars(var_info, level + 1, &split->splits[i],
470                                 ralloc_asprintf(mem_ctx, "%s[%d]", name, i),
471                                 shader, impl, mem_ctx);
472      }
473   }
474}
475
476static bool
477split_var_list_arrays(nir_shader *shader,
478                      nir_function_impl *impl,
479                      struct exec_list *vars,
480                      struct hash_table *var_info_map,
481                      void *mem_ctx)
482{
483   struct exec_list split_vars;
484   exec_list_make_empty(&split_vars);
485
486   nir_foreach_variable_safe(var, vars) {
487      struct array_var_info *info = get_array_var_info(var, var_info_map);
488      if (!info)
489         continue;
490
491      bool has_split = false;
492      const struct glsl_type *split_type =
493         glsl_without_array_or_matrix(var->type);
494      for (int i = info->num_levels - 1; i >= 0; i--) {
495         if (info->levels[i].split) {
496            has_split = true;
497            continue;
498         }
499
500         /* If the original type was a matrix type, we'd like to keep that so
501          * we don't convert matrices into arrays.
502          */
503         if (i == info->num_levels - 1 &&
504             glsl_type_is_matrix(glsl_without_array(var->type))) {
505            split_type = glsl_matrix_type(glsl_get_base_type(split_type),
506                                          glsl_get_components(split_type),
507                                          info->levels[i].array_len);
508         } else {
509            split_type = glsl_array_type(split_type, info->levels[i].array_len, 0);
510         }
511      }
512
513      if (has_split) {
514         info->split_var_type = split_type;
515         /* To avoid list confusion (we'll be adding things as we split
516          * variables), pull all of the variables we plan to split off of the
517          * main variable list.
518          */
519         exec_node_remove(&var->node);
520         exec_list_push_tail(&split_vars, &var->node);
521      } else {
522         assert(split_type == glsl_get_bare_type(var->type));
523         /* If we're not modifying this variable, delete the info so we skip
524          * it faster in later passes.
525          */
526         _mesa_hash_table_remove_key(var_info_map, var);
527      }
528   }
529
530   nir_foreach_variable(var, &split_vars) {
531      struct array_var_info *info = get_array_var_info(var, var_info_map);
532      create_split_array_vars(info, 0, &info->root_split, var->name,
533                              shader, impl, mem_ctx);
534   }
535
536   return !exec_list_is_empty(&split_vars);
537}
538
539static bool
540deref_has_split_wildcard(nir_deref_path *path,
541                         struct array_var_info *info)
542{
543   if (info == NULL)
544      return false;
545
546   assert(path->path[0]->var == info->base_var);
547   for (unsigned i = 0; i < info->num_levels; i++) {
548      if (path->path[i + 1]->deref_type == nir_deref_type_array_wildcard &&
549          info->levels[i].split)
550         return true;
551   }
552
553   return false;
554}
555
556static bool
557array_path_is_out_of_bounds(nir_deref_path *path,
558                            struct array_var_info *info)
559{
560   if (info == NULL)
561      return false;
562
563   assert(path->path[0]->var == info->base_var);
564   for (unsigned i = 0; i < info->num_levels; i++) {
565      nir_deref_instr *p = path->path[i + 1];
566      if (p->deref_type == nir_deref_type_array_wildcard)
567         continue;
568
569      if (nir_src_is_const(p->arr.index) &&
570          nir_src_as_uint(p->arr.index) >= info->levels[i].array_len)
571         return true;
572   }
573
574   return false;
575}
576
577static void
578emit_split_copies(nir_builder *b,
579                  struct array_var_info *dst_info, nir_deref_path *dst_path,
580                  unsigned dst_level, nir_deref_instr *dst,
581                  struct array_var_info *src_info, nir_deref_path *src_path,
582                  unsigned src_level, nir_deref_instr *src)
583{
584   nir_deref_instr *dst_p, *src_p;
585
586   while ((dst_p = dst_path->path[dst_level + 1])) {
587      if (dst_p->deref_type == nir_deref_type_array_wildcard)
588         break;
589
590      dst = nir_build_deref_follower(b, dst, dst_p);
591      dst_level++;
592   }
593
594   while ((src_p = src_path->path[src_level + 1])) {
595      if (src_p->deref_type == nir_deref_type_array_wildcard)
596         break;
597
598      src = nir_build_deref_follower(b, src, src_p);
599      src_level++;
600   }
601
602   if (src_p == NULL || dst_p == NULL) {
603      assert(src_p == NULL && dst_p == NULL);
604      nir_copy_deref(b, dst, src);
605   } else {
606      assert(dst_p->deref_type == nir_deref_type_array_wildcard &&
607             src_p->deref_type == nir_deref_type_array_wildcard);
608
609      if ((dst_info && dst_info->levels[dst_level].split) ||
610          (src_info && src_info->levels[src_level].split)) {
611         /* There are no indirects at this level on one of the source or the
612          * destination so we are lowering it.
613          */
614         assert(glsl_get_length(dst_path->path[dst_level]->type) ==
615                glsl_get_length(src_path->path[src_level]->type));
616         unsigned len = glsl_get_length(dst_path->path[dst_level]->type);
617         for (unsigned i = 0; i < len; i++) {
618            emit_split_copies(b, dst_info, dst_path, dst_level + 1,
619                              nir_build_deref_array_imm(b, dst, i),
620                              src_info, src_path, src_level + 1,
621                              nir_build_deref_array_imm(b, src, i));
622         }
623      } else {
624         /* Neither side is being split so we just keep going */
625         emit_split_copies(b, dst_info, dst_path, dst_level + 1,
626                           nir_build_deref_array_wildcard(b, dst),
627                           src_info, src_path, src_level + 1,
628                           nir_build_deref_array_wildcard(b, src));
629      }
630   }
631}
632
633static void
634split_array_copies_impl(nir_function_impl *impl,
635                        struct hash_table *var_info_map,
636                        nir_variable_mode modes,
637                        void *mem_ctx)
638{
639   nir_builder b;
640   nir_builder_init(&b, impl);
641
642   nir_foreach_block(block, impl) {
643      nir_foreach_instr_safe(instr, block) {
644         if (instr->type != nir_instr_type_intrinsic)
645            continue;
646
647         nir_intrinsic_instr *copy = nir_instr_as_intrinsic(instr);
648         if (copy->intrinsic != nir_intrinsic_copy_deref)
649            continue;
650
651         nir_deref_instr *dst_deref = nir_src_as_deref(copy->src[0]);
652         nir_deref_instr *src_deref = nir_src_as_deref(copy->src[1]);
653
654         struct array_var_info *dst_info =
655            get_array_deref_info(dst_deref, var_info_map, modes);
656         struct array_var_info *src_info =
657            get_array_deref_info(src_deref, var_info_map, modes);
658
659         if (!src_info && !dst_info)
660            continue;
661
662         nir_deref_path dst_path, src_path;
663         nir_deref_path_init(&dst_path, dst_deref, mem_ctx);
664         nir_deref_path_init(&src_path, src_deref, mem_ctx);
665
666         if (!deref_has_split_wildcard(&dst_path, dst_info) &&
667             !deref_has_split_wildcard(&src_path, src_info))
668            continue;
669
670         b.cursor = nir_instr_remove(&copy->instr);
671
672         emit_split_copies(&b, dst_info, &dst_path, 0, dst_path.path[0],
673                               src_info, &src_path, 0, src_path.path[0]);
674      }
675   }
676}
677
678static void
679split_array_access_impl(nir_function_impl *impl,
680                        struct hash_table *var_info_map,
681                        nir_variable_mode modes,
682                        void *mem_ctx)
683{
684   nir_builder b;
685   nir_builder_init(&b, impl);
686
687   nir_foreach_block(block, impl) {
688      nir_foreach_instr_safe(instr, block) {
689         if (instr->type == nir_instr_type_deref) {
690            /* Clean up any dead derefs we find lying around.  They may refer
691             * to variables we're planning to split.
692             */
693            nir_deref_instr *deref = nir_instr_as_deref(instr);
694            if (deref->mode & modes)
695               nir_deref_instr_remove_if_unused(deref);
696            continue;
697         }
698
699         if (instr->type != nir_instr_type_intrinsic)
700            continue;
701
702         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
703         if (intrin->intrinsic != nir_intrinsic_load_deref &&
704             intrin->intrinsic != nir_intrinsic_store_deref &&
705             intrin->intrinsic != nir_intrinsic_copy_deref)
706            continue;
707
708         const unsigned num_derefs =
709            intrin->intrinsic == nir_intrinsic_copy_deref ? 2 : 1;
710
711         for (unsigned d = 0; d < num_derefs; d++) {
712            nir_deref_instr *deref = nir_src_as_deref(intrin->src[d]);
713
714            struct array_var_info *info =
715               get_array_deref_info(deref, var_info_map, modes);
716            if (!info)
717               continue;
718
719            nir_deref_path path;
720            nir_deref_path_init(&path, deref, mem_ctx);
721
722            b.cursor = nir_before_instr(&intrin->instr);
723
724            if (array_path_is_out_of_bounds(&path, info)) {
725               /* If one of the derefs is out-of-bounds, we just delete the
726                * instruction.  If a destination is out of bounds, then it may
727                * have been in-bounds prior to shrinking so we don't want to
728                * accidentally stomp something.  However, we've already proven
729                * that it will never be read so it's safe to delete.  If a
730                * source is out of bounds then it is loading random garbage.
731                * For loads, we replace their uses with an undef instruction
732                * and for copies we just delete the copy since it was writing
733                * undefined garbage anyway and we may as well leave the random
734                * garbage in the destination alone.
735                */
736               if (intrin->intrinsic == nir_intrinsic_load_deref) {
737                  nir_ssa_def *u =
738                     nir_ssa_undef(&b, intrin->dest.ssa.num_components,
739                                       intrin->dest.ssa.bit_size);
740                  nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
741                                           nir_src_for_ssa(u));
742               }
743               nir_instr_remove(&intrin->instr);
744               for (unsigned i = 0; i < num_derefs; i++)
745                  nir_deref_instr_remove_if_unused(nir_src_as_deref(intrin->src[i]));
746               break;
747            }
748
749            struct array_split *split = &info->root_split;
750            for (unsigned i = 0; i < info->num_levels; i++) {
751               if (info->levels[i].split) {
752                  nir_deref_instr *p = path.path[i + 1];
753                  unsigned index = nir_src_as_uint(p->arr.index);
754                  assert(index < info->levels[i].array_len);
755                  split = &split->splits[index];
756               }
757            }
758            assert(!split->splits && split->var);
759
760            nir_deref_instr *new_deref = nir_build_deref_var(&b, split->var);
761            for (unsigned i = 0; i < info->num_levels; i++) {
762               if (!info->levels[i].split) {
763                  new_deref = nir_build_deref_follower(&b, new_deref,
764                                                       path.path[i + 1]);
765               }
766            }
767            assert(new_deref->type == deref->type);
768
769            /* Rewrite the deref source to point to the split one */
770            nir_instr_rewrite_src(&intrin->instr, &intrin->src[d],
771                                  nir_src_for_ssa(&new_deref->dest.ssa));
772            nir_deref_instr_remove_if_unused(deref);
773         }
774      }
775   }
776}
777
778/** A pass for splitting arrays of vectors into multiple variables
779 *
780 * This pass looks at arrays (possibly multiple levels) of vectors (not
781 * structures or other types) and tries to split them into piles of variables,
782 * one for each array element.  The heuristic used is simple: If a given array
783 * level is never used with an indirect, that array level will get split.
784 *
785 * This pass probably could handles structures easily enough but making a pass
786 * that could see through an array of structures of arrays would be difficult
787 * so it's best to just run nir_split_struct_vars first.
788 */
789bool
790nir_split_array_vars(nir_shader *shader, nir_variable_mode modes)
791{
792   void *mem_ctx = ralloc_context(NULL);
793   struct hash_table *var_info_map = _mesa_pointer_hash_table_create(mem_ctx);
794
795   assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes);
796
797   bool has_global_array = false;
798   if (modes & nir_var_shader_temp) {
799      has_global_array = init_var_list_array_infos(&shader->globals,
800                                                   var_info_map, mem_ctx);
801   }
802
803   bool has_any_array = false;
804   nir_foreach_function(function, shader) {
805      if (!function->impl)
806         continue;
807
808      bool has_local_array = false;
809      if (modes & nir_var_function_temp) {
810         has_local_array = init_var_list_array_infos(&function->impl->locals,
811                                                     var_info_map, mem_ctx);
812      }
813
814      if (has_global_array || has_local_array) {
815         has_any_array = true;
816         mark_array_usage_impl(function->impl, var_info_map, modes, mem_ctx);
817      }
818   }
819
820   /* If we failed to find any arrays of arrays, bail early. */
821   if (!has_any_array) {
822      ralloc_free(mem_ctx);
823      return false;
824   }
825
826   bool has_global_splits = false;
827   if (modes & nir_var_shader_temp) {
828      has_global_splits = split_var_list_arrays(shader, NULL,
829                                                &shader->globals,
830                                                var_info_map, mem_ctx);
831   }
832
833   bool progress = false;
834   nir_foreach_function(function, shader) {
835      if (!function->impl)
836         continue;
837
838      bool has_local_splits = false;
839      if (modes & nir_var_function_temp) {
840         has_local_splits = split_var_list_arrays(shader, function->impl,
841                                                  &function->impl->locals,
842                                                  var_info_map, mem_ctx);
843      }
844
845      if (has_global_splits || has_local_splits) {
846         split_array_copies_impl(function->impl, var_info_map, modes, mem_ctx);
847         split_array_access_impl(function->impl, var_info_map, modes, mem_ctx);
848
849         nir_metadata_preserve(function->impl, nir_metadata_block_index |
850                                               nir_metadata_dominance);
851         progress = true;
852      }
853   }
854
855   ralloc_free(mem_ctx);
856
857   return progress;
858}
859
/* Usage information gathered for a single array level of a variable. */
struct array_level_usage {
   /* Number of elements at this level */
   unsigned array_len;

   /* Read/write extents; UINT_MAX stands for an indirect access */
   unsigned max_read;
   unsigned max_written;

   /* Set when a copy involves something that is not a shrinkable array */
   bool has_external_copy;

   /* Other levels this level is copied to or from */
   struct set *levels_copied;
};
871
872struct vec_var_usage {
873   /* Convenience set of all components this variable has */
874   nir_component_mask_t all_comps;
875
876   nir_component_mask_t comps_read;
877   nir_component_mask_t comps_written;
878
879   nir_component_mask_t comps_kept;
880
881   /* True if there is a copy that isn't to/from a shrinkable vector */
882   bool has_external_copy;
883   struct set *vars_copied;
884
885   unsigned num_levels;
886   struct array_level_usage levels[0];
887};
888
889static struct vec_var_usage *
890get_vec_var_usage(nir_variable *var,
891                  struct hash_table *var_usage_map,
892                  bool add_usage_entry, void *mem_ctx)
893{
894   struct hash_entry *entry = _mesa_hash_table_search(var_usage_map, var);
895   if (entry)
896      return entry->data;
897
898   if (!add_usage_entry)
899      return NULL;
900
901   /* Check to make sure that we are working with an array of vectors.  We
902    * don't bother to shrink single vectors because we figure that we can
903    * clean it up better with SSA than by inserting piles of vecN instructions
904    * to compact results.
905    */
906   int num_levels = num_array_levels_in_array_of_vector_type(var->type);
907   if (num_levels < 1)
908      return NULL; /* Not an array of vectors */
909
910   struct vec_var_usage *usage =
911      rzalloc_size(mem_ctx, sizeof(*usage) +
912                            num_levels * sizeof(usage->levels[0]));
913
914   usage->num_levels = num_levels;
915   const struct glsl_type *type = var->type;
916   for (unsigned i = 0; i < num_levels; i++) {
917      usage->levels[i].array_len = glsl_get_length(type);
918      type = glsl_get_array_element(type);
919   }
920   assert(glsl_type_is_vector_or_scalar(type));
921
922   usage->all_comps = (1 << glsl_get_components(type)) - 1;
923
924   _mesa_hash_table_insert(var_usage_map, var, usage);
925
926   return usage;
927}
928
929static struct vec_var_usage *
930get_vec_deref_usage(nir_deref_instr *deref,
931                    struct hash_table *var_usage_map,
932                    nir_variable_mode modes,
933                    bool add_usage_entry, void *mem_ctx)
934{
935   if (!(deref->mode & modes))
936      return NULL;
937
938   return get_vec_var_usage(nir_deref_instr_get_variable(deref),
939                            var_usage_map, add_usage_entry, mem_ctx);
940}
941
942static void
943mark_deref_used(nir_deref_instr *deref,
944                nir_component_mask_t comps_read,
945                nir_component_mask_t comps_written,
946                nir_deref_instr *copy_deref,
947                struct hash_table *var_usage_map,
948                nir_variable_mode modes,
949                void *mem_ctx)
950{
951   if (!(deref->mode & modes))
952      return;
953
954   nir_variable *var = nir_deref_instr_get_variable(deref);
955
956   struct vec_var_usage *usage =
957      get_vec_var_usage(var, var_usage_map, true, mem_ctx);
958   if (!usage)
959      return;
960
961   usage->comps_read |= comps_read & usage->all_comps;
962   usage->comps_written |= comps_written & usage->all_comps;
963
964   struct vec_var_usage *copy_usage = NULL;
965   if (copy_deref) {
966      copy_usage = get_vec_deref_usage(copy_deref, var_usage_map, modes,
967                                       true, mem_ctx);
968      if (copy_usage) {
969         if (usage->vars_copied == NULL) {
970            usage->vars_copied = _mesa_pointer_set_create(mem_ctx);
971         }
972         _mesa_set_add(usage->vars_copied, copy_usage);
973      } else {
974         usage->has_external_copy = true;
975      }
976   }
977
978   nir_deref_path path;
979   nir_deref_path_init(&path, deref, mem_ctx);
980
981   nir_deref_path copy_path;
982   if (copy_usage)
983      nir_deref_path_init(&copy_path, copy_deref, mem_ctx);
984
985   unsigned copy_i = 0;
986   for (unsigned i = 0; i < usage->num_levels; i++) {
987      struct array_level_usage *level = &usage->levels[i];
988      nir_deref_instr *deref = path.path[i + 1];
989      assert(deref->deref_type == nir_deref_type_array ||
990             deref->deref_type == nir_deref_type_array_wildcard);
991
992      unsigned max_used;
993      if (deref->deref_type == nir_deref_type_array) {
994         max_used = nir_src_is_const(deref->arr.index) ?
995                    nir_src_as_uint(deref->arr.index) : UINT_MAX;
996      } else {
997         /* For wildcards, we read or wrote the whole thing. */
998         assert(deref->deref_type == nir_deref_type_array_wildcard);
999         max_used = level->array_len - 1;
1000
1001         if (copy_usage) {
1002            /* Match each wildcard level with the level on copy_usage */
1003            for (; copy_path.path[copy_i + 1]; copy_i++) {
1004               if (copy_path.path[copy_i + 1]->deref_type ==
1005                   nir_deref_type_array_wildcard)
1006                  break;
1007            }
1008            struct array_level_usage *copy_level =
1009               &copy_usage->levels[copy_i++];
1010
1011            if (level->levels_copied == NULL) {
1012               level->levels_copied = _mesa_pointer_set_create(mem_ctx);
1013            }
1014            _mesa_set_add(level->levels_copied, copy_level);
1015         } else {
1016            /* We have a wildcard and it comes from a variable we aren't
1017             * tracking; flag it and we'll know to not shorten this array.
1018             */
1019            level->has_external_copy = true;
1020         }
1021      }
1022
1023      if (comps_written)
1024         level->max_written = MAX2(level->max_written, max_used);
1025      if (comps_read)
1026         level->max_read = MAX2(level->max_read, max_used);
1027   }
1028}
1029
1030static bool
1031src_is_load_deref(nir_src src, nir_src deref_src)
1032{
1033   nir_intrinsic_instr *load = nir_src_as_intrinsic(src);
1034   if (load == NULL || load->intrinsic != nir_intrinsic_load_deref)
1035      return false;
1036
1037   assert(load->src[0].is_ssa);
1038
1039   return load->src[0].ssa == deref_src.ssa;
1040}
1041
1042/* Returns all non-self-referential components of a store instruction.  A
1043 * component is self-referential if it comes from the same component of a load
1044 * instruction on the same deref.  If the only data in a particular component
1045 * of a variable came directly from that component then it's undefined.  The
1046 * only way to get defined data into a component of a variable is for it to
1047 * get written there by something outside or from a different component.
1048 *
1049 * This is a fairly common pattern in shaders that come from either GLSL IR or
1050 * GLSLang because both glsl_to_nir and GLSLang implement write-masking with
1051 * load-vec-store.
1052 */
1053static nir_component_mask_t
1054get_non_self_referential_store_comps(nir_intrinsic_instr *store)
1055{
1056   nir_component_mask_t comps = nir_intrinsic_write_mask(store);
1057
1058   assert(store->src[1].is_ssa);
1059   nir_instr *src_instr = store->src[1].ssa->parent_instr;
1060   if (src_instr->type != nir_instr_type_alu)
1061      return comps;
1062
1063   nir_alu_instr *src_alu = nir_instr_as_alu(src_instr);
1064
1065   if (src_alu->op == nir_op_imov ||
1066       src_alu->op == nir_op_fmov) {
1067      /* If it's just a swizzle of a load from the same deref, discount any
1068       * channels that don't move in the swizzle.
1069       */
1070      if (src_is_load_deref(src_alu->src[0].src, store->src[0])) {
1071         for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) {
1072            if (src_alu->src[0].swizzle[i] == i)
1073               comps &= ~(1u << i);
1074         }
1075      }
1076   } else if (src_alu->op == nir_op_vec2 ||
1077              src_alu->op == nir_op_vec3 ||
1078              src_alu->op == nir_op_vec4) {
1079      /* If it's a vec, discount any channels that are just loads from the
1080       * same deref put in the same spot.
1081       */
1082      for (unsigned i = 0; i < nir_op_infos[src_alu->op].num_inputs; i++) {
1083         if (src_is_load_deref(src_alu->src[i].src, store->src[0]) &&
1084             src_alu->src[i].swizzle[0] == i)
1085            comps &= ~(1u << i);
1086      }
1087   }
1088
1089   return comps;
1090}
1091
1092static void
1093find_used_components_impl(nir_function_impl *impl,
1094                          struct hash_table *var_usage_map,
1095                          nir_variable_mode modes,
1096                          void *mem_ctx)
1097{
1098   nir_foreach_block(block, impl) {
1099      nir_foreach_instr(instr, block) {
1100         if (instr->type != nir_instr_type_intrinsic)
1101            continue;
1102
1103         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
1104         switch (intrin->intrinsic) {
1105         case nir_intrinsic_load_deref:
1106            mark_deref_used(nir_src_as_deref(intrin->src[0]),
1107                            nir_ssa_def_components_read(&intrin->dest.ssa), 0,
1108                            NULL, var_usage_map, modes, mem_ctx);
1109            break;
1110
1111         case nir_intrinsic_store_deref:
1112            mark_deref_used(nir_src_as_deref(intrin->src[0]),
1113                            0, get_non_self_referential_store_comps(intrin),
1114                            NULL, var_usage_map, modes, mem_ctx);
1115            break;
1116
1117         case nir_intrinsic_copy_deref: {
1118            /* Just mark everything used for copies. */
1119            nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
1120            nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
1121            mark_deref_used(dst, 0, ~0, src, var_usage_map, modes, mem_ctx);
1122            mark_deref_used(src, ~0, 0, dst, var_usage_map, modes, mem_ctx);
1123            break;
1124         }
1125
1126         default:
1127            break;
1128         }
1129      }
1130   }
1131}
1132
/* Decides the final shape of every tracked variable in `vars` and applies it.
 *
 * Works in three steps: (1) compute the components and array lengths each
 * variable needs on its own, (2) widen those until both sides of every
 * recorded copy agree, (3) delete dead variables, drop unchanged ones from
 * `var_usage_map`, and give shrunk ones a new type.
 *
 * Returns true if any variable was removed or had its type changed.
 */
static bool
shrink_vec_var_list(struct exec_list *vars,
                    struct hash_table *var_usage_map)
{
   /* Initialize the components kept field of each variable.  This is the
    * AND of the components written and components read.  If a component is
    * written but never read, it's dead.  If it is read but never written,
    * then all values read are undefined garbage and we may as well not read
    * them.
    *
    * The same logic applies to the array length.  We make the array length
    * the minimum required length between read and write and plan to
    * discard any OOB access.  The one exception here is indirect writes
    * because we don't know where they will land and we can't shrink an array
    * with indirect writes because previously in-bounds writes may become
    * out-of-bounds and have undefined behavior.
    *
    * Also, if we have a copy to/from something we can't shrink, we need
    * to leave components and array_len of any wildcards alone.
    */
   nir_foreach_variable(var, vars) {
      struct vec_var_usage *usage =
         get_vec_var_usage(var, var_usage_map, false, NULL);
      if (!usage)
         continue;

      assert(usage->comps_kept == 0);
      if (usage->has_external_copy)
         usage->comps_kept = usage->all_comps;
      else
         usage->comps_kept = usage->comps_read & usage->comps_written;

      for (unsigned i = 0; i < usage->num_levels; i++) {
         struct array_level_usage *level = &usage->levels[i];
         assert(level->array_len > 0);

         if (level->max_written == UINT_MAX || level->has_external_copy)
            continue; /* Can't shrink */

         /* max_used is a highest index, so the new length is one more than
          * it, clamped so the array never grows past its current length.
          */
         unsigned max_used = MIN2(level->max_read, level->max_written);
         level->array_len = MIN2(max_used, level->array_len - 1) + 1;
      }
   }

   /* In order for variable copies to work, we have to have the same data type
    * on the source and the destination.  In order to satisfy this, we run a
    * little fixed-point algorithm to transitively ensure that we get enough
    * components and array elements for this to hold for all copies.
    */
   bool fp_progress;
   do {
      fp_progress = false;
      nir_foreach_variable(var, vars) {
         struct vec_var_usage *var_usage =
            get_vec_var_usage(var, var_usage_map, false, NULL);
         if (!var_usage || !var_usage->vars_copied)
            continue;

         /* Both sides of a copy keep the union of their components */
         set_foreach(var_usage->vars_copied, copy_entry) {
            struct vec_var_usage *copy_usage = (void *)copy_entry->key;
            if (copy_usage->comps_kept != var_usage->comps_kept) {
               nir_component_mask_t comps_kept =
                  (var_usage->comps_kept | copy_usage->comps_kept);
               var_usage->comps_kept = comps_kept;
               copy_usage->comps_kept = comps_kept;
               fp_progress = true;
            }
         }

         /* Both sides of a wildcard copy keep the larger array length */
         for (unsigned i = 0; i < var_usage->num_levels; i++) {
            struct array_level_usage *var_level = &var_usage->levels[i];
            if (!var_level->levels_copied)
               continue;

            set_foreach(var_level->levels_copied, copy_entry) {
               struct array_level_usage *copy_level = (void *)copy_entry->key;
               if (var_level->array_len != copy_level->array_len) {
                  unsigned array_len =
                     MAX2(var_level->array_len, copy_level->array_len);
                  var_level->array_len = array_len;
                  copy_level->array_len = array_len;
                  fp_progress = true;
               }
            }
         }
      }
   } while (fp_progress);

   /* Apply the results.  The _safe iterator is needed because dead
    * variables are unlinked from the list inside the loop.
    */
   bool vars_shrunk = false;
   nir_foreach_variable_safe(var, vars) {
      struct vec_var_usage *usage =
         get_vec_var_usage(var, var_usage_map, false, NULL);
      if (!usage)
         continue;

      /* Walk down to the vector type, noting whether any level got shorter */
      bool shrunk = false;
      const struct glsl_type *vec_type = var->type;
      for (unsigned i = 0; i < usage->num_levels; i++) {
         /* If we've reduced the array to zero elements at some level, just
          * set comps_kept to 0 and delete the variable.
          */
         if (usage->levels[i].array_len == 0) {
            usage->comps_kept = 0;
            break;
         }

         assert(usage->levels[i].array_len <= glsl_get_length(vec_type));
         if (usage->levels[i].array_len < glsl_get_length(vec_type))
            shrunk = true;
         vec_type = glsl_get_array_element(vec_type);
      }
      assert(glsl_type_is_vector_or_scalar(vec_type));

      assert(usage->comps_kept == (usage->comps_kept & usage->all_comps));
      if (usage->comps_kept != usage->all_comps)
         shrunk = true;

      if (usage->comps_kept == 0) {
         /* This variable is dead, remove it.  Its entry stays in the map so
          * that accesses to it can still be recognized via comps_kept == 0.
          */
         vars_shrunk = true;
         exec_node_remove(&var->node);
         continue;
      }

      if (!shrunk) {
         /* This variable doesn't need to be shrunk.  Remove it from the
          * hash table so later steps will ignore it.
          */
         _mesa_hash_table_remove_key(var_usage_map, var);
         continue;
      }

      /* Build the new var type, innermost level first */
      unsigned new_num_comps = util_bitcount(usage->comps_kept);
      const struct glsl_type *new_type =
         glsl_vector_type(glsl_get_base_type(vec_type), new_num_comps);
      for (int i = usage->num_levels - 1; i >= 0; i--) {
         assert(usage->levels[i].array_len > 0);
         /* If the original type was a matrix type, we'd like to keep that so
          * we don't convert matrices into arrays.
          */
         if (i == usage->num_levels - 1 &&
             glsl_type_is_matrix(glsl_without_array(var->type)) &&
             new_num_comps > 1 && usage->levels[i].array_len > 1) {
            new_type = glsl_matrix_type(glsl_get_base_type(new_type),
                                        new_num_comps,
                                        usage->levels[i].array_len);
         } else {
            new_type = glsl_array_type(new_type, usage->levels[i].array_len, 0);
         }
      }
      var->type = new_type;

      vars_shrunk = true;
   }

   return vars_shrunk;
}
1291
1292static bool
1293vec_deref_is_oob(nir_deref_instr *deref,
1294                 struct vec_var_usage *usage)
1295{
1296   nir_deref_path path;
1297   nir_deref_path_init(&path, deref, NULL);
1298
1299   bool oob = false;
1300   for (unsigned i = 0; i < usage->num_levels; i++) {
1301      nir_deref_instr *p = path.path[i + 1];
1302      if (p->deref_type == nir_deref_type_array_wildcard)
1303         continue;
1304
1305      if (nir_src_is_const(p->arr.index) &&
1306          nir_src_as_uint(p->arr.index) >= usage->levels[i].array_len) {
1307         oob = true;
1308         break;
1309      }
1310   }
1311
1312   nir_deref_path_finish(&path);
1313
1314   return oob;
1315}
1316
1317static bool
1318vec_deref_is_dead_or_oob(nir_deref_instr *deref,
1319                         struct hash_table *var_usage_map,
1320                         nir_variable_mode modes)
1321{
1322   struct vec_var_usage *usage =
1323      get_vec_deref_usage(deref, var_usage_map, modes, false, NULL);
1324   if (!usage)
1325      return false;
1326
1327   return usage->comps_kept == 0 || vec_deref_is_oob(deref, usage);
1328}
1329
/* Rewrites every access in `impl` to match the shapes chosen by
 * shrink_vec_var_list().
 *
 * Deref instructions get their types refreshed from their variable or
 * parent.  Copies, loads and stores that touch a dead variable or a
 * constant out-of-bounds index are removed (loads are replaced by undef).
 * Remaining loads and stores of variables that lost components are
 * compacted to the kept components.
 */
static void
shrink_vec_var_access_impl(nir_function_impl *impl,
                           struct hash_table *var_usage_map,
                           nir_variable_mode modes)
{
   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      /* The _safe iterator is used because instructions get removed below */
      nir_foreach_instr_safe(instr, block) {
         switch (instr->type) {
         case nir_instr_type_deref: {
            nir_deref_instr *deref = nir_instr_as_deref(instr);
            if (!(deref->mode & modes))
               break;

            /* Clean up any dead derefs we find lying around.  They may refer
             * to variables we've deleted.
             */
            if (nir_deref_instr_remove_if_unused(deref))
               break;

            /* Update the type in the deref to keep the types consistent as
             * you walk down the chain.  We don't need to check if this is one
             * of the derefs we're shrinking because this is a no-op if it
             * isn't.  The worst that could happen is that we accidentally fix
             * an invalid deref.
             */
            if (deref->deref_type == nir_deref_type_var) {
               deref->type = deref->var->type;
            } else if (deref->deref_type == nir_deref_type_array ||
                       deref->deref_type == nir_deref_type_array_wildcard) {
               nir_deref_instr *parent = nir_deref_instr_parent(deref);
               assert(glsl_type_is_array(parent->type) ||
                      glsl_type_is_matrix(parent->type));
               deref->type = glsl_get_array_element(parent->type);
            }
            break;
         }

         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

            /* If we have a copy whose source or destination has been deleted
             * because we determined the variable was dead, then we just
             * delete the copy instruction.  If the source variable was dead
             * then it was writing undefined garbage anyway and if it's the
             * destination variable that's dead then the write isn't needed.
             */
            if (intrin->intrinsic == nir_intrinsic_copy_deref) {
               nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
               nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
               if (vec_deref_is_dead_or_oob(dst, var_usage_map, modes) ||
                   vec_deref_is_dead_or_oob(src, var_usage_map, modes)) {
                  nir_instr_remove(&intrin->instr);
                  nir_deref_instr_remove_if_unused(dst);
                  nir_deref_instr_remove_if_unused(src);
               }
               continue;
            }

            /* Only loads and stores are handled from here on */
            if (intrin->intrinsic != nir_intrinsic_load_deref &&
                intrin->intrinsic != nir_intrinsic_store_deref)
               continue;

            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            if (!(deref->mode & modes))
               continue;

            /* No usage entry means the variable isn't one we track or was
             * left unchanged, so the access needs no rewriting.
             */
            struct vec_var_usage *usage =
               get_vec_deref_usage(deref, var_usage_map, modes, false, NULL);
            if (!usage)
               continue;

            /* Dead variable or out-of-bounds index: drop the access.  Users
             * of a dropped load are pointed at an undef of the same size.
             */
            if (usage->comps_kept == 0 || vec_deref_is_oob(deref, usage)) {
               if (intrin->intrinsic == nir_intrinsic_load_deref) {
                  nir_ssa_def *u =
                     nir_ssa_undef(&b, intrin->dest.ssa.num_components,
                                       intrin->dest.ssa.bit_size);
                  nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                           nir_src_for_ssa(u));
               }
               nir_instr_remove(&intrin->instr);
               nir_deref_instr_remove_if_unused(deref);
               continue;
            }

            /* If we're not dropping any components, there's no need to
             * compact vectors.
             */
            if (usage->comps_kept == usage->all_comps)
               continue;

            if (intrin->intrinsic == nir_intrinsic_load_deref) {
               b.cursor = nir_after_instr(&intrin->instr);

               /* Rebuild a vector of the original width right after the
                * load: kept components pull consecutive channels of the
                * (soon to be narrower) load, dropped ones become undef.
                */
               nir_ssa_def *undef =
                  nir_ssa_undef(&b, 1, intrin->dest.ssa.bit_size);
               nir_ssa_def *vec_srcs[NIR_MAX_VEC_COMPONENTS];
               unsigned c = 0;
               for (unsigned i = 0; i < intrin->num_components; i++) {
                  if (usage->comps_kept & (1u << i))
                     vec_srcs[i] = nir_channel(&b, &intrin->dest.ssa, c++);
                  else
                     vec_srcs[i] = undef;
               }
               nir_ssa_def *vec = nir_vec(&b, vec_srcs, intrin->num_components);

               /* Redirect the original users of the load to the rebuilt
                * vector.
                */
               nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa,
                                              nir_src_for_ssa(vec),
                                              vec->parent_instr);

               /* The SSA def is now only used by the swizzle.  It's safe to
                * shrink the number of components.
                */
               assert(list_length(&intrin->dest.ssa.uses) == c);
               intrin->num_components = c;
               intrin->dest.ssa.num_components = c;
            } else {
               nir_component_mask_t write_mask =
                  nir_intrinsic_write_mask(intrin);

               /* Build a swizzle that picks the kept components in order
                * and remap the write mask to the compacted positions.
                */
               unsigned swizzle[NIR_MAX_VEC_COMPONENTS];
               nir_component_mask_t new_write_mask = 0;
               unsigned c = 0;
               for (unsigned i = 0; i < intrin->num_components; i++) {
                  if (usage->comps_kept & (1u << i)) {
                     swizzle[c] = i;
                     if (write_mask & (1u << i))
                        new_write_mask |= 1u << c;
                     c++;
                  }
               }

               b.cursor = nir_before_instr(&intrin->instr);

               nir_ssa_def *swizzled =
                  nir_swizzle(&b, intrin->src[1].ssa, swizzle, c, false);

               /* Rewrite to use the compacted source */
               nir_instr_rewrite_src(&intrin->instr, &intrin->src[1],
                                     nir_src_for_ssa(swizzled));
               nir_intrinsic_set_write_mask(intrin, new_write_mask);
               intrin->num_components = c;
            }
            break;
         }

         default:
            break;
         }
      }
   }
}
1484
1485static bool
1486function_impl_has_vars_with_modes(nir_function_impl *impl,
1487                                  nir_variable_mode modes)
1488{
1489   nir_shader *shader = impl->function->shader;
1490
1491   if ((modes & nir_var_shader_temp) && !exec_list_is_empty(&shader->globals))
1492      return true;
1493
1494   if ((modes & nir_var_function_temp) && !exec_list_is_empty(&impl->locals))
1495      return true;
1496
1497   return false;
1498}
1499
1500/** Attempt to shrink arrays of vectors
1501 *
1502 * This pass looks at variables which contain a vector or an array (possibly
1503 * multiple dimensions) of vectors and attempts to lower to a smaller vector
1504 * or array.  If the pass can prove that a component of a vector (or array of
1505 * vectors) is never really used, then that component will be removed.
1506 * Similarly, the pass attempts to shorten arrays based on what elements it
1507 * can prove are never read or never contain valid data.
1508 */
1509bool
1510nir_shrink_vec_array_vars(nir_shader *shader, nir_variable_mode modes)
1511{
1512   assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes);
1513
1514   void *mem_ctx = ralloc_context(NULL);
1515
1516   struct hash_table *var_usage_map =
1517      _mesa_pointer_hash_table_create(mem_ctx);
1518
1519   bool has_vars_to_shrink = false;
1520   nir_foreach_function(function, shader) {
1521      if (!function->impl)
1522         continue;
1523
1524      /* Don't even bother crawling the IR if we don't have any variables.
1525       * Given that this pass deletes any unused variables, it's likely that
1526       * we will be in this scenario eventually.
1527       */
1528      if (function_impl_has_vars_with_modes(function->impl, modes)) {
1529         has_vars_to_shrink = true;
1530         find_used_components_impl(function->impl, var_usage_map,
1531                                   modes, mem_ctx);
1532      }
1533   }
1534   if (!has_vars_to_shrink) {
1535      ralloc_free(mem_ctx);
1536      return false;
1537   }
1538
1539   bool globals_shrunk = false;
1540   if (modes & nir_var_shader_temp)
1541      globals_shrunk = shrink_vec_var_list(&shader->globals, var_usage_map);
1542
1543   bool progress = false;
1544   nir_foreach_function(function, shader) {
1545      if (!function->impl)
1546         continue;
1547
1548      bool locals_shrunk = false;
1549      if (modes & nir_var_function_temp) {
1550         locals_shrunk = shrink_vec_var_list(&function->impl->locals,
1551                                             var_usage_map);
1552      }
1553
1554      if (globals_shrunk || locals_shrunk) {
1555         shrink_vec_var_access_impl(function->impl, var_usage_map, modes);
1556
1557         nir_metadata_preserve(function->impl, nir_metadata_block_index |
1558                                               nir_metadata_dominance);
1559         progress = true;
1560      }
1561   }
1562
1563   ralloc_free(mem_ctx);
1564
1565   return progress;
1566}
1567