1/*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "nir.h"
25#include "nir_builder.h"
26#include "nir_deref.h"
27
28#include "util/bitscan.h"
29#include "util/u_dynarray.h"
30
31static const bool debug = false;
32
33/**
34 * Variable-based copy propagation
35 *
36 * Normally, NIR trusts in SSA form for most of its copy-propagation needs.
37 * However, there are cases, especially when dealing with indirects, where SSA
38 * won't help you.  This pass is for those times.  Specifically, it handles
39 * the following things that the rest of NIR can't:
40 *
41 *  1) Copy-propagation on variables that have indirect access.  This includes
42 *     propagating from indirect stores into indirect loads.
43 *
44 *  2) Removal of redundant load_deref intrinsics.  We can't trust regular CSE
45 *     to do this because it isn't aware of variable writes that may alias the
46 *     value and make the former load invalid.
47 *
48 * This pass uses an intermediate solution between being local / "per-block"
49 * and a complete data-flow analysis.  It follows the control flow graph, and
50 * propagate the available copy information forward, invalidating data at each
51 * cf_node.
52 *
53 * Removal of dead writes to variables is handled by another pass.
54 */
55
56struct vars_written {
57   nir_variable_mode modes;
58
59   /* Key is deref and value is the uintptr_t with the write mask. */
60   struct hash_table *derefs;
61};
62
63struct value {
64   bool is_ssa;
65   union {
66      struct {
67         nir_ssa_def *def[NIR_MAX_VEC_COMPONENTS];
68         uint8_t component[NIR_MAX_VEC_COMPONENTS];
69      } ssa;
70      nir_deref_and_path deref;
71   };
72};
73
74static void
75value_set_ssa_components(struct value *value, nir_ssa_def *def,
76                         unsigned num_components)
77{
78   if (!value->is_ssa)
79      memset(&value->ssa, 0, sizeof(value->ssa));
80   value->is_ssa = true;
81   for (unsigned i = 0; i < num_components; i++) {
82      value->ssa.def[i] = def;
83      value->ssa.component[i] = i;
84   }
85}
86
87struct copy_entry {
88   struct value src;
89
90   nir_deref_and_path dst;
91};
92
93struct copy_prop_var_state {
94   nir_function_impl *impl;
95
96   void *mem_ctx;
97   void *lin_ctx;
98
99   /* Maps nodes to vars_written.  Used to invalidate copy entries when
100    * visiting each node.
101    */
102   struct hash_table *vars_written_map;
103
104   bool progress;
105};
106
107static bool
108value_equals_store_src(struct value *value, nir_intrinsic_instr *intrin)
109{
110   assert(intrin->intrinsic == nir_intrinsic_store_deref);
111   nir_component_mask_t write_mask = nir_intrinsic_write_mask(intrin);
112
113   for (unsigned i = 0; i < intrin->num_components; i++) {
114      if ((write_mask & (1 << i)) &&
115          (value->ssa.def[i] != intrin->src[1].ssa ||
116           value->ssa.component[i] != i))
117         return false;
118   }
119
120   return true;
121}
122
123static struct vars_written *
124create_vars_written(struct copy_prop_var_state *state)
125{
126   struct vars_written *written =
127      linear_zalloc_child(state->lin_ctx, sizeof(struct vars_written));
128   written->derefs = _mesa_pointer_hash_table_create(state->mem_ctx);
129   return written;
130}
131
132static void
133gather_vars_written(struct copy_prop_var_state *state,
134                    struct vars_written *written,
135                    nir_cf_node *cf_node)
136{
137   struct vars_written *new_written = NULL;
138
139   switch (cf_node->type) {
140   case nir_cf_node_function: {
141      nir_function_impl *impl = nir_cf_node_as_function(cf_node);
142      foreach_list_typed_safe(nir_cf_node, cf_node, node, &impl->body)
143         gather_vars_written(state, NULL, cf_node);
144      break;
145   }
146
147   case nir_cf_node_block: {
148      if (!written)
149         break;
150
151      nir_block *block = nir_cf_node_as_block(cf_node);
152      nir_foreach_instr(instr, block) {
153         if (instr->type == nir_instr_type_call) {
154            written->modes |= nir_var_shader_out |
155                              nir_var_shader_temp |
156                              nir_var_function_temp |
157                              nir_var_mem_ssbo |
158                              nir_var_mem_shared |
159                              nir_var_mem_global;
160            continue;
161         }
162
163         if (instr->type != nir_instr_type_intrinsic)
164            continue;
165
166         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
167         switch (intrin->intrinsic) {
168         case nir_intrinsic_control_barrier:
169         case nir_intrinsic_group_memory_barrier:
170         case nir_intrinsic_memory_barrier:
171            written->modes |= nir_var_shader_out |
172                              nir_var_mem_ssbo |
173                              nir_var_mem_shared |
174                              nir_var_mem_global;
175            break;
176
177         case nir_intrinsic_scoped_barrier:
178            if (nir_intrinsic_memory_semantics(intrin) & NIR_MEMORY_ACQUIRE)
179               written->modes |= nir_intrinsic_memory_modes(intrin);
180            break;
181
182         case nir_intrinsic_emit_vertex:
183         case nir_intrinsic_emit_vertex_with_counter:
184            written->modes = nir_var_shader_out;
185            break;
186
187         case nir_intrinsic_trace_ray:
188         case nir_intrinsic_execute_callable:
189         case nir_intrinsic_rt_trace_ray:
190         case nir_intrinsic_rt_execute_callable: {
191            nir_deref_instr *payload =
192               nir_src_as_deref(*nir_get_shader_call_payload_src(intrin));
193
194            nir_component_mask_t mask = (nir_component_mask_t)
195               BITFIELD_MASK(glsl_get_vector_elements(payload->type));
196
197            struct hash_entry *ht_entry =
198               _mesa_hash_table_search(written->derefs, payload);
199            if (ht_entry) {
200               ht_entry->data = (void *)(mask | (uintptr_t)ht_entry->data);
201            } else {
202               _mesa_hash_table_insert(written->derefs, payload,
203                                       (void *)(uintptr_t)mask);
204            }
205            break;
206         }
207
208         case nir_intrinsic_report_ray_intersection:
209            written->modes |= nir_var_mem_ssbo |
210                              nir_var_mem_global |
211                              nir_var_shader_call_data |
212                              nir_var_ray_hit_attrib;
213            break;
214
215         case nir_intrinsic_ignore_ray_intersection:
216         case nir_intrinsic_terminate_ray:
217            written->modes |= nir_var_mem_ssbo |
218                              nir_var_mem_global |
219                              nir_var_shader_call_data;
220            break;
221
222         case nir_intrinsic_deref_atomic_add:
223         case nir_intrinsic_deref_atomic_fadd:
224         case nir_intrinsic_deref_atomic_imin:
225         case nir_intrinsic_deref_atomic_umin:
226         case nir_intrinsic_deref_atomic_fmin:
227         case nir_intrinsic_deref_atomic_imax:
228         case nir_intrinsic_deref_atomic_umax:
229         case nir_intrinsic_deref_atomic_fmax:
230         case nir_intrinsic_deref_atomic_and:
231         case nir_intrinsic_deref_atomic_or:
232         case nir_intrinsic_deref_atomic_xor:
233         case nir_intrinsic_deref_atomic_exchange:
234         case nir_intrinsic_deref_atomic_comp_swap:
235         case nir_intrinsic_deref_atomic_fcomp_swap:
236         case nir_intrinsic_store_deref:
237         case nir_intrinsic_copy_deref:
238         case nir_intrinsic_memcpy_deref: {
239            /* Destination in all of store_deref, copy_deref and the atomics is src[0]. */
240            nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
241
242            uintptr_t mask = intrin->intrinsic == nir_intrinsic_store_deref ?
243               nir_intrinsic_write_mask(intrin) : (1 << glsl_get_vector_elements(dst->type)) - 1;
244
245            struct hash_entry *ht_entry = _mesa_hash_table_search(written->derefs, dst);
246            if (ht_entry)
247               ht_entry->data = (void *)(mask | (uintptr_t)ht_entry->data);
248            else
249               _mesa_hash_table_insert(written->derefs, dst, (void *)mask);
250
251            break;
252         }
253
254         default:
255            break;
256         }
257      }
258
259      break;
260   }
261
262   case nir_cf_node_if: {
263      nir_if *if_stmt = nir_cf_node_as_if(cf_node);
264
265      new_written = create_vars_written(state);
266
267      foreach_list_typed_safe(nir_cf_node, cf_node, node, &if_stmt->then_list)
268         gather_vars_written(state, new_written, cf_node);
269
270      foreach_list_typed_safe(nir_cf_node, cf_node, node, &if_stmt->else_list)
271         gather_vars_written(state, new_written, cf_node);
272
273      break;
274   }
275
276   case nir_cf_node_loop: {
277      nir_loop *loop = nir_cf_node_as_loop(cf_node);
278
279      new_written = create_vars_written(state);
280
281      foreach_list_typed_safe(nir_cf_node, cf_node, node, &loop->body)
282         gather_vars_written(state, new_written, cf_node);
283
284      break;
285   }
286
287   default:
288      unreachable("Invalid CF node type");
289   }
290
291   if (new_written) {
292      /* Merge new information to the parent control flow node. */
293      if (written) {
294         written->modes |= new_written->modes;
295         hash_table_foreach(new_written->derefs, new_entry) {
296            struct hash_entry *old_entry =
297               _mesa_hash_table_search_pre_hashed(written->derefs, new_entry->hash,
298                                                  new_entry->key);
299            if (old_entry) {
300               nir_component_mask_t merged = (uintptr_t) new_entry->data |
301                                             (uintptr_t) old_entry->data;
302               old_entry->data = (void *) ((uintptr_t) merged);
303            } else {
304               _mesa_hash_table_insert_pre_hashed(written->derefs, new_entry->hash,
305                                                  new_entry->key, new_entry->data);
306            }
307         }
308      }
309      _mesa_hash_table_insert(state->vars_written_map, cf_node, new_written);
310   }
311}
312
313static struct copy_entry *
314copy_entry_create(struct util_dynarray *copies,
315                  nir_deref_and_path *deref)
316{
317   struct copy_entry new_entry = {
318      .dst = *deref,
319   };
320   util_dynarray_append(copies, struct copy_entry, new_entry);
321   return util_dynarray_top_ptr(copies, struct copy_entry);
322}
323
324/* Remove copy entry by swapping it with the last element and reducing the
325 * size.  If used inside an iteration on copies, it must be a reverse
326 * (backwards) iteration.  It is safe to use in those cases because the swap
327 * will not affect the rest of the iteration.
328 */
329static void
330copy_entry_remove(struct util_dynarray *copies,
331                  struct copy_entry *entry)
332{
333   const struct copy_entry *src =
334      util_dynarray_pop_ptr(copies, struct copy_entry);
335   if (src != entry)
336      *entry = *src;
337}
338
339static bool
340is_array_deref_of_vector(const nir_deref_and_path *deref)
341{
342   if (deref->instr->deref_type != nir_deref_type_array)
343      return false;
344   nir_deref_instr *parent = nir_deref_instr_parent(deref->instr);
345   return glsl_type_is_vector(parent->type);
346}
347
348static struct copy_entry *
349lookup_entry_for_deref(struct copy_prop_var_state *state,
350                       struct util_dynarray *copies,
351                       nir_deref_and_path *deref,
352                       nir_deref_compare_result allowed_comparisons,
353                       bool *equal)
354{
355   struct copy_entry *entry = NULL;
356   util_dynarray_foreach(copies, struct copy_entry, iter) {
357      nir_deref_compare_result result =
358         nir_compare_derefs_and_paths(state->mem_ctx, &iter->dst, deref);
359      if (result & allowed_comparisons) {
360         entry = iter;
361         if (result & nir_derefs_equal_bit) {
362            if (equal != NULL)
363               *equal = true;
364            break;
365         }
366         /* Keep looking in case we have an equal match later in the array. */
367      }
368   }
369
370   return entry;
371}
372
373static struct copy_entry *
374lookup_entry_and_kill_aliases(struct copy_prop_var_state *state,
375                              struct util_dynarray *copies,
376                              nir_deref_and_path *deref,
377                              unsigned write_mask)
378{
379   /* TODO: Take into account the write_mask. */
380
381   nir_deref_instr *dst_match = NULL;
382   util_dynarray_foreach_reverse(copies, struct copy_entry, iter) {
383      if (!iter->src.is_ssa) {
384         /* If this write aliases the source of some entry, get rid of it */
385         nir_deref_compare_result result =
386            nir_compare_derefs_and_paths(state->mem_ctx, &iter->src.deref, deref);
387         if (result & nir_derefs_may_alias_bit) {
388            copy_entry_remove(copies, iter);
389            continue;
390         }
391      }
392
393      nir_deref_compare_result comp =
394         nir_compare_derefs_and_paths(state->mem_ctx, &iter->dst, deref);
395
396      if (comp & nir_derefs_equal_bit) {
397         /* Removing entries invalidate previous iter pointers, so we'll
398          * collect the matching entry later.  Just make sure it is unique.
399          */
400         assert(!dst_match);
401         dst_match = iter->dst.instr;
402      } else if (comp & nir_derefs_may_alias_bit) {
403         copy_entry_remove(copies, iter);
404      }
405   }
406
407   struct copy_entry *entry = NULL;
408   if (dst_match) {
409      util_dynarray_foreach(copies, struct copy_entry, iter) {
410         if (iter->dst.instr == dst_match) {
411            entry = iter;
412            break;
413         }
414      }
415      assert(entry);
416   }
417   return entry;
418}
419
420static void
421kill_aliases(struct copy_prop_var_state *state,
422             struct util_dynarray *copies,
423             nir_deref_and_path *deref,
424             unsigned write_mask)
425{
426   /* TODO: Take into account the write_mask. */
427
428   struct copy_entry *entry =
429      lookup_entry_and_kill_aliases(state, copies, deref, write_mask);
430   if (entry)
431      copy_entry_remove(copies, entry);
432}
433
434static struct copy_entry *
435get_entry_and_kill_aliases(struct copy_prop_var_state *state,
436                           struct util_dynarray *copies,
437                           nir_deref_and_path *deref,
438                           unsigned write_mask)
439{
440   /* TODO: Take into account the write_mask. */
441
442   struct copy_entry *entry =
443      lookup_entry_and_kill_aliases(state, copies, deref, write_mask);
444
445   if (entry == NULL)
446      entry = copy_entry_create(copies, deref);
447
448   return entry;
449}
450
451static void
452apply_barrier_for_modes(struct util_dynarray *copies,
453                        nir_variable_mode modes)
454{
455   util_dynarray_foreach_reverse(copies, struct copy_entry, iter) {
456      if (nir_deref_mode_may_be(iter->dst.instr, modes) ||
457          (!iter->src.is_ssa && nir_deref_mode_may_be(iter->src.deref.instr, modes)))
458         copy_entry_remove(copies, iter);
459   }
460}
461
462static void
463value_set_from_value(struct value *value, const struct value *from,
464                     unsigned base_index, unsigned write_mask)
465{
466   /* We can't have non-zero indexes with non-trivial write masks */
467   assert(base_index == 0 || write_mask == 1);
468
469   if (from->is_ssa) {
470      /* Clear value if it was being used as non-SSA. */
471      if (!value->is_ssa)
472         memset(&value->ssa, 0, sizeof(value->ssa));
473      value->is_ssa = true;
474      /* Only overwrite the written components */
475      for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) {
476         if (write_mask & (1 << i)) {
477            value->ssa.def[base_index + i] = from->ssa.def[i];
478            value->ssa.component[base_index + i] = from->ssa.component[i];
479         }
480      }
481   } else {
482      /* Non-ssa stores always write everything */
483      value->is_ssa = false;
484      value->deref = from->deref;
485   }
486}
487
488/* Try to load a single element of a vector from the copy_entry.  If the data
489 * isn't available, just let the original intrinsic do the work.
490 */
491static bool
492load_element_from_ssa_entry_value(struct copy_prop_var_state *state,
493                                  struct copy_entry *entry,
494                                  nir_builder *b, nir_intrinsic_instr *intrin,
495                                  struct value *value, unsigned index)
496{
497   assert(index < glsl_get_vector_elements(entry->dst.instr->type));
498
499   /* We don't have the element available, so let the instruction do the work. */
500   if (!entry->src.ssa.def[index])
501      return false;
502
503   b->cursor = nir_instr_remove(&intrin->instr);
504   intrin->instr.block = NULL;
505
506   assert(entry->src.ssa.component[index] <
507          entry->src.ssa.def[index]->num_components);
508   nir_ssa_def *def = nir_channel(b, entry->src.ssa.def[index],
509                                     entry->src.ssa.component[index]);
510
511   *value = (struct value) {
512      .is_ssa = true,
513      {
514	.ssa = {
515	  .def = { def },
516	  .component = { 0 },
517	},
518      }
519   };
520
521   return true;
522}
523
524/* Do a "load" from an SSA-based entry return it in "value" as a value with a
525 * single SSA def.  Because an entry could reference multiple different SSA
526 * defs, a vecN operation may be inserted to combine them into a single SSA
527 * def before handing it back to the caller.  If the load instruction is no
528 * longer needed, it is removed and nir_instr::block is set to NULL.  (It is
529 * possible, in some cases, for the load to be used in the vecN operation in
530 * which case it isn't deleted.)
531 */
532static bool
533load_from_ssa_entry_value(struct copy_prop_var_state *state,
534                          struct copy_entry *entry,
535                          nir_builder *b, nir_intrinsic_instr *intrin,
536                          nir_deref_and_path *src, struct value *value)
537{
538   if (is_array_deref_of_vector(src)) {
539      if (nir_src_is_const(src->instr->arr.index)) {
540         unsigned index = nir_src_as_uint(src->instr->arr.index);
541         return load_element_from_ssa_entry_value(state, entry, b, intrin,
542                                                  value, index);
543      }
544
545      /* An SSA copy_entry for the vector won't help indirect load. */
546      if (glsl_type_is_vector(entry->dst.instr->type)) {
547         assert(entry->dst.instr->type == nir_deref_instr_parent(src->instr)->type);
548         /* TODO: If all SSA entries are there, try an if-ladder. */
549         return false;
550      }
551   }
552
553   *value = entry->src;
554   assert(value->is_ssa);
555
556   const struct glsl_type *type = entry->dst.instr->type;
557   unsigned num_components = glsl_get_vector_elements(type);
558
559   nir_component_mask_t available = 0;
560   bool all_same = true;
561   for (unsigned i = 0; i < num_components; i++) {
562      if (value->ssa.def[i])
563         available |= (1 << i);
564
565      if (value->ssa.def[i] != value->ssa.def[0])
566         all_same = false;
567
568      if (value->ssa.component[i] != i)
569         all_same = false;
570   }
571
572   if (all_same) {
573      /* Our work here is done */
574      b->cursor = nir_instr_remove(&intrin->instr);
575      intrin->instr.block = NULL;
576      return true;
577   }
578
579   if (available != (1 << num_components) - 1 &&
580       intrin->intrinsic == nir_intrinsic_load_deref &&
581       (available & nir_ssa_def_components_read(&intrin->dest.ssa)) == 0) {
582      /* If none of the components read are available as SSA values, then we
583       * should just bail.  Otherwise, we would end up replacing the uses of
584       * the load_deref a vecN() that just gathers up its components.
585       */
586      return false;
587   }
588
589   b->cursor = nir_after_instr(&intrin->instr);
590
591   nir_ssa_def *load_def =
592      intrin->intrinsic == nir_intrinsic_load_deref ? &intrin->dest.ssa : NULL;
593
594   bool keep_intrin = false;
595   nir_ssa_def *comps[NIR_MAX_VEC_COMPONENTS];
596   for (unsigned i = 0; i < num_components; i++) {
597      if (value->ssa.def[i]) {
598         comps[i] = nir_channel(b, value->ssa.def[i], value->ssa.component[i]);
599      } else {
600         /* We don't have anything for this component in our
601          * list.  Just re-use a channel from the load.
602          */
603         if (load_def == NULL)
604            load_def = nir_load_deref(b, entry->dst.instr);
605
606         if (load_def->parent_instr == &intrin->instr)
607            keep_intrin = true;
608
609         comps[i] = nir_channel(b, load_def, i);
610      }
611   }
612
613   nir_ssa_def *vec = nir_vec(b, comps, num_components);
614   value_set_ssa_components(value, vec, num_components);
615
616   if (!keep_intrin) {
617      /* Removing this instruction should not touch the cursor because we
618       * created the cursor after the intrinsic and have added at least one
619       * instruction (the vec) since then.
620       */
621      assert(b->cursor.instr != &intrin->instr);
622      nir_instr_remove(&intrin->instr);
623      intrin->instr.block = NULL;
624   }
625
626   return true;
627}
628
629/**
630 * Specialize the wildcards in a deref chain
631 *
632 * This function returns a deref chain identical to \param deref except that
633 * some of its wildcards are replaced with indices from \param specific.  The
634 * process is guided by \param guide which references the same type as \param
635 * specific but has the same wildcard array lengths as \param deref.
636 */
637static nir_deref_instr *
638specialize_wildcards(nir_builder *b,
639                     nir_deref_path *deref,
640                     nir_deref_path *guide,
641                     nir_deref_path *specific)
642{
643   nir_deref_instr **deref_p = &deref->path[1];
644   nir_deref_instr **guide_p = &guide->path[1];
645   nir_deref_instr **spec_p = &specific->path[1];
646   nir_deref_instr *ret_tail = deref->path[0];
647   for (; *deref_p; deref_p++) {
648      if ((*deref_p)->deref_type == nir_deref_type_array_wildcard) {
649         /* This is where things get tricky.  We have to search through
650          * the entry deref to find its corresponding wildcard and fill
651          * this slot in with the value from the src.
652          */
653         while (*guide_p &&
654                (*guide_p)->deref_type != nir_deref_type_array_wildcard) {
655            guide_p++;
656            spec_p++;
657         }
658         assert(*guide_p && *spec_p);
659
660         ret_tail = nir_build_deref_follower(b, ret_tail, *spec_p);
661
662         guide_p++;
663         spec_p++;
664      } else {
665         ret_tail = nir_build_deref_follower(b, ret_tail, *deref_p);
666      }
667   }
668
669   return ret_tail;
670}
671
672/* Do a "load" from an deref-based entry return it in "value" as a value.  The
673 * deref returned in "value" will always be a fresh copy so the caller can
674 * steal it and assign it to the instruction directly without copying it
675 * again.
676 */
677static bool
678load_from_deref_entry_value(struct copy_prop_var_state *state,
679                            struct copy_entry *entry,
680                            nir_builder *b, nir_intrinsic_instr *intrin,
681                            nir_deref_and_path *src, struct value *value)
682{
683   *value = entry->src;
684
685   b->cursor = nir_instr_remove(&intrin->instr);
686
687   nir_deref_path *entry_dst_path = nir_get_deref_path(state->mem_ctx, &entry->dst);
688   nir_deref_path *src_path = nir_get_deref_path(state->mem_ctx, src);
689
690   bool need_to_specialize_wildcards = false;
691   nir_deref_instr **entry_p = &entry_dst_path->path[1];
692   nir_deref_instr **src_p = &src_path->path[1];
693   while (*entry_p && *src_p) {
694      nir_deref_instr *entry_tail = *entry_p++;
695      nir_deref_instr *src_tail = *src_p++;
696
697      if (src_tail->deref_type == nir_deref_type_array &&
698          entry_tail->deref_type == nir_deref_type_array_wildcard)
699         need_to_specialize_wildcards = true;
700   }
701
702   /* If the entry deref is longer than the source deref then it refers to a
703    * smaller type and we can't source from it.
704    */
705   assert(*entry_p == NULL);
706
707   value->deref._path = NULL;
708
709   if (need_to_specialize_wildcards) {
710      /* The entry has some wildcards that are not in src.  This means we need
711       * to construct a new deref based on the entry but using the wildcards
712       * from the source and guided by the entry dst.  Oof.
713       */
714      nir_deref_path *entry_src_path =
715         nir_get_deref_path(state->mem_ctx, &entry->src.deref);
716      value->deref.instr = specialize_wildcards(b, entry_src_path,
717                                                entry_dst_path, src_path);
718   }
719
720   /* If our source deref is longer than the entry deref, that's ok because
721    * it just means the entry deref needs to be extended a bit.
722    */
723   while (*src_p) {
724      nir_deref_instr *src_tail = *src_p++;
725      value->deref.instr = nir_build_deref_follower(b, value->deref.instr, src_tail);
726   }
727
728   return true;
729}
730
731static bool
732try_load_from_entry(struct copy_prop_var_state *state, struct copy_entry *entry,
733                    nir_builder *b, nir_intrinsic_instr *intrin,
734                    nir_deref_and_path *src, struct value *value)
735{
736   if (entry == NULL)
737      return false;
738
739   if (entry->src.is_ssa) {
740      return load_from_ssa_entry_value(state, entry, b, intrin, src, value);
741   } else {
742      return load_from_deref_entry_value(state, entry, b, intrin, src, value);
743   }
744}
745
746static void
747invalidate_copies_for_cf_node(struct copy_prop_var_state *state,
748                              struct util_dynarray *copies,
749                              nir_cf_node *cf_node)
750{
751   struct hash_entry *ht_entry = _mesa_hash_table_search(state->vars_written_map, cf_node);
752   assert(ht_entry);
753
754   struct vars_written *written = ht_entry->data;
755   if (written->modes) {
756      util_dynarray_foreach_reverse(copies, struct copy_entry, entry) {
757         if (nir_deref_mode_may_be(entry->dst.instr, written->modes))
758            copy_entry_remove(copies, entry);
759      }
760   }
761
762   hash_table_foreach (written->derefs, entry) {
763      nir_deref_instr *deref_written = (nir_deref_instr *)entry->key;
764      nir_deref_and_path deref = {deref_written, NULL};
765      kill_aliases(state, copies, &deref, (uintptr_t)entry->data);
766   }
767}
768
769static void
770print_value(struct value *value, unsigned num_components)
771{
772   if (!value->is_ssa) {
773      printf(" %s ", glsl_get_type_name(value->deref.instr->type));
774      nir_print_deref(value->deref.instr, stdout);
775      return;
776   }
777
778   bool same_ssa = true;
779   for (unsigned i = 0; i < num_components; i++) {
780      if (value->ssa.component[i] != i ||
781          (i > 0 && value->ssa.def[i - 1] != value->ssa.def[i])) {
782         same_ssa = false;
783         break;
784      }
785   }
786   if (same_ssa) {
787      printf(" ssa_%d", value->ssa.def[0]->index);
788   } else {
789      for (int i = 0; i < num_components; i++) {
790         if (value->ssa.def[i])
791            printf(" ssa_%d[%u]", value->ssa.def[i]->index, value->ssa.component[i]);
792         else
793            printf(" _");
794      }
795   }
796}
797
798static void
799print_copy_entry(struct copy_entry *entry)
800{
801   printf("    %s ", glsl_get_type_name(entry->dst.instr->type));
802   nir_print_deref(entry->dst.instr, stdout);
803   printf(":\t");
804
805   unsigned num_components = glsl_get_vector_elements(entry->dst.instr->type);
806   print_value(&entry->src, num_components);
807   printf("\n");
808}
809
810static void
811dump_instr(nir_instr *instr)
812{
813   printf("  ");
814   nir_print_instr(instr, stdout);
815   printf("\n");
816}
817
818static void
819dump_copy_entries(struct util_dynarray *copies)
820{
821   util_dynarray_foreach(copies, struct copy_entry, iter)
822      print_copy_entry(iter);
823   printf("\n");
824}
825
826static void
827copy_prop_vars_block(struct copy_prop_var_state *state,
828                     nir_builder *b, nir_block *block,
829                     struct util_dynarray *copies)
830{
831   if (debug) {
832      printf("# block%d\n", block->index);
833      dump_copy_entries(copies);
834   }
835
836   nir_foreach_instr_safe(instr, block) {
837      if (debug && instr->type == nir_instr_type_deref)
838         dump_instr(instr);
839
840      if (instr->type == nir_instr_type_call) {
841         if (debug) dump_instr(instr);
842         apply_barrier_for_modes(copies, nir_var_shader_out |
843                                         nir_var_shader_temp |
844                                         nir_var_function_temp |
845                                         nir_var_mem_ssbo |
846                                         nir_var_mem_shared |
847                                         nir_var_mem_global);
848         if (debug) dump_copy_entries(copies);
849         continue;
850      }
851
852      if (instr->type != nir_instr_type_intrinsic)
853         continue;
854
855      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
856      switch (intrin->intrinsic) {
857      case nir_intrinsic_control_barrier:
858      case nir_intrinsic_memory_barrier:
859         if (debug) dump_instr(instr);
860
861         apply_barrier_for_modes(copies, nir_var_shader_out |
862                                         nir_var_mem_ssbo |
863                                         nir_var_mem_shared |
864                                         nir_var_mem_global);
865         break;
866
867      case nir_intrinsic_memory_barrier_buffer:
868         if (debug) dump_instr(instr);
869
870         apply_barrier_for_modes(copies, nir_var_mem_ssbo |
871                                         nir_var_mem_global);
872         break;
873
874      case nir_intrinsic_memory_barrier_shared:
875         if (debug) dump_instr(instr);
876
877         apply_barrier_for_modes(copies, nir_var_mem_shared);
878         break;
879
880      case nir_intrinsic_memory_barrier_tcs_patch:
881         if (debug) dump_instr(instr);
882
883         apply_barrier_for_modes(copies, nir_var_shader_out);
884         break;
885
886      case nir_intrinsic_scoped_barrier:
887         if (debug) dump_instr(instr);
888
889         if (nir_intrinsic_memory_semantics(intrin) & NIR_MEMORY_ACQUIRE)
890            apply_barrier_for_modes(copies, nir_intrinsic_memory_modes(intrin));
891         break;
892
893      case nir_intrinsic_emit_vertex:
894      case nir_intrinsic_emit_vertex_with_counter:
895         if (debug) dump_instr(instr);
896
897         apply_barrier_for_modes(copies, nir_var_shader_out);
898         break;
899
900      case nir_intrinsic_report_ray_intersection:
901         apply_barrier_for_modes(copies, nir_var_mem_ssbo |
902                                         nir_var_mem_global |
903                                         nir_var_shader_call_data |
904                                         nir_var_ray_hit_attrib);
905         break;
906
907      case nir_intrinsic_ignore_ray_intersection:
908      case nir_intrinsic_terminate_ray:
909         apply_barrier_for_modes(copies, nir_var_mem_ssbo |
910                                         nir_var_mem_global |
911                                         nir_var_shader_call_data);
912         break;
913
914      case nir_intrinsic_load_deref: {
915         if (debug) dump_instr(instr);
916
917         if (nir_intrinsic_access(intrin) & ACCESS_VOLATILE)
918            break;
919
920         nir_deref_and_path src = {nir_src_as_deref(intrin->src[0]), NULL};
921
922         /* If this is a load from a read-only mode, then all this pass would
923          * do is combine redundant loads and CSE should be more efficient for
924          * that.
925          */
926         nir_variable_mode ignore = nir_var_read_only_modes & ~nir_var_vec_indexable_modes;
927         if (nir_deref_mode_must_be(src.instr, ignore))
928            break;
929
930         /* Direct array_derefs of vectors operate on the vectors (the parent
931          * deref).  Indirects will be handled like other derefs.
932          */
933         int vec_index = 0;
934         nir_deref_and_path vec_src = src;
935         if (is_array_deref_of_vector(&src) && nir_src_is_const(src.instr->arr.index)) {
936            vec_src.instr = nir_deref_instr_parent(src.instr);
937            unsigned vec_comps = glsl_get_vector_elements(vec_src.instr->type);
938            vec_index = nir_src_as_uint(src.instr->arr.index);
939
940            /* Loading from an invalid index yields an undef */
941            if (vec_index >= vec_comps) {
942               b->cursor = nir_instr_remove(instr);
943               nir_ssa_def *u = nir_ssa_undef(b, 1, intrin->dest.ssa.bit_size);
944               nir_ssa_def_rewrite_uses(&intrin->dest.ssa, u);
945               state->progress = true;
946               break;
947            }
948         }
949
950         bool src_entry_equal = false;
951         struct copy_entry *src_entry =
952            lookup_entry_for_deref(state, copies, &src,
953                                   nir_derefs_a_contains_b_bit, &src_entry_equal);
954         struct value value = {0};
955         if (try_load_from_entry(state, src_entry, b, intrin, &src, &value)) {
956            if (value.is_ssa) {
957               /* lookup_load has already ensured that we get a single SSA
958                * value that has all of the channels.  We just have to do the
959                * rewrite operation.  Note for array derefs of vectors, the
960                * channel 0 is used.
961                */
962               if (intrin->instr.block) {
963                  /* The lookup left our instruction in-place.  This means it
964                   * must have used it to vec up a bunch of different sources.
965                   * We need to be careful when rewriting uses so we don't
966                   * rewrite the vecN itself.
967                   */
968                  nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa,
969                                                 value.ssa.def[0],
970                                                 value.ssa.def[0]->parent_instr);
971               } else {
972                  nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
973                                           value.ssa.def[0]);
974               }
975            } else {
976               /* We're turning it into a load of a different variable */
977               intrin->src[0] = nir_src_for_ssa(&value.deref.instr->dest.ssa);
978
979               /* Put it back in again. */
980               nir_builder_instr_insert(b, instr);
981               value_set_ssa_components(&value, &intrin->dest.ssa,
982                                        intrin->num_components);
983            }
984            state->progress = true;
985         } else {
986            value_set_ssa_components(&value, &intrin->dest.ssa,
987                                     intrin->num_components);
988         }
989
990         /* Now that we have a value, we're going to store it back so that we
991          * have the right value next time we come looking for it.  In order
992          * to do this, we need an exact match, not just something that
993          * contains what we're looking for.
994          *
995          * We avoid doing another lookup if src.instr == vec_src.instr.
996          */
997         struct copy_entry *entry = src_entry;
998         if (src.instr != vec_src.instr)
999            entry = lookup_entry_for_deref(state, copies, &vec_src,
1000                                           nir_derefs_equal_bit, NULL);
1001         else if (!src_entry_equal)
1002            entry = NULL;
1003
1004         if (!entry)
1005            entry = copy_entry_create(copies, &vec_src);
1006
1007         /* Update the entry with the value of the load.  This way
1008          * we can potentially remove subsequent loads.
1009          */
1010         value_set_from_value(&entry->src, &value, vec_index,
1011                              (1 << intrin->num_components) - 1);
1012         break;
1013      }
1014
1015      case nir_intrinsic_store_deref: {
1016         if (debug) dump_instr(instr);
1017
1018         nir_deref_and_path dst = {nir_src_as_deref(intrin->src[0]), NULL};
1019         assert(glsl_type_is_vector_or_scalar(dst.instr->type));
1020
1021         /* Direct array_derefs of vectors operate on the vectors (the parent
1022          * deref).  Indirects will be handled like other derefs.
1023          */
1024         int vec_index = 0;
1025         nir_deref_and_path vec_dst = dst;
1026         if (is_array_deref_of_vector(&dst) && nir_src_is_const(dst.instr->arr.index)) {
1027            vec_dst.instr = nir_deref_instr_parent(dst.instr);
1028            unsigned vec_comps = glsl_get_vector_elements(vec_dst.instr->type);
1029
1030            vec_index = nir_src_as_uint(dst.instr->arr.index);
1031
1032            /* Storing to an invalid index is a no-op. */
1033            if (vec_index >= vec_comps) {
1034               nir_instr_remove(instr);
1035               state->progress = true;
1036               break;
1037            }
1038         }
1039
1040         if (nir_intrinsic_access(intrin) & ACCESS_VOLATILE) {
1041            unsigned wrmask = nir_intrinsic_write_mask(intrin);
1042            kill_aliases(state, copies, &dst, wrmask);
1043            break;
1044         }
1045
1046         struct copy_entry *entry =
1047            lookup_entry_for_deref(state, copies, &dst, nir_derefs_equal_bit, NULL);
1048         if (entry && value_equals_store_src(&entry->src, intrin)) {
1049            /* If we are storing the value from a load of the same var the
1050             * store is redundant so remove it.
1051             */
1052            nir_instr_remove(instr);
1053            state->progress = true;
1054         } else {
1055            struct value value = {0};
1056            value_set_ssa_components(&value, intrin->src[1].ssa,
1057                                     intrin->num_components);
1058            unsigned wrmask = nir_intrinsic_write_mask(intrin);
1059            struct copy_entry *entry =
1060               get_entry_and_kill_aliases(state, copies, &vec_dst, wrmask);
1061            value_set_from_value(&entry->src, &value, vec_index, wrmask);
1062         }
1063
1064         break;
1065      }
1066
1067      case nir_intrinsic_copy_deref: {
1068         if (debug) dump_instr(instr);
1069
1070         nir_deref_and_path dst = {nir_src_as_deref(intrin->src[0]), NULL};
1071         nir_deref_and_path src = {nir_src_as_deref(intrin->src[1]), NULL};
1072
1073         /* The copy_deref intrinsic doesn't keep track of num_components, so
1074          * get it ourselves.
1075          */
1076         unsigned num_components = glsl_get_vector_elements(dst.instr->type);
1077         unsigned full_mask = (1 << num_components) - 1;
1078
1079         if ((nir_intrinsic_src_access(intrin) & ACCESS_VOLATILE) ||
1080             (nir_intrinsic_dst_access(intrin) & ACCESS_VOLATILE)) {
1081            kill_aliases(state, copies, &dst, full_mask);
1082            break;
1083         }
1084
1085         nir_deref_compare_result comp =
1086            nir_compare_derefs_and_paths(state->mem_ctx, &src, &dst);
1087         if (comp & nir_derefs_equal_bit) {
1088            /* This is a no-op self-copy.  Get rid of it */
1089            nir_instr_remove(instr);
1090            state->progress = true;
1091            continue;
1092         }
1093
1094         /* Copy of direct array derefs of vectors are not handled.  Just
1095          * invalidate what's written and bail.
1096          */
1097         if ((is_array_deref_of_vector(&src) && nir_src_is_const(src.instr->arr.index)) ||
1098             (is_array_deref_of_vector(&dst) && nir_src_is_const(dst.instr->arr.index))) {
1099            kill_aliases(state, copies, &dst, full_mask);
1100            break;
1101         }
1102
1103         struct copy_entry *src_entry =
1104            lookup_entry_for_deref(state, copies, &src, nir_derefs_a_contains_b_bit, NULL);
1105         struct value value;
1106         if (try_load_from_entry(state, src_entry, b, intrin, &src, &value)) {
1107            /* If load works, intrin (the copy_deref) is removed. */
1108            if (value.is_ssa) {
1109               nir_store_deref(b, dst.instr, value.ssa.def[0], full_mask);
1110            } else {
1111               /* If this would be a no-op self-copy, don't bother. */
1112               comp = nir_compare_derefs_and_paths(state->mem_ctx, &value.deref, &dst);
1113               if (comp & nir_derefs_equal_bit)
1114                  continue;
1115
1116               /* Just turn it into a copy of a different deref */
1117               intrin->src[1] = nir_src_for_ssa(&value.deref.instr->dest.ssa);
1118
1119               /* Put it back in again. */
1120               nir_builder_instr_insert(b, instr);
1121            }
1122
1123            state->progress = true;
1124         } else {
1125            value = (struct value) {
1126               .is_ssa = false,
1127               { .deref = src },
1128            };
1129         }
1130
1131         nir_variable *src_var = nir_deref_instr_get_variable(src.instr);
1132         if (src_var && src_var->data.cannot_coalesce) {
1133            /* The source cannot be coaleseced, which means we can't propagate
1134             * this copy.
1135             */
1136            break;
1137         }
1138
1139         struct copy_entry *dst_entry =
1140            get_entry_and_kill_aliases(state, copies, &dst, full_mask);
1141         value_set_from_value(&dst_entry->src, &value, 0, full_mask);
1142         break;
1143      }
1144
1145      case nir_intrinsic_trace_ray:
1146      case nir_intrinsic_execute_callable:
1147      case nir_intrinsic_rt_trace_ray:
1148      case nir_intrinsic_rt_execute_callable: {
1149         if (debug) dump_instr(instr);
1150
1151         nir_deref_and_path payload = {
1152            nir_src_as_deref(*nir_get_shader_call_payload_src(intrin)), NULL};
1153         nir_component_mask_t full_mask = (nir_component_mask_t)
1154            BITFIELD_MASK(glsl_get_vector_elements(payload.instr->type));
1155         kill_aliases(state, copies, &payload, full_mask);
1156         break;
1157      }
1158
1159      case nir_intrinsic_memcpy_deref:
1160      case nir_intrinsic_deref_atomic_add:
1161      case nir_intrinsic_deref_atomic_fadd:
1162      case nir_intrinsic_deref_atomic_imin:
1163      case nir_intrinsic_deref_atomic_umin:
1164      case nir_intrinsic_deref_atomic_fmin:
1165      case nir_intrinsic_deref_atomic_imax:
1166      case nir_intrinsic_deref_atomic_umax:
1167      case nir_intrinsic_deref_atomic_fmax:
1168      case nir_intrinsic_deref_atomic_and:
1169      case nir_intrinsic_deref_atomic_or:
1170      case nir_intrinsic_deref_atomic_xor:
1171      case nir_intrinsic_deref_atomic_exchange:
1172      case nir_intrinsic_deref_atomic_comp_swap:
1173      case nir_intrinsic_deref_atomic_fcomp_swap:
1174         if (debug) dump_instr(instr);
1175
1176         nir_deref_and_path dst = {nir_src_as_deref(intrin->src[0]), NULL};
1177         unsigned num_components = glsl_get_vector_elements(dst.instr->type);
1178         unsigned full_mask = (1 << num_components) - 1;
1179         kill_aliases(state, copies, &dst, full_mask);
1180         break;
1181
1182      case nir_intrinsic_store_deref_block_intel: {
1183         if (debug) dump_instr(instr);
1184
1185         /* Invalidate the whole variable (or cast) and anything that alias
1186          * with it.
1187          */
1188         nir_deref_and_path dst = {nir_src_as_deref(intrin->src[0]), NULL};
1189         while (nir_deref_instr_parent(dst.instr))
1190            dst.instr = nir_deref_instr_parent(dst.instr);
1191         assert(dst.instr->deref_type == nir_deref_type_var ||
1192                dst.instr->deref_type == nir_deref_type_cast);
1193
1194         unsigned num_components = glsl_get_vector_elements(dst.instr->type);
1195         unsigned full_mask = (1 << num_components) - 1;
1196         kill_aliases(state, copies, &dst, full_mask);
1197         break;
1198      }
1199
1200      default:
1201         continue; /* To skip the debug below. */
1202      }
1203
1204      if (debug) dump_copy_entries(copies);
1205   }
1206}
1207
1208static void
1209copy_prop_vars_cf_node(struct copy_prop_var_state *state,
1210                       struct util_dynarray *copies,
1211                       nir_cf_node *cf_node)
1212{
1213   switch (cf_node->type) {
1214   case nir_cf_node_function: {
1215      nir_function_impl *impl = nir_cf_node_as_function(cf_node);
1216
1217      struct util_dynarray impl_copies;
1218      util_dynarray_init(&impl_copies, state->mem_ctx);
1219
1220      foreach_list_typed_safe(nir_cf_node, cf_node, node, &impl->body)
1221         copy_prop_vars_cf_node(state, &impl_copies, cf_node);
1222
1223      break;
1224   }
1225
1226   case nir_cf_node_block: {
1227      nir_block *block = nir_cf_node_as_block(cf_node);
1228      nir_builder b;
1229      nir_builder_init(&b, state->impl);
1230      copy_prop_vars_block(state, &b, block, copies);
1231      break;
1232   }
1233
1234   case nir_cf_node_if: {
1235      nir_if *if_stmt = nir_cf_node_as_if(cf_node);
1236
1237      /* Clone the copies for each branch of the if statement.  The idea is
1238       * that they both see the same state of available copies, but do not
1239       * interfere to each other.
1240       */
1241
1242      struct util_dynarray then_copies;
1243      util_dynarray_clone(&then_copies, state->mem_ctx, copies);
1244
1245      struct util_dynarray else_copies;
1246      util_dynarray_clone(&else_copies, state->mem_ctx, copies);
1247
1248      foreach_list_typed_safe(nir_cf_node, cf_node, node, &if_stmt->then_list)
1249         copy_prop_vars_cf_node(state, &then_copies, cf_node);
1250
1251      foreach_list_typed_safe(nir_cf_node, cf_node, node, &if_stmt->else_list)
1252         copy_prop_vars_cf_node(state, &else_copies, cf_node);
1253
1254      /* Both branches copies can be ignored, since the effect of running both
1255       * branches was captured in the first pass that collects vars_written.
1256       */
1257
1258      invalidate_copies_for_cf_node(state, copies, cf_node);
1259
1260      break;
1261   }
1262
1263   case nir_cf_node_loop: {
1264      nir_loop *loop = nir_cf_node_as_loop(cf_node);
1265
1266      /* Invalidate before cloning the copies for the loop, since the loop
1267       * body can be executed more than once.
1268       */
1269
1270      invalidate_copies_for_cf_node(state, copies, cf_node);
1271
1272      struct util_dynarray loop_copies;
1273      util_dynarray_clone(&loop_copies, state->mem_ctx, copies);
1274
1275      foreach_list_typed_safe(nir_cf_node, cf_node, node, &loop->body)
1276         copy_prop_vars_cf_node(state, &loop_copies, cf_node);
1277
1278      break;
1279   }
1280
1281   default:
1282      unreachable("Invalid CF node type");
1283   }
1284}
1285
1286static bool
1287nir_copy_prop_vars_impl(nir_function_impl *impl)
1288{
1289   void *mem_ctx = ralloc_context(NULL);
1290
1291   if (debug) {
1292      nir_metadata_require(impl, nir_metadata_block_index);
1293      printf("## nir_copy_prop_vars_impl for %s\n", impl->function->name);
1294   }
1295
1296   struct copy_prop_var_state state = {
1297      .impl = impl,
1298      .mem_ctx = mem_ctx,
1299      .lin_ctx = linear_zalloc_parent(mem_ctx, 0),
1300
1301      .vars_written_map = _mesa_pointer_hash_table_create(mem_ctx),
1302   };
1303
1304   gather_vars_written(&state, NULL, &impl->cf_node);
1305
1306   copy_prop_vars_cf_node(&state, NULL, &impl->cf_node);
1307
1308   if (state.progress) {
1309      nir_metadata_preserve(impl, nir_metadata_block_index |
1310                                  nir_metadata_dominance);
1311   } else {
1312      nir_metadata_preserve(impl, nir_metadata_all);
1313   }
1314
1315   ralloc_free(mem_ctx);
1316   return state.progress;
1317}
1318
1319bool
1320nir_opt_copy_prop_vars(nir_shader *shader)
1321{
1322   bool progress = false;
1323
1324   nir_foreach_function(function, shader) {
1325      if (!function->impl)
1326         continue;
1327      progress |= nir_copy_prop_vars_impl(function->impl);
1328   }
1329
1330   return progress;
1331}
1332