/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"

#include "util/u_dynarray.h"

/**
 * Elimination of dead writes based on derefs.
 *
 * Dead writes are stores and copies that write to a deref which then gets
 * another write before it is used (read or sourced for a copy).  Such
 * writes can be removed since they don't affect anything.
 *
 * For derefs that refer to memory that may still be read after the shader
 * finishes (e.g. shader outputs or SSBOs), the last write is considered
 * used.  The presence of certain instructions may also cause writes to be
 * considered used, e.g. a memory barrier (in which case the value must
 * actually be written, since another thread might read it).
 *
 * The write mask of store instructions is taken into account, so a store may
 * be removed because the combination of later stores completely overwrites
 * its value.
 */
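
/*
 * As an illustrative example (GLSL-like pseudocode, not actual NIR), given
 *
 *    v = a;       // dead: every component is overwritten before any read
 *    v.xy = b;    // dead: .x and .y are overwritten by the two stores below
 *    v.yz = c;
 *    v.xw = d;
 *    use(v);
 *
 * the first two stores are removed.  The second one is only found dead by
 * combining the write masks of the two following stores, which together
 * cover the .x and .y components it wrote.
 */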

/* Entry for unused_writes arrays. */
struct write_entry {
   /* Store or copy instruction that performed the write. */
   nir_intrinsic_instr *intrin;
   /* Components written by intrin that have not been overwritten yet. */
   nir_component_mask_t mask;
   /* Destination deref of the write. */
   nir_deref_instr *dst;
};

/* Consider all pending writes to variables of the given modes as used, so
 * they won't be removed.
 */
static void
clear_unused_for_modes(struct util_dynarray *unused_writes, nir_variable_mode modes)
{
   util_dynarray_foreach_reverse(unused_writes, struct write_entry, entry) {
      if (entry->dst->mode & modes)
         *entry = util_dynarray_pop(unused_writes, struct write_entry);
   }
}

/* Consider all pending writes that may alias the deref being read as used. */
static void
clear_unused_for_read(struct util_dynarray *unused_writes, nir_deref_instr *src)
{
   util_dynarray_foreach_reverse(unused_writes, struct write_entry, entry) {
      if (nir_compare_derefs(src, entry->dst) & nir_derefs_may_alias_bit)
         *entry = util_dynarray_pop(unused_writes, struct write_entry);
   }
}

/* Record a new write and remove any previous writes that it (together with
 * other writes seen since) makes completely dead.  Returns true if an
 * instruction was removed.
 */
static bool
update_unused_writes(struct util_dynarray *unused_writes,
                     nir_intrinsic_instr *intrin,
                     nir_deref_instr *dst, nir_component_mask_t mask)
{
   bool progress = false;

   /* This pass assumes that the destinations of copies and stores are derefs
    * that end in a vector or scalar (it is OK to have wildcards or indirects
    * for arrays).
    */
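   /* E.g. (illustrative) destinations such as a[i].v or a[*].v, where v is a
    * vec4, are expected here; a deref whose final type is a whole struct or
    * array is not.
    */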
   assert(glsl_type_is_vector_or_scalar(dst->type));

   /* Find writes that are unused and can be removed. */
   util_dynarray_foreach_reverse(unused_writes, struct write_entry, entry) {
      nir_deref_compare_result comp = nir_compare_derefs(dst, entry->dst);
      if (comp & nir_derefs_a_contains_b_bit) {
         entry->mask &= ~mask;
         if (entry->mask == 0) {
            nir_instr_remove(&entry->intrin->instr);
            *entry = util_dynarray_pop(unused_writes, struct write_entry);
            progress = true;
         }
      }
   }

   /* Add the new write to the unused array. */
   struct write_entry new_entry = {
      .intrin = intrin,
      .mask = mask,
      .dst = dst,
   };

   util_dynarray_append(unused_writes, struct write_entry, new_entry);

   return progress;
}

static bool
remove_dead_write_vars_local(void *mem_ctx, nir_block *block)
{
   bool progress = false;

   struct util_dynarray unused_writes;
   util_dynarray_init(&unused_writes, mem_ctx);

   nir_foreach_instr_safe(instr, block) {
      /* A call may read or write memory of any of these modes, so consider
       * all pending writes to them used.
       */
      if (instr->type == nir_instr_type_call) {
         clear_unused_for_modes(&unused_writes, nir_var_shader_out |
                                                nir_var_shader_temp |
                                                nir_var_function_temp |
                                                nir_var_mem_ssbo |
                                                nir_var_mem_shared);
         continue;
      }

      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      switch (intrin->intrinsic) {
      case nir_intrinsic_barrier:
      case nir_intrinsic_memory_barrier: {
         clear_unused_for_modes(&unused_writes, nir_var_shader_out |
                                                nir_var_mem_ssbo |
                                                nir_var_mem_shared);
         break;
      }

      case nir_intrinsic_emit_vertex:
      case nir_intrinsic_emit_vertex_with_counter: {
         clear_unused_for_modes(&unused_writes, nir_var_shader_out);
         break;
      }

      case nir_intrinsic_load_deref: {
         nir_deref_instr *src = nir_src_as_deref(intrin->src[0]);
         clear_unused_for_read(&unused_writes, src);
         break;
      }

      case nir_intrinsic_store_deref: {
         nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
         nir_component_mask_t mask = nir_intrinsic_write_mask(intrin);
         progress |= update_unused_writes(&unused_writes, intrin, dst, mask);
         break;
      }

      case nir_intrinsic_copy_deref: {
         nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
         nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);

         /* A self-copy has no effect and is removed. */
         if (nir_compare_derefs(src, dst) & nir_derefs_equal_bit) {
            nir_instr_remove(instr);
            progress = true;
            break;
         }

         clear_unused_for_read(&unused_writes, src);
         nir_component_mask_t mask = (1 << glsl_get_vector_elements(dst->type)) - 1;
         progress |= update_unused_writes(&unused_writes, intrin, dst, mask);
         break;
      }

      default:
         break;
      }
   }

   /* Any writes still unused at the end of the block are kept, since with
    * block-local analysis only we can't tell whether they will be
    * overwritten later.
    */

   return progress;
}

static bool
remove_dead_write_vars_impl(void *mem_ctx, nir_function_impl *impl)
{
   bool progress = false;

   nir_metadata_require(impl, nir_metadata_block_index);

   nir_foreach_block(block, impl)
      progress |= remove_dead_write_vars_local(mem_ctx, block);

   if (progress) {
      nir_metadata_preserve(impl, nir_metadata_block_index |
                                  nir_metadata_dominance);
   }

   return progress;
}

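/* Remove dead writes from every function in the shader.  A minimal usage
 * sketch (illustrative, assuming the usual pattern of running NIR passes
 * in a loop until no more progress is made):
 *
 *    bool progress;
 *    do {
 *       progress = false;
 *       progress |= nir_opt_dead_write_vars(shader);
 *       // ... other optimization passes ...
 *    } while (progress);
 */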
bool
nir_opt_dead_write_vars(nir_shader *shader)
{
   void *mem_ctx = ralloc_context(NULL);
   bool progress = false;

   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;
      progress |= remove_dead_write_vars_impl(mem_ctx, function->impl);
   }

   ralloc_free(mem_ctx);
   return progress;
}