1/* 2 * Copyright © 2018 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include "nir.h" 25#include "nir_builder.h" 26#include "nir_deref.h" 27 28#include "util/u_dynarray.h" 29 30/** 31 * Elimination of dead writes based on derefs. 32 * 33 * Dead writes are stores and copies that write to a deref, which then gets 34 * another write before it was used (read or sourced for a copy). Those 35 * writes can be removed since they don't affect anything. 36 * 37 * For derefs that refer to a memory area that can be read after the program, 38 * the last write is considered used. The presence of certain instructions 39 * may also cause writes to be considered used, e.g. memory barrier (in this case 40 * the value must be written as other thread might use it). 41 * 42 * The write mask for store instructions is considered, so it is possible that 43 * a store is removed because of the combination of other stores overwritten 44 * its value. 45 */ 46 47/* Entry for unused_writes arrays. */ 48struct write_entry { 49 /* If NULL indicates the entry is free to be reused. */ 50 nir_intrinsic_instr *intrin; 51 nir_component_mask_t mask; 52 nir_deref_instr *dst; 53}; 54 55static void 56clear_unused_for_modes(struct util_dynarray *unused_writes, nir_variable_mode modes) 57{ 58 util_dynarray_foreach_reverse(unused_writes, struct write_entry, entry) { 59 if (entry->dst->mode & modes) 60 *entry = util_dynarray_pop(unused_writes, struct write_entry); 61 } 62} 63 64static void 65clear_unused_for_read(struct util_dynarray *unused_writes, nir_deref_instr *src) 66{ 67 util_dynarray_foreach_reverse(unused_writes, struct write_entry, entry) { 68 if (nir_compare_derefs(src, entry->dst) & nir_derefs_may_alias_bit) 69 *entry = util_dynarray_pop(unused_writes, struct write_entry); 70 } 71} 72 73static bool 74update_unused_writes(struct util_dynarray *unused_writes, 75 nir_intrinsic_instr *intrin, 76 nir_deref_instr *dst, nir_component_mask_t mask) 77{ 78 bool progress = false; 79 80 /* This pass assumes that destination of copies and stores are derefs that 81 * end in a vector or scalar (it is OK to have wildcards or indirects for 82 * arrays). 83 */ 84 assert(glsl_type_is_vector_or_scalar(dst->type)); 85 86 /* Find writes that are unused and can be removed. */ 87 util_dynarray_foreach_reverse(unused_writes, struct write_entry, entry) { 88 nir_deref_compare_result comp = nir_compare_derefs(dst, entry->dst); 89 if (comp & nir_derefs_a_contains_b_bit) { 90 entry->mask &= ~mask; 91 if (entry->mask == 0) { 92 nir_instr_remove(&entry->intrin->instr); 93 *entry = util_dynarray_pop(unused_writes, struct write_entry); 94 progress = true; 95 } 96 } 97 } 98 99 /* Add the new write to the unused array. */ 100 struct write_entry new_entry = { 101 .intrin = intrin, 102 .mask = mask, 103 .dst = dst, 104 }; 105 106 util_dynarray_append(unused_writes, struct write_entry, new_entry); 107 108 return progress; 109} 110 111static bool 112remove_dead_write_vars_local(void *mem_ctx, nir_block *block) 113{ 114 bool progress = false; 115 116 struct util_dynarray unused_writes; 117 util_dynarray_init(&unused_writes, mem_ctx); 118 119 nir_foreach_instr_safe(instr, block) { 120 if (instr->type == nir_instr_type_call) { 121 clear_unused_for_modes(&unused_writes, nir_var_shader_out | 122 nir_var_shader_temp | 123 nir_var_function_temp | 124 nir_var_mem_ssbo | 125 nir_var_mem_shared); 126 continue; 127 } 128 129 if (instr->type != nir_instr_type_intrinsic) 130 continue; 131 132 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 133 switch (intrin->intrinsic) { 134 case nir_intrinsic_barrier: 135 case nir_intrinsic_memory_barrier: { 136 clear_unused_for_modes(&unused_writes, nir_var_shader_out | 137 nir_var_mem_ssbo | 138 nir_var_mem_shared); 139 break; 140 } 141 142 case nir_intrinsic_emit_vertex: 143 case nir_intrinsic_emit_vertex_with_counter: { 144 clear_unused_for_modes(&unused_writes, nir_var_shader_out); 145 break; 146 } 147 148 case nir_intrinsic_load_deref: { 149 nir_deref_instr *src = nir_src_as_deref(intrin->src[0]); 150 clear_unused_for_read(&unused_writes, src); 151 break; 152 } 153 154 case nir_intrinsic_store_deref: { 155 nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]); 156 nir_component_mask_t mask = nir_intrinsic_write_mask(intrin); 157 progress |= update_unused_writes(&unused_writes, intrin, dst, mask); 158 break; 159 } 160 161 case nir_intrinsic_copy_deref: { 162 nir_deref_instr *src = nir_src_as_deref(intrin->src[1]); 163 nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]); 164 165 /* Self-copy is removed. */ 166 if (nir_compare_derefs(src, dst) & nir_derefs_equal_bit) { 167 nir_instr_remove(instr); 168 progress = true; 169 break; 170 } 171 172 clear_unused_for_read(&unused_writes, src); 173 nir_component_mask_t mask = (1 << glsl_get_vector_elements(dst->type)) - 1; 174 progress |= update_unused_writes(&unused_writes, intrin, dst, mask); 175 break; 176 } 177 178 default: 179 break; 180 } 181 } 182 183 /* All unused writes at the end of the block are kept, since we can't be 184 * sure they'll be overwritten or not with local analysis only. 185 */ 186 187 return progress; 188} 189 190static bool 191remove_dead_write_vars_impl(void *mem_ctx, nir_function_impl *impl) 192{ 193 bool progress = false; 194 195 nir_metadata_require(impl, nir_metadata_block_index); 196 197 nir_foreach_block(block, impl) 198 progress |= remove_dead_write_vars_local(mem_ctx, block); 199 200 if (progress) { 201 nir_metadata_preserve(impl, nir_metadata_block_index | 202 nir_metadata_dominance); 203 } 204 205 return progress; 206} 207 208bool 209nir_opt_dead_write_vars(nir_shader *shader) 210{ 211 void *mem_ctx = ralloc_context(NULL); 212 bool progress = false; 213 214 nir_foreach_function(function, shader) { 215 if (!function->impl) 216 continue; 217 progress |= remove_dead_write_vars_impl(mem_ctx, function->impl); 218 } 219 220 ralloc_free(mem_ctx); 221 return progress; 222} 223