1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2018 Intel Corporation 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21b8e80941Smrg * IN THE SOFTWARE. 22b8e80941Smrg */ 23b8e80941Smrg 24b8e80941Smrg#include "nir.h" 25b8e80941Smrg#include "nir_builder.h" 26b8e80941Smrg#include "nir_deref.h" 27b8e80941Smrg#include "util/hash_table.h" 28b8e80941Smrg 29b8e80941Smrgvoid 30b8e80941Smrgnir_deref_path_init(nir_deref_path *path, 31b8e80941Smrg nir_deref_instr *deref, void *mem_ctx) 32b8e80941Smrg{ 33b8e80941Smrg assert(deref != NULL); 34b8e80941Smrg 35b8e80941Smrg /* The length of the short path is at most ARRAY_SIZE - 1 because we need 36b8e80941Smrg * room for the NULL terminator. 37b8e80941Smrg */ 38b8e80941Smrg static const int max_short_path_len = ARRAY_SIZE(path->_short_path) - 1; 39b8e80941Smrg 40b8e80941Smrg int count = 0; 41b8e80941Smrg 42b8e80941Smrg nir_deref_instr **tail = &path->_short_path[max_short_path_len]; 43b8e80941Smrg nir_deref_instr **head = tail; 44b8e80941Smrg 45b8e80941Smrg *tail = NULL; 46b8e80941Smrg for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) { 47b8e80941Smrg count++; 48b8e80941Smrg if (count <= max_short_path_len) 49b8e80941Smrg *(--head) = d; 50b8e80941Smrg } 51b8e80941Smrg 52b8e80941Smrg if (count <= max_short_path_len) { 53b8e80941Smrg /* If we're under max_short_path_len, just use the short path. */ 54b8e80941Smrg path->path = head; 55b8e80941Smrg goto done; 56b8e80941Smrg } 57b8e80941Smrg 58b8e80941Smrg#ifndef NDEBUG 59b8e80941Smrg /* Just in case someone uses short_path by accident */ 60b8e80941Smrg for (unsigned i = 0; i < ARRAY_SIZE(path->_short_path); i++) 61b8e80941Smrg path->_short_path[i] = (void *)0xdeadbeef; 62b8e80941Smrg#endif 63b8e80941Smrg 64b8e80941Smrg path->path = ralloc_array(mem_ctx, nir_deref_instr *, count + 1); 65b8e80941Smrg head = tail = path->path + count; 66b8e80941Smrg *tail = NULL; 67b8e80941Smrg for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) 68b8e80941Smrg *(--head) = d; 69b8e80941Smrg 70b8e80941Smrgdone: 71b8e80941Smrg assert(head == path->path); 72b8e80941Smrg assert(tail == head + count); 73b8e80941Smrg assert(*tail == NULL); 74b8e80941Smrg} 75b8e80941Smrg 76b8e80941Smrgvoid 77b8e80941Smrgnir_deref_path_finish(nir_deref_path *path) 78b8e80941Smrg{ 79b8e80941Smrg if (path->path < &path->_short_path[0] || 80b8e80941Smrg path->path > &path->_short_path[ARRAY_SIZE(path->_short_path) - 1]) 81b8e80941Smrg ralloc_free(path->path); 82b8e80941Smrg} 83b8e80941Smrg 84b8e80941Smrg/** 85b8e80941Smrg * Recursively removes unused deref instructions 86b8e80941Smrg */ 87b8e80941Smrgbool 88b8e80941Smrgnir_deref_instr_remove_if_unused(nir_deref_instr *instr) 89b8e80941Smrg{ 90b8e80941Smrg bool progress = false; 91b8e80941Smrg 92b8e80941Smrg for (nir_deref_instr *d = instr; d; d = nir_deref_instr_parent(d)) { 93b8e80941Smrg /* If anyone is using this deref, leave it alone */ 94b8e80941Smrg assert(d->dest.is_ssa); 95b8e80941Smrg if (!list_empty(&d->dest.ssa.uses)) 96b8e80941Smrg break; 97b8e80941Smrg 98b8e80941Smrg nir_instr_remove(&d->instr); 99b8e80941Smrg progress = true; 100b8e80941Smrg } 101b8e80941Smrg 102b8e80941Smrg return progress; 103b8e80941Smrg} 104b8e80941Smrg 105b8e80941Smrgbool 106b8e80941Smrgnir_deref_instr_has_indirect(nir_deref_instr *instr) 107b8e80941Smrg{ 108b8e80941Smrg while (instr->deref_type != nir_deref_type_var) { 109b8e80941Smrg /* Consider casts to be indirects */ 110b8e80941Smrg if (instr->deref_type == nir_deref_type_cast) 111b8e80941Smrg return true; 112b8e80941Smrg 113b8e80941Smrg if ((instr->deref_type == nir_deref_type_array || 114b8e80941Smrg instr->deref_type == nir_deref_type_ptr_as_array) && 115b8e80941Smrg !nir_src_is_const(instr->arr.index)) 116b8e80941Smrg return true; 117b8e80941Smrg 118b8e80941Smrg instr = nir_deref_instr_parent(instr); 119b8e80941Smrg } 120b8e80941Smrg 121b8e80941Smrg return false; 122b8e80941Smrg} 123b8e80941Smrg 124b8e80941Smrgunsigned 125b8e80941Smrgnir_deref_instr_ptr_as_array_stride(nir_deref_instr *deref) 126b8e80941Smrg{ 127b8e80941Smrg switch (deref->deref_type) { 128b8e80941Smrg case nir_deref_type_array: 129b8e80941Smrg return glsl_get_explicit_stride(nir_deref_instr_parent(deref)->type); 130b8e80941Smrg case nir_deref_type_ptr_as_array: 131b8e80941Smrg return nir_deref_instr_ptr_as_array_stride(nir_deref_instr_parent(deref)); 132b8e80941Smrg case nir_deref_type_cast: 133b8e80941Smrg return deref->cast.ptr_stride; 134b8e80941Smrg default: 135b8e80941Smrg return 0; 136b8e80941Smrg } 137b8e80941Smrg} 138b8e80941Smrg 139b8e80941Smrgstatic unsigned 140b8e80941Smrgtype_get_array_stride(const struct glsl_type *elem_type, 141b8e80941Smrg glsl_type_size_align_func size_align) 142b8e80941Smrg{ 143b8e80941Smrg unsigned elem_size, elem_align; 144b8e80941Smrg size_align(elem_type, &elem_size, &elem_align); 145b8e80941Smrg return ALIGN_POT(elem_size, elem_align); 146b8e80941Smrg} 147b8e80941Smrg 148b8e80941Smrgstatic unsigned 149b8e80941Smrgstruct_type_get_field_offset(const struct glsl_type *struct_type, 150b8e80941Smrg glsl_type_size_align_func size_align, 151b8e80941Smrg unsigned field_idx) 152b8e80941Smrg{ 153b8e80941Smrg assert(glsl_type_is_struct_or_ifc(struct_type)); 154b8e80941Smrg unsigned offset = 0; 155b8e80941Smrg for (unsigned i = 0; i <= field_idx; i++) { 156b8e80941Smrg unsigned elem_size, elem_align; 157b8e80941Smrg size_align(glsl_get_struct_field(struct_type, i), &elem_size, &elem_align); 158b8e80941Smrg offset = ALIGN_POT(offset, elem_align); 159b8e80941Smrg if (i < field_idx) 160b8e80941Smrg offset += elem_size; 161b8e80941Smrg } 162b8e80941Smrg return offset; 163b8e80941Smrg} 164b8e80941Smrg 165b8e80941Smrgunsigned 166b8e80941Smrgnir_deref_instr_get_const_offset(nir_deref_instr *deref, 167b8e80941Smrg glsl_type_size_align_func size_align) 168b8e80941Smrg{ 169b8e80941Smrg nir_deref_path path; 170b8e80941Smrg nir_deref_path_init(&path, deref, NULL); 171b8e80941Smrg 172b8e80941Smrg assert(path.path[0]->deref_type == nir_deref_type_var); 173b8e80941Smrg 174b8e80941Smrg unsigned offset = 0; 175b8e80941Smrg for (nir_deref_instr **p = &path.path[1]; *p; p++) { 176b8e80941Smrg if ((*p)->deref_type == nir_deref_type_array) { 177b8e80941Smrg offset += nir_src_as_uint((*p)->arr.index) * 178b8e80941Smrg type_get_array_stride((*p)->type, size_align); 179b8e80941Smrg } else if ((*p)->deref_type == nir_deref_type_struct) { 180b8e80941Smrg /* p starts at path[1], so this is safe */ 181b8e80941Smrg nir_deref_instr *parent = *(p - 1); 182b8e80941Smrg offset += struct_type_get_field_offset(parent->type, size_align, 183b8e80941Smrg (*p)->strct.index); 184b8e80941Smrg } else { 185b8e80941Smrg unreachable("Unsupported deref type"); 186b8e80941Smrg } 187b8e80941Smrg } 188b8e80941Smrg 189b8e80941Smrg nir_deref_path_finish(&path); 190b8e80941Smrg 191b8e80941Smrg return offset; 192b8e80941Smrg} 193b8e80941Smrg 194b8e80941Smrgnir_ssa_def * 195b8e80941Smrgnir_build_deref_offset(nir_builder *b, nir_deref_instr *deref, 196b8e80941Smrg glsl_type_size_align_func size_align) 197b8e80941Smrg{ 198b8e80941Smrg nir_deref_path path; 199b8e80941Smrg nir_deref_path_init(&path, deref, NULL); 200b8e80941Smrg 201b8e80941Smrg assert(path.path[0]->deref_type == nir_deref_type_var); 202b8e80941Smrg 203b8e80941Smrg nir_ssa_def *offset = nir_imm_int(b, 0); 204b8e80941Smrg for (nir_deref_instr **p = &path.path[1]; *p; p++) { 205b8e80941Smrg if ((*p)->deref_type == nir_deref_type_array) { 206b8e80941Smrg nir_ssa_def *index = nir_ssa_for_src(b, (*p)->arr.index, 1); 207b8e80941Smrg int stride = type_get_array_stride((*p)->type, size_align); 208b8e80941Smrg offset = nir_iadd(b, offset, nir_imul_imm(b, index, stride)); 209b8e80941Smrg } else if ((*p)->deref_type == nir_deref_type_struct) { 210b8e80941Smrg /* p starts at path[1], so this is safe */ 211b8e80941Smrg nir_deref_instr *parent = *(p - 1); 212b8e80941Smrg unsigned field_offset = 213b8e80941Smrg struct_type_get_field_offset(parent->type, size_align, 214b8e80941Smrg (*p)->strct.index); 215b8e80941Smrg offset = nir_iadd_imm(b, offset, field_offset); 216b8e80941Smrg } else { 217b8e80941Smrg unreachable("Unsupported deref type"); 218b8e80941Smrg } 219b8e80941Smrg } 220b8e80941Smrg 221b8e80941Smrg nir_deref_path_finish(&path); 222b8e80941Smrg 223b8e80941Smrg return offset; 224b8e80941Smrg} 225b8e80941Smrg 226b8e80941Smrgbool 227b8e80941Smrgnir_remove_dead_derefs_impl(nir_function_impl *impl) 228b8e80941Smrg{ 229b8e80941Smrg bool progress = false; 230b8e80941Smrg 231b8e80941Smrg nir_foreach_block(block, impl) { 232b8e80941Smrg nir_foreach_instr_safe(instr, block) { 233b8e80941Smrg if (instr->type == nir_instr_type_deref && 234b8e80941Smrg nir_deref_instr_remove_if_unused(nir_instr_as_deref(instr))) 235b8e80941Smrg progress = true; 236b8e80941Smrg } 237b8e80941Smrg } 238b8e80941Smrg 239b8e80941Smrg if (progress) 240b8e80941Smrg nir_metadata_preserve(impl, nir_metadata_block_index | 241b8e80941Smrg nir_metadata_dominance); 242b8e80941Smrg 243b8e80941Smrg return progress; 244b8e80941Smrg} 245b8e80941Smrg 246b8e80941Smrgbool 247b8e80941Smrgnir_remove_dead_derefs(nir_shader *shader) 248b8e80941Smrg{ 249b8e80941Smrg bool progress = false; 250b8e80941Smrg nir_foreach_function(function, shader) { 251b8e80941Smrg if (function->impl && nir_remove_dead_derefs_impl(function->impl)) 252b8e80941Smrg progress = true; 253b8e80941Smrg } 254b8e80941Smrg 255b8e80941Smrg return progress; 256b8e80941Smrg} 257b8e80941Smrg 258b8e80941Smrgvoid 259b8e80941Smrgnir_fixup_deref_modes(nir_shader *shader) 260b8e80941Smrg{ 261b8e80941Smrg nir_foreach_function(function, shader) { 262b8e80941Smrg if (!function->impl) 263b8e80941Smrg continue; 264b8e80941Smrg 265b8e80941Smrg nir_foreach_block(block, function->impl) { 266b8e80941Smrg nir_foreach_instr(instr, block) { 267b8e80941Smrg if (instr->type != nir_instr_type_deref) 268b8e80941Smrg continue; 269b8e80941Smrg 270b8e80941Smrg nir_deref_instr *deref = nir_instr_as_deref(instr); 271b8e80941Smrg if (deref->deref_type == nir_deref_type_cast) 272b8e80941Smrg continue; 273b8e80941Smrg 274b8e80941Smrg nir_variable_mode parent_mode; 275b8e80941Smrg if (deref->deref_type == nir_deref_type_var) { 276b8e80941Smrg parent_mode = deref->var->data.mode; 277b8e80941Smrg } else { 278b8e80941Smrg assert(deref->parent.is_ssa); 279b8e80941Smrg nir_deref_instr *parent = 280b8e80941Smrg nir_instr_as_deref(deref->parent.ssa->parent_instr); 281b8e80941Smrg parent_mode = parent->mode; 282b8e80941Smrg } 283b8e80941Smrg 284b8e80941Smrg deref->mode = parent_mode; 285b8e80941Smrg } 286b8e80941Smrg } 287b8e80941Smrg } 288b8e80941Smrg} 289b8e80941Smrg 290b8e80941Smrgstatic bool 291b8e80941Smrgmodes_may_alias(nir_variable_mode a, nir_variable_mode b) 292b8e80941Smrg{ 293b8e80941Smrg /* Generic pointers can alias with SSBOs */ 294b8e80941Smrg if ((a == nir_var_mem_ssbo || a == nir_var_mem_global) && 295b8e80941Smrg (b == nir_var_mem_ssbo || b == nir_var_mem_global)) 296b8e80941Smrg return true; 297b8e80941Smrg 298b8e80941Smrg /* In the general case, pointers can only alias if they have the same mode. 299b8e80941Smrg * 300b8e80941Smrg * NOTE: In future, with things like OpenCL generic pointers, this may not 301b8e80941Smrg * be true and will have to be re-evaluated. However, with graphics only, 302b8e80941Smrg * it should be safe. 303b8e80941Smrg */ 304b8e80941Smrg return a == b; 305b8e80941Smrg} 306b8e80941Smrg 307b8e80941Smrgstatic bool 308b8e80941Smrgderef_path_contains_coherent_decoration(nir_deref_path *path) 309b8e80941Smrg{ 310b8e80941Smrg assert(path->path[0]->deref_type == nir_deref_type_var); 311b8e80941Smrg 312b8e80941Smrg if (path->path[0]->var->data.image.access & ACCESS_COHERENT) 313b8e80941Smrg return true; 314b8e80941Smrg 315b8e80941Smrg for (nir_deref_instr **p = &path->path[1]; *p; p++) { 316b8e80941Smrg if ((*p)->deref_type != nir_deref_type_struct) 317b8e80941Smrg continue; 318b8e80941Smrg 319b8e80941Smrg const struct glsl_type *struct_type = (*(p - 1))->type; 320b8e80941Smrg const struct glsl_struct_field *field = 321b8e80941Smrg glsl_get_struct_field_data(struct_type, (*p)->strct.index); 322b8e80941Smrg if (field->memory_coherent) 323b8e80941Smrg return true; 324b8e80941Smrg } 325b8e80941Smrg 326b8e80941Smrg return false; 327b8e80941Smrg} 328b8e80941Smrg 329b8e80941Smrgnir_deref_compare_result 330b8e80941Smrgnir_compare_deref_paths(nir_deref_path *a_path, 331b8e80941Smrg nir_deref_path *b_path) 332b8e80941Smrg{ 333b8e80941Smrg if (!modes_may_alias(b_path->path[0]->mode, a_path->path[0]->mode)) 334b8e80941Smrg return nir_derefs_do_not_alias; 335b8e80941Smrg 336b8e80941Smrg if (a_path->path[0]->deref_type != b_path->path[0]->deref_type) 337b8e80941Smrg return nir_derefs_may_alias_bit; 338b8e80941Smrg 339b8e80941Smrg if (a_path->path[0]->deref_type == nir_deref_type_var) { 340b8e80941Smrg if (a_path->path[0]->var != b_path->path[0]->var) { 341b8e80941Smrg /* Shader and function temporaries aren't backed by memory so two 342b8e80941Smrg * distinct variables never alias. 343b8e80941Smrg */ 344b8e80941Smrg static const nir_variable_mode temp_var_modes = 345b8e80941Smrg nir_var_shader_temp | nir_var_function_temp; 346b8e80941Smrg if ((a_path->path[0]->mode & temp_var_modes) || 347b8e80941Smrg (b_path->path[0]->mode & temp_var_modes)) 348b8e80941Smrg return nir_derefs_do_not_alias; 349b8e80941Smrg 350b8e80941Smrg /* If they are both declared coherent or have coherent somewhere in 351b8e80941Smrg * their path (due to a member of an interface being declared 352b8e80941Smrg * coherent), we have to assume we that we could have any kind of 353b8e80941Smrg * aliasing. Otherwise, they could still alias but the client didn't 354b8e80941Smrg * tell us and that's their fault. 355b8e80941Smrg */ 356b8e80941Smrg if (deref_path_contains_coherent_decoration(a_path) && 357b8e80941Smrg deref_path_contains_coherent_decoration(b_path)) 358b8e80941Smrg return nir_derefs_may_alias_bit; 359b8e80941Smrg 360b8e80941Smrg /* If we can chase the deref all the way back to the variable and 361b8e80941Smrg * they're not the same variable and at least one is not declared 362b8e80941Smrg * coherent, we know they can't possibly alias. 363b8e80941Smrg */ 364b8e80941Smrg return nir_derefs_do_not_alias; 365b8e80941Smrg } 366b8e80941Smrg } else { 367b8e80941Smrg assert(a_path->path[0]->deref_type == nir_deref_type_cast); 368b8e80941Smrg /* If they're not exactly the same cast, it's hard to compare them so we 369b8e80941Smrg * just assume they alias. Comparing casts is tricky as there are lots 370b8e80941Smrg * of things such as mode, type, etc. to make sure work out; for now, we 371b8e80941Smrg * just assume nit_opt_deref will combine them and compare the deref 372b8e80941Smrg * instructions. 373b8e80941Smrg * 374b8e80941Smrg * TODO: At some point in the future, we could be clever and understand 375b8e80941Smrg * that a float[] and int[] have the same layout and aliasing structure 376b8e80941Smrg * but double[] and vec3[] do not and we could potentially be a bit 377b8e80941Smrg * smarter here. 378b8e80941Smrg */ 379b8e80941Smrg if (a_path->path[0] != b_path->path[0]) 380b8e80941Smrg return nir_derefs_may_alias_bit; 381b8e80941Smrg } 382b8e80941Smrg 383b8e80941Smrg /* Start off assuming they fully compare. We ignore equality for now. In 384b8e80941Smrg * the end, we'll determine that by containment. 385b8e80941Smrg */ 386b8e80941Smrg nir_deref_compare_result result = nir_derefs_may_alias_bit | 387b8e80941Smrg nir_derefs_a_contains_b_bit | 388b8e80941Smrg nir_derefs_b_contains_a_bit; 389b8e80941Smrg 390b8e80941Smrg nir_deref_instr **a_p = &a_path->path[1]; 391b8e80941Smrg nir_deref_instr **b_p = &b_path->path[1]; 392b8e80941Smrg while (*a_p != NULL && *a_p == *b_p) { 393b8e80941Smrg a_p++; 394b8e80941Smrg b_p++; 395b8e80941Smrg } 396b8e80941Smrg 397b8e80941Smrg /* We're at either the tail or the divergence point between the two deref 398b8e80941Smrg * paths. Look to see if either contains a ptr_as_array deref. It it 399b8e80941Smrg * does we don't know how to safely make any inferences. Hopefully, 400b8e80941Smrg * nir_opt_deref will clean most of these up and we can start inferring 401b8e80941Smrg * things again. 402b8e80941Smrg * 403b8e80941Smrg * In theory, we could do a bit better. For instance, we could detect the 404b8e80941Smrg * case where we have exactly one ptr_as_array deref in the chain after the 405b8e80941Smrg * divergence point and it's matched in both chains and the two chains have 406b8e80941Smrg * different constant indices. 407b8e80941Smrg */ 408b8e80941Smrg for (nir_deref_instr **t_p = a_p; *t_p; t_p++) { 409b8e80941Smrg if ((*t_p)->deref_type == nir_deref_type_ptr_as_array) 410b8e80941Smrg return nir_derefs_may_alias_bit; 411b8e80941Smrg } 412b8e80941Smrg for (nir_deref_instr **t_p = b_p; *t_p; t_p++) { 413b8e80941Smrg if ((*t_p)->deref_type == nir_deref_type_ptr_as_array) 414b8e80941Smrg return nir_derefs_may_alias_bit; 415b8e80941Smrg } 416b8e80941Smrg 417b8e80941Smrg while (*a_p != NULL && *b_p != NULL) { 418b8e80941Smrg nir_deref_instr *a_tail = *(a_p++); 419b8e80941Smrg nir_deref_instr *b_tail = *(b_p++); 420b8e80941Smrg 421b8e80941Smrg switch (a_tail->deref_type) { 422b8e80941Smrg case nir_deref_type_array: 423b8e80941Smrg case nir_deref_type_array_wildcard: { 424b8e80941Smrg assert(b_tail->deref_type == nir_deref_type_array || 425b8e80941Smrg b_tail->deref_type == nir_deref_type_array_wildcard); 426b8e80941Smrg 427b8e80941Smrg if (a_tail->deref_type == nir_deref_type_array_wildcard) { 428b8e80941Smrg if (b_tail->deref_type != nir_deref_type_array_wildcard) 429b8e80941Smrg result &= ~nir_derefs_b_contains_a_bit; 430b8e80941Smrg } else if (b_tail->deref_type == nir_deref_type_array_wildcard) { 431b8e80941Smrg if (a_tail->deref_type != nir_deref_type_array_wildcard) 432b8e80941Smrg result &= ~nir_derefs_a_contains_b_bit; 433b8e80941Smrg } else { 434b8e80941Smrg assert(a_tail->deref_type == nir_deref_type_array && 435b8e80941Smrg b_tail->deref_type == nir_deref_type_array); 436b8e80941Smrg assert(a_tail->arr.index.is_ssa && b_tail->arr.index.is_ssa); 437b8e80941Smrg 438b8e80941Smrg if (nir_src_is_const(a_tail->arr.index) && 439b8e80941Smrg nir_src_is_const(b_tail->arr.index)) { 440b8e80941Smrg /* If they're both direct and have different offsets, they 441b8e80941Smrg * don't even alias much less anything else. 442b8e80941Smrg */ 443b8e80941Smrg if (nir_src_as_uint(a_tail->arr.index) != 444b8e80941Smrg nir_src_as_uint(b_tail->arr.index)) 445b8e80941Smrg return nir_derefs_do_not_alias; 446b8e80941Smrg } else if (a_tail->arr.index.ssa == b_tail->arr.index.ssa) { 447b8e80941Smrg /* They're the same indirect, continue on */ 448b8e80941Smrg } else { 449b8e80941Smrg /* They're not the same index so we can't prove anything about 450b8e80941Smrg * containment. 451b8e80941Smrg */ 452b8e80941Smrg result &= ~(nir_derefs_a_contains_b_bit | nir_derefs_b_contains_a_bit); 453b8e80941Smrg } 454b8e80941Smrg } 455b8e80941Smrg break; 456b8e80941Smrg } 457b8e80941Smrg 458b8e80941Smrg case nir_deref_type_struct: { 459b8e80941Smrg /* If they're different struct members, they don't even alias */ 460b8e80941Smrg if (a_tail->strct.index != b_tail->strct.index) 461b8e80941Smrg return nir_derefs_do_not_alias; 462b8e80941Smrg break; 463b8e80941Smrg } 464b8e80941Smrg 465b8e80941Smrg default: 466b8e80941Smrg unreachable("Invalid deref type"); 467b8e80941Smrg } 468b8e80941Smrg } 469b8e80941Smrg 470b8e80941Smrg /* If a is longer than b, then it can't contain b */ 471b8e80941Smrg if (*a_p != NULL) 472b8e80941Smrg result &= ~nir_derefs_a_contains_b_bit; 473b8e80941Smrg if (*b_p != NULL) 474b8e80941Smrg result &= ~nir_derefs_b_contains_a_bit; 475b8e80941Smrg 476b8e80941Smrg /* If a contains b and b contains a they must be equal. */ 477b8e80941Smrg if ((result & nir_derefs_a_contains_b_bit) && (result & nir_derefs_b_contains_a_bit)) 478b8e80941Smrg result |= nir_derefs_equal_bit; 479b8e80941Smrg 480b8e80941Smrg return result; 481b8e80941Smrg} 482b8e80941Smrg 483b8e80941Smrgnir_deref_compare_result 484b8e80941Smrgnir_compare_derefs(nir_deref_instr *a, nir_deref_instr *b) 485b8e80941Smrg{ 486b8e80941Smrg if (a == b) { 487b8e80941Smrg return nir_derefs_equal_bit | nir_derefs_may_alias_bit | 488b8e80941Smrg nir_derefs_a_contains_b_bit | nir_derefs_b_contains_a_bit; 489b8e80941Smrg } 490b8e80941Smrg 491b8e80941Smrg nir_deref_path a_path, b_path; 492b8e80941Smrg nir_deref_path_init(&a_path, a, NULL); 493b8e80941Smrg nir_deref_path_init(&b_path, b, NULL); 494b8e80941Smrg assert(a_path.path[0]->deref_type == nir_deref_type_var || 495b8e80941Smrg a_path.path[0]->deref_type == nir_deref_type_cast); 496b8e80941Smrg assert(b_path.path[0]->deref_type == nir_deref_type_var || 497b8e80941Smrg b_path.path[0]->deref_type == nir_deref_type_cast); 498b8e80941Smrg 499b8e80941Smrg nir_deref_compare_result result = nir_compare_deref_paths(&a_path, &b_path); 500b8e80941Smrg 501b8e80941Smrg nir_deref_path_finish(&a_path); 502b8e80941Smrg nir_deref_path_finish(&b_path); 503b8e80941Smrg 504b8e80941Smrg return result; 505b8e80941Smrg} 506b8e80941Smrg 507b8e80941Smrgstruct rematerialize_deref_state { 508b8e80941Smrg bool progress; 509b8e80941Smrg nir_builder builder; 510b8e80941Smrg nir_block *block; 511b8e80941Smrg struct hash_table *cache; 512b8e80941Smrg}; 513b8e80941Smrg 514b8e80941Smrgstatic nir_deref_instr * 515b8e80941Smrgrematerialize_deref_in_block(nir_deref_instr *deref, 516b8e80941Smrg struct rematerialize_deref_state *state) 517b8e80941Smrg{ 518b8e80941Smrg if (deref->instr.block == state->block) 519b8e80941Smrg return deref; 520b8e80941Smrg 521b8e80941Smrg if (!state->cache) { 522b8e80941Smrg state->cache = _mesa_pointer_hash_table_create(NULL); 523b8e80941Smrg } 524b8e80941Smrg 525b8e80941Smrg struct hash_entry *cached = _mesa_hash_table_search(state->cache, deref); 526b8e80941Smrg if (cached) 527b8e80941Smrg return cached->data; 528b8e80941Smrg 529b8e80941Smrg nir_builder *b = &state->builder; 530b8e80941Smrg nir_deref_instr *new_deref = 531b8e80941Smrg nir_deref_instr_create(b->shader, deref->deref_type); 532b8e80941Smrg new_deref->mode = deref->mode; 533b8e80941Smrg new_deref->type = deref->type; 534b8e80941Smrg 535b8e80941Smrg if (deref->deref_type == nir_deref_type_var) { 536b8e80941Smrg new_deref->var = deref->var; 537b8e80941Smrg } else { 538b8e80941Smrg nir_deref_instr *parent = nir_src_as_deref(deref->parent); 539b8e80941Smrg if (parent) { 540b8e80941Smrg parent = rematerialize_deref_in_block(parent, state); 541b8e80941Smrg new_deref->parent = nir_src_for_ssa(&parent->dest.ssa); 542b8e80941Smrg } else { 543b8e80941Smrg nir_src_copy(&new_deref->parent, &deref->parent, new_deref); 544b8e80941Smrg } 545b8e80941Smrg } 546b8e80941Smrg 547b8e80941Smrg switch (deref->deref_type) { 548b8e80941Smrg case nir_deref_type_var: 549b8e80941Smrg case nir_deref_type_array_wildcard: 550b8e80941Smrg case nir_deref_type_cast: 551b8e80941Smrg /* Nothing more to do */ 552b8e80941Smrg break; 553b8e80941Smrg 554b8e80941Smrg case nir_deref_type_array: 555b8e80941Smrg assert(!nir_src_as_deref(deref->arr.index)); 556b8e80941Smrg nir_src_copy(&new_deref->arr.index, &deref->arr.index, new_deref); 557b8e80941Smrg break; 558b8e80941Smrg 559b8e80941Smrg case nir_deref_type_struct: 560b8e80941Smrg new_deref->strct.index = deref->strct.index; 561b8e80941Smrg break; 562b8e80941Smrg 563b8e80941Smrg default: 564b8e80941Smrg unreachable("Invalid deref instruction type"); 565b8e80941Smrg } 566b8e80941Smrg 567b8e80941Smrg nir_ssa_dest_init(&new_deref->instr, &new_deref->dest, 568b8e80941Smrg deref->dest.ssa.num_components, 569b8e80941Smrg deref->dest.ssa.bit_size, 570b8e80941Smrg deref->dest.ssa.name); 571b8e80941Smrg nir_builder_instr_insert(b, &new_deref->instr); 572b8e80941Smrg 573b8e80941Smrg return new_deref; 574b8e80941Smrg} 575b8e80941Smrg 576b8e80941Smrgstatic bool 577b8e80941Smrgrematerialize_deref_src(nir_src *src, void *_state) 578b8e80941Smrg{ 579b8e80941Smrg struct rematerialize_deref_state *state = _state; 580b8e80941Smrg 581b8e80941Smrg nir_deref_instr *deref = nir_src_as_deref(*src); 582b8e80941Smrg if (!deref) 583b8e80941Smrg return true; 584b8e80941Smrg 585b8e80941Smrg nir_deref_instr *block_deref = rematerialize_deref_in_block(deref, state); 586b8e80941Smrg if (block_deref != deref) { 587b8e80941Smrg nir_instr_rewrite_src(src->parent_instr, src, 588b8e80941Smrg nir_src_for_ssa(&block_deref->dest.ssa)); 589b8e80941Smrg nir_deref_instr_remove_if_unused(deref); 590b8e80941Smrg state->progress = true; 591b8e80941Smrg } 592b8e80941Smrg 593b8e80941Smrg return true; 594b8e80941Smrg} 595b8e80941Smrg 596b8e80941Smrg/** Re-materialize derefs in every block 597b8e80941Smrg * 598b8e80941Smrg * This pass re-materializes deref instructions in every block in which it is 599b8e80941Smrg * used. After this pass has been run, every use of a deref will be of a 600b8e80941Smrg * deref in the same block as the use. Also, all unused derefs will be 601b8e80941Smrg * deleted as a side-effect. 602b8e80941Smrg */ 603b8e80941Smrgbool 604b8e80941Smrgnir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl) 605b8e80941Smrg{ 606b8e80941Smrg struct rematerialize_deref_state state = { 0 }; 607b8e80941Smrg nir_builder_init(&state.builder, impl); 608b8e80941Smrg 609b8e80941Smrg nir_foreach_block(block, impl) { 610b8e80941Smrg state.block = block; 611b8e80941Smrg 612b8e80941Smrg /* Start each block with a fresh cache */ 613b8e80941Smrg if (state.cache) 614b8e80941Smrg _mesa_hash_table_clear(state.cache, NULL); 615b8e80941Smrg 616b8e80941Smrg nir_foreach_instr_safe(instr, block) { 617b8e80941Smrg if (instr->type == nir_instr_type_deref && 618b8e80941Smrg nir_deref_instr_remove_if_unused(nir_instr_as_deref(instr))) 619b8e80941Smrg continue; 620b8e80941Smrg 621b8e80941Smrg state.builder.cursor = nir_before_instr(instr); 622b8e80941Smrg nir_foreach_src(instr, rematerialize_deref_src, &state); 623b8e80941Smrg } 624b8e80941Smrg 625b8e80941Smrg#ifndef NDEBUG 626b8e80941Smrg nir_if *following_if = nir_block_get_following_if(block); 627b8e80941Smrg if (following_if) 628b8e80941Smrg assert(!nir_src_as_deref(following_if->condition)); 629b8e80941Smrg#endif 630b8e80941Smrg } 631b8e80941Smrg 632b8e80941Smrg _mesa_hash_table_destroy(state.cache, NULL); 633b8e80941Smrg 634b8e80941Smrg return state.progress; 635b8e80941Smrg} 636b8e80941Smrg 637b8e80941Smrgstatic bool 638b8e80941Smrgis_trivial_deref_cast(nir_deref_instr *cast) 639b8e80941Smrg{ 640b8e80941Smrg nir_deref_instr *parent = nir_src_as_deref(cast->parent); 641b8e80941Smrg if (!parent) 642b8e80941Smrg return false; 643b8e80941Smrg 644b8e80941Smrg return cast->mode == parent->mode && 645b8e80941Smrg cast->type == parent->type && 646b8e80941Smrg cast->dest.ssa.num_components == parent->dest.ssa.num_components && 647b8e80941Smrg cast->dest.ssa.bit_size == parent->dest.ssa.bit_size; 648b8e80941Smrg} 649b8e80941Smrg 650b8e80941Smrgstatic bool 651b8e80941Smrgis_trivial_array_deref_cast(nir_deref_instr *cast) 652b8e80941Smrg{ 653b8e80941Smrg assert(is_trivial_deref_cast(cast)); 654b8e80941Smrg 655b8e80941Smrg nir_deref_instr *parent = nir_src_as_deref(cast->parent); 656b8e80941Smrg 657b8e80941Smrg if (parent->deref_type == nir_deref_type_array) { 658b8e80941Smrg return cast->cast.ptr_stride == 659b8e80941Smrg glsl_get_explicit_stride(nir_deref_instr_parent(parent)->type); 660b8e80941Smrg } else if (parent->deref_type == nir_deref_type_ptr_as_array) { 661b8e80941Smrg return cast->cast.ptr_stride == 662b8e80941Smrg nir_deref_instr_ptr_as_array_stride(parent); 663b8e80941Smrg } else { 664b8e80941Smrg return false; 665b8e80941Smrg } 666b8e80941Smrg} 667b8e80941Smrg 668b8e80941Smrgstatic bool 669b8e80941Smrgis_deref_ptr_as_array(nir_instr *instr) 670b8e80941Smrg{ 671b8e80941Smrg return instr->type == nir_instr_type_deref && 672b8e80941Smrg nir_instr_as_deref(instr)->deref_type == nir_deref_type_ptr_as_array; 673b8e80941Smrg} 674b8e80941Smrg 675b8e80941Smrg/** 676b8e80941Smrg * Remove casts that just wrap other casts. 677b8e80941Smrg */ 678b8e80941Smrgstatic bool 679b8e80941Smrgopt_remove_cast_cast(nir_deref_instr *cast) 680b8e80941Smrg{ 681b8e80941Smrg nir_deref_instr *first_cast = cast; 682b8e80941Smrg 683b8e80941Smrg while (true) { 684b8e80941Smrg nir_deref_instr *parent = nir_deref_instr_parent(first_cast); 685b8e80941Smrg if (parent == NULL || parent->deref_type != nir_deref_type_cast) 686b8e80941Smrg break; 687b8e80941Smrg first_cast = parent; 688b8e80941Smrg } 689b8e80941Smrg if (cast == first_cast) 690b8e80941Smrg return false; 691b8e80941Smrg 692b8e80941Smrg nir_instr_rewrite_src(&cast->instr, &cast->parent, 693b8e80941Smrg nir_src_for_ssa(first_cast->parent.ssa)); 694b8e80941Smrg return true; 695b8e80941Smrg} 696b8e80941Smrg 697b8e80941Smrg/** 698b8e80941Smrg * Is this casting a struct to a contained struct. 699b8e80941Smrg * struct a { struct b field0 }; 700b8e80941Smrg * ssa_5 is structa; 701b8e80941Smrg * deref_cast (structb *)ssa_5 (function_temp structb); 702b8e80941Smrg * converts to 703b8e80941Smrg * deref_struct &ssa_5->field0 (function_temp structb); 704b8e80941Smrg * This allows subsequent copy propagation to work. 705b8e80941Smrg */ 706b8e80941Smrgstatic bool 707b8e80941Smrgopt_replace_struct_wrapper_cast(nir_builder *b, nir_deref_instr *cast) 708b8e80941Smrg{ 709b8e80941Smrg nir_deref_instr *parent = nir_src_as_deref(cast->parent); 710b8e80941Smrg if (!parent) 711b8e80941Smrg return false; 712b8e80941Smrg 713b8e80941Smrg if (!glsl_type_is_struct(parent->type)) 714b8e80941Smrg return false; 715b8e80941Smrg 716b8e80941Smrg if (glsl_get_struct_field_offset(parent->type, 0) != 0) 717b8e80941Smrg return false; 718b8e80941Smrg 719b8e80941Smrg if (cast->type != glsl_get_struct_field(parent->type, 0)) 720b8e80941Smrg return false; 721b8e80941Smrg 722b8e80941Smrg nir_deref_instr *replace = nir_build_deref_struct(b, parent, 0); 723b8e80941Smrg nir_ssa_def_rewrite_uses(&cast->dest.ssa, nir_src_for_ssa(&replace->dest.ssa)); 724b8e80941Smrg nir_deref_instr_remove_if_unused(cast); 725b8e80941Smrg return true; 726b8e80941Smrg} 727b8e80941Smrg 728b8e80941Smrgstatic bool 729b8e80941Smrgopt_deref_cast(nir_builder *b, nir_deref_instr *cast) 730b8e80941Smrg{ 731b8e80941Smrg bool progress; 732b8e80941Smrg 733b8e80941Smrg if (opt_replace_struct_wrapper_cast(b, cast)) 734b8e80941Smrg return true; 735b8e80941Smrg 736b8e80941Smrg progress = opt_remove_cast_cast(cast); 737b8e80941Smrg if (!is_trivial_deref_cast(cast)) 738b8e80941Smrg return progress; 739b8e80941Smrg 740b8e80941Smrg bool trivial_array_cast = is_trivial_array_deref_cast(cast); 741b8e80941Smrg 742b8e80941Smrg assert(cast->dest.is_ssa); 743b8e80941Smrg assert(cast->parent.is_ssa); 744b8e80941Smrg 745b8e80941Smrg nir_foreach_use_safe(use_src, &cast->dest.ssa) { 746b8e80941Smrg /* If this isn't a trivial array cast, we can't propagate into 747b8e80941Smrg * ptr_as_array derefs. 748b8e80941Smrg */ 749b8e80941Smrg if (is_deref_ptr_as_array(use_src->parent_instr) && 750b8e80941Smrg !trivial_array_cast) 751b8e80941Smrg continue; 752b8e80941Smrg 753b8e80941Smrg nir_instr_rewrite_src(use_src->parent_instr, use_src, cast->parent); 754b8e80941Smrg progress = true; 755b8e80941Smrg } 756b8e80941Smrg 757b8e80941Smrg /* If uses would be a bit crazy */ 758b8e80941Smrg assert(list_empty(&cast->dest.ssa.if_uses)); 759b8e80941Smrg 760b8e80941Smrg nir_deref_instr_remove_if_unused(cast); 761b8e80941Smrg return progress; 762b8e80941Smrg} 763b8e80941Smrg 764b8e80941Smrgstatic bool 765b8e80941Smrgopt_deref_ptr_as_array(nir_builder *b, nir_deref_instr *deref) 766b8e80941Smrg{ 767b8e80941Smrg assert(deref->deref_type == nir_deref_type_ptr_as_array); 768b8e80941Smrg 769b8e80941Smrg nir_deref_instr *parent = nir_deref_instr_parent(deref); 770b8e80941Smrg 771b8e80941Smrg if (nir_src_is_const(deref->arr.index) && 772b8e80941Smrg nir_src_as_int(deref->arr.index) == 0) { 773b8e80941Smrg /* If it's a ptr_as_array deref with an index of 0, it does nothing 774b8e80941Smrg * and we can just replace its uses with its parent. 775b8e80941Smrg * 776b8e80941Smrg * The source of a ptr_as_array deref always has a deref_type of 777b8e80941Smrg * nir_deref_type_array or nir_deref_type_cast. If it's a cast, it 778b8e80941Smrg * may be trivial and we may be able to get rid of that too. Any 779b8e80941Smrg * trivial cast of trivial cast cases should be handled already by 780b8e80941Smrg * opt_deref_cast() above. 781b8e80941Smrg */ 782b8e80941Smrg if (parent->deref_type == nir_deref_type_cast && 783b8e80941Smrg is_trivial_deref_cast(parent)) 784b8e80941Smrg parent = nir_deref_instr_parent(parent); 785b8e80941Smrg nir_ssa_def_rewrite_uses(&deref->dest.ssa, 786b8e80941Smrg nir_src_for_ssa(&parent->dest.ssa)); 787b8e80941Smrg nir_instr_remove(&deref->instr); 788b8e80941Smrg return true; 789b8e80941Smrg } 790b8e80941Smrg 791b8e80941Smrg if (parent->deref_type != nir_deref_type_array && 792b8e80941Smrg parent->deref_type != nir_deref_type_ptr_as_array) 793b8e80941Smrg return false; 794b8e80941Smrg 795b8e80941Smrg assert(parent->parent.is_ssa); 796b8e80941Smrg assert(parent->arr.index.is_ssa); 797b8e80941Smrg assert(deref->arr.index.is_ssa); 798b8e80941Smrg 799b8e80941Smrg nir_ssa_def *new_idx = nir_iadd(b, parent->arr.index.ssa, 800b8e80941Smrg deref->arr.index.ssa); 801b8e80941Smrg 802b8e80941Smrg deref->deref_type = parent->deref_type; 803b8e80941Smrg nir_instr_rewrite_src(&deref->instr, &deref->parent, parent->parent); 804b8e80941Smrg nir_instr_rewrite_src(&deref->instr, &deref->arr.index, 805b8e80941Smrg nir_src_for_ssa(new_idx)); 806b8e80941Smrg return true; 807b8e80941Smrg} 808b8e80941Smrg 809b8e80941Smrgbool 810b8e80941Smrgnir_opt_deref_impl(nir_function_impl *impl) 811b8e80941Smrg{ 812b8e80941Smrg bool progress = false; 813b8e80941Smrg 814b8e80941Smrg nir_builder b; 815b8e80941Smrg nir_builder_init(&b, impl); 816b8e80941Smrg 817b8e80941Smrg nir_foreach_block(block, impl) { 818b8e80941Smrg nir_foreach_instr_safe(instr, block) { 819b8e80941Smrg if (instr->type != nir_instr_type_deref) 820b8e80941Smrg continue; 821b8e80941Smrg 822b8e80941Smrg b.cursor = nir_before_instr(instr); 823b8e80941Smrg 824b8e80941Smrg nir_deref_instr *deref = nir_instr_as_deref(instr); 825b8e80941Smrg switch (deref->deref_type) { 826b8e80941Smrg case nir_deref_type_ptr_as_array: 827b8e80941Smrg if (opt_deref_ptr_as_array(&b, deref)) 828b8e80941Smrg progress = true; 829b8e80941Smrg break; 830b8e80941Smrg 831b8e80941Smrg case nir_deref_type_cast: 832b8e80941Smrg if (opt_deref_cast(&b, deref)) 833b8e80941Smrg progress = true; 834b8e80941Smrg break; 835b8e80941Smrg 836b8e80941Smrg default: 837b8e80941Smrg /* Do nothing */ 838b8e80941Smrg break; 839b8e80941Smrg } 840b8e80941Smrg } 841b8e80941Smrg } 842b8e80941Smrg 843b8e80941Smrg if (progress) { 844b8e80941Smrg nir_metadata_preserve(impl, nir_metadata_block_index | 845b8e80941Smrg nir_metadata_dominance); 846b8e80941Smrg } else { 847b8e80941Smrg#ifndef NDEBUG 848b8e80941Smrg impl->valid_metadata &= ~nir_metadata_not_properly_reset; 849b8e80941Smrg#endif 850b8e80941Smrg } 851b8e80941Smrg 852b8e80941Smrg return progress; 853b8e80941Smrg} 854b8e80941Smrg 855b8e80941Smrgbool 856b8e80941Smrgnir_opt_deref(nir_shader *shader) 857b8e80941Smrg{ 858b8e80941Smrg bool progress = false; 859b8e80941Smrg 860b8e80941Smrg nir_foreach_function(func, shader) { 861b8e80941Smrg if (func->impl && nir_opt_deref_impl(func->impl)) 862b8e80941Smrg progress = true; 863b8e80941Smrg } 864b8e80941Smrg 865b8e80941Smrg return progress; 866b8e80941Smrg} 867