101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2018 Intel Corporation 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2101e04c3fSmrg * IN THE SOFTWARE. 2201e04c3fSmrg */ 2301e04c3fSmrg 2401e04c3fSmrg#include "nir.h" 2501e04c3fSmrg#include "nir_builder.h" 2601e04c3fSmrg#include "nir_deref.h" 2701e04c3fSmrg#include "util/hash_table.h" 2801e04c3fSmrg 297ec681f3Smrgstatic bool 307ec681f3Smrgis_trivial_deref_cast(nir_deref_instr *cast) 317ec681f3Smrg{ 327ec681f3Smrg nir_deref_instr *parent = nir_src_as_deref(cast->parent); 337ec681f3Smrg if (!parent) 347ec681f3Smrg return false; 357ec681f3Smrg 367ec681f3Smrg return cast->modes == parent->modes && 377ec681f3Smrg cast->type == parent->type && 387ec681f3Smrg cast->dest.ssa.num_components == parent->dest.ssa.num_components && 397ec681f3Smrg cast->dest.ssa.bit_size == parent->dest.ssa.bit_size; 407ec681f3Smrg} 417ec681f3Smrg 4201e04c3fSmrgvoid 4301e04c3fSmrgnir_deref_path_init(nir_deref_path *path, 4401e04c3fSmrg nir_deref_instr *deref, void *mem_ctx) 4501e04c3fSmrg{ 4601e04c3fSmrg assert(deref != NULL); 4701e04c3fSmrg 4801e04c3fSmrg /* The length of the short path is at most ARRAY_SIZE - 1 because we need 4901e04c3fSmrg * room for the NULL terminator. 5001e04c3fSmrg */ 5101e04c3fSmrg static const int max_short_path_len = ARRAY_SIZE(path->_short_path) - 1; 5201e04c3fSmrg 5301e04c3fSmrg int count = 0; 5401e04c3fSmrg 5501e04c3fSmrg nir_deref_instr **tail = &path->_short_path[max_short_path_len]; 5601e04c3fSmrg nir_deref_instr **head = tail; 5701e04c3fSmrg 5801e04c3fSmrg *tail = NULL; 5901e04c3fSmrg for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) { 607ec681f3Smrg if (d->deref_type == nir_deref_type_cast && is_trivial_deref_cast(d)) 617ec681f3Smrg continue; 6201e04c3fSmrg count++; 6301e04c3fSmrg if (count <= max_short_path_len) 6401e04c3fSmrg *(--head) = d; 6501e04c3fSmrg } 6601e04c3fSmrg 6701e04c3fSmrg if (count <= max_short_path_len) { 6801e04c3fSmrg /* If we're under max_short_path_len, just use the short path. */ 6901e04c3fSmrg path->path = head; 7001e04c3fSmrg goto done; 7101e04c3fSmrg } 7201e04c3fSmrg 7301e04c3fSmrg#ifndef NDEBUG 7401e04c3fSmrg /* Just in case someone uses short_path by accident */ 7501e04c3fSmrg for (unsigned i = 0; i < ARRAY_SIZE(path->_short_path); i++) 767ec681f3Smrg path->_short_path[i] = (void *)(uintptr_t)0xdeadbeef; 7701e04c3fSmrg#endif 7801e04c3fSmrg 7901e04c3fSmrg path->path = ralloc_array(mem_ctx, nir_deref_instr *, count + 1); 8001e04c3fSmrg head = tail = path->path + count; 8101e04c3fSmrg *tail = NULL; 827ec681f3Smrg for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) { 837ec681f3Smrg if (d->deref_type == nir_deref_type_cast && is_trivial_deref_cast(d)) 847ec681f3Smrg continue; 8501e04c3fSmrg *(--head) = d; 867ec681f3Smrg } 8701e04c3fSmrg 8801e04c3fSmrgdone: 8901e04c3fSmrg assert(head == path->path); 9001e04c3fSmrg assert(tail == head + count); 9101e04c3fSmrg assert(*tail == NULL); 9201e04c3fSmrg} 9301e04c3fSmrg 9401e04c3fSmrgvoid 9501e04c3fSmrgnir_deref_path_finish(nir_deref_path *path) 9601e04c3fSmrg{ 9701e04c3fSmrg if (path->path < &path->_short_path[0] || 9801e04c3fSmrg path->path > &path->_short_path[ARRAY_SIZE(path->_short_path) - 1]) 9901e04c3fSmrg ralloc_free(path->path); 10001e04c3fSmrg} 10101e04c3fSmrg 10201e04c3fSmrg/** 10301e04c3fSmrg * Recursively removes unused deref instructions 10401e04c3fSmrg */ 10501e04c3fSmrgbool 10601e04c3fSmrgnir_deref_instr_remove_if_unused(nir_deref_instr *instr) 10701e04c3fSmrg{ 10801e04c3fSmrg bool progress = false; 10901e04c3fSmrg 11001e04c3fSmrg for (nir_deref_instr *d = instr; d; d = nir_deref_instr_parent(d)) { 11101e04c3fSmrg /* If anyone is using this deref, leave it alone */ 11201e04c3fSmrg assert(d->dest.is_ssa); 1137ec681f3Smrg if (!nir_ssa_def_is_unused(&d->dest.ssa)) 11401e04c3fSmrg break; 11501e04c3fSmrg 11601e04c3fSmrg nir_instr_remove(&d->instr); 11701e04c3fSmrg progress = true; 11801e04c3fSmrg } 11901e04c3fSmrg 12001e04c3fSmrg return progress; 12101e04c3fSmrg} 12201e04c3fSmrg 12301e04c3fSmrgbool 12401e04c3fSmrgnir_deref_instr_has_indirect(nir_deref_instr *instr) 12501e04c3fSmrg{ 12601e04c3fSmrg while (instr->deref_type != nir_deref_type_var) { 12701e04c3fSmrg /* Consider casts to be indirects */ 12801e04c3fSmrg if (instr->deref_type == nir_deref_type_cast) 12901e04c3fSmrg return true; 13001e04c3fSmrg 1317e102996Smaya if ((instr->deref_type == nir_deref_type_array || 1327e102996Smaya instr->deref_type == nir_deref_type_ptr_as_array) && 13301e04c3fSmrg !nir_src_is_const(instr->arr.index)) 13401e04c3fSmrg return true; 13501e04c3fSmrg 13601e04c3fSmrg instr = nir_deref_instr_parent(instr); 13701e04c3fSmrg } 13801e04c3fSmrg 13901e04c3fSmrg return false; 14001e04c3fSmrg} 14101e04c3fSmrg 1427ec681f3Smrgbool 1437ec681f3Smrgnir_deref_instr_is_known_out_of_bounds(nir_deref_instr *instr) 1447ec681f3Smrg{ 1457ec681f3Smrg for (; instr; instr = nir_deref_instr_parent(instr)) { 1467ec681f3Smrg if (instr->deref_type == nir_deref_type_array && 1477ec681f3Smrg nir_src_is_const(instr->arr.index) && 1487ec681f3Smrg nir_src_as_uint(instr->arr.index) >= 1497ec681f3Smrg glsl_get_length(nir_deref_instr_parent(instr)->type)) 1507ec681f3Smrg return true; 1517ec681f3Smrg } 1527ec681f3Smrg 1537ec681f3Smrg return false; 1547ec681f3Smrg} 1557ec681f3Smrg 1567ec681f3Smrgbool 1577ec681f3Smrgnir_deref_instr_has_complex_use(nir_deref_instr *deref) 1587ec681f3Smrg{ 1597ec681f3Smrg nir_foreach_use(use_src, &deref->dest.ssa) { 1607ec681f3Smrg nir_instr *use_instr = use_src->parent_instr; 1617ec681f3Smrg 1627ec681f3Smrg switch (use_instr->type) { 1637ec681f3Smrg case nir_instr_type_deref: { 1647ec681f3Smrg nir_deref_instr *use_deref = nir_instr_as_deref(use_instr); 1657ec681f3Smrg 1667ec681f3Smrg /* A var deref has no sources */ 1677ec681f3Smrg assert(use_deref->deref_type != nir_deref_type_var); 1687ec681f3Smrg 1697ec681f3Smrg /* If a deref shows up in an array index or something like that, it's 1707ec681f3Smrg * a complex use. 1717ec681f3Smrg */ 1727ec681f3Smrg if (use_src != &use_deref->parent) 1737ec681f3Smrg return true; 1747ec681f3Smrg 1757ec681f3Smrg /* Anything that isn't a basic struct or array deref is considered to 1767ec681f3Smrg * be a "complex" use. In particular, we don't allow ptr_as_array 1777ec681f3Smrg * because we assume that opt_deref will turn any non-complex 1787ec681f3Smrg * ptr_as_array derefs into regular array derefs eventually so passes 1797ec681f3Smrg * which only want to handle simple derefs will pick them up in a 1807ec681f3Smrg * later pass. 1817ec681f3Smrg */ 1827ec681f3Smrg if (use_deref->deref_type != nir_deref_type_struct && 1837ec681f3Smrg use_deref->deref_type != nir_deref_type_array_wildcard && 1847ec681f3Smrg use_deref->deref_type != nir_deref_type_array) 1857ec681f3Smrg return true; 1867ec681f3Smrg 1877ec681f3Smrg if (nir_deref_instr_has_complex_use(use_deref)) 1887ec681f3Smrg return true; 1897ec681f3Smrg 1907ec681f3Smrg continue; 1917ec681f3Smrg } 1927ec681f3Smrg 1937ec681f3Smrg case nir_instr_type_intrinsic: { 1947ec681f3Smrg nir_intrinsic_instr *use_intrin = nir_instr_as_intrinsic(use_instr); 1957ec681f3Smrg switch (use_intrin->intrinsic) { 1967ec681f3Smrg case nir_intrinsic_load_deref: 1977ec681f3Smrg assert(use_src == &use_intrin->src[0]); 1987ec681f3Smrg continue; 1997ec681f3Smrg 2007ec681f3Smrg case nir_intrinsic_copy_deref: 2017ec681f3Smrg assert(use_src == &use_intrin->src[0] || 2027ec681f3Smrg use_src == &use_intrin->src[1]); 2037ec681f3Smrg continue; 2047ec681f3Smrg 2057ec681f3Smrg case nir_intrinsic_store_deref: 2067ec681f3Smrg /* A use in src[1] of a store means we're taking that pointer and 2077ec681f3Smrg * writing it to a variable. Because we have no idea who will 2087ec681f3Smrg * read that variable and what they will do with the pointer, it's 2097ec681f3Smrg * considered a "complex" use. A use in src[0], on the other 2107ec681f3Smrg * hand, is a simple use because we're just going to dereference 2117ec681f3Smrg * it and write a value there. 2127ec681f3Smrg */ 2137ec681f3Smrg if (use_src == &use_intrin->src[0]) 2147ec681f3Smrg continue; 2157ec681f3Smrg return true; 2167ec681f3Smrg 2177ec681f3Smrg default: 2187ec681f3Smrg return true; 2197ec681f3Smrg } 2207ec681f3Smrg unreachable("Switch default failed"); 2217ec681f3Smrg } 2227ec681f3Smrg 2237ec681f3Smrg default: 2247ec681f3Smrg return true; 2257ec681f3Smrg } 2267ec681f3Smrg } 2277ec681f3Smrg 2287ec681f3Smrg nir_foreach_if_use(use, &deref->dest.ssa) 2297ec681f3Smrg return true; 2307ec681f3Smrg 2317ec681f3Smrg return false; 2327ec681f3Smrg} 2337ec681f3Smrg 2347ec681f3Smrgstatic unsigned 2357ec681f3Smrgtype_scalar_size_bytes(const struct glsl_type *type) 2367ec681f3Smrg{ 2377ec681f3Smrg assert(glsl_type_is_vector_or_scalar(type) || 2387ec681f3Smrg glsl_type_is_matrix(type)); 2397ec681f3Smrg return glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8; 2407ec681f3Smrg} 2417ec681f3Smrg 2427e102996Smayaunsigned 2437ec681f3Smrgnir_deref_instr_array_stride(nir_deref_instr *deref) 2447e102996Smaya{ 2457e102996Smaya switch (deref->deref_type) { 2467e102996Smaya case nir_deref_type_array: 2477ec681f3Smrg case nir_deref_type_array_wildcard: { 2487ec681f3Smrg const struct glsl_type *arr_type = nir_deref_instr_parent(deref)->type; 2497ec681f3Smrg unsigned stride = glsl_get_explicit_stride(arr_type); 2507ec681f3Smrg 2517ec681f3Smrg if ((glsl_type_is_matrix(arr_type) && 2527ec681f3Smrg glsl_matrix_type_is_row_major(arr_type)) || 2537ec681f3Smrg (glsl_type_is_vector(arr_type) && stride == 0)) 2547ec681f3Smrg stride = type_scalar_size_bytes(arr_type); 2557ec681f3Smrg 2567ec681f3Smrg return stride; 2577ec681f3Smrg } 2587e102996Smaya case nir_deref_type_ptr_as_array: 2597ec681f3Smrg return nir_deref_instr_array_stride(nir_deref_instr_parent(deref)); 2607e102996Smaya case nir_deref_type_cast: 2617e102996Smaya return deref->cast.ptr_stride; 2627e102996Smaya default: 2637e102996Smaya return 0; 2647e102996Smaya } 2657e102996Smaya} 2667e102996Smaya 26701e04c3fSmrgstatic unsigned 26801e04c3fSmrgtype_get_array_stride(const struct glsl_type *elem_type, 26901e04c3fSmrg glsl_type_size_align_func size_align) 27001e04c3fSmrg{ 27101e04c3fSmrg unsigned elem_size, elem_align; 2727e102996Smaya size_align(elem_type, &elem_size, &elem_align); 27301e04c3fSmrg return ALIGN_POT(elem_size, elem_align); 27401e04c3fSmrg} 27501e04c3fSmrg 27601e04c3fSmrgstatic unsigned 27701e04c3fSmrgstruct_type_get_field_offset(const struct glsl_type *struct_type, 27801e04c3fSmrg glsl_type_size_align_func size_align, 27901e04c3fSmrg unsigned field_idx) 28001e04c3fSmrg{ 2817e102996Smaya assert(glsl_type_is_struct_or_ifc(struct_type)); 28201e04c3fSmrg unsigned offset = 0; 28301e04c3fSmrg for (unsigned i = 0; i <= field_idx; i++) { 28401e04c3fSmrg unsigned elem_size, elem_align; 2857e102996Smaya size_align(glsl_get_struct_field(struct_type, i), &elem_size, &elem_align); 28601e04c3fSmrg offset = ALIGN_POT(offset, elem_align); 28701e04c3fSmrg if (i < field_idx) 28801e04c3fSmrg offset += elem_size; 28901e04c3fSmrg } 29001e04c3fSmrg return offset; 29101e04c3fSmrg} 29201e04c3fSmrg 29301e04c3fSmrgunsigned 29401e04c3fSmrgnir_deref_instr_get_const_offset(nir_deref_instr *deref, 29501e04c3fSmrg glsl_type_size_align_func size_align) 29601e04c3fSmrg{ 29701e04c3fSmrg nir_deref_path path; 29801e04c3fSmrg nir_deref_path_init(&path, deref, NULL); 29901e04c3fSmrg 30001e04c3fSmrg unsigned offset = 0; 30101e04c3fSmrg for (nir_deref_instr **p = &path.path[1]; *p; p++) { 3027ec681f3Smrg switch ((*p)->deref_type) { 3037ec681f3Smrg case nir_deref_type_array: 30401e04c3fSmrg offset += nir_src_as_uint((*p)->arr.index) * 30501e04c3fSmrg type_get_array_stride((*p)->type, size_align); 3067ec681f3Smrg break; 3077ec681f3Smrg case nir_deref_type_struct: { 30801e04c3fSmrg /* p starts at path[1], so this is safe */ 30901e04c3fSmrg nir_deref_instr *parent = *(p - 1); 31001e04c3fSmrg offset += struct_type_get_field_offset(parent->type, size_align, 31101e04c3fSmrg (*p)->strct.index); 3127ec681f3Smrg break; 3137ec681f3Smrg } 3147ec681f3Smrg case nir_deref_type_cast: 3157ec681f3Smrg /* A cast doesn't contribute to the offset */ 3167ec681f3Smrg break; 3177ec681f3Smrg default: 31801e04c3fSmrg unreachable("Unsupported deref type"); 31901e04c3fSmrg } 32001e04c3fSmrg } 32101e04c3fSmrg 32201e04c3fSmrg nir_deref_path_finish(&path); 32301e04c3fSmrg 32401e04c3fSmrg return offset; 32501e04c3fSmrg} 32601e04c3fSmrg 32701e04c3fSmrgnir_ssa_def * 32801e04c3fSmrgnir_build_deref_offset(nir_builder *b, nir_deref_instr *deref, 32901e04c3fSmrg glsl_type_size_align_func size_align) 33001e04c3fSmrg{ 33101e04c3fSmrg nir_deref_path path; 33201e04c3fSmrg nir_deref_path_init(&path, deref, NULL); 33301e04c3fSmrg 3347ec681f3Smrg nir_ssa_def *offset = nir_imm_intN_t(b, 0, deref->dest.ssa.bit_size); 33501e04c3fSmrg for (nir_deref_instr **p = &path.path[1]; *p; p++) { 3367ec681f3Smrg switch ((*p)->deref_type) { 3377ec681f3Smrg case nir_deref_type_array: 3387ec681f3Smrg case nir_deref_type_ptr_as_array: { 33901e04c3fSmrg nir_ssa_def *index = nir_ssa_for_src(b, (*p)->arr.index, 1); 3407e102996Smaya int stride = type_get_array_stride((*p)->type, size_align); 3417ec681f3Smrg offset = nir_iadd(b, offset, nir_amul_imm(b, index, stride)); 3427ec681f3Smrg break; 3437ec681f3Smrg } 3447ec681f3Smrg case nir_deref_type_struct: { 34501e04c3fSmrg /* p starts at path[1], so this is safe */ 34601e04c3fSmrg nir_deref_instr *parent = *(p - 1); 34701e04c3fSmrg unsigned field_offset = 34801e04c3fSmrg struct_type_get_field_offset(parent->type, size_align, 34901e04c3fSmrg (*p)->strct.index); 3507e102996Smaya offset = nir_iadd_imm(b, offset, field_offset); 3517ec681f3Smrg break; 3527ec681f3Smrg } 3537ec681f3Smrg case nir_deref_type_cast: 3547ec681f3Smrg /* A cast doesn't contribute to the offset */ 3557ec681f3Smrg break; 3567ec681f3Smrg default: 35701e04c3fSmrg unreachable("Unsupported deref type"); 35801e04c3fSmrg } 35901e04c3fSmrg } 36001e04c3fSmrg 36101e04c3fSmrg nir_deref_path_finish(&path); 36201e04c3fSmrg 36301e04c3fSmrg return offset; 36401e04c3fSmrg} 36501e04c3fSmrg 36601e04c3fSmrgbool 36701e04c3fSmrgnir_remove_dead_derefs_impl(nir_function_impl *impl) 36801e04c3fSmrg{ 36901e04c3fSmrg bool progress = false; 37001e04c3fSmrg 37101e04c3fSmrg nir_foreach_block(block, impl) { 37201e04c3fSmrg nir_foreach_instr_safe(instr, block) { 37301e04c3fSmrg if (instr->type == nir_instr_type_deref && 37401e04c3fSmrg nir_deref_instr_remove_if_unused(nir_instr_as_deref(instr))) 37501e04c3fSmrg progress = true; 37601e04c3fSmrg } 37701e04c3fSmrg } 37801e04c3fSmrg 37901e04c3fSmrg if (progress) 38001e04c3fSmrg nir_metadata_preserve(impl, nir_metadata_block_index | 38101e04c3fSmrg nir_metadata_dominance); 38201e04c3fSmrg 38301e04c3fSmrg return progress; 38401e04c3fSmrg} 38501e04c3fSmrg 38601e04c3fSmrgbool 38701e04c3fSmrgnir_remove_dead_derefs(nir_shader *shader) 38801e04c3fSmrg{ 38901e04c3fSmrg bool progress = false; 39001e04c3fSmrg nir_foreach_function(function, shader) { 39101e04c3fSmrg if (function->impl && nir_remove_dead_derefs_impl(function->impl)) 39201e04c3fSmrg progress = true; 39301e04c3fSmrg } 39401e04c3fSmrg 39501e04c3fSmrg return progress; 39601e04c3fSmrg} 39701e04c3fSmrg 39801e04c3fSmrgvoid 39901e04c3fSmrgnir_fixup_deref_modes(nir_shader *shader) 40001e04c3fSmrg{ 40101e04c3fSmrg nir_foreach_function(function, shader) { 40201e04c3fSmrg if (!function->impl) 40301e04c3fSmrg continue; 40401e04c3fSmrg 40501e04c3fSmrg nir_foreach_block(block, function->impl) { 40601e04c3fSmrg nir_foreach_instr(instr, block) { 40701e04c3fSmrg if (instr->type != nir_instr_type_deref) 40801e04c3fSmrg continue; 40901e04c3fSmrg 41001e04c3fSmrg nir_deref_instr *deref = nir_instr_as_deref(instr); 4117e102996Smaya if (deref->deref_type == nir_deref_type_cast) 4127e102996Smaya continue; 41301e04c3fSmrg 4147ec681f3Smrg nir_variable_mode parent_modes; 41501e04c3fSmrg if (deref->deref_type == nir_deref_type_var) { 4167ec681f3Smrg parent_modes = deref->var->data.mode; 41701e04c3fSmrg } else { 41801e04c3fSmrg assert(deref->parent.is_ssa); 41901e04c3fSmrg nir_deref_instr *parent = 42001e04c3fSmrg nir_instr_as_deref(deref->parent.ssa->parent_instr); 4217ec681f3Smrg parent_modes = parent->modes; 42201e04c3fSmrg } 42301e04c3fSmrg 4247ec681f3Smrg deref->modes = parent_modes; 42501e04c3fSmrg } 42601e04c3fSmrg } 42701e04c3fSmrg } 42801e04c3fSmrg} 42901e04c3fSmrg 4307e102996Smayastatic bool 4317e102996Smayamodes_may_alias(nir_variable_mode a, nir_variable_mode b) 4327e102996Smaya{ 4337e102996Smaya /* Generic pointers can alias with SSBOs */ 4347ec681f3Smrg if ((a & (nir_var_mem_ssbo | nir_var_mem_global)) && 4357ec681f3Smrg (b & (nir_var_mem_ssbo | nir_var_mem_global))) 4367e102996Smaya return true; 4377e102996Smaya 4387ec681f3Smrg /* Pointers can only alias if they share a mode. */ 4397ec681f3Smrg return a & b; 4407e102996Smaya} 4417e102996Smaya 4427e102996Smayastatic bool 4437e102996Smayaderef_path_contains_coherent_decoration(nir_deref_path *path) 4447e102996Smaya{ 4457e102996Smaya assert(path->path[0]->deref_type == nir_deref_type_var); 4467e102996Smaya 4477ec681f3Smrg if (path->path[0]->var->data.access & ACCESS_COHERENT) 4487e102996Smaya return true; 4497e102996Smaya 4507e102996Smaya for (nir_deref_instr **p = &path->path[1]; *p; p++) { 4517e102996Smaya if ((*p)->deref_type != nir_deref_type_struct) 4527e102996Smaya continue; 4537e102996Smaya 4547e102996Smaya const struct glsl_type *struct_type = (*(p - 1))->type; 4557e102996Smaya const struct glsl_struct_field *field = 4567e102996Smaya glsl_get_struct_field_data(struct_type, (*p)->strct.index); 4577e102996Smaya if (field->memory_coherent) 4587e102996Smaya return true; 4597e102996Smaya } 4607e102996Smaya 4617e102996Smaya return false; 4627e102996Smaya} 4637e102996Smaya 46401e04c3fSmrgnir_deref_compare_result 46501e04c3fSmrgnir_compare_deref_paths(nir_deref_path *a_path, 46601e04c3fSmrg nir_deref_path *b_path) 46701e04c3fSmrg{ 4687ec681f3Smrg if (!modes_may_alias(b_path->path[0]->modes, a_path->path[0]->modes)) 4697e102996Smaya return nir_derefs_do_not_alias; 4707e102996Smaya 4717e102996Smaya if (a_path->path[0]->deref_type != b_path->path[0]->deref_type) 4727e102996Smaya return nir_derefs_may_alias_bit; 4737e102996Smaya 4747e102996Smaya if (a_path->path[0]->deref_type == nir_deref_type_var) { 4757e102996Smaya if (a_path->path[0]->var != b_path->path[0]->var) { 4767e102996Smaya /* Shader and function temporaries aren't backed by memory so two 4777e102996Smaya * distinct variables never alias. 4787e102996Smaya */ 4797e102996Smaya static const nir_variable_mode temp_var_modes = 4807e102996Smaya nir_var_shader_temp | nir_var_function_temp; 4817ec681f3Smrg if (!(a_path->path[0]->modes & ~temp_var_modes) || 4827ec681f3Smrg !(b_path->path[0]->modes & ~temp_var_modes)) 4837e102996Smaya return nir_derefs_do_not_alias; 4847e102996Smaya 4857e102996Smaya /* If they are both declared coherent or have coherent somewhere in 4867e102996Smaya * their path (due to a member of an interface being declared 4877e102996Smaya * coherent), we have to assume we that we could have any kind of 4887e102996Smaya * aliasing. Otherwise, they could still alias but the client didn't 4897e102996Smaya * tell us and that's their fault. 4907e102996Smaya */ 4917e102996Smaya if (deref_path_contains_coherent_decoration(a_path) && 4927e102996Smaya deref_path_contains_coherent_decoration(b_path)) 4937e102996Smaya return nir_derefs_may_alias_bit; 4947e102996Smaya 4957ec681f3Smrg /* Per SPV_KHR_workgroup_memory_explicit_layout and GL_EXT_shared_memory_block, 4967ec681f3Smrg * shared blocks alias each other. 4977ec681f3Smrg */ 4987ec681f3Smrg if (a_path->path[0]->modes & nir_var_mem_shared && 4997ec681f3Smrg b_path->path[0]->modes & nir_var_mem_shared && 5007ec681f3Smrg (glsl_type_is_interface(a_path->path[0]->var->type) || 5017ec681f3Smrg glsl_type_is_interface(b_path->path[0]->var->type))) { 5027ec681f3Smrg assert(glsl_type_is_interface(a_path->path[0]->var->type) && 5037ec681f3Smrg glsl_type_is_interface(b_path->path[0]->var->type)); 5047ec681f3Smrg return nir_derefs_may_alias_bit; 5057ec681f3Smrg } 5067ec681f3Smrg 5077e102996Smaya /* If we can chase the deref all the way back to the variable and 5087e102996Smaya * they're not the same variable and at least one is not declared 5097e102996Smaya * coherent, we know they can't possibly alias. 5107e102996Smaya */ 5117e102996Smaya return nir_derefs_do_not_alias; 5127e102996Smaya } 5137e102996Smaya } else { 5147e102996Smaya assert(a_path->path[0]->deref_type == nir_deref_type_cast); 5157e102996Smaya /* If they're not exactly the same cast, it's hard to compare them so we 5167e102996Smaya * just assume they alias. Comparing casts is tricky as there are lots 5177e102996Smaya * of things such as mode, type, etc. to make sure work out; for now, we 5187e102996Smaya * just assume nit_opt_deref will combine them and compare the deref 5197e102996Smaya * instructions. 5207e102996Smaya * 5217e102996Smaya * TODO: At some point in the future, we could be clever and understand 5227e102996Smaya * that a float[] and int[] have the same layout and aliasing structure 5237e102996Smaya * but double[] and vec3[] do not and we could potentially be a bit 5247e102996Smaya * smarter here. 5257e102996Smaya */ 5267e102996Smaya if (a_path->path[0] != b_path->path[0]) 5277e102996Smaya return nir_derefs_may_alias_bit; 5287e102996Smaya } 52901e04c3fSmrg 53001e04c3fSmrg /* Start off assuming they fully compare. We ignore equality for now. In 53101e04c3fSmrg * the end, we'll determine that by containment. 53201e04c3fSmrg */ 53301e04c3fSmrg nir_deref_compare_result result = nir_derefs_may_alias_bit | 53401e04c3fSmrg nir_derefs_a_contains_b_bit | 53501e04c3fSmrg nir_derefs_b_contains_a_bit; 53601e04c3fSmrg 53701e04c3fSmrg nir_deref_instr **a_p = &a_path->path[1]; 53801e04c3fSmrg nir_deref_instr **b_p = &b_path->path[1]; 5397e102996Smaya while (*a_p != NULL && *a_p == *b_p) { 5407e102996Smaya a_p++; 5417e102996Smaya b_p++; 5427e102996Smaya } 5437e102996Smaya 5447e102996Smaya /* We're at either the tail or the divergence point between the two deref 5457ec681f3Smrg * paths. Look to see if either contains cast or a ptr_as_array deref. If 5467ec681f3Smrg * it does we don't know how to safely make any inferences. Hopefully, 5477e102996Smaya * nir_opt_deref will clean most of these up and we can start inferring 5487e102996Smaya * things again. 5497e102996Smaya * 5507e102996Smaya * In theory, we could do a bit better. For instance, we could detect the 5517e102996Smaya * case where we have exactly one ptr_as_array deref in the chain after the 5527e102996Smaya * divergence point and it's matched in both chains and the two chains have 5537e102996Smaya * different constant indices. 5547e102996Smaya */ 5557e102996Smaya for (nir_deref_instr **t_p = a_p; *t_p; t_p++) { 5567ec681f3Smrg if ((*t_p)->deref_type == nir_deref_type_cast || 5577ec681f3Smrg (*t_p)->deref_type == nir_deref_type_ptr_as_array) 5587e102996Smaya return nir_derefs_may_alias_bit; 5597e102996Smaya } 5607e102996Smaya for (nir_deref_instr **t_p = b_p; *t_p; t_p++) { 5617ec681f3Smrg if ((*t_p)->deref_type == nir_deref_type_cast || 5627ec681f3Smrg (*t_p)->deref_type == nir_deref_type_ptr_as_array) 5637e102996Smaya return nir_derefs_may_alias_bit; 5647e102996Smaya } 5657e102996Smaya 56601e04c3fSmrg while (*a_p != NULL && *b_p != NULL) { 56701e04c3fSmrg nir_deref_instr *a_tail = *(a_p++); 56801e04c3fSmrg nir_deref_instr *b_tail = *(b_p++); 56901e04c3fSmrg 57001e04c3fSmrg switch (a_tail->deref_type) { 57101e04c3fSmrg case nir_deref_type_array: 57201e04c3fSmrg case nir_deref_type_array_wildcard: { 57301e04c3fSmrg assert(b_tail->deref_type == nir_deref_type_array || 57401e04c3fSmrg b_tail->deref_type == nir_deref_type_array_wildcard); 57501e04c3fSmrg 57601e04c3fSmrg if (a_tail->deref_type == nir_deref_type_array_wildcard) { 57701e04c3fSmrg if (b_tail->deref_type != nir_deref_type_array_wildcard) 57801e04c3fSmrg result &= ~nir_derefs_b_contains_a_bit; 57901e04c3fSmrg } else if (b_tail->deref_type == nir_deref_type_array_wildcard) { 58001e04c3fSmrg if (a_tail->deref_type != nir_deref_type_array_wildcard) 58101e04c3fSmrg result &= ~nir_derefs_a_contains_b_bit; 58201e04c3fSmrg } else { 58301e04c3fSmrg assert(a_tail->deref_type == nir_deref_type_array && 58401e04c3fSmrg b_tail->deref_type == nir_deref_type_array); 58501e04c3fSmrg assert(a_tail->arr.index.is_ssa && b_tail->arr.index.is_ssa); 58601e04c3fSmrg 58701e04c3fSmrg if (nir_src_is_const(a_tail->arr.index) && 58801e04c3fSmrg nir_src_is_const(b_tail->arr.index)) { 58901e04c3fSmrg /* If they're both direct and have different offsets, they 59001e04c3fSmrg * don't even alias much less anything else. 59101e04c3fSmrg */ 59201e04c3fSmrg if (nir_src_as_uint(a_tail->arr.index) != 59301e04c3fSmrg nir_src_as_uint(b_tail->arr.index)) 5947e102996Smaya return nir_derefs_do_not_alias; 59501e04c3fSmrg } else if (a_tail->arr.index.ssa == b_tail->arr.index.ssa) { 59601e04c3fSmrg /* They're the same indirect, continue on */ 59701e04c3fSmrg } else { 59801e04c3fSmrg /* They're not the same index so we can't prove anything about 59901e04c3fSmrg * containment. 60001e04c3fSmrg */ 60101e04c3fSmrg result &= ~(nir_derefs_a_contains_b_bit | nir_derefs_b_contains_a_bit); 60201e04c3fSmrg } 60301e04c3fSmrg } 60401e04c3fSmrg break; 60501e04c3fSmrg } 60601e04c3fSmrg 60701e04c3fSmrg case nir_deref_type_struct: { 60801e04c3fSmrg /* If they're different struct members, they don't even alias */ 60901e04c3fSmrg if (a_tail->strct.index != b_tail->strct.index) 6107e102996Smaya return nir_derefs_do_not_alias; 61101e04c3fSmrg break; 61201e04c3fSmrg } 61301e04c3fSmrg 61401e04c3fSmrg default: 61501e04c3fSmrg unreachable("Invalid deref type"); 61601e04c3fSmrg } 61701e04c3fSmrg } 61801e04c3fSmrg 61901e04c3fSmrg /* If a is longer than b, then it can't contain b */ 62001e04c3fSmrg if (*a_p != NULL) 62101e04c3fSmrg result &= ~nir_derefs_a_contains_b_bit; 62201e04c3fSmrg if (*b_p != NULL) 62301e04c3fSmrg result &= ~nir_derefs_b_contains_a_bit; 62401e04c3fSmrg 62501e04c3fSmrg /* If a contains b and b contains a they must be equal. */ 62601e04c3fSmrg if ((result & nir_derefs_a_contains_b_bit) && (result & nir_derefs_b_contains_a_bit)) 62701e04c3fSmrg result |= nir_derefs_equal_bit; 62801e04c3fSmrg 62901e04c3fSmrg return result; 63001e04c3fSmrg} 63101e04c3fSmrg 63201e04c3fSmrgnir_deref_compare_result 63301e04c3fSmrgnir_compare_derefs(nir_deref_instr *a, nir_deref_instr *b) 63401e04c3fSmrg{ 63501e04c3fSmrg if (a == b) { 63601e04c3fSmrg return nir_derefs_equal_bit | nir_derefs_may_alias_bit | 63701e04c3fSmrg nir_derefs_a_contains_b_bit | nir_derefs_b_contains_a_bit; 63801e04c3fSmrg } 63901e04c3fSmrg 64001e04c3fSmrg nir_deref_path a_path, b_path; 64101e04c3fSmrg nir_deref_path_init(&a_path, a, NULL); 64201e04c3fSmrg nir_deref_path_init(&b_path, b, NULL); 6437e102996Smaya assert(a_path.path[0]->deref_type == nir_deref_type_var || 6447e102996Smaya a_path.path[0]->deref_type == nir_deref_type_cast); 6457e102996Smaya assert(b_path.path[0]->deref_type == nir_deref_type_var || 6467e102996Smaya b_path.path[0]->deref_type == nir_deref_type_cast); 64701e04c3fSmrg 64801e04c3fSmrg nir_deref_compare_result result = nir_compare_deref_paths(&a_path, &b_path); 64901e04c3fSmrg 65001e04c3fSmrg nir_deref_path_finish(&a_path); 65101e04c3fSmrg nir_deref_path_finish(&b_path); 65201e04c3fSmrg 65301e04c3fSmrg return result; 65401e04c3fSmrg} 65501e04c3fSmrg 6567ec681f3Smrgnir_deref_path *nir_get_deref_path(void *mem_ctx, nir_deref_and_path *deref) 6577ec681f3Smrg{ 6587ec681f3Smrg if (!deref->_path) { 6597ec681f3Smrg deref->_path = ralloc(mem_ctx, nir_deref_path); 6607ec681f3Smrg nir_deref_path_init(deref->_path, deref->instr, mem_ctx); 6617ec681f3Smrg } 6627ec681f3Smrg return deref->_path; 6637ec681f3Smrg} 6647ec681f3Smrg 6657ec681f3Smrgnir_deref_compare_result nir_compare_derefs_and_paths(void *mem_ctx, 6667ec681f3Smrg nir_deref_and_path *a, 6677ec681f3Smrg nir_deref_and_path *b) 6687ec681f3Smrg{ 6697ec681f3Smrg if (a->instr == b->instr) /* nir_compare_derefs has a fast path if a == b */ 6707ec681f3Smrg return nir_compare_derefs(a->instr, b->instr); 6717ec681f3Smrg 6727ec681f3Smrg return nir_compare_deref_paths(nir_get_deref_path(mem_ctx, a), 6737ec681f3Smrg nir_get_deref_path(mem_ctx, b)); 6747ec681f3Smrg} 6757ec681f3Smrg 67601e04c3fSmrgstruct rematerialize_deref_state { 67701e04c3fSmrg bool progress; 67801e04c3fSmrg nir_builder builder; 67901e04c3fSmrg nir_block *block; 68001e04c3fSmrg struct hash_table *cache; 68101e04c3fSmrg}; 68201e04c3fSmrg 68301e04c3fSmrgstatic nir_deref_instr * 68401e04c3fSmrgrematerialize_deref_in_block(nir_deref_instr *deref, 68501e04c3fSmrg struct rematerialize_deref_state *state) 68601e04c3fSmrg{ 68701e04c3fSmrg if (deref->instr.block == state->block) 68801e04c3fSmrg return deref; 68901e04c3fSmrg 69001e04c3fSmrg if (!state->cache) { 6917e102996Smaya state->cache = _mesa_pointer_hash_table_create(NULL); 69201e04c3fSmrg } 69301e04c3fSmrg 69401e04c3fSmrg struct hash_entry *cached = _mesa_hash_table_search(state->cache, deref); 69501e04c3fSmrg if (cached) 69601e04c3fSmrg return cached->data; 69701e04c3fSmrg 69801e04c3fSmrg nir_builder *b = &state->builder; 69901e04c3fSmrg nir_deref_instr *new_deref = 70001e04c3fSmrg nir_deref_instr_create(b->shader, deref->deref_type); 7017ec681f3Smrg new_deref->modes = deref->modes; 70201e04c3fSmrg new_deref->type = deref->type; 70301e04c3fSmrg 70401e04c3fSmrg if (deref->deref_type == nir_deref_type_var) { 70501e04c3fSmrg new_deref->var = deref->var; 70601e04c3fSmrg } else { 70701e04c3fSmrg nir_deref_instr *parent = nir_src_as_deref(deref->parent); 70801e04c3fSmrg if (parent) { 70901e04c3fSmrg parent = rematerialize_deref_in_block(parent, state); 71001e04c3fSmrg new_deref->parent = nir_src_for_ssa(&parent->dest.ssa); 71101e04c3fSmrg } else { 7127ec681f3Smrg nir_src_copy(&new_deref->parent, &deref->parent); 71301e04c3fSmrg } 71401e04c3fSmrg } 71501e04c3fSmrg 71601e04c3fSmrg switch (deref->deref_type) { 71701e04c3fSmrg case nir_deref_type_var: 71801e04c3fSmrg case nir_deref_type_array_wildcard: 71901e04c3fSmrg /* Nothing more to do */ 72001e04c3fSmrg break; 72101e04c3fSmrg 7227ec681f3Smrg case nir_deref_type_cast: 7237ec681f3Smrg new_deref->cast.ptr_stride = deref->cast.ptr_stride; 7247ec681f3Smrg break; 7257ec681f3Smrg 72601e04c3fSmrg case nir_deref_type_array: 7277ec681f3Smrg case nir_deref_type_ptr_as_array: 72801e04c3fSmrg assert(!nir_src_as_deref(deref->arr.index)); 7297ec681f3Smrg nir_src_copy(&new_deref->arr.index, &deref->arr.index); 73001e04c3fSmrg break; 73101e04c3fSmrg 73201e04c3fSmrg case nir_deref_type_struct: 73301e04c3fSmrg new_deref->strct.index = deref->strct.index; 73401e04c3fSmrg break; 73501e04c3fSmrg 73601e04c3fSmrg default: 73701e04c3fSmrg unreachable("Invalid deref instruction type"); 73801e04c3fSmrg } 73901e04c3fSmrg 74001e04c3fSmrg nir_ssa_dest_init(&new_deref->instr, &new_deref->dest, 74101e04c3fSmrg deref->dest.ssa.num_components, 74201e04c3fSmrg deref->dest.ssa.bit_size, 7437ec681f3Smrg NULL); 74401e04c3fSmrg nir_builder_instr_insert(b, &new_deref->instr); 74501e04c3fSmrg 74601e04c3fSmrg return new_deref; 74701e04c3fSmrg} 74801e04c3fSmrg 74901e04c3fSmrgstatic bool 75001e04c3fSmrgrematerialize_deref_src(nir_src *src, void *_state) 75101e04c3fSmrg{ 75201e04c3fSmrg struct rematerialize_deref_state *state = _state; 75301e04c3fSmrg 75401e04c3fSmrg nir_deref_instr *deref = nir_src_as_deref(*src); 75501e04c3fSmrg if (!deref) 75601e04c3fSmrg return true; 75701e04c3fSmrg 75801e04c3fSmrg nir_deref_instr *block_deref = rematerialize_deref_in_block(deref, state); 75901e04c3fSmrg if (block_deref != deref) { 76001e04c3fSmrg nir_instr_rewrite_src(src->parent_instr, src, 76101e04c3fSmrg nir_src_for_ssa(&block_deref->dest.ssa)); 76201e04c3fSmrg nir_deref_instr_remove_if_unused(deref); 76301e04c3fSmrg state->progress = true; 76401e04c3fSmrg } 76501e04c3fSmrg 76601e04c3fSmrg return true; 76701e04c3fSmrg} 76801e04c3fSmrg 76901e04c3fSmrg/** Re-materialize derefs in every block 77001e04c3fSmrg * 77101e04c3fSmrg * This pass re-materializes deref instructions in every block in which it is 77201e04c3fSmrg * used. After this pass has been run, every use of a deref will be of a 77301e04c3fSmrg * deref in the same block as the use. Also, all unused derefs will be 77401e04c3fSmrg * deleted as a side-effect. 7757ec681f3Smrg * 7767ec681f3Smrg * Derefs used as sources of phi instructions are not rematerialized. 77701e04c3fSmrg */ 77801e04c3fSmrgbool 77901e04c3fSmrgnir_rematerialize_derefs_in_use_blocks_impl(nir_function_impl *impl) 78001e04c3fSmrg{ 78101e04c3fSmrg struct rematerialize_deref_state state = { 0 }; 78201e04c3fSmrg nir_builder_init(&state.builder, impl); 78301e04c3fSmrg 7847ec681f3Smrg nir_foreach_block_unstructured(block, impl) { 78501e04c3fSmrg state.block = block; 78601e04c3fSmrg 78701e04c3fSmrg /* Start each block with a fresh cache */ 78801e04c3fSmrg if (state.cache) 78901e04c3fSmrg _mesa_hash_table_clear(state.cache, NULL); 79001e04c3fSmrg 79101e04c3fSmrg nir_foreach_instr_safe(instr, block) { 79201e04c3fSmrg if (instr->type == nir_instr_type_deref && 79301e04c3fSmrg nir_deref_instr_remove_if_unused(nir_instr_as_deref(instr))) 79401e04c3fSmrg continue; 79501e04c3fSmrg 7967ec681f3Smrg /* If a deref is used in a phi, we can't rematerialize it, as the new 7977ec681f3Smrg * derefs would appear before the phi, which is not valid. 7987ec681f3Smrg */ 7997ec681f3Smrg if (instr->type == nir_instr_type_phi) 8007ec681f3Smrg continue; 8017ec681f3Smrg 80201e04c3fSmrg state.builder.cursor = nir_before_instr(instr); 80301e04c3fSmrg nir_foreach_src(instr, rematerialize_deref_src, &state); 80401e04c3fSmrg } 80501e04c3fSmrg 80601e04c3fSmrg#ifndef NDEBUG 80701e04c3fSmrg nir_if *following_if = nir_block_get_following_if(block); 80801e04c3fSmrg if (following_if) 80901e04c3fSmrg assert(!nir_src_as_deref(following_if->condition)); 81001e04c3fSmrg#endif 81101e04c3fSmrg } 81201e04c3fSmrg 81301e04c3fSmrg _mesa_hash_table_destroy(state.cache, NULL); 81401e04c3fSmrg 81501e04c3fSmrg return state.progress; 81601e04c3fSmrg} 8177e102996Smaya 8187ec681f3Smrgstatic void 8197ec681f3Smrgnir_deref_instr_fixup_child_types(nir_deref_instr *parent) 8207e102996Smaya{ 8217ec681f3Smrg nir_foreach_use(use, &parent->dest.ssa) { 8227ec681f3Smrg if (use->parent_instr->type != nir_instr_type_deref) 8237ec681f3Smrg continue; 8247e102996Smaya 8257ec681f3Smrg nir_deref_instr *child = nir_instr_as_deref(use->parent_instr); 8267ec681f3Smrg switch (child->deref_type) { 8277ec681f3Smrg case nir_deref_type_var: 8287ec681f3Smrg unreachable("nir_deref_type_var cannot be a child"); 8297ec681f3Smrg 8307ec681f3Smrg case nir_deref_type_array: 8317ec681f3Smrg case nir_deref_type_array_wildcard: 8327ec681f3Smrg child->type = glsl_get_array_element(parent->type); 8337ec681f3Smrg break; 8347ec681f3Smrg 8357ec681f3Smrg case nir_deref_type_ptr_as_array: 8367ec681f3Smrg child->type = parent->type; 8377ec681f3Smrg break; 8387ec681f3Smrg 8397ec681f3Smrg case nir_deref_type_struct: 8407ec681f3Smrg child->type = glsl_get_struct_field(parent->type, 8417ec681f3Smrg child->strct.index); 8427ec681f3Smrg break; 8437ec681f3Smrg 8447ec681f3Smrg case nir_deref_type_cast: 8457ec681f3Smrg /* We stop the recursion here */ 8467ec681f3Smrg continue; 8477ec681f3Smrg } 8487ec681f3Smrg 8497ec681f3Smrg /* Recurse into children */ 8507ec681f3Smrg nir_deref_instr_fixup_child_types(child); 8517ec681f3Smrg } 8527e102996Smaya} 8537e102996Smaya 8547e102996Smayastatic bool 8557e102996Smayais_trivial_array_deref_cast(nir_deref_instr *cast) 8567e102996Smaya{ 8577e102996Smaya assert(is_trivial_deref_cast(cast)); 8587e102996Smaya 8597e102996Smaya nir_deref_instr *parent = nir_src_as_deref(cast->parent); 8607e102996Smaya 8617e102996Smaya if (parent->deref_type == nir_deref_type_array) { 8627e102996Smaya return cast->cast.ptr_stride == 8637e102996Smaya glsl_get_explicit_stride(nir_deref_instr_parent(parent)->type); 8647e102996Smaya } else if (parent->deref_type == nir_deref_type_ptr_as_array) { 8657e102996Smaya return cast->cast.ptr_stride == 8667ec681f3Smrg nir_deref_instr_array_stride(parent); 8677e102996Smaya } else { 8687e102996Smaya return false; 8697e102996Smaya } 8707e102996Smaya} 8717e102996Smaya 8727e102996Smayastatic bool 8737e102996Smayais_deref_ptr_as_array(nir_instr *instr) 8747e102996Smaya{ 8757e102996Smaya return instr->type == nir_instr_type_deref && 8767e102996Smaya nir_instr_as_deref(instr)->deref_type == nir_deref_type_ptr_as_array; 8777e102996Smaya} 8787e102996Smaya 8797ec681f3Smrgstatic bool 8807ec681f3Smrgopt_remove_restricting_cast_alignments(nir_deref_instr *cast) 8817ec681f3Smrg{ 8827ec681f3Smrg assert(cast->deref_type == nir_deref_type_cast); 8837ec681f3Smrg if (cast->cast.align_mul == 0) 8847ec681f3Smrg return false; 8857ec681f3Smrg 8867ec681f3Smrg nir_deref_instr *parent = nir_src_as_deref(cast->parent); 8877ec681f3Smrg if (parent == NULL) 8887ec681f3Smrg return false; 8897ec681f3Smrg 8907ec681f3Smrg /* Don't use any default alignment for this check. We don't want to fall 8917ec681f3Smrg * back to type alignment too early in case we find out later that we're 8927ec681f3Smrg * somehow a child of a packed struct. 8937ec681f3Smrg */ 8947ec681f3Smrg uint32_t parent_mul, parent_offset; 8957ec681f3Smrg if (!nir_get_explicit_deref_align(parent, false /* default_to_type_align */, 8967ec681f3Smrg &parent_mul, &parent_offset)) 8977ec681f3Smrg return false; 8987ec681f3Smrg 8997ec681f3Smrg /* If this cast increases the alignment, we want to keep it. 9007ec681f3Smrg * 9017ec681f3Smrg * There is a possibility that the larger alignment provided by this cast 9027ec681f3Smrg * somehow disagrees with the smaller alignment further up the deref chain. 9037ec681f3Smrg * In that case, we choose to favor the alignment closer to the actual 9047ec681f3Smrg * memory operation which, in this case, is the cast and not its parent so 9057ec681f3Smrg * keeping the cast alignment is the right thing to do. 9067ec681f3Smrg */ 9077ec681f3Smrg if (parent_mul < cast->cast.align_mul) 9087ec681f3Smrg return false; 9097ec681f3Smrg 9107ec681f3Smrg /* If we've gotten here, we have a parent deref with an align_mul at least 9117ec681f3Smrg * as large as ours so we can potentially throw away the alignment 9127ec681f3Smrg * information on this deref. There are two cases to consider here: 9137ec681f3Smrg * 9147ec681f3Smrg * 1. We can chase the deref all the way back to the variable. In this 9157ec681f3Smrg * case, we have "perfect" knowledge, modulo indirect array derefs. 9167ec681f3Smrg * Unless we've done something wrong in our indirect/wildcard stride 9177ec681f3Smrg * calculations, our knowledge from the deref walk is better than the 9187ec681f3Smrg * client's. 9197ec681f3Smrg * 9207ec681f3Smrg * 2. We can't chase it all the way back to the variable. In this case, 9217ec681f3Smrg * because our call to nir_get_explicit_deref_align(parent, ...) above 9227ec681f3Smrg * above passes default_to_type_align=false, the only way we can even 9237ec681f3Smrg * get here is if something further up the deref chain has a cast with 9247ec681f3Smrg * an alignment which can only happen if we get an alignment from the 9257ec681f3Smrg * client (most likely a decoration in the SPIR-V). If the client has 9267ec681f3Smrg * provided us with two conflicting alignments in the deref chain, 9277ec681f3Smrg * that's their fault and we can do whatever we want. 9287ec681f3Smrg * 9297ec681f3Smrg * In either case, we should be without our rights, at this point, to throw 9307ec681f3Smrg * away the alignment information on this deref. However, to be "nice" to 9317ec681f3Smrg * weird clients, we do one more check. It really shouldn't happen but 9327ec681f3Smrg * it's possible that the parent's alignment offset disagrees with the 9337ec681f3Smrg * cast's alignment offset. In this case, we consider the cast as 9347ec681f3Smrg * providing more information (or at least more valid information) and keep 9357ec681f3Smrg * it even if the align_mul from the parent is larger. 9367ec681f3Smrg */ 9377ec681f3Smrg assert(cast->cast.align_mul <= parent_mul); 9387ec681f3Smrg if (parent_offset % cast->cast.align_mul != cast->cast.align_offset) 9397ec681f3Smrg return false; 9407ec681f3Smrg 9417ec681f3Smrg /* If we got here, the parent has better alignment information than the 9427ec681f3Smrg * child and we can get rid of the child alignment information. 9437ec681f3Smrg */ 9447ec681f3Smrg cast->cast.align_mul = 0; 9457ec681f3Smrg cast->cast.align_offset = 0; 9467ec681f3Smrg return true; 9477ec681f3Smrg} 9487ec681f3Smrg 9497e102996Smaya/** 9507e102996Smaya * Remove casts that just wrap other casts. 9517e102996Smaya */ 9527e102996Smayastatic bool 9537e102996Smayaopt_remove_cast_cast(nir_deref_instr *cast) 9547e102996Smaya{ 9557e102996Smaya nir_deref_instr *first_cast = cast; 9567e102996Smaya 9577e102996Smaya while (true) { 9587e102996Smaya nir_deref_instr *parent = nir_deref_instr_parent(first_cast); 9597e102996Smaya if (parent == NULL || parent->deref_type != nir_deref_type_cast) 9607e102996Smaya break; 9617e102996Smaya first_cast = parent; 9627e102996Smaya } 9637e102996Smaya if (cast == first_cast) 9647e102996Smaya return false; 9657e102996Smaya 9667e102996Smaya nir_instr_rewrite_src(&cast->instr, &cast->parent, 9677e102996Smaya nir_src_for_ssa(first_cast->parent.ssa)); 9687e102996Smaya return true; 9697e102996Smaya} 9707e102996Smaya 9717ec681f3Smrg/* Restrict variable modes in casts. 9727ec681f3Smrg * 9737ec681f3Smrg * If we know from something higher up the deref chain that the deref has a 9747ec681f3Smrg * specific mode, we can cast to more general and back but we can never cast 9757ec681f3Smrg * across modes. For non-cast derefs, we should only ever do anything here if 9767ec681f3Smrg * the parent eventually comes from a cast that we restricted earlier. 9777ec681f3Smrg */ 9787ec681f3Smrgstatic bool 9797ec681f3Smrgopt_restrict_deref_modes(nir_deref_instr *deref) 9807ec681f3Smrg{ 9817ec681f3Smrg if (deref->deref_type == nir_deref_type_var) { 9827ec681f3Smrg assert(deref->modes == deref->var->data.mode); 9837ec681f3Smrg return false; 9847ec681f3Smrg } 9857ec681f3Smrg 9867ec681f3Smrg nir_deref_instr *parent = nir_src_as_deref(deref->parent); 9877ec681f3Smrg if (parent == NULL || parent->modes == deref->modes) 9887ec681f3Smrg return false; 9897ec681f3Smrg 9907ec681f3Smrg assert(parent->modes & deref->modes); 9917ec681f3Smrg deref->modes &= parent->modes; 9927ec681f3Smrg return true; 9937ec681f3Smrg} 9947ec681f3Smrg 9957ec681f3Smrgstatic bool 9967ec681f3Smrgopt_remove_sampler_cast(nir_deref_instr *cast) 9977ec681f3Smrg{ 9987ec681f3Smrg assert(cast->deref_type == nir_deref_type_cast); 9997ec681f3Smrg nir_deref_instr *parent = nir_src_as_deref(cast->parent); 10007ec681f3Smrg if (parent == NULL) 10017ec681f3Smrg return false; 10027ec681f3Smrg 10037ec681f3Smrg /* Strip both types down to their non-array type and bail if there are any 10047ec681f3Smrg * discrepancies in array lengths. 10057ec681f3Smrg */ 10067ec681f3Smrg const struct glsl_type *parent_type = parent->type; 10077ec681f3Smrg const struct glsl_type *cast_type = cast->type; 10087ec681f3Smrg while (glsl_type_is_array(parent_type) && glsl_type_is_array(cast_type)) { 10097ec681f3Smrg if (glsl_get_length(parent_type) != glsl_get_length(cast_type)) 10107ec681f3Smrg return false; 10117ec681f3Smrg parent_type = glsl_get_array_element(parent_type); 10127ec681f3Smrg cast_type = glsl_get_array_element(cast_type); 10137ec681f3Smrg } 10147ec681f3Smrg 10157ec681f3Smrg if (glsl_type_is_array(parent_type) || glsl_type_is_array(cast_type)) 10167ec681f3Smrg return false; 10177ec681f3Smrg 10187ec681f3Smrg if (!glsl_type_is_sampler(parent_type) || 10197ec681f3Smrg cast_type != glsl_bare_sampler_type()) 10207ec681f3Smrg return false; 10217ec681f3Smrg 10227ec681f3Smrg /* We're a cast from a more detailed sampler type to a bare sampler */ 10237ec681f3Smrg nir_ssa_def_rewrite_uses(&cast->dest.ssa, 10247ec681f3Smrg &parent->dest.ssa); 10257ec681f3Smrg nir_instr_remove(&cast->instr); 10267ec681f3Smrg 10277ec681f3Smrg /* Recursively crawl the deref tree and clean up types */ 10287ec681f3Smrg nir_deref_instr_fixup_child_types(parent); 10297ec681f3Smrg 10307ec681f3Smrg return true; 10317ec681f3Smrg} 10327ec681f3Smrg 10337e102996Smaya/** 10347e102996Smaya * Is this casting a struct to a contained struct. 10357e102996Smaya * struct a { struct b field0 }; 10367e102996Smaya * ssa_5 is structa; 10377e102996Smaya * deref_cast (structb *)ssa_5 (function_temp structb); 10387e102996Smaya * converts to 10397e102996Smaya * deref_struct &ssa_5->field0 (function_temp structb); 10407e102996Smaya * This allows subsequent copy propagation to work. 10417e102996Smaya */ 10427e102996Smayastatic bool 10437e102996Smayaopt_replace_struct_wrapper_cast(nir_builder *b, nir_deref_instr *cast) 10447e102996Smaya{ 10457e102996Smaya nir_deref_instr *parent = nir_src_as_deref(cast->parent); 10467e102996Smaya if (!parent) 10477e102996Smaya return false; 10487e102996Smaya 10497ec681f3Smrg if (cast->cast.align_mul > 0) 10507ec681f3Smrg return false; 10517ec681f3Smrg 10527e102996Smaya if (!glsl_type_is_struct(parent->type)) 10537e102996Smaya return false; 10547e102996Smaya 10557ec681f3Smrg /* Empty struct */ 10567ec681f3Smrg if (glsl_get_length(parent->type) < 1) 10577ec681f3Smrg return false; 10587ec681f3Smrg 10597e102996Smaya if (glsl_get_struct_field_offset(parent->type, 0) != 0) 10607e102996Smaya return false; 10617e102996Smaya 10627e102996Smaya if (cast->type != glsl_get_struct_field(parent->type, 0)) 10637e102996Smaya return false; 10647e102996Smaya 10657e102996Smaya nir_deref_instr *replace = nir_build_deref_struct(b, parent, 0); 10667ec681f3Smrg nir_ssa_def_rewrite_uses(&cast->dest.ssa, &replace->dest.ssa); 10677e102996Smaya nir_deref_instr_remove_if_unused(cast); 10687e102996Smaya return true; 10697e102996Smaya} 10707e102996Smaya 10717e102996Smayastatic bool 10727e102996Smayaopt_deref_cast(nir_builder *b, nir_deref_instr *cast) 10737e102996Smaya{ 10747ec681f3Smrg bool progress = false; 10757ec681f3Smrg 10767ec681f3Smrg progress |= opt_remove_restricting_cast_alignments(cast); 10777e102996Smaya 10787e102996Smaya if (opt_replace_struct_wrapper_cast(b, cast)) 10797e102996Smaya return true; 10807e102996Smaya 10817ec681f3Smrg if (opt_remove_sampler_cast(cast)) 10827ec681f3Smrg return true; 10837ec681f3Smrg 10847ec681f3Smrg progress |= opt_remove_cast_cast(cast); 10857e102996Smaya if (!is_trivial_deref_cast(cast)) 10867e102996Smaya return progress; 10877e102996Smaya 10887ec681f3Smrg /* If this deref still contains useful alignment information, we don't want 10897ec681f3Smrg * to delete it. 10907ec681f3Smrg */ 10917ec681f3Smrg if (cast->cast.align_mul > 0) 10927ec681f3Smrg return progress; 10937ec681f3Smrg 10947e102996Smaya bool trivial_array_cast = is_trivial_array_deref_cast(cast); 10957e102996Smaya 10967e102996Smaya assert(cast->dest.is_ssa); 10977e102996Smaya assert(cast->parent.is_ssa); 10987e102996Smaya 10997e102996Smaya nir_foreach_use_safe(use_src, &cast->dest.ssa) { 11007e102996Smaya /* If this isn't a trivial array cast, we can't propagate into 11017e102996Smaya * ptr_as_array derefs. 11027e102996Smaya */ 11037e102996Smaya if (is_deref_ptr_as_array(use_src->parent_instr) && 11047e102996Smaya !trivial_array_cast) 11057e102996Smaya continue; 11067e102996Smaya 11077e102996Smaya nir_instr_rewrite_src(use_src->parent_instr, use_src, cast->parent); 11087e102996Smaya progress = true; 11097e102996Smaya } 11107e102996Smaya 11117e102996Smaya /* If uses would be a bit crazy */ 11127ec681f3Smrg assert(list_is_empty(&cast->dest.ssa.if_uses)); 11137ec681f3Smrg 11147ec681f3Smrg if (nir_deref_instr_remove_if_unused(cast)) 11157ec681f3Smrg progress = true; 11167e102996Smaya 11177e102996Smaya return progress; 11187e102996Smaya} 11197e102996Smaya 11207e102996Smayastatic bool 11217e102996Smayaopt_deref_ptr_as_array(nir_builder *b, nir_deref_instr *deref) 11227e102996Smaya{ 11237e102996Smaya assert(deref->deref_type == nir_deref_type_ptr_as_array); 11247e102996Smaya 11257e102996Smaya nir_deref_instr *parent = nir_deref_instr_parent(deref); 11267e102996Smaya 11277e102996Smaya if (nir_src_is_const(deref->arr.index) && 11287e102996Smaya nir_src_as_int(deref->arr.index) == 0) { 11297e102996Smaya /* If it's a ptr_as_array deref with an index of 0, it does nothing 11307ec681f3Smrg * and we can just replace its uses with its parent, unless it has 11317ec681f3Smrg * alignment information. 11327e102996Smaya * 11337e102996Smaya * The source of a ptr_as_array deref always has a deref_type of 11347e102996Smaya * nir_deref_type_array or nir_deref_type_cast. If it's a cast, it 11357e102996Smaya * may be trivial and we may be able to get rid of that too. Any 11367e102996Smaya * trivial cast of trivial cast cases should be handled already by 11377e102996Smaya * opt_deref_cast() above. 11387e102996Smaya */ 11397e102996Smaya if (parent->deref_type == nir_deref_type_cast && 11407ec681f3Smrg parent->cast.align_mul == 0 && 11417e102996Smaya is_trivial_deref_cast(parent)) 11427e102996Smaya parent = nir_deref_instr_parent(parent); 11437e102996Smaya nir_ssa_def_rewrite_uses(&deref->dest.ssa, 11447ec681f3Smrg &parent->dest.ssa); 11457e102996Smaya nir_instr_remove(&deref->instr); 11467e102996Smaya return true; 11477e102996Smaya } 11487e102996Smaya 11497e102996Smaya if (parent->deref_type != nir_deref_type_array && 11507e102996Smaya parent->deref_type != nir_deref_type_ptr_as_array) 11517e102996Smaya return false; 11527e102996Smaya 11537e102996Smaya assert(parent->parent.is_ssa); 11547e102996Smaya assert(parent->arr.index.is_ssa); 11557e102996Smaya assert(deref->arr.index.is_ssa); 11567e102996Smaya 11577e102996Smaya nir_ssa_def *new_idx = nir_iadd(b, parent->arr.index.ssa, 11587e102996Smaya deref->arr.index.ssa); 11597e102996Smaya 11607e102996Smaya deref->deref_type = parent->deref_type; 11617e102996Smaya nir_instr_rewrite_src(&deref->instr, &deref->parent, parent->parent); 11627e102996Smaya nir_instr_rewrite_src(&deref->instr, &deref->arr.index, 11637e102996Smaya nir_src_for_ssa(new_idx)); 11647e102996Smaya return true; 11657e102996Smaya} 11667e102996Smaya 11677ec681f3Smrgstatic bool 11687ec681f3Smrgis_vector_bitcast_deref(nir_deref_instr *cast, 11697ec681f3Smrg nir_component_mask_t mask, 11707ec681f3Smrg bool is_write) 11717ec681f3Smrg{ 11727ec681f3Smrg if (cast->deref_type != nir_deref_type_cast) 11737ec681f3Smrg return false; 11747ec681f3Smrg 11757ec681f3Smrg /* Don't throw away useful alignment information */ 11767ec681f3Smrg if (cast->cast.align_mul > 0) 11777ec681f3Smrg return false; 11787ec681f3Smrg 11797ec681f3Smrg /* It has to be a cast of another deref */ 11807ec681f3Smrg nir_deref_instr *parent = nir_src_as_deref(cast->parent); 11817ec681f3Smrg if (parent == NULL) 11827ec681f3Smrg return false; 11837ec681f3Smrg 11847ec681f3Smrg /* The parent has to be a vector or scalar */ 11857ec681f3Smrg if (!glsl_type_is_vector_or_scalar(parent->type)) 11867ec681f3Smrg return false; 11877ec681f3Smrg 11887ec681f3Smrg /* Don't bother with 1-bit types */ 11897ec681f3Smrg unsigned cast_bit_size = glsl_get_bit_size(cast->type); 11907ec681f3Smrg unsigned parent_bit_size = glsl_get_bit_size(parent->type); 11917ec681f3Smrg if (cast_bit_size == 1 || parent_bit_size == 1) 11927ec681f3Smrg return false; 11937ec681f3Smrg 11947ec681f3Smrg /* A strided vector type means it's not tightly packed */ 11957ec681f3Smrg if (glsl_get_explicit_stride(cast->type) || 11967ec681f3Smrg glsl_get_explicit_stride(parent->type)) 11977ec681f3Smrg return false; 11987ec681f3Smrg 11997ec681f3Smrg assert(cast_bit_size > 0 && cast_bit_size % 8 == 0); 12007ec681f3Smrg assert(parent_bit_size > 0 && parent_bit_size % 8 == 0); 12017ec681f3Smrg unsigned bytes_used = util_last_bit(mask) * (cast_bit_size / 8); 12027ec681f3Smrg unsigned parent_bytes = glsl_get_vector_elements(parent->type) * 12037ec681f3Smrg (parent_bit_size / 8); 12047ec681f3Smrg if (bytes_used > parent_bytes) 12057ec681f3Smrg return false; 12067ec681f3Smrg 12077ec681f3Smrg if (is_write && !nir_component_mask_can_reinterpret(mask, cast_bit_size, 12087ec681f3Smrg parent_bit_size)) 12097ec681f3Smrg return false; 12107ec681f3Smrg 12117ec681f3Smrg return true; 12127ec681f3Smrg} 12137ec681f3Smrg 12147ec681f3Smrgstatic nir_ssa_def * 12157ec681f3Smrgresize_vector(nir_builder *b, nir_ssa_def *data, unsigned num_components) 12167ec681f3Smrg{ 12177ec681f3Smrg if (num_components == data->num_components) 12187ec681f3Smrg return data; 12197ec681f3Smrg 12207ec681f3Smrg unsigned swiz[NIR_MAX_VEC_COMPONENTS] = { 0, }; 12217ec681f3Smrg for (unsigned i = 0; i < MIN2(num_components, data->num_components); i++) 12227ec681f3Smrg swiz[i] = i; 12237ec681f3Smrg 12247ec681f3Smrg return nir_swizzle(b, data, swiz, num_components); 12257ec681f3Smrg} 12267ec681f3Smrg 12277ec681f3Smrgstatic bool 12287ec681f3Smrgopt_load_vec_deref(nir_builder *b, nir_intrinsic_instr *load) 12297ec681f3Smrg{ 12307ec681f3Smrg nir_deref_instr *deref = nir_src_as_deref(load->src[0]); 12317ec681f3Smrg nir_component_mask_t read_mask = 12327ec681f3Smrg nir_ssa_def_components_read(&load->dest.ssa); 12337ec681f3Smrg 12347ec681f3Smrg /* LLVM loves take advantage of the fact that vec3s in OpenCL are 12357ec681f3Smrg * vec4-aligned and so it can just read/write them as vec4s. This 12367ec681f3Smrg * results in a LOT of vec4->vec3 casts on loads and stores. 12377ec681f3Smrg */ 12387ec681f3Smrg if (is_vector_bitcast_deref(deref, read_mask, false)) { 12397ec681f3Smrg const unsigned old_num_comps = load->dest.ssa.num_components; 12407ec681f3Smrg const unsigned old_bit_size = load->dest.ssa.bit_size; 12417ec681f3Smrg 12427ec681f3Smrg nir_deref_instr *parent = nir_src_as_deref(deref->parent); 12437ec681f3Smrg const unsigned new_num_comps = glsl_get_vector_elements(parent->type); 12447ec681f3Smrg const unsigned new_bit_size = glsl_get_bit_size(parent->type); 12457ec681f3Smrg 12467ec681f3Smrg /* Stomp it to reference the parent */ 12477ec681f3Smrg nir_instr_rewrite_src(&load->instr, &load->src[0], 12487ec681f3Smrg nir_src_for_ssa(&parent->dest.ssa)); 12497ec681f3Smrg assert(load->dest.is_ssa); 12507ec681f3Smrg load->dest.ssa.bit_size = new_bit_size; 12517ec681f3Smrg load->dest.ssa.num_components = new_num_comps; 12527ec681f3Smrg load->num_components = new_num_comps; 12537ec681f3Smrg 12547ec681f3Smrg b->cursor = nir_after_instr(&load->instr); 12557ec681f3Smrg nir_ssa_def *data = &load->dest.ssa; 12567ec681f3Smrg if (old_bit_size != new_bit_size) 12577ec681f3Smrg data = nir_bitcast_vector(b, &load->dest.ssa, old_bit_size); 12587ec681f3Smrg data = resize_vector(b, data, old_num_comps); 12597ec681f3Smrg 12607ec681f3Smrg nir_ssa_def_rewrite_uses_after(&load->dest.ssa, data, 12617ec681f3Smrg data->parent_instr); 12627ec681f3Smrg return true; 12637ec681f3Smrg } 12647ec681f3Smrg 12657ec681f3Smrg return false; 12667ec681f3Smrg} 12677ec681f3Smrg 12687ec681f3Smrgstatic bool 12697ec681f3Smrgopt_store_vec_deref(nir_builder *b, nir_intrinsic_instr *store) 12707ec681f3Smrg{ 12717ec681f3Smrg nir_deref_instr *deref = nir_src_as_deref(store->src[0]); 12727ec681f3Smrg nir_component_mask_t write_mask = nir_intrinsic_write_mask(store); 12737ec681f3Smrg 12747ec681f3Smrg /* LLVM loves take advantage of the fact that vec3s in OpenCL are 12757ec681f3Smrg * vec4-aligned and so it can just read/write them as vec4s. This 12767ec681f3Smrg * results in a LOT of vec4->vec3 casts on loads and stores. 12777ec681f3Smrg */ 12787ec681f3Smrg if (is_vector_bitcast_deref(deref, write_mask, true)) { 12797ec681f3Smrg assert(store->src[1].is_ssa); 12807ec681f3Smrg nir_ssa_def *data = store->src[1].ssa; 12817ec681f3Smrg 12827ec681f3Smrg const unsigned old_bit_size = data->bit_size; 12837ec681f3Smrg 12847ec681f3Smrg nir_deref_instr *parent = nir_src_as_deref(deref->parent); 12857ec681f3Smrg const unsigned new_num_comps = glsl_get_vector_elements(parent->type); 12867ec681f3Smrg const unsigned new_bit_size = glsl_get_bit_size(parent->type); 12877ec681f3Smrg 12887ec681f3Smrg nir_instr_rewrite_src(&store->instr, &store->src[0], 12897ec681f3Smrg nir_src_for_ssa(&parent->dest.ssa)); 12907ec681f3Smrg 12917ec681f3Smrg /* Restrict things down as needed so the bitcast doesn't fail */ 12927ec681f3Smrg data = nir_channels(b, data, (1 << util_last_bit(write_mask)) - 1); 12937ec681f3Smrg if (old_bit_size != new_bit_size) 12947ec681f3Smrg data = nir_bitcast_vector(b, data, new_bit_size); 12957ec681f3Smrg data = resize_vector(b, data, new_num_comps); 12967ec681f3Smrg nir_instr_rewrite_src(&store->instr, &store->src[1], 12977ec681f3Smrg nir_src_for_ssa(data)); 12987ec681f3Smrg store->num_components = new_num_comps; 12997ec681f3Smrg 13007ec681f3Smrg /* Adjust the write mask */ 13017ec681f3Smrg write_mask = nir_component_mask_reinterpret(write_mask, old_bit_size, 13027ec681f3Smrg new_bit_size); 13037ec681f3Smrg nir_intrinsic_set_write_mask(store, write_mask); 13047ec681f3Smrg return true; 13057ec681f3Smrg } 13067ec681f3Smrg 13077ec681f3Smrg return false; 13087ec681f3Smrg} 13097ec681f3Smrg 13107ec681f3Smrgstatic bool 13117ec681f3Smrgopt_known_deref_mode_is(nir_builder *b, nir_intrinsic_instr *intrin) 13127ec681f3Smrg{ 13137ec681f3Smrg nir_variable_mode modes = nir_intrinsic_memory_modes(intrin); 13147ec681f3Smrg nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 13157ec681f3Smrg if (deref == NULL) 13167ec681f3Smrg return false; 13177ec681f3Smrg 13187ec681f3Smrg nir_ssa_def *deref_is = NULL; 13197ec681f3Smrg 13207ec681f3Smrg if (nir_deref_mode_must_be(deref, modes)) 13217ec681f3Smrg deref_is = nir_imm_true(b); 13227ec681f3Smrg 13237ec681f3Smrg if (!nir_deref_mode_may_be(deref, modes)) 13247ec681f3Smrg deref_is = nir_imm_false(b); 13257ec681f3Smrg 13267ec681f3Smrg if (deref_is == NULL) 13277ec681f3Smrg return false; 13287ec681f3Smrg 13297ec681f3Smrg nir_ssa_def_rewrite_uses(&intrin->dest.ssa, deref_is); 13307ec681f3Smrg nir_instr_remove(&intrin->instr); 13317ec681f3Smrg return true; 13327ec681f3Smrg} 13337ec681f3Smrg 13347e102996Smayabool 13357e102996Smayanir_opt_deref_impl(nir_function_impl *impl) 13367e102996Smaya{ 13377e102996Smaya bool progress = false; 13387e102996Smaya 13397e102996Smaya nir_builder b; 13407e102996Smaya nir_builder_init(&b, impl); 13417e102996Smaya 13427e102996Smaya nir_foreach_block(block, impl) { 13437e102996Smaya nir_foreach_instr_safe(instr, block) { 13447e102996Smaya b.cursor = nir_before_instr(instr); 13457e102996Smaya 13467ec681f3Smrg switch (instr->type) { 13477ec681f3Smrg case nir_instr_type_deref: { 13487ec681f3Smrg nir_deref_instr *deref = nir_instr_as_deref(instr); 13497ec681f3Smrg 13507ec681f3Smrg if (opt_restrict_deref_modes(deref)) 13517e102996Smaya progress = true; 13527ec681f3Smrg 13537ec681f3Smrg switch (deref->deref_type) { 13547ec681f3Smrg case nir_deref_type_ptr_as_array: 13557ec681f3Smrg if (opt_deref_ptr_as_array(&b, deref)) 13567ec681f3Smrg progress = true; 13577ec681f3Smrg break; 13587ec681f3Smrg 13597ec681f3Smrg case nir_deref_type_cast: 13607ec681f3Smrg if (opt_deref_cast(&b, deref)) 13617ec681f3Smrg progress = true; 13627ec681f3Smrg break; 13637ec681f3Smrg 13647ec681f3Smrg default: 13657ec681f3Smrg /* Do nothing */ 13667ec681f3Smrg break; 13677ec681f3Smrg } 13687e102996Smaya break; 13697ec681f3Smrg } 13707e102996Smaya 13717ec681f3Smrg case nir_instr_type_intrinsic: { 13727ec681f3Smrg nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 13737ec681f3Smrg switch (intrin->intrinsic) { 13747ec681f3Smrg case nir_intrinsic_load_deref: 13757ec681f3Smrg if (opt_load_vec_deref(&b, intrin)) 13767ec681f3Smrg progress = true; 13777ec681f3Smrg break; 13787ec681f3Smrg 13797ec681f3Smrg case nir_intrinsic_store_deref: 13807ec681f3Smrg if (opt_store_vec_deref(&b, intrin)) 13817ec681f3Smrg progress = true; 13827ec681f3Smrg break; 13837ec681f3Smrg 13847ec681f3Smrg case nir_intrinsic_deref_mode_is: 13857ec681f3Smrg if (opt_known_deref_mode_is(&b, intrin)) 13867ec681f3Smrg progress = true; 13877ec681f3Smrg break; 13887ec681f3Smrg 13897ec681f3Smrg default: 13907ec681f3Smrg /* Do nothing */ 13917ec681f3Smrg break; 13927ec681f3Smrg } 13937e102996Smaya break; 13947ec681f3Smrg } 13957e102996Smaya 13967e102996Smaya default: 13977e102996Smaya /* Do nothing */ 13987e102996Smaya break; 13997e102996Smaya } 14007e102996Smaya } 14017e102996Smaya } 14027e102996Smaya 14037e102996Smaya if (progress) { 14047e102996Smaya nir_metadata_preserve(impl, nir_metadata_block_index | 14057e102996Smaya nir_metadata_dominance); 14067e102996Smaya } else { 14077ec681f3Smrg nir_metadata_preserve(impl, nir_metadata_all); 14087e102996Smaya } 14097e102996Smaya 14107e102996Smaya return progress; 14117e102996Smaya} 14127e102996Smaya 14137e102996Smayabool 14147e102996Smayanir_opt_deref(nir_shader *shader) 14157e102996Smaya{ 14167e102996Smaya bool progress = false; 14177e102996Smaya 14187e102996Smaya nir_foreach_function(func, shader) { 14197e102996Smaya if (func->impl && nir_opt_deref_impl(func->impl)) 14207e102996Smaya progress = true; 14217e102996Smaya } 14227e102996Smaya 14237e102996Smaya return progress; 14247e102996Smaya} 1425