1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2018 Intel Corporation 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21b8e80941Smrg * IN THE SOFTWARE. 22b8e80941Smrg */ 23b8e80941Smrg 24b8e80941Smrg#include "nir.h" 25b8e80941Smrg#include "nir_builder.h" 26b8e80941Smrg#include "nir_deref.h" 27b8e80941Smrg#include "nir_vla.h" 28b8e80941Smrg 29b8e80941Smrg#include "util/u_math.h" 30b8e80941Smrg 31b8e80941Smrg 32b8e80941Smrgstruct split_var_state { 33b8e80941Smrg void *mem_ctx; 34b8e80941Smrg 35b8e80941Smrg nir_shader *shader; 36b8e80941Smrg nir_function_impl *impl; 37b8e80941Smrg 38b8e80941Smrg nir_variable *base_var; 39b8e80941Smrg}; 40b8e80941Smrg 41b8e80941Smrgstruct field { 42b8e80941Smrg struct field *parent; 43b8e80941Smrg 44b8e80941Smrg const struct glsl_type *type; 45b8e80941Smrg 46b8e80941Smrg unsigned num_fields; 47b8e80941Smrg struct field *fields; 48b8e80941Smrg 49b8e80941Smrg nir_variable *var; 50b8e80941Smrg}; 51b8e80941Smrg 52b8e80941Smrgstatic const struct glsl_type * 53b8e80941Smrgwrap_type_in_array(const struct glsl_type *type, 54b8e80941Smrg const struct glsl_type *array_type) 55b8e80941Smrg{ 56b8e80941Smrg if (!glsl_type_is_array(array_type)) 57b8e80941Smrg return type; 58b8e80941Smrg 59b8e80941Smrg const struct glsl_type *elem_type = 60b8e80941Smrg wrap_type_in_array(type, glsl_get_array_element(array_type)); 61b8e80941Smrg assert(glsl_get_explicit_stride(array_type) == 0); 62b8e80941Smrg return glsl_array_type(elem_type, glsl_get_length(array_type), 0); 63b8e80941Smrg} 64b8e80941Smrg 65b8e80941Smrgstatic int 66b8e80941Smrgnum_array_levels_in_array_of_vector_type(const struct glsl_type *type) 67b8e80941Smrg{ 68b8e80941Smrg int num_levels = 0; 69b8e80941Smrg while (true) { 70b8e80941Smrg if (glsl_type_is_array_or_matrix(type)) { 71b8e80941Smrg num_levels++; 72b8e80941Smrg type = glsl_get_array_element(type); 73b8e80941Smrg } else if (glsl_type_is_vector_or_scalar(type)) { 74b8e80941Smrg return num_levels; 75b8e80941Smrg } else { 76b8e80941Smrg /* Not an array of vectors */ 77b8e80941Smrg return -1; 78b8e80941Smrg } 79b8e80941Smrg } 80b8e80941Smrg} 81b8e80941Smrg 82b8e80941Smrgstatic void 83b8e80941Smrginit_field_for_type(struct field *field, struct field *parent, 84b8e80941Smrg const struct glsl_type *type, 85b8e80941Smrg const char *name, 86b8e80941Smrg struct split_var_state *state) 87b8e80941Smrg{ 88b8e80941Smrg *field = (struct field) { 89b8e80941Smrg .parent = parent, 90b8e80941Smrg .type = type, 91b8e80941Smrg }; 92b8e80941Smrg 93b8e80941Smrg const struct glsl_type *struct_type = glsl_without_array(type); 94b8e80941Smrg if (glsl_type_is_struct_or_ifc(struct_type)) { 95b8e80941Smrg field->num_fields = glsl_get_length(struct_type), 96b8e80941Smrg field->fields = ralloc_array(state->mem_ctx, struct field, 97b8e80941Smrg field->num_fields); 98b8e80941Smrg for (unsigned i = 0; i < field->num_fields; i++) { 99b8e80941Smrg char *field_name = NULL; 100b8e80941Smrg if (name) { 101b8e80941Smrg field_name = ralloc_asprintf(state->mem_ctx, "%s_%s", name, 102b8e80941Smrg glsl_get_struct_elem_name(struct_type, i)); 103b8e80941Smrg } else { 104b8e80941Smrg field_name = ralloc_asprintf(state->mem_ctx, "{unnamed %s}_%s", 105b8e80941Smrg glsl_get_type_name(struct_type), 106b8e80941Smrg glsl_get_struct_elem_name(struct_type, i)); 107b8e80941Smrg } 108b8e80941Smrg init_field_for_type(&field->fields[i], field, 109b8e80941Smrg glsl_get_struct_field(struct_type, i), 110b8e80941Smrg field_name, state); 111b8e80941Smrg } 112b8e80941Smrg } else { 113b8e80941Smrg const struct glsl_type *var_type = type; 114b8e80941Smrg for (struct field *f = field->parent; f; f = f->parent) 115b8e80941Smrg var_type = wrap_type_in_array(var_type, f->type); 116b8e80941Smrg 117b8e80941Smrg nir_variable_mode mode = state->base_var->data.mode; 118b8e80941Smrg if (mode == nir_var_function_temp) { 119b8e80941Smrg field->var = nir_local_variable_create(state->impl, var_type, name); 120b8e80941Smrg } else { 121b8e80941Smrg field->var = nir_variable_create(state->shader, mode, var_type, name); 122b8e80941Smrg } 123b8e80941Smrg } 124b8e80941Smrg} 125b8e80941Smrg 126b8e80941Smrgstatic bool 127b8e80941Smrgsplit_var_list_structs(nir_shader *shader, 128b8e80941Smrg nir_function_impl *impl, 129b8e80941Smrg struct exec_list *vars, 130b8e80941Smrg struct hash_table *var_field_map, 131b8e80941Smrg void *mem_ctx) 132b8e80941Smrg{ 133b8e80941Smrg struct split_var_state state = { 134b8e80941Smrg .mem_ctx = mem_ctx, 135b8e80941Smrg .shader = shader, 136b8e80941Smrg .impl = impl, 137b8e80941Smrg }; 138b8e80941Smrg 139b8e80941Smrg struct exec_list split_vars; 140b8e80941Smrg exec_list_make_empty(&split_vars); 141b8e80941Smrg 142b8e80941Smrg /* To avoid list confusion (we'll be adding things as we split variables), 143b8e80941Smrg * pull all of the variables we plan to split off of the list 144b8e80941Smrg */ 145b8e80941Smrg nir_foreach_variable_safe(var, vars) { 146b8e80941Smrg if (!glsl_type_is_struct_or_ifc(glsl_without_array(var->type))) 147b8e80941Smrg continue; 148b8e80941Smrg 149b8e80941Smrg exec_node_remove(&var->node); 150b8e80941Smrg exec_list_push_tail(&split_vars, &var->node); 151b8e80941Smrg } 152b8e80941Smrg 153b8e80941Smrg nir_foreach_variable(var, &split_vars) { 154b8e80941Smrg state.base_var = var; 155b8e80941Smrg 156b8e80941Smrg struct field *root_field = ralloc(mem_ctx, struct field); 157b8e80941Smrg init_field_for_type(root_field, NULL, var->type, var->name, &state); 158b8e80941Smrg _mesa_hash_table_insert(var_field_map, var, root_field); 159b8e80941Smrg } 160b8e80941Smrg 161b8e80941Smrg return !exec_list_is_empty(&split_vars); 162b8e80941Smrg} 163b8e80941Smrg 164b8e80941Smrgstatic void 165b8e80941Smrgsplit_struct_derefs_impl(nir_function_impl *impl, 166b8e80941Smrg struct hash_table *var_field_map, 167b8e80941Smrg nir_variable_mode modes, 168b8e80941Smrg void *mem_ctx) 169b8e80941Smrg{ 170b8e80941Smrg nir_builder b; 171b8e80941Smrg nir_builder_init(&b, impl); 172b8e80941Smrg 173b8e80941Smrg nir_foreach_block(block, impl) { 174b8e80941Smrg nir_foreach_instr_safe(instr, block) { 175b8e80941Smrg if (instr->type != nir_instr_type_deref) 176b8e80941Smrg continue; 177b8e80941Smrg 178b8e80941Smrg nir_deref_instr *deref = nir_instr_as_deref(instr); 179b8e80941Smrg if (!(deref->mode & modes)) 180b8e80941Smrg continue; 181b8e80941Smrg 182b8e80941Smrg /* Clean up any dead derefs we find lying around. They may refer to 183b8e80941Smrg * variables we're planning to split. 184b8e80941Smrg */ 185b8e80941Smrg if (nir_deref_instr_remove_if_unused(deref)) 186b8e80941Smrg continue; 187b8e80941Smrg 188b8e80941Smrg if (!glsl_type_is_vector_or_scalar(deref->type)) 189b8e80941Smrg continue; 190b8e80941Smrg 191b8e80941Smrg nir_variable *base_var = nir_deref_instr_get_variable(deref); 192b8e80941Smrg struct hash_entry *entry = 193b8e80941Smrg _mesa_hash_table_search(var_field_map, base_var); 194b8e80941Smrg if (!entry) 195b8e80941Smrg continue; 196b8e80941Smrg 197b8e80941Smrg struct field *root_field = entry->data; 198b8e80941Smrg 199b8e80941Smrg nir_deref_path path; 200b8e80941Smrg nir_deref_path_init(&path, deref, mem_ctx); 201b8e80941Smrg 202b8e80941Smrg struct field *tail_field = root_field; 203b8e80941Smrg for (unsigned i = 0; path.path[i]; i++) { 204b8e80941Smrg if (path.path[i]->deref_type != nir_deref_type_struct) 205b8e80941Smrg continue; 206b8e80941Smrg 207b8e80941Smrg assert(i > 0); 208b8e80941Smrg assert(glsl_type_is_struct_or_ifc(path.path[i - 1]->type)); 209b8e80941Smrg assert(path.path[i - 1]->type == 210b8e80941Smrg glsl_without_array(tail_field->type)); 211b8e80941Smrg 212b8e80941Smrg tail_field = &tail_field->fields[path.path[i]->strct.index]; 213b8e80941Smrg } 214b8e80941Smrg nir_variable *split_var = tail_field->var; 215b8e80941Smrg 216b8e80941Smrg nir_deref_instr *new_deref = NULL; 217b8e80941Smrg for (unsigned i = 0; path.path[i]; i++) { 218b8e80941Smrg nir_deref_instr *p = path.path[i]; 219b8e80941Smrg b.cursor = nir_after_instr(&p->instr); 220b8e80941Smrg 221b8e80941Smrg switch (p->deref_type) { 222b8e80941Smrg case nir_deref_type_var: 223b8e80941Smrg assert(new_deref == NULL); 224b8e80941Smrg new_deref = nir_build_deref_var(&b, split_var); 225b8e80941Smrg break; 226b8e80941Smrg 227b8e80941Smrg case nir_deref_type_array: 228b8e80941Smrg case nir_deref_type_array_wildcard: 229b8e80941Smrg new_deref = nir_build_deref_follower(&b, new_deref, p); 230b8e80941Smrg break; 231b8e80941Smrg 232b8e80941Smrg case nir_deref_type_struct: 233b8e80941Smrg /* Nothing to do; we're splitting structs */ 234b8e80941Smrg break; 235b8e80941Smrg 236b8e80941Smrg default: 237b8e80941Smrg unreachable("Invalid deref type in path"); 238b8e80941Smrg } 239b8e80941Smrg } 240b8e80941Smrg 241b8e80941Smrg assert(new_deref->type == deref->type); 242b8e80941Smrg nir_ssa_def_rewrite_uses(&deref->dest.ssa, 243b8e80941Smrg nir_src_for_ssa(&new_deref->dest.ssa)); 244b8e80941Smrg nir_deref_instr_remove_if_unused(deref); 245b8e80941Smrg } 246b8e80941Smrg } 247b8e80941Smrg} 248b8e80941Smrg 249b8e80941Smrg/** A pass for splitting structs into multiple variables 250b8e80941Smrg * 251b8e80941Smrg * This pass splits arrays of structs into multiple variables, one for each 252b8e80941Smrg * (possibly nested) structure member. After this pass completes, no 253b8e80941Smrg * variables of the given mode will contain a struct type. 254b8e80941Smrg */ 255b8e80941Smrgbool 256b8e80941Smrgnir_split_struct_vars(nir_shader *shader, nir_variable_mode modes) 257b8e80941Smrg{ 258b8e80941Smrg void *mem_ctx = ralloc_context(NULL); 259b8e80941Smrg struct hash_table *var_field_map = 260b8e80941Smrg _mesa_pointer_hash_table_create(mem_ctx); 261b8e80941Smrg 262b8e80941Smrg assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes); 263b8e80941Smrg 264b8e80941Smrg bool has_global_splits = false; 265b8e80941Smrg if (modes & nir_var_shader_temp) { 266b8e80941Smrg has_global_splits = split_var_list_structs(shader, NULL, 267b8e80941Smrg &shader->globals, 268b8e80941Smrg var_field_map, mem_ctx); 269b8e80941Smrg } 270b8e80941Smrg 271b8e80941Smrg bool progress = false; 272b8e80941Smrg nir_foreach_function(function, shader) { 273b8e80941Smrg if (!function->impl) 274b8e80941Smrg continue; 275b8e80941Smrg 276b8e80941Smrg bool has_local_splits = false; 277b8e80941Smrg if (modes & nir_var_function_temp) { 278b8e80941Smrg has_local_splits = split_var_list_structs(shader, function->impl, 279b8e80941Smrg &function->impl->locals, 280b8e80941Smrg var_field_map, mem_ctx); 281b8e80941Smrg } 282b8e80941Smrg 283b8e80941Smrg if (has_global_splits || has_local_splits) { 284b8e80941Smrg split_struct_derefs_impl(function->impl, var_field_map, 285b8e80941Smrg modes, mem_ctx); 286b8e80941Smrg 287b8e80941Smrg nir_metadata_preserve(function->impl, nir_metadata_block_index | 288b8e80941Smrg nir_metadata_dominance); 289b8e80941Smrg progress = true; 290b8e80941Smrg } 291b8e80941Smrg } 292b8e80941Smrg 293b8e80941Smrg ralloc_free(mem_ctx); 294b8e80941Smrg 295b8e80941Smrg return progress; 296b8e80941Smrg} 297b8e80941Smrg 298b8e80941Smrgstruct array_level_info { 299b8e80941Smrg unsigned array_len; 300b8e80941Smrg bool split; 301b8e80941Smrg}; 302b8e80941Smrg 303b8e80941Smrgstruct array_split { 304b8e80941Smrg /* Only set if this is the tail end of the splitting */ 305b8e80941Smrg nir_variable *var; 306b8e80941Smrg 307b8e80941Smrg unsigned num_splits; 308b8e80941Smrg struct array_split *splits; 309b8e80941Smrg}; 310b8e80941Smrg 311b8e80941Smrgstruct array_var_info { 312b8e80941Smrg nir_variable *base_var; 313b8e80941Smrg 314b8e80941Smrg const struct glsl_type *split_var_type; 315b8e80941Smrg 316b8e80941Smrg bool split_var; 317b8e80941Smrg struct array_split root_split; 318b8e80941Smrg 319b8e80941Smrg unsigned num_levels; 320b8e80941Smrg struct array_level_info levels[0]; 321b8e80941Smrg}; 322b8e80941Smrg 323b8e80941Smrgstatic bool 324b8e80941Smrginit_var_list_array_infos(struct exec_list *vars, 325b8e80941Smrg struct hash_table *var_info_map, 326b8e80941Smrg void *mem_ctx) 327b8e80941Smrg{ 328b8e80941Smrg bool has_array = false; 329b8e80941Smrg 330b8e80941Smrg nir_foreach_variable(var, vars) { 331b8e80941Smrg int num_levels = num_array_levels_in_array_of_vector_type(var->type); 332b8e80941Smrg if (num_levels <= 0) 333b8e80941Smrg continue; 334b8e80941Smrg 335b8e80941Smrg struct array_var_info *info = 336b8e80941Smrg rzalloc_size(mem_ctx, sizeof(*info) + 337b8e80941Smrg num_levels * sizeof(info->levels[0])); 338b8e80941Smrg 339b8e80941Smrg info->base_var = var; 340b8e80941Smrg info->num_levels = num_levels; 341b8e80941Smrg 342b8e80941Smrg const struct glsl_type *type = var->type; 343b8e80941Smrg for (int i = 0; i < num_levels; i++) { 344b8e80941Smrg info->levels[i].array_len = glsl_get_length(type); 345b8e80941Smrg type = glsl_get_array_element(type); 346b8e80941Smrg 347b8e80941Smrg /* All levels start out initially as split */ 348b8e80941Smrg info->levels[i].split = true; 349b8e80941Smrg } 350b8e80941Smrg 351b8e80941Smrg _mesa_hash_table_insert(var_info_map, var, info); 352b8e80941Smrg has_array = true; 353b8e80941Smrg } 354b8e80941Smrg 355b8e80941Smrg return has_array; 356b8e80941Smrg} 357b8e80941Smrg 358b8e80941Smrgstatic struct array_var_info * 359b8e80941Smrgget_array_var_info(nir_variable *var, 360b8e80941Smrg struct hash_table *var_info_map) 361b8e80941Smrg{ 362b8e80941Smrg struct hash_entry *entry = 363b8e80941Smrg _mesa_hash_table_search(var_info_map, var); 364b8e80941Smrg return entry ? entry->data : NULL; 365b8e80941Smrg} 366b8e80941Smrg 367b8e80941Smrgstatic struct array_var_info * 368b8e80941Smrgget_array_deref_info(nir_deref_instr *deref, 369b8e80941Smrg struct hash_table *var_info_map, 370b8e80941Smrg nir_variable_mode modes) 371b8e80941Smrg{ 372b8e80941Smrg if (!(deref->mode & modes)) 373b8e80941Smrg return NULL; 374b8e80941Smrg 375b8e80941Smrg return get_array_var_info(nir_deref_instr_get_variable(deref), 376b8e80941Smrg var_info_map); 377b8e80941Smrg} 378b8e80941Smrg 379b8e80941Smrgstatic void 380b8e80941Smrgmark_array_deref_used(nir_deref_instr *deref, 381b8e80941Smrg struct hash_table *var_info_map, 382b8e80941Smrg nir_variable_mode modes, 383b8e80941Smrg void *mem_ctx) 384b8e80941Smrg{ 385b8e80941Smrg struct array_var_info *info = 386b8e80941Smrg get_array_deref_info(deref, var_info_map, modes); 387b8e80941Smrg if (!info) 388b8e80941Smrg return; 389b8e80941Smrg 390b8e80941Smrg nir_deref_path path; 391b8e80941Smrg nir_deref_path_init(&path, deref, mem_ctx); 392b8e80941Smrg 393b8e80941Smrg /* Walk the path and look for indirects. If we have an array deref with an 394b8e80941Smrg * indirect, mark the given level as not being split. 395b8e80941Smrg */ 396b8e80941Smrg for (unsigned i = 0; i < info->num_levels; i++) { 397b8e80941Smrg nir_deref_instr *p = path.path[i + 1]; 398b8e80941Smrg if (p->deref_type == nir_deref_type_array && 399b8e80941Smrg !nir_src_is_const(p->arr.index)) 400b8e80941Smrg info->levels[i].split = false; 401b8e80941Smrg } 402b8e80941Smrg} 403b8e80941Smrg 404b8e80941Smrgstatic void 405b8e80941Smrgmark_array_usage_impl(nir_function_impl *impl, 406b8e80941Smrg struct hash_table *var_info_map, 407b8e80941Smrg nir_variable_mode modes, 408b8e80941Smrg void *mem_ctx) 409b8e80941Smrg{ 410b8e80941Smrg nir_foreach_block(block, impl) { 411b8e80941Smrg nir_foreach_instr(instr, block) { 412b8e80941Smrg if (instr->type != nir_instr_type_intrinsic) 413b8e80941Smrg continue; 414b8e80941Smrg 415b8e80941Smrg nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 416b8e80941Smrg switch (intrin->intrinsic) { 417b8e80941Smrg case nir_intrinsic_copy_deref: 418b8e80941Smrg mark_array_deref_used(nir_src_as_deref(intrin->src[1]), 419b8e80941Smrg var_info_map, modes, mem_ctx); 420b8e80941Smrg /* Fall Through */ 421b8e80941Smrg 422b8e80941Smrg case nir_intrinsic_load_deref: 423b8e80941Smrg case nir_intrinsic_store_deref: 424b8e80941Smrg mark_array_deref_used(nir_src_as_deref(intrin->src[0]), 425b8e80941Smrg var_info_map, modes, mem_ctx); 426b8e80941Smrg break; 427b8e80941Smrg 428b8e80941Smrg default: 429b8e80941Smrg break; 430b8e80941Smrg } 431b8e80941Smrg } 432b8e80941Smrg } 433b8e80941Smrg} 434b8e80941Smrg 435b8e80941Smrgstatic void 436b8e80941Smrgcreate_split_array_vars(struct array_var_info *var_info, 437b8e80941Smrg unsigned level, 438b8e80941Smrg struct array_split *split, 439b8e80941Smrg const char *name, 440b8e80941Smrg nir_shader *shader, 441b8e80941Smrg nir_function_impl *impl, 442b8e80941Smrg void *mem_ctx) 443b8e80941Smrg{ 444b8e80941Smrg while (level < var_info->num_levels && !var_info->levels[level].split) { 445b8e80941Smrg name = ralloc_asprintf(mem_ctx, "%s[*]", name); 446b8e80941Smrg level++; 447b8e80941Smrg } 448b8e80941Smrg 449b8e80941Smrg if (level == var_info->num_levels) { 450b8e80941Smrg /* We add parens to the variable name so it looks like "(foo[2][*])" so 451b8e80941Smrg * that further derefs will look like "(foo[2][*])[ssa_6]" 452b8e80941Smrg */ 453b8e80941Smrg name = ralloc_asprintf(mem_ctx, "(%s)", name); 454b8e80941Smrg 455b8e80941Smrg nir_variable_mode mode = var_info->base_var->data.mode; 456b8e80941Smrg if (mode == nir_var_function_temp) { 457b8e80941Smrg split->var = nir_local_variable_create(impl, 458b8e80941Smrg var_info->split_var_type, name); 459b8e80941Smrg } else { 460b8e80941Smrg split->var = nir_variable_create(shader, mode, 461b8e80941Smrg var_info->split_var_type, name); 462b8e80941Smrg } 463b8e80941Smrg } else { 464b8e80941Smrg assert(var_info->levels[level].split); 465b8e80941Smrg split->num_splits = var_info->levels[level].array_len; 466b8e80941Smrg split->splits = rzalloc_array(mem_ctx, struct array_split, 467b8e80941Smrg split->num_splits); 468b8e80941Smrg for (unsigned i = 0; i < split->num_splits; i++) { 469b8e80941Smrg create_split_array_vars(var_info, level + 1, &split->splits[i], 470b8e80941Smrg ralloc_asprintf(mem_ctx, "%s[%d]", name, i), 471b8e80941Smrg shader, impl, mem_ctx); 472b8e80941Smrg } 473b8e80941Smrg } 474b8e80941Smrg} 475b8e80941Smrg 476b8e80941Smrgstatic bool 477b8e80941Smrgsplit_var_list_arrays(nir_shader *shader, 478b8e80941Smrg nir_function_impl *impl, 479b8e80941Smrg struct exec_list *vars, 480b8e80941Smrg struct hash_table *var_info_map, 481b8e80941Smrg void *mem_ctx) 482b8e80941Smrg{ 483b8e80941Smrg struct exec_list split_vars; 484b8e80941Smrg exec_list_make_empty(&split_vars); 485b8e80941Smrg 486b8e80941Smrg nir_foreach_variable_safe(var, vars) { 487b8e80941Smrg struct array_var_info *info = get_array_var_info(var, var_info_map); 488b8e80941Smrg if (!info) 489b8e80941Smrg continue; 490b8e80941Smrg 491b8e80941Smrg bool has_split = false; 492b8e80941Smrg const struct glsl_type *split_type = 493b8e80941Smrg glsl_without_array_or_matrix(var->type); 494b8e80941Smrg for (int i = info->num_levels - 1; i >= 0; i--) { 495b8e80941Smrg if (info->levels[i].split) { 496b8e80941Smrg has_split = true; 497b8e80941Smrg continue; 498b8e80941Smrg } 499b8e80941Smrg 500b8e80941Smrg /* If the original type was a matrix type, we'd like to keep that so 501b8e80941Smrg * we don't convert matrices into arrays. 502b8e80941Smrg */ 503b8e80941Smrg if (i == info->num_levels - 1 && 504b8e80941Smrg glsl_type_is_matrix(glsl_without_array(var->type))) { 505b8e80941Smrg split_type = glsl_matrix_type(glsl_get_base_type(split_type), 506b8e80941Smrg glsl_get_components(split_type), 507b8e80941Smrg info->levels[i].array_len); 508b8e80941Smrg } else { 509b8e80941Smrg split_type = glsl_array_type(split_type, info->levels[i].array_len, 0); 510b8e80941Smrg } 511b8e80941Smrg } 512b8e80941Smrg 513b8e80941Smrg if (has_split) { 514b8e80941Smrg info->split_var_type = split_type; 515b8e80941Smrg /* To avoid list confusion (we'll be adding things as we split 516b8e80941Smrg * variables), pull all of the variables we plan to split off of the 517b8e80941Smrg * main variable list. 518b8e80941Smrg */ 519b8e80941Smrg exec_node_remove(&var->node); 520b8e80941Smrg exec_list_push_tail(&split_vars, &var->node); 521b8e80941Smrg } else { 522b8e80941Smrg assert(split_type == glsl_get_bare_type(var->type)); 523b8e80941Smrg /* If we're not modifying this variable, delete the info so we skip 524b8e80941Smrg * it faster in later passes. 525b8e80941Smrg */ 526b8e80941Smrg _mesa_hash_table_remove_key(var_info_map, var); 527b8e80941Smrg } 528b8e80941Smrg } 529b8e80941Smrg 530b8e80941Smrg nir_foreach_variable(var, &split_vars) { 531b8e80941Smrg struct array_var_info *info = get_array_var_info(var, var_info_map); 532b8e80941Smrg create_split_array_vars(info, 0, &info->root_split, var->name, 533b8e80941Smrg shader, impl, mem_ctx); 534b8e80941Smrg } 535b8e80941Smrg 536b8e80941Smrg return !exec_list_is_empty(&split_vars); 537b8e80941Smrg} 538b8e80941Smrg 539b8e80941Smrgstatic bool 540b8e80941Smrgderef_has_split_wildcard(nir_deref_path *path, 541b8e80941Smrg struct array_var_info *info) 542b8e80941Smrg{ 543b8e80941Smrg if (info == NULL) 544b8e80941Smrg return false; 545b8e80941Smrg 546b8e80941Smrg assert(path->path[0]->var == info->base_var); 547b8e80941Smrg for (unsigned i = 0; i < info->num_levels; i++) { 548b8e80941Smrg if (path->path[i + 1]->deref_type == nir_deref_type_array_wildcard && 549b8e80941Smrg info->levels[i].split) 550b8e80941Smrg return true; 551b8e80941Smrg } 552b8e80941Smrg 553b8e80941Smrg return false; 554b8e80941Smrg} 555b8e80941Smrg 556b8e80941Smrgstatic bool 557b8e80941Smrgarray_path_is_out_of_bounds(nir_deref_path *path, 558b8e80941Smrg struct array_var_info *info) 559b8e80941Smrg{ 560b8e80941Smrg if (info == NULL) 561b8e80941Smrg return false; 562b8e80941Smrg 563b8e80941Smrg assert(path->path[0]->var == info->base_var); 564b8e80941Smrg for (unsigned i = 0; i < info->num_levels; i++) { 565b8e80941Smrg nir_deref_instr *p = path->path[i + 1]; 566b8e80941Smrg if (p->deref_type == nir_deref_type_array_wildcard) 567b8e80941Smrg continue; 568b8e80941Smrg 569b8e80941Smrg if (nir_src_is_const(p->arr.index) && 570b8e80941Smrg nir_src_as_uint(p->arr.index) >= info->levels[i].array_len) 571b8e80941Smrg return true; 572b8e80941Smrg } 573b8e80941Smrg 574b8e80941Smrg return false; 575b8e80941Smrg} 576b8e80941Smrg 577b8e80941Smrgstatic void 578b8e80941Smrgemit_split_copies(nir_builder *b, 579b8e80941Smrg struct array_var_info *dst_info, nir_deref_path *dst_path, 580b8e80941Smrg unsigned dst_level, nir_deref_instr *dst, 581b8e80941Smrg struct array_var_info *src_info, nir_deref_path *src_path, 582b8e80941Smrg unsigned src_level, nir_deref_instr *src) 583b8e80941Smrg{ 584b8e80941Smrg nir_deref_instr *dst_p, *src_p; 585b8e80941Smrg 586b8e80941Smrg while ((dst_p = dst_path->path[dst_level + 1])) { 587b8e80941Smrg if (dst_p->deref_type == nir_deref_type_array_wildcard) 588b8e80941Smrg break; 589b8e80941Smrg 590b8e80941Smrg dst = nir_build_deref_follower(b, dst, dst_p); 591b8e80941Smrg dst_level++; 592b8e80941Smrg } 593b8e80941Smrg 594b8e80941Smrg while ((src_p = src_path->path[src_level + 1])) { 595b8e80941Smrg if (src_p->deref_type == nir_deref_type_array_wildcard) 596b8e80941Smrg break; 597b8e80941Smrg 598b8e80941Smrg src = nir_build_deref_follower(b, src, src_p); 599b8e80941Smrg src_level++; 600b8e80941Smrg } 601b8e80941Smrg 602b8e80941Smrg if (src_p == NULL || dst_p == NULL) { 603b8e80941Smrg assert(src_p == NULL && dst_p == NULL); 604b8e80941Smrg nir_copy_deref(b, dst, src); 605b8e80941Smrg } else { 606b8e80941Smrg assert(dst_p->deref_type == nir_deref_type_array_wildcard && 607b8e80941Smrg src_p->deref_type == nir_deref_type_array_wildcard); 608b8e80941Smrg 609b8e80941Smrg if ((dst_info && dst_info->levels[dst_level].split) || 610b8e80941Smrg (src_info && src_info->levels[src_level].split)) { 611b8e80941Smrg /* There are no indirects at this level on one of the source or the 612b8e80941Smrg * destination so we are lowering it. 613b8e80941Smrg */ 614b8e80941Smrg assert(glsl_get_length(dst_path->path[dst_level]->type) == 615b8e80941Smrg glsl_get_length(src_path->path[src_level]->type)); 616b8e80941Smrg unsigned len = glsl_get_length(dst_path->path[dst_level]->type); 617b8e80941Smrg for (unsigned i = 0; i < len; i++) { 618b8e80941Smrg emit_split_copies(b, dst_info, dst_path, dst_level + 1, 619b8e80941Smrg nir_build_deref_array_imm(b, dst, i), 620b8e80941Smrg src_info, src_path, src_level + 1, 621b8e80941Smrg nir_build_deref_array_imm(b, src, i)); 622b8e80941Smrg } 623b8e80941Smrg } else { 624b8e80941Smrg /* Neither side is being split so we just keep going */ 625b8e80941Smrg emit_split_copies(b, dst_info, dst_path, dst_level + 1, 626b8e80941Smrg nir_build_deref_array_wildcard(b, dst), 627b8e80941Smrg src_info, src_path, src_level + 1, 628b8e80941Smrg nir_build_deref_array_wildcard(b, src)); 629b8e80941Smrg } 630b8e80941Smrg } 631b8e80941Smrg} 632b8e80941Smrg 633b8e80941Smrgstatic void 634b8e80941Smrgsplit_array_copies_impl(nir_function_impl *impl, 635b8e80941Smrg struct hash_table *var_info_map, 636b8e80941Smrg nir_variable_mode modes, 637b8e80941Smrg void *mem_ctx) 638b8e80941Smrg{ 639b8e80941Smrg nir_builder b; 640b8e80941Smrg nir_builder_init(&b, impl); 641b8e80941Smrg 642b8e80941Smrg nir_foreach_block(block, impl) { 643b8e80941Smrg nir_foreach_instr_safe(instr, block) { 644b8e80941Smrg if (instr->type != nir_instr_type_intrinsic) 645b8e80941Smrg continue; 646b8e80941Smrg 647b8e80941Smrg nir_intrinsic_instr *copy = nir_instr_as_intrinsic(instr); 648b8e80941Smrg if (copy->intrinsic != nir_intrinsic_copy_deref) 649b8e80941Smrg continue; 650b8e80941Smrg 651b8e80941Smrg nir_deref_instr *dst_deref = nir_src_as_deref(copy->src[0]); 652b8e80941Smrg nir_deref_instr *src_deref = nir_src_as_deref(copy->src[1]); 653b8e80941Smrg 654b8e80941Smrg struct array_var_info *dst_info = 655b8e80941Smrg get_array_deref_info(dst_deref, var_info_map, modes); 656b8e80941Smrg struct array_var_info *src_info = 657b8e80941Smrg get_array_deref_info(src_deref, var_info_map, modes); 658b8e80941Smrg 659b8e80941Smrg if (!src_info && !dst_info) 660b8e80941Smrg continue; 661b8e80941Smrg 662b8e80941Smrg nir_deref_path dst_path, src_path; 663b8e80941Smrg nir_deref_path_init(&dst_path, dst_deref, mem_ctx); 664b8e80941Smrg nir_deref_path_init(&src_path, src_deref, mem_ctx); 665b8e80941Smrg 666b8e80941Smrg if (!deref_has_split_wildcard(&dst_path, dst_info) && 667b8e80941Smrg !deref_has_split_wildcard(&src_path, src_info)) 668b8e80941Smrg continue; 669b8e80941Smrg 670b8e80941Smrg b.cursor = nir_instr_remove(©->instr); 671b8e80941Smrg 672b8e80941Smrg emit_split_copies(&b, dst_info, &dst_path, 0, dst_path.path[0], 673b8e80941Smrg src_info, &src_path, 0, src_path.path[0]); 674b8e80941Smrg } 675b8e80941Smrg } 676b8e80941Smrg} 677b8e80941Smrg 678b8e80941Smrgstatic void 679b8e80941Smrgsplit_array_access_impl(nir_function_impl *impl, 680b8e80941Smrg struct hash_table *var_info_map, 681b8e80941Smrg nir_variable_mode modes, 682b8e80941Smrg void *mem_ctx) 683b8e80941Smrg{ 684b8e80941Smrg nir_builder b; 685b8e80941Smrg nir_builder_init(&b, impl); 686b8e80941Smrg 687b8e80941Smrg nir_foreach_block(block, impl) { 688b8e80941Smrg nir_foreach_instr_safe(instr, block) { 689b8e80941Smrg if (instr->type == nir_instr_type_deref) { 690b8e80941Smrg /* Clean up any dead derefs we find lying around. They may refer 691b8e80941Smrg * to variables we're planning to split. 692b8e80941Smrg */ 693b8e80941Smrg nir_deref_instr *deref = nir_instr_as_deref(instr); 694b8e80941Smrg if (deref->mode & modes) 695b8e80941Smrg nir_deref_instr_remove_if_unused(deref); 696b8e80941Smrg continue; 697b8e80941Smrg } 698b8e80941Smrg 699b8e80941Smrg if (instr->type != nir_instr_type_intrinsic) 700b8e80941Smrg continue; 701b8e80941Smrg 702b8e80941Smrg nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 703b8e80941Smrg if (intrin->intrinsic != nir_intrinsic_load_deref && 704b8e80941Smrg intrin->intrinsic != nir_intrinsic_store_deref && 705b8e80941Smrg intrin->intrinsic != nir_intrinsic_copy_deref) 706b8e80941Smrg continue; 707b8e80941Smrg 708b8e80941Smrg const unsigned num_derefs = 709b8e80941Smrg intrin->intrinsic == nir_intrinsic_copy_deref ? 2 : 1; 710b8e80941Smrg 711b8e80941Smrg for (unsigned d = 0; d < num_derefs; d++) { 712b8e80941Smrg nir_deref_instr *deref = nir_src_as_deref(intrin->src[d]); 713b8e80941Smrg 714b8e80941Smrg struct array_var_info *info = 715b8e80941Smrg get_array_deref_info(deref, var_info_map, modes); 716b8e80941Smrg if (!info) 717b8e80941Smrg continue; 718b8e80941Smrg 719b8e80941Smrg nir_deref_path path; 720b8e80941Smrg nir_deref_path_init(&path, deref, mem_ctx); 721b8e80941Smrg 722b8e80941Smrg b.cursor = nir_before_instr(&intrin->instr); 723b8e80941Smrg 724b8e80941Smrg if (array_path_is_out_of_bounds(&path, info)) { 725b8e80941Smrg /* If one of the derefs is out-of-bounds, we just delete the 726b8e80941Smrg * instruction. If a destination is out of bounds, then it may 727b8e80941Smrg * have been in-bounds prior to shrinking so we don't want to 728b8e80941Smrg * accidentally stomp something. However, we've already proven 729b8e80941Smrg * that it will never be read so it's safe to delete. If a 730b8e80941Smrg * source is out of bounds then it is loading random garbage. 731b8e80941Smrg * For loads, we replace their uses with an undef instruction 732b8e80941Smrg * and for copies we just delete the copy since it was writing 733b8e80941Smrg * undefined garbage anyway and we may as well leave the random 734b8e80941Smrg * garbage in the destination alone. 735b8e80941Smrg */ 736b8e80941Smrg if (intrin->intrinsic == nir_intrinsic_load_deref) { 737b8e80941Smrg nir_ssa_def *u = 738b8e80941Smrg nir_ssa_undef(&b, intrin->dest.ssa.num_components, 739b8e80941Smrg intrin->dest.ssa.bit_size); 740b8e80941Smrg nir_ssa_def_rewrite_uses(&intrin->dest.ssa, 741b8e80941Smrg nir_src_for_ssa(u)); 742b8e80941Smrg } 743b8e80941Smrg nir_instr_remove(&intrin->instr); 744b8e80941Smrg for (unsigned i = 0; i < num_derefs; i++) 745b8e80941Smrg nir_deref_instr_remove_if_unused(nir_src_as_deref(intrin->src[i])); 746b8e80941Smrg break; 747b8e80941Smrg } 748b8e80941Smrg 749b8e80941Smrg struct array_split *split = &info->root_split; 750b8e80941Smrg for (unsigned i = 0; i < info->num_levels; i++) { 751b8e80941Smrg if (info->levels[i].split) { 752b8e80941Smrg nir_deref_instr *p = path.path[i + 1]; 753b8e80941Smrg unsigned index = nir_src_as_uint(p->arr.index); 754b8e80941Smrg assert(index < info->levels[i].array_len); 755b8e80941Smrg split = &split->splits[index]; 756b8e80941Smrg } 757b8e80941Smrg } 758b8e80941Smrg assert(!split->splits && split->var); 759b8e80941Smrg 760b8e80941Smrg nir_deref_instr *new_deref = nir_build_deref_var(&b, split->var); 761b8e80941Smrg for (unsigned i = 0; i < info->num_levels; i++) { 762b8e80941Smrg if (!info->levels[i].split) { 763b8e80941Smrg new_deref = nir_build_deref_follower(&b, new_deref, 764b8e80941Smrg path.path[i + 1]); 765b8e80941Smrg } 766b8e80941Smrg } 767b8e80941Smrg assert(new_deref->type == deref->type); 768b8e80941Smrg 769b8e80941Smrg /* Rewrite the deref source to point to the split one */ 770b8e80941Smrg nir_instr_rewrite_src(&intrin->instr, &intrin->src[d], 771b8e80941Smrg nir_src_for_ssa(&new_deref->dest.ssa)); 772b8e80941Smrg nir_deref_instr_remove_if_unused(deref); 773b8e80941Smrg } 774b8e80941Smrg } 775b8e80941Smrg } 776b8e80941Smrg} 777b8e80941Smrg 778b8e80941Smrg/** A pass for splitting arrays of vectors into multiple variables 779b8e80941Smrg * 780b8e80941Smrg * This pass looks at arrays (possibly multiple levels) of vectors (not 781b8e80941Smrg * structures or other types) and tries to split them into piles of variables, 782b8e80941Smrg * one for each array element. The heuristic used is simple: If a given array 783b8e80941Smrg * level is never used with an indirect, that array level will get split. 784b8e80941Smrg * 785b8e80941Smrg * This pass probably could handles structures easily enough but making a pass 786b8e80941Smrg * that could see through an array of structures of arrays would be difficult 787b8e80941Smrg * so it's best to just run nir_split_struct_vars first. 788b8e80941Smrg */ 789b8e80941Smrgbool 790b8e80941Smrgnir_split_array_vars(nir_shader *shader, nir_variable_mode modes) 791b8e80941Smrg{ 792b8e80941Smrg void *mem_ctx = ralloc_context(NULL); 793b8e80941Smrg struct hash_table *var_info_map = _mesa_pointer_hash_table_create(mem_ctx); 794b8e80941Smrg 795b8e80941Smrg assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes); 796b8e80941Smrg 797b8e80941Smrg bool has_global_array = false; 798b8e80941Smrg if (modes & nir_var_shader_temp) { 799b8e80941Smrg has_global_array = init_var_list_array_infos(&shader->globals, 800b8e80941Smrg var_info_map, mem_ctx); 801b8e80941Smrg } 802b8e80941Smrg 803b8e80941Smrg bool has_any_array = false; 804b8e80941Smrg nir_foreach_function(function, shader) { 805b8e80941Smrg if (!function->impl) 806b8e80941Smrg continue; 807b8e80941Smrg 808b8e80941Smrg bool has_local_array = false; 809b8e80941Smrg if (modes & nir_var_function_temp) { 810b8e80941Smrg has_local_array = init_var_list_array_infos(&function->impl->locals, 811b8e80941Smrg var_info_map, mem_ctx); 812b8e80941Smrg } 813b8e80941Smrg 814b8e80941Smrg if (has_global_array || has_local_array) { 815b8e80941Smrg has_any_array = true; 816b8e80941Smrg mark_array_usage_impl(function->impl, var_info_map, modes, mem_ctx); 817b8e80941Smrg } 818b8e80941Smrg } 819b8e80941Smrg 820b8e80941Smrg /* If we failed to find any arrays of arrays, bail early. */ 821b8e80941Smrg if (!has_any_array) { 822b8e80941Smrg ralloc_free(mem_ctx); 823b8e80941Smrg return false; 824b8e80941Smrg } 825b8e80941Smrg 826b8e80941Smrg bool has_global_splits = false; 827b8e80941Smrg if (modes & nir_var_shader_temp) { 828b8e80941Smrg has_global_splits = split_var_list_arrays(shader, NULL, 829b8e80941Smrg &shader->globals, 830b8e80941Smrg var_info_map, mem_ctx); 831b8e80941Smrg } 832b8e80941Smrg 833b8e80941Smrg bool progress = false; 834b8e80941Smrg nir_foreach_function(function, shader) { 835b8e80941Smrg if (!function->impl) 836b8e80941Smrg continue; 837b8e80941Smrg 838b8e80941Smrg bool has_local_splits = false; 839b8e80941Smrg if (modes & nir_var_function_temp) { 840b8e80941Smrg has_local_splits = split_var_list_arrays(shader, function->impl, 841b8e80941Smrg &function->impl->locals, 842b8e80941Smrg var_info_map, mem_ctx); 843b8e80941Smrg } 844b8e80941Smrg 845b8e80941Smrg if (has_global_splits || has_local_splits) { 846b8e80941Smrg split_array_copies_impl(function->impl, var_info_map, modes, mem_ctx); 847b8e80941Smrg split_array_access_impl(function->impl, var_info_map, modes, mem_ctx); 848b8e80941Smrg 849b8e80941Smrg nir_metadata_preserve(function->impl, nir_metadata_block_index | 850b8e80941Smrg nir_metadata_dominance); 851b8e80941Smrg progress = true; 852b8e80941Smrg } 853b8e80941Smrg } 854b8e80941Smrg 855b8e80941Smrg ralloc_free(mem_ctx); 856b8e80941Smrg 857b8e80941Smrg return progress; 858b8e80941Smrg} 859b8e80941Smrg 860b8e80941Smrgstruct array_level_usage { 861b8e80941Smrg unsigned array_len; 862b8e80941Smrg 863b8e80941Smrg /* The value UINT_MAX will be used to indicate an indirect */ 864b8e80941Smrg unsigned max_read; 865b8e80941Smrg unsigned max_written; 866b8e80941Smrg 867b8e80941Smrg /* True if there is a copy that isn't to/from a shrinkable array */ 868b8e80941Smrg bool has_external_copy; 869b8e80941Smrg struct set *levels_copied; 870b8e80941Smrg}; 871b8e80941Smrg 872b8e80941Smrgstruct vec_var_usage { 873b8e80941Smrg /* Convenience set of all components this variable has */ 874b8e80941Smrg nir_component_mask_t all_comps; 875b8e80941Smrg 876b8e80941Smrg nir_component_mask_t comps_read; 877b8e80941Smrg nir_component_mask_t comps_written; 878b8e80941Smrg 879b8e80941Smrg nir_component_mask_t comps_kept; 880b8e80941Smrg 881b8e80941Smrg /* True if there is a copy that isn't to/from a shrinkable vector */ 882b8e80941Smrg bool has_external_copy; 883b8e80941Smrg struct set *vars_copied; 884b8e80941Smrg 885b8e80941Smrg unsigned num_levels; 886b8e80941Smrg struct array_level_usage levels[0]; 887b8e80941Smrg}; 888b8e80941Smrg 889b8e80941Smrgstatic struct vec_var_usage * 890b8e80941Smrgget_vec_var_usage(nir_variable *var, 891b8e80941Smrg struct hash_table *var_usage_map, 892b8e80941Smrg bool add_usage_entry, void *mem_ctx) 893b8e80941Smrg{ 894b8e80941Smrg struct hash_entry *entry = _mesa_hash_table_search(var_usage_map, var); 895b8e80941Smrg if (entry) 896b8e80941Smrg return entry->data; 897b8e80941Smrg 898b8e80941Smrg if (!add_usage_entry) 899b8e80941Smrg return NULL; 900b8e80941Smrg 901b8e80941Smrg /* Check to make sure that we are working with an array of vectors. We 902b8e80941Smrg * don't bother to shrink single vectors because we figure that we can 903b8e80941Smrg * clean it up better with SSA than by inserting piles of vecN instructions 904b8e80941Smrg * to compact results. 905b8e80941Smrg */ 906b8e80941Smrg int num_levels = num_array_levels_in_array_of_vector_type(var->type); 907b8e80941Smrg if (num_levels < 1) 908b8e80941Smrg return NULL; /* Not an array of vectors */ 909b8e80941Smrg 910b8e80941Smrg struct vec_var_usage *usage = 911b8e80941Smrg rzalloc_size(mem_ctx, sizeof(*usage) + 912b8e80941Smrg num_levels * sizeof(usage->levels[0])); 913b8e80941Smrg 914b8e80941Smrg usage->num_levels = num_levels; 915b8e80941Smrg const struct glsl_type *type = var->type; 916b8e80941Smrg for (unsigned i = 0; i < num_levels; i++) { 917b8e80941Smrg usage->levels[i].array_len = glsl_get_length(type); 918b8e80941Smrg type = glsl_get_array_element(type); 919b8e80941Smrg } 920b8e80941Smrg assert(glsl_type_is_vector_or_scalar(type)); 921b8e80941Smrg 922b8e80941Smrg usage->all_comps = (1 << glsl_get_components(type)) - 1; 923b8e80941Smrg 924b8e80941Smrg _mesa_hash_table_insert(var_usage_map, var, usage); 925b8e80941Smrg 926b8e80941Smrg return usage; 927b8e80941Smrg} 928b8e80941Smrg 929b8e80941Smrgstatic struct vec_var_usage * 930b8e80941Smrgget_vec_deref_usage(nir_deref_instr *deref, 931b8e80941Smrg struct hash_table *var_usage_map, 932b8e80941Smrg nir_variable_mode modes, 933b8e80941Smrg bool add_usage_entry, void *mem_ctx) 934b8e80941Smrg{ 935b8e80941Smrg if (!(deref->mode & modes)) 936b8e80941Smrg return NULL; 937b8e80941Smrg 938b8e80941Smrg return get_vec_var_usage(nir_deref_instr_get_variable(deref), 939b8e80941Smrg var_usage_map, add_usage_entry, mem_ctx); 940b8e80941Smrg} 941b8e80941Smrg 942b8e80941Smrgstatic void 943b8e80941Smrgmark_deref_used(nir_deref_instr *deref, 944b8e80941Smrg nir_component_mask_t comps_read, 945b8e80941Smrg nir_component_mask_t comps_written, 946b8e80941Smrg nir_deref_instr *copy_deref, 947b8e80941Smrg struct hash_table *var_usage_map, 948b8e80941Smrg nir_variable_mode modes, 949b8e80941Smrg void *mem_ctx) 950b8e80941Smrg{ 951b8e80941Smrg if (!(deref->mode & modes)) 952b8e80941Smrg return; 953b8e80941Smrg 954b8e80941Smrg nir_variable *var = nir_deref_instr_get_variable(deref); 955b8e80941Smrg 956b8e80941Smrg struct vec_var_usage *usage = 957b8e80941Smrg get_vec_var_usage(var, var_usage_map, true, mem_ctx); 958b8e80941Smrg if (!usage) 959b8e80941Smrg return; 960b8e80941Smrg 961b8e80941Smrg usage->comps_read |= comps_read & usage->all_comps; 962b8e80941Smrg usage->comps_written |= comps_written & usage->all_comps; 963b8e80941Smrg 964b8e80941Smrg struct vec_var_usage *copy_usage = NULL; 965b8e80941Smrg if (copy_deref) { 966b8e80941Smrg copy_usage = get_vec_deref_usage(copy_deref, var_usage_map, modes, 967b8e80941Smrg true, mem_ctx); 968b8e80941Smrg if (copy_usage) { 969b8e80941Smrg if (usage->vars_copied == NULL) { 970b8e80941Smrg usage->vars_copied = _mesa_pointer_set_create(mem_ctx); 971b8e80941Smrg } 972b8e80941Smrg _mesa_set_add(usage->vars_copied, copy_usage); 973b8e80941Smrg } else { 974b8e80941Smrg usage->has_external_copy = true; 975b8e80941Smrg } 976b8e80941Smrg } 977b8e80941Smrg 978b8e80941Smrg nir_deref_path path; 979b8e80941Smrg nir_deref_path_init(&path, deref, mem_ctx); 980b8e80941Smrg 981b8e80941Smrg nir_deref_path copy_path; 982b8e80941Smrg if (copy_usage) 983b8e80941Smrg nir_deref_path_init(©_path, copy_deref, mem_ctx); 984b8e80941Smrg 985b8e80941Smrg unsigned copy_i = 0; 986b8e80941Smrg for (unsigned i = 0; i < usage->num_levels; i++) { 987b8e80941Smrg struct array_level_usage *level = &usage->levels[i]; 988b8e80941Smrg nir_deref_instr *deref = path.path[i + 1]; 989b8e80941Smrg assert(deref->deref_type == nir_deref_type_array || 990b8e80941Smrg deref->deref_type == nir_deref_type_array_wildcard); 991b8e80941Smrg 992b8e80941Smrg unsigned max_used; 993b8e80941Smrg if (deref->deref_type == nir_deref_type_array) { 994b8e80941Smrg max_used = nir_src_is_const(deref->arr.index) ? 995b8e80941Smrg nir_src_as_uint(deref->arr.index) : UINT_MAX; 996b8e80941Smrg } else { 997b8e80941Smrg /* For wildcards, we read or wrote the whole thing. */ 998b8e80941Smrg assert(deref->deref_type == nir_deref_type_array_wildcard); 999b8e80941Smrg max_used = level->array_len - 1; 1000b8e80941Smrg 1001b8e80941Smrg if (copy_usage) { 1002b8e80941Smrg /* Match each wildcard level with the level on copy_usage */ 1003b8e80941Smrg for (; copy_path.path[copy_i + 1]; copy_i++) { 1004b8e80941Smrg if (copy_path.path[copy_i + 1]->deref_type == 1005b8e80941Smrg nir_deref_type_array_wildcard) 1006b8e80941Smrg break; 1007b8e80941Smrg } 1008b8e80941Smrg struct array_level_usage *copy_level = 1009b8e80941Smrg ©_usage->levels[copy_i++]; 1010b8e80941Smrg 1011b8e80941Smrg if (level->levels_copied == NULL) { 1012b8e80941Smrg level->levels_copied = _mesa_pointer_set_create(mem_ctx); 1013b8e80941Smrg } 1014b8e80941Smrg _mesa_set_add(level->levels_copied, copy_level); 1015b8e80941Smrg } else { 1016b8e80941Smrg /* We have a wildcard and it comes from a variable we aren't 1017b8e80941Smrg * tracking; flag it and we'll know to not shorten this array. 1018b8e80941Smrg */ 1019b8e80941Smrg level->has_external_copy = true; 1020b8e80941Smrg } 1021b8e80941Smrg } 1022b8e80941Smrg 1023b8e80941Smrg if (comps_written) 1024b8e80941Smrg level->max_written = MAX2(level->max_written, max_used); 1025b8e80941Smrg if (comps_read) 1026b8e80941Smrg level->max_read = MAX2(level->max_read, max_used); 1027b8e80941Smrg } 1028b8e80941Smrg} 1029b8e80941Smrg 1030b8e80941Smrgstatic bool 1031b8e80941Smrgsrc_is_load_deref(nir_src src, nir_src deref_src) 1032b8e80941Smrg{ 1033b8e80941Smrg nir_intrinsic_instr *load = nir_src_as_intrinsic(src); 1034b8e80941Smrg if (load == NULL || load->intrinsic != nir_intrinsic_load_deref) 1035b8e80941Smrg return false; 1036b8e80941Smrg 1037b8e80941Smrg assert(load->src[0].is_ssa); 1038b8e80941Smrg 1039b8e80941Smrg return load->src[0].ssa == deref_src.ssa; 1040b8e80941Smrg} 1041b8e80941Smrg 1042b8e80941Smrg/* Returns all non-self-referential components of a store instruction. A 1043b8e80941Smrg * component is self-referential if it comes from the same component of a load 1044b8e80941Smrg * instruction on the same deref. If the only data in a particular component 1045b8e80941Smrg * of a variable came directly from that component then it's undefined. The 1046b8e80941Smrg * only way to get defined data into a component of a variable is for it to 1047b8e80941Smrg * get written there by something outside or from a different component. 1048b8e80941Smrg * 1049b8e80941Smrg * This is a fairly common pattern in shaders that come from either GLSL IR or 1050b8e80941Smrg * GLSLang because both glsl_to_nir and GLSLang implement write-masking with 1051b8e80941Smrg * load-vec-store. 1052b8e80941Smrg */ 1053b8e80941Smrgstatic nir_component_mask_t 1054b8e80941Smrgget_non_self_referential_store_comps(nir_intrinsic_instr *store) 1055b8e80941Smrg{ 1056b8e80941Smrg nir_component_mask_t comps = nir_intrinsic_write_mask(store); 1057b8e80941Smrg 1058b8e80941Smrg assert(store->src[1].is_ssa); 1059b8e80941Smrg nir_instr *src_instr = store->src[1].ssa->parent_instr; 1060b8e80941Smrg if (src_instr->type != nir_instr_type_alu) 1061b8e80941Smrg return comps; 1062b8e80941Smrg 1063b8e80941Smrg nir_alu_instr *src_alu = nir_instr_as_alu(src_instr); 1064b8e80941Smrg 1065b8e80941Smrg if (src_alu->op == nir_op_imov || 1066b8e80941Smrg src_alu->op == nir_op_fmov) { 1067b8e80941Smrg /* If it's just a swizzle of a load from the same deref, discount any 1068b8e80941Smrg * channels that don't move in the swizzle. 1069b8e80941Smrg */ 1070b8e80941Smrg if (src_is_load_deref(src_alu->src[0].src, store->src[0])) { 1071b8e80941Smrg for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) { 1072b8e80941Smrg if (src_alu->src[0].swizzle[i] == i) 1073b8e80941Smrg comps &= ~(1u << i); 1074b8e80941Smrg } 1075b8e80941Smrg } 1076b8e80941Smrg } else if (src_alu->op == nir_op_vec2 || 1077b8e80941Smrg src_alu->op == nir_op_vec3 || 1078b8e80941Smrg src_alu->op == nir_op_vec4) { 1079b8e80941Smrg /* If it's a vec, discount any channels that are just loads from the 1080b8e80941Smrg * same deref put in the same spot. 1081b8e80941Smrg */ 1082b8e80941Smrg for (unsigned i = 0; i < nir_op_infos[src_alu->op].num_inputs; i++) { 1083b8e80941Smrg if (src_is_load_deref(src_alu->src[i].src, store->src[0]) && 1084b8e80941Smrg src_alu->src[i].swizzle[0] == i) 1085b8e80941Smrg comps &= ~(1u << i); 1086b8e80941Smrg } 1087b8e80941Smrg } 1088b8e80941Smrg 1089b8e80941Smrg return comps; 1090b8e80941Smrg} 1091b8e80941Smrg 1092b8e80941Smrgstatic void 1093b8e80941Smrgfind_used_components_impl(nir_function_impl *impl, 1094b8e80941Smrg struct hash_table *var_usage_map, 1095b8e80941Smrg nir_variable_mode modes, 1096b8e80941Smrg void *mem_ctx) 1097b8e80941Smrg{ 1098b8e80941Smrg nir_foreach_block(block, impl) { 1099b8e80941Smrg nir_foreach_instr(instr, block) { 1100b8e80941Smrg if (instr->type != nir_instr_type_intrinsic) 1101b8e80941Smrg continue; 1102b8e80941Smrg 1103b8e80941Smrg nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 1104b8e80941Smrg switch (intrin->intrinsic) { 1105b8e80941Smrg case nir_intrinsic_load_deref: 1106b8e80941Smrg mark_deref_used(nir_src_as_deref(intrin->src[0]), 1107b8e80941Smrg nir_ssa_def_components_read(&intrin->dest.ssa), 0, 1108b8e80941Smrg NULL, var_usage_map, modes, mem_ctx); 1109b8e80941Smrg break; 1110b8e80941Smrg 1111b8e80941Smrg case nir_intrinsic_store_deref: 1112b8e80941Smrg mark_deref_used(nir_src_as_deref(intrin->src[0]), 1113b8e80941Smrg 0, get_non_self_referential_store_comps(intrin), 1114b8e80941Smrg NULL, var_usage_map, modes, mem_ctx); 1115b8e80941Smrg break; 1116b8e80941Smrg 1117b8e80941Smrg case nir_intrinsic_copy_deref: { 1118b8e80941Smrg /* Just mark everything used for copies. */ 1119b8e80941Smrg nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]); 1120b8e80941Smrg nir_deref_instr *src = nir_src_as_deref(intrin->src[1]); 1121b8e80941Smrg mark_deref_used(dst, 0, ~0, src, var_usage_map, modes, mem_ctx); 1122b8e80941Smrg mark_deref_used(src, ~0, 0, dst, var_usage_map, modes, mem_ctx); 1123b8e80941Smrg break; 1124b8e80941Smrg } 1125b8e80941Smrg 1126b8e80941Smrg default: 1127b8e80941Smrg break; 1128b8e80941Smrg } 1129b8e80941Smrg } 1130b8e80941Smrg } 1131b8e80941Smrg} 1132b8e80941Smrg 1133b8e80941Smrgstatic bool 1134b8e80941Smrgshrink_vec_var_list(struct exec_list *vars, 1135b8e80941Smrg struct hash_table *var_usage_map) 1136b8e80941Smrg{ 1137b8e80941Smrg /* Initialize the components kept field of each variable. This is the 1138b8e80941Smrg * AND of the components written and components read. If a component is 1139b8e80941Smrg * written but never read, it's dead. If it is read but never written, 1140b8e80941Smrg * then all values read are undefined garbage and we may as well not read 1141b8e80941Smrg * them. 1142b8e80941Smrg * 1143b8e80941Smrg * The same logic applies to the array length. We make the array length 1144b8e80941Smrg * the minimum needed required length between read and write and plan to 1145b8e80941Smrg * discard any OOB access. The one exception here is indirect writes 1146b8e80941Smrg * because we don't know where they will land and we can't shrink an array 1147b8e80941Smrg * with indirect writes because previously in-bounds writes may become 1148b8e80941Smrg * out-of-bounds and have undefined behavior. 1149b8e80941Smrg * 1150b8e80941Smrg * Also, if we have a copy that to/from something we can't shrink, we need 1151b8e80941Smrg * to leave components and array_len of any wildcards alone. 1152b8e80941Smrg */ 1153b8e80941Smrg nir_foreach_variable(var, vars) { 1154b8e80941Smrg struct vec_var_usage *usage = 1155b8e80941Smrg get_vec_var_usage(var, var_usage_map, false, NULL); 1156b8e80941Smrg if (!usage) 1157b8e80941Smrg continue; 1158b8e80941Smrg 1159b8e80941Smrg assert(usage->comps_kept == 0); 1160b8e80941Smrg if (usage->has_external_copy) 1161b8e80941Smrg usage->comps_kept = usage->all_comps; 1162b8e80941Smrg else 1163b8e80941Smrg usage->comps_kept = usage->comps_read & usage->comps_written; 1164b8e80941Smrg 1165b8e80941Smrg for (unsigned i = 0; i < usage->num_levels; i++) { 1166b8e80941Smrg struct array_level_usage *level = &usage->levels[i]; 1167b8e80941Smrg assert(level->array_len > 0); 1168b8e80941Smrg 1169b8e80941Smrg if (level->max_written == UINT_MAX || level->has_external_copy) 1170b8e80941Smrg continue; /* Can't shrink */ 1171b8e80941Smrg 1172b8e80941Smrg unsigned max_used = MIN2(level->max_read, level->max_written); 1173b8e80941Smrg level->array_len = MIN2(max_used, level->array_len - 1) + 1; 1174b8e80941Smrg } 1175b8e80941Smrg } 1176b8e80941Smrg 1177b8e80941Smrg /* In order for variable copies to work, we have to have the same data type 1178b8e80941Smrg * on the source and the destination. In order to satisfy this, we run a 1179b8e80941Smrg * little fixed-point algorithm to transitively ensure that we get enough 1180b8e80941Smrg * components and array elements for this to hold for all copies. 1181b8e80941Smrg */ 1182b8e80941Smrg bool fp_progress; 1183b8e80941Smrg do { 1184b8e80941Smrg fp_progress = false; 1185b8e80941Smrg nir_foreach_variable(var, vars) { 1186b8e80941Smrg struct vec_var_usage *var_usage = 1187b8e80941Smrg get_vec_var_usage(var, var_usage_map, false, NULL); 1188b8e80941Smrg if (!var_usage || !var_usage->vars_copied) 1189b8e80941Smrg continue; 1190b8e80941Smrg 1191b8e80941Smrg set_foreach(var_usage->vars_copied, copy_entry) { 1192b8e80941Smrg struct vec_var_usage *copy_usage = (void *)copy_entry->key; 1193b8e80941Smrg if (copy_usage->comps_kept != var_usage->comps_kept) { 1194b8e80941Smrg nir_component_mask_t comps_kept = 1195b8e80941Smrg (var_usage->comps_kept | copy_usage->comps_kept); 1196b8e80941Smrg var_usage->comps_kept = comps_kept; 1197b8e80941Smrg copy_usage->comps_kept = comps_kept; 1198b8e80941Smrg fp_progress = true; 1199b8e80941Smrg } 1200b8e80941Smrg } 1201b8e80941Smrg 1202b8e80941Smrg for (unsigned i = 0; i < var_usage->num_levels; i++) { 1203b8e80941Smrg struct array_level_usage *var_level = &var_usage->levels[i]; 1204b8e80941Smrg if (!var_level->levels_copied) 1205b8e80941Smrg continue; 1206b8e80941Smrg 1207b8e80941Smrg set_foreach(var_level->levels_copied, copy_entry) { 1208b8e80941Smrg struct array_level_usage *copy_level = (void *)copy_entry->key; 1209b8e80941Smrg if (var_level->array_len != copy_level->array_len) { 1210b8e80941Smrg unsigned array_len = 1211b8e80941Smrg MAX2(var_level->array_len, copy_level->array_len); 1212b8e80941Smrg var_level->array_len = array_len; 1213b8e80941Smrg copy_level->array_len = array_len; 1214b8e80941Smrg fp_progress = true; 1215b8e80941Smrg } 1216b8e80941Smrg } 1217b8e80941Smrg } 1218b8e80941Smrg } 1219b8e80941Smrg } while (fp_progress); 1220b8e80941Smrg 1221b8e80941Smrg bool vars_shrunk = false; 1222b8e80941Smrg nir_foreach_variable_safe(var, vars) { 1223b8e80941Smrg struct vec_var_usage *usage = 1224b8e80941Smrg get_vec_var_usage(var, var_usage_map, false, NULL); 1225b8e80941Smrg if (!usage) 1226b8e80941Smrg continue; 1227b8e80941Smrg 1228b8e80941Smrg bool shrunk = false; 1229b8e80941Smrg const struct glsl_type *vec_type = var->type; 1230b8e80941Smrg for (unsigned i = 0; i < usage->num_levels; i++) { 1231b8e80941Smrg /* If we've reduced the array to zero elements at some level, just 1232b8e80941Smrg * set comps_kept to 0 and delete the variable. 1233b8e80941Smrg */ 1234b8e80941Smrg if (usage->levels[i].array_len == 0) { 1235b8e80941Smrg usage->comps_kept = 0; 1236b8e80941Smrg break; 1237b8e80941Smrg } 1238b8e80941Smrg 1239b8e80941Smrg assert(usage->levels[i].array_len <= glsl_get_length(vec_type)); 1240b8e80941Smrg if (usage->levels[i].array_len < glsl_get_length(vec_type)) 1241b8e80941Smrg shrunk = true; 1242b8e80941Smrg vec_type = glsl_get_array_element(vec_type); 1243b8e80941Smrg } 1244b8e80941Smrg assert(glsl_type_is_vector_or_scalar(vec_type)); 1245b8e80941Smrg 1246b8e80941Smrg assert(usage->comps_kept == (usage->comps_kept & usage->all_comps)); 1247b8e80941Smrg if (usage->comps_kept != usage->all_comps) 1248b8e80941Smrg shrunk = true; 1249b8e80941Smrg 1250b8e80941Smrg if (usage->comps_kept == 0) { 1251b8e80941Smrg /* This variable is dead, remove it */ 1252b8e80941Smrg vars_shrunk = true; 1253b8e80941Smrg exec_node_remove(&var->node); 1254b8e80941Smrg continue; 1255b8e80941Smrg } 1256b8e80941Smrg 1257b8e80941Smrg if (!shrunk) { 1258b8e80941Smrg /* This variable doesn't need to be shrunk. Remove it from the 1259b8e80941Smrg * hash table so later steps will ignore it. 1260b8e80941Smrg */ 1261b8e80941Smrg _mesa_hash_table_remove_key(var_usage_map, var); 1262b8e80941Smrg continue; 1263b8e80941Smrg } 1264b8e80941Smrg 1265b8e80941Smrg /* Build the new var type */ 1266b8e80941Smrg unsigned new_num_comps = util_bitcount(usage->comps_kept); 1267b8e80941Smrg const struct glsl_type *new_type = 1268b8e80941Smrg glsl_vector_type(glsl_get_base_type(vec_type), new_num_comps); 1269b8e80941Smrg for (int i = usage->num_levels - 1; i >= 0; i--) { 1270b8e80941Smrg assert(usage->levels[i].array_len > 0); 1271b8e80941Smrg /* If the original type was a matrix type, we'd like to keep that so 1272b8e80941Smrg * we don't convert matrices into arrays. 1273b8e80941Smrg */ 1274b8e80941Smrg if (i == usage->num_levels - 1 && 1275b8e80941Smrg glsl_type_is_matrix(glsl_without_array(var->type)) && 1276b8e80941Smrg new_num_comps > 1 && usage->levels[i].array_len > 1) { 1277b8e80941Smrg new_type = glsl_matrix_type(glsl_get_base_type(new_type), 1278b8e80941Smrg new_num_comps, 1279b8e80941Smrg usage->levels[i].array_len); 1280b8e80941Smrg } else { 1281b8e80941Smrg new_type = glsl_array_type(new_type, usage->levels[i].array_len, 0); 1282b8e80941Smrg } 1283b8e80941Smrg } 1284b8e80941Smrg var->type = new_type; 1285b8e80941Smrg 1286b8e80941Smrg vars_shrunk = true; 1287b8e80941Smrg } 1288b8e80941Smrg 1289b8e80941Smrg return vars_shrunk; 1290b8e80941Smrg} 1291b8e80941Smrg 1292b8e80941Smrgstatic bool 1293b8e80941Smrgvec_deref_is_oob(nir_deref_instr *deref, 1294b8e80941Smrg struct vec_var_usage *usage) 1295b8e80941Smrg{ 1296b8e80941Smrg nir_deref_path path; 1297b8e80941Smrg nir_deref_path_init(&path, deref, NULL); 1298b8e80941Smrg 1299b8e80941Smrg bool oob = false; 1300b8e80941Smrg for (unsigned i = 0; i < usage->num_levels; i++) { 1301b8e80941Smrg nir_deref_instr *p = path.path[i + 1]; 1302b8e80941Smrg if (p->deref_type == nir_deref_type_array_wildcard) 1303b8e80941Smrg continue; 1304b8e80941Smrg 1305b8e80941Smrg if (nir_src_is_const(p->arr.index) && 1306b8e80941Smrg nir_src_as_uint(p->arr.index) >= usage->levels[i].array_len) { 1307b8e80941Smrg oob = true; 1308b8e80941Smrg break; 1309b8e80941Smrg } 1310b8e80941Smrg } 1311b8e80941Smrg 1312b8e80941Smrg nir_deref_path_finish(&path); 1313b8e80941Smrg 1314b8e80941Smrg return oob; 1315b8e80941Smrg} 1316b8e80941Smrg 1317b8e80941Smrgstatic bool 1318b8e80941Smrgvec_deref_is_dead_or_oob(nir_deref_instr *deref, 1319b8e80941Smrg struct hash_table *var_usage_map, 1320b8e80941Smrg nir_variable_mode modes) 1321b8e80941Smrg{ 1322b8e80941Smrg struct vec_var_usage *usage = 1323b8e80941Smrg get_vec_deref_usage(deref, var_usage_map, modes, false, NULL); 1324b8e80941Smrg if (!usage) 1325b8e80941Smrg return false; 1326b8e80941Smrg 1327b8e80941Smrg return usage->comps_kept == 0 || vec_deref_is_oob(deref, usage); 1328b8e80941Smrg} 1329b8e80941Smrg 1330b8e80941Smrgstatic void 1331b8e80941Smrgshrink_vec_var_access_impl(nir_function_impl *impl, 1332b8e80941Smrg struct hash_table *var_usage_map, 1333b8e80941Smrg nir_variable_mode modes) 1334b8e80941Smrg{ 1335b8e80941Smrg nir_builder b; 1336b8e80941Smrg nir_builder_init(&b, impl); 1337b8e80941Smrg 1338b8e80941Smrg nir_foreach_block(block, impl) { 1339b8e80941Smrg nir_foreach_instr_safe(instr, block) { 1340b8e80941Smrg switch (instr->type) { 1341b8e80941Smrg case nir_instr_type_deref: { 1342b8e80941Smrg nir_deref_instr *deref = nir_instr_as_deref(instr); 1343b8e80941Smrg if (!(deref->mode & modes)) 1344b8e80941Smrg break; 1345b8e80941Smrg 1346b8e80941Smrg /* Clean up any dead derefs we find lying around. They may refer 1347b8e80941Smrg * to variables we've deleted. 1348b8e80941Smrg */ 1349b8e80941Smrg if (nir_deref_instr_remove_if_unused(deref)) 1350b8e80941Smrg break; 1351b8e80941Smrg 1352b8e80941Smrg /* Update the type in the deref to keep the types consistent as 1353b8e80941Smrg * you walk down the chain. We don't need to check if this is one 1354b8e80941Smrg * of the derefs we're shrinking because this is a no-op if it 1355b8e80941Smrg * isn't. The worst that could happen is that we accidentally fix 1356b8e80941Smrg * an invalid deref. 1357b8e80941Smrg */ 1358b8e80941Smrg if (deref->deref_type == nir_deref_type_var) { 1359b8e80941Smrg deref->type = deref->var->type; 1360b8e80941Smrg } else if (deref->deref_type == nir_deref_type_array || 1361b8e80941Smrg deref->deref_type == nir_deref_type_array_wildcard) { 1362b8e80941Smrg nir_deref_instr *parent = nir_deref_instr_parent(deref); 1363b8e80941Smrg assert(glsl_type_is_array(parent->type) || 1364b8e80941Smrg glsl_type_is_matrix(parent->type)); 1365b8e80941Smrg deref->type = glsl_get_array_element(parent->type); 1366b8e80941Smrg } 1367b8e80941Smrg break; 1368b8e80941Smrg } 1369b8e80941Smrg 1370b8e80941Smrg case nir_instr_type_intrinsic: { 1371b8e80941Smrg nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 1372b8e80941Smrg 1373b8e80941Smrg /* If we have a copy whose source or destination has been deleted 1374b8e80941Smrg * because we determined the variable was dead, then we just 1375b8e80941Smrg * delete the copy instruction. If the source variable was dead 1376b8e80941Smrg * then it was writing undefined garbage anyway and if it's the 1377b8e80941Smrg * destination variable that's dead then the write isn't needed. 1378b8e80941Smrg */ 1379b8e80941Smrg if (intrin->intrinsic == nir_intrinsic_copy_deref) { 1380b8e80941Smrg nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]); 1381b8e80941Smrg nir_deref_instr *src = nir_src_as_deref(intrin->src[1]); 1382b8e80941Smrg if (vec_deref_is_dead_or_oob(dst, var_usage_map, modes) || 1383b8e80941Smrg vec_deref_is_dead_or_oob(src, var_usage_map, modes)) { 1384b8e80941Smrg nir_instr_remove(&intrin->instr); 1385b8e80941Smrg nir_deref_instr_remove_if_unused(dst); 1386b8e80941Smrg nir_deref_instr_remove_if_unused(src); 1387b8e80941Smrg } 1388b8e80941Smrg continue; 1389b8e80941Smrg } 1390b8e80941Smrg 1391b8e80941Smrg if (intrin->intrinsic != nir_intrinsic_load_deref && 1392b8e80941Smrg intrin->intrinsic != nir_intrinsic_store_deref) 1393b8e80941Smrg continue; 1394b8e80941Smrg 1395b8e80941Smrg nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 1396b8e80941Smrg if (!(deref->mode & modes)) 1397b8e80941Smrg continue; 1398b8e80941Smrg 1399b8e80941Smrg struct vec_var_usage *usage = 1400b8e80941Smrg get_vec_deref_usage(deref, var_usage_map, modes, false, NULL); 1401b8e80941Smrg if (!usage) 1402b8e80941Smrg continue; 1403b8e80941Smrg 1404b8e80941Smrg if (usage->comps_kept == 0 || vec_deref_is_oob(deref, usage)) { 1405b8e80941Smrg if (intrin->intrinsic == nir_intrinsic_load_deref) { 1406b8e80941Smrg nir_ssa_def *u = 1407b8e80941Smrg nir_ssa_undef(&b, intrin->dest.ssa.num_components, 1408b8e80941Smrg intrin->dest.ssa.bit_size); 1409b8e80941Smrg nir_ssa_def_rewrite_uses(&intrin->dest.ssa, 1410b8e80941Smrg nir_src_for_ssa(u)); 1411b8e80941Smrg } 1412b8e80941Smrg nir_instr_remove(&intrin->instr); 1413b8e80941Smrg nir_deref_instr_remove_if_unused(deref); 1414b8e80941Smrg continue; 1415b8e80941Smrg } 1416b8e80941Smrg 1417b8e80941Smrg /* If we're not dropping any components, there's no need to 1418b8e80941Smrg * compact vectors. 1419b8e80941Smrg */ 1420b8e80941Smrg if (usage->comps_kept == usage->all_comps) 1421b8e80941Smrg continue; 1422b8e80941Smrg 1423b8e80941Smrg if (intrin->intrinsic == nir_intrinsic_load_deref) { 1424b8e80941Smrg b.cursor = nir_after_instr(&intrin->instr); 1425b8e80941Smrg 1426b8e80941Smrg nir_ssa_def *undef = 1427b8e80941Smrg nir_ssa_undef(&b, 1, intrin->dest.ssa.bit_size); 1428b8e80941Smrg nir_ssa_def *vec_srcs[NIR_MAX_VEC_COMPONENTS]; 1429b8e80941Smrg unsigned c = 0; 1430b8e80941Smrg for (unsigned i = 0; i < intrin->num_components; i++) { 1431b8e80941Smrg if (usage->comps_kept & (1u << i)) 1432b8e80941Smrg vec_srcs[i] = nir_channel(&b, &intrin->dest.ssa, c++); 1433b8e80941Smrg else 1434b8e80941Smrg vec_srcs[i] = undef; 1435b8e80941Smrg } 1436b8e80941Smrg nir_ssa_def *vec = nir_vec(&b, vec_srcs, intrin->num_components); 1437b8e80941Smrg 1438b8e80941Smrg nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa, 1439b8e80941Smrg nir_src_for_ssa(vec), 1440b8e80941Smrg vec->parent_instr); 1441b8e80941Smrg 1442b8e80941Smrg /* The SSA def is now only used by the swizzle. It's safe to 1443b8e80941Smrg * shrink the number of components. 1444b8e80941Smrg */ 1445b8e80941Smrg assert(list_length(&intrin->dest.ssa.uses) == c); 1446b8e80941Smrg intrin->num_components = c; 1447b8e80941Smrg intrin->dest.ssa.num_components = c; 1448b8e80941Smrg } else { 1449b8e80941Smrg nir_component_mask_t write_mask = 1450b8e80941Smrg nir_intrinsic_write_mask(intrin); 1451b8e80941Smrg 1452b8e80941Smrg unsigned swizzle[NIR_MAX_VEC_COMPONENTS]; 1453b8e80941Smrg nir_component_mask_t new_write_mask = 0; 1454b8e80941Smrg unsigned c = 0; 1455b8e80941Smrg for (unsigned i = 0; i < intrin->num_components; i++) { 1456b8e80941Smrg if (usage->comps_kept & (1u << i)) { 1457b8e80941Smrg swizzle[c] = i; 1458b8e80941Smrg if (write_mask & (1u << i)) 1459b8e80941Smrg new_write_mask |= 1u << c; 1460b8e80941Smrg c++; 1461b8e80941Smrg } 1462b8e80941Smrg } 1463b8e80941Smrg 1464b8e80941Smrg b.cursor = nir_before_instr(&intrin->instr); 1465b8e80941Smrg 1466b8e80941Smrg nir_ssa_def *swizzled = 1467b8e80941Smrg nir_swizzle(&b, intrin->src[1].ssa, swizzle, c, false); 1468b8e80941Smrg 1469b8e80941Smrg /* Rewrite to use the compacted source */ 1470b8e80941Smrg nir_instr_rewrite_src(&intrin->instr, &intrin->src[1], 1471b8e80941Smrg nir_src_for_ssa(swizzled)); 1472b8e80941Smrg nir_intrinsic_set_write_mask(intrin, new_write_mask); 1473b8e80941Smrg intrin->num_components = c; 1474b8e80941Smrg } 1475b8e80941Smrg break; 1476b8e80941Smrg } 1477b8e80941Smrg 1478b8e80941Smrg default: 1479b8e80941Smrg break; 1480b8e80941Smrg } 1481b8e80941Smrg } 1482b8e80941Smrg } 1483b8e80941Smrg} 1484b8e80941Smrg 1485b8e80941Smrgstatic bool 1486b8e80941Smrgfunction_impl_has_vars_with_modes(nir_function_impl *impl, 1487b8e80941Smrg nir_variable_mode modes) 1488b8e80941Smrg{ 1489b8e80941Smrg nir_shader *shader = impl->function->shader; 1490b8e80941Smrg 1491b8e80941Smrg if ((modes & nir_var_shader_temp) && !exec_list_is_empty(&shader->globals)) 1492b8e80941Smrg return true; 1493b8e80941Smrg 1494b8e80941Smrg if ((modes & nir_var_function_temp) && !exec_list_is_empty(&impl->locals)) 1495b8e80941Smrg return true; 1496b8e80941Smrg 1497b8e80941Smrg return false; 1498b8e80941Smrg} 1499b8e80941Smrg 1500b8e80941Smrg/** Attempt to shrink arrays of vectors 1501b8e80941Smrg * 1502b8e80941Smrg * This pass looks at variables which contain a vector or an array (possibly 1503b8e80941Smrg * multiple dimensions) of vectors and attempts to lower to a smaller vector 1504b8e80941Smrg * or array. If the pass can prove that a component of a vector (or array of 1505b8e80941Smrg * vectors) is never really used, then that component will be removed. 1506b8e80941Smrg * Similarly, the pass attempts to shorten arrays based on what elements it 1507b8e80941Smrg * can prove are never read or never contain valid data. 1508b8e80941Smrg */ 1509b8e80941Smrgbool 1510b8e80941Smrgnir_shrink_vec_array_vars(nir_shader *shader, nir_variable_mode modes) 1511b8e80941Smrg{ 1512b8e80941Smrg assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes); 1513b8e80941Smrg 1514b8e80941Smrg void *mem_ctx = ralloc_context(NULL); 1515b8e80941Smrg 1516b8e80941Smrg struct hash_table *var_usage_map = 1517b8e80941Smrg _mesa_pointer_hash_table_create(mem_ctx); 1518b8e80941Smrg 1519b8e80941Smrg bool has_vars_to_shrink = false; 1520b8e80941Smrg nir_foreach_function(function, shader) { 1521b8e80941Smrg if (!function->impl) 1522b8e80941Smrg continue; 1523b8e80941Smrg 1524b8e80941Smrg /* Don't even bother crawling the IR if we don't have any variables. 1525b8e80941Smrg * Given that this pass deletes any unused variables, it's likely that 1526b8e80941Smrg * we will be in this scenario eventually. 1527b8e80941Smrg */ 1528b8e80941Smrg if (function_impl_has_vars_with_modes(function->impl, modes)) { 1529b8e80941Smrg has_vars_to_shrink = true; 1530b8e80941Smrg find_used_components_impl(function->impl, var_usage_map, 1531b8e80941Smrg modes, mem_ctx); 1532b8e80941Smrg } 1533b8e80941Smrg } 1534b8e80941Smrg if (!has_vars_to_shrink) { 1535b8e80941Smrg ralloc_free(mem_ctx); 1536b8e80941Smrg return false; 1537b8e80941Smrg } 1538b8e80941Smrg 1539b8e80941Smrg bool globals_shrunk = false; 1540b8e80941Smrg if (modes & nir_var_shader_temp) 1541b8e80941Smrg globals_shrunk = shrink_vec_var_list(&shader->globals, var_usage_map); 1542b8e80941Smrg 1543b8e80941Smrg bool progress = false; 1544b8e80941Smrg nir_foreach_function(function, shader) { 1545b8e80941Smrg if (!function->impl) 1546b8e80941Smrg continue; 1547b8e80941Smrg 1548b8e80941Smrg bool locals_shrunk = false; 1549b8e80941Smrg if (modes & nir_var_function_temp) { 1550b8e80941Smrg locals_shrunk = shrink_vec_var_list(&function->impl->locals, 1551b8e80941Smrg var_usage_map); 1552b8e80941Smrg } 1553b8e80941Smrg 1554b8e80941Smrg if (globals_shrunk || locals_shrunk) { 1555b8e80941Smrg shrink_vec_var_access_impl(function->impl, var_usage_map, modes); 1556b8e80941Smrg 1557b8e80941Smrg nir_metadata_preserve(function->impl, nir_metadata_block_index | 1558b8e80941Smrg nir_metadata_dominance); 1559b8e80941Smrg progress = true; 1560b8e80941Smrg } 1561b8e80941Smrg } 1562b8e80941Smrg 1563b8e80941Smrg ralloc_free(mem_ctx); 1564b8e80941Smrg 1565b8e80941Smrg return progress; 1566b8e80941Smrg} 1567