101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2018 Intel Corporation 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2101e04c3fSmrg * IN THE SOFTWARE. 2201e04c3fSmrg */ 2301e04c3fSmrg 2401e04c3fSmrg#include "nir.h" 2501e04c3fSmrg#include "nir_builder.h" 2601e04c3fSmrg#include "nir_deref.h" 2701e04c3fSmrg#include "nir_vla.h" 2801e04c3fSmrg 297ec681f3Smrg#include "util/set.h" 3001e04c3fSmrg#include "util/u_math.h" 3101e04c3fSmrg 327ec681f3Smrgstatic struct set * 337ec681f3Smrgget_complex_used_vars(nir_shader *shader, void *mem_ctx) 347ec681f3Smrg{ 357ec681f3Smrg struct set *complex_vars = _mesa_pointer_set_create(mem_ctx); 367ec681f3Smrg 377ec681f3Smrg nir_foreach_function(function, shader) { 387ec681f3Smrg if (!function->impl) 397ec681f3Smrg continue; 407ec681f3Smrg 417ec681f3Smrg nir_foreach_block(block, function->impl) { 427ec681f3Smrg nir_foreach_instr(instr, block) { 437ec681f3Smrg if (instr->type != nir_instr_type_deref) 447ec681f3Smrg continue; 457ec681f3Smrg 467ec681f3Smrg nir_deref_instr *deref = nir_instr_as_deref(instr); 477ec681f3Smrg 487ec681f3Smrg /* We only need to consider var derefs because 497ec681f3Smrg * nir_deref_instr_has_complex_use is recursive. 507ec681f3Smrg */ 517ec681f3Smrg if (deref->deref_type == nir_deref_type_var && 527ec681f3Smrg nir_deref_instr_has_complex_use(deref)) 537ec681f3Smrg _mesa_set_add(complex_vars, deref->var); 547ec681f3Smrg } 557ec681f3Smrg } 567ec681f3Smrg } 577ec681f3Smrg 587ec681f3Smrg return complex_vars; 597ec681f3Smrg} 6001e04c3fSmrg 6101e04c3fSmrgstruct split_var_state { 6201e04c3fSmrg void *mem_ctx; 6301e04c3fSmrg 6401e04c3fSmrg nir_shader *shader; 6501e04c3fSmrg nir_function_impl *impl; 6601e04c3fSmrg 6701e04c3fSmrg nir_variable *base_var; 6801e04c3fSmrg}; 6901e04c3fSmrg 7001e04c3fSmrgstruct field { 7101e04c3fSmrg struct field *parent; 7201e04c3fSmrg 7301e04c3fSmrg const struct glsl_type *type; 7401e04c3fSmrg 7501e04c3fSmrg unsigned num_fields; 7601e04c3fSmrg struct field *fields; 7701e04c3fSmrg 7801e04c3fSmrg nir_variable *var; 7901e04c3fSmrg}; 8001e04c3fSmrg 8101e04c3fSmrgstatic const struct glsl_type * 8201e04c3fSmrgwrap_type_in_array(const struct glsl_type *type, 8301e04c3fSmrg const struct glsl_type *array_type) 8401e04c3fSmrg{ 8501e04c3fSmrg if (!glsl_type_is_array(array_type)) 8601e04c3fSmrg return type; 8701e04c3fSmrg 8801e04c3fSmrg const struct glsl_type *elem_type = 8901e04c3fSmrg wrap_type_in_array(type, glsl_get_array_element(array_type)); 907e102996Smaya assert(glsl_get_explicit_stride(array_type) == 0); 917e102996Smaya return glsl_array_type(elem_type, glsl_get_length(array_type), 0); 9201e04c3fSmrg} 9301e04c3fSmrg 9401e04c3fSmrgstatic int 9501e04c3fSmrgnum_array_levels_in_array_of_vector_type(const struct glsl_type *type) 9601e04c3fSmrg{ 9701e04c3fSmrg int num_levels = 0; 9801e04c3fSmrg while (true) { 9901e04c3fSmrg if (glsl_type_is_array_or_matrix(type)) { 10001e04c3fSmrg num_levels++; 10101e04c3fSmrg type = glsl_get_array_element(type); 10201e04c3fSmrg } else if (glsl_type_is_vector_or_scalar(type)) { 10301e04c3fSmrg return num_levels; 10401e04c3fSmrg } else { 10501e04c3fSmrg /* Not an array of vectors */ 10601e04c3fSmrg return -1; 10701e04c3fSmrg } 10801e04c3fSmrg } 10901e04c3fSmrg} 11001e04c3fSmrg 11101e04c3fSmrgstatic void 11201e04c3fSmrginit_field_for_type(struct field *field, struct field *parent, 11301e04c3fSmrg const struct glsl_type *type, 11401e04c3fSmrg const char *name, 11501e04c3fSmrg struct split_var_state *state) 11601e04c3fSmrg{ 11701e04c3fSmrg *field = (struct field) { 11801e04c3fSmrg .parent = parent, 11901e04c3fSmrg .type = type, 12001e04c3fSmrg }; 12101e04c3fSmrg 12201e04c3fSmrg const struct glsl_type *struct_type = glsl_without_array(type); 1237e102996Smaya if (glsl_type_is_struct_or_ifc(struct_type)) { 12401e04c3fSmrg field->num_fields = glsl_get_length(struct_type), 12501e04c3fSmrg field->fields = ralloc_array(state->mem_ctx, struct field, 12601e04c3fSmrg field->num_fields); 12701e04c3fSmrg for (unsigned i = 0; i < field->num_fields; i++) { 12801e04c3fSmrg char *field_name = NULL; 12901e04c3fSmrg if (name) { 13001e04c3fSmrg field_name = ralloc_asprintf(state->mem_ctx, "%s_%s", name, 13101e04c3fSmrg glsl_get_struct_elem_name(struct_type, i)); 13201e04c3fSmrg } else { 13301e04c3fSmrg field_name = ralloc_asprintf(state->mem_ctx, "{unnamed %s}_%s", 13401e04c3fSmrg glsl_get_type_name(struct_type), 13501e04c3fSmrg glsl_get_struct_elem_name(struct_type, i)); 13601e04c3fSmrg } 13701e04c3fSmrg init_field_for_type(&field->fields[i], field, 13801e04c3fSmrg glsl_get_struct_field(struct_type, i), 13901e04c3fSmrg field_name, state); 14001e04c3fSmrg } 14101e04c3fSmrg } else { 14201e04c3fSmrg const struct glsl_type *var_type = type; 14301e04c3fSmrg for (struct field *f = field->parent; f; f = f->parent) 14401e04c3fSmrg var_type = wrap_type_in_array(var_type, f->type); 14501e04c3fSmrg 14601e04c3fSmrg nir_variable_mode mode = state->base_var->data.mode; 1477e102996Smaya if (mode == nir_var_function_temp) { 14801e04c3fSmrg field->var = nir_local_variable_create(state->impl, var_type, name); 14901e04c3fSmrg } else { 15001e04c3fSmrg field->var = nir_variable_create(state->shader, mode, var_type, name); 15101e04c3fSmrg } 15201e04c3fSmrg } 15301e04c3fSmrg} 15401e04c3fSmrg 15501e04c3fSmrgstatic bool 15601e04c3fSmrgsplit_var_list_structs(nir_shader *shader, 15701e04c3fSmrg nir_function_impl *impl, 15801e04c3fSmrg struct exec_list *vars, 1597ec681f3Smrg nir_variable_mode mode, 16001e04c3fSmrg struct hash_table *var_field_map, 1617ec681f3Smrg struct set **complex_vars, 16201e04c3fSmrg void *mem_ctx) 16301e04c3fSmrg{ 16401e04c3fSmrg struct split_var_state state = { 16501e04c3fSmrg .mem_ctx = mem_ctx, 16601e04c3fSmrg .shader = shader, 16701e04c3fSmrg .impl = impl, 16801e04c3fSmrg }; 16901e04c3fSmrg 17001e04c3fSmrg struct exec_list split_vars; 17101e04c3fSmrg exec_list_make_empty(&split_vars); 17201e04c3fSmrg 17301e04c3fSmrg /* To avoid list confusion (we'll be adding things as we split variables), 17401e04c3fSmrg * pull all of the variables we plan to split off of the list 17501e04c3fSmrg */ 1767ec681f3Smrg nir_foreach_variable_in_list_safe(var, vars) { 1777ec681f3Smrg if (var->data.mode != mode) 1787ec681f3Smrg continue; 1797ec681f3Smrg 1807e102996Smaya if (!glsl_type_is_struct_or_ifc(glsl_without_array(var->type))) 18101e04c3fSmrg continue; 18201e04c3fSmrg 1837ec681f3Smrg if (*complex_vars == NULL) 1847ec681f3Smrg *complex_vars = get_complex_used_vars(shader, mem_ctx); 1857ec681f3Smrg 1867ec681f3Smrg /* We can't split a variable that's referenced with deref that has any 1877ec681f3Smrg * sort of complex usage. 1887ec681f3Smrg */ 1897ec681f3Smrg if (_mesa_set_search(*complex_vars, var)) 1907ec681f3Smrg continue; 1917ec681f3Smrg 19201e04c3fSmrg exec_node_remove(&var->node); 19301e04c3fSmrg exec_list_push_tail(&split_vars, &var->node); 19401e04c3fSmrg } 19501e04c3fSmrg 1967ec681f3Smrg nir_foreach_variable_in_list(var, &split_vars) { 19701e04c3fSmrg state.base_var = var; 19801e04c3fSmrg 19901e04c3fSmrg struct field *root_field = ralloc(mem_ctx, struct field); 20001e04c3fSmrg init_field_for_type(root_field, NULL, var->type, var->name, &state); 20101e04c3fSmrg _mesa_hash_table_insert(var_field_map, var, root_field); 20201e04c3fSmrg } 20301e04c3fSmrg 20401e04c3fSmrg return !exec_list_is_empty(&split_vars); 20501e04c3fSmrg} 20601e04c3fSmrg 20701e04c3fSmrgstatic void 20801e04c3fSmrgsplit_struct_derefs_impl(nir_function_impl *impl, 20901e04c3fSmrg struct hash_table *var_field_map, 21001e04c3fSmrg nir_variable_mode modes, 21101e04c3fSmrg void *mem_ctx) 21201e04c3fSmrg{ 21301e04c3fSmrg nir_builder b; 21401e04c3fSmrg nir_builder_init(&b, impl); 21501e04c3fSmrg 21601e04c3fSmrg nir_foreach_block(block, impl) { 21701e04c3fSmrg nir_foreach_instr_safe(instr, block) { 21801e04c3fSmrg if (instr->type != nir_instr_type_deref) 21901e04c3fSmrg continue; 22001e04c3fSmrg 22101e04c3fSmrg nir_deref_instr *deref = nir_instr_as_deref(instr); 2227ec681f3Smrg if (!nir_deref_mode_may_be(deref, modes)) 22301e04c3fSmrg continue; 22401e04c3fSmrg 22501e04c3fSmrg /* Clean up any dead derefs we find lying around. They may refer to 22601e04c3fSmrg * variables we're planning to split. 22701e04c3fSmrg */ 22801e04c3fSmrg if (nir_deref_instr_remove_if_unused(deref)) 22901e04c3fSmrg continue; 23001e04c3fSmrg 23101e04c3fSmrg if (!glsl_type_is_vector_or_scalar(deref->type)) 23201e04c3fSmrg continue; 23301e04c3fSmrg 23401e04c3fSmrg nir_variable *base_var = nir_deref_instr_get_variable(deref); 2357ec681f3Smrg /* If we can't chase back to the variable, then we're a complex use. 2367ec681f3Smrg * This should have been detected by get_complex_used_vars() and the 2377ec681f3Smrg * variable should not have been split. However, we have no way of 2387ec681f3Smrg * knowing that here, so we just have to trust it. 2397ec681f3Smrg */ 2407ec681f3Smrg if (base_var == NULL) 2417ec681f3Smrg continue; 2427ec681f3Smrg 24301e04c3fSmrg struct hash_entry *entry = 24401e04c3fSmrg _mesa_hash_table_search(var_field_map, base_var); 24501e04c3fSmrg if (!entry) 24601e04c3fSmrg continue; 24701e04c3fSmrg 24801e04c3fSmrg struct field *root_field = entry->data; 24901e04c3fSmrg 25001e04c3fSmrg nir_deref_path path; 25101e04c3fSmrg nir_deref_path_init(&path, deref, mem_ctx); 25201e04c3fSmrg 25301e04c3fSmrg struct field *tail_field = root_field; 25401e04c3fSmrg for (unsigned i = 0; path.path[i]; i++) { 25501e04c3fSmrg if (path.path[i]->deref_type != nir_deref_type_struct) 25601e04c3fSmrg continue; 25701e04c3fSmrg 25801e04c3fSmrg assert(i > 0); 2597e102996Smaya assert(glsl_type_is_struct_or_ifc(path.path[i - 1]->type)); 26001e04c3fSmrg assert(path.path[i - 1]->type == 26101e04c3fSmrg glsl_without_array(tail_field->type)); 26201e04c3fSmrg 26301e04c3fSmrg tail_field = &tail_field->fields[path.path[i]->strct.index]; 26401e04c3fSmrg } 26501e04c3fSmrg nir_variable *split_var = tail_field->var; 26601e04c3fSmrg 26701e04c3fSmrg nir_deref_instr *new_deref = NULL; 26801e04c3fSmrg for (unsigned i = 0; path.path[i]; i++) { 26901e04c3fSmrg nir_deref_instr *p = path.path[i]; 27001e04c3fSmrg b.cursor = nir_after_instr(&p->instr); 27101e04c3fSmrg 27201e04c3fSmrg switch (p->deref_type) { 27301e04c3fSmrg case nir_deref_type_var: 27401e04c3fSmrg assert(new_deref == NULL); 27501e04c3fSmrg new_deref = nir_build_deref_var(&b, split_var); 27601e04c3fSmrg break; 27701e04c3fSmrg 27801e04c3fSmrg case nir_deref_type_array: 27901e04c3fSmrg case nir_deref_type_array_wildcard: 28001e04c3fSmrg new_deref = nir_build_deref_follower(&b, new_deref, p); 28101e04c3fSmrg break; 28201e04c3fSmrg 28301e04c3fSmrg case nir_deref_type_struct: 28401e04c3fSmrg /* Nothing to do; we're splitting structs */ 28501e04c3fSmrg break; 28601e04c3fSmrg 28701e04c3fSmrg default: 28801e04c3fSmrg unreachable("Invalid deref type in path"); 28901e04c3fSmrg } 29001e04c3fSmrg } 29101e04c3fSmrg 29201e04c3fSmrg assert(new_deref->type == deref->type); 29301e04c3fSmrg nir_ssa_def_rewrite_uses(&deref->dest.ssa, 2947ec681f3Smrg &new_deref->dest.ssa); 29501e04c3fSmrg nir_deref_instr_remove_if_unused(deref); 29601e04c3fSmrg } 29701e04c3fSmrg } 29801e04c3fSmrg} 29901e04c3fSmrg 30001e04c3fSmrg/** A pass for splitting structs into multiple variables 30101e04c3fSmrg * 30201e04c3fSmrg * This pass splits arrays of structs into multiple variables, one for each 30301e04c3fSmrg * (possibly nested) structure member. After this pass completes, no 30401e04c3fSmrg * variables of the given mode will contain a struct type. 30501e04c3fSmrg */ 30601e04c3fSmrgbool 30701e04c3fSmrgnir_split_struct_vars(nir_shader *shader, nir_variable_mode modes) 30801e04c3fSmrg{ 30901e04c3fSmrg void *mem_ctx = ralloc_context(NULL); 31001e04c3fSmrg struct hash_table *var_field_map = 3117e102996Smaya _mesa_pointer_hash_table_create(mem_ctx); 3127ec681f3Smrg struct set *complex_vars = NULL; 31301e04c3fSmrg 3147e102996Smaya assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes); 31501e04c3fSmrg 31601e04c3fSmrg bool has_global_splits = false; 3177e102996Smaya if (modes & nir_var_shader_temp) { 31801e04c3fSmrg has_global_splits = split_var_list_structs(shader, NULL, 3197ec681f3Smrg &shader->variables, 3207ec681f3Smrg nir_var_shader_temp, 3217ec681f3Smrg var_field_map, 3227ec681f3Smrg &complex_vars, 3237ec681f3Smrg mem_ctx); 32401e04c3fSmrg } 32501e04c3fSmrg 32601e04c3fSmrg bool progress = false; 32701e04c3fSmrg nir_foreach_function(function, shader) { 32801e04c3fSmrg if (!function->impl) 32901e04c3fSmrg continue; 33001e04c3fSmrg 33101e04c3fSmrg bool has_local_splits = false; 3327e102996Smaya if (modes & nir_var_function_temp) { 33301e04c3fSmrg has_local_splits = split_var_list_structs(shader, function->impl, 33401e04c3fSmrg &function->impl->locals, 3357ec681f3Smrg nir_var_function_temp, 3367ec681f3Smrg var_field_map, 3377ec681f3Smrg &complex_vars, 3387ec681f3Smrg mem_ctx); 33901e04c3fSmrg } 34001e04c3fSmrg 34101e04c3fSmrg if (has_global_splits || has_local_splits) { 34201e04c3fSmrg split_struct_derefs_impl(function->impl, var_field_map, 34301e04c3fSmrg modes, mem_ctx); 34401e04c3fSmrg 34501e04c3fSmrg nir_metadata_preserve(function->impl, nir_metadata_block_index | 34601e04c3fSmrg nir_metadata_dominance); 34701e04c3fSmrg progress = true; 3487ec681f3Smrg } else { 3497ec681f3Smrg nir_metadata_preserve(function->impl, nir_metadata_all); 35001e04c3fSmrg } 35101e04c3fSmrg } 35201e04c3fSmrg 35301e04c3fSmrg ralloc_free(mem_ctx); 35401e04c3fSmrg 35501e04c3fSmrg return progress; 35601e04c3fSmrg} 35701e04c3fSmrg 35801e04c3fSmrgstruct array_level_info { 35901e04c3fSmrg unsigned array_len; 36001e04c3fSmrg bool split; 36101e04c3fSmrg}; 36201e04c3fSmrg 36301e04c3fSmrgstruct array_split { 36401e04c3fSmrg /* Only set if this is the tail end of the splitting */ 36501e04c3fSmrg nir_variable *var; 36601e04c3fSmrg 36701e04c3fSmrg unsigned num_splits; 36801e04c3fSmrg struct array_split *splits; 36901e04c3fSmrg}; 37001e04c3fSmrg 37101e04c3fSmrgstruct array_var_info { 37201e04c3fSmrg nir_variable *base_var; 37301e04c3fSmrg 37401e04c3fSmrg const struct glsl_type *split_var_type; 37501e04c3fSmrg 37601e04c3fSmrg bool split_var; 37701e04c3fSmrg struct array_split root_split; 37801e04c3fSmrg 37901e04c3fSmrg unsigned num_levels; 38001e04c3fSmrg struct array_level_info levels[0]; 38101e04c3fSmrg}; 38201e04c3fSmrg 38301e04c3fSmrgstatic bool 3847ec681f3Smrginit_var_list_array_infos(nir_shader *shader, 3857ec681f3Smrg struct exec_list *vars, 3867ec681f3Smrg nir_variable_mode mode, 38701e04c3fSmrg struct hash_table *var_info_map, 3887ec681f3Smrg struct set **complex_vars, 38901e04c3fSmrg void *mem_ctx) 39001e04c3fSmrg{ 39101e04c3fSmrg bool has_array = false; 39201e04c3fSmrg 3937ec681f3Smrg nir_foreach_variable_in_list(var, vars) { 3947ec681f3Smrg if (var->data.mode != mode) 3957ec681f3Smrg continue; 3967ec681f3Smrg 39701e04c3fSmrg int num_levels = num_array_levels_in_array_of_vector_type(var->type); 39801e04c3fSmrg if (num_levels <= 0) 39901e04c3fSmrg continue; 40001e04c3fSmrg 4017ec681f3Smrg if (*complex_vars == NULL) 4027ec681f3Smrg *complex_vars = get_complex_used_vars(shader, mem_ctx); 4037ec681f3Smrg 4047ec681f3Smrg /* We can't split a variable that's referenced with deref that has any 4057ec681f3Smrg * sort of complex usage. 4067ec681f3Smrg */ 4077ec681f3Smrg if (_mesa_set_search(*complex_vars, var)) 4087ec681f3Smrg continue; 4097ec681f3Smrg 41001e04c3fSmrg struct array_var_info *info = 41101e04c3fSmrg rzalloc_size(mem_ctx, sizeof(*info) + 41201e04c3fSmrg num_levels * sizeof(info->levels[0])); 41301e04c3fSmrg 41401e04c3fSmrg info->base_var = var; 41501e04c3fSmrg info->num_levels = num_levels; 41601e04c3fSmrg 41701e04c3fSmrg const struct glsl_type *type = var->type; 41801e04c3fSmrg for (int i = 0; i < num_levels; i++) { 41901e04c3fSmrg info->levels[i].array_len = glsl_get_length(type); 42001e04c3fSmrg type = glsl_get_array_element(type); 42101e04c3fSmrg 42201e04c3fSmrg /* All levels start out initially as split */ 42301e04c3fSmrg info->levels[i].split = true; 42401e04c3fSmrg } 42501e04c3fSmrg 42601e04c3fSmrg _mesa_hash_table_insert(var_info_map, var, info); 42701e04c3fSmrg has_array = true; 42801e04c3fSmrg } 42901e04c3fSmrg 43001e04c3fSmrg return has_array; 43101e04c3fSmrg} 43201e04c3fSmrg 43301e04c3fSmrgstatic struct array_var_info * 43401e04c3fSmrgget_array_var_info(nir_variable *var, 43501e04c3fSmrg struct hash_table *var_info_map) 43601e04c3fSmrg{ 43701e04c3fSmrg struct hash_entry *entry = 43801e04c3fSmrg _mesa_hash_table_search(var_info_map, var); 43901e04c3fSmrg return entry ? entry->data : NULL; 44001e04c3fSmrg} 44101e04c3fSmrg 44201e04c3fSmrgstatic struct array_var_info * 44301e04c3fSmrgget_array_deref_info(nir_deref_instr *deref, 44401e04c3fSmrg struct hash_table *var_info_map, 44501e04c3fSmrg nir_variable_mode modes) 44601e04c3fSmrg{ 4477ec681f3Smrg if (!nir_deref_mode_may_be(deref, modes)) 44801e04c3fSmrg return NULL; 44901e04c3fSmrg 4507ec681f3Smrg nir_variable *var = nir_deref_instr_get_variable(deref); 4517ec681f3Smrg if (var == NULL) 4527ec681f3Smrg return NULL; 4537ec681f3Smrg 4547ec681f3Smrg return get_array_var_info(var, var_info_map); 45501e04c3fSmrg} 45601e04c3fSmrg 45701e04c3fSmrgstatic void 45801e04c3fSmrgmark_array_deref_used(nir_deref_instr *deref, 45901e04c3fSmrg struct hash_table *var_info_map, 46001e04c3fSmrg nir_variable_mode modes, 46101e04c3fSmrg void *mem_ctx) 46201e04c3fSmrg{ 46301e04c3fSmrg struct array_var_info *info = 46401e04c3fSmrg get_array_deref_info(deref, var_info_map, modes); 46501e04c3fSmrg if (!info) 46601e04c3fSmrg return; 46701e04c3fSmrg 46801e04c3fSmrg nir_deref_path path; 46901e04c3fSmrg nir_deref_path_init(&path, deref, mem_ctx); 47001e04c3fSmrg 47101e04c3fSmrg /* Walk the path and look for indirects. If we have an array deref with an 47201e04c3fSmrg * indirect, mark the given level as not being split. 47301e04c3fSmrg */ 47401e04c3fSmrg for (unsigned i = 0; i < info->num_levels; i++) { 47501e04c3fSmrg nir_deref_instr *p = path.path[i + 1]; 47601e04c3fSmrg if (p->deref_type == nir_deref_type_array && 47701e04c3fSmrg !nir_src_is_const(p->arr.index)) 47801e04c3fSmrg info->levels[i].split = false; 47901e04c3fSmrg } 48001e04c3fSmrg} 48101e04c3fSmrg 48201e04c3fSmrgstatic void 48301e04c3fSmrgmark_array_usage_impl(nir_function_impl *impl, 48401e04c3fSmrg struct hash_table *var_info_map, 48501e04c3fSmrg nir_variable_mode modes, 48601e04c3fSmrg void *mem_ctx) 48701e04c3fSmrg{ 48801e04c3fSmrg nir_foreach_block(block, impl) { 48901e04c3fSmrg nir_foreach_instr(instr, block) { 49001e04c3fSmrg if (instr->type != nir_instr_type_intrinsic) 49101e04c3fSmrg continue; 49201e04c3fSmrg 49301e04c3fSmrg nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 49401e04c3fSmrg switch (intrin->intrinsic) { 49501e04c3fSmrg case nir_intrinsic_copy_deref: 49601e04c3fSmrg mark_array_deref_used(nir_src_as_deref(intrin->src[1]), 49701e04c3fSmrg var_info_map, modes, mem_ctx); 4987ec681f3Smrg FALLTHROUGH; 49901e04c3fSmrg 50001e04c3fSmrg case nir_intrinsic_load_deref: 50101e04c3fSmrg case nir_intrinsic_store_deref: 50201e04c3fSmrg mark_array_deref_used(nir_src_as_deref(intrin->src[0]), 50301e04c3fSmrg var_info_map, modes, mem_ctx); 50401e04c3fSmrg break; 50501e04c3fSmrg 50601e04c3fSmrg default: 50701e04c3fSmrg break; 50801e04c3fSmrg } 50901e04c3fSmrg } 51001e04c3fSmrg } 51101e04c3fSmrg} 51201e04c3fSmrg 51301e04c3fSmrgstatic void 51401e04c3fSmrgcreate_split_array_vars(struct array_var_info *var_info, 51501e04c3fSmrg unsigned level, 51601e04c3fSmrg struct array_split *split, 51701e04c3fSmrg const char *name, 51801e04c3fSmrg nir_shader *shader, 51901e04c3fSmrg nir_function_impl *impl, 52001e04c3fSmrg void *mem_ctx) 52101e04c3fSmrg{ 52201e04c3fSmrg while (level < var_info->num_levels && !var_info->levels[level].split) { 52301e04c3fSmrg name = ralloc_asprintf(mem_ctx, "%s[*]", name); 52401e04c3fSmrg level++; 52501e04c3fSmrg } 52601e04c3fSmrg 52701e04c3fSmrg if (level == var_info->num_levels) { 52801e04c3fSmrg /* We add parens to the variable name so it looks like "(foo[2][*])" so 52901e04c3fSmrg * that further derefs will look like "(foo[2][*])[ssa_6]" 53001e04c3fSmrg */ 53101e04c3fSmrg name = ralloc_asprintf(mem_ctx, "(%s)", name); 53201e04c3fSmrg 53301e04c3fSmrg nir_variable_mode mode = var_info->base_var->data.mode; 5347e102996Smaya if (mode == nir_var_function_temp) { 53501e04c3fSmrg split->var = nir_local_variable_create(impl, 53601e04c3fSmrg var_info->split_var_type, name); 53701e04c3fSmrg } else { 53801e04c3fSmrg split->var = nir_variable_create(shader, mode, 53901e04c3fSmrg var_info->split_var_type, name); 54001e04c3fSmrg } 54101e04c3fSmrg } else { 54201e04c3fSmrg assert(var_info->levels[level].split); 54301e04c3fSmrg split->num_splits = var_info->levels[level].array_len; 54401e04c3fSmrg split->splits = rzalloc_array(mem_ctx, struct array_split, 54501e04c3fSmrg split->num_splits); 54601e04c3fSmrg for (unsigned i = 0; i < split->num_splits; i++) { 54701e04c3fSmrg create_split_array_vars(var_info, level + 1, &split->splits[i], 54801e04c3fSmrg ralloc_asprintf(mem_ctx, "%s[%d]", name, i), 54901e04c3fSmrg shader, impl, mem_ctx); 55001e04c3fSmrg } 55101e04c3fSmrg } 55201e04c3fSmrg} 55301e04c3fSmrg 55401e04c3fSmrgstatic bool 55501e04c3fSmrgsplit_var_list_arrays(nir_shader *shader, 55601e04c3fSmrg nir_function_impl *impl, 55701e04c3fSmrg struct exec_list *vars, 5587ec681f3Smrg nir_variable_mode mode, 55901e04c3fSmrg struct hash_table *var_info_map, 56001e04c3fSmrg void *mem_ctx) 56101e04c3fSmrg{ 56201e04c3fSmrg struct exec_list split_vars; 56301e04c3fSmrg exec_list_make_empty(&split_vars); 56401e04c3fSmrg 5657ec681f3Smrg nir_foreach_variable_in_list_safe(var, vars) { 5667ec681f3Smrg if (var->data.mode != mode) 5677ec681f3Smrg continue; 5687ec681f3Smrg 56901e04c3fSmrg struct array_var_info *info = get_array_var_info(var, var_info_map); 57001e04c3fSmrg if (!info) 57101e04c3fSmrg continue; 57201e04c3fSmrg 57301e04c3fSmrg bool has_split = false; 57401e04c3fSmrg const struct glsl_type *split_type = 57501e04c3fSmrg glsl_without_array_or_matrix(var->type); 57601e04c3fSmrg for (int i = info->num_levels - 1; i >= 0; i--) { 57701e04c3fSmrg if (info->levels[i].split) { 57801e04c3fSmrg has_split = true; 57901e04c3fSmrg continue; 58001e04c3fSmrg } 58101e04c3fSmrg 58201e04c3fSmrg /* If the original type was a matrix type, we'd like to keep that so 58301e04c3fSmrg * we don't convert matrices into arrays. 58401e04c3fSmrg */ 58501e04c3fSmrg if (i == info->num_levels - 1 && 58601e04c3fSmrg glsl_type_is_matrix(glsl_without_array(var->type))) { 58701e04c3fSmrg split_type = glsl_matrix_type(glsl_get_base_type(split_type), 58801e04c3fSmrg glsl_get_components(split_type), 58901e04c3fSmrg info->levels[i].array_len); 59001e04c3fSmrg } else { 5917e102996Smaya split_type = glsl_array_type(split_type, info->levels[i].array_len, 0); 59201e04c3fSmrg } 59301e04c3fSmrg } 59401e04c3fSmrg 59501e04c3fSmrg if (has_split) { 59601e04c3fSmrg info->split_var_type = split_type; 59701e04c3fSmrg /* To avoid list confusion (we'll be adding things as we split 59801e04c3fSmrg * variables), pull all of the variables we plan to split off of the 59901e04c3fSmrg * main variable list. 60001e04c3fSmrg */ 60101e04c3fSmrg exec_node_remove(&var->node); 60201e04c3fSmrg exec_list_push_tail(&split_vars, &var->node); 60301e04c3fSmrg } else { 6047e102996Smaya assert(split_type == glsl_get_bare_type(var->type)); 60501e04c3fSmrg /* If we're not modifying this variable, delete the info so we skip 60601e04c3fSmrg * it faster in later passes. 60701e04c3fSmrg */ 60801e04c3fSmrg _mesa_hash_table_remove_key(var_info_map, var); 60901e04c3fSmrg } 61001e04c3fSmrg } 61101e04c3fSmrg 6127ec681f3Smrg nir_foreach_variable_in_list(var, &split_vars) { 61301e04c3fSmrg struct array_var_info *info = get_array_var_info(var, var_info_map); 61401e04c3fSmrg create_split_array_vars(info, 0, &info->root_split, var->name, 61501e04c3fSmrg shader, impl, mem_ctx); 61601e04c3fSmrg } 61701e04c3fSmrg 61801e04c3fSmrg return !exec_list_is_empty(&split_vars); 61901e04c3fSmrg} 62001e04c3fSmrg 62101e04c3fSmrgstatic bool 62201e04c3fSmrgderef_has_split_wildcard(nir_deref_path *path, 62301e04c3fSmrg struct array_var_info *info) 62401e04c3fSmrg{ 62501e04c3fSmrg if (info == NULL) 62601e04c3fSmrg return false; 62701e04c3fSmrg 62801e04c3fSmrg assert(path->path[0]->var == info->base_var); 62901e04c3fSmrg for (unsigned i = 0; i < info->num_levels; i++) { 63001e04c3fSmrg if (path->path[i + 1]->deref_type == nir_deref_type_array_wildcard && 63101e04c3fSmrg info->levels[i].split) 63201e04c3fSmrg return true; 63301e04c3fSmrg } 63401e04c3fSmrg 63501e04c3fSmrg return false; 63601e04c3fSmrg} 63701e04c3fSmrg 63801e04c3fSmrgstatic bool 63901e04c3fSmrgarray_path_is_out_of_bounds(nir_deref_path *path, 64001e04c3fSmrg struct array_var_info *info) 64101e04c3fSmrg{ 64201e04c3fSmrg if (info == NULL) 64301e04c3fSmrg return false; 64401e04c3fSmrg 64501e04c3fSmrg assert(path->path[0]->var == info->base_var); 64601e04c3fSmrg for (unsigned i = 0; i < info->num_levels; i++) { 64701e04c3fSmrg nir_deref_instr *p = path->path[i + 1]; 64801e04c3fSmrg if (p->deref_type == nir_deref_type_array_wildcard) 64901e04c3fSmrg continue; 65001e04c3fSmrg 65101e04c3fSmrg if (nir_src_is_const(p->arr.index) && 65201e04c3fSmrg nir_src_as_uint(p->arr.index) >= info->levels[i].array_len) 65301e04c3fSmrg return true; 65401e04c3fSmrg } 65501e04c3fSmrg 65601e04c3fSmrg return false; 65701e04c3fSmrg} 65801e04c3fSmrg 65901e04c3fSmrgstatic void 66001e04c3fSmrgemit_split_copies(nir_builder *b, 66101e04c3fSmrg struct array_var_info *dst_info, nir_deref_path *dst_path, 66201e04c3fSmrg unsigned dst_level, nir_deref_instr *dst, 66301e04c3fSmrg struct array_var_info *src_info, nir_deref_path *src_path, 66401e04c3fSmrg unsigned src_level, nir_deref_instr *src) 66501e04c3fSmrg{ 66601e04c3fSmrg nir_deref_instr *dst_p, *src_p; 66701e04c3fSmrg 66801e04c3fSmrg while ((dst_p = dst_path->path[dst_level + 1])) { 66901e04c3fSmrg if (dst_p->deref_type == nir_deref_type_array_wildcard) 67001e04c3fSmrg break; 67101e04c3fSmrg 67201e04c3fSmrg dst = nir_build_deref_follower(b, dst, dst_p); 67301e04c3fSmrg dst_level++; 67401e04c3fSmrg } 67501e04c3fSmrg 67601e04c3fSmrg while ((src_p = src_path->path[src_level + 1])) { 67701e04c3fSmrg if (src_p->deref_type == nir_deref_type_array_wildcard) 67801e04c3fSmrg break; 67901e04c3fSmrg 68001e04c3fSmrg src = nir_build_deref_follower(b, src, src_p); 68101e04c3fSmrg src_level++; 68201e04c3fSmrg } 68301e04c3fSmrg 68401e04c3fSmrg if (src_p == NULL || dst_p == NULL) { 68501e04c3fSmrg assert(src_p == NULL && dst_p == NULL); 68601e04c3fSmrg nir_copy_deref(b, dst, src); 68701e04c3fSmrg } else { 68801e04c3fSmrg assert(dst_p->deref_type == nir_deref_type_array_wildcard && 68901e04c3fSmrg src_p->deref_type == nir_deref_type_array_wildcard); 69001e04c3fSmrg 69101e04c3fSmrg if ((dst_info && dst_info->levels[dst_level].split) || 69201e04c3fSmrg (src_info && src_info->levels[src_level].split)) { 69301e04c3fSmrg /* There are no indirects at this level on one of the source or the 69401e04c3fSmrg * destination so we are lowering it. 69501e04c3fSmrg */ 69601e04c3fSmrg assert(glsl_get_length(dst_path->path[dst_level]->type) == 69701e04c3fSmrg glsl_get_length(src_path->path[src_level]->type)); 69801e04c3fSmrg unsigned len = glsl_get_length(dst_path->path[dst_level]->type); 69901e04c3fSmrg for (unsigned i = 0; i < len; i++) { 70001e04c3fSmrg emit_split_copies(b, dst_info, dst_path, dst_level + 1, 7017e102996Smaya nir_build_deref_array_imm(b, dst, i), 70201e04c3fSmrg src_info, src_path, src_level + 1, 7037e102996Smaya nir_build_deref_array_imm(b, src, i)); 70401e04c3fSmrg } 70501e04c3fSmrg } else { 70601e04c3fSmrg /* Neither side is being split so we just keep going */ 70701e04c3fSmrg emit_split_copies(b, dst_info, dst_path, dst_level + 1, 70801e04c3fSmrg nir_build_deref_array_wildcard(b, dst), 70901e04c3fSmrg src_info, src_path, src_level + 1, 71001e04c3fSmrg nir_build_deref_array_wildcard(b, src)); 71101e04c3fSmrg } 71201e04c3fSmrg } 71301e04c3fSmrg} 71401e04c3fSmrg 71501e04c3fSmrgstatic void 71601e04c3fSmrgsplit_array_copies_impl(nir_function_impl *impl, 71701e04c3fSmrg struct hash_table *var_info_map, 71801e04c3fSmrg nir_variable_mode modes, 71901e04c3fSmrg void *mem_ctx) 72001e04c3fSmrg{ 72101e04c3fSmrg nir_builder b; 72201e04c3fSmrg nir_builder_init(&b, impl); 72301e04c3fSmrg 72401e04c3fSmrg nir_foreach_block(block, impl) { 72501e04c3fSmrg nir_foreach_instr_safe(instr, block) { 72601e04c3fSmrg if (instr->type != nir_instr_type_intrinsic) 72701e04c3fSmrg continue; 72801e04c3fSmrg 72901e04c3fSmrg nir_intrinsic_instr *copy = nir_instr_as_intrinsic(instr); 73001e04c3fSmrg if (copy->intrinsic != nir_intrinsic_copy_deref) 73101e04c3fSmrg continue; 73201e04c3fSmrg 73301e04c3fSmrg nir_deref_instr *dst_deref = nir_src_as_deref(copy->src[0]); 73401e04c3fSmrg nir_deref_instr *src_deref = nir_src_as_deref(copy->src[1]); 73501e04c3fSmrg 73601e04c3fSmrg struct array_var_info *dst_info = 73701e04c3fSmrg get_array_deref_info(dst_deref, var_info_map, modes); 73801e04c3fSmrg struct array_var_info *src_info = 73901e04c3fSmrg get_array_deref_info(src_deref, var_info_map, modes); 74001e04c3fSmrg 74101e04c3fSmrg if (!src_info && !dst_info) 74201e04c3fSmrg continue; 74301e04c3fSmrg 74401e04c3fSmrg nir_deref_path dst_path, src_path; 74501e04c3fSmrg nir_deref_path_init(&dst_path, dst_deref, mem_ctx); 74601e04c3fSmrg nir_deref_path_init(&src_path, src_deref, mem_ctx); 74701e04c3fSmrg 74801e04c3fSmrg if (!deref_has_split_wildcard(&dst_path, dst_info) && 74901e04c3fSmrg !deref_has_split_wildcard(&src_path, src_info)) 75001e04c3fSmrg continue; 75101e04c3fSmrg 75201e04c3fSmrg b.cursor = nir_instr_remove(©->instr); 75301e04c3fSmrg 75401e04c3fSmrg emit_split_copies(&b, dst_info, &dst_path, 0, dst_path.path[0], 75501e04c3fSmrg src_info, &src_path, 0, src_path.path[0]); 75601e04c3fSmrg } 75701e04c3fSmrg } 75801e04c3fSmrg} 75901e04c3fSmrg 76001e04c3fSmrgstatic void 76101e04c3fSmrgsplit_array_access_impl(nir_function_impl *impl, 76201e04c3fSmrg struct hash_table *var_info_map, 76301e04c3fSmrg nir_variable_mode modes, 76401e04c3fSmrg void *mem_ctx) 76501e04c3fSmrg{ 76601e04c3fSmrg nir_builder b; 76701e04c3fSmrg nir_builder_init(&b, impl); 76801e04c3fSmrg 76901e04c3fSmrg nir_foreach_block(block, impl) { 77001e04c3fSmrg nir_foreach_instr_safe(instr, block) { 77101e04c3fSmrg if (instr->type == nir_instr_type_deref) { 77201e04c3fSmrg /* Clean up any dead derefs we find lying around. They may refer 77301e04c3fSmrg * to variables we're planning to split. 77401e04c3fSmrg */ 77501e04c3fSmrg nir_deref_instr *deref = nir_instr_as_deref(instr); 7767ec681f3Smrg if (nir_deref_mode_may_be(deref, modes)) 77701e04c3fSmrg nir_deref_instr_remove_if_unused(deref); 77801e04c3fSmrg continue; 77901e04c3fSmrg } 78001e04c3fSmrg 78101e04c3fSmrg if (instr->type != nir_instr_type_intrinsic) 78201e04c3fSmrg continue; 78301e04c3fSmrg 78401e04c3fSmrg nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 78501e04c3fSmrg if (intrin->intrinsic != nir_intrinsic_load_deref && 78601e04c3fSmrg intrin->intrinsic != nir_intrinsic_store_deref && 78701e04c3fSmrg intrin->intrinsic != nir_intrinsic_copy_deref) 78801e04c3fSmrg continue; 78901e04c3fSmrg 79001e04c3fSmrg const unsigned num_derefs = 79101e04c3fSmrg intrin->intrinsic == nir_intrinsic_copy_deref ? 2 : 1; 79201e04c3fSmrg 79301e04c3fSmrg for (unsigned d = 0; d < num_derefs; d++) { 79401e04c3fSmrg nir_deref_instr *deref = nir_src_as_deref(intrin->src[d]); 79501e04c3fSmrg 79601e04c3fSmrg struct array_var_info *info = 79701e04c3fSmrg get_array_deref_info(deref, var_info_map, modes); 79801e04c3fSmrg if (!info) 79901e04c3fSmrg continue; 80001e04c3fSmrg 80101e04c3fSmrg nir_deref_path path; 80201e04c3fSmrg nir_deref_path_init(&path, deref, mem_ctx); 80301e04c3fSmrg 80401e04c3fSmrg b.cursor = nir_before_instr(&intrin->instr); 80501e04c3fSmrg 80601e04c3fSmrg if (array_path_is_out_of_bounds(&path, info)) { 80701e04c3fSmrg /* If one of the derefs is out-of-bounds, we just delete the 80801e04c3fSmrg * instruction. If a destination is out of bounds, then it may 80901e04c3fSmrg * have been in-bounds prior to shrinking so we don't want to 81001e04c3fSmrg * accidentally stomp something. However, we've already proven 81101e04c3fSmrg * that it will never be read so it's safe to delete. If a 81201e04c3fSmrg * source is out of bounds then it is loading random garbage. 81301e04c3fSmrg * For loads, we replace their uses with an undef instruction 81401e04c3fSmrg * and for copies we just delete the copy since it was writing 81501e04c3fSmrg * undefined garbage anyway and we may as well leave the random 81601e04c3fSmrg * garbage in the destination alone. 81701e04c3fSmrg */ 81801e04c3fSmrg if (intrin->intrinsic == nir_intrinsic_load_deref) { 81901e04c3fSmrg nir_ssa_def *u = 82001e04c3fSmrg nir_ssa_undef(&b, intrin->dest.ssa.num_components, 82101e04c3fSmrg intrin->dest.ssa.bit_size); 82201e04c3fSmrg nir_ssa_def_rewrite_uses(&intrin->dest.ssa, 8237ec681f3Smrg u); 82401e04c3fSmrg } 82501e04c3fSmrg nir_instr_remove(&intrin->instr); 82601e04c3fSmrg for (unsigned i = 0; i < num_derefs; i++) 82701e04c3fSmrg nir_deref_instr_remove_if_unused(nir_src_as_deref(intrin->src[i])); 82801e04c3fSmrg break; 82901e04c3fSmrg } 83001e04c3fSmrg 83101e04c3fSmrg struct array_split *split = &info->root_split; 83201e04c3fSmrg for (unsigned i = 0; i < info->num_levels; i++) { 83301e04c3fSmrg if (info->levels[i].split) { 83401e04c3fSmrg nir_deref_instr *p = path.path[i + 1]; 83501e04c3fSmrg unsigned index = nir_src_as_uint(p->arr.index); 83601e04c3fSmrg assert(index < info->levels[i].array_len); 83701e04c3fSmrg split = &split->splits[index]; 83801e04c3fSmrg } 83901e04c3fSmrg } 84001e04c3fSmrg assert(!split->splits && split->var); 84101e04c3fSmrg 84201e04c3fSmrg nir_deref_instr *new_deref = nir_build_deref_var(&b, split->var); 84301e04c3fSmrg for (unsigned i = 0; i < info->num_levels; i++) { 84401e04c3fSmrg if (!info->levels[i].split) { 84501e04c3fSmrg new_deref = nir_build_deref_follower(&b, new_deref, 84601e04c3fSmrg path.path[i + 1]); 84701e04c3fSmrg } 84801e04c3fSmrg } 84901e04c3fSmrg assert(new_deref->type == deref->type); 85001e04c3fSmrg 85101e04c3fSmrg /* Rewrite the deref source to point to the split one */ 85201e04c3fSmrg nir_instr_rewrite_src(&intrin->instr, &intrin->src[d], 85301e04c3fSmrg nir_src_for_ssa(&new_deref->dest.ssa)); 85401e04c3fSmrg nir_deref_instr_remove_if_unused(deref); 85501e04c3fSmrg } 85601e04c3fSmrg } 85701e04c3fSmrg } 85801e04c3fSmrg} 85901e04c3fSmrg 86001e04c3fSmrg/** A pass for splitting arrays of vectors into multiple variables 86101e04c3fSmrg * 86201e04c3fSmrg * This pass looks at arrays (possibly multiple levels) of vectors (not 86301e04c3fSmrg * structures or other types) and tries to split them into piles of variables, 86401e04c3fSmrg * one for each array element. The heuristic used is simple: If a given array 86501e04c3fSmrg * level is never used with an indirect, that array level will get split. 86601e04c3fSmrg * 86701e04c3fSmrg * This pass probably could handles structures easily enough but making a pass 86801e04c3fSmrg * that could see through an array of structures of arrays would be difficult 86901e04c3fSmrg * so it's best to just run nir_split_struct_vars first. 87001e04c3fSmrg */ 87101e04c3fSmrgbool 87201e04c3fSmrgnir_split_array_vars(nir_shader *shader, nir_variable_mode modes) 87301e04c3fSmrg{ 87401e04c3fSmrg void *mem_ctx = ralloc_context(NULL); 8757e102996Smaya struct hash_table *var_info_map = _mesa_pointer_hash_table_create(mem_ctx); 8767ec681f3Smrg struct set *complex_vars = NULL; 87701e04c3fSmrg 8787e102996Smaya assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes); 87901e04c3fSmrg 88001e04c3fSmrg bool has_global_array = false; 8817e102996Smaya if (modes & nir_var_shader_temp) { 8827ec681f3Smrg has_global_array = init_var_list_array_infos(shader, 8837ec681f3Smrg &shader->variables, 8847ec681f3Smrg nir_var_shader_temp, 8857ec681f3Smrg var_info_map, 8867ec681f3Smrg &complex_vars, 8877ec681f3Smrg mem_ctx); 88801e04c3fSmrg } 88901e04c3fSmrg 89001e04c3fSmrg bool has_any_array = false; 89101e04c3fSmrg nir_foreach_function(function, shader) { 89201e04c3fSmrg if (!function->impl) 89301e04c3fSmrg continue; 89401e04c3fSmrg 89501e04c3fSmrg bool has_local_array = false; 8967e102996Smaya if (modes & nir_var_function_temp) { 8977ec681f3Smrg has_local_array = init_var_list_array_infos(shader, 8987ec681f3Smrg &function->impl->locals, 8997ec681f3Smrg nir_var_function_temp, 9007ec681f3Smrg var_info_map, 9017ec681f3Smrg &complex_vars, 9027ec681f3Smrg mem_ctx); 90301e04c3fSmrg } 90401e04c3fSmrg 90501e04c3fSmrg if (has_global_array || has_local_array) { 90601e04c3fSmrg has_any_array = true; 90701e04c3fSmrg mark_array_usage_impl(function->impl, var_info_map, modes, mem_ctx); 90801e04c3fSmrg } 90901e04c3fSmrg } 91001e04c3fSmrg 91101e04c3fSmrg /* If we failed to find any arrays of arrays, bail early. */ 91201e04c3fSmrg if (!has_any_array) { 91301e04c3fSmrg ralloc_free(mem_ctx); 9147ec681f3Smrg nir_shader_preserve_all_metadata(shader); 91501e04c3fSmrg return false; 91601e04c3fSmrg } 91701e04c3fSmrg 91801e04c3fSmrg bool has_global_splits = false; 9197e102996Smaya if (modes & nir_var_shader_temp) { 92001e04c3fSmrg has_global_splits = split_var_list_arrays(shader, NULL, 9217ec681f3Smrg &shader->variables, 9227ec681f3Smrg nir_var_shader_temp, 92301e04c3fSmrg var_info_map, mem_ctx); 92401e04c3fSmrg } 92501e04c3fSmrg 92601e04c3fSmrg bool progress = false; 92701e04c3fSmrg nir_foreach_function(function, shader) { 92801e04c3fSmrg if (!function->impl) 92901e04c3fSmrg continue; 93001e04c3fSmrg 93101e04c3fSmrg bool has_local_splits = false; 9327e102996Smaya if (modes & nir_var_function_temp) { 93301e04c3fSmrg has_local_splits = split_var_list_arrays(shader, function->impl, 93401e04c3fSmrg &function->impl->locals, 9357ec681f3Smrg nir_var_function_temp, 93601e04c3fSmrg var_info_map, mem_ctx); 93701e04c3fSmrg } 93801e04c3fSmrg 93901e04c3fSmrg if (has_global_splits || has_local_splits) { 94001e04c3fSmrg split_array_copies_impl(function->impl, var_info_map, modes, mem_ctx); 94101e04c3fSmrg split_array_access_impl(function->impl, var_info_map, modes, mem_ctx); 94201e04c3fSmrg 94301e04c3fSmrg nir_metadata_preserve(function->impl, nir_metadata_block_index | 94401e04c3fSmrg nir_metadata_dominance); 94501e04c3fSmrg progress = true; 9467ec681f3Smrg } else { 9477ec681f3Smrg nir_metadata_preserve(function->impl, nir_metadata_all); 94801e04c3fSmrg } 94901e04c3fSmrg } 95001e04c3fSmrg 95101e04c3fSmrg ralloc_free(mem_ctx); 95201e04c3fSmrg 95301e04c3fSmrg return progress; 95401e04c3fSmrg} 95501e04c3fSmrg 95601e04c3fSmrgstruct array_level_usage { 95701e04c3fSmrg unsigned array_len; 95801e04c3fSmrg 95901e04c3fSmrg /* The value UINT_MAX will be used to indicate an indirect */ 96001e04c3fSmrg unsigned max_read; 96101e04c3fSmrg unsigned max_written; 96201e04c3fSmrg 96301e04c3fSmrg /* True if there is a copy that isn't to/from a shrinkable array */ 96401e04c3fSmrg bool has_external_copy; 96501e04c3fSmrg struct set *levels_copied; 96601e04c3fSmrg}; 96701e04c3fSmrg 96801e04c3fSmrgstruct vec_var_usage { 96901e04c3fSmrg /* Convenience set of all components this variable has */ 97001e04c3fSmrg nir_component_mask_t all_comps; 97101e04c3fSmrg 97201e04c3fSmrg nir_component_mask_t comps_read; 97301e04c3fSmrg nir_component_mask_t comps_written; 97401e04c3fSmrg 97501e04c3fSmrg nir_component_mask_t comps_kept; 97601e04c3fSmrg 97701e04c3fSmrg /* True if there is a copy that isn't to/from a shrinkable vector */ 97801e04c3fSmrg bool has_external_copy; 9797ec681f3Smrg bool has_complex_use; 98001e04c3fSmrg struct set *vars_copied; 98101e04c3fSmrg 98201e04c3fSmrg unsigned num_levels; 98301e04c3fSmrg struct array_level_usage levels[0]; 98401e04c3fSmrg}; 98501e04c3fSmrg 98601e04c3fSmrgstatic struct vec_var_usage * 98701e04c3fSmrgget_vec_var_usage(nir_variable *var, 98801e04c3fSmrg struct hash_table *var_usage_map, 98901e04c3fSmrg bool add_usage_entry, void *mem_ctx) 99001e04c3fSmrg{ 99101e04c3fSmrg struct hash_entry *entry = _mesa_hash_table_search(var_usage_map, var); 99201e04c3fSmrg if (entry) 99301e04c3fSmrg return entry->data; 99401e04c3fSmrg 99501e04c3fSmrg if (!add_usage_entry) 99601e04c3fSmrg return NULL; 99701e04c3fSmrg 99801e04c3fSmrg /* Check to make sure that we are working with an array of vectors. We 99901e04c3fSmrg * don't bother to shrink single vectors because we figure that we can 100001e04c3fSmrg * clean it up better with SSA than by inserting piles of vecN instructions 100101e04c3fSmrg * to compact results. 100201e04c3fSmrg */ 100301e04c3fSmrg int num_levels = num_array_levels_in_array_of_vector_type(var->type); 100401e04c3fSmrg if (num_levels < 1) 100501e04c3fSmrg return NULL; /* Not an array of vectors */ 100601e04c3fSmrg 100701e04c3fSmrg struct vec_var_usage *usage = 100801e04c3fSmrg rzalloc_size(mem_ctx, sizeof(*usage) + 100901e04c3fSmrg num_levels * sizeof(usage->levels[0])); 101001e04c3fSmrg 101101e04c3fSmrg usage->num_levels = num_levels; 101201e04c3fSmrg const struct glsl_type *type = var->type; 101301e04c3fSmrg for (unsigned i = 0; i < num_levels; i++) { 101401e04c3fSmrg usage->levels[i].array_len = glsl_get_length(type); 101501e04c3fSmrg type = glsl_get_array_element(type); 101601e04c3fSmrg } 101701e04c3fSmrg assert(glsl_type_is_vector_or_scalar(type)); 101801e04c3fSmrg 101901e04c3fSmrg usage->all_comps = (1 << glsl_get_components(type)) - 1; 102001e04c3fSmrg 102101e04c3fSmrg _mesa_hash_table_insert(var_usage_map, var, usage); 102201e04c3fSmrg 102301e04c3fSmrg return usage; 102401e04c3fSmrg} 102501e04c3fSmrg 102601e04c3fSmrgstatic struct vec_var_usage * 102701e04c3fSmrgget_vec_deref_usage(nir_deref_instr *deref, 102801e04c3fSmrg struct hash_table *var_usage_map, 102901e04c3fSmrg nir_variable_mode modes, 103001e04c3fSmrg bool add_usage_entry, void *mem_ctx) 103101e04c3fSmrg{ 10327ec681f3Smrg if (!nir_deref_mode_may_be(deref, modes)) 10337ec681f3Smrg return NULL; 10347ec681f3Smrg 10357ec681f3Smrg nir_variable *var = nir_deref_instr_get_variable(deref); 10367ec681f3Smrg if (var == NULL) 103701e04c3fSmrg return NULL; 103801e04c3fSmrg 103901e04c3fSmrg return get_vec_var_usage(nir_deref_instr_get_variable(deref), 104001e04c3fSmrg var_usage_map, add_usage_entry, mem_ctx); 104101e04c3fSmrg} 104201e04c3fSmrg 10437ec681f3Smrgstatic void 10447ec681f3Smrgmark_deref_if_complex(nir_deref_instr *deref, 10457ec681f3Smrg struct hash_table *var_usage_map, 10467ec681f3Smrg nir_variable_mode modes, 10477ec681f3Smrg void *mem_ctx) 10487ec681f3Smrg{ 10497ec681f3Smrg /* Only bother with var derefs because nir_deref_instr_has_complex_use is 10507ec681f3Smrg * recursive. 10517ec681f3Smrg */ 10527ec681f3Smrg if (deref->deref_type != nir_deref_type_var) 10537ec681f3Smrg return; 10547ec681f3Smrg 10557ec681f3Smrg if (!(deref->var->data.mode & modes)) 10567ec681f3Smrg return; 10577ec681f3Smrg 10587ec681f3Smrg if (!nir_deref_instr_has_complex_use(deref)) 10597ec681f3Smrg return; 10607ec681f3Smrg 10617ec681f3Smrg struct vec_var_usage *usage = 10627ec681f3Smrg get_vec_var_usage(deref->var, var_usage_map, true, mem_ctx); 10637ec681f3Smrg if (!usage) 10647ec681f3Smrg return; 10657ec681f3Smrg 10667ec681f3Smrg usage->has_complex_use = true; 10677ec681f3Smrg} 10687ec681f3Smrg 106901e04c3fSmrgstatic void 107001e04c3fSmrgmark_deref_used(nir_deref_instr *deref, 107101e04c3fSmrg nir_component_mask_t comps_read, 107201e04c3fSmrg nir_component_mask_t comps_written, 107301e04c3fSmrg nir_deref_instr *copy_deref, 107401e04c3fSmrg struct hash_table *var_usage_map, 107501e04c3fSmrg nir_variable_mode modes, 107601e04c3fSmrg void *mem_ctx) 107701e04c3fSmrg{ 10787ec681f3Smrg if (!nir_deref_mode_may_be(deref, modes)) 107901e04c3fSmrg return; 108001e04c3fSmrg 108101e04c3fSmrg nir_variable *var = nir_deref_instr_get_variable(deref); 10827ec681f3Smrg if (var == NULL) 10837ec681f3Smrg return; 108401e04c3fSmrg 108501e04c3fSmrg struct vec_var_usage *usage = 108601e04c3fSmrg get_vec_var_usage(var, var_usage_map, true, mem_ctx); 108701e04c3fSmrg if (!usage) 108801e04c3fSmrg return; 108901e04c3fSmrg 109001e04c3fSmrg usage->comps_read |= comps_read & usage->all_comps; 109101e04c3fSmrg usage->comps_written |= comps_written & usage->all_comps; 109201e04c3fSmrg 109301e04c3fSmrg struct vec_var_usage *copy_usage = NULL; 109401e04c3fSmrg if (copy_deref) { 109501e04c3fSmrg copy_usage = get_vec_deref_usage(copy_deref, var_usage_map, modes, 109601e04c3fSmrg true, mem_ctx); 109701e04c3fSmrg if (copy_usage) { 109801e04c3fSmrg if (usage->vars_copied == NULL) { 10997e102996Smaya usage->vars_copied = _mesa_pointer_set_create(mem_ctx); 110001e04c3fSmrg } 110101e04c3fSmrg _mesa_set_add(usage->vars_copied, copy_usage); 110201e04c3fSmrg } else { 110301e04c3fSmrg usage->has_external_copy = true; 110401e04c3fSmrg } 110501e04c3fSmrg } 110601e04c3fSmrg 110701e04c3fSmrg nir_deref_path path; 110801e04c3fSmrg nir_deref_path_init(&path, deref, mem_ctx); 110901e04c3fSmrg 111001e04c3fSmrg nir_deref_path copy_path; 111101e04c3fSmrg if (copy_usage) 111201e04c3fSmrg nir_deref_path_init(©_path, copy_deref, mem_ctx); 111301e04c3fSmrg 111401e04c3fSmrg unsigned copy_i = 0; 111501e04c3fSmrg for (unsigned i = 0; i < usage->num_levels; i++) { 111601e04c3fSmrg struct array_level_usage *level = &usage->levels[i]; 111701e04c3fSmrg nir_deref_instr *deref = path.path[i + 1]; 111801e04c3fSmrg assert(deref->deref_type == nir_deref_type_array || 111901e04c3fSmrg deref->deref_type == nir_deref_type_array_wildcard); 112001e04c3fSmrg 112101e04c3fSmrg unsigned max_used; 112201e04c3fSmrg if (deref->deref_type == nir_deref_type_array) { 112301e04c3fSmrg max_used = nir_src_is_const(deref->arr.index) ? 112401e04c3fSmrg nir_src_as_uint(deref->arr.index) : UINT_MAX; 112501e04c3fSmrg } else { 112601e04c3fSmrg /* For wildcards, we read or wrote the whole thing. */ 112701e04c3fSmrg assert(deref->deref_type == nir_deref_type_array_wildcard); 112801e04c3fSmrg max_used = level->array_len - 1; 112901e04c3fSmrg 113001e04c3fSmrg if (copy_usage) { 113101e04c3fSmrg /* Match each wildcard level with the level on copy_usage */ 113201e04c3fSmrg for (; copy_path.path[copy_i + 1]; copy_i++) { 113301e04c3fSmrg if (copy_path.path[copy_i + 1]->deref_type == 113401e04c3fSmrg nir_deref_type_array_wildcard) 113501e04c3fSmrg break; 113601e04c3fSmrg } 113701e04c3fSmrg struct array_level_usage *copy_level = 113801e04c3fSmrg ©_usage->levels[copy_i++]; 113901e04c3fSmrg 114001e04c3fSmrg if (level->levels_copied == NULL) { 11417e102996Smaya level->levels_copied = _mesa_pointer_set_create(mem_ctx); 114201e04c3fSmrg } 114301e04c3fSmrg _mesa_set_add(level->levels_copied, copy_level); 114401e04c3fSmrg } else { 114501e04c3fSmrg /* We have a wildcard and it comes from a variable we aren't 114601e04c3fSmrg * tracking; flag it and we'll know to not shorten this array. 114701e04c3fSmrg */ 114801e04c3fSmrg level->has_external_copy = true; 114901e04c3fSmrg } 115001e04c3fSmrg } 115101e04c3fSmrg 115201e04c3fSmrg if (comps_written) 115301e04c3fSmrg level->max_written = MAX2(level->max_written, max_used); 115401e04c3fSmrg if (comps_read) 115501e04c3fSmrg level->max_read = MAX2(level->max_read, max_used); 115601e04c3fSmrg } 115701e04c3fSmrg} 115801e04c3fSmrg 115901e04c3fSmrgstatic bool 116001e04c3fSmrgsrc_is_load_deref(nir_src src, nir_src deref_src) 116101e04c3fSmrg{ 11627e102996Smaya nir_intrinsic_instr *load = nir_src_as_intrinsic(src); 11637e102996Smaya if (load == NULL || load->intrinsic != nir_intrinsic_load_deref) 116401e04c3fSmrg return false; 116501e04c3fSmrg 116601e04c3fSmrg assert(load->src[0].is_ssa); 116701e04c3fSmrg 116801e04c3fSmrg return load->src[0].ssa == deref_src.ssa; 116901e04c3fSmrg} 117001e04c3fSmrg 117101e04c3fSmrg/* Returns all non-self-referential components of a store instruction. A 117201e04c3fSmrg * component is self-referential if it comes from the same component of a load 117301e04c3fSmrg * instruction on the same deref. If the only data in a particular component 117401e04c3fSmrg * of a variable came directly from that component then it's undefined. The 117501e04c3fSmrg * only way to get defined data into a component of a variable is for it to 117601e04c3fSmrg * get written there by something outside or from a different component. 117701e04c3fSmrg * 117801e04c3fSmrg * This is a fairly common pattern in shaders that come from either GLSL IR or 117901e04c3fSmrg * GLSLang because both glsl_to_nir and GLSLang implement write-masking with 118001e04c3fSmrg * load-vec-store. 118101e04c3fSmrg */ 118201e04c3fSmrgstatic nir_component_mask_t 118301e04c3fSmrgget_non_self_referential_store_comps(nir_intrinsic_instr *store) 118401e04c3fSmrg{ 118501e04c3fSmrg nir_component_mask_t comps = nir_intrinsic_write_mask(store); 118601e04c3fSmrg 118701e04c3fSmrg assert(store->src[1].is_ssa); 118801e04c3fSmrg nir_instr *src_instr = store->src[1].ssa->parent_instr; 118901e04c3fSmrg if (src_instr->type != nir_instr_type_alu) 119001e04c3fSmrg return comps; 119101e04c3fSmrg 119201e04c3fSmrg nir_alu_instr *src_alu = nir_instr_as_alu(src_instr); 119301e04c3fSmrg 11947ec681f3Smrg if (src_alu->op == nir_op_mov) { 119501e04c3fSmrg /* If it's just a swizzle of a load from the same deref, discount any 119601e04c3fSmrg * channels that don't move in the swizzle. 119701e04c3fSmrg */ 119801e04c3fSmrg if (src_is_load_deref(src_alu->src[0].src, store->src[0])) { 119901e04c3fSmrg for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) { 120001e04c3fSmrg if (src_alu->src[0].swizzle[i] == i) 120101e04c3fSmrg comps &= ~(1u << i); 120201e04c3fSmrg } 120301e04c3fSmrg } 12047ec681f3Smrg } else if (nir_op_is_vec(src_alu->op)) { 120501e04c3fSmrg /* If it's a vec, discount any channels that are just loads from the 120601e04c3fSmrg * same deref put in the same spot. 120701e04c3fSmrg */ 120801e04c3fSmrg for (unsigned i = 0; i < nir_op_infos[src_alu->op].num_inputs; i++) { 120901e04c3fSmrg if (src_is_load_deref(src_alu->src[i].src, store->src[0]) && 121001e04c3fSmrg src_alu->src[i].swizzle[0] == i) 121101e04c3fSmrg comps &= ~(1u << i); 121201e04c3fSmrg } 121301e04c3fSmrg } 121401e04c3fSmrg 121501e04c3fSmrg return comps; 121601e04c3fSmrg} 121701e04c3fSmrg 121801e04c3fSmrgstatic void 121901e04c3fSmrgfind_used_components_impl(nir_function_impl *impl, 122001e04c3fSmrg struct hash_table *var_usage_map, 122101e04c3fSmrg nir_variable_mode modes, 122201e04c3fSmrg void *mem_ctx) 122301e04c3fSmrg{ 122401e04c3fSmrg nir_foreach_block(block, impl) { 122501e04c3fSmrg nir_foreach_instr(instr, block) { 12267ec681f3Smrg if (instr->type == nir_instr_type_deref) { 12277ec681f3Smrg mark_deref_if_complex(nir_instr_as_deref(instr), 12287ec681f3Smrg var_usage_map, modes, mem_ctx); 12297ec681f3Smrg } 12307ec681f3Smrg 123101e04c3fSmrg if (instr->type != nir_instr_type_intrinsic) 123201e04c3fSmrg continue; 123301e04c3fSmrg 123401e04c3fSmrg nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 123501e04c3fSmrg switch (intrin->intrinsic) { 123601e04c3fSmrg case nir_intrinsic_load_deref: 123701e04c3fSmrg mark_deref_used(nir_src_as_deref(intrin->src[0]), 123801e04c3fSmrg nir_ssa_def_components_read(&intrin->dest.ssa), 0, 123901e04c3fSmrg NULL, var_usage_map, modes, mem_ctx); 124001e04c3fSmrg break; 124101e04c3fSmrg 124201e04c3fSmrg case nir_intrinsic_store_deref: 124301e04c3fSmrg mark_deref_used(nir_src_as_deref(intrin->src[0]), 124401e04c3fSmrg 0, get_non_self_referential_store_comps(intrin), 124501e04c3fSmrg NULL, var_usage_map, modes, mem_ctx); 124601e04c3fSmrg break; 124701e04c3fSmrg 124801e04c3fSmrg case nir_intrinsic_copy_deref: { 124901e04c3fSmrg /* Just mark everything used for copies. */ 125001e04c3fSmrg nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]); 125101e04c3fSmrg nir_deref_instr *src = nir_src_as_deref(intrin->src[1]); 125201e04c3fSmrg mark_deref_used(dst, 0, ~0, src, var_usage_map, modes, mem_ctx); 125301e04c3fSmrg mark_deref_used(src, ~0, 0, dst, var_usage_map, modes, mem_ctx); 125401e04c3fSmrg break; 125501e04c3fSmrg } 125601e04c3fSmrg 125701e04c3fSmrg default: 125801e04c3fSmrg break; 125901e04c3fSmrg } 126001e04c3fSmrg } 126101e04c3fSmrg } 126201e04c3fSmrg} 126301e04c3fSmrg 126401e04c3fSmrgstatic bool 126501e04c3fSmrgshrink_vec_var_list(struct exec_list *vars, 12667ec681f3Smrg nir_variable_mode mode, 126701e04c3fSmrg struct hash_table *var_usage_map) 126801e04c3fSmrg{ 126901e04c3fSmrg /* Initialize the components kept field of each variable. This is the 127001e04c3fSmrg * AND of the components written and components read. If a component is 127101e04c3fSmrg * written but never read, it's dead. If it is read but never written, 127201e04c3fSmrg * then all values read are undefined garbage and we may as well not read 127301e04c3fSmrg * them. 127401e04c3fSmrg * 127501e04c3fSmrg * The same logic applies to the array length. We make the array length 127601e04c3fSmrg * the minimum needed required length between read and write and plan to 127701e04c3fSmrg * discard any OOB access. The one exception here is indirect writes 127801e04c3fSmrg * because we don't know where they will land and we can't shrink an array 127901e04c3fSmrg * with indirect writes because previously in-bounds writes may become 128001e04c3fSmrg * out-of-bounds and have undefined behavior. 128101e04c3fSmrg * 128201e04c3fSmrg * Also, if we have a copy that to/from something we can't shrink, we need 128301e04c3fSmrg * to leave components and array_len of any wildcards alone. 128401e04c3fSmrg */ 12857ec681f3Smrg nir_foreach_variable_in_list(var, vars) { 12867ec681f3Smrg if (var->data.mode != mode) 12877ec681f3Smrg continue; 12887ec681f3Smrg 128901e04c3fSmrg struct vec_var_usage *usage = 129001e04c3fSmrg get_vec_var_usage(var, var_usage_map, false, NULL); 129101e04c3fSmrg if (!usage) 129201e04c3fSmrg continue; 129301e04c3fSmrg 129401e04c3fSmrg assert(usage->comps_kept == 0); 12957ec681f3Smrg if (usage->has_external_copy || usage->has_complex_use) 129601e04c3fSmrg usage->comps_kept = usage->all_comps; 129701e04c3fSmrg else 129801e04c3fSmrg usage->comps_kept = usage->comps_read & usage->comps_written; 129901e04c3fSmrg 130001e04c3fSmrg for (unsigned i = 0; i < usage->num_levels; i++) { 130101e04c3fSmrg struct array_level_usage *level = &usage->levels[i]; 130201e04c3fSmrg assert(level->array_len > 0); 130301e04c3fSmrg 13047ec681f3Smrg if (level->max_written == UINT_MAX || level->has_external_copy || 13057ec681f3Smrg usage->has_complex_use) 130601e04c3fSmrg continue; /* Can't shrink */ 130701e04c3fSmrg 130801e04c3fSmrg unsigned max_used = MIN2(level->max_read, level->max_written); 130901e04c3fSmrg level->array_len = MIN2(max_used, level->array_len - 1) + 1; 131001e04c3fSmrg } 131101e04c3fSmrg } 131201e04c3fSmrg 131301e04c3fSmrg /* In order for variable copies to work, we have to have the same data type 131401e04c3fSmrg * on the source and the destination. In order to satisfy this, we run a 131501e04c3fSmrg * little fixed-point algorithm to transitively ensure that we get enough 131601e04c3fSmrg * components and array elements for this to hold for all copies. 131701e04c3fSmrg */ 131801e04c3fSmrg bool fp_progress; 131901e04c3fSmrg do { 132001e04c3fSmrg fp_progress = false; 13217ec681f3Smrg nir_foreach_variable_in_list(var, vars) { 13227ec681f3Smrg if (var->data.mode != mode) 13237ec681f3Smrg continue; 13247ec681f3Smrg 132501e04c3fSmrg struct vec_var_usage *var_usage = 132601e04c3fSmrg get_vec_var_usage(var, var_usage_map, false, NULL); 132701e04c3fSmrg if (!var_usage || !var_usage->vars_copied) 132801e04c3fSmrg continue; 132901e04c3fSmrg 133001e04c3fSmrg set_foreach(var_usage->vars_copied, copy_entry) { 133101e04c3fSmrg struct vec_var_usage *copy_usage = (void *)copy_entry->key; 133201e04c3fSmrg if (copy_usage->comps_kept != var_usage->comps_kept) { 133301e04c3fSmrg nir_component_mask_t comps_kept = 133401e04c3fSmrg (var_usage->comps_kept | copy_usage->comps_kept); 133501e04c3fSmrg var_usage->comps_kept = comps_kept; 133601e04c3fSmrg copy_usage->comps_kept = comps_kept; 133701e04c3fSmrg fp_progress = true; 133801e04c3fSmrg } 133901e04c3fSmrg } 134001e04c3fSmrg 134101e04c3fSmrg for (unsigned i = 0; i < var_usage->num_levels; i++) { 134201e04c3fSmrg struct array_level_usage *var_level = &var_usage->levels[i]; 134301e04c3fSmrg if (!var_level->levels_copied) 134401e04c3fSmrg continue; 134501e04c3fSmrg 134601e04c3fSmrg set_foreach(var_level->levels_copied, copy_entry) { 134701e04c3fSmrg struct array_level_usage *copy_level = (void *)copy_entry->key; 134801e04c3fSmrg if (var_level->array_len != copy_level->array_len) { 134901e04c3fSmrg unsigned array_len = 135001e04c3fSmrg MAX2(var_level->array_len, copy_level->array_len); 135101e04c3fSmrg var_level->array_len = array_len; 135201e04c3fSmrg copy_level->array_len = array_len; 135301e04c3fSmrg fp_progress = true; 135401e04c3fSmrg } 135501e04c3fSmrg } 135601e04c3fSmrg } 135701e04c3fSmrg } 135801e04c3fSmrg } while (fp_progress); 135901e04c3fSmrg 136001e04c3fSmrg bool vars_shrunk = false; 13617ec681f3Smrg nir_foreach_variable_in_list_safe(var, vars) { 13627ec681f3Smrg if (var->data.mode != mode) 13637ec681f3Smrg continue; 13647ec681f3Smrg 136501e04c3fSmrg struct vec_var_usage *usage = 136601e04c3fSmrg get_vec_var_usage(var, var_usage_map, false, NULL); 136701e04c3fSmrg if (!usage) 136801e04c3fSmrg continue; 136901e04c3fSmrg 137001e04c3fSmrg bool shrunk = false; 137101e04c3fSmrg const struct glsl_type *vec_type = var->type; 137201e04c3fSmrg for (unsigned i = 0; i < usage->num_levels; i++) { 137301e04c3fSmrg /* If we've reduced the array to zero elements at some level, just 137401e04c3fSmrg * set comps_kept to 0 and delete the variable. 137501e04c3fSmrg */ 137601e04c3fSmrg if (usage->levels[i].array_len == 0) { 137701e04c3fSmrg usage->comps_kept = 0; 137801e04c3fSmrg break; 137901e04c3fSmrg } 138001e04c3fSmrg 138101e04c3fSmrg assert(usage->levels[i].array_len <= glsl_get_length(vec_type)); 138201e04c3fSmrg if (usage->levels[i].array_len < glsl_get_length(vec_type)) 138301e04c3fSmrg shrunk = true; 138401e04c3fSmrg vec_type = glsl_get_array_element(vec_type); 138501e04c3fSmrg } 138601e04c3fSmrg assert(glsl_type_is_vector_or_scalar(vec_type)); 138701e04c3fSmrg 138801e04c3fSmrg assert(usage->comps_kept == (usage->comps_kept & usage->all_comps)); 138901e04c3fSmrg if (usage->comps_kept != usage->all_comps) 139001e04c3fSmrg shrunk = true; 139101e04c3fSmrg 139201e04c3fSmrg if (usage->comps_kept == 0) { 139301e04c3fSmrg /* This variable is dead, remove it */ 139401e04c3fSmrg vars_shrunk = true; 139501e04c3fSmrg exec_node_remove(&var->node); 139601e04c3fSmrg continue; 139701e04c3fSmrg } 139801e04c3fSmrg 139901e04c3fSmrg if (!shrunk) { 140001e04c3fSmrg /* This variable doesn't need to be shrunk. Remove it from the 140101e04c3fSmrg * hash table so later steps will ignore it. 140201e04c3fSmrg */ 140301e04c3fSmrg _mesa_hash_table_remove_key(var_usage_map, var); 140401e04c3fSmrg continue; 140501e04c3fSmrg } 140601e04c3fSmrg 140701e04c3fSmrg /* Build the new var type */ 140801e04c3fSmrg unsigned new_num_comps = util_bitcount(usage->comps_kept); 140901e04c3fSmrg const struct glsl_type *new_type = 141001e04c3fSmrg glsl_vector_type(glsl_get_base_type(vec_type), new_num_comps); 141101e04c3fSmrg for (int i = usage->num_levels - 1; i >= 0; i--) { 141201e04c3fSmrg assert(usage->levels[i].array_len > 0); 141301e04c3fSmrg /* If the original type was a matrix type, we'd like to keep that so 141401e04c3fSmrg * we don't convert matrices into arrays. 141501e04c3fSmrg */ 141601e04c3fSmrg if (i == usage->num_levels - 1 && 141701e04c3fSmrg glsl_type_is_matrix(glsl_without_array(var->type)) && 141801e04c3fSmrg new_num_comps > 1 && usage->levels[i].array_len > 1) { 141901e04c3fSmrg new_type = glsl_matrix_type(glsl_get_base_type(new_type), 142001e04c3fSmrg new_num_comps, 142101e04c3fSmrg usage->levels[i].array_len); 142201e04c3fSmrg } else { 14237e102996Smaya new_type = glsl_array_type(new_type, usage->levels[i].array_len, 0); 142401e04c3fSmrg } 142501e04c3fSmrg } 142601e04c3fSmrg var->type = new_type; 142701e04c3fSmrg 142801e04c3fSmrg vars_shrunk = true; 142901e04c3fSmrg } 143001e04c3fSmrg 143101e04c3fSmrg return vars_shrunk; 143201e04c3fSmrg} 143301e04c3fSmrg 143401e04c3fSmrgstatic bool 143501e04c3fSmrgvec_deref_is_oob(nir_deref_instr *deref, 143601e04c3fSmrg struct vec_var_usage *usage) 143701e04c3fSmrg{ 143801e04c3fSmrg nir_deref_path path; 143901e04c3fSmrg nir_deref_path_init(&path, deref, NULL); 144001e04c3fSmrg 144101e04c3fSmrg bool oob = false; 144201e04c3fSmrg for (unsigned i = 0; i < usage->num_levels; i++) { 144301e04c3fSmrg nir_deref_instr *p = path.path[i + 1]; 144401e04c3fSmrg if (p->deref_type == nir_deref_type_array_wildcard) 144501e04c3fSmrg continue; 144601e04c3fSmrg 144701e04c3fSmrg if (nir_src_is_const(p->arr.index) && 144801e04c3fSmrg nir_src_as_uint(p->arr.index) >= usage->levels[i].array_len) { 144901e04c3fSmrg oob = true; 145001e04c3fSmrg break; 145101e04c3fSmrg } 145201e04c3fSmrg } 145301e04c3fSmrg 145401e04c3fSmrg nir_deref_path_finish(&path); 145501e04c3fSmrg 145601e04c3fSmrg return oob; 145701e04c3fSmrg} 145801e04c3fSmrg 145901e04c3fSmrgstatic bool 146001e04c3fSmrgvec_deref_is_dead_or_oob(nir_deref_instr *deref, 146101e04c3fSmrg struct hash_table *var_usage_map, 146201e04c3fSmrg nir_variable_mode modes) 146301e04c3fSmrg{ 146401e04c3fSmrg struct vec_var_usage *usage = 146501e04c3fSmrg get_vec_deref_usage(deref, var_usage_map, modes, false, NULL); 146601e04c3fSmrg if (!usage) 146701e04c3fSmrg return false; 146801e04c3fSmrg 146901e04c3fSmrg return usage->comps_kept == 0 || vec_deref_is_oob(deref, usage); 147001e04c3fSmrg} 147101e04c3fSmrg 147201e04c3fSmrgstatic void 147301e04c3fSmrgshrink_vec_var_access_impl(nir_function_impl *impl, 147401e04c3fSmrg struct hash_table *var_usage_map, 147501e04c3fSmrg nir_variable_mode modes) 147601e04c3fSmrg{ 147701e04c3fSmrg nir_builder b; 147801e04c3fSmrg nir_builder_init(&b, impl); 147901e04c3fSmrg 148001e04c3fSmrg nir_foreach_block(block, impl) { 148101e04c3fSmrg nir_foreach_instr_safe(instr, block) { 148201e04c3fSmrg switch (instr->type) { 148301e04c3fSmrg case nir_instr_type_deref: { 148401e04c3fSmrg nir_deref_instr *deref = nir_instr_as_deref(instr); 14857ec681f3Smrg if (!nir_deref_mode_may_be(deref, modes)) 148601e04c3fSmrg break; 148701e04c3fSmrg 148801e04c3fSmrg /* Clean up any dead derefs we find lying around. They may refer 148901e04c3fSmrg * to variables we've deleted. 149001e04c3fSmrg */ 149101e04c3fSmrg if (nir_deref_instr_remove_if_unused(deref)) 149201e04c3fSmrg break; 149301e04c3fSmrg 149401e04c3fSmrg /* Update the type in the deref to keep the types consistent as 149501e04c3fSmrg * you walk down the chain. We don't need to check if this is one 149601e04c3fSmrg * of the derefs we're shrinking because this is a no-op if it 149701e04c3fSmrg * isn't. The worst that could happen is that we accidentally fix 149801e04c3fSmrg * an invalid deref. 149901e04c3fSmrg */ 150001e04c3fSmrg if (deref->deref_type == nir_deref_type_var) { 150101e04c3fSmrg deref->type = deref->var->type; 150201e04c3fSmrg } else if (deref->deref_type == nir_deref_type_array || 150301e04c3fSmrg deref->deref_type == nir_deref_type_array_wildcard) { 150401e04c3fSmrg nir_deref_instr *parent = nir_deref_instr_parent(deref); 150501e04c3fSmrg assert(glsl_type_is_array(parent->type) || 150601e04c3fSmrg glsl_type_is_matrix(parent->type)); 150701e04c3fSmrg deref->type = glsl_get_array_element(parent->type); 150801e04c3fSmrg } 150901e04c3fSmrg break; 151001e04c3fSmrg } 151101e04c3fSmrg 151201e04c3fSmrg case nir_instr_type_intrinsic: { 151301e04c3fSmrg nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 151401e04c3fSmrg 151501e04c3fSmrg /* If we have a copy whose source or destination has been deleted 151601e04c3fSmrg * because we determined the variable was dead, then we just 151701e04c3fSmrg * delete the copy instruction. If the source variable was dead 151801e04c3fSmrg * then it was writing undefined garbage anyway and if it's the 151901e04c3fSmrg * destination variable that's dead then the write isn't needed. 152001e04c3fSmrg */ 152101e04c3fSmrg if (intrin->intrinsic == nir_intrinsic_copy_deref) { 152201e04c3fSmrg nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]); 152301e04c3fSmrg nir_deref_instr *src = nir_src_as_deref(intrin->src[1]); 152401e04c3fSmrg if (vec_deref_is_dead_or_oob(dst, var_usage_map, modes) || 152501e04c3fSmrg vec_deref_is_dead_or_oob(src, var_usage_map, modes)) { 152601e04c3fSmrg nir_instr_remove(&intrin->instr); 152701e04c3fSmrg nir_deref_instr_remove_if_unused(dst); 152801e04c3fSmrg nir_deref_instr_remove_if_unused(src); 152901e04c3fSmrg } 153001e04c3fSmrg continue; 153101e04c3fSmrg } 153201e04c3fSmrg 153301e04c3fSmrg if (intrin->intrinsic != nir_intrinsic_load_deref && 153401e04c3fSmrg intrin->intrinsic != nir_intrinsic_store_deref) 153501e04c3fSmrg continue; 153601e04c3fSmrg 153701e04c3fSmrg nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 15387ec681f3Smrg if (!nir_deref_mode_may_be(deref, modes)) 153901e04c3fSmrg continue; 154001e04c3fSmrg 154101e04c3fSmrg struct vec_var_usage *usage = 154201e04c3fSmrg get_vec_deref_usage(deref, var_usage_map, modes, false, NULL); 154301e04c3fSmrg if (!usage) 154401e04c3fSmrg continue; 154501e04c3fSmrg 154601e04c3fSmrg if (usage->comps_kept == 0 || vec_deref_is_oob(deref, usage)) { 154701e04c3fSmrg if (intrin->intrinsic == nir_intrinsic_load_deref) { 154801e04c3fSmrg nir_ssa_def *u = 154901e04c3fSmrg nir_ssa_undef(&b, intrin->dest.ssa.num_components, 155001e04c3fSmrg intrin->dest.ssa.bit_size); 155101e04c3fSmrg nir_ssa_def_rewrite_uses(&intrin->dest.ssa, 15527ec681f3Smrg u); 155301e04c3fSmrg } 155401e04c3fSmrg nir_instr_remove(&intrin->instr); 155501e04c3fSmrg nir_deref_instr_remove_if_unused(deref); 155601e04c3fSmrg continue; 155701e04c3fSmrg } 155801e04c3fSmrg 15597e102996Smaya /* If we're not dropping any components, there's no need to 15607e102996Smaya * compact vectors. 15617e102996Smaya */ 15627e102996Smaya if (usage->comps_kept == usage->all_comps) 15637e102996Smaya continue; 15647e102996Smaya 156501e04c3fSmrg if (intrin->intrinsic == nir_intrinsic_load_deref) { 156601e04c3fSmrg b.cursor = nir_after_instr(&intrin->instr); 156701e04c3fSmrg 156801e04c3fSmrg nir_ssa_def *undef = 156901e04c3fSmrg nir_ssa_undef(&b, 1, intrin->dest.ssa.bit_size); 157001e04c3fSmrg nir_ssa_def *vec_srcs[NIR_MAX_VEC_COMPONENTS]; 157101e04c3fSmrg unsigned c = 0; 157201e04c3fSmrg for (unsigned i = 0; i < intrin->num_components; i++) { 157301e04c3fSmrg if (usage->comps_kept & (1u << i)) 157401e04c3fSmrg vec_srcs[i] = nir_channel(&b, &intrin->dest.ssa, c++); 157501e04c3fSmrg else 157601e04c3fSmrg vec_srcs[i] = undef; 157701e04c3fSmrg } 157801e04c3fSmrg nir_ssa_def *vec = nir_vec(&b, vec_srcs, intrin->num_components); 157901e04c3fSmrg 158001e04c3fSmrg nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa, 15817ec681f3Smrg vec, 158201e04c3fSmrg vec->parent_instr); 158301e04c3fSmrg 158401e04c3fSmrg /* The SSA def is now only used by the swizzle. It's safe to 158501e04c3fSmrg * shrink the number of components. 158601e04c3fSmrg */ 158701e04c3fSmrg assert(list_length(&intrin->dest.ssa.uses) == c); 158801e04c3fSmrg intrin->num_components = c; 158901e04c3fSmrg intrin->dest.ssa.num_components = c; 159001e04c3fSmrg } else { 159101e04c3fSmrg nir_component_mask_t write_mask = 159201e04c3fSmrg nir_intrinsic_write_mask(intrin); 159301e04c3fSmrg 159401e04c3fSmrg unsigned swizzle[NIR_MAX_VEC_COMPONENTS]; 159501e04c3fSmrg nir_component_mask_t new_write_mask = 0; 159601e04c3fSmrg unsigned c = 0; 159701e04c3fSmrg for (unsigned i = 0; i < intrin->num_components; i++) { 159801e04c3fSmrg if (usage->comps_kept & (1u << i)) { 159901e04c3fSmrg swizzle[c] = i; 160001e04c3fSmrg if (write_mask & (1u << i)) 160101e04c3fSmrg new_write_mask |= 1u << c; 160201e04c3fSmrg c++; 160301e04c3fSmrg } 160401e04c3fSmrg } 160501e04c3fSmrg 160601e04c3fSmrg b.cursor = nir_before_instr(&intrin->instr); 160701e04c3fSmrg 160801e04c3fSmrg nir_ssa_def *swizzled = 16097ec681f3Smrg nir_swizzle(&b, intrin->src[1].ssa, swizzle, c); 161001e04c3fSmrg 161101e04c3fSmrg /* Rewrite to use the compacted source */ 161201e04c3fSmrg nir_instr_rewrite_src(&intrin->instr, &intrin->src[1], 161301e04c3fSmrg nir_src_for_ssa(swizzled)); 161401e04c3fSmrg nir_intrinsic_set_write_mask(intrin, new_write_mask); 161501e04c3fSmrg intrin->num_components = c; 161601e04c3fSmrg } 161701e04c3fSmrg break; 161801e04c3fSmrg } 161901e04c3fSmrg 162001e04c3fSmrg default: 162101e04c3fSmrg break; 162201e04c3fSmrg } 162301e04c3fSmrg } 162401e04c3fSmrg } 162501e04c3fSmrg} 162601e04c3fSmrg 162701e04c3fSmrgstatic bool 162801e04c3fSmrgfunction_impl_has_vars_with_modes(nir_function_impl *impl, 162901e04c3fSmrg nir_variable_mode modes) 163001e04c3fSmrg{ 163101e04c3fSmrg nir_shader *shader = impl->function->shader; 163201e04c3fSmrg 16337ec681f3Smrg if (modes & ~nir_var_function_temp) { 16347ec681f3Smrg nir_foreach_variable_with_modes(var, shader, 16357ec681f3Smrg modes & ~nir_var_function_temp) 16367ec681f3Smrg return true; 16377ec681f3Smrg } 163801e04c3fSmrg 16397e102996Smaya if ((modes & nir_var_function_temp) && !exec_list_is_empty(&impl->locals)) 164001e04c3fSmrg return true; 164101e04c3fSmrg 164201e04c3fSmrg return false; 164301e04c3fSmrg} 164401e04c3fSmrg 164501e04c3fSmrg/** Attempt to shrink arrays of vectors 164601e04c3fSmrg * 164701e04c3fSmrg * This pass looks at variables which contain a vector or an array (possibly 164801e04c3fSmrg * multiple dimensions) of vectors and attempts to lower to a smaller vector 164901e04c3fSmrg * or array. If the pass can prove that a component of a vector (or array of 165001e04c3fSmrg * vectors) is never really used, then that component will be removed. 165101e04c3fSmrg * Similarly, the pass attempts to shorten arrays based on what elements it 165201e04c3fSmrg * can prove are never read or never contain valid data. 165301e04c3fSmrg */ 165401e04c3fSmrgbool 165501e04c3fSmrgnir_shrink_vec_array_vars(nir_shader *shader, nir_variable_mode modes) 165601e04c3fSmrg{ 16577e102996Smaya assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes); 165801e04c3fSmrg 165901e04c3fSmrg void *mem_ctx = ralloc_context(NULL); 166001e04c3fSmrg 166101e04c3fSmrg struct hash_table *var_usage_map = 16627e102996Smaya _mesa_pointer_hash_table_create(mem_ctx); 166301e04c3fSmrg 166401e04c3fSmrg bool has_vars_to_shrink = false; 166501e04c3fSmrg nir_foreach_function(function, shader) { 166601e04c3fSmrg if (!function->impl) 166701e04c3fSmrg continue; 166801e04c3fSmrg 166901e04c3fSmrg /* Don't even bother crawling the IR if we don't have any variables. 167001e04c3fSmrg * Given that this pass deletes any unused variables, it's likely that 167101e04c3fSmrg * we will be in this scenario eventually. 167201e04c3fSmrg */ 167301e04c3fSmrg if (function_impl_has_vars_with_modes(function->impl, modes)) { 167401e04c3fSmrg has_vars_to_shrink = true; 167501e04c3fSmrg find_used_components_impl(function->impl, var_usage_map, 167601e04c3fSmrg modes, mem_ctx); 167701e04c3fSmrg } 167801e04c3fSmrg } 167901e04c3fSmrg if (!has_vars_to_shrink) { 168001e04c3fSmrg ralloc_free(mem_ctx); 16817ec681f3Smrg nir_shader_preserve_all_metadata(shader); 168201e04c3fSmrg return false; 168301e04c3fSmrg } 168401e04c3fSmrg 168501e04c3fSmrg bool globals_shrunk = false; 16867ec681f3Smrg if (modes & nir_var_shader_temp) { 16877ec681f3Smrg globals_shrunk = shrink_vec_var_list(&shader->variables, 16887ec681f3Smrg nir_var_shader_temp, 16897ec681f3Smrg var_usage_map); 16907ec681f3Smrg } 169101e04c3fSmrg 169201e04c3fSmrg bool progress = false; 169301e04c3fSmrg nir_foreach_function(function, shader) { 169401e04c3fSmrg if (!function->impl) 169501e04c3fSmrg continue; 169601e04c3fSmrg 169701e04c3fSmrg bool locals_shrunk = false; 16987e102996Smaya if (modes & nir_var_function_temp) { 169901e04c3fSmrg locals_shrunk = shrink_vec_var_list(&function->impl->locals, 17007ec681f3Smrg nir_var_function_temp, 170101e04c3fSmrg var_usage_map); 170201e04c3fSmrg } 170301e04c3fSmrg 170401e04c3fSmrg if (globals_shrunk || locals_shrunk) { 170501e04c3fSmrg shrink_vec_var_access_impl(function->impl, var_usage_map, modes); 170601e04c3fSmrg 170701e04c3fSmrg nir_metadata_preserve(function->impl, nir_metadata_block_index | 170801e04c3fSmrg nir_metadata_dominance); 170901e04c3fSmrg progress = true; 17107ec681f3Smrg } else { 17117ec681f3Smrg nir_metadata_preserve(function->impl, nir_metadata_all); 171201e04c3fSmrg } 171301e04c3fSmrg } 171401e04c3fSmrg 171501e04c3fSmrg ralloc_free(mem_ctx); 171601e04c3fSmrg 171701e04c3fSmrg return progress; 171801e04c3fSmrg} 1719