/*
 * Copyright © 2018 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "nir_deref.h"
#include "nir_vla.h"

#include "util/u_math.h"


struct split_var_state {
   void *mem_ctx;

   nir_shader *shader;
   nir_function_impl *impl;

   nir_variable *base_var;
};

struct field {
   struct field *parent;

   const struct glsl_type *type;

   unsigned num_fields;
   struct field *fields;

   nir_variable *var;
};

static const struct glsl_type *
wrap_type_in_array(const struct glsl_type *type,
                   const struct glsl_type *array_type)
{
   if (!glsl_type_is_array(array_type))
      return type;

   const struct glsl_type *elem_type =
      wrap_type_in_array(type, glsl_get_array_element(array_type));
   assert(glsl_get_explicit_stride(array_type) == 0);
   return glsl_array_type(elem_type, glsl_get_length(array_type), 0);
}

static int
num_array_levels_in_array_of_vector_type(const struct glsl_type *type)
{
   int num_levels = 0;
   while (true) {
      if (glsl_type_is_array_or_matrix(type)) {
         num_levels++;
         type = glsl_get_array_element(type);
      } else if (glsl_type_is_vector_or_scalar(type)) {
         return num_levels;
      } else {
         /* Not an array of vectors */
         return -1;
      }
   }
}
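/* A brief illustration of the two helpers above (the types here are
 * hypothetical): wrap_type_in_array(vec4, float[3][2]) re-wraps the array
 * dimensions of the second argument around the first type, yielding
 * vec4[3][2].  num_array_levels_in_array_of_vector_type(vec4[3][2]) returns
 * 2, and it returns -1 for any type whose innermost element is not a vector
 * or scalar (an array of structs, for instance).
 */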
static void
init_field_for_type(struct field *field, struct field *parent,
                    const struct glsl_type *type,
                    const char *name,
                    struct split_var_state *state)
{
   *field = (struct field) {
      .parent = parent,
      .type = type,
   };

   const struct glsl_type *struct_type = glsl_without_array(type);
   if (glsl_type_is_struct_or_ifc(struct_type)) {
      field->num_fields = glsl_get_length(struct_type);
      field->fields = ralloc_array(state->mem_ctx, struct field,
                                   field->num_fields);
      for (unsigned i = 0; i < field->num_fields; i++) {
         char *field_name = NULL;
         if (name) {
            field_name = ralloc_asprintf(state->mem_ctx, "%s_%s", name,
                                         glsl_get_struct_elem_name(struct_type, i));
         } else {
            field_name = ralloc_asprintf(state->mem_ctx, "{unnamed %s}_%s",
                                         glsl_get_type_name(struct_type),
                                         glsl_get_struct_elem_name(struct_type, i));
         }
         init_field_for_type(&field->fields[i], field,
                             glsl_get_struct_field(struct_type, i),
                             field_name, state);
      }
   } else {
      const struct glsl_type *var_type = type;
      for (struct field *f = field->parent; f; f = f->parent)
         var_type = wrap_type_in_array(var_type, f->type);

      nir_variable_mode mode = state->base_var->data.mode;
      if (mode == nir_var_function_temp) {
         field->var = nir_local_variable_create(state->impl, var_type, name);
      } else {
         field->var = nir_variable_create(state->shader, mode, var_type, name);
      }
   }
}
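/* As a hypothetical example of the scheme above: a variable
 *
 *    struct { vec4 color; float alpha; } s[4];
 *
 * yields a field tree whose leaves become the new variables "s_color" of
 * type vec4[4] and "s_alpha" of type float[4]; any array dimensions on the
 * original variable are re-wrapped around each leaf type by
 * wrap_type_in_array().
 */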
static bool
split_var_list_structs(nir_shader *shader,
                       nir_function_impl *impl,
                       struct exec_list *vars,
                       struct hash_table *var_field_map,
                       void *mem_ctx)
{
   struct split_var_state state = {
      .mem_ctx = mem_ctx,
      .shader = shader,
      .impl = impl,
   };

   struct exec_list split_vars;
   exec_list_make_empty(&split_vars);

   /* To avoid list confusion (we'll be adding things as we split variables),
    * pull all of the variables we plan to split off of the list
    */
   nir_foreach_variable_safe(var, vars) {
      if (!glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
         continue;

      exec_node_remove(&var->node);
      exec_list_push_tail(&split_vars, &var->node);
   }

   nir_foreach_variable(var, &split_vars) {
      state.base_var = var;

      struct field *root_field = ralloc(mem_ctx, struct field);
      init_field_for_type(root_field, NULL, var->type, var->name, &state);
      _mesa_hash_table_insert(var_field_map, var, root_field);
   }

   return !exec_list_is_empty(&split_vars);
}

static void
split_struct_derefs_impl(nir_function_impl *impl,
                         struct hash_table *var_field_map,
                         nir_variable_mode modes,
                         void *mem_ctx)
{
   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_deref)
            continue;

         nir_deref_instr *deref = nir_instr_as_deref(instr);
         if (!(deref->mode & modes))
            continue;

         /* Clean up any dead derefs we find lying around.  They may refer to
          * variables we're planning to split.
          */
         if (nir_deref_instr_remove_if_unused(deref))
            continue;

         if (!glsl_type_is_vector_or_scalar(deref->type))
            continue;

         nir_variable *base_var = nir_deref_instr_get_variable(deref);
         struct hash_entry *entry =
            _mesa_hash_table_search(var_field_map, base_var);
         if (!entry)
            continue;

         struct field *root_field = entry->data;

         nir_deref_path path;
         nir_deref_path_init(&path, deref, mem_ctx);

         struct field *tail_field = root_field;
         for (unsigned i = 0; path.path[i]; i++) {
            if (path.path[i]->deref_type != nir_deref_type_struct)
               continue;

            assert(i > 0);
            assert(glsl_type_is_struct_or_ifc(path.path[i - 1]->type));
            assert(path.path[i - 1]->type ==
                   glsl_without_array(tail_field->type));

            tail_field = &tail_field->fields[path.path[i]->strct.index];
         }
         nir_variable *split_var = tail_field->var;

         nir_deref_instr *new_deref = NULL;
         for (unsigned i = 0; path.path[i]; i++) {
            nir_deref_instr *p = path.path[i];
            b.cursor = nir_after_instr(&p->instr);

            switch (p->deref_type) {
            case nir_deref_type_var:
               assert(new_deref == NULL);
               new_deref = nir_build_deref_var(&b, split_var);
               break;

            case nir_deref_type_array:
            case nir_deref_type_array_wildcard:
               new_deref = nir_build_deref_follower(&b, new_deref, p);
               break;

            case nir_deref_type_struct:
               /* Nothing to do; we're splitting structs */
               break;

            default:
               unreachable("Invalid deref type in path");
            }
         }

         assert(new_deref->type == deref->type);
         nir_ssa_def_rewrite_uses(&deref->dest.ssa,
                                  nir_src_for_ssa(&new_deref->dest.ssa));
         nir_deref_instr_remove_if_unused(deref);
      }
   }
}
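/* Continuing the hypothetical example above: an access such as s[i].alpha
 * reaches split_struct_derefs_impl() as the deref chain s -> [i] -> .alpha.
 * The struct steps in the path select the leaf variable and the array steps
 * are replayed on top of it, so the chain is rewritten to s_alpha -> [i].
 */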
/** A pass for splitting structs into multiple variables
 *
 * This pass splits arrays of structs into multiple variables, one for each
 * (possibly nested) structure member.  After this pass completes, no
 * variables of the given mode will contain a struct type.
 */
bool
nir_split_struct_vars(nir_shader *shader, nir_variable_mode modes)
{
   void *mem_ctx = ralloc_context(NULL);
   struct hash_table *var_field_map =
      _mesa_pointer_hash_table_create(mem_ctx);

   assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes);

   bool has_global_splits = false;
   if (modes & nir_var_shader_temp) {
      has_global_splits = split_var_list_structs(shader, NULL,
                                                 &shader->globals,
                                                 var_field_map, mem_ctx);
   }

   bool progress = false;
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      bool has_local_splits = false;
      if (modes & nir_var_function_temp) {
         has_local_splits = split_var_list_structs(shader, function->impl,
                                                   &function->impl->locals,
                                                   var_field_map, mem_ctx);
      }

      if (has_global_splits || has_local_splits) {
         split_struct_derefs_impl(function->impl, var_field_map,
                                  modes, mem_ctx);

         nir_metadata_preserve(function->impl, nir_metadata_block_index |
                                               nir_metadata_dominance);
         progress = true;
      }
   }

   ralloc_free(mem_ctx);

   return progress;
}

struct array_level_info {
   unsigned array_len;
   bool split;
};

struct array_split {
   /* Only set if this is the tail end of the splitting */
   nir_variable *var;

   unsigned num_splits;
   struct array_split *splits;
};

struct array_var_info {
   nir_variable *base_var;

   const struct glsl_type *split_var_type;

   bool split_var;
   struct array_split root_split;

   unsigned num_levels;
   struct array_level_info levels[0];
};

static bool
init_var_list_array_infos(struct exec_list *vars,
                          struct hash_table *var_info_map,
                          void *mem_ctx)
{
   bool has_array = false;

   nir_foreach_variable(var, vars) {
      int num_levels = num_array_levels_in_array_of_vector_type(var->type);
      if (num_levels <= 0)
         continue;

      struct array_var_info *info =
         rzalloc_size(mem_ctx, sizeof(*info) +
                               num_levels * sizeof(info->levels[0]));

      info->base_var = var;
      info->num_levels = num_levels;

      const struct glsl_type *type = var->type;
      for (int i = 0; i < num_levels; i++) {
         info->levels[i].array_len = glsl_get_length(type);
         type = glsl_get_array_element(type);

         /* All levels start out initially as split */
         info->levels[i].split = true;
      }

      _mesa_hash_table_insert(var_info_map, var, info);
      has_array = true;
   }

   return has_array;
}

static struct array_var_info *
get_array_var_info(nir_variable *var,
                   struct hash_table *var_info_map)
{
   struct hash_entry *entry =
      _mesa_hash_table_search(var_info_map, var);
   return entry ? entry->data : NULL;
}

static struct array_var_info *
get_array_deref_info(nir_deref_instr *deref,
                     struct hash_table *var_info_map,
                     nir_variable_mode modes)
{
   if (!(deref->mode & modes))
      return NULL;

   return get_array_var_info(nir_deref_instr_get_variable(deref),
                             var_info_map);
}

static void
mark_array_deref_used(nir_deref_instr *deref,
                      struct hash_table *var_info_map,
                      nir_variable_mode modes,
                      void *mem_ctx)
{
   struct array_var_info *info =
      get_array_deref_info(deref, var_info_map, modes);
   if (!info)
      return;

   nir_deref_path path;
   nir_deref_path_init(&path, deref, mem_ctx);

   /* Walk the path and look for indirects.  If we have an array deref with an
    * indirect, mark the given level as not being split.
    */
   for (unsigned i = 0; i < info->num_levels; i++) {
      nir_deref_instr *p = path.path[i + 1];
      if (p->deref_type == nir_deref_type_array &&
          !nir_src_is_const(p->arr.index))
         info->levels[i].split = false;
   }
}

static void
mark_array_usage_impl(nir_function_impl *impl,
                      struct hash_table *var_info_map,
                      nir_variable_mode modes,
                      void *mem_ctx)
{
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_copy_deref:
            mark_array_deref_used(nir_src_as_deref(intrin->src[1]),
                                  var_info_map, modes, mem_ctx);
            /* Fall Through */

         case nir_intrinsic_load_deref:
         case nir_intrinsic_store_deref:
            mark_array_deref_used(nir_src_as_deref(intrin->src[0]),
                                  var_info_map, modes, mem_ctx);
            break;

         default:
            break;
         }
      }
   }
}

static void
create_split_array_vars(struct array_var_info *var_info,
                        unsigned level,
                        struct array_split *split,
                        const char *name,
                        nir_shader *shader,
                        nir_function_impl *impl,
                        void *mem_ctx)
{
   while (level < var_info->num_levels && !var_info->levels[level].split) {
      name = ralloc_asprintf(mem_ctx, "%s[*]", name);
      level++;
   }

   if (level == var_info->num_levels) {
      /* We add parens to the variable name so it looks like "(foo[2][*])" so
       * that further derefs will look like "(foo[2][*])[ssa_6]"
       */
      name = ralloc_asprintf(mem_ctx, "(%s)", name);

      nir_variable_mode mode = var_info->base_var->data.mode;
      if (mode == nir_var_function_temp) {
         split->var = nir_local_variable_create(impl,
                                                var_info->split_var_type, name);
      } else {
         split->var = nir_variable_create(shader, mode,
                                          var_info->split_var_type, name);
      }
   } else {
      assert(var_info->levels[level].split);
      split->num_splits = var_info->levels[level].array_len;
      split->splits = rzalloc_array(mem_ctx, struct array_split,
                                    split->num_splits);
      for (unsigned i = 0; i < split->num_splits; i++) {
         create_split_array_vars(var_info, level + 1, &split->splits[i],
                                 ralloc_asprintf(mem_ctx, "%s[%d]", name, i),
                                 shader, impl, mem_ctx);
      }
   }
}
static bool
split_var_list_arrays(nir_shader *shader,
                      nir_function_impl *impl,
                      struct exec_list *vars,
                      struct hash_table *var_info_map,
                      void *mem_ctx)
{
   struct exec_list split_vars;
   exec_list_make_empty(&split_vars);

   nir_foreach_variable_safe(var, vars) {
      struct array_var_info *info = get_array_var_info(var, var_info_map);
      if (!info)
         continue;

      bool has_split = false;
      const struct glsl_type *split_type =
         glsl_without_array_or_matrix(var->type);
      for (int i = info->num_levels - 1; i >= 0; i--) {
         if (info->levels[i].split) {
            has_split = true;
            continue;
         }

         /* If the original type was a matrix type, we'd like to keep that so
          * we don't convert matrices into arrays.
          */
         if (i == info->num_levels - 1 &&
             glsl_type_is_matrix(glsl_without_array(var->type))) {
            split_type = glsl_matrix_type(glsl_get_base_type(split_type),
                                          glsl_get_components(split_type),
                                          info->levels[i].array_len);
         } else {
            split_type = glsl_array_type(split_type, info->levels[i].array_len, 0);
         }
      }

      if (has_split) {
         info->split_var_type = split_type;
         /* To avoid list confusion (we'll be adding things as we split
          * variables), pull all of the variables we plan to split off of the
          * main variable list.
          */
         exec_node_remove(&var->node);
         exec_list_push_tail(&split_vars, &var->node);
      } else {
         assert(split_type == glsl_get_bare_type(var->type));
         /* If we're not modifying this variable, delete the info so we skip
          * it faster in later passes.
          */
         _mesa_hash_table_remove_key(var_info_map, var);
      }
   }

   nir_foreach_variable(var, &split_vars) {
      struct array_var_info *info = get_array_var_info(var, var_info_map);
      create_split_array_vars(info, 0, &info->root_split, var->name,
                              shader, impl, mem_ctx);
   }

   return !exec_list_is_empty(&split_vars);
}

static bool
deref_has_split_wildcard(nir_deref_path *path,
                         struct array_var_info *info)
{
   if (info == NULL)
      return false;

   assert(path->path[0]->var == info->base_var);
   for (unsigned i = 0; i < info->num_levels; i++) {
      if (path->path[i + 1]->deref_type == nir_deref_type_array_wildcard &&
          info->levels[i].split)
         return true;
   }

   return false;
}

static bool
array_path_is_out_of_bounds(nir_deref_path *path,
                            struct array_var_info *info)
{
   if (info == NULL)
      return false;

   assert(path->path[0]->var == info->base_var);
   for (unsigned i = 0; i < info->num_levels; i++) {
      nir_deref_instr *p = path->path[i + 1];
      if (p->deref_type == nir_deref_type_array_wildcard)
         continue;

      if (nir_src_is_const(p->arr.index) &&
          nir_src_as_uint(p->arr.index) >= info->levels[i].array_len)
         return true;
   }

   return false;
}
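/* A sketch of what emit_split_copies() below does, with hypothetical names:
 * given copy_deref foo[*][*], bar[*][*] where the outer array level is
 * split to length 2 on at least one side but the inner level is not, the
 * single copy becomes
 *
 *    copy_deref foo[0][*], bar[0][*]
 *    copy_deref foo[1][*], bar[1][*]
 *
 * i.e. a wildcard over a split level is unrolled into constant indices
 * while wildcards over unsplit levels are preserved.
 */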
static void
emit_split_copies(nir_builder *b,
                  struct array_var_info *dst_info, nir_deref_path *dst_path,
                  unsigned dst_level, nir_deref_instr *dst,
                  struct array_var_info *src_info, nir_deref_path *src_path,
                  unsigned src_level, nir_deref_instr *src)
{
   nir_deref_instr *dst_p, *src_p;

   while ((dst_p = dst_path->path[dst_level + 1])) {
      if (dst_p->deref_type == nir_deref_type_array_wildcard)
         break;

      dst = nir_build_deref_follower(b, dst, dst_p);
      dst_level++;
   }

   while ((src_p = src_path->path[src_level + 1])) {
      if (src_p->deref_type == nir_deref_type_array_wildcard)
         break;

      src = nir_build_deref_follower(b, src, src_p);
      src_level++;
   }

   if (src_p == NULL || dst_p == NULL) {
      assert(src_p == NULL && dst_p == NULL);
      nir_copy_deref(b, dst, src);
   } else {
      assert(dst_p->deref_type == nir_deref_type_array_wildcard &&
             src_p->deref_type == nir_deref_type_array_wildcard);

      if ((dst_info && dst_info->levels[dst_level].split) ||
          (src_info && src_info->levels[src_level].split)) {
         /* There are no indirects at this level on one of the source or the
          * destination so we are lowering it.
          */
         assert(glsl_get_length(dst_path->path[dst_level]->type) ==
                glsl_get_length(src_path->path[src_level]->type));
         unsigned len = glsl_get_length(dst_path->path[dst_level]->type);
         for (unsigned i = 0; i < len; i++) {
            emit_split_copies(b, dst_info, dst_path, dst_level + 1,
                              nir_build_deref_array_imm(b, dst, i),
                              src_info, src_path, src_level + 1,
                              nir_build_deref_array_imm(b, src, i));
         }
      } else {
         /* Neither side is being split so we just keep going */
         emit_split_copies(b, dst_info, dst_path, dst_level + 1,
                           nir_build_deref_array_wildcard(b, dst),
                           src_info, src_path, src_level + 1,
                           nir_build_deref_array_wildcard(b, src));
      }
   }
}

static void
split_array_copies_impl(nir_function_impl *impl,
                        struct hash_table *var_info_map,
                        nir_variable_mode modes,
                        void *mem_ctx)
{
   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *copy = nir_instr_as_intrinsic(instr);
         if (copy->intrinsic != nir_intrinsic_copy_deref)
            continue;

         nir_deref_instr *dst_deref = nir_src_as_deref(copy->src[0]);
         nir_deref_instr *src_deref = nir_src_as_deref(copy->src[1]);

         struct array_var_info *dst_info =
            get_array_deref_info(dst_deref, var_info_map, modes);
         struct array_var_info *src_info =
            get_array_deref_info(src_deref, var_info_map, modes);

         if (!src_info && !dst_info)
            continue;

         nir_deref_path dst_path, src_path;
         nir_deref_path_init(&dst_path, dst_deref, mem_ctx);
         nir_deref_path_init(&src_path, src_deref, mem_ctx);

         if (!deref_has_split_wildcard(&dst_path, dst_info) &&
             !deref_has_split_wildcard(&src_path, src_info))
            continue;

         b.cursor = nir_instr_remove(&copy->instr);

         emit_split_copies(&b, dst_info, &dst_path, 0, dst_path.path[0],
                           src_info, &src_path, 0, src_path.path[0]);
      }
   }
}

static void
split_array_access_impl(nir_function_impl *impl,
                        struct hash_table *var_info_map,
                        nir_variable_mode modes,
                        void *mem_ctx)
{
   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         if (instr->type == nir_instr_type_deref) {
            /* Clean up any dead derefs we find lying around.  They may refer
             * to variables we're planning to split.
             */
            nir_deref_instr *deref = nir_instr_as_deref(instr);
            if (deref->mode & modes)
               nir_deref_instr_remove_if_unused(deref);
            continue;
         }

         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         if (intrin->intrinsic != nir_intrinsic_load_deref &&
             intrin->intrinsic != nir_intrinsic_store_deref &&
             intrin->intrinsic != nir_intrinsic_copy_deref)
            continue;

         const unsigned num_derefs =
            intrin->intrinsic == nir_intrinsic_copy_deref ? 2 : 1;

         for (unsigned d = 0; d < num_derefs; d++) {
            nir_deref_instr *deref = nir_src_as_deref(intrin->src[d]);

            struct array_var_info *info =
               get_array_deref_info(deref, var_info_map, modes);
            if (!info)
               continue;

            nir_deref_path path;
            nir_deref_path_init(&path, deref, mem_ctx);

            b.cursor = nir_before_instr(&intrin->instr);

            if (array_path_is_out_of_bounds(&path, info)) {
               /* If one of the derefs is out-of-bounds, we just delete the
                * instruction.  If a destination is out of bounds, then it may
                * have been in-bounds prior to shrinking so we don't want to
                * accidentally stomp something.  However, we've already proven
                * that it will never be read so it's safe to delete.  If a
                * source is out of bounds then it is loading random garbage.
                * For loads, we replace their uses with an undef instruction
                * and for copies we just delete the copy since it was writing
                * undefined garbage anyway and we may as well leave the random
                * garbage in the destination alone.
                */
               if (intrin->intrinsic == nir_intrinsic_load_deref) {
                  nir_ssa_def *u =
                     nir_ssa_undef(&b, intrin->dest.ssa.num_components,
                                       intrin->dest.ssa.bit_size);
                  nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                           nir_src_for_ssa(u));
               }
               nir_instr_remove(&intrin->instr);
               for (unsigned i = 0; i < num_derefs; i++)
                  nir_deref_instr_remove_if_unused(nir_src_as_deref(intrin->src[i]));
               break;
            }

            struct array_split *split = &info->root_split;
            for (unsigned i = 0; i < info->num_levels; i++) {
               if (info->levels[i].split) {
                  nir_deref_instr *p = path.path[i + 1];
                  unsigned index = nir_src_as_uint(p->arr.index);
                  assert(index < info->levels[i].array_len);
                  split = &split->splits[index];
               }
            }
            assert(!split->splits && split->var);

            nir_deref_instr *new_deref = nir_build_deref_var(&b, split->var);
            for (unsigned i = 0; i < info->num_levels; i++) {
               if (!info->levels[i].split) {
                  new_deref = nir_build_deref_follower(&b, new_deref,
                                                       path.path[i + 1]);
               }
            }
            assert(new_deref->type == deref->type);

            /* Rewrite the deref source to point to the split one */
            nir_instr_rewrite_src(&intrin->instr, &intrin->src[d],
                                  nir_src_for_ssa(&new_deref->dest.ssa));
            nir_deref_instr_remove_if_unused(deref);
         }
      }
   }
}
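/* A hypothetical example of the indirect heuristic used by this pass: for
 *
 *    vec4 m[3][2];
 *    ... m[1][i] ...     (where i is a non-constant index)
 *
 * the outer level is only ever indexed by constants so it gets split, while
 * the inner level has an indirect and stays an array, producing the
 * variables (m[0]), (m[1]), and (m[2]), each of type vec4[2].
 */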
/** A pass for splitting arrays of vectors into multiple variables
 *
 * This pass looks at arrays (possibly multiple levels) of vectors (not
 * structures or other types) and tries to split them into piles of variables,
 * one for each array element.  The heuristic used is simple: If a given array
 * level is never used with an indirect, that array level will get split.
 *
 * This pass probably could handle structures easily enough but making a pass
 * that could see through an array of structures of arrays would be difficult
 * so it's best to just run nir_split_struct_vars first.
 */
bool
nir_split_array_vars(nir_shader *shader, nir_variable_mode modes)
{
   void *mem_ctx = ralloc_context(NULL);
   struct hash_table *var_info_map = _mesa_pointer_hash_table_create(mem_ctx);

   assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes);

   bool has_global_array = false;
   if (modes & nir_var_shader_temp) {
      has_global_array = init_var_list_array_infos(&shader->globals,
                                                   var_info_map, mem_ctx);
   }

   bool has_any_array = false;
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      bool has_local_array = false;
      if (modes & nir_var_function_temp) {
         has_local_array = init_var_list_array_infos(&function->impl->locals,
                                                     var_info_map, mem_ctx);
      }

      if (has_global_array || has_local_array) {
         has_any_array = true;
         mark_array_usage_impl(function->impl, var_info_map, modes, mem_ctx);
      }
   }

   /* If we failed to find any arrays of vectors, bail early. */
   if (!has_any_array) {
      ralloc_free(mem_ctx);
      return false;
   }

   bool has_global_splits = false;
   if (modes & nir_var_shader_temp) {
      has_global_splits = split_var_list_arrays(shader, NULL,
                                                &shader->globals,
                                                var_info_map, mem_ctx);
   }

   bool progress = false;
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      bool has_local_splits = false;
      if (modes & nir_var_function_temp) {
         has_local_splits = split_var_list_arrays(shader, function->impl,
                                                  &function->impl->locals,
                                                  var_info_map, mem_ctx);
      }

      if (has_global_splits || has_local_splits) {
         split_array_copies_impl(function->impl, var_info_map, modes, mem_ctx);
         split_array_access_impl(function->impl, var_info_map, modes, mem_ctx);

         nir_metadata_preserve(function->impl, nir_metadata_block_index |
                                               nir_metadata_dominance);
         progress = true;
      }
   }

   ralloc_free(mem_ctx);

   return progress;
}

struct array_level_usage {
   unsigned array_len;

   /* The value UINT_MAX will be used to indicate an indirect */
   unsigned max_read;
   unsigned max_written;

   /* True if there is a copy that isn't to/from a shrinkable array */
   bool has_external_copy;
   struct set *levels_copied;
};

struct vec_var_usage {
   /* Convenience set of all components this variable has */
   nir_component_mask_t all_comps;

   nir_component_mask_t comps_read;
   nir_component_mask_t comps_written;

   nir_component_mask_t comps_kept;

   /* True if there is a copy that isn't to/from a shrinkable vector */
   bool has_external_copy;
   struct set *vars_copied;

   unsigned num_levels;
   struct array_level_usage levels[0];
};
static struct vec_var_usage *
get_vec_var_usage(nir_variable *var,
                  struct hash_table *var_usage_map,
                  bool add_usage_entry, void *mem_ctx)
{
   struct hash_entry *entry = _mesa_hash_table_search(var_usage_map, var);
   if (entry)
      return entry->data;

   if (!add_usage_entry)
      return NULL;

   /* Check to make sure that we are working with an array of vectors.  We
    * don't bother to shrink single vectors because we figure that we can
    * clean it up better with SSA than by inserting piles of vecN instructions
    * to compact results.
    */
   int num_levels = num_array_levels_in_array_of_vector_type(var->type);
   if (num_levels < 1)
      return NULL; /* Not an array of vectors */

   struct vec_var_usage *usage =
      rzalloc_size(mem_ctx, sizeof(*usage) +
                            num_levels * sizeof(usage->levels[0]));

   usage->num_levels = num_levels;
   const struct glsl_type *type = var->type;
   for (unsigned i = 0; i < num_levels; i++) {
      usage->levels[i].array_len = glsl_get_length(type);
      type = glsl_get_array_element(type);
   }
   assert(glsl_type_is_vector_or_scalar(type));

   usage->all_comps = (1 << glsl_get_components(type)) - 1;

   _mesa_hash_table_insert(var_usage_map, var, usage);

   return usage;
}

static struct vec_var_usage *
get_vec_deref_usage(nir_deref_instr *deref,
                    struct hash_table *var_usage_map,
                    nir_variable_mode modes,
                    bool add_usage_entry, void *mem_ctx)
{
   if (!(deref->mode & modes))
      return NULL;

   return get_vec_var_usage(nir_deref_instr_get_variable(deref),
                            var_usage_map, add_usage_entry, mem_ctx);
}

static void
mark_deref_used(nir_deref_instr *deref,
                nir_component_mask_t comps_read,
                nir_component_mask_t comps_written,
                nir_deref_instr *copy_deref,
                struct hash_table *var_usage_map,
                nir_variable_mode modes,
                void *mem_ctx)
{
   if (!(deref->mode & modes))
      return;

   nir_variable *var = nir_deref_instr_get_variable(deref);

   struct vec_var_usage *usage =
      get_vec_var_usage(var, var_usage_map, true, mem_ctx);
   if (!usage)
      return;

   usage->comps_read |= comps_read & usage->all_comps;
   usage->comps_written |= comps_written & usage->all_comps;

   struct vec_var_usage *copy_usage = NULL;
   if (copy_deref) {
      copy_usage = get_vec_deref_usage(copy_deref, var_usage_map, modes,
                                       true, mem_ctx);
      if (copy_usage) {
         if (usage->vars_copied == NULL) {
            usage->vars_copied = _mesa_pointer_set_create(mem_ctx);
         }
         _mesa_set_add(usage->vars_copied, copy_usage);
      } else {
         usage->has_external_copy = true;
      }
   }

   nir_deref_path path;
   nir_deref_path_init(&path, deref, mem_ctx);

   nir_deref_path copy_path;
   if (copy_usage)
      nir_deref_path_init(&copy_path, copy_deref, mem_ctx);

   unsigned copy_i = 0;
   for (unsigned i = 0; i < usage->num_levels; i++) {
      struct array_level_usage *level = &usage->levels[i];
      nir_deref_instr *deref = path.path[i + 1];
      assert(deref->deref_type == nir_deref_type_array ||
             deref->deref_type == nir_deref_type_array_wildcard);

      unsigned max_used;
      if (deref->deref_type == nir_deref_type_array) {
         max_used = nir_src_is_const(deref->arr.index) ?
                    nir_src_as_uint(deref->arr.index) : UINT_MAX;
      } else {
         /* For wildcards, we read or wrote the whole thing. */
         assert(deref->deref_type == nir_deref_type_array_wildcard);
         max_used = level->array_len - 1;

         if (copy_usage) {
            /* Match each wildcard level with the level on copy_usage */
            for (; copy_path.path[copy_i + 1]; copy_i++) {
               if (copy_path.path[copy_i + 1]->deref_type ==
                   nir_deref_type_array_wildcard)
                  break;
            }
            struct array_level_usage *copy_level =
               &copy_usage->levels[copy_i++];

            if (level->levels_copied == NULL) {
               level->levels_copied = _mesa_pointer_set_create(mem_ctx);
            }
            _mesa_set_add(level->levels_copied, copy_level);
         } else {
            /* We have a wildcard and it comes from a variable we aren't
             * tracking; flag it and we'll know to not shorten this array.
             */
            level->has_external_copy = true;
         }
      }

      if (comps_written)
         level->max_written = MAX2(level->max_written, max_used);
      if (comps_read)
         level->max_read = MAX2(level->max_read, max_used);
   }
}

static bool
src_is_load_deref(nir_src src, nir_src deref_src)
{
   nir_intrinsic_instr *load = nir_src_as_intrinsic(src);
   if (load == NULL || load->intrinsic != nir_intrinsic_load_deref)
      return false;

   assert(load->src[0].is_ssa);

   return load->src[0].ssa == deref_src.ssa;
}

/* Returns all non-self-referential components of a store instruction.  A
 * component is self-referential if it comes from the same component of a load
 * instruction on the same deref.  If the only data in a particular component
 * of a variable came directly from that component then it's undefined.  The
 * only way to get defined data into a component of a variable is for it to
 * get written there by something outside or from a different component.
 *
 * This is a fairly common pattern in shaders that come from either GLSL IR or
 * GLSLang because both glsl_to_nir and GLSLang implement write-masking with
 * load-vec-store.
 */
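/* As a concrete (hypothetical) instance of that pattern: "v.x = f;" for a
 * vec4 v is emitted roughly as
 *
 *    vec4 tmp = load_deref(v);
 *    store_deref(v, vec4(f, tmp.y, tmp.z, tmp.w), writemask=xyzw);
 *
 * Components .yzw of the store come straight back from a load of the same
 * deref, so only .x is counted as actually written below.
 */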
static nir_component_mask_t
get_non_self_referential_store_comps(nir_intrinsic_instr *store)
{
   nir_component_mask_t comps = nir_intrinsic_write_mask(store);

   assert(store->src[1].is_ssa);
   nir_instr *src_instr = store->src[1].ssa->parent_instr;
   if (src_instr->type != nir_instr_type_alu)
      return comps;

   nir_alu_instr *src_alu = nir_instr_as_alu(src_instr);

   if (src_alu->op == nir_op_imov ||
       src_alu->op == nir_op_fmov) {
      /* If it's just a swizzle of a load from the same deref, discount any
       * channels that don't move in the swizzle.
       */
      if (src_is_load_deref(src_alu->src[0].src, store->src[0])) {
         for (unsigned i = 0; i < NIR_MAX_VEC_COMPONENTS; i++) {
            if (src_alu->src[0].swizzle[i] == i)
               comps &= ~(1u << i);
         }
      }
   } else if (src_alu->op == nir_op_vec2 ||
              src_alu->op == nir_op_vec3 ||
              src_alu->op == nir_op_vec4) {
      /* If it's a vec, discount any channels that are just loads from the
       * same deref put in the same spot.
       */
      for (unsigned i = 0; i < nir_op_infos[src_alu->op].num_inputs; i++) {
         if (src_is_load_deref(src_alu->src[i].src, store->src[0]) &&
             src_alu->src[i].swizzle[0] == i)
            comps &= ~(1u << i);
      }
   }

   return comps;
}

static void
find_used_components_impl(nir_function_impl *impl,
                          struct hash_table *var_usage_map,
                          nir_variable_mode modes,
                          void *mem_ctx)
{
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
         switch (intrin->intrinsic) {
         case nir_intrinsic_load_deref:
            mark_deref_used(nir_src_as_deref(intrin->src[0]),
                            nir_ssa_def_components_read(&intrin->dest.ssa), 0,
                            NULL, var_usage_map, modes, mem_ctx);
            break;

         case nir_intrinsic_store_deref:
            mark_deref_used(nir_src_as_deref(intrin->src[0]),
                            0, get_non_self_referential_store_comps(intrin),
                            NULL, var_usage_map, modes, mem_ctx);
            break;

         case nir_intrinsic_copy_deref: {
            /* Just mark everything used for copies. */
            nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
            nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
            mark_deref_used(dst, 0, ~0, src, var_usage_map, modes, mem_ctx);
            mark_deref_used(src, ~0, 0, dst, var_usage_map, modes, mem_ctx);
            break;
         }

         default:
            break;
         }
      }
   }
}
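/* A small worked example of the shrinking decisions made below, assuming a
 * hypothetical variable vec4 v[8] where only v[0..2] are ever written, only
 * v[0..1] are ever read, and only components .xy are both read and written:
 * comps_kept becomes 0x3 and the array length becomes
 * MIN2(max_read, max_written) + 1 = 2, so the variable is rebuilt as
 * vec2 v[2] (assuming no copies force it to stay larger).
 */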
static bool
shrink_vec_var_list(struct exec_list *vars,
                    struct hash_table *var_usage_map)
{
   /* Initialize the components kept field of each variable.  This is the
    * AND of the components written and components read.  If a component is
    * written but never read, it's dead.  If it is read but never written,
    * then all values read are undefined garbage and we may as well not read
    * them.
    *
    * The same logic applies to the array length.  We make the array length
    * the minimum required length between read and write and plan to discard
    * any OOB access.  The one exception here is indirect writes because we
    * don't know where they will land and we can't shrink an array with
    * indirect writes because previously in-bounds writes may become
    * out-of-bounds and have undefined behavior.
    *
    * Also, if we have a copy to/from something we can't shrink, we need to
    * leave components and array_len of any wildcards alone.
    */
   nir_foreach_variable(var, vars) {
      struct vec_var_usage *usage =
         get_vec_var_usage(var, var_usage_map, false, NULL);
      if (!usage)
         continue;

      assert(usage->comps_kept == 0);
      if (usage->has_external_copy)
         usage->comps_kept = usage->all_comps;
      else
         usage->comps_kept = usage->comps_read & usage->comps_written;

      for (unsigned i = 0; i < usage->num_levels; i++) {
         struct array_level_usage *level = &usage->levels[i];
         assert(level->array_len > 0);

         if (level->max_written == UINT_MAX || level->has_external_copy)
            continue; /* Can't shrink */

         unsigned max_used = MIN2(level->max_read, level->max_written);
         level->array_len = MIN2(max_used, level->array_len - 1) + 1;
      }
   }

   /* In order for variable copies to work, we have to have the same data type
    * on the source and the destination.  In order to satisfy this, we run a
    * little fixed-point algorithm to transitively ensure that we get enough
    * components and array elements for this to hold for all copies.
    */
   bool fp_progress;
   do {
      fp_progress = false;
      nir_foreach_variable(var, vars) {
         struct vec_var_usage *var_usage =
            get_vec_var_usage(var, var_usage_map, false, NULL);
         if (!var_usage || !var_usage->vars_copied)
            continue;

         set_foreach(var_usage->vars_copied, copy_entry) {
            struct vec_var_usage *copy_usage = (void *)copy_entry->key;
            if (copy_usage->comps_kept != var_usage->comps_kept) {
               nir_component_mask_t comps_kept =
                  (var_usage->comps_kept | copy_usage->comps_kept);
               var_usage->comps_kept = comps_kept;
               copy_usage->comps_kept = comps_kept;
               fp_progress = true;
            }
         }

         for (unsigned i = 0; i < var_usage->num_levels; i++) {
            struct array_level_usage *var_level = &var_usage->levels[i];
            if (!var_level->levels_copied)
               continue;

            set_foreach(var_level->levels_copied, copy_entry) {
               struct array_level_usage *copy_level = (void *)copy_entry->key;
               if (var_level->array_len != copy_level->array_len) {
                  unsigned array_len =
                     MAX2(var_level->array_len, copy_level->array_len);
                  var_level->array_len = array_len;
                  copy_level->array_len = array_len;
                  fp_progress = true;
               }
            }
         }
      }
   } while (fp_progress);

   bool vars_shrunk = false;
   nir_foreach_variable_safe(var, vars) {
      struct vec_var_usage *usage =
         get_vec_var_usage(var, var_usage_map, false, NULL);
      if (!usage)
         continue;

      bool shrunk = false;
      const struct glsl_type *vec_type = var->type;
      for (unsigned i = 0; i < usage->num_levels; i++) {
         /* If we've reduced the array to zero elements at some level, just
          * set comps_kept to 0 and delete the variable.
          */
         if (usage->levels[i].array_len == 0) {
            usage->comps_kept = 0;
            break;
         }

         assert(usage->levels[i].array_len <= glsl_get_length(vec_type));
         if (usage->levels[i].array_len < glsl_get_length(vec_type))
            shrunk = true;
         vec_type = glsl_get_array_element(vec_type);
      }
      assert(glsl_type_is_vector_or_scalar(vec_type));

      assert(usage->comps_kept == (usage->comps_kept & usage->all_comps));
      if (usage->comps_kept != usage->all_comps)
         shrunk = true;

      if (usage->comps_kept == 0) {
         /* This variable is dead, remove it */
         vars_shrunk = true;
         exec_node_remove(&var->node);
         continue;
      }

      if (!shrunk) {
         /* This variable doesn't need to be shrunk.  Remove it from the
          * hash table so later steps will ignore it.
          */
         _mesa_hash_table_remove_key(var_usage_map, var);
         continue;
      }

      /* Build the new var type */
      unsigned new_num_comps = util_bitcount(usage->comps_kept);
      const struct glsl_type *new_type =
         glsl_vector_type(glsl_get_base_type(vec_type), new_num_comps);
      for (int i = usage->num_levels - 1; i >= 0; i--) {
         assert(usage->levels[i].array_len > 0);
         /* If the original type was a matrix type, we'd like to keep that so
          * we don't convert matrices into arrays.
          */
         if (i == usage->num_levels - 1 &&
             glsl_type_is_matrix(glsl_without_array(var->type)) &&
             new_num_comps > 1 && usage->levels[i].array_len > 1) {
            new_type = glsl_matrix_type(glsl_get_base_type(new_type),
                                        new_num_comps,
                                        usage->levels[i].array_len);
         } else {
            new_type = glsl_array_type(new_type, usage->levels[i].array_len, 0);
         }
      }
      var->type = new_type;

      vars_shrunk = true;
   }

   return vars_shrunk;
}

static bool
vec_deref_is_oob(nir_deref_instr *deref,
                 struct vec_var_usage *usage)
{
   nir_deref_path path;
   nir_deref_path_init(&path, deref, NULL);

   bool oob = false;
   for (unsigned i = 0; i < usage->num_levels; i++) {
      nir_deref_instr *p = path.path[i + 1];
      if (p->deref_type == nir_deref_type_array_wildcard)
         continue;

      if (nir_src_is_const(p->arr.index) &&
          nir_src_as_uint(p->arr.index) >= usage->levels[i].array_len) {
         oob = true;
         break;
      }
   }

   nir_deref_path_finish(&path);

   return oob;
}

static bool
vec_deref_is_dead_or_oob(nir_deref_instr *deref,
                         struct hash_table *var_usage_map,
                         nir_variable_mode modes)
{
   struct vec_var_usage *usage =
      get_vec_deref_usage(deref, var_usage_map, modes, false, NULL);
   if (!usage)
      return false;

   return usage->comps_kept == 0 || vec_deref_is_oob(deref, usage);
}

static void
shrink_vec_var_access_impl(nir_function_impl *impl,
                           struct hash_table *var_usage_map,
                           nir_variable_mode modes)
{
   nir_builder b;
   nir_builder_init(&b, impl);

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         switch (instr->type) {
         case nir_instr_type_deref: {
            nir_deref_instr *deref = nir_instr_as_deref(instr);
            if (!(deref->mode & modes))
               break;

            /* Clean up any dead derefs we find lying around.  They may refer
             * to variables we've deleted.
             */
            if (nir_deref_instr_remove_if_unused(deref))
               break;

            /* Update the type in the deref to keep the types consistent as
             * you walk down the chain.  We don't need to check if this is one
             * of the derefs we're shrinking because this is a no-op if it
             * isn't.  The worst that could happen is that we accidentally fix
             * an invalid deref.
             */
            if (deref->deref_type == nir_deref_type_var) {
               deref->type = deref->var->type;
            } else if (deref->deref_type == nir_deref_type_array ||
                       deref->deref_type == nir_deref_type_array_wildcard) {
               nir_deref_instr *parent = nir_deref_instr_parent(deref);
               assert(glsl_type_is_array(parent->type) ||
                      glsl_type_is_matrix(parent->type));
               deref->type = glsl_get_array_element(parent->type);
            }
            break;
         }

         case nir_instr_type_intrinsic: {
            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

            /* If we have a copy whose source or destination has been deleted
             * because we determined the variable was dead, then we just
             * delete the copy instruction.  If the source variable was dead
             * then it was writing undefined garbage anyway and if it's the
             * destination variable that's dead then the write isn't needed.
             */
            if (intrin->intrinsic == nir_intrinsic_copy_deref) {
               nir_deref_instr *dst = nir_src_as_deref(intrin->src[0]);
               nir_deref_instr *src = nir_src_as_deref(intrin->src[1]);
               if (vec_deref_is_dead_or_oob(dst, var_usage_map, modes) ||
                   vec_deref_is_dead_or_oob(src, var_usage_map, modes)) {
                  nir_instr_remove(&intrin->instr);
                  nir_deref_instr_remove_if_unused(dst);
                  nir_deref_instr_remove_if_unused(src);
               }
               continue;
            }

            if (intrin->intrinsic != nir_intrinsic_load_deref &&
                intrin->intrinsic != nir_intrinsic_store_deref)
               continue;

            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            if (!(deref->mode & modes))
               continue;

            struct vec_var_usage *usage =
               get_vec_deref_usage(deref, var_usage_map, modes, false, NULL);
            if (!usage)
               continue;

            if (usage->comps_kept == 0 || vec_deref_is_oob(deref, usage)) {
               if (intrin->intrinsic == nir_intrinsic_load_deref) {
                  nir_ssa_def *u =
                     nir_ssa_undef(&b, intrin->dest.ssa.num_components,
                                       intrin->dest.ssa.bit_size);
                  nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                           nir_src_for_ssa(u));
               }
               nir_instr_remove(&intrin->instr);
               nir_deref_instr_remove_if_unused(deref);
               continue;
            }

            /* If we're not dropping any components, there's no need to
             * compact vectors.
             */
            if (usage->comps_kept == usage->all_comps)
               continue;

            if (intrin->intrinsic == nir_intrinsic_load_deref) {
               b.cursor = nir_after_instr(&intrin->instr);

               nir_ssa_def *undef =
                  nir_ssa_undef(&b, 1, intrin->dest.ssa.bit_size);
               nir_ssa_def *vec_srcs[NIR_MAX_VEC_COMPONENTS];
               unsigned c = 0;
               for (unsigned i = 0; i < intrin->num_components; i++) {
                  if (usage->comps_kept & (1u << i))
                     vec_srcs[i] = nir_channel(&b, &intrin->dest.ssa, c++);
                  else
                     vec_srcs[i] = undef;
               }
               nir_ssa_def *vec = nir_vec(&b, vec_srcs, intrin->num_components);

               nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa,
                                              nir_src_for_ssa(vec),
                                              vec->parent_instr);

               /* The SSA def is now only used by the swizzle.  It's safe to
                * shrink the number of components.
                */
               assert(list_length(&intrin->dest.ssa.uses) == c);
               intrin->num_components = c;
               intrin->dest.ssa.num_components = c;
            } else {
               nir_component_mask_t write_mask =
                  nir_intrinsic_write_mask(intrin);

               unsigned swizzle[NIR_MAX_VEC_COMPONENTS];
               nir_component_mask_t new_write_mask = 0;
               unsigned c = 0;
               for (unsigned i = 0; i < intrin->num_components; i++) {
                  if (usage->comps_kept & (1u << i)) {
                     swizzle[c] = i;
                     if (write_mask & (1u << i))
                        new_write_mask |= 1u << c;
                     c++;
                  }
               }

               b.cursor = nir_before_instr(&intrin->instr);

               nir_ssa_def *swizzled =
                  nir_swizzle(&b, intrin->src[1].ssa, swizzle, c, false);

               /* Rewrite to use the compacted source */
               nir_instr_rewrite_src(&intrin->instr, &intrin->src[1],
                                     nir_src_for_ssa(swizzled));
               nir_intrinsic_set_write_mask(intrin, new_write_mask);
               intrin->num_components = c;
            }
            break;
         }

         default:
            break;
         }
      }
   }
}
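/* Continuing the hypothetical vec2 v[2] example from above:
 * shrink_vec_var_access_impl() pads each load of v back out to vec4, with
 * undef in the dropped components, and swizzles each store source down to
 * the kept components, so instructions that consume the loaded values don't
 * have to change.
 */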
static bool
function_impl_has_vars_with_modes(nir_function_impl *impl,
                                  nir_variable_mode modes)
{
   nir_shader *shader = impl->function->shader;

   if ((modes & nir_var_shader_temp) && !exec_list_is_empty(&shader->globals))
      return true;

   if ((modes & nir_var_function_temp) && !exec_list_is_empty(&impl->locals))
      return true;

   return false;
}

/** Attempt to shrink arrays of vectors
 *
 * This pass looks at variables which contain a vector or an array (possibly
 * multiple dimensions) of vectors and attempts to lower to a smaller vector
 * or array.  If the pass can prove that a component of a vector (or array of
 * vectors) is never really used, then that component will be removed.
 * Similarly, the pass attempts to shorten arrays based on what elements it
 * can prove are never read or never contain valid data.
 */
bool
nir_shrink_vec_array_vars(nir_shader *shader, nir_variable_mode modes)
{
   assert((modes & (nir_var_shader_temp | nir_var_function_temp)) == modes);

   void *mem_ctx = ralloc_context(NULL);

   struct hash_table *var_usage_map =
      _mesa_pointer_hash_table_create(mem_ctx);

   bool has_vars_to_shrink = false;
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      /* Don't even bother crawling the IR if we don't have any variables.
       * Given that this pass deletes any unused variables, it's likely that
       * we will be in this scenario eventually.
       */
      if (function_impl_has_vars_with_modes(function->impl, modes)) {
         has_vars_to_shrink = true;
         find_used_components_impl(function->impl, var_usage_map,
                                   modes, mem_ctx);
      }
   }
   if (!has_vars_to_shrink) {
      ralloc_free(mem_ctx);
      return false;
   }

   bool globals_shrunk = false;
   if (modes & nir_var_shader_temp)
      globals_shrunk = shrink_vec_var_list(&shader->globals, var_usage_map);

   bool progress = false;
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      bool locals_shrunk = false;
      if (modes & nir_var_function_temp) {
         locals_shrunk = shrink_vec_var_list(&function->impl->locals,
                                             var_usage_map);
      }

      if (globals_shrunk || locals_shrunk) {
         shrink_vec_var_access_impl(function->impl, var_usage_map, modes);

         nir_metadata_preserve(function->impl, nir_metadata_block_index |
                                               nir_metadata_dominance);
         progress = true;
      }
   }

   ralloc_free(mem_ctx);

   return progress;
}