/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "util/set.h"
#include "util/hash_table.h"

/* This file contains various little helpers for doing simple linking in
 * NIR. Eventually, we'll probably want a full-blown varying packing
 * implementation in here. Right now, it just deletes unused things.
 */

/**
 * Returns the bits in the inputs_read, outputs_written, or
 * system_values_read bitfield corresponding to this variable.
 */
static uint64_t
get_variable_io_mask(nir_variable *var, gl_shader_stage stage)
{
   if (var->data.location < 0)
      return 0;

   unsigned location = var->data.patch ?
      var->data.location - VARYING_SLOT_PATCH0 : var->data.location;

   assert(var->data.mode == nir_var_shader_in ||
          var->data.mode == nir_var_shader_out ||
          var->data.mode == nir_var_system_value);
   assert(var->data.location >= 0);

   const struct glsl_type *type = var->type;
   if (nir_is_per_vertex_io(var, stage)) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   unsigned slots = glsl_count_attribute_slots(type, false);
   return ((1ull << slots) - 1) << location;
}
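
/* For example, a vertex shader output declared vec4[2] at VARYING_SLOT_VAR1
 * counts as two attribute slots, so the mask above is
 * ((1ull << 2) - 1) << VARYING_SLOT_VAR1, i.e. the bits for
 * VARYING_SLOT_VAR1 and VARYING_SLOT_VAR2.
 */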

static uint8_t
get_num_components(nir_variable *var)
{
   if (glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
      return 4;

   return glsl_get_vector_elements(glsl_without_array(var->type));
}

static void
tcs_add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read)
{
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic != nir_intrinsic_load_deref)
               continue;

            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            if (deref->mode != nir_var_shader_out)
               continue;

            nir_variable *var = nir_deref_instr_get_variable(deref);
            for (unsigned i = 0; i < get_num_components(var); i++) {
               if (var->data.patch) {
                  patches_read[var->data.location_frac + i] |=
                     get_variable_io_mask(var, shader->info.stage);
               } else {
                  read[var->data.location_frac + i] |=
                     get_variable_io_mask(var, shader->info.stage);
               }
            }
         }
      }
   }
}
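
/* Sketch of the case tcs_add_output_reads() handles: a TCS output that the
 * TES never reads must still survive if another TCS invocation reads it
 * back, e.g.
 *
 *    out float weight[];                    // not read by the TES
 *    ...
 *    barrier();
 *    float w = weight[gl_InvocationID ^ 1]; // cross-invocation read
 */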

/**
 * Helper for removing unused shader I/O variables, by demoting them to global
 * variables (which may then be dead code eliminated).
 *
 * Example usage is:
 *
 * progress = nir_remove_unused_io_vars(producer,
 *                                      &producer->outputs,
 *                                      read, patches_read) ||
 *            progress;
 *
 * The "used" arguments should each be an array of 4 uint64_t masks (probably
 * of VARYING_BIT_*), one per .location_frac. Note that for vector variables,
 * only the first channel (.location_frac) is examined for deciding if the
 * variable is used!
 */
bool
nir_remove_unused_io_vars(nir_shader *shader, struct exec_list *var_list,
                          uint64_t *used_by_other_stage,
                          uint64_t *used_by_other_stage_patches)
{
   bool progress = false;
   uint64_t *used;

   nir_foreach_variable_safe(var, var_list) {
      if (var->data.patch)
         used = used_by_other_stage_patches;
      else
         used = used_by_other_stage;

      if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0)
         continue;

      if (var->data.always_active_io)
         continue;

      if (var->data.explicit_xfb_buffer)
         continue;

      uint64_t other_stage = used[var->data.location_frac];

      if (!(other_stage & get_variable_io_mask(var, shader->info.stage))) {
         /* This one is invalid, make it a global variable instead */
         var->data.location = 0;
         var->data.mode = nir_var_shader_temp;

         exec_node_remove(&var->node);
         exec_list_push_tail(&shader->globals, &var->node);

         progress = true;
      }
   }

   if (progress)
      nir_fixup_deref_modes(shader);

   return progress;
}

bool
nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
{
   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
   assert(consumer->info.stage != MESA_SHADER_VERTEX);

   uint64_t read[4] = { 0 }, written[4] = { 0 };
   uint64_t patches_read[4] = { 0 }, patches_written[4] = { 0 };

   nir_foreach_variable(var, &producer->outputs) {
      for (unsigned i = 0; i < get_num_components(var); i++) {
         if (var->data.patch) {
            patches_written[var->data.location_frac + i] |=
               get_variable_io_mask(var, producer->info.stage);
         } else {
            written[var->data.location_frac + i] |=
               get_variable_io_mask(var, producer->info.stage);
         }
      }
   }

   nir_foreach_variable(var, &consumer->inputs) {
      for (unsigned i = 0; i < get_num_components(var); i++) {
         if (var->data.patch) {
            patches_read[var->data.location_frac + i] |=
               get_variable_io_mask(var, consumer->info.stage);
         } else {
            read[var->data.location_frac + i] |=
               get_variable_io_mask(var, consumer->info.stage);
         }
      }
   }

   /* Each TCS invocation can read data written by other TCS invocations,
    * so even if the outputs are not used by the TES we must also make
    * sure they are not read by the TCS before demoting them to globals.
    */
   if (producer->info.stage == MESA_SHADER_TESS_CTRL)
      tcs_add_output_reads(producer, read, patches_read);

   bool progress = false;
   progress = nir_remove_unused_io_vars(producer, &producer->outputs, read,
                                        patches_read);

   progress = nir_remove_unused_io_vars(consumer, &consumer->inputs, written,
                                        patches_written) || progress;

   return progress;
}
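
/* Typical caller sequence (a sketch, not the only valid ordering): run
 * nir_remove_unused_varyings() on each adjacent stage pair, then clean up
 * the demoted temporaries, e.g. with nir_remove_dead_variables() and the
 * usual DCE passes, before attempting any varying packing.
 */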

static uint8_t
get_interp_type(nir_variable *var, const struct glsl_type *type,
                bool default_to_smooth_interp)
{
   if (glsl_type_is_integer(type))
      return INTERP_MODE_FLAT;
   else if (var->data.interpolation != INTERP_MODE_NONE)
      return var->data.interpolation;
   else if (default_to_smooth_interp)
      return INTERP_MODE_SMOOTH;
   else
      return INTERP_MODE_NONE;
}

#define INTERPOLATE_LOC_SAMPLE 0
#define INTERPOLATE_LOC_CENTROID 1
#define INTERPOLATE_LOC_CENTER 2

static uint8_t
get_interp_loc(nir_variable *var)
{
   if (var->data.sample)
      return INTERPOLATE_LOC_SAMPLE;
   else if (var->data.centroid)
      return INTERPOLATE_LOC_CENTROID;
   else
      return INTERPOLATE_LOC_CENTER;
}

static bool
is_packing_supported_for_type(const struct glsl_type *type)
{
   /* We ignore complex types such as arrays, matrices, structs and bit sizes
    * other than 32-bit. All other vector types should have been split into
    * scalar variables by the lower_io_to_scalar pass. The only exception
    * should be OpenGL xfb varyings.
    * TODO: add support for more complex types?
    */
   return glsl_type_is_scalar(type) && glsl_type_is_32bit(type);
}

struct assigned_comps
{
   uint8_t comps;
   uint8_t interp_type;
   uint8_t interp_loc;
   bool is_32bit;
};

/* Packing arrays and dual-slot varyings is difficult, so to avoid complex
 * algorithms this function just assigns them their existing location for now.
 * TODO: allow better packing of complex types.
 */
static void
get_unmoveable_components_masks(struct exec_list *var_list,
                                struct assigned_comps *comps,
                                gl_shader_stage stage,
                                bool default_to_smooth_interp)
{
   nir_foreach_variable_safe(var, var_list) {
      assert(var->data.location >= 0);

      /* Only remap things that aren't built-ins. */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         const struct glsl_type *type = var->type;
         if (nir_is_per_vertex_io(var, stage)) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         /* If we can pack this varying then don't mark the components as
          * used.
          */
         if (is_packing_supported_for_type(type))
            continue;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;

         unsigned elements =
            glsl_type_is_vector_or_scalar(glsl_without_array(type)) ?
            glsl_get_vector_elements(glsl_without_array(type)) : 4;

         bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type));
         unsigned slots = glsl_count_attribute_slots(type, false);
         unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
         unsigned comps_slot2 = 0;
         for (unsigned i = 0; i < slots; i++) {
            if (dual_slot) {
               if (i & 1) {
                  comps[location + i].comps |= ((1 << comps_slot2) - 1);
               } else {
                  unsigned num_comps = 4 - var->data.location_frac;
                  comps_slot2 = (elements * dmul) - num_comps;

                  /* Assume ARB_enhanced_layouts packing rules for doubles */
                  assert(var->data.location_frac == 0 ||
                         var->data.location_frac == 2);
                  assert(comps_slot2 <= 4);

                  comps[location + i].comps |=
                     ((1 << num_comps) - 1) << var->data.location_frac;
               }
            } else {
               comps[location + i].comps |=
                  ((1 << (elements * dmul)) - 1) << var->data.location_frac;
            }

            comps[location + i].interp_type =
               get_interp_type(var, type, default_to_smooth_interp);
            comps[location + i].interp_loc = get_interp_loc(var);
            comps[location + i].is_32bit =
               glsl_type_is_32bit(glsl_without_array(type));
         }
      }
   }
}
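
/* Worked example of the dual-slot path above: a dvec3 at location_frac 0
 * has elements == 3 and dmul == 2, i.e. six 32-bit components. The first
 * slot takes num_comps == 4 (mask 0xf) and leaves comps_slot2 == 2, so the
 * second slot is marked with mask 0x3.
 */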

struct varying_loc
{
   uint8_t component;
   uint32_t location;
};

static void
mark_all_used_slots(nir_variable *var, uint64_t *slots_used,
                    uint64_t slots_used_mask, unsigned num_slots)
{
   unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;

   slots_used[var->data.patch ? 1 : 0] |= slots_used_mask &
      BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
}

static void
mark_used_slot(nir_variable *var, uint64_t *slots_used, unsigned offset)
{
   unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;

   slots_used[var->data.patch ? 1 : 0] |=
      BITFIELD64_BIT(var->data.location - loc_offset + offset);
}

static void
remap_slots_and_components(struct exec_list *var_list, gl_shader_stage stage,
                           struct varying_loc (*remap)[4],
                           uint64_t *slots_used, uint64_t *out_slots_read,
                           uint32_t *p_slots_used, uint32_t *p_out_slots_read)
{
   uint64_t out_slots_read_tmp[2] = {0};
   uint64_t slots_used_tmp[2] = {0};

   /* We don't touch builtins so just copy the bitmask */
   slots_used_tmp[0] = *slots_used & BITFIELD64_RANGE(0, VARYING_SLOT_VAR0);

   nir_foreach_variable(var, var_list) {
      assert(var->data.location >= 0);

      /* Only remap things that aren't built-ins */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         const struct glsl_type *type = var->type;
         if (nir_is_per_vertex_io(var, stage)) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         unsigned num_slots = glsl_count_attribute_slots(type, false);
         bool used_across_stages = false;
         bool outputs_read = false;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         struct varying_loc *new_loc = &remap[location][var->data.location_frac];

         unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
         uint64_t used = var->data.patch ? *p_slots_used : *slots_used;
         uint64_t outs_used =
            var->data.patch ? *p_out_slots_read : *out_slots_read;
         uint64_t slots =
            BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);

         if (slots & used)
            used_across_stages = true;

         if (slots & outs_used)
            outputs_read = true;

         /* A location of 0 means the remap entry was never assigned, since
          * every assigned location is at least VARYING_SLOT_VAR0.
          */
         if (new_loc->location) {
            var->data.location = new_loc->location;
            var->data.location_frac = new_loc->component;
         }

         if (var->data.always_active_io) {
            /* We can't apply link-time optimisations (specifically array
             * splitting) to these, so we need to copy the existing mask;
             * otherwise we would mess up the mask for things like partially
             * marked arrays.
             */
            if (used_across_stages)
               mark_all_used_slots(var, slots_used_tmp, used, num_slots);

            if (outputs_read) {
               mark_all_used_slots(var, out_slots_read_tmp, outs_used,
                                   num_slots);
            }
         } else {
            for (unsigned i = 0; i < num_slots; i++) {
               if (used_across_stages)
                  mark_used_slot(var, slots_used_tmp, i);

               if (outputs_read)
                  mark_used_slot(var, out_slots_read_tmp, i);
            }
         }
      }
   }

   *slots_used = slots_used_tmp[0];
   *out_slots_read = out_slots_read_tmp[0];
   *p_slots_used = slots_used_tmp[1];
   *p_out_slots_read = out_slots_read_tmp[1];
}

struct varying_component {
   nir_variable *var;
   uint8_t interp_type;
   uint8_t interp_loc;
   bool is_32bit;
   bool is_patch;
   bool initialised;
};
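
/* qsort comparator for the packing order below: all non-patch components
 * sort before patch components, components are then grouped by matching
 * interpolation type and location, and the original location is the final
 * tie-breaker so the resulting order is deterministic.
 */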

static int
cmp_varying_component(const void *comp1_v, const void *comp2_v)
{
   struct varying_component *comp1 = (struct varying_component *) comp1_v;
   struct varying_component *comp2 = (struct varying_component *) comp2_v;

   /* We want patches to be ordered at the end of the array */
   if (comp1->is_patch != comp2->is_patch)
      return comp1->is_patch ? 1 : -1;

   /* We can only pack varyings with matching interpolation types so group
    * them together.
    */
   if (comp1->interp_type != comp2->interp_type)
      return comp1->interp_type - comp2->interp_type;

   /* Interpolation loc must match also. */
   if (comp1->interp_loc != comp2->interp_loc)
      return comp1->interp_loc - comp2->interp_loc;

   /* If everything else matches just use the original location to sort */
   return comp1->var->data.location - comp2->var->data.location;
}

static void
gather_varying_component_info(nir_shader *consumer,
                              struct varying_component **varying_comp_info,
                              unsigned *varying_comp_info_size,
                              bool default_to_smooth_interp)
{
   unsigned store_varying_info_idx[MAX_VARYINGS_INCL_PATCH][4] = {{0}};
   unsigned num_of_comps_to_pack = 0;

   /* Count the number of varyings that can be packed and create a mapping
    * of those varyings to the array we will pass to qsort.
    */
   nir_foreach_variable(var, &consumer->inputs) {

      /* Only remap things that aren't builtins. */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         /* We can't repack xfb varyings. */
         if (var->data.always_active_io)
            continue;

         const struct glsl_type *type = var->type;
         if (nir_is_per_vertex_io(var, consumer->info.stage)) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         if (!is_packing_supported_for_type(type))
            continue;

         /* Indices are 1-based, so 0 means "not packable" below. */
         unsigned loc = var->data.location - VARYING_SLOT_VAR0;
         store_varying_info_idx[loc][var->data.location_frac] =
            ++num_of_comps_to_pack;
      }
   }

   *varying_comp_info_size = num_of_comps_to_pack;
   *varying_comp_info = rzalloc_array(NULL, struct varying_component,
                                      num_of_comps_to_pack);

   nir_function_impl *impl = nir_shader_get_entrypoint(consumer);

   /* Walk over the shader and populate the varying component info array */
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_offset)
            continue;

         nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
         if (deref->mode != nir_var_shader_in)
            continue;

         /* We only remap things that aren't builtins. */
         nir_variable *in_var = nir_deref_instr_get_variable(deref);
         if (in_var->data.location < VARYING_SLOT_VAR0)
            continue;

         unsigned location = in_var->data.location - VARYING_SLOT_VAR0;
         if (location >= MAX_VARYINGS_INCL_PATCH)
            continue;

         unsigned var_info_idx =
            store_varying_info_idx[location][in_var->data.location_frac];
         if (!var_info_idx)
            continue;

         struct varying_component *vc_info =
            &(*varying_comp_info)[var_info_idx - 1];

         if (!vc_info->initialised) {
            const struct glsl_type *type = in_var->type;
            if (nir_is_per_vertex_io(in_var, consumer->info.stage)) {
               assert(glsl_type_is_array(type));
               type = glsl_get_array_element(type);
            }

            vc_info->var = in_var;
            vc_info->interp_type =
               get_interp_type(in_var, type, default_to_smooth_interp);
            vc_info->interp_loc = get_interp_loc(in_var);
            vc_info->is_32bit = glsl_type_is_32bit(type);
            vc_info->is_patch = in_var->data.patch;
         }
      }
   }
}
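
/* Worked example for assign_remap_locations() below (a sketch): suppose
 * assigned_comps[0].comps == 0xf (slot full) and
 * assigned_comps[1].comps == 0x1 with matching interpolation. A packable
 * 32-bit scalar then skips slot 0, lands at VARYING_SLOT_VAR1 component 1,
 * and the cursor stays at slot 1 with comp advanced to 2 so the next
 * component can continue filling the same slot.
 */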

static void
assign_remap_locations(struct varying_loc (*remap)[4],
                       struct assigned_comps *assigned_comps,
                       struct varying_component *info,
                       unsigned *cursor, unsigned *comp,
                       unsigned max_location)
{
   unsigned tmp_cursor = *cursor;
   unsigned tmp_comp = *comp;

   for (; tmp_cursor < max_location; tmp_cursor++) {

      if (assigned_comps[tmp_cursor].comps) {
         /* We can only pack varyings with matching interpolation types;
          * the interpolation loc must match as well.
          * TODO: i965 can handle interpolation locations that don't match,
          * but the radeonsi nir backend handles everything as vec4s and so
          * expects this to be the same for all components. We could make
          * this check driver-specific or drop it if NIR ever becomes the
          * only radeonsi backend.
          */
         if (assigned_comps[tmp_cursor].interp_type != info->interp_type ||
             assigned_comps[tmp_cursor].interp_loc != info->interp_loc) {
            tmp_comp = 0;
            continue;
         }

         /* We can only pack varyings with matching types, and the current
          * algorithm only supports packing 32-bit.
          */
         if (!assigned_comps[tmp_cursor].is_32bit) {
            tmp_comp = 0;
            continue;
         }

         while (tmp_comp < 4 &&
                (assigned_comps[tmp_cursor].comps & (1 << tmp_comp))) {
            tmp_comp++;
         }
      }

      if (tmp_comp == 4) {
         tmp_comp = 0;
         continue;
      }

      unsigned location = info->var->data.location - VARYING_SLOT_VAR0;

      /* Once we have assigned a location mark it as used */
      assigned_comps[tmp_cursor].comps |= (1 << tmp_comp);
      assigned_comps[tmp_cursor].interp_type = info->interp_type;
      assigned_comps[tmp_cursor].interp_loc = info->interp_loc;
      assigned_comps[tmp_cursor].is_32bit = info->is_32bit;

      /* Assign remap location */
      remap[location][info->var->data.location_frac].component = tmp_comp++;
      remap[location][info->var->data.location_frac].location =
         tmp_cursor + VARYING_SLOT_VAR0;

      break;
   }

   *cursor = tmp_cursor;
   *comp = tmp_comp;
}

/* If there are empty components in the slot, compact the remaining
 * components as close to component 0 as possible. This will make it easier
 * to fill the empty components with components from a different slot in a
 * following pass.
 */
static void
compact_components(nir_shader *producer, nir_shader *consumer,
                   struct assigned_comps *assigned_comps,
                   bool default_to_smooth_interp)
{
   struct exec_list *input_list = &consumer->inputs;
   struct exec_list *output_list = &producer->outputs;
   struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = {{{0}, {0}}};
   struct varying_component *varying_comp_info;
   unsigned varying_comp_info_size;

   /* Gather varying component info */
   gather_varying_component_info(consumer, &varying_comp_info,
                                 &varying_comp_info_size,
                                 default_to_smooth_interp);

   /* Sort varying components. */
   qsort(varying_comp_info, varying_comp_info_size,
         sizeof(struct varying_component), cmp_varying_component);

   unsigned cursor = 0;
   unsigned comp = 0;

   /* Set the remap array based on the sorted components */
   for (unsigned i = 0; i < varying_comp_info_size; i++) {
      struct varying_component *info = &varying_comp_info[i];

      assert(info->is_patch || cursor < MAX_VARYING);
      if (info->is_patch) {
         /* The list should be sorted with all non-patch inputs first followed
          * by patch inputs. When we hit our first patch input, we need to
          * reset the cursor to MAX_VARYING so we put them in the right slot.
          */
         if (cursor < MAX_VARYING) {
            cursor = MAX_VARYING;
            comp = 0;
         }

         assign_remap_locations(remap, assigned_comps, info,
                                &cursor, &comp, MAX_VARYINGS_INCL_PATCH);
      } else {
         assign_remap_locations(remap, assigned_comps, info,
                                &cursor, &comp, MAX_VARYING);

         /* Check if we failed to assign a remap location. This can happen
          * if for example there are a bunch of unmovable components with
          * mismatching interpolation types causing us to skip over locations
          * that would have been useful for packing later components.
          * The solution is to iterate over the locations again (this should
          * happen very rarely in practice).
          */
         if (cursor == MAX_VARYING) {
            cursor = 0;
            comp = 0;
            assign_remap_locations(remap, assigned_comps, info,
                                   &cursor, &comp, MAX_VARYING);
         }
      }
   }

   ralloc_free(varying_comp_info);

   uint64_t zero = 0;
   uint32_t zero32 = 0;
   remap_slots_and_components(input_list, consumer->info.stage, remap,
                              &consumer->info.inputs_read, &zero,
                              &consumer->info.patch_inputs_read, &zero32);
   remap_slots_and_components(output_list, producer->info.stage, remap,
                              &producer->info.outputs_written,
                              &producer->info.outputs_read,
                              &producer->info.patch_outputs_written,
                              &producer->info.patch_outputs_read);
}

/* We assume that this has been called more-or-less directly after
 * remove_unused_varyings. At this point, all of the varyings that we
 * aren't going to be using have been completely removed and the
 * inputs_read and outputs_written fields in nir_shader_info reflect
 * this. Therefore, the total set of valid slots is the OR of the two
 * sets of varyings; this accounts for varyings which one side may need
 * to read/write even if the other doesn't. This can happen if, for
 * instance, an array is used indirectly from one side causing it to be
 * unsplittable but directly from the other.
 */
void
nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
                     bool default_to_smooth_interp)
{
   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
   assert(consumer->info.stage != MESA_SHADER_VERTEX);

   struct assigned_comps assigned_comps[MAX_VARYINGS_INCL_PATCH] = {{0}};

   get_unmoveable_components_masks(&producer->outputs, assigned_comps,
                                   producer->info.stage,
                                   default_to_smooth_interp);
   get_unmoveable_components_masks(&consumer->inputs, assigned_comps,
                                   consumer->info.stage,
                                   default_to_smooth_interp);

   compact_components(producer, consumer, assigned_comps,
                      default_to_smooth_interp);
}
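
/* For instance (a sketch): three scalar float varyings occupying
 * VARYING_SLOT_VAR0.x, VAR1.x and VAR2.x with identical interpolation end
 * up packed into the x, y and z components of a single slot, freeing the
 * other two slots entirely.
 */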

/*
 * Mark XFB varyings as always_active_io in the consumer so the linking opts
 * don't touch them.
 */
void
nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer)
{
   nir_variable *input_vars[MAX_VARYING] = { 0 };

   nir_foreach_variable(var, &consumer->inputs) {
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         input_vars[location] = var;
      }
   }

   nir_foreach_variable(var, &producer->outputs) {
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {

         if (!var->data.always_active_io)
            continue;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         if (input_vars[location]) {
            input_vars[location]->data.always_active_io = true;
         }
      }
   }
}

static bool
does_varying_match(nir_variable *out_var, nir_variable *in_var)
{
   return in_var->data.location == out_var->data.location &&
          in_var->data.location_frac == out_var->data.location_frac;
}

static nir_variable *
get_matching_input_var(nir_shader *consumer, nir_variable *out_var)
{
   nir_foreach_variable(var, &consumer->inputs) {
      if (does_varying_match(out_var, var))
         return var;
   }

   return NULL;
}

static bool
can_replace_varying(nir_variable *out_var)
{
   /* Skip types that require more complex handling.
    * TODO: add support for these types.
    */
   if (glsl_type_is_array(out_var->type) ||
       glsl_type_is_dual_slot(out_var->type) ||
       glsl_type_is_matrix(out_var->type) ||
       glsl_type_is_struct_or_ifc(out_var->type))
      return false;

   /* Limit this pass to scalars for now to keep things simple. Most varyings
    * should have been lowered to scalars at this point anyway.
    */
   if (!glsl_type_is_scalar(out_var->type))
      return false;

   if (out_var->data.location < VARYING_SLOT_VAR0 ||
       out_var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING)
      return false;

   return true;
}
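
/* Sketch of the constant-forwarding case handled by
 * replace_constant_input() below: if the last block of the producer
 * contains something like
 *
 *    fog = 0.5;   // a store_deref whose value is a load_const
 *
 * then every load of the matching consumer input is replaced with the same
 * immediate, leaving the varying itself dead.
 */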

static bool
replace_constant_input(nir_shader *shader, nir_intrinsic_instr *store_intr)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_variable *out_var =
      nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref)
            continue;

         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
         if (in_deref->mode != nir_var_shader_in)
            continue;

         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);

         if (!does_varying_match(out_var, in_var))
            continue;

         b.cursor = nir_before_instr(instr);

         nir_load_const_instr *out_const =
            nir_instr_as_load_const(store_intr->src[1].ssa->parent_instr);

         /* Add new const to replace the input */
         nir_ssa_def *nconst = nir_build_imm(&b, store_intr->num_components,
                                             intr->dest.ssa.bit_size,
                                             out_const->value);

         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(nconst));

         progress = true;
      }
   }

   return progress;
}
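
/* Sketch of the duplicate-forwarding case handled by
 * replace_duplicate_input() below: if the producer stores the same SSA
 * value to two outputs, e.g.
 *
 *    out_a = x;
 *    out_b = x;
 *
 * then loads of one of the two matching consumer inputs are rewritten to
 * load the other (provided the interpolation qualifiers match), leaving
 * one varying dead.
 */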

static bool
replace_duplicate_input(nir_shader *shader, nir_variable *input_var,
                        nir_intrinsic_instr *dup_store_intr)
{
   assert(input_var);

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_variable *dup_out_var =
      nir_deref_instr_get_variable(nir_src_as_deref(dup_store_intr->src[0]));

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref)
            continue;

         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
         if (in_deref->mode != nir_var_shader_in)
            continue;

         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);

         if (!does_varying_match(dup_out_var, in_var) ||
             in_var->data.interpolation != input_var->data.interpolation ||
             get_interp_loc(in_var) != get_interp_loc(input_var))
            continue;

         b.cursor = nir_before_instr(instr);

         nir_ssa_def *load = nir_load_var(&b, input_var);
         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(load));

         progress = true;
      }
   }

   return progress;
}

bool
nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
{
   /* TODO: Add support for more shader stage combinations */
   if (consumer->info.stage != MESA_SHADER_FRAGMENT ||
       (producer->info.stage != MESA_SHADER_VERTEX &&
        producer->info.stage != MESA_SHADER_TESS_EVAL))
      return false;

   bool progress = false;

   nir_function_impl *impl = nir_shader_get_entrypoint(producer);

   struct hash_table *varying_values = _mesa_pointer_hash_table_create(NULL);

   /* If we find a store in the last block of the producer we can be sure this
    * is the only possible value for this output.
    */
   nir_block *last_block = nir_impl_last_block(impl);
   nir_foreach_instr_reverse(instr, last_block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      if (intr->intrinsic != nir_intrinsic_store_deref)
         continue;

      nir_deref_instr *out_deref = nir_src_as_deref(intr->src[0]);
      if (out_deref->mode != nir_var_shader_out)
         continue;

      nir_variable *out_var = nir_deref_instr_get_variable(out_deref);
      if (!can_replace_varying(out_var))
         continue;

      if (intr->src[1].ssa->parent_instr->type == nir_instr_type_load_const) {
         progress |= replace_constant_input(consumer, intr);
      } else {
         struct hash_entry *entry =
            _mesa_hash_table_search(varying_values, intr->src[1].ssa);
         if (entry) {
            progress |= replace_duplicate_input(consumer,
                                                (nir_variable *) entry->data,
                                                intr);
         } else {
            nir_variable *in_var = get_matching_input_var(consumer, out_var);
            if (in_var) {
               _mesa_hash_table_insert(varying_values, intr->src[1].ssa,
                                       in_var);
            }
         }
      }
   }

   _mesa_hash_table_destroy(varying_values, NULL);

   return progress;
}