/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"
#include "util/set.h"
#include "util/hash_table.h"

/* This file contains various little helpers for doing simple linking in
 * NIR. Eventually, we'll probably want a full-blown varying packing
 * implementation in here. Right now, it just deletes unused things.
 */

/**
 * Returns the bits in the inputs_read, or outputs_written
 * bitfield corresponding to this variable.
 */
static uint64_t
get_variable_io_mask(nir_variable *var, gl_shader_stage stage)
{
   if (var->data.location < 0)
      return 0;

   unsigned location = var->data.patch ?
      var->data.location - VARYING_SLOT_PATCH0 : var->data.location;

   assert(var->data.mode == nir_var_shader_in ||
          var->data.mode == nir_var_shader_out);
   assert(var->data.location >= 0);

   const struct glsl_type *type = var->type;
   if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   unsigned slots = glsl_count_attribute_slots(type, false);
   return ((1ull << slots) - 1) << location;
}
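
/* A worked example of the mask above (illustrative only): a non-patch
 * float[2] input at VARYING_SLOT_VAR1 counts two attribute slots, so this
 * returns ((1ull << 2) - 1) << VARYING_SLOT_VAR1, i.e. the inputs_read
 * bits for VAR1 and VAR2.
 */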

static bool
is_non_generic_patch_var(nir_variable *var)
{
   return var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
          var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER ||
          var->data.location == VARYING_SLOT_BOUNDING_BOX0 ||
          var->data.location == VARYING_SLOT_BOUNDING_BOX1;
}

static uint8_t
get_num_components(nir_variable *var)
{
   if (glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
      return 4;

   return glsl_get_vector_elements(glsl_without_array(var->type));
}

static void
tcs_add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read)
{
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic != nir_intrinsic_load_deref)
               continue;

            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
            if (!nir_deref_mode_is(deref, nir_var_shader_out))
               continue;

            nir_variable *var = nir_deref_instr_get_variable(deref);
            for (unsigned i = 0; i < get_num_components(var); i++) {
               if (var->data.patch) {
                  if (is_non_generic_patch_var(var))
                     continue;

                  patches_read[var->data.location_frac + i] |=
                     get_variable_io_mask(var, shader->info.stage);
               } else {
                  read[var->data.location_frac + i] |=
                     get_variable_io_mask(var, shader->info.stage);
               }
            }
         }
      }
   }
}

/**
 * Helper for removing unused shader I/O variables, by demoting them to global
 * variables (which may then be dead code eliminated).
 *
 * Example usage is:
 *
 * progress = nir_remove_unused_io_vars(producer, nir_var_shader_out,
 *                                      read, patches_read) ||
 *            progress;
 *
 * The "used" should be an array of 4 uint64_ts (probably of VARYING_BIT_*)
 * representing each .location_frac used. Note that for vector variables,
 * only the first channel (.location_frac) is examined for deciding if the
 * variable is used!
 */
bool
nir_remove_unused_io_vars(nir_shader *shader,
                          nir_variable_mode mode,
                          uint64_t *used_by_other_stage,
                          uint64_t *used_by_other_stage_patches)
{
   bool progress = false;
   uint64_t *used;

   assert(mode == nir_var_shader_in || mode == nir_var_shader_out);

   nir_foreach_variable_with_modes_safe(var, shader, mode) {
      if (var->data.patch)
         used = used_by_other_stage_patches;
      else
         used = used_by_other_stage;

      if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0)
         continue;

      if (var->data.always_active_io)
         continue;

      if (var->data.explicit_xfb_buffer)
         continue;

      uint64_t other_stage = used[var->data.location_frac];

      if (!(other_stage & get_variable_io_mask(var, shader->info.stage))) {
         /* This one is invalid, make it a global variable instead */
         var->data.location = 0;
         var->data.mode = nir_var_shader_temp;

         progress = true;
      }
   }

   if (progress)
      nir_fixup_deref_modes(shader);

   return progress;
}
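
/* To illustrate the caveat above (hypothetical shaders): if the consumer
 * only reads component 1 of VARYING_SLOT_VAR2, then used[1] has
 * VARYING_BIT_VAR2 set while used[0] does not, so a producer output at the
 * same slot with location_frac == 0 is demoted; only the variable's first
 * channel is consulted.
 */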

bool
nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
{
   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
   assert(consumer->info.stage != MESA_SHADER_VERTEX);

   uint64_t read[4] = { 0 }, written[4] = { 0 };
   uint64_t patches_read[4] = { 0 }, patches_written[4] = { 0 };

   nir_foreach_shader_out_variable(var, producer) {
      for (unsigned i = 0; i < get_num_components(var); i++) {
         if (var->data.patch) {
            if (is_non_generic_patch_var(var))
               continue;

            patches_written[var->data.location_frac + i] |=
               get_variable_io_mask(var, producer->info.stage);
         } else {
            written[var->data.location_frac + i] |=
               get_variable_io_mask(var, producer->info.stage);
         }
      }
   }

   nir_foreach_shader_in_variable(var, consumer) {
      for (unsigned i = 0; i < get_num_components(var); i++) {
         if (var->data.patch) {
            if (is_non_generic_patch_var(var))
               continue;

            patches_read[var->data.location_frac + i] |=
               get_variable_io_mask(var, consumer->info.stage);
         } else {
            read[var->data.location_frac + i] |=
               get_variable_io_mask(var, consumer->info.stage);
         }
      }
   }

   /* Each TCS invocation can read data written by other TCS invocations,
    * so even if the outputs are not used by the TES we must also make
    * sure they are not read by the TCS before demoting them to globals.
    */
   if (producer->info.stage == MESA_SHADER_TESS_CTRL)
      tcs_add_output_reads(producer, read, patches_read);

   bool progress = false;
   progress = nir_remove_unused_io_vars(producer, nir_var_shader_out, read,
                                        patches_read);

   progress = nir_remove_unused_io_vars(consumer, nir_var_shader_in, written,
                                        patches_written) || progress;

   return progress;
}

static uint8_t
get_interp_type(nir_variable *var, const struct glsl_type *type,
                bool default_to_smooth_interp)
{
   if (glsl_type_is_integer(type))
      return INTERP_MODE_FLAT;
   else if (var->data.interpolation != INTERP_MODE_NONE)
      return var->data.interpolation;
   else if (default_to_smooth_interp)
      return INTERP_MODE_SMOOTH;
   else
      return INTERP_MODE_NONE;
}

#define INTERPOLATE_LOC_SAMPLE 0
#define INTERPOLATE_LOC_CENTROID 1
#define INTERPOLATE_LOC_CENTER 2

static uint8_t
get_interp_loc(nir_variable *var)
{
   if (var->data.sample)
      return INTERPOLATE_LOC_SAMPLE;
   else if (var->data.centroid)
      return INTERPOLATE_LOC_CENTROID;
   else
      return INTERPOLATE_LOC_CENTER;
}

static bool
is_packing_supported_for_type(const struct glsl_type *type)
{
   /* We ignore complex types such as arrays, matrices, structs and bit sizes
    * other than 32bit. All other vector types should have been split into
    * scalar variables by the lower_io_to_scalar pass. The only exception
    * should be OpenGL xfb varyings.
    * TODO: add support for more complex types?
    */
   return glsl_type_is_scalar(type) && glsl_type_is_32bit(type);
}
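
/* For example, a lone "float" or "int" varying is packable here, while
 * "vec2", "double" and "float[4]" are not and will keep their existing
 * locations.
 */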

struct assigned_comps
{
   uint8_t comps;
   uint8_t interp_type;
   uint8_t interp_loc;
   bool is_32bit;
   bool is_mediump;
};

/* Packing arrays and dual slot varyings is difficult so to avoid complex
 * algorithms this function just assigns them their existing location for now.
 * TODO: allow better packing of complex types.
 */
static void
get_unmoveable_components_masks(nir_shader *shader,
                                nir_variable_mode mode,
                                struct assigned_comps *comps,
                                gl_shader_stage stage,
                                bool default_to_smooth_interp)
{
   nir_foreach_variable_with_modes_safe(var, shader, mode) {
      assert(var->data.location >= 0);

      /* Only remap things that aren't built-ins. */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         const struct glsl_type *type = var->type;
         if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         /* If we can pack this varying then don't mark the components as
          * used.
          */
         if (is_packing_supported_for_type(type))
            continue;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;

         unsigned elements =
            glsl_type_is_vector_or_scalar(glsl_without_array(type)) ?
            glsl_get_vector_elements(glsl_without_array(type)) : 4;

         bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type));
         unsigned slots = glsl_count_attribute_slots(type, false);
         unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
         unsigned comps_slot2 = 0;
         for (unsigned i = 0; i < slots; i++) {
            if (dual_slot) {
               if (i & 1) {
                  comps[location + i].comps |= ((1 << comps_slot2) - 1);
               } else {
                  unsigned num_comps = 4 - var->data.location_frac;
                  comps_slot2 = (elements * dmul) - num_comps;

                  /* Assume ARB_enhanced_layouts packing rules for doubles */
                  assert(var->data.location_frac == 0 ||
                         var->data.location_frac == 2);
                  assert(comps_slot2 <= 4);

                  comps[location + i].comps |=
                     ((1 << num_comps) - 1) << var->data.location_frac;
               }
            } else {
               comps[location + i].comps |=
                  ((1 << (elements * dmul)) - 1) << var->data.location_frac;
            }

            comps[location + i].interp_type =
               get_interp_type(var, type, default_to_smooth_interp);
            comps[location + i].interp_loc = get_interp_loc(var);
            comps[location + i].is_32bit =
               glsl_type_is_32bit(glsl_without_array(type));
            comps[location + i].is_mediump =
               var->data.precision == GLSL_PRECISION_MEDIUM ||
               var->data.precision == GLSL_PRECISION_LOW;
         }
      }
   }
}
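
/* A worked example of the dual-slot arithmetic above (illustrative only):
 * a dvec3 with location_frac == 2 has elements * dmul == 6 components.
 * The first slot takes num_comps == 2 components starting at component 2,
 * and the remaining comps_slot2 == 4 components spill into the second slot.
 */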

struct varying_loc
{
   uint8_t component;
   uint32_t location;
};

static void
mark_all_used_slots(nir_variable *var, uint64_t *slots_used,
                    uint64_t slots_used_mask, unsigned num_slots)
{
   unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;

   slots_used[var->data.patch ? 1 : 0] |= slots_used_mask &
      BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
}

static void
mark_used_slot(nir_variable *var, uint64_t *slots_used, unsigned offset)
{
   unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;

   slots_used[var->data.patch ? 1 : 0] |=
      BITFIELD64_BIT(var->data.location - loc_offset + offset);
}

static void
remap_slots_and_components(nir_shader *shader, nir_variable_mode mode,
                           struct varying_loc (*remap)[4],
                           uint64_t *slots_used, uint64_t *out_slots_read,
                           uint32_t *p_slots_used, uint32_t *p_out_slots_read)
{
   const gl_shader_stage stage = shader->info.stage;
   uint64_t out_slots_read_tmp[2] = {0};
   uint64_t slots_used_tmp[2] = {0};

   /* We don't touch builtins so just copy the bitmask */
   slots_used_tmp[0] = *slots_used & BITFIELD64_RANGE(0, VARYING_SLOT_VAR0);

   nir_foreach_variable_with_modes(var, shader, mode) {
      assert(var->data.location >= 0);

      /* Only remap things that aren't built-ins */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         const struct glsl_type *type = var->type;
         if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         unsigned num_slots = glsl_count_attribute_slots(type, false);
         bool used_across_stages = false;
         bool outputs_read = false;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         struct varying_loc *new_loc = &remap[location][var->data.location_frac];

         unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
         uint64_t used = var->data.patch ? *p_slots_used : *slots_used;
         uint64_t outs_used =
            var->data.patch ? *p_out_slots_read : *out_slots_read;
         uint64_t slots =
            BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);

         if (slots & used)
            used_across_stages = true;

         if (slots & outs_used)
            outputs_read = true;

         if (new_loc->location) {
            var->data.location = new_loc->location;
            var->data.location_frac = new_loc->component;
         }

         if (var->data.always_active_io) {
            /* We can't apply link time optimisations (specifically array
             * splitting) to these so we need to copy the existing mask
             * otherwise we will mess up the mask for things like partially
             * marked arrays.
             */
            if (used_across_stages)
               mark_all_used_slots(var, slots_used_tmp, used, num_slots);

            if (outputs_read) {
               mark_all_used_slots(var, out_slots_read_tmp, outs_used,
                                   num_slots);
            }
         } else {
            for (unsigned i = 0; i < num_slots; i++) {
               if (used_across_stages)
                  mark_used_slot(var, slots_used_tmp, i);

               if (outputs_read)
                  mark_used_slot(var, out_slots_read_tmp, i);
            }
         }
      }
   }

   *slots_used = slots_used_tmp[0];
   *out_slots_read = out_slots_read_tmp[0];
   *p_slots_used = slots_used_tmp[1];
   *p_out_slots_read = out_slots_read_tmp[1];
}

struct varying_component {
   nir_variable *var;
   uint8_t interp_type;
   uint8_t interp_loc;
   bool is_32bit;
   bool is_patch;
   bool is_mediump;
   bool is_intra_stage_only;
   bool initialised;
};

static int
cmp_varying_component(const void *comp1_v, const void *comp2_v)
{
   struct varying_component *comp1 = (struct varying_component *) comp1_v;
   struct varying_component *comp2 = (struct varying_component *) comp2_v;

   /* We want patches to be ordered at the end of the array */
   if (comp1->is_patch != comp2->is_patch)
      return comp1->is_patch ? 1 : -1;

   /* We want to try to group together TCS outputs that are only read by other
    * TCS invocations and not consumed by the following stage.
    */
   if (comp1->is_intra_stage_only != comp2->is_intra_stage_only)
      return comp1->is_intra_stage_only ? 1 : -1;

   /* Group mediump varyings together. */
   if (comp1->is_mediump != comp2->is_mediump)
      return comp1->is_mediump ? 1 : -1;

   /* We can only pack varyings with matching interpolation types so group
    * them together.
    */
   if (comp1->interp_type != comp2->interp_type)
      return comp1->interp_type - comp2->interp_type;

   /* Interpolation loc must match also. */
   if (comp1->interp_loc != comp2->interp_loc)
      return comp1->interp_loc - comp2->interp_loc;

   /* If everything else matches just use the original location to sort */
   const struct nir_variable_data *const data1 = &comp1->var->data;
   const struct nir_variable_data *const data2 = &comp2->var->data;
   if (data1->location != data2->location)
      return data1->location - data2->location;
   return (int)data1->location_frac - (int)data2->location_frac;
}
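
/* The resulting qsort order is: cross-stage varyings first, then
 * intra-stage-only TCS outputs, then patch varyings. Within each group,
 * mediump components sort after full-precision ones, and remaining ties
 * are broken by interpolation type, interpolation location and finally
 * the original location.
 */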

static void
gather_varying_component_info(nir_shader *producer, nir_shader *consumer,
                              struct varying_component **varying_comp_info,
                              unsigned *varying_comp_info_size,
                              bool default_to_smooth_interp)
{
   unsigned store_varying_info_idx[MAX_VARYINGS_INCL_PATCH][4] = {{0}};
   unsigned num_of_comps_to_pack = 0;

   /* Count the number of varyings that can be packed and create a mapping
    * of those varyings to the array we will pass to qsort.
    */
   nir_foreach_shader_out_variable(var, producer) {

      /* Only remap things that aren't builtins. */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         /* We can't repack xfb varyings. */
         if (var->data.always_active_io)
            continue;

         const struct glsl_type *type = var->type;
         if (nir_is_arrayed_io(var, producer->info.stage) || var->data.per_view) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         if (!is_packing_supported_for_type(type))
            continue;

         unsigned loc = var->data.location - VARYING_SLOT_VAR0;
         store_varying_info_idx[loc][var->data.location_frac] =
            ++num_of_comps_to_pack;
      }
   }

   *varying_comp_info_size = num_of_comps_to_pack;
   *varying_comp_info = rzalloc_array(NULL, struct varying_component,
                                      num_of_comps_to_pack);

   nir_function_impl *impl = nir_shader_get_entrypoint(consumer);

   /* Walk over the shader and populate the varying component info array */
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_offset &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_vertex)
            continue;

         nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
         if (!nir_deref_mode_is(deref, nir_var_shader_in))
            continue;

         /* We only remap things that aren't builtins. */
         nir_variable *in_var = nir_deref_instr_get_variable(deref);
         if (in_var->data.location < VARYING_SLOT_VAR0)
            continue;

         unsigned location = in_var->data.location - VARYING_SLOT_VAR0;
         if (location >= MAX_VARYINGS_INCL_PATCH)
            continue;

         unsigned var_info_idx =
            store_varying_info_idx[location][in_var->data.location_frac];
         if (!var_info_idx)
            continue;

         struct varying_component *vc_info =
            &(*varying_comp_info)[var_info_idx - 1];

         if (!vc_info->initialised) {
            const struct glsl_type *type = in_var->type;
            if (nir_is_arrayed_io(in_var, consumer->info.stage) ||
                in_var->data.per_view) {
               assert(glsl_type_is_array(type));
               type = glsl_get_array_element(type);
            }

            vc_info->var = in_var;
            vc_info->interp_type =
               get_interp_type(in_var, type, default_to_smooth_interp);
            vc_info->interp_loc = get_interp_loc(in_var);
            vc_info->is_32bit = glsl_type_is_32bit(type);
            vc_info->is_patch = in_var->data.patch;
            vc_info->is_mediump = !producer->options->linker_ignore_precision &&
               (in_var->data.precision == GLSL_PRECISION_MEDIUM ||
                in_var->data.precision == GLSL_PRECISION_LOW);
            vc_info->is_intra_stage_only = false;
            vc_info->initialised = true;
         }
      }
   }

   /* Walk over the shader and populate the varying component info array
    * for varyings which are read by other TCS invocations but are not
    * consumed by the TES.
    */
   if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
      impl = nir_shader_get_entrypoint(producer);

      nir_foreach_block(block, impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
            if (intr->intrinsic != nir_intrinsic_load_deref)
               continue;

            nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
            if (!nir_deref_mode_is(deref, nir_var_shader_out))
               continue;

            /* We only remap things that aren't builtins. */
            nir_variable *out_var = nir_deref_instr_get_variable(deref);
            if (out_var->data.location < VARYING_SLOT_VAR0)
               continue;

            unsigned location = out_var->data.location - VARYING_SLOT_VAR0;
            if (location >= MAX_VARYINGS_INCL_PATCH)
               continue;

            unsigned var_info_idx =
               store_varying_info_idx[location][out_var->data.location_frac];
            if (!var_info_idx) {
               /* Something went wrong, the shader interfaces didn't match, so
                * abandon packing. This can happen for example when the
                * inputs are scalars but the outputs are struct members.
                */
               *varying_comp_info_size = 0;
               break;
            }

            struct varying_component *vc_info =
               &(*varying_comp_info)[var_info_idx - 1];

            if (!vc_info->initialised) {
               const struct glsl_type *type = out_var->type;
               if (nir_is_arrayed_io(out_var, producer->info.stage)) {
                  assert(glsl_type_is_array(type));
                  type = glsl_get_array_element(type);
               }

               vc_info->var = out_var;
               vc_info->interp_type =
                  get_interp_type(out_var, type, default_to_smooth_interp);
               vc_info->interp_loc = get_interp_loc(out_var);
               vc_info->is_32bit = glsl_type_is_32bit(type);
               vc_info->is_patch = out_var->data.patch;
               vc_info->is_mediump = !producer->options->linker_ignore_precision &&
                  (out_var->data.precision == GLSL_PRECISION_MEDIUM ||
                   out_var->data.precision == GLSL_PRECISION_LOW);
               vc_info->is_intra_stage_only = true;
               vc_info->initialised = true;
            }
         }
      }
   }

   for (unsigned i = 0; i < *varying_comp_info_size; i++) {
      struct varying_component *vc_info = &(*varying_comp_info)[i];
      if (!vc_info->initialised) {
         /* Something went wrong, the shader interfaces didn't match, so
          * abandon packing. This can happen for example when the outputs are
          * scalars but the inputs are struct members.
          */
         *varying_comp_info_size = 0;
         break;
      }
   }
}

static bool
allow_pack_interp_type(nir_pack_varying_options options, int type)
{
   int sel;

   switch (type) {
   case INTERP_MODE_NONE:
      sel = nir_pack_varying_interp_mode_none;
      break;
   case INTERP_MODE_SMOOTH:
      sel = nir_pack_varying_interp_mode_smooth;
      break;
   case INTERP_MODE_FLAT:
      sel = nir_pack_varying_interp_mode_flat;
      break;
   case INTERP_MODE_NOPERSPECTIVE:
      sel = nir_pack_varying_interp_mode_noperspective;
      break;
   default:
      return false;
   }

   return options & sel;
}

static bool
allow_pack_interp_loc(nir_pack_varying_options options, int loc)
{
   int sel;

   switch (loc) {
   case INTERPOLATE_LOC_SAMPLE:
      sel = nir_pack_varying_interp_loc_sample;
      break;
   case INTERPOLATE_LOC_CENTROID:
      sel = nir_pack_varying_interp_loc_centroid;
      break;
   case INTERPOLATE_LOC_CENTER:
      sel = nir_pack_varying_interp_loc_center;
      break;
   default:
      return false;
   }

   return options & sel;
}
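
/* As a sketch: a driver that can interpolate flat and smooth components in
 * the same vec4 would advertise this by setting both
 * nir_pack_varying_interp_mode_flat and nir_pack_varying_interp_mode_smooth
 * in its pack_varying_options.
 */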

static void
assign_remap_locations(struct varying_loc (*remap)[4],
                       struct assigned_comps *assigned_comps,
                       struct varying_component *info,
                       unsigned *cursor, unsigned *comp,
                       unsigned max_location,
                       nir_pack_varying_options options)
{
   unsigned tmp_cursor = *cursor;
   unsigned tmp_comp = *comp;

   for (; tmp_cursor < max_location; tmp_cursor++) {

      if (assigned_comps[tmp_cursor].comps) {
         /* We can only pack varyings with matching precision. */
         if (assigned_comps[tmp_cursor].is_mediump != info->is_mediump) {
            tmp_comp = 0;
            continue;
         }

         /* We can only pack varyings with matching interpolation types,
          * unless the driver says it can handle the mismatch.
          */
         if (assigned_comps[tmp_cursor].interp_type != info->interp_type &&
             (!allow_pack_interp_type(options, assigned_comps[tmp_cursor].interp_type) ||
              !allow_pack_interp_type(options, info->interp_type))) {
            tmp_comp = 0;
            continue;
         }

         /* We can only pack varyings with matching interpolation locations,
          * unless the driver says it can handle the mismatch.
          */
         if (assigned_comps[tmp_cursor].interp_loc != info->interp_loc &&
             (!allow_pack_interp_loc(options, assigned_comps[tmp_cursor].interp_loc) ||
              !allow_pack_interp_loc(options, info->interp_loc))) {
            tmp_comp = 0;
            continue;
         }

         /* We can only pack varyings with matching types, and the current
          * algorithm only supports packing 32-bit.
          */
         if (!assigned_comps[tmp_cursor].is_32bit) {
            tmp_comp = 0;
            continue;
         }

         while (tmp_comp < 4 &&
                (assigned_comps[tmp_cursor].comps & (1 << tmp_comp))) {
            tmp_comp++;
         }
      }

      if (tmp_comp == 4) {
         tmp_comp = 0;
         continue;
      }

      unsigned location = info->var->data.location - VARYING_SLOT_VAR0;

      /* Once we have assigned a location mark it as used */
      assigned_comps[tmp_cursor].comps |= (1 << tmp_comp);
      assigned_comps[tmp_cursor].interp_type = info->interp_type;
      assigned_comps[tmp_cursor].interp_loc = info->interp_loc;
      assigned_comps[tmp_cursor].is_32bit = info->is_32bit;
      assigned_comps[tmp_cursor].is_mediump = info->is_mediump;

      /* Assign remap location */
      remap[location][info->var->data.location_frac].component = tmp_comp++;
      remap[location][info->var->data.location_frac].location =
         tmp_cursor + VARYING_SLOT_VAR0;

      break;
   }

   *cursor = tmp_cursor;
   *comp = tmp_comp;
}
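
/* For instance (hypothetical): if components x and y of the slot at cursor
 * 3 are already taken, a compatible scalar originally at VAR7.x gets
 * remap[7][0] = { .component = 2, .location = VARYING_SLOT_VAR0 + 3 },
 * which remap_slots_and_components() then applies to the variable.
 */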

/* If there are empty components in the slot compact the remaining components
 * as close to component 0 as possible. This will make it easier to fill the
 * empty components with components from a different slot in a following pass.
 */
static void
compact_components(nir_shader *producer, nir_shader *consumer,
                   struct assigned_comps *assigned_comps,
                   bool default_to_smooth_interp)
{
   struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = {{{0}, {0}}};
   struct varying_component *varying_comp_info;
   unsigned varying_comp_info_size;

   /* Gather varying component info */
   gather_varying_component_info(producer, consumer, &varying_comp_info,
                                 &varying_comp_info_size,
                                 default_to_smooth_interp);

   /* Sort varying components. */
   qsort(varying_comp_info, varying_comp_info_size,
         sizeof(struct varying_component), cmp_varying_component);

   nir_pack_varying_options options = consumer->options->pack_varying_options;

   unsigned cursor = 0;
   unsigned comp = 0;

   /* Set the remap array based on the sorted components */
   for (unsigned i = 0; i < varying_comp_info_size; i++) {
      struct varying_component *info = &varying_comp_info[i];

      assert(info->is_patch || cursor < MAX_VARYING);
      if (info->is_patch) {
         /* The list should be sorted with all non-patch inputs first followed
          * by patch inputs. When we hit our first patch input, we need to
          * reset the cursor to MAX_VARYING so we put them in the right slot.
          */
         if (cursor < MAX_VARYING) {
            cursor = MAX_VARYING;
            comp = 0;
         }

         assign_remap_locations(remap, assigned_comps, info,
                                &cursor, &comp, MAX_VARYINGS_INCL_PATCH,
                                options);
      } else {
         assign_remap_locations(remap, assigned_comps, info,
                                &cursor, &comp, MAX_VARYING,
                                options);

         /* Check if we failed to assign a remap location. This can happen
          * if, for example, there are a bunch of unmovable components with
          * mismatching interpolation types causing us to skip over locations
          * that would have been useful for packing later components.
          * The solution is to iterate over the locations again (this should
          * happen very rarely in practice).
          */
         if (cursor == MAX_VARYING) {
            cursor = 0;
            comp = 0;
            assign_remap_locations(remap, assigned_comps, info,
                                   &cursor, &comp, MAX_VARYING,
                                   options);
         }
      }
   }

   ralloc_free(varying_comp_info);

   uint64_t zero = 0;
   uint32_t zero32 = 0;
   remap_slots_and_components(consumer, nir_var_shader_in, remap,
                              &consumer->info.inputs_read, &zero,
                              &consumer->info.patch_inputs_read, &zero32);
   remap_slots_and_components(producer, nir_var_shader_out, remap,
                              &producer->info.outputs_written,
                              &producer->info.outputs_read,
                              &producer->info.patch_outputs_written,
                              &producer->info.patch_outputs_read);
}

/* We assume that this has been called more-or-less directly after
 * remove_unused_varyings. At this point, all of the varyings that we
 * aren't going to be using have been completely removed and the
 * inputs_read and outputs_written fields in nir_shader_info reflect
 * this. Therefore, the total set of valid slots is the OR of the two
 * sets of varyings; this accounts for varyings which one side may need
 * to read/write even if the other doesn't. This can happen if, for
 * instance, an array is used indirectly from one side causing it to be
 * unsplittable but directly from the other.
 */
void
nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
                     bool default_to_smooth_interp)
{
   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
   assert(consumer->info.stage != MESA_SHADER_VERTEX);

   struct assigned_comps assigned_comps[MAX_VARYINGS_INCL_PATCH] = {{0}};

   get_unmoveable_components_masks(producer, nir_var_shader_out,
                                   assigned_comps,
                                   producer->info.stage,
                                   default_to_smooth_interp);
   get_unmoveable_components_masks(consumer, nir_var_shader_in,
                                   assigned_comps,
                                   consumer->info.stage,
                                   default_to_smooth_interp);

   compact_components(producer, consumer, assigned_comps,
                      default_to_smooth_interp);
}
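
/* A typical link-time sequence might look like this (a sketch, matching
 * the assumption in the comment above):
 *
 *    nir_remove_unused_varyings(producer, consumer);
 *    nir_compact_varyings(producer, consumer, true);
 */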

/*
 * Mark XFB varyings as always_active_io in the consumer so the linking opts
 * don't touch them.
 */
void
nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer)
{
   nir_variable *input_vars[MAX_VARYING] = { 0 };

   nir_foreach_shader_in_variable(var, consumer) {
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         input_vars[location] = var;
      }
   }

   nir_foreach_shader_out_variable(var, producer) {
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {

         if (!var->data.always_active_io)
            continue;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         if (input_vars[location]) {
            input_vars[location]->data.always_active_io = true;
         }
      }
   }
}

static bool
does_varying_match(nir_variable *out_var, nir_variable *in_var)
{
   return in_var->data.location == out_var->data.location &&
          in_var->data.location_frac == out_var->data.location_frac;
}

static nir_variable *
get_matching_input_var(nir_shader *consumer, nir_variable *out_var)
{
   nir_foreach_shader_in_variable(var, consumer) {
      if (does_varying_match(out_var, var))
         return var;
   }

   return NULL;
}

static bool
can_replace_varying(nir_variable *out_var)
{
   /* Skip types that require more complex handling.
    * TODO: add support for these types.
    */
   if (glsl_type_is_array(out_var->type) ||
       glsl_type_is_dual_slot(out_var->type) ||
       glsl_type_is_matrix(out_var->type) ||
       glsl_type_is_struct_or_ifc(out_var->type))
      return false;

   /* Limit this pass to scalars for now to keep things simple. Most varyings
    * should have been lowered to scalars at this point anyway.
    */
   if (!glsl_type_is_scalar(out_var->type))
      return false;

   if (out_var->data.location < VARYING_SLOT_VAR0 ||
       out_var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING)
      return false;

   return true;
}

static bool
replace_varying_input_by_constant_load(nir_shader *shader,
                                       nir_intrinsic_instr *store_intr)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_variable *out_var =
      nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref)
            continue;

         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
         if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
            continue;

         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);

         if (!does_varying_match(out_var, in_var))
            continue;

         b.cursor = nir_before_instr(instr);

         nir_load_const_instr *out_const =
            nir_instr_as_load_const(store_intr->src[1].ssa->parent_instr);

         /* Add new const to replace the input */
         nir_ssa_def *nconst = nir_build_imm(&b, store_intr->num_components,
                                             intr->dest.ssa.bit_size,
                                             out_const->value);

         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nconst);

         progress = true;
      }
   }

   return progress;
}
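
/* For example (illustrative IR, hypothetical variable names): if the
 * producer's last block contains
 *
 *     vec1 32 ssa_5 = load_const (0.5)
 *     intrinsic store_deref (ssa_4, ssa_5) (1, 0)   // ssa_4 = &out_tex.x
 *
 * then every load of the matching input in the consumer is rewritten to an
 * immediate 0.5, and the varying itself can later be removed as dead.
 */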

static bool
replace_duplicate_input(nir_shader *shader, nir_variable *input_var,
                        nir_intrinsic_instr *dup_store_intr)
{
   assert(input_var);

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_variable *dup_out_var =
      nir_deref_instr_get_variable(nir_src_as_deref(dup_store_intr->src[0]));

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref)
            continue;

         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
         if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
            continue;

         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);

         if (!does_varying_match(dup_out_var, in_var) ||
             in_var->data.interpolation != input_var->data.interpolation ||
             get_interp_loc(in_var) != get_interp_loc(input_var))
            continue;

         b.cursor = nir_before_instr(instr);

         nir_ssa_def *load = nir_load_var(&b, input_var);
         nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);

         progress = true;
      }
   }

   return progress;
}

static bool
is_direct_uniform_load(nir_ssa_def *def, nir_ssa_scalar *s)
{
   /* def is sure to be scalar, as can_replace_varying() filters out the
    * vector case.
    */
   assert(def->num_components == 1);

   /* Uniform load may hide behind some move instruction for converting
    * vector to scalar:
    *
    *     vec1 32 ssa_1 = deref_var &color (uniform vec3)
    *     vec3 32 ssa_2 = intrinsic load_deref (ssa_1) (0)
    *     vec1 32 ssa_3 = mov ssa_2.x
    *     vec1 32 ssa_4 = deref_var &color_out (shader_out float)
    *     intrinsic store_deref (ssa_4, ssa_3) (1, 0)
    */
   *s = nir_ssa_scalar_resolved(def, 0);

   nir_ssa_def *ssa = s->def;
   if (ssa->parent_instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(ssa->parent_instr);
   if (intr->intrinsic != nir_intrinsic_load_deref)
      return false;

   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
   /* TODO: support nir_var_mem_ubo. */
   if (!nir_deref_mode_is(deref, nir_var_uniform))
      return false;

   /* Does not support indirect uniform load. */
   return !nir_deref_instr_has_indirect(deref);
}

static nir_variable *
get_uniform_var_in_consumer(nir_shader *consumer,
                            nir_variable *var_in_producer)
{
   /* Find if the uniform already exists in the consumer. */
   nir_variable *new_var = NULL;
   nir_foreach_uniform_variable(v, consumer) {
      if (!strcmp(var_in_producer->name, v->name)) {
         new_var = v;
         break;
      }
   }

   /* Create a variable if it doesn't exist. */
   if (!new_var) {
      new_var = nir_variable_clone(var_in_producer, consumer);
      nir_shader_add_variable(consumer, new_var);
   }

   return new_var;
}
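
/* Note: matching uniforms by name assumes both shaders come from the same
 * program, where two uniforms with the same name refer to the same storage.
 */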

static nir_deref_instr *
clone_deref_instr(nir_builder *b, nir_variable *var, nir_deref_instr *deref)
{
   if (deref->deref_type == nir_deref_type_var)
      return nir_build_deref_var(b, var);

   nir_deref_instr *parent_deref = nir_deref_instr_parent(deref);
   nir_deref_instr *parent = clone_deref_instr(b, var, parent_deref);

   /* Build array and struct deref instructions.
    * The "deref" instr is sure to be direct (see is_direct_uniform_load()).
    */
   switch (deref->deref_type) {
   case nir_deref_type_array: {
      nir_load_const_instr *index =
         nir_instr_as_load_const(deref->arr.index.ssa->parent_instr);
      return nir_build_deref_array_imm(b, parent, index->value->i64);
   }
   case nir_deref_type_ptr_as_array: {
      nir_load_const_instr *index =
         nir_instr_as_load_const(deref->arr.index.ssa->parent_instr);
      nir_ssa_def *ssa = nir_imm_intN_t(b, index->value->i64,
                                        parent->dest.ssa.bit_size);
      return nir_build_deref_ptr_as_array(b, parent, ssa);
   }
   case nir_deref_type_struct:
      return nir_build_deref_struct(b, parent, deref->strct.index);
   default:
      unreachable("invalid type");
      return NULL;
   }
}

static bool
replace_varying_input_by_uniform_load(nir_shader *shader,
                                      nir_intrinsic_instr *store_intr,
                                      nir_ssa_scalar *scalar)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_variable *out_var =
      nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));

   nir_intrinsic_instr *load = nir_instr_as_intrinsic(scalar->def->parent_instr);
   nir_deref_instr *deref = nir_src_as_deref(load->src[0]);
   nir_variable *uni_var = nir_deref_instr_get_variable(deref);
   uni_var = get_uniform_var_in_consumer(shader, uni_var);

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref)
            continue;

         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
         if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
            continue;

         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);

         if (!does_varying_match(out_var, in_var))
            continue;

         b.cursor = nir_before_instr(instr);

         /* Clone the uniform's deref chain, from the variable deref down to
          * the loaded leaf.
          */
         nir_deref_instr *uni_deref = clone_deref_instr(&b, uni_var, deref);
         nir_ssa_def *uni_def = nir_load_deref(&b, uni_deref);

         /* Add a vector to scalar move if the uniform is a vector. */
         if (uni_def->num_components > 1) {
            nir_alu_src src = {0};
            src.src = nir_src_for_ssa(uni_def);
            src.swizzle[0] = scalar->comp;
            uni_def = nir_mov_alu(&b, src, 1);
         }

         /* Replace the load of the input with a load of the uniform. */
         nir_ssa_def_rewrite_uses(&intr->dest.ssa, uni_def);

         progress = true;
      }
   }

   return progress;
}

/* The GLSL ES 3.20 spec says:
 *
 * "The precision of a vertex output does not need to match the precision of
 * the corresponding fragment input. The minimum precision at which vertex
 * outputs are interpolated is the minimum of the vertex output precision and
 * the fragment input precision, with the exception that for highp,
 * implementations do not have to support full IEEE 754 precision." (9.1 "Input
 * Output Matching by Name in Linked Programs")
 *
 * To implement this, when linking shaders we will take the minimum precision
 * qualifier (allowing drivers to interpolate at lower precision). For
 * input/output between non-fragment stages (e.g. VERTEX to GEOMETRY), the spec
 * requires we use the *last* specified precision if there is a conflict.
 *
 * Precisions are ordered as (NONE, HIGH, MEDIUM, LOW). If either precision is
 * NONE, we'll return the other precision, since there is no conflict.
 * Otherwise for fragment interpolation, we'll pick the smallest of (HIGH,
 * MEDIUM, LOW) by picking the maximum of the raw values - note the ordering is
 * "backwards". For non-fragment stages, we'll pick the latter precision to
 * comply with the spec. (Note that the order matters.)
 *
 * For streamout, "Variables declared with lowp or mediump precision are
 * promoted to highp before being written." (12.2 "Transform Feedback", p. 341
 * of OpenGL ES 3.2 specification). So drivers should promote them to highp
 * for the transform feedback memory store, but not the output store.
 */
static unsigned
nir_link_precision(unsigned producer, unsigned consumer, bool fs)
{
   if (producer == GLSL_PRECISION_NONE)
      return consumer;
   else if (consumer == GLSL_PRECISION_NONE)
      return producer;
   else
      return fs ? MAX2(producer, consumer) : consumer;
}
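
/* Worked example (per the "backwards" ordering described above): linking a
 * highp vertex output with a mediump fragment input yields
 * MAX2(GLSL_PRECISION_HIGH, GLSL_PRECISION_MEDIUM) == GLSL_PRECISION_MEDIUM,
 * i.e. the lower real-world precision wins for fragment interpolation.
 */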

void
nir_link_varying_precision(nir_shader *producer, nir_shader *consumer)
{
   bool frag = consumer->info.stage == MESA_SHADER_FRAGMENT;

   nir_foreach_shader_out_variable(producer_var, producer) {
      /* Skip if the slot is not assigned */
      if (producer_var->data.location < 0)
         continue;

      nir_variable *consumer_var = nir_find_variable_with_location(consumer,
         nir_var_shader_in, producer_var->data.location);

      /* Skip if the variable will be eliminated */
      if (!consumer_var)
         continue;

      /* Now we have a pair of variables. Let's pick the smaller precision. */
      unsigned precision_1 = producer_var->data.precision;
      unsigned precision_2 = consumer_var->data.precision;
      unsigned minimum = nir_link_precision(precision_1, precision_2, frag);

      /* Propagate the new precision */
      producer_var->data.precision = consumer_var->data.precision = minimum;
   }
}

bool
nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
{
   /* TODO: Add support for more shader stage combinations */
   if (consumer->info.stage != MESA_SHADER_FRAGMENT ||
       (producer->info.stage != MESA_SHADER_VERTEX &&
        producer->info.stage != MESA_SHADER_TESS_EVAL))
      return false;

   bool progress = false;

   nir_function_impl *impl = nir_shader_get_entrypoint(producer);

   struct hash_table *varying_values = _mesa_pointer_hash_table_create(NULL);

   /* If we find a store in the last block of the producer we can be sure this
    * is the only possible value for this output.
    */
   nir_block *last_block = nir_impl_last_block(impl);
   nir_foreach_instr_reverse(instr, last_block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      if (intr->intrinsic != nir_intrinsic_store_deref)
         continue;

      nir_deref_instr *out_deref = nir_src_as_deref(intr->src[0]);
      if (!nir_deref_mode_is(out_deref, nir_var_shader_out))
         continue;

      nir_variable *out_var = nir_deref_instr_get_variable(out_deref);
      if (!can_replace_varying(out_var))
         continue;

      nir_ssa_scalar uni_scalar;
      nir_ssa_def *ssa = intr->src[1].ssa;
      if (ssa->parent_instr->type == nir_instr_type_load_const) {
         progress |= replace_varying_input_by_constant_load(consumer, intr);
      } else if (is_direct_uniform_load(ssa, &uni_scalar)) {
         progress |= replace_varying_input_by_uniform_load(consumer, intr,
                                                           &uni_scalar);
      } else {
         struct hash_entry *entry =
            _mesa_hash_table_search(varying_values, ssa);
         if (entry) {
            progress |= replace_duplicate_input(consumer,
                                                (nir_variable *) entry->data,
                                                intr);
         } else {
            nir_variable *in_var = get_matching_input_var(consumer, out_var);
            if (in_var) {
               _mesa_hash_table_insert(varying_values, ssa, in_var);
            }
         }
      }
   }

   _mesa_hash_table_destroy(varying_values, NULL);

   return progress;
}

/* TODO: any better helper somewhere to sort a list? */

static void
insert_sorted(struct exec_list *var_list, nir_variable *new_var)
{
   nir_foreach_variable_in_list(var, var_list) {
      if (var->data.location > new_var->data.location) {
         exec_node_insert_node_before(&var->node, &new_var->node);
         return;
      }
   }
   exec_list_push_tail(var_list, &new_var->node);
}

static void
sort_varyings(nir_shader *shader, nir_variable_mode mode,
              struct exec_list *sorted_list)
{
   exec_list_make_empty(sorted_list);
   nir_foreach_variable_with_modes_safe(var, shader, mode) {
      exec_node_remove(&var->node);
      insert_sorted(sorted_list, var);
   }
}
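
/* insert_sorted() above implements a simple insertion sort by location; the
 * strict '>' comparison keeps the relative order of variables that share a
 * location, so the sort is stable.
 */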

void
nir_assign_io_var_locations(nir_shader *shader, nir_variable_mode mode,
                            unsigned *size, gl_shader_stage stage)
{
   unsigned location = 0;
   unsigned assigned_locations[VARYING_SLOT_TESS_MAX];
   uint64_t processed_locs[2] = {0};

   struct exec_list io_vars;
   sort_varyings(shader, mode, &io_vars);

   int UNUSED last_loc = 0;
   bool last_partial = false;
   nir_foreach_variable_in_list(var, &io_vars) {
      const struct glsl_type *type = var->type;
      if (nir_is_arrayed_io(var, stage)) {
         assert(glsl_type_is_array(type));
         type = glsl_get_array_element(type);
      }

      int base;
      if (var->data.mode == nir_var_shader_in && stage == MESA_SHADER_VERTEX)
         base = VERT_ATTRIB_GENERIC0;
      else if (var->data.mode == nir_var_shader_out &&
               stage == MESA_SHADER_FRAGMENT)
         base = FRAG_RESULT_DATA0;
      else
         base = VARYING_SLOT_VAR0;

      unsigned var_size, driver_size;
      if (var->data.compact) {
         /* If we are inside a partial compact, don't allow another compact
          * to be in this slot if it starts at component 0.
          */
         if (last_partial && var->data.location_frac == 0) {
            location++;
         }

         /* Compact variables must be arrays of scalars. */
         assert(!var->data.per_view);
         assert(glsl_type_is_array(type));
         assert(glsl_type_is_scalar(glsl_get_array_element(type)));
         unsigned start = 4 * location + var->data.location_frac;
         unsigned end = start + glsl_get_length(type);
         var_size = driver_size = end / 4 - location;
         last_partial = end % 4 != 0;
      } else {
         /* Compact variables bypass the normal varying compacting pass,
          * which means they cannot be in the same vec4 slot as a normal
          * variable. If part of the current slot is taken up by a compact
          * variable, we need to go to the next one.
          */
         if (last_partial) {
            location++;
            last_partial = false;
         }

         /* Per-view variables have an extra array dimension, which is
          * ignored when counting user-facing slots (var->data.location),
          * but *not* with driver slots (var->data.driver_location). That
          * is, each user slot maps to multiple driver slots.
          */
         driver_size = glsl_count_attribute_slots(type, false);
         if (var->data.per_view) {
            assert(glsl_type_is_array(type));
            var_size =
               glsl_count_attribute_slots(glsl_get_array_element(type), false);
         } else {
            var_size = driver_size;
         }
      }

      /* Builtins don't allow component packing so we only need to worry about
       * user defined varyings sharing the same location.
       */
      bool processed = false;
      if (var->data.location >= base) {
         unsigned glsl_location = var->data.location - base;

         for (unsigned i = 0; i < var_size; i++) {
            if (processed_locs[var->data.index] &
                ((uint64_t)1 << (glsl_location + i)))
               processed = true;
            else
               processed_locs[var->data.index] |=
                  ((uint64_t)1 << (glsl_location + i));
         }
      }

      /* Because component packing allows varyings to share the same location
       * we may have already processed this location.
       */
      if (processed) {
         /* TODO: handle overlapping per-view variables */
         assert(!var->data.per_view);
         unsigned driver_location = assigned_locations[var->data.location];
         var->data.driver_location = driver_location;

         /* An array may be packed such that it crosses multiple other arrays
          * or variables; we need to make sure we have allocated the elements
          * consecutively if the previously processed var was shorter than
          * the current array we are processing.
          *
          * NOTE: The code below assumes the var list is ordered in ascending
          * location order.
          */
         assert(last_loc <= var->data.location);
         last_loc = var->data.location;
         unsigned last_slot_location = driver_location + var_size;
         if (last_slot_location > location) {
            unsigned num_unallocated_slots = last_slot_location - location;
            unsigned first_unallocated_slot = var_size - num_unallocated_slots;
            for (unsigned i = first_unallocated_slot; i < var_size; i++) {
               assigned_locations[var->data.location + i] = location;
               location++;
            }
         }
         continue;
      }

      for (unsigned i = 0; i < var_size; i++) {
         assigned_locations[var->data.location + i] = location + i;
      }

      var->data.driver_location = location;
      location += driver_size;
   }

   if (last_partial)
      location++;

   exec_list_append(&shader->variables, &io_vars);
   *size = location;
}

static uint64_t
get_linked_variable_location(unsigned location, bool patch)
{
   if (!patch)
      return location;

   /* Reserve locations 0...3 for special patch variables
    * like tess factors and bounding boxes, and the generic patch
    * variables will come after them.
    */
   if (location >= VARYING_SLOT_PATCH0)
      return location - VARYING_SLOT_PATCH0 + 4;
   else if (location >= VARYING_SLOT_TESS_LEVEL_OUTER &&
            location <= VARYING_SLOT_BOUNDING_BOX1)
      return location - VARYING_SLOT_TESS_LEVEL_OUTER;
   else
      unreachable("Unsupported variable in get_linked_variable_location.");
}
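
/* For example, VARYING_SLOT_TESS_LEVEL_OUTER maps to linked patch location
 * 0, VARYING_SLOT_BOUNDING_BOX1 to 3, and VARYING_SLOT_PATCH2 to 4 + 2 = 6.
 */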

static uint64_t
get_linked_variable_io_mask(nir_variable *variable, gl_shader_stage stage)
{
   const struct glsl_type *type = variable->type;

   if (nir_is_arrayed_io(variable, stage)) {
      assert(glsl_type_is_array(type));
      type = glsl_get_array_element(type);
   }

   unsigned slots = glsl_count_attribute_slots(type, false);
   if (variable->data.compact) {
      unsigned component_count = variable->data.location_frac + glsl_get_length(type);
      slots = DIV_ROUND_UP(component_count, 4);
   }

   uint64_t mask = u_bit_consecutive64(0, slots);
   return mask;
}

nir_linked_io_var_info
nir_assign_linked_io_var_locations(nir_shader *producer, nir_shader *consumer)
{
   assert(producer);
   assert(consumer);

   uint64_t producer_output_mask = 0;
   uint64_t producer_patch_output_mask = 0;

   nir_foreach_shader_out_variable(variable, producer) {
      uint64_t mask = get_linked_variable_io_mask(variable, producer->info.stage);
      uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);

      if (variable->data.patch)
         producer_patch_output_mask |= mask << loc;
      else
         producer_output_mask |= mask << loc;
   }

   uint64_t consumer_input_mask = 0;
   uint64_t consumer_patch_input_mask = 0;

   nir_foreach_shader_in_variable(variable, consumer) {
      uint64_t mask = get_linked_variable_io_mask(variable, consumer->info.stage);
      uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);

      if (variable->data.patch)
         consumer_patch_input_mask |= mask << loc;
      else
         consumer_input_mask |= mask << loc;
   }

   uint64_t io_mask = producer_output_mask | consumer_input_mask;
   uint64_t patch_io_mask = producer_patch_output_mask | consumer_patch_input_mask;

   nir_foreach_shader_out_variable(variable, producer) {
      uint64_t loc = get_linked_variable_location(variable->data.location,
                                                  variable->data.patch);

      if (variable->data.patch)
         variable->data.driver_location = util_bitcount64(patch_io_mask & u_bit_consecutive64(0, loc));
      else
         variable->data.driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, loc));
   }

   nir_foreach_shader_in_variable(variable, consumer) {
      uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);

      if (variable->data.patch)
         variable->data.driver_location = util_bitcount64(patch_io_mask & u_bit_consecutive64(0, loc));
      else
         variable->data.driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, loc));
   }

   nir_linked_io_var_info result = {
      .num_linked_io_vars = util_bitcount64(io_mask),
      .num_linked_patch_io_vars = util_bitcount64(patch_io_mask),
   };

   return result;
}