1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2015 Intel Corporation 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21b8e80941Smrg * IN THE SOFTWARE. 22b8e80941Smrg */ 23b8e80941Smrg 24b8e80941Smrg#include "nir.h" 25b8e80941Smrg#include "nir_builder.h" 26b8e80941Smrg#include "util/set.h" 27b8e80941Smrg#include "util/hash_table.h" 28b8e80941Smrg 29b8e80941Smrg/* This file contains various little helpers for doing simple linking in 30b8e80941Smrg * NIR. Eventually, we'll probably want a full-blown varying packing 31b8e80941Smrg * implementation in here. Right now, it just deletes unused things. 
32b8e80941Smrg */ 33b8e80941Smrg 34b8e80941Smrg/** 35b8e80941Smrg * Returns the bits in the inputs_read, outputs_written, or 36b8e80941Smrg * system_values_read bitfield corresponding to this variable. 37b8e80941Smrg */ 38b8e80941Smrgstatic uint64_t 39b8e80941Smrgget_variable_io_mask(nir_variable *var, gl_shader_stage stage) 40b8e80941Smrg{ 41b8e80941Smrg if (var->data.location < 0) 42b8e80941Smrg return 0; 43b8e80941Smrg 44b8e80941Smrg unsigned location = var->data.patch ? 45b8e80941Smrg var->data.location - VARYING_SLOT_PATCH0 : var->data.location; 46b8e80941Smrg 47b8e80941Smrg assert(var->data.mode == nir_var_shader_in || 48b8e80941Smrg var->data.mode == nir_var_shader_out || 49b8e80941Smrg var->data.mode == nir_var_system_value); 50b8e80941Smrg assert(var->data.location >= 0); 51b8e80941Smrg 52b8e80941Smrg const struct glsl_type *type = var->type; 53b8e80941Smrg if (nir_is_per_vertex_io(var, stage)) { 54b8e80941Smrg assert(glsl_type_is_array(type)); 55b8e80941Smrg type = glsl_get_array_element(type); 56b8e80941Smrg } 57b8e80941Smrg 58b8e80941Smrg unsigned slots = glsl_count_attribute_slots(type, false); 59b8e80941Smrg return ((1ull << slots) - 1) << location; 60b8e80941Smrg} 61b8e80941Smrg 62b8e80941Smrgstatic uint8_t 63b8e80941Smrgget_num_components(nir_variable *var) 64b8e80941Smrg{ 65b8e80941Smrg if (glsl_type_is_struct_or_ifc(glsl_without_array(var->type))) 66b8e80941Smrg return 4; 67b8e80941Smrg 68b8e80941Smrg return glsl_get_vector_elements(glsl_without_array(var->type)); 69b8e80941Smrg} 70b8e80941Smrg 71b8e80941Smrgstatic void 72b8e80941Smrgtcs_add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read) 73b8e80941Smrg{ 74b8e80941Smrg nir_foreach_function(function, shader) { 75b8e80941Smrg if (!function->impl) 76b8e80941Smrg continue; 77b8e80941Smrg 78b8e80941Smrg nir_foreach_block(block, function->impl) { 79b8e80941Smrg nir_foreach_instr(instr, block) { 80b8e80941Smrg if (instr->type != nir_instr_type_intrinsic) 81b8e80941Smrg continue; 
82b8e80941Smrg 83b8e80941Smrg nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 84b8e80941Smrg if (intrin->intrinsic != nir_intrinsic_load_deref) 85b8e80941Smrg continue; 86b8e80941Smrg 87b8e80941Smrg nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 88b8e80941Smrg if (deref->mode != nir_var_shader_out) 89b8e80941Smrg continue; 90b8e80941Smrg 91b8e80941Smrg nir_variable *var = nir_deref_instr_get_variable(deref); 92b8e80941Smrg for (unsigned i = 0; i < get_num_components(var); i++) { 93b8e80941Smrg if (var->data.patch) { 94b8e80941Smrg patches_read[var->data.location_frac + i] |= 95b8e80941Smrg get_variable_io_mask(var, shader->info.stage); 96b8e80941Smrg } else { 97b8e80941Smrg read[var->data.location_frac + i] |= 98b8e80941Smrg get_variable_io_mask(var, shader->info.stage); 99b8e80941Smrg } 100b8e80941Smrg } 101b8e80941Smrg } 102b8e80941Smrg } 103b8e80941Smrg } 104b8e80941Smrg} 105b8e80941Smrg 106b8e80941Smrg/** 107b8e80941Smrg * Helper for removing unused shader I/O variables, by demoting them to global 108b8e80941Smrg * variables (which may then by dead code eliminated). 109b8e80941Smrg * 110b8e80941Smrg * Example usage is: 111b8e80941Smrg * 112b8e80941Smrg * progress = nir_remove_unused_io_vars(producer, 113b8e80941Smrg * &producer->outputs, 114b8e80941Smrg * read, patches_read) || 115b8e80941Smrg * progress; 116b8e80941Smrg * 117b8e80941Smrg * The "used" should be an array of 4 uint64_ts (probably of VARYING_BIT_*) 118b8e80941Smrg * representing each .location_frac used. Note that for vector variables, 119b8e80941Smrg * only the first channel (.location_frac) is examined for deciding if the 120b8e80941Smrg * variable is used! 
 */
bool
nir_remove_unused_io_vars(nir_shader *shader, struct exec_list *var_list,
                          uint64_t *used_by_other_stage,
                          uint64_t *used_by_other_stage_patches)
{
   bool progress = false;
   uint64_t *used;

   nir_foreach_variable_safe(var, var_list) {
      /* Patch varyings live in a separate location space and therefore
       * have their own "used" masks.
       */
      if (var->data.patch)
         used = used_by_other_stage_patches;
      else
         used = used_by_other_stage;

      /* Skip builtins (location below VARYING_SLOT_VAR0).  Variables with
       * a negative (unassigned) location are still processed.
       */
      if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0)
         continue;

      /* Transform-feedback varyings must keep their slots. */
      if (var->data.always_active_io)
         continue;

      if (var->data.explicit_xfb_buffer)
         continue;

      /* Only the first channel (.location_frac) decides liveness. */
      uint64_t other_stage = used[var->data.location_frac];

      if (!(other_stage & get_variable_io_mask(var, shader->info.stage))) {
         /* This one is invalid, make it a global variable instead */
         var->data.location = 0;
         var->data.mode = nir_var_shader_temp;

         exec_node_remove(&var->node);
         exec_list_push_tail(&shader->globals, &var->node);

         progress = true;
      }
   }

   /* Variables changed mode; derefs pointing at them must be updated too. */
   if (progress)
      nir_fixup_deref_modes(shader);

   return progress;
}

/**
 * Demote producer outputs never read by the consumer, and consumer inputs
 * never written by the producer, to globals in their respective shaders.
 * Returns true if any variable was demoted.
 */
bool
nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
{
   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
   assert(consumer->info.stage != MESA_SHADER_VERTEX);

   /* One 64-bit slot bitmask per possible .location_frac component. */
   uint64_t read[4] = { 0 }, written[4] = { 0 };
   uint64_t patches_read[4] = { 0 }, patches_written[4] = { 0 };

   nir_foreach_variable(var, &producer->outputs) {
      for (unsigned i = 0; i < get_num_components(var); i++) {
         if (var->data.patch) {
            patches_written[var->data.location_frac + i] |=
               get_variable_io_mask(var, producer->info.stage);
         } else {
            written[var->data.location_frac + i] |=
               get_variable_io_mask(var, producer->info.stage);
         }
      }
   }

   nir_foreach_variable(var, &consumer->inputs) {
      for (unsigned i = 0; i < get_num_components(var); i++) {
         if (var->data.patch) {
            patches_read[var->data.location_frac + i] |=
               get_variable_io_mask(var, consumer->info.stage);
         } else {
            read[var->data.location_frac + i] |=
               get_variable_io_mask(var, consumer->info.stage);
         }
      }
   }

   /* Each TCS invocation can read data written by other TCS invocations,
    * so even if the outputs are not used by the TES we must also make
    * sure they are not read by the TCS before demoting them to globals.
    */
   if (producer->info.stage == MESA_SHADER_TESS_CTRL)
      tcs_add_output_reads(producer, read, patches_read);

   bool progress = false;
   progress = nir_remove_unused_io_vars(producer, &producer->outputs, read,
                                        patches_read);

   progress = nir_remove_unused_io_vars(consumer, &consumer->inputs, written,
                                        patches_written) || progress;

   return progress;
}

/* Interpolation mode used for packing decisions: integers are always flat,
 * otherwise the variable's explicit mode, optionally defaulting to smooth.
 */
static uint8_t
get_interp_type(nir_variable *var, const struct glsl_type *type,
                bool default_to_smooth_interp)
{
   if (glsl_type_is_integer(type))
      return INTERP_MODE_FLAT;
   else if (var->data.interpolation != INTERP_MODE_NONE)
      return var->data.interpolation;
   else if (default_to_smooth_interp)
      return INTERP_MODE_SMOOTH;
   else
      return INTERP_MODE_NONE;
}

#define INTERPOLATE_LOC_SAMPLE 0
#define INTERPOLATE_LOC_CENTROID 1
#define INTERPOLATE_LOC_CENTER 2

/* Interpolation sample location (sample > centroid > center). */
static uint8_t
get_interp_loc(nir_variable *var)
{
   if (var->data.sample)
      return INTERPOLATE_LOC_SAMPLE;
   else if (var->data.centroid)
      return INTERPOLATE_LOC_CENTROID;
   else
      return INTERPOLATE_LOC_CENTER;
}

static bool
is_packing_supported_for_type(const struct glsl_type *type)
{
   /* We ignore complex types such as arrays, matrices, structs and bitsizes
    * other than 32bit. All other vector types should have been split into
    * scalar variables by the lower_io_to_scalar pass. The only exception
    * should be OpenGL xfb varyings.
    * TODO: add support for more complex types?
    */
   return glsl_type_is_scalar(type) && glsl_type_is_32bit(type);
}

/* Per-slot record of components already claimed by unmoveable varyings and
 * the interpolation settings everything sharing the slot must agree on.
 */
struct assigned_comps
{
   uint8_t comps;       /* bitmask of claimed components (x..w) */
   uint8_t interp_type; /* INTERP_MODE_* required for this slot */
   uint8_t interp_loc;  /* INTERPOLATE_LOC_* required for this slot */
   bool is_32bit;       /* slot only contains 32-bit components */
};

/* Packing arrays and dual slot varyings is difficult so to avoid complex
 * algorithms this function just assigns them their existing location for now.
 * TODO: allow better packing of complex types.
 */
static void
get_unmoveable_components_masks(struct exec_list *var_list,
                                struct assigned_comps *comps,
                                gl_shader_stage stage,
                                bool default_to_smooth_interp)
{
   nir_foreach_variable_safe(var, var_list) {
      assert(var->data.location >= 0);

      /* Only remap things that aren't built-ins. */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         const struct glsl_type *type = var->type;
         if (nir_is_per_vertex_io(var, stage)) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         /* If we can pack this varying then don't mark the components as
          * used.
          */
         if (is_packing_supported_for_type(type))
            continue;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;

         /* Non-vector types (matrices, structs) claim all 4 components. */
         unsigned elements =
            glsl_type_is_vector_or_scalar(glsl_without_array(type)) ?
            glsl_get_vector_elements(glsl_without_array(type)) : 4;

         bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type));
         unsigned slots = glsl_count_attribute_slots(type, false);
         unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
         unsigned comps_slot2 = 0;
         for (unsigned i = 0; i < slots; i++) {
            if (dual_slot) {
               if (i & 1) {
                  /* Odd slot of a dual-slot type: holds the components that
                   * spilled over from the preceding slot.
                   */
                  comps[location + i].comps |= ((1 << comps_slot2) - 1);
               } else {
                  unsigned num_comps = 4 - var->data.location_frac;
                  comps_slot2 = (elements * dmul) - num_comps;

                  /* Assume ARB_enhanced_layouts packing rules for doubles */
                  assert(var->data.location_frac == 0 ||
                         var->data.location_frac == 2);
                  assert(comps_slot2 <= 4);

                  comps[location + i].comps |=
                     ((1 << num_comps) - 1) << var->data.location_frac;
               }
            } else {
               comps[location + i].comps |=
                  ((1 << (elements * dmul)) - 1) << var->data.location_frac;
            }

            comps[location + i].interp_type =
               get_interp_type(var, type, default_to_smooth_interp);
            comps[location + i].interp_loc = get_interp_loc(var);
            comps[location + i].is_32bit =
               glsl_type_is_32bit(glsl_without_array(type));
         }
      }
   }
}

/* A new (location, component) assignment produced by the packing pass. */
struct varying_loc
{
   uint8_t component;
   uint32_t location; /* gl_varying_slot */
};

/* OR the bits of slots_used_mask overlapping var's slot range into the
 * correct (patch vs. non-patch) accumulated mask.
 */
static void
mark_all_used_slots(nir_variable *var, uint64_t *slots_used,
                    uint64_t slots_used_mask, unsigned num_slots)
{
   unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;

   slots_used[var->data.patch ? 1 : 0] |= slots_used_mask &
      BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
}

/* Mark a single slot (var's base location plus offset) as used. */
static void
mark_used_slot(nir_variable *var, uint64_t *slots_used, unsigned offset)
{
   unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;

   slots_used[var->data.patch ? 1 : 0] |=
      BITFIELD64_BIT(var->data.location - loc_offset + offset);
}

/* Apply the remap table to every variable in var_list and rebuild the
 * shader-info slot bitmasks (slots_used / out_slots_read and their patch
 * counterparts) so they reflect the new locations.
 */
static void
remap_slots_and_components(struct exec_list *var_list, gl_shader_stage stage,
                           struct varying_loc (*remap)[4],
                           uint64_t *slots_used, uint64_t *out_slots_read,
                           uint32_t *p_slots_used, uint32_t *p_out_slots_read)
{
   /* Index 0 accumulates regular slots, index 1 patch slots. */
   uint64_t out_slots_read_tmp[2] = {0};
   uint64_t slots_used_tmp[2] = {0};

   /* We don't touch builtins so just copy the bitmask */
   slots_used_tmp[0] = *slots_used & BITFIELD64_RANGE(0, VARYING_SLOT_VAR0);

   nir_foreach_variable(var, var_list) {
      assert(var->data.location >= 0);

      /* Only remap things that aren't built-ins */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         const struct glsl_type *type = var->type;
         if (nir_is_per_vertex_io(var, stage)) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         unsigned num_slots = glsl_count_attribute_slots(type, false);
         bool used_across_stages = false;
         bool outputs_read = false;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         struct varying_loc *new_loc = &remap[location][var->data.location_frac];

         /* Capture the masks for this variable's ORIGINAL slots before the
          * remap below possibly changes var->data.location.
          */
         unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
         uint64_t used = var->data.patch ? *p_slots_used : *slots_used;
         uint64_t outs_used =
            var->data.patch ? *p_out_slots_read : *out_slots_read;
         uint64_t slots =
            BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);

         if (slots & used)
            used_across_stages = true;

         if (slots & outs_used)
            outputs_read = true;

         /* A zero location means "no new assignment"; builtin slots were
          * excluded above so VARYING_SLOT_POS (0) can't appear here.
          */
         if (new_loc->location) {
            var->data.location = new_loc->location;
            var->data.location_frac = new_loc->component;
         }

         if (var->data.always_active_io) {
            /* We can't apply link time optimisations (specifically array
             * splitting) to these so we need to copy the existing mask
             * otherwise we will mess up the mask for things like partially
             * marked arrays.
             */
            if (used_across_stages)
               mark_all_used_slots(var, slots_used_tmp, used, num_slots);

            if (outputs_read) {
               mark_all_used_slots(var, out_slots_read_tmp, outs_used,
                                   num_slots);
            }
         } else {
            for (unsigned i = 0; i < num_slots; i++) {
               if (used_across_stages)
                  mark_used_slot(var, slots_used_tmp, i);

               if (outputs_read)
                  mark_used_slot(var, out_slots_read_tmp, i);
            }
         }
      }
   }

   *slots_used = slots_used_tmp[0];
   *out_slots_read = out_slots_read_tmp[0];
   *p_slots_used = slots_used_tmp[1];
   *p_out_slots_read = out_slots_read_tmp[1];
}

/* One packable scalar varying component, as gathered from the consumer. */
struct varying_component {
   nir_variable *var;
   uint8_t interp_type;
   uint8_t interp_loc;
   bool is_32bit;
   bool is_patch;
   bool initialised; /* set once the consumer's use of the var is seen */
};

/* qsort comparator: patches last, then grouped by interpolation type and
 * location, so compatible components end up adjacent for packing.
 */
static int
cmp_varying_component(const void *comp1_v, const void *comp2_v)
{
   struct varying_component *comp1 = (struct varying_component *) comp1_v;
   struct varying_component *comp2 = (struct varying_component *) comp2_v;

   /* We want patches to be ordered at the end of the array */
   if (comp1->is_patch != comp2->is_patch)
      return comp1->is_patch ? 1 : -1;

   /* We can only pack varyings with matching interpolation types so group
    * them together.
    */
   if (comp1->interp_type != comp2->interp_type)
      return comp1->interp_type - comp2->interp_type;

   /* Interpolation loc must match also. */
   if (comp1->interp_loc != comp2->interp_loc)
      return comp1->interp_loc - comp2->interp_loc;

   /* If everything else matches just use the original location to sort */
   return comp1->var->data.location - comp2->var->data.location;
}

/* Build the array of packable varying components for the consumer.  The
 * caller owns (and must ralloc_free) *varying_comp_info.
 */
static void
gather_varying_component_info(nir_shader *consumer,
                              struct varying_component **varying_comp_info,
                              unsigned *varying_comp_info_size,
                              bool default_to_smooth_interp)
{
   /* 1-based index into the info array per (location, component); 0 means
    * "not packable".
    */
   unsigned store_varying_info_idx[MAX_VARYINGS_INCL_PATCH][4] = {{0}};
   unsigned num_of_comps_to_pack = 0;

   /* Count the number of varying that can be packed and create a mapping
    * of those varyings to the array we will pass to qsort.
    */
   nir_foreach_variable(var, &consumer->inputs) {

      /* Only remap things that aren't builtins. */
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {

         /* We can't repack xfb varyings. */
         if (var->data.always_active_io)
            continue;

         const struct glsl_type *type = var->type;
         if (nir_is_per_vertex_io(var, consumer->info.stage)) {
            assert(glsl_type_is_array(type));
            type = glsl_get_array_element(type);
         }

         if (!is_packing_supported_for_type(type))
            continue;

         unsigned loc = var->data.location - VARYING_SLOT_VAR0;
         store_varying_info_idx[loc][var->data.location_frac] =
            ++num_of_comps_to_pack;
      }
   }

   *varying_comp_info_size = num_of_comps_to_pack;
   *varying_comp_info = rzalloc_array(NULL, struct varying_component,
                                      num_of_comps_to_pack);

   nir_function_impl *impl = nir_shader_get_entrypoint(consumer);

   /* Walk over the shader and populate the varying component info array */
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
             intr->intrinsic != nir_intrinsic_interp_deref_at_offset)
            continue;

         nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
         if (deref->mode != nir_var_shader_in)
            continue;

         /* We only remap things that aren't builtins. */
         nir_variable *in_var = nir_deref_instr_get_variable(deref);
         if (in_var->data.location < VARYING_SLOT_VAR0)
            continue;

         unsigned location = in_var->data.location - VARYING_SLOT_VAR0;
         if (location >= MAX_VARYINGS_INCL_PATCH)
            continue;

         unsigned var_info_idx =
            store_varying_info_idx[location][in_var->data.location_frac];
         if (!var_info_idx)
            continue;

         struct varying_component *vc_info =
            &(*varying_comp_info)[var_info_idx-1];

         if (!vc_info->initialised) {
            const struct glsl_type *type = in_var->type;
            if (nir_is_per_vertex_io(in_var, consumer->info.stage)) {
               assert(glsl_type_is_array(type));
               type = glsl_get_array_element(type);
            }

            vc_info->var = in_var;
            vc_info->interp_type =
               get_interp_type(in_var, type, default_to_smooth_interp);
            vc_info->interp_loc = get_interp_loc(in_var);
            vc_info->is_32bit = glsl_type_is_32bit(type);
            vc_info->is_patch = in_var->data.patch;
         }
      }
   }
}

/* Find the next free (slot, component) for info, scanning from *cursor /
 * *comp up to max_location, and record the assignment in remap.  On exit
 * *cursor / *comp point at the assignment (or at max_location on failure).
 */
static void
assign_remap_locations(struct varying_loc (*remap)[4],
                       struct assigned_comps *assigned_comps,
                       struct varying_component *info,
                       unsigned *cursor, unsigned *comp,
                       unsigned max_location)
{
   unsigned tmp_cursor = *cursor;
   unsigned tmp_comp = *comp;

   for (; tmp_cursor < max_location; tmp_cursor++) {

      if (assigned_comps[tmp_cursor].comps) {
         /* We can only pack varyings with matching interpolation types,
          * interpolation loc must match also.
          * TODO: i965 can handle interpolation locations that don't match,
          * but the radeonsi nir backend handles everything as vec4s and so
          * expects this to be the same for all components. We could make this
          * check driver specific or drop it if NIR ever become the only
          * radeonsi backend.
          */
         if (assigned_comps[tmp_cursor].interp_type != info->interp_type ||
             assigned_comps[tmp_cursor].interp_loc != info->interp_loc) {
            tmp_comp = 0;
            continue;
         }

         /* We can only pack varyings with matching types, and the current
          * algorithm only supports packing 32-bit.
          */
         if (!assigned_comps[tmp_cursor].is_32bit) {
            tmp_comp = 0;
            continue;
         }

         /* Skip components already claimed in this slot. */
         while (tmp_comp < 4 &&
                (assigned_comps[tmp_cursor].comps & (1 << tmp_comp))) {
            tmp_comp++;
         }
      }

      /* Slot full; try the next one from component 0. */
      if (tmp_comp == 4) {
         tmp_comp = 0;
         continue;
      }

      unsigned location = info->var->data.location - VARYING_SLOT_VAR0;

      /* Once we have assigned a location mark it as used */
      assigned_comps[tmp_cursor].comps |= (1 << tmp_comp);
      assigned_comps[tmp_cursor].interp_type = info->interp_type;
      assigned_comps[tmp_cursor].interp_loc = info->interp_loc;
      assigned_comps[tmp_cursor].is_32bit = info->is_32bit;

      /* Assign remap location */
      remap[location][info->var->data.location_frac].component = tmp_comp++;
      remap[location][info->var->data.location_frac].location =
         tmp_cursor + VARYING_SLOT_VAR0;

      break;
   }

   *cursor = tmp_cursor;
   *comp = tmp_comp;
}

/* If there are empty components in the slot compact the remaining components
 * as close to component 0 as possible. This will make it easier to fill the
 * empty components with components from a different slot in a following pass.
 */
static void
compact_components(nir_shader *producer, nir_shader *consumer,
                   struct assigned_comps *assigned_comps,
                   bool default_to_smooth_interp)
{
   struct exec_list *input_list = &consumer->inputs;
   struct exec_list *output_list = &producer->outputs;
   struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = {{{0}, {0}}};
   struct varying_component *varying_comp_info;
   unsigned varying_comp_info_size;

   /* Gather varying component info */
   gather_varying_component_info(consumer, &varying_comp_info,
                                 &varying_comp_info_size,
                                 default_to_smooth_interp);

   /* Sort varying components. */
   qsort(varying_comp_info, varying_comp_info_size,
         sizeof(struct varying_component), cmp_varying_component);

   unsigned cursor = 0;
   unsigned comp = 0;

   /* Set the remap array based on the sorted components */
   for (unsigned i = 0; i < varying_comp_info_size; i++ ) {
      struct varying_component *info = &varying_comp_info[i];

      assert(info->is_patch || cursor < MAX_VARYING);
      if (info->is_patch) {
         /* The list should be sorted with all non-patch inputs first followed
          * by patch inputs.  When we hit our first patch input, we need to
          * reset the cursor to MAX_VARYING so we put them in the right slot.
          */
         if (cursor < MAX_VARYING) {
            cursor = MAX_VARYING;
            comp = 0;
         }

         assign_remap_locations(remap, assigned_comps, info,
                                &cursor, &comp, MAX_VARYINGS_INCL_PATCH);
      } else {
         assign_remap_locations(remap, assigned_comps, info,
                                &cursor, &comp, MAX_VARYING);

         /* Check if we failed to assign a remap location. This can happen if
          * for example there are a bunch of unmovable components with
          * mismatching interpolation types causing us to skip over locations
          * that would have been useful for packing later components.
          * The solution is to iterate over the locations again (this should
          * happen very rarely in practice).
          */
         if (cursor == MAX_VARYING) {
            cursor = 0;
            comp = 0;
            assign_remap_locations(remap, assigned_comps, info,
                                   &cursor, &comp, MAX_VARYING);
         }
      }
   }

   ralloc_free(varying_comp_info);

   /* Inputs are never "read back", so pass dummy out-slots-read masks. */
   uint64_t zero = 0;
   uint32_t zero32 = 0;
   remap_slots_and_components(input_list, consumer->info.stage, remap,
                              &consumer->info.inputs_read, &zero,
                              &consumer->info.patch_inputs_read, &zero32);
   remap_slots_and_components(output_list, producer->info.stage, remap,
                              &producer->info.outputs_written,
                              &producer->info.outputs_read,
                              &producer->info.patch_outputs_written,
                              &producer->info.patch_outputs_read);
}

/* We assume that this has been called more-or-less directly after
 * remove_unused_varyings.  At this point, all of the varyings that we
 * aren't going to be using have been completely removed and the
 * inputs_read and outputs_written fields in nir_shader_info reflect
 * this.  Therefore, the total set of valid slots is the OR of the two
 * sets of varyings; this accounts for varyings which one side may need
 * to read/write even if the other doesn't.  This can happen if, for
 * instance, an array is used indirectly from one side causing it to be
 * unsplittable but directly from the other.
 */
void
nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
                     bool default_to_smooth_interp)
{
   /* Compaction is only run on interstage varyings: fragment-shader inputs
    * and vertex-shader inputs are fixed by the API and must not be moved.
    */
   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
   assert(consumer->info.stage != MESA_SHADER_VERTEX);

   /* Per-slot masks of components that must stay where they are.
    * Indexed by varying slot; patch varyings live above MAX_VARYING
    * (hence MAX_VARYINGS_INCL_PATCH entries).
    */
   struct assigned_comps assigned_comps[MAX_VARYINGS_INCL_PATCH] = {{0}};

   /* Collect the unmovable components from BOTH sides of the interface so
    * that compaction never relocates a component either stage depends on
    * staying put.
    */
   get_unmoveable_components_masks(&producer->outputs, assigned_comps,
                                   producer->info.stage,
                                   default_to_smooth_interp);
   get_unmoveable_components_masks(&consumer->inputs, assigned_comps,
                                   consumer->info.stage,
                                   default_to_smooth_interp);

   /* Pack the remaining (movable) components around the reserved ones and
    * rewrite the slot/component assignments in both shaders.
    */
   compact_components(producer, consumer, assigned_comps,
                      default_to_smooth_interp);
}

/*
 * Mark XFB varyings as always_active_io in the consumer so the linking opts
 * don't touch them.
 */
void
nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer)
{
   /* Consumer input variable for each generic varying slot (VAR0-relative),
    * NULL where the consumer has no input at that slot.
    */
   nir_variable *input_vars[MAX_VARYING] = { 0 };

   /* First pass: index the consumer's generic inputs by slot. */
   nir_foreach_variable(var, &consumer->inputs) {
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         input_vars[location] = var;
      }
   }

   /* Second pass: for every producer output already marked always-active
    * (e.g. captured by transform feedback), propagate the flag to the
    * matching consumer input so linking optimizations leave the pair alone.
    */
   nir_foreach_variable(var, &producer->outputs) {
      if (var->data.location >= VARYING_SLOT_VAR0 &&
          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {

         if (!var->data.always_active_io)
            continue;

         unsigned location = var->data.location - VARYING_SLOT_VAR0;
         if (input_vars[location]) {
            input_vars[location]->data.always_active_io = true;
         }
      }
   }
}

/* Returns true when the input and output variables occupy the same slot and
 * the same starting component within that slot.
 */
static bool
does_varying_match(nir_variable *out_var, nir_variable *in_var)
{
   return in_var->data.location == out_var->data.location &&
          in_var->data.location_frac == out_var->data.location_frac;
}

/* Linear search of the consumer's inputs for the variable matching the given
 * producer output (same slot and component), or NULL if none exists.
 */
static nir_variable *
get_matching_input_var(nir_shader *consumer, nir_variable *out_var)
{
   nir_foreach_variable(var, &consumer->inputs) {
      if (does_varying_match(out_var, var))
         return var;
   }

   return NULL;
}

/* Returns true if this output is simple enough (single-slot scalar in the
 * generic VAR0..MAX_VARYING range) for the replace_* optimizations below.
 */
static bool
can_replace_varying(nir_variable *out_var)
{
   /* Skip types that require more complex handling.
    * TODO: add support for these types.
    */
   if (glsl_type_is_array(out_var->type) ||
       glsl_type_is_dual_slot(out_var->type) ||
       glsl_type_is_matrix(out_var->type) ||
       glsl_type_is_struct_or_ifc(out_var->type))
      return false;

   /* Limit this pass to scalars for now to keep things simple. Most varyings
    * should have been lowered to scalars at this point anyway.
    */
   if (!glsl_type_is_scalar(out_var->type))
      return false;

   /* Only generic varyings are handled; builtins keep their slots. */
   if (out_var->data.location < VARYING_SLOT_VAR0 ||
       out_var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING)
      return false;

   return true;
}

/* Replaces every consumer load of the varying written by store_intr with the
 * constant being stored, since the producer provably always writes that
 * constant (the caller found the store in the producer's last block and
 * checked its source is a load_const).
 *
 * shader is the CONSUMER; store_intr is a store_deref in the PRODUCER.
 * Returns true if any load was rewritten.
 */
static bool
replace_constant_input(nir_shader *shader, nir_intrinsic_instr *store_intr)
{
   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   /* The producer-side output variable; consumer loads are matched against
    * it by slot/component via does_varying_match().
    */
   nir_variable *out_var =
      nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref)
            continue;

         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
         if (in_deref->mode != nir_var_shader_in)
            continue;

         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);

         if (!does_varying_match(out_var, in_var))
            continue;

         b.cursor = nir_before_instr(instr);

         /* Guaranteed by the caller's load_const check on src[1]. */
         nir_load_const_instr *out_const =
            nir_instr_as_load_const(store_intr->src[1].ssa->parent_instr);

         /* Add new const to replace the input */
         nir_ssa_def *nconst = nir_build_imm(&b, store_intr->num_components,
                                             intr->dest.ssa.bit_size,
                                             out_const->value);

         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(nconst));

         progress = true;
      }
   }

   return progress;
}

/* Redirects consumer loads of the varying written by dup_store_intr to
 * input_var instead, because the producer stores the same SSA value to both
 * outputs (detected by the caller via the varying_values hash table) — i.e.
 * the two varyings are duplicates of each other.
 *
 * shader is the CONSUMER. Interpolation qualifiers must match for the
 * redirect to be valid, hence the extra checks below.
 * Returns true if any load was rewritten.
 */
static bool
replace_duplicate_input(nir_shader *shader, nir_variable *input_var,
                        nir_intrinsic_instr *dup_store_intr)
{
   assert(input_var);

   nir_function_impl *impl = nir_shader_get_entrypoint(shader);

   nir_builder b;
   nir_builder_init(&b, impl);

   nir_variable *dup_out_var =
      nir_deref_instr_get_variable(nir_src_as_deref(dup_store_intr->src[0]));

   bool progress = false;
   nir_foreach_block(block, impl) {
      nir_foreach_instr(instr, block) {
         if (instr->type != nir_instr_type_intrinsic)
            continue;

         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
         if (intr->intrinsic != nir_intrinsic_load_deref)
            continue;

         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
         if (in_deref->mode != nir_var_shader_in)
            continue;

         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);

         /* Only fold when the duplicate input interpolates identically to
          * the surviving one, otherwise results could differ.
          */
         if (!does_varying_match(dup_out_var, in_var) ||
             in_var->data.interpolation != input_var->data.interpolation ||
             get_interp_loc(in_var) != get_interp_loc(input_var))
            continue;

         b.cursor = nir_before_instr(instr);

         nir_ssa_def *load = nir_load_var(&b, input_var);
         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(load));

         progress = true;
      }
   }

   return progress;
}

/* Linking optimization: forwards producer-constant outputs into the consumer
 * and de-duplicates outputs that carry the same value. Returns true if the
 * consumer was changed.
 */
bool
nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
{
   /* TODO: Add support for more shader stage combinations */
   if (consumer->info.stage != MESA_SHADER_FRAGMENT ||
       (producer->info.stage != MESA_SHADER_VERTEX &&
        producer->info.stage != MESA_SHADER_TESS_EVAL))
      return false;

   bool progress = false;

   nir_function_impl *impl = nir_shader_get_entrypoint(producer);

   /* Maps a stored SSA value -> the consumer input variable matching the
    * first output it was seen stored to; a later store of the same value to
    * a different output identifies a duplicate varying. Keys by pointer
    * identity of the nir_ssa_def.
    */
   struct hash_table *varying_values = _mesa_pointer_hash_table_create(NULL);

   /* If we find a store in the last block of the producer we can be sure this
    * is the only possible value for this output.
    */
   nir_block *last_block = nir_impl_last_block(impl);
   /* Walk backwards so the first store seen per output is the one that
    * actually takes effect at the end of the shader.
    */
   nir_foreach_instr_reverse(instr, last_block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);

      if (intr->intrinsic != nir_intrinsic_store_deref)
         continue;

      nir_deref_instr *out_deref = nir_src_as_deref(intr->src[0]);
      if (out_deref->mode != nir_var_shader_out)
         continue;

      nir_variable *out_var = nir_deref_instr_get_variable(out_deref);
      if (!can_replace_varying(out_var))
         continue;

      if (intr->src[1].ssa->parent_instr->type == nir_instr_type_load_const) {
         /* The output is a compile-time constant: propagate it into the
          * consumer directly.
          */
         progress |= replace_constant_input(consumer, intr);
      } else {
         struct hash_entry *entry =
            _mesa_hash_table_search(varying_values, intr->src[1].ssa);
         if (entry) {
            /* Same SSA value already stored to another output: redirect the
             * consumer's loads of this duplicate to the recorded input.
             */
            progress |= replace_duplicate_input(consumer,
                                                (nir_variable *) entry->data,
                                                intr);
         } else {
            /* First time we see this value; remember which consumer input
             * it feeds, if any, for future duplicate detection.
             */
            nir_variable *in_var = get_matching_input_var(consumer, out_var);
            if (in_var) {
               _mesa_hash_table_insert(varying_values, intr->src[1].ssa,
                                       in_var);
            }
         }
      }
   }

   _mesa_hash_table_destroy(varying_values, NULL);

   return progress;
}