101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2015 Intel Corporation 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2101e04c3fSmrg * IN THE SOFTWARE. 2201e04c3fSmrg */ 2301e04c3fSmrg 2401e04c3fSmrg#include "nir.h" 257e102996Smaya#include "nir_builder.h" 2601e04c3fSmrg#include "util/set.h" 2701e04c3fSmrg#include "util/hash_table.h" 2801e04c3fSmrg 2901e04c3fSmrg/* This file contains various little helpers for doing simple linking in 3001e04c3fSmrg * NIR. Eventually, we'll probably want a full-blown varying packing 3101e04c3fSmrg * implementation in here. Right now, it just deletes unused things. 3201e04c3fSmrg */ 3301e04c3fSmrg 3401e04c3fSmrg/** 357ec681f3Smrg * Returns the bits in the inputs_read, or outputs_written 367ec681f3Smrg * bitfield corresponding to this variable. 3701e04c3fSmrg */ 3801e04c3fSmrgstatic uint64_t 3901e04c3fSmrgget_variable_io_mask(nir_variable *var, gl_shader_stage stage) 4001e04c3fSmrg{ 4101e04c3fSmrg if (var->data.location < 0) 4201e04c3fSmrg return 0; 4301e04c3fSmrg 4401e04c3fSmrg unsigned location = var->data.patch ? 4501e04c3fSmrg var->data.location - VARYING_SLOT_PATCH0 : var->data.location; 4601e04c3fSmrg 4701e04c3fSmrg assert(var->data.mode == nir_var_shader_in || 487ec681f3Smrg var->data.mode == nir_var_shader_out); 4901e04c3fSmrg assert(var->data.location >= 0); 5001e04c3fSmrg 5101e04c3fSmrg const struct glsl_type *type = var->type; 527ec681f3Smrg if (nir_is_arrayed_io(var, stage) || var->data.per_view) { 5301e04c3fSmrg assert(glsl_type_is_array(type)); 5401e04c3fSmrg type = glsl_get_array_element(type); 5501e04c3fSmrg } 5601e04c3fSmrg 5701e04c3fSmrg unsigned slots = glsl_count_attribute_slots(type, false); 5801e04c3fSmrg return ((1ull << slots) - 1) << location; 5901e04c3fSmrg} 6001e04c3fSmrg 617ec681f3Smrgstatic bool 627ec681f3Smrgis_non_generic_patch_var(nir_variable *var) 637ec681f3Smrg{ 647ec681f3Smrg return var->data.location == VARYING_SLOT_TESS_LEVEL_INNER || 657ec681f3Smrg var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER || 667ec681f3Smrg var->data.location == VARYING_SLOT_BOUNDING_BOX0 || 677ec681f3Smrg var->data.location == VARYING_SLOT_BOUNDING_BOX1; 687ec681f3Smrg} 697ec681f3Smrg 707e102996Smayastatic uint8_t 717e102996Smayaget_num_components(nir_variable *var) 727e102996Smaya{ 737e102996Smaya if (glsl_type_is_struct_or_ifc(glsl_without_array(var->type))) 747e102996Smaya return 4; 757e102996Smaya 767e102996Smaya return glsl_get_vector_elements(glsl_without_array(var->type)); 777e102996Smaya} 787e102996Smaya 7901e04c3fSmrgstatic void 8001e04c3fSmrgtcs_add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read) 8101e04c3fSmrg{ 8201e04c3fSmrg nir_foreach_function(function, shader) { 8301e04c3fSmrg if (!function->impl) 8401e04c3fSmrg continue; 8501e04c3fSmrg 8601e04c3fSmrg nir_foreach_block(block, function->impl) { 8701e04c3fSmrg nir_foreach_instr(instr, block) { 8801e04c3fSmrg if (instr->type != nir_instr_type_intrinsic) 8901e04c3fSmrg continue; 9001e04c3fSmrg 9101e04c3fSmrg nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 9201e04c3fSmrg if (intrin->intrinsic != nir_intrinsic_load_deref) 9301e04c3fSmrg continue; 9401e04c3fSmrg 957e102996Smaya nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]); 967ec681f3Smrg if (!nir_deref_mode_is(deref, nir_var_shader_out)) 9701e04c3fSmrg continue; 9801e04c3fSmrg 997e102996Smaya nir_variable *var = nir_deref_instr_get_variable(deref); 1007e102996Smaya for (unsigned i = 0; i < get_num_components(var); i++) { 1017e102996Smaya if (var->data.patch) { 1027ec681f3Smrg if (is_non_generic_patch_var(var)) 1037ec681f3Smrg continue; 1047ec681f3Smrg 1057e102996Smaya patches_read[var->data.location_frac + i] |= 1067e102996Smaya get_variable_io_mask(var, shader->info.stage); 1077e102996Smaya } else { 1087e102996Smaya read[var->data.location_frac + i] |= 1097e102996Smaya get_variable_io_mask(var, shader->info.stage); 1107e102996Smaya } 11101e04c3fSmrg } 11201e04c3fSmrg } 11301e04c3fSmrg } 11401e04c3fSmrg } 11501e04c3fSmrg} 11601e04c3fSmrg 11701e04c3fSmrg/** 11801e04c3fSmrg * Helper for removing unused shader I/O variables, by demoting them to global 11901e04c3fSmrg * variables (which may then by dead code eliminated). 12001e04c3fSmrg * 12101e04c3fSmrg * Example usage is: 12201e04c3fSmrg * 1237ec681f3Smrg * progress = nir_remove_unused_io_vars(producer, nir_var_shader_out, 12401e04c3fSmrg * read, patches_read) || 12501e04c3fSmrg * progress; 12601e04c3fSmrg * 12701e04c3fSmrg * The "used" should be an array of 4 uint64_ts (probably of VARYING_BIT_*) 12801e04c3fSmrg * representing each .location_frac used. Note that for vector variables, 12901e04c3fSmrg * only the first channel (.location_frac) is examined for deciding if the 13001e04c3fSmrg * variable is used! 13101e04c3fSmrg */ 13201e04c3fSmrgbool 1337ec681f3Smrgnir_remove_unused_io_vars(nir_shader *shader, 1347ec681f3Smrg nir_variable_mode mode, 13501e04c3fSmrg uint64_t *used_by_other_stage, 13601e04c3fSmrg uint64_t *used_by_other_stage_patches) 13701e04c3fSmrg{ 13801e04c3fSmrg bool progress = false; 13901e04c3fSmrg uint64_t *used; 14001e04c3fSmrg 1417ec681f3Smrg assert(mode == nir_var_shader_in || mode == nir_var_shader_out); 1427ec681f3Smrg 1437ec681f3Smrg nir_foreach_variable_with_modes_safe(var, shader, mode) { 14401e04c3fSmrg if (var->data.patch) 14501e04c3fSmrg used = used_by_other_stage_patches; 14601e04c3fSmrg else 14701e04c3fSmrg used = used_by_other_stage; 14801e04c3fSmrg 14901e04c3fSmrg if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0) 15001e04c3fSmrg continue; 15101e04c3fSmrg 15201e04c3fSmrg if (var->data.always_active_io) 15301e04c3fSmrg continue; 15401e04c3fSmrg 1557e102996Smaya if (var->data.explicit_xfb_buffer) 1567e102996Smaya continue; 1577e102996Smaya 15801e04c3fSmrg uint64_t other_stage = used[var->data.location_frac]; 15901e04c3fSmrg 16001e04c3fSmrg if (!(other_stage & get_variable_io_mask(var, shader->info.stage))) { 16101e04c3fSmrg /* This one is invalid, make it a global variable instead */ 16201e04c3fSmrg var->data.location = 0; 1637e102996Smaya var->data.mode = nir_var_shader_temp; 16401e04c3fSmrg 16501e04c3fSmrg progress = true; 16601e04c3fSmrg } 16701e04c3fSmrg } 16801e04c3fSmrg 16901e04c3fSmrg if (progress) 17001e04c3fSmrg nir_fixup_deref_modes(shader); 17101e04c3fSmrg 17201e04c3fSmrg return progress; 17301e04c3fSmrg} 17401e04c3fSmrg 17501e04c3fSmrgbool 17601e04c3fSmrgnir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer) 17701e04c3fSmrg{ 17801e04c3fSmrg assert(producer->info.stage != MESA_SHADER_FRAGMENT); 17901e04c3fSmrg assert(consumer->info.stage != MESA_SHADER_VERTEX); 18001e04c3fSmrg 18101e04c3fSmrg uint64_t read[4] = { 0 }, written[4] = { 0 }; 18201e04c3fSmrg uint64_t patches_read[4] = { 0 }, patches_written[4] = { 0 }; 18301e04c3fSmrg 1847ec681f3Smrg nir_foreach_shader_out_variable(var, producer) { 1857e102996Smaya for (unsigned i = 0; i < get_num_components(var); i++) { 1867e102996Smaya if (var->data.patch) { 1877ec681f3Smrg if (is_non_generic_patch_var(var)) 1887ec681f3Smrg continue; 1897ec681f3Smrg 1907e102996Smaya patches_written[var->data.location_frac + i] |= 1917e102996Smaya get_variable_io_mask(var, producer->info.stage); 1927e102996Smaya } else { 1937e102996Smaya written[var->data.location_frac + i] |= 1947e102996Smaya get_variable_io_mask(var, producer->info.stage); 1957e102996Smaya } 19601e04c3fSmrg } 19701e04c3fSmrg } 19801e04c3fSmrg 1997ec681f3Smrg nir_foreach_shader_in_variable(var, consumer) { 2007e102996Smaya for (unsigned i = 0; i < get_num_components(var); i++) { 2017e102996Smaya if (var->data.patch) { 2027ec681f3Smrg if (is_non_generic_patch_var(var)) 2037ec681f3Smrg continue; 2047ec681f3Smrg 2057e102996Smaya patches_read[var->data.location_frac + i] |= 2067e102996Smaya get_variable_io_mask(var, consumer->info.stage); 2077e102996Smaya } else { 2087e102996Smaya read[var->data.location_frac + i] |= 2097e102996Smaya get_variable_io_mask(var, consumer->info.stage); 2107e102996Smaya } 21101e04c3fSmrg } 21201e04c3fSmrg } 21301e04c3fSmrg 21401e04c3fSmrg /* Each TCS invocation can read data written by other TCS invocations, 21501e04c3fSmrg * so even if the outputs are not used by the TES we must also make 21601e04c3fSmrg * sure they are not read by the TCS before demoting them to globals. 21701e04c3fSmrg */ 21801e04c3fSmrg if (producer->info.stage == MESA_SHADER_TESS_CTRL) 21901e04c3fSmrg tcs_add_output_reads(producer, read, patches_read); 22001e04c3fSmrg 22101e04c3fSmrg bool progress = false; 2227ec681f3Smrg progress = nir_remove_unused_io_vars(producer, nir_var_shader_out, read, 22301e04c3fSmrg patches_read); 22401e04c3fSmrg 2257ec681f3Smrg progress = nir_remove_unused_io_vars(consumer, nir_var_shader_in, written, 22601e04c3fSmrg patches_written) || progress; 22701e04c3fSmrg 22801e04c3fSmrg return progress; 22901e04c3fSmrg} 23001e04c3fSmrg 23101e04c3fSmrgstatic uint8_t 23201e04c3fSmrgget_interp_type(nir_variable *var, const struct glsl_type *type, 23301e04c3fSmrg bool default_to_smooth_interp) 23401e04c3fSmrg{ 23501e04c3fSmrg if (glsl_type_is_integer(type)) 23601e04c3fSmrg return INTERP_MODE_FLAT; 23701e04c3fSmrg else if (var->data.interpolation != INTERP_MODE_NONE) 23801e04c3fSmrg return var->data.interpolation; 23901e04c3fSmrg else if (default_to_smooth_interp) 24001e04c3fSmrg return INTERP_MODE_SMOOTH; 24101e04c3fSmrg else 24201e04c3fSmrg return INTERP_MODE_NONE; 24301e04c3fSmrg} 24401e04c3fSmrg 24501e04c3fSmrg#define INTERPOLATE_LOC_SAMPLE 0 24601e04c3fSmrg#define INTERPOLATE_LOC_CENTROID 1 24701e04c3fSmrg#define INTERPOLATE_LOC_CENTER 2 24801e04c3fSmrg 24901e04c3fSmrgstatic uint8_t 25001e04c3fSmrgget_interp_loc(nir_variable *var) 25101e04c3fSmrg{ 25201e04c3fSmrg if (var->data.sample) 25301e04c3fSmrg return INTERPOLATE_LOC_SAMPLE; 25401e04c3fSmrg else if (var->data.centroid) 25501e04c3fSmrg return INTERPOLATE_LOC_CENTROID; 25601e04c3fSmrg else 25701e04c3fSmrg return INTERPOLATE_LOC_CENTER; 25801e04c3fSmrg} 25901e04c3fSmrg 2607e102996Smayastatic bool 2617e102996Smayais_packing_supported_for_type(const struct glsl_type *type) 2627e102996Smaya{ 2637e102996Smaya /* We ignore complex types such as arrays, matrices, structs and bitsizes 2647e102996Smaya * other then 32bit. All other vector types should have been split into 2657e102996Smaya * scalar variables by the lower_io_to_scalar pass. The only exception 2667e102996Smaya * should be OpenGL xfb varyings. 2677e102996Smaya * TODO: add support for more complex types? 2687e102996Smaya */ 2697e102996Smaya return glsl_type_is_scalar(type) && glsl_type_is_32bit(type); 2707e102996Smaya} 2717e102996Smaya 2727e102996Smayastruct assigned_comps 2737e102996Smaya{ 2747e102996Smaya uint8_t comps; 2757e102996Smaya uint8_t interp_type; 2767e102996Smaya uint8_t interp_loc; 2777e102996Smaya bool is_32bit; 2787ec681f3Smrg bool is_mediump; 2797e102996Smaya}; 2807e102996Smaya 2817e102996Smaya/* Packing arrays and dual slot varyings is difficult so to avoid complex 2827e102996Smaya * algorithms this function just assigns them their existing location for now. 2837e102996Smaya * TODO: allow better packing of complex types. 2847e102996Smaya */ 28501e04c3fSmrgstatic void 2867ec681f3Smrgget_unmoveable_components_masks(nir_shader *shader, 2877ec681f3Smrg nir_variable_mode mode, 2887e102996Smaya struct assigned_comps *comps, 2897e102996Smaya gl_shader_stage stage, 2907e102996Smaya bool default_to_smooth_interp) 29101e04c3fSmrg{ 2927ec681f3Smrg nir_foreach_variable_with_modes_safe(var, shader, mode) { 29301e04c3fSmrg assert(var->data.location >= 0); 29401e04c3fSmrg 2957e102996Smaya /* Only remap things that aren't built-ins. */ 29601e04c3fSmrg if (var->data.location >= VARYING_SLOT_VAR0 && 2977e102996Smaya var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) { 29801e04c3fSmrg 29901e04c3fSmrg const struct glsl_type *type = var->type; 3007ec681f3Smrg if (nir_is_arrayed_io(var, stage) || var->data.per_view) { 30101e04c3fSmrg assert(glsl_type_is_array(type)); 30201e04c3fSmrg type = glsl_get_array_element(type); 30301e04c3fSmrg } 30401e04c3fSmrg 3057e102996Smaya /* If we can pack this varying then don't mark the components as 3067e102996Smaya * used. 3077e102996Smaya */ 3087e102996Smaya if (is_packing_supported_for_type(type)) 3097e102996Smaya continue; 3107e102996Smaya 31101e04c3fSmrg unsigned location = var->data.location - VARYING_SLOT_VAR0; 3127e102996Smaya 31301e04c3fSmrg unsigned elements = 3147e102996Smaya glsl_type_is_vector_or_scalar(glsl_without_array(type)) ? 3157e102996Smaya glsl_get_vector_elements(glsl_without_array(type)) : 4; 31601e04c3fSmrg 31701e04c3fSmrg bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type)); 31801e04c3fSmrg unsigned slots = glsl_count_attribute_slots(type, false); 3197e102996Smaya unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1; 32001e04c3fSmrg unsigned comps_slot2 = 0; 32101e04c3fSmrg for (unsigned i = 0; i < slots; i++) { 32201e04c3fSmrg if (dual_slot) { 32301e04c3fSmrg if (i & 1) { 3247e102996Smaya comps[location + i].comps |= ((1 << comps_slot2) - 1); 32501e04c3fSmrg } else { 32601e04c3fSmrg unsigned num_comps = 4 - var->data.location_frac; 3277e102996Smaya comps_slot2 = (elements * dmul) - num_comps; 32801e04c3fSmrg 32901e04c3fSmrg /* Assume ARB_enhanced_layouts packing rules for doubles */ 33001e04c3fSmrg assert(var->data.location_frac == 0 || 33101e04c3fSmrg var->data.location_frac == 2); 33201e04c3fSmrg assert(comps_slot2 <= 4); 33301e04c3fSmrg 3347e102996Smaya comps[location + i].comps |= 33501e04c3fSmrg ((1 << num_comps) - 1) << var->data.location_frac; 33601e04c3fSmrg } 33701e04c3fSmrg } else { 3387e102996Smaya comps[location + i].comps |= 3397e102996Smaya ((1 << (elements * dmul)) - 1) << var->data.location_frac; 34001e04c3fSmrg } 3417e102996Smaya 3427e102996Smaya comps[location + i].interp_type = 3437e102996Smaya get_interp_type(var, type, default_to_smooth_interp); 3447e102996Smaya comps[location + i].interp_loc = get_interp_loc(var); 3457e102996Smaya comps[location + i].is_32bit = 3467e102996Smaya glsl_type_is_32bit(glsl_without_array(type)); 3477ec681f3Smrg comps[location + i].is_mediump = 3487ec681f3Smrg var->data.precision == GLSL_PRECISION_MEDIUM || 3497ec681f3Smrg var->data.precision == GLSL_PRECISION_LOW; 35001e04c3fSmrg } 35101e04c3fSmrg } 35201e04c3fSmrg } 35301e04c3fSmrg} 35401e04c3fSmrg 35501e04c3fSmrgstruct varying_loc 35601e04c3fSmrg{ 35701e04c3fSmrg uint8_t component; 35801e04c3fSmrg uint32_t location; 35901e04c3fSmrg}; 36001e04c3fSmrg 3617e102996Smayastatic void 3627e102996Smayamark_all_used_slots(nir_variable *var, uint64_t *slots_used, 3637e102996Smaya uint64_t slots_used_mask, unsigned num_slots) 3647e102996Smaya{ 3657e102996Smaya unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0; 3667e102996Smaya 3677e102996Smaya slots_used[var->data.patch ? 1 : 0] |= slots_used_mask & 3687e102996Smaya BITFIELD64_RANGE(var->data.location - loc_offset, num_slots); 3697e102996Smaya} 3707e102996Smaya 3717e102996Smayastatic void 3727e102996Smayamark_used_slot(nir_variable *var, uint64_t *slots_used, unsigned offset) 3737e102996Smaya{ 3747e102996Smaya unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0; 3757e102996Smaya 3767e102996Smaya slots_used[var->data.patch ? 1 : 0] |= 3777e102996Smaya BITFIELD64_BIT(var->data.location - loc_offset + offset); 3787e102996Smaya} 3797e102996Smaya 38001e04c3fSmrgstatic void 3817ec681f3Smrgremap_slots_and_components(nir_shader *shader, nir_variable_mode mode, 38201e04c3fSmrg struct varying_loc (*remap)[4], 3837e102996Smaya uint64_t *slots_used, uint64_t *out_slots_read, 3847e102996Smaya uint32_t *p_slots_used, uint32_t *p_out_slots_read) 38501e04c3fSmrg { 3867ec681f3Smrg const gl_shader_stage stage = shader->info.stage; 3877e102996Smaya uint64_t out_slots_read_tmp[2] = {0}; 3887e102996Smaya uint64_t slots_used_tmp[2] = {0}; 38901e04c3fSmrg 39001e04c3fSmrg /* We don't touch builtins so just copy the bitmask */ 3917e102996Smaya slots_used_tmp[0] = *slots_used & BITFIELD64_RANGE(0, VARYING_SLOT_VAR0); 39201e04c3fSmrg 3937ec681f3Smrg nir_foreach_variable_with_modes(var, shader, mode) { 39401e04c3fSmrg assert(var->data.location >= 0); 39501e04c3fSmrg 39601e04c3fSmrg /* Only remap things that aren't built-ins */ 39701e04c3fSmrg if (var->data.location >= VARYING_SLOT_VAR0 && 3987e102996Smaya var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) { 39901e04c3fSmrg 40001e04c3fSmrg const struct glsl_type *type = var->type; 4017ec681f3Smrg if (nir_is_arrayed_io(var, stage) || var->data.per_view) { 40201e04c3fSmrg assert(glsl_type_is_array(type)); 40301e04c3fSmrg type = glsl_get_array_element(type); 40401e04c3fSmrg } 40501e04c3fSmrg 40601e04c3fSmrg unsigned num_slots = glsl_count_attribute_slots(type, false); 40701e04c3fSmrg bool used_across_stages = false; 40801e04c3fSmrg bool outputs_read = false; 40901e04c3fSmrg 41001e04c3fSmrg unsigned location = var->data.location - VARYING_SLOT_VAR0; 41101e04c3fSmrg struct varying_loc *new_loc = &remap[location][var->data.location_frac]; 41201e04c3fSmrg 4137e102996Smaya unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0; 4147e102996Smaya uint64_t used = var->data.patch ? *p_slots_used : *slots_used; 4157e102996Smaya uint64_t outs_used = 4167e102996Smaya var->data.patch ? *p_out_slots_read : *out_slots_read; 4177e102996Smaya uint64_t slots = 4187e102996Smaya BITFIELD64_RANGE(var->data.location - loc_offset, num_slots); 4197e102996Smaya 4207e102996Smaya if (slots & used) 42101e04c3fSmrg used_across_stages = true; 42201e04c3fSmrg 4237e102996Smaya if (slots & outs_used) 42401e04c3fSmrg outputs_read = true; 42501e04c3fSmrg 42601e04c3fSmrg if (new_loc->location) { 42701e04c3fSmrg var->data.location = new_loc->location; 42801e04c3fSmrg var->data.location_frac = new_loc->component; 42901e04c3fSmrg } 43001e04c3fSmrg 43101e04c3fSmrg if (var->data.always_active_io) { 43201e04c3fSmrg /* We can't apply link time optimisations (specifically array 43301e04c3fSmrg * splitting) to these so we need to copy the existing mask 43401e04c3fSmrg * otherwise we will mess up the mask for things like partially 43501e04c3fSmrg * marked arrays. 43601e04c3fSmrg */ 4377e102996Smaya if (used_across_stages) 4387e102996Smaya mark_all_used_slots(var, slots_used_tmp, used, num_slots); 43901e04c3fSmrg 44001e04c3fSmrg if (outputs_read) { 4417e102996Smaya mark_all_used_slots(var, out_slots_read_tmp, outs_used, 4427e102996Smaya num_slots); 44301e04c3fSmrg } 44401e04c3fSmrg } else { 44501e04c3fSmrg for (unsigned i = 0; i < num_slots; i++) { 44601e04c3fSmrg if (used_across_stages) 4477e102996Smaya mark_used_slot(var, slots_used_tmp, i); 44801e04c3fSmrg 44901e04c3fSmrg if (outputs_read) 4507e102996Smaya mark_used_slot(var, out_slots_read_tmp, i); 45101e04c3fSmrg } 45201e04c3fSmrg } 45301e04c3fSmrg } 45401e04c3fSmrg } 45501e04c3fSmrg 4567e102996Smaya *slots_used = slots_used_tmp[0]; 4577e102996Smaya *out_slots_read = out_slots_read_tmp[0]; 4587e102996Smaya *p_slots_used = slots_used_tmp[1]; 4597e102996Smaya *p_out_slots_read = out_slots_read_tmp[1]; 46001e04c3fSmrg} 46101e04c3fSmrg 4627e102996Smayastruct varying_component { 4637e102996Smaya nir_variable *var; 4647e102996Smaya uint8_t interp_type; 4657e102996Smaya uint8_t interp_loc; 4667e102996Smaya bool is_32bit; 4677e102996Smaya bool is_patch; 4687ec681f3Smrg bool is_mediump; 4697ec681f3Smrg bool is_intra_stage_only; 4707e102996Smaya bool initialised; 4717e102996Smaya}; 4727e102996Smaya 4737e102996Smayastatic int 4747e102996Smayacmp_varying_component(const void *comp1_v, const void *comp2_v) 47501e04c3fSmrg{ 4767e102996Smaya struct varying_component *comp1 = (struct varying_component *) comp1_v; 4777e102996Smaya struct varying_component *comp2 = (struct varying_component *) comp2_v; 4787e102996Smaya 4797e102996Smaya /* We want patches to be order at the end of the array */ 4807e102996Smaya if (comp1->is_patch != comp2->is_patch) 4817e102996Smaya return comp1->is_patch ? 1 : -1; 4827e102996Smaya 4837ec681f3Smrg /* We want to try to group together TCS outputs that are only read by other 4847ec681f3Smrg * TCS invocations and not consumed by the follow stage. 4857ec681f3Smrg */ 4867ec681f3Smrg if (comp1->is_intra_stage_only != comp2->is_intra_stage_only) 4877ec681f3Smrg return comp1->is_intra_stage_only ? 1 : -1; 4887ec681f3Smrg 4897ec681f3Smrg /* Group mediump varyings together. */ 4907ec681f3Smrg if (comp1->is_mediump != comp2->is_mediump) 4917ec681f3Smrg return comp1->is_mediump ? 1 : -1; 4927ec681f3Smrg 4937e102996Smaya /* We can only pack varyings with matching interpolation types so group 4947e102996Smaya * them together. 4957e102996Smaya */ 4967e102996Smaya if (comp1->interp_type != comp2->interp_type) 4977e102996Smaya return comp1->interp_type - comp2->interp_type; 4987e102996Smaya 4997e102996Smaya /* Interpolation loc must match also. */ 5007e102996Smaya if (comp1->interp_loc != comp2->interp_loc) 5017e102996Smaya return comp1->interp_loc - comp2->interp_loc; 5027e102996Smaya 5037e102996Smaya /* If everything else matches just use the original location to sort */ 5047ec681f3Smrg const struct nir_variable_data *const data1 = &comp1->var->data; 5057ec681f3Smrg const struct nir_variable_data *const data2 = &comp2->var->data; 5067ec681f3Smrg if (data1->location != data2->location) 5077ec681f3Smrg return data1->location - data2->location; 5087ec681f3Smrg return (int)data1->location_frac - (int)data2->location_frac; 5097e102996Smaya} 51001e04c3fSmrg 5117e102996Smayastatic void 5127ec681f3Smrggather_varying_component_info(nir_shader *producer, nir_shader *consumer, 5137e102996Smaya struct varying_component **varying_comp_info, 5147e102996Smaya unsigned *varying_comp_info_size, 5157e102996Smaya bool default_to_smooth_interp) 5167e102996Smaya{ 5177e102996Smaya unsigned store_varying_info_idx[MAX_VARYINGS_INCL_PATCH][4] = {{0}}; 5187e102996Smaya unsigned num_of_comps_to_pack = 0; 51901e04c3fSmrg 5207e102996Smaya /* Count the number of varying that can be packed and create a mapping 5217e102996Smaya * of those varyings to the array we will pass to qsort. 52201e04c3fSmrg */ 5237ec681f3Smrg nir_foreach_shader_out_variable(var, producer) { 52401e04c3fSmrg 5257e102996Smaya /* Only remap things that aren't builtins. */ 52601e04c3fSmrg if (var->data.location >= VARYING_SLOT_VAR0 && 5277e102996Smaya var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) { 52801e04c3fSmrg 52901e04c3fSmrg /* We can't repack xfb varyings. */ 53001e04c3fSmrg if (var->data.always_active_io) 53101e04c3fSmrg continue; 53201e04c3fSmrg 53301e04c3fSmrg const struct glsl_type *type = var->type; 5347ec681f3Smrg if (nir_is_arrayed_io(var, producer->info.stage) || var->data.per_view) { 53501e04c3fSmrg assert(glsl_type_is_array(type)); 53601e04c3fSmrg type = glsl_get_array_element(type); 53701e04c3fSmrg } 53801e04c3fSmrg 5397e102996Smaya if (!is_packing_supported_for_type(type)) 54001e04c3fSmrg continue; 54101e04c3fSmrg 5427e102996Smaya unsigned loc = var->data.location - VARYING_SLOT_VAR0; 5437e102996Smaya store_varying_info_idx[loc][var->data.location_frac] = 5447e102996Smaya ++num_of_comps_to_pack; 5457e102996Smaya } 5467e102996Smaya } 5477e102996Smaya 5487e102996Smaya *varying_comp_info_size = num_of_comps_to_pack; 5497e102996Smaya *varying_comp_info = rzalloc_array(NULL, struct varying_component, 5507e102996Smaya num_of_comps_to_pack); 5517e102996Smaya 5527e102996Smaya nir_function_impl *impl = nir_shader_get_entrypoint(consumer); 5537e102996Smaya 5547e102996Smaya /* Walk over the shader and populate the varying component info array */ 5557e102996Smaya nir_foreach_block(block, impl) { 5567e102996Smaya nir_foreach_instr(instr, block) { 5577e102996Smaya if (instr->type != nir_instr_type_intrinsic) 55801e04c3fSmrg continue; 55901e04c3fSmrg 5607e102996Smaya nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 5617e102996Smaya if (intr->intrinsic != nir_intrinsic_load_deref && 5627e102996Smaya intr->intrinsic != nir_intrinsic_interp_deref_at_centroid && 5637e102996Smaya intr->intrinsic != nir_intrinsic_interp_deref_at_sample && 5647ec681f3Smrg intr->intrinsic != nir_intrinsic_interp_deref_at_offset && 5657ec681f3Smrg intr->intrinsic != nir_intrinsic_interp_deref_at_vertex) 5667e102996Smaya continue; 5677e102996Smaya 5687e102996Smaya nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); 5697ec681f3Smrg if (!nir_deref_mode_is(deref, nir_var_shader_in)) 5707e102996Smaya continue; 5717e102996Smaya 5727e102996Smaya /* We only remap things that aren't builtins. */ 5737e102996Smaya nir_variable *in_var = nir_deref_instr_get_variable(deref); 5747e102996Smaya if (in_var->data.location < VARYING_SLOT_VAR0) 5757e102996Smaya continue; 5767e102996Smaya 5777e102996Smaya unsigned location = in_var->data.location - VARYING_SLOT_VAR0; 5787e102996Smaya if (location >= MAX_VARYINGS_INCL_PATCH) 5797e102996Smaya continue; 5807e102996Smaya 5817e102996Smaya unsigned var_info_idx = 5827e102996Smaya store_varying_info_idx[location][in_var->data.location_frac]; 5837e102996Smaya if (!var_info_idx) 5847e102996Smaya continue; 5857e102996Smaya 5867e102996Smaya struct varying_component *vc_info = 5877e102996Smaya &(*varying_comp_info)[var_info_idx-1]; 5887e102996Smaya 5897e102996Smaya if (!vc_info->initialised) { 5907e102996Smaya const struct glsl_type *type = in_var->type; 5917ec681f3Smrg if (nir_is_arrayed_io(in_var, consumer->info.stage) || 5927ec681f3Smrg in_var->data.per_view) { 5937e102996Smaya assert(glsl_type_is_array(type)); 5947e102996Smaya type = glsl_get_array_element(type); 5957e102996Smaya } 5967e102996Smaya 5977e102996Smaya vc_info->var = in_var; 5987e102996Smaya vc_info->interp_type = 5997e102996Smaya get_interp_type(in_var, type, default_to_smooth_interp); 6007e102996Smaya vc_info->interp_loc = get_interp_loc(in_var); 6017e102996Smaya vc_info->is_32bit = glsl_type_is_32bit(type); 6027e102996Smaya vc_info->is_patch = in_var->data.patch; 6037ec681f3Smrg vc_info->is_mediump = !producer->options->linker_ignore_precision && 6047ec681f3Smrg (in_var->data.precision == GLSL_PRECISION_MEDIUM || 6057ec681f3Smrg in_var->data.precision == GLSL_PRECISION_LOW); 6067ec681f3Smrg vc_info->is_intra_stage_only = false; 6077ec681f3Smrg vc_info->initialised = true; 6087e102996Smaya } 6097e102996Smaya } 6107e102996Smaya } 6117ec681f3Smrg 6127ec681f3Smrg /* Walk over the shader and populate the varying component info array 6137ec681f3Smrg * for varyings which are read by other TCS instances but are not consumed 6147ec681f3Smrg * by the TES. 6157ec681f3Smrg */ 6167ec681f3Smrg if (producer->info.stage == MESA_SHADER_TESS_CTRL) { 6177ec681f3Smrg impl = nir_shader_get_entrypoint(producer); 6187ec681f3Smrg 6197ec681f3Smrg nir_foreach_block(block, impl) { 6207ec681f3Smrg nir_foreach_instr(instr, block) { 6217ec681f3Smrg if (instr->type != nir_instr_type_intrinsic) 6227ec681f3Smrg continue; 6237ec681f3Smrg 6247ec681f3Smrg nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 6257ec681f3Smrg if (intr->intrinsic != nir_intrinsic_load_deref) 6267ec681f3Smrg continue; 6277ec681f3Smrg 6287ec681f3Smrg nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); 6297ec681f3Smrg if (!nir_deref_mode_is(deref, nir_var_shader_out)) 6307ec681f3Smrg continue; 6317ec681f3Smrg 6327ec681f3Smrg /* We only remap things that aren't builtins. */ 6337ec681f3Smrg nir_variable *out_var = nir_deref_instr_get_variable(deref); 6347ec681f3Smrg if (out_var->data.location < VARYING_SLOT_VAR0) 6357ec681f3Smrg continue; 6367ec681f3Smrg 6377ec681f3Smrg unsigned location = out_var->data.location - VARYING_SLOT_VAR0; 6387ec681f3Smrg if (location >= MAX_VARYINGS_INCL_PATCH) 6397ec681f3Smrg continue; 6407ec681f3Smrg 6417ec681f3Smrg unsigned var_info_idx = 6427ec681f3Smrg store_varying_info_idx[location][out_var->data.location_frac]; 6437ec681f3Smrg if (!var_info_idx) { 6447ec681f3Smrg /* Something went wrong, the shader interfaces didn't match, so 6457ec681f3Smrg * abandon packing. This can happen for example when the 6467ec681f3Smrg * inputs are scalars but the outputs are struct members. 6477ec681f3Smrg */ 6487ec681f3Smrg *varying_comp_info_size = 0; 6497ec681f3Smrg break; 6507ec681f3Smrg } 6517ec681f3Smrg 6527ec681f3Smrg struct varying_component *vc_info = 6537ec681f3Smrg &(*varying_comp_info)[var_info_idx-1]; 6547ec681f3Smrg 6557ec681f3Smrg if (!vc_info->initialised) { 6567ec681f3Smrg const struct glsl_type *type = out_var->type; 6577ec681f3Smrg if (nir_is_arrayed_io(out_var, producer->info.stage)) { 6587ec681f3Smrg assert(glsl_type_is_array(type)); 6597ec681f3Smrg type = glsl_get_array_element(type); 6607ec681f3Smrg } 6617ec681f3Smrg 6627ec681f3Smrg vc_info->var = out_var; 6637ec681f3Smrg vc_info->interp_type = 6647ec681f3Smrg get_interp_type(out_var, type, default_to_smooth_interp); 6657ec681f3Smrg vc_info->interp_loc = get_interp_loc(out_var); 6667ec681f3Smrg vc_info->is_32bit = glsl_type_is_32bit(type); 6677ec681f3Smrg vc_info->is_patch = out_var->data.patch; 6687ec681f3Smrg vc_info->is_mediump = !producer->options->linker_ignore_precision && 6697ec681f3Smrg (out_var->data.precision == GLSL_PRECISION_MEDIUM || 6707ec681f3Smrg out_var->data.precision == GLSL_PRECISION_LOW); 6717ec681f3Smrg vc_info->is_intra_stage_only = true; 6727ec681f3Smrg vc_info->initialised = true; 6737ec681f3Smrg } 6747ec681f3Smrg } 6757ec681f3Smrg } 6767ec681f3Smrg } 6777ec681f3Smrg 6787ec681f3Smrg for (unsigned i = 0; i < *varying_comp_info_size; i++ ) { 6797ec681f3Smrg struct varying_component *vc_info = &(*varying_comp_info)[i]; 6807ec681f3Smrg if (!vc_info->initialised) { 6817ec681f3Smrg /* Something went wrong, the shader interfaces didn't match, so 6827ec681f3Smrg * abandon packing. This can happen for example when the outputs are 6837ec681f3Smrg * scalars but the inputs are struct members. 6847ec681f3Smrg */ 6857ec681f3Smrg *varying_comp_info_size = 0; 6867ec681f3Smrg break; 6877ec681f3Smrg } 6887ec681f3Smrg } 6897ec681f3Smrg} 6907ec681f3Smrg 6917ec681f3Smrgstatic bool 6927ec681f3Smrgallow_pack_interp_type(nir_pack_varying_options options, int type) 6937ec681f3Smrg{ 6947ec681f3Smrg int sel; 6957ec681f3Smrg 6967ec681f3Smrg switch (type) { 6977ec681f3Smrg case INTERP_MODE_NONE: 6987ec681f3Smrg sel = nir_pack_varying_interp_mode_none; 6997ec681f3Smrg break; 7007ec681f3Smrg case INTERP_MODE_SMOOTH: 7017ec681f3Smrg sel = nir_pack_varying_interp_mode_smooth; 7027ec681f3Smrg break; 7037ec681f3Smrg case INTERP_MODE_FLAT: 7047ec681f3Smrg sel = nir_pack_varying_interp_mode_flat; 7057ec681f3Smrg break; 7067ec681f3Smrg case INTERP_MODE_NOPERSPECTIVE: 7077ec681f3Smrg sel = nir_pack_varying_interp_mode_noperspective; 7087ec681f3Smrg break; 7097ec681f3Smrg default: 7107ec681f3Smrg return false; 7117ec681f3Smrg } 7127ec681f3Smrg 7137ec681f3Smrg return options & sel; 7147ec681f3Smrg} 7157ec681f3Smrg 7167ec681f3Smrgstatic bool 7177ec681f3Smrgallow_pack_interp_loc(nir_pack_varying_options options, int loc) 7187ec681f3Smrg{ 7197ec681f3Smrg int sel; 7207ec681f3Smrg 7217ec681f3Smrg switch (loc) { 7227ec681f3Smrg case INTERPOLATE_LOC_SAMPLE: 7237ec681f3Smrg sel = nir_pack_varying_interp_loc_sample; 7247ec681f3Smrg break; 7257ec681f3Smrg case INTERPOLATE_LOC_CENTROID: 7267ec681f3Smrg sel = nir_pack_varying_interp_loc_centroid; 7277ec681f3Smrg break; 7287ec681f3Smrg case INTERPOLATE_LOC_CENTER: 7297ec681f3Smrg sel = nir_pack_varying_interp_loc_center; 7307ec681f3Smrg break; 7317ec681f3Smrg default: 7327ec681f3Smrg return false; 7337ec681f3Smrg } 7347ec681f3Smrg 7357ec681f3Smrg return options & sel; 7367e102996Smaya} 73701e04c3fSmrg 7387e102996Smayastatic void 7397e102996Smayaassign_remap_locations(struct varying_loc (*remap)[4], 7407e102996Smaya struct assigned_comps *assigned_comps, 7417e102996Smaya struct varying_component *info, 7427e102996Smaya unsigned *cursor, unsigned *comp, 7437ec681f3Smrg unsigned max_location, 7447ec681f3Smrg nir_pack_varying_options options) 7457e102996Smaya{ 7467e102996Smaya unsigned tmp_cursor = *cursor; 7477e102996Smaya unsigned tmp_comp = *comp; 7487e102996Smaya 7497e102996Smaya for (; tmp_cursor < max_location; tmp_cursor++) { 7507e102996Smaya 7517e102996Smaya if (assigned_comps[tmp_cursor].comps) { 7527ec681f3Smrg /* We can only pack varyings with matching precision. */ 7537ec681f3Smrg if (assigned_comps[tmp_cursor].is_mediump != info->is_mediump) { 7547ec681f3Smrg tmp_comp = 0; 7557ec681f3Smrg continue; 7567ec681f3Smrg } 7577ec681f3Smrg 7587ec681f3Smrg /* We can only pack varyings with matching interpolation type 7597ec681f3Smrg * if driver does not support it. 7607ec681f3Smrg */ 7617ec681f3Smrg if (assigned_comps[tmp_cursor].interp_type != info->interp_type && 7627ec681f3Smrg (!allow_pack_interp_type(options, assigned_comps[tmp_cursor].interp_type) || 7637ec681f3Smrg !allow_pack_interp_type(options, info->interp_type))) { 7647ec681f3Smrg tmp_comp = 0; 7657ec681f3Smrg continue; 7667ec681f3Smrg } 7677ec681f3Smrg 7687ec681f3Smrg /* We can only pack varyings with matching interpolation location 7697ec681f3Smrg * if driver does not support it. 77001e04c3fSmrg */ 7717ec681f3Smrg if (assigned_comps[tmp_cursor].interp_loc != info->interp_loc && 7727ec681f3Smrg (!allow_pack_interp_loc(options, assigned_comps[tmp_cursor].interp_loc) || 7737ec681f3Smrg !allow_pack_interp_loc(options, info->interp_loc))) { 7747e102996Smaya tmp_comp = 0; 77501e04c3fSmrg continue; 7767e102996Smaya } 77701e04c3fSmrg 7787e102996Smaya /* We can only pack varyings with matching types, and the current 7797e102996Smaya * algorithm only supports packing 32-bit. 7807e102996Smaya */ 7817e102996Smaya if (!assigned_comps[tmp_cursor].is_32bit) { 7827e102996Smaya tmp_comp = 0; 7837e102996Smaya continue; 7847e102996Smaya } 78501e04c3fSmrg 7867e102996Smaya while (tmp_comp < 4 && 7877e102996Smaya (assigned_comps[tmp_cursor].comps & (1 << tmp_comp))) { 7887e102996Smaya tmp_comp++; 7897e102996Smaya } 7907e102996Smaya } 79101e04c3fSmrg 7927e102996Smaya if (tmp_comp == 4) { 7937e102996Smaya tmp_comp = 0; 7947e102996Smaya continue; 7957e102996Smaya } 79601e04c3fSmrg 7977e102996Smaya unsigned location = info->var->data.location - VARYING_SLOT_VAR0; 79801e04c3fSmrg 7997e102996Smaya /* Once we have assigned a location mark it as used */ 8007e102996Smaya assigned_comps[tmp_cursor].comps |= (1 << tmp_comp); 8017e102996Smaya assigned_comps[tmp_cursor].interp_type = info->interp_type; 8027e102996Smaya assigned_comps[tmp_cursor].interp_loc = info->interp_loc; 8037e102996Smaya assigned_comps[tmp_cursor].is_32bit = info->is_32bit; 8047ec681f3Smrg assigned_comps[tmp_cursor].is_mediump = info->is_mediump; 80501e04c3fSmrg 8067e102996Smaya /* Assign remap location */ 8077e102996Smaya remap[location][info->var->data.location_frac].component = tmp_comp++; 8087e102996Smaya remap[location][info->var->data.location_frac].location = 8097e102996Smaya tmp_cursor + VARYING_SLOT_VAR0; 81001e04c3fSmrg 8117e102996Smaya break; 8127e102996Smaya } 81301e04c3fSmrg 8147e102996Smaya *cursor = tmp_cursor; 8157e102996Smaya *comp = tmp_comp; 8167e102996Smaya} 81701e04c3fSmrg 8187e102996Smaya/* If there are empty components in the slot compact the remaining components 8197e102996Smaya * as close to component 0 as possible. This will make it easier to fill the 8207e102996Smaya * empty components with components from a different slot in a following pass. 8217e102996Smaya */ 8227e102996Smayastatic void 8237e102996Smayacompact_components(nir_shader *producer, nir_shader *consumer, 8247e102996Smaya struct assigned_comps *assigned_comps, 8257e102996Smaya bool default_to_smooth_interp) 8267e102996Smaya{ 8277e102996Smaya struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = {{{0}, {0}}}; 8287e102996Smaya struct varying_component *varying_comp_info; 8297e102996Smaya unsigned varying_comp_info_size; 8307e102996Smaya 8317e102996Smaya /* Gather varying component info */ 8327ec681f3Smrg gather_varying_component_info(producer, consumer, &varying_comp_info, 8337e102996Smaya &varying_comp_info_size, 8347e102996Smaya default_to_smooth_interp); 8357e102996Smaya 8367e102996Smaya /* Sort varying components. */ 8377e102996Smaya qsort(varying_comp_info, varying_comp_info_size, 8387e102996Smaya sizeof(struct varying_component), cmp_varying_component); 8397e102996Smaya 8407ec681f3Smrg nir_pack_varying_options options = consumer->options->pack_varying_options; 8417ec681f3Smrg 8427e102996Smaya unsigned cursor = 0; 8437e102996Smaya unsigned comp = 0; 8447e102996Smaya 8457e102996Smaya /* Set the remap array based on the sorted components */ 8467e102996Smaya for (unsigned i = 0; i < varying_comp_info_size; i++ ) { 8477e102996Smaya struct varying_component *info = &varying_comp_info[i]; 8487e102996Smaya 8497e102996Smaya assert(info->is_patch || cursor < MAX_VARYING); 8507e102996Smaya if (info->is_patch) { 8517e102996Smaya /* The list should be sorted with all non-patch inputs first followed 8527e102996Smaya * by patch inputs. When we hit our first patch input, we need to 8537e102996Smaya * reset the cursor to MAX_VARYING so we put them in the right slot. 8547e102996Smaya */ 8557e102996Smaya if (cursor < MAX_VARYING) { 8567e102996Smaya cursor = MAX_VARYING; 8577e102996Smaya comp = 0; 8587e102996Smaya } 85901e04c3fSmrg 8607e102996Smaya assign_remap_locations(remap, assigned_comps, info, 8617ec681f3Smrg &cursor, &comp, MAX_VARYINGS_INCL_PATCH, 8627ec681f3Smrg options); 8637e102996Smaya } else { 8647e102996Smaya assign_remap_locations(remap, assigned_comps, info, 8657ec681f3Smrg &cursor, &comp, MAX_VARYING, 8667ec681f3Smrg options); 8677e102996Smaya 8687e102996Smaya /* Check if we failed to assign a remap location. This can happen if 8697e102996Smaya * for example there are a bunch of unmovable components with 8707e102996Smaya * mismatching interpolation types causing us to skip over locations 8717e102996Smaya * that would have been useful for packing later components. 8727e102996Smaya * The solution is to iterate over the locations again (this should 8737e102996Smaya * happen very rarely in practice). 8747e102996Smaya */ 8757e102996Smaya if (cursor == MAX_VARYING) { 8767e102996Smaya cursor = 0; 8777e102996Smaya comp = 0; 8787e102996Smaya assign_remap_locations(remap, assigned_comps, info, 8797ec681f3Smrg &cursor, &comp, MAX_VARYING, 8807ec681f3Smrg options); 88101e04c3fSmrg } 88201e04c3fSmrg } 88301e04c3fSmrg } 88401e04c3fSmrg 8857e102996Smaya ralloc_free(varying_comp_info); 8867e102996Smaya 88701e04c3fSmrg uint64_t zero = 0; 8887e102996Smaya uint32_t zero32 = 0; 8897ec681f3Smrg remap_slots_and_components(consumer, nir_var_shader_in, remap, 8907e102996Smaya &consumer->info.inputs_read, &zero, 8917e102996Smaya &consumer->info.patch_inputs_read, &zero32); 8927ec681f3Smrg remap_slots_and_components(producer, nir_var_shader_out, remap, 89301e04c3fSmrg &producer->info.outputs_written, 8947e102996Smaya &producer->info.outputs_read, 8957e102996Smaya &producer->info.patch_outputs_written, 8967e102996Smaya &producer->info.patch_outputs_read); 89701e04c3fSmrg} 89801e04c3fSmrg 89901e04c3fSmrg/* We assume that this has been called more-or-less directly after 90001e04c3fSmrg * remove_unused_varyings. At this point, all of the varyings that we 90101e04c3fSmrg * aren't going to be using have been completely removed and the 90201e04c3fSmrg * inputs_read and outputs_written fields in nir_shader_info reflect 90301e04c3fSmrg * this. Therefore, the total set of valid slots is the OR of the two 90401e04c3fSmrg * sets of varyings; this accounts for varyings which one side may need 90501e04c3fSmrg * to read/write even if the other doesn't. This can happen if, for 90601e04c3fSmrg * instance, an array is used indirectly from one side causing it to be 90701e04c3fSmrg * unsplittable but directly from the other. 90801e04c3fSmrg */ 90901e04c3fSmrgvoid 91001e04c3fSmrgnir_compact_varyings(nir_shader *producer, nir_shader *consumer, 91101e04c3fSmrg bool default_to_smooth_interp) 91201e04c3fSmrg{ 91301e04c3fSmrg assert(producer->info.stage != MESA_SHADER_FRAGMENT); 91401e04c3fSmrg assert(consumer->info.stage != MESA_SHADER_VERTEX); 91501e04c3fSmrg 9167e102996Smaya struct assigned_comps assigned_comps[MAX_VARYINGS_INCL_PATCH] = {{0}}; 91701e04c3fSmrg 9187ec681f3Smrg get_unmoveable_components_masks(producer, nir_var_shader_out, 9197ec681f3Smrg assigned_comps, 9207e102996Smaya producer->info.stage, 9217e102996Smaya default_to_smooth_interp); 9227ec681f3Smrg get_unmoveable_components_masks(consumer, nir_var_shader_in, 9237ec681f3Smrg assigned_comps, 9247e102996Smaya consumer->info.stage, 9257e102996Smaya default_to_smooth_interp); 92601e04c3fSmrg 9277e102996Smaya compact_components(producer, consumer, assigned_comps, 92801e04c3fSmrg default_to_smooth_interp); 92901e04c3fSmrg} 93001e04c3fSmrg 93101e04c3fSmrg/* 93201e04c3fSmrg * Mark XFB varyings as always_active_io in the consumer so the linking opts 93301e04c3fSmrg * don't touch them. 93401e04c3fSmrg */ 93501e04c3fSmrgvoid 93601e04c3fSmrgnir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer) 93701e04c3fSmrg{ 93801e04c3fSmrg nir_variable *input_vars[MAX_VARYING] = { 0 }; 93901e04c3fSmrg 9407ec681f3Smrg nir_foreach_shader_in_variable(var, consumer) { 94101e04c3fSmrg if (var->data.location >= VARYING_SLOT_VAR0 && 94201e04c3fSmrg var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) { 94301e04c3fSmrg 94401e04c3fSmrg unsigned location = var->data.location - VARYING_SLOT_VAR0; 94501e04c3fSmrg input_vars[location] = var; 94601e04c3fSmrg } 94701e04c3fSmrg } 94801e04c3fSmrg 9497ec681f3Smrg nir_foreach_shader_out_variable(var, producer) { 95001e04c3fSmrg if (var->data.location >= VARYING_SLOT_VAR0 && 95101e04c3fSmrg var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) { 95201e04c3fSmrg 95301e04c3fSmrg if (!var->data.always_active_io) 95401e04c3fSmrg continue; 95501e04c3fSmrg 95601e04c3fSmrg unsigned location = var->data.location - VARYING_SLOT_VAR0; 95701e04c3fSmrg if (input_vars[location]) { 95801e04c3fSmrg input_vars[location]->data.always_active_io = true; 95901e04c3fSmrg } 96001e04c3fSmrg } 96101e04c3fSmrg } 96201e04c3fSmrg} 9637e102996Smaya 9647e102996Smayastatic bool 9657e102996Smayadoes_varying_match(nir_variable *out_var, nir_variable *in_var) 9667e102996Smaya{ 9677e102996Smaya return in_var->data.location == out_var->data.location && 9687e102996Smaya in_var->data.location_frac == out_var->data.location_frac; 9697e102996Smaya} 9707e102996Smaya 9717e102996Smayastatic nir_variable * 9727e102996Smayaget_matching_input_var(nir_shader *consumer, nir_variable *out_var) 9737e102996Smaya{ 9747ec681f3Smrg nir_foreach_shader_in_variable(var, consumer) { 9757e102996Smaya if (does_varying_match(out_var, var)) 9767e102996Smaya return var; 9777e102996Smaya } 9787e102996Smaya 9797e102996Smaya return NULL; 9807e102996Smaya} 9817e102996Smaya 9827e102996Smayastatic bool 9837e102996Smayacan_replace_varying(nir_variable *out_var) 9847e102996Smaya{ 9857e102996Smaya /* Skip types that require more complex handling. 9867e102996Smaya * TODO: add support for these types. 9877e102996Smaya */ 9887e102996Smaya if (glsl_type_is_array(out_var->type) || 9897e102996Smaya glsl_type_is_dual_slot(out_var->type) || 9907e102996Smaya glsl_type_is_matrix(out_var->type) || 9917e102996Smaya glsl_type_is_struct_or_ifc(out_var->type)) 9927e102996Smaya return false; 9937e102996Smaya 9947e102996Smaya /* Limit this pass to scalars for now to keep things simple. Most varyings 9957e102996Smaya * should have been lowered to scalars at this point anyway. 9967e102996Smaya */ 9977e102996Smaya if (!glsl_type_is_scalar(out_var->type)) 9987e102996Smaya return false; 9997e102996Smaya 10007e102996Smaya if (out_var->data.location < VARYING_SLOT_VAR0 || 10017e102996Smaya out_var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING) 10027e102996Smaya return false; 10037e102996Smaya 10047e102996Smaya return true; 10057e102996Smaya} 10067e102996Smaya 10077e102996Smayastatic bool 10087ec681f3Smrgreplace_varying_input_by_constant_load(nir_shader *shader, 10097ec681f3Smrg nir_intrinsic_instr *store_intr) 10107e102996Smaya{ 10117e102996Smaya nir_function_impl *impl = nir_shader_get_entrypoint(shader); 10127e102996Smaya 10137e102996Smaya nir_builder b; 10147e102996Smaya nir_builder_init(&b, impl); 10157e102996Smaya 10167e102996Smaya nir_variable *out_var = 10177e102996Smaya nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0])); 10187e102996Smaya 10197e102996Smaya bool progress = false; 10207e102996Smaya nir_foreach_block(block, impl) { 10217e102996Smaya nir_foreach_instr(instr, block) { 10227e102996Smaya if (instr->type != nir_instr_type_intrinsic) 10237e102996Smaya continue; 10247e102996Smaya 10257e102996Smaya nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 10267e102996Smaya if (intr->intrinsic != nir_intrinsic_load_deref) 10277e102996Smaya continue; 10287e102996Smaya 10297e102996Smaya nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]); 10307ec681f3Smrg if (!nir_deref_mode_is(in_deref, nir_var_shader_in)) 10317e102996Smaya continue; 10327e102996Smaya 10337e102996Smaya nir_variable *in_var = nir_deref_instr_get_variable(in_deref); 10347e102996Smaya 10357e102996Smaya if (!does_varying_match(out_var, in_var)) 10367e102996Smaya continue; 10377e102996Smaya 10387e102996Smaya b.cursor = nir_before_instr(instr); 10397e102996Smaya 10407e102996Smaya nir_load_const_instr *out_const = 10417e102996Smaya nir_instr_as_load_const(store_intr->src[1].ssa->parent_instr); 10427e102996Smaya 10437e102996Smaya /* Add new const to replace the input */ 10447e102996Smaya nir_ssa_def *nconst = nir_build_imm(&b, store_intr->num_components, 10457e102996Smaya intr->dest.ssa.bit_size, 10467e102996Smaya out_const->value); 10477e102996Smaya 10487ec681f3Smrg nir_ssa_def_rewrite_uses(&intr->dest.ssa, nconst); 10497e102996Smaya 10507e102996Smaya progress = true; 10517e102996Smaya } 10527e102996Smaya } 10537e102996Smaya 10547e102996Smaya return progress; 10557e102996Smaya} 10567e102996Smaya 10577e102996Smayastatic bool 10587e102996Smayareplace_duplicate_input(nir_shader *shader, nir_variable *input_var, 10597e102996Smaya nir_intrinsic_instr *dup_store_intr) 10607e102996Smaya{ 10617e102996Smaya assert(input_var); 10627e102996Smaya 10637e102996Smaya nir_function_impl *impl = nir_shader_get_entrypoint(shader); 10647e102996Smaya 10657e102996Smaya nir_builder b; 10667e102996Smaya nir_builder_init(&b, impl); 10677e102996Smaya 10687e102996Smaya nir_variable *dup_out_var = 10697e102996Smaya nir_deref_instr_get_variable(nir_src_as_deref(dup_store_intr->src[0])); 10707e102996Smaya 10717e102996Smaya bool progress = false; 10727e102996Smaya nir_foreach_block(block, impl) { 10737e102996Smaya nir_foreach_instr(instr, block) { 10747e102996Smaya if (instr->type != nir_instr_type_intrinsic) 10757e102996Smaya continue; 10767e102996Smaya 10777e102996Smaya nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 10787e102996Smaya if (intr->intrinsic != nir_intrinsic_load_deref) 10797e102996Smaya continue; 10807e102996Smaya 10817e102996Smaya nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]); 10827ec681f3Smrg if (!nir_deref_mode_is(in_deref, nir_var_shader_in)) 10837e102996Smaya continue; 10847e102996Smaya 10857e102996Smaya nir_variable *in_var = nir_deref_instr_get_variable(in_deref); 10867e102996Smaya 10877e102996Smaya if (!does_varying_match(dup_out_var, in_var) || 10887e102996Smaya in_var->data.interpolation != input_var->data.interpolation || 10897e102996Smaya get_interp_loc(in_var) != get_interp_loc(input_var)) 10907e102996Smaya continue; 10917e102996Smaya 10927e102996Smaya b.cursor = nir_before_instr(instr); 10937e102996Smaya 10947e102996Smaya nir_ssa_def *load = nir_load_var(&b, input_var); 10957ec681f3Smrg nir_ssa_def_rewrite_uses(&intr->dest.ssa, load); 10967e102996Smaya 10977e102996Smaya progress = true; 10987e102996Smaya } 10997e102996Smaya } 11007e102996Smaya 11017e102996Smaya return progress; 11027e102996Smaya} 11037e102996Smaya 11047ec681f3Smrgstatic bool 11057ec681f3Smrgis_direct_uniform_load(nir_ssa_def *def, nir_ssa_scalar *s) 11067ec681f3Smrg{ 11077ec681f3Smrg /* def is sure to be scalar as can_replace_varying() filter out vector case. */ 11087ec681f3Smrg assert(def->num_components == 1); 11097ec681f3Smrg 11107ec681f3Smrg /* Uniform load may hide behind some move instruction for converting 11117ec681f3Smrg * vector to scalar: 11127ec681f3Smrg * 11137ec681f3Smrg * vec1 32 ssa_1 = deref_var &color (uniform vec3) 11147ec681f3Smrg * vec3 32 ssa_2 = intrinsic load_deref (ssa_1) (0) 11157ec681f3Smrg * vec1 32 ssa_3 = mov ssa_2.x 11167ec681f3Smrg * vec1 32 ssa_4 = deref_var &color_out (shader_out float) 11177ec681f3Smrg * intrinsic store_deref (ssa_4, ssa_3) (1, 0) 11187ec681f3Smrg */ 11197ec681f3Smrg *s = nir_ssa_scalar_resolved(def, 0); 11207ec681f3Smrg 11217ec681f3Smrg nir_ssa_def *ssa = s->def; 11227ec681f3Smrg if (ssa->parent_instr->type != nir_instr_type_intrinsic) 11237ec681f3Smrg return false; 11247ec681f3Smrg 11257ec681f3Smrg nir_intrinsic_instr *intr = nir_instr_as_intrinsic(ssa->parent_instr); 11267ec681f3Smrg if (intr->intrinsic != nir_intrinsic_load_deref) 11277ec681f3Smrg return false; 11287ec681f3Smrg 11297ec681f3Smrg nir_deref_instr *deref = nir_src_as_deref(intr->src[0]); 11307ec681f3Smrg /* TODO: support nir_var_mem_ubo. */ 11317ec681f3Smrg if (!nir_deref_mode_is(deref, nir_var_uniform)) 11327ec681f3Smrg return false; 11337ec681f3Smrg 11347ec681f3Smrg /* Does not support indirect uniform load. */ 11357ec681f3Smrg return !nir_deref_instr_has_indirect(deref); 11367ec681f3Smrg} 11377ec681f3Smrg 11387ec681f3Smrgstatic nir_variable * 11397ec681f3Smrgget_uniform_var_in_consumer(nir_shader *consumer, 11407ec681f3Smrg nir_variable *var_in_producer) 11417ec681f3Smrg{ 11427ec681f3Smrg /* Find if uniform already exists in consumer. */ 11437ec681f3Smrg nir_variable *new_var = NULL; 11447ec681f3Smrg nir_foreach_uniform_variable(v, consumer) { 11457ec681f3Smrg if (!strcmp(var_in_producer->name, v->name)) { 11467ec681f3Smrg new_var = v; 11477ec681f3Smrg break; 11487ec681f3Smrg } 11497ec681f3Smrg } 11507ec681f3Smrg 11517ec681f3Smrg /* Create a variable if not exist. */ 11527ec681f3Smrg if (!new_var) { 11537ec681f3Smrg new_var = nir_variable_clone(var_in_producer, consumer); 11547ec681f3Smrg nir_shader_add_variable(consumer, new_var); 11557ec681f3Smrg } 11567ec681f3Smrg 11577ec681f3Smrg return new_var; 11587ec681f3Smrg} 11597ec681f3Smrg 11607ec681f3Smrgstatic nir_deref_instr * 11617ec681f3Smrgclone_deref_instr(nir_builder *b, nir_variable *var, nir_deref_instr *deref) 11627ec681f3Smrg{ 11637ec681f3Smrg if (deref->deref_type == nir_deref_type_var) 11647ec681f3Smrg return nir_build_deref_var(b, var); 11657ec681f3Smrg 11667ec681f3Smrg nir_deref_instr *parent_deref = nir_deref_instr_parent(deref); 11677ec681f3Smrg nir_deref_instr *parent = clone_deref_instr(b, var, parent_deref); 11687ec681f3Smrg 11697ec681f3Smrg /* Build array and struct deref instruction. 11707ec681f3Smrg * "deref" instr is sure to be direct (see is_direct_uniform_load()). 11717ec681f3Smrg */ 11727ec681f3Smrg switch (deref->deref_type) { 11737ec681f3Smrg case nir_deref_type_array: { 11747ec681f3Smrg nir_load_const_instr *index = 11757ec681f3Smrg nir_instr_as_load_const(deref->arr.index.ssa->parent_instr); 11767ec681f3Smrg return nir_build_deref_array_imm(b, parent, index->value->i64); 11777ec681f3Smrg } 11787ec681f3Smrg case nir_deref_type_ptr_as_array: { 11797ec681f3Smrg nir_load_const_instr *index = 11807ec681f3Smrg nir_instr_as_load_const(deref->arr.index.ssa->parent_instr); 11817ec681f3Smrg nir_ssa_def *ssa = nir_imm_intN_t(b, index->value->i64, 11827ec681f3Smrg parent->dest.ssa.bit_size); 11837ec681f3Smrg return nir_build_deref_ptr_as_array(b, parent, ssa); 11847ec681f3Smrg } 11857ec681f3Smrg case nir_deref_type_struct: 11867ec681f3Smrg return nir_build_deref_struct(b, parent, deref->strct.index); 11877ec681f3Smrg default: 11887ec681f3Smrg unreachable("invalid type"); 11897ec681f3Smrg return NULL; 11907ec681f3Smrg } 11917ec681f3Smrg} 11927ec681f3Smrg 11937ec681f3Smrgstatic bool 11947ec681f3Smrgreplace_varying_input_by_uniform_load(nir_shader *shader, 11957ec681f3Smrg nir_intrinsic_instr *store_intr, 11967ec681f3Smrg nir_ssa_scalar *scalar) 11977ec681f3Smrg{ 11987ec681f3Smrg nir_function_impl *impl = nir_shader_get_entrypoint(shader); 11997ec681f3Smrg 12007ec681f3Smrg nir_builder b; 12017ec681f3Smrg nir_builder_init(&b, impl); 12027ec681f3Smrg 12037ec681f3Smrg nir_variable *out_var = 12047ec681f3Smrg nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0])); 12057ec681f3Smrg 12067ec681f3Smrg nir_intrinsic_instr *load = nir_instr_as_intrinsic(scalar->def->parent_instr); 12077ec681f3Smrg nir_deref_instr *deref = nir_src_as_deref(load->src[0]); 12087ec681f3Smrg nir_variable *uni_var = nir_deref_instr_get_variable(deref); 12097ec681f3Smrg uni_var = get_uniform_var_in_consumer(shader, uni_var); 12107ec681f3Smrg 12117ec681f3Smrg bool progress = false; 12127ec681f3Smrg nir_foreach_block(block, impl) { 12137ec681f3Smrg nir_foreach_instr(instr, block) { 12147ec681f3Smrg if (instr->type != nir_instr_type_intrinsic) 12157ec681f3Smrg continue; 12167ec681f3Smrg 12177ec681f3Smrg nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 12187ec681f3Smrg if (intr->intrinsic != nir_intrinsic_load_deref) 12197ec681f3Smrg continue; 12207ec681f3Smrg 12217ec681f3Smrg nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]); 12227ec681f3Smrg if (!nir_deref_mode_is(in_deref, nir_var_shader_in)) 12237ec681f3Smrg continue; 12247ec681f3Smrg 12257ec681f3Smrg nir_variable *in_var = nir_deref_instr_get_variable(in_deref); 12267ec681f3Smrg 12277ec681f3Smrg if (!does_varying_match(out_var, in_var)) 12287ec681f3Smrg continue; 12297ec681f3Smrg 12307ec681f3Smrg b.cursor = nir_before_instr(instr); 12317ec681f3Smrg 12327ec681f3Smrg /* Clone instructions start from deref load to variable deref. */ 12337ec681f3Smrg nir_deref_instr *uni_deref = clone_deref_instr(&b, uni_var, deref); 12347ec681f3Smrg nir_ssa_def *uni_def = nir_load_deref(&b, uni_deref); 12357ec681f3Smrg 12367ec681f3Smrg /* Add a vector to scalar move if uniform is a vector. */ 12377ec681f3Smrg if (uni_def->num_components > 1) { 12387ec681f3Smrg nir_alu_src src = {0}; 12397ec681f3Smrg src.src = nir_src_for_ssa(uni_def); 12407ec681f3Smrg src.swizzle[0] = scalar->comp; 12417ec681f3Smrg uni_def = nir_mov_alu(&b, src, 1); 12427ec681f3Smrg } 12437ec681f3Smrg 12447ec681f3Smrg /* Replace load input with load uniform. */ 12457ec681f3Smrg nir_ssa_def_rewrite_uses(&intr->dest.ssa, uni_def); 12467ec681f3Smrg 12477ec681f3Smrg progress = true; 12487ec681f3Smrg } 12497ec681f3Smrg } 12507ec681f3Smrg 12517ec681f3Smrg return progress; 12527ec681f3Smrg} 12537ec681f3Smrg 12547ec681f3Smrg/* The GLSL ES 3.20 spec says: 12557ec681f3Smrg * 12567ec681f3Smrg * "The precision of a vertex output does not need to match the precision of 12577ec681f3Smrg * the corresponding fragment input. The minimum precision at which vertex 12587ec681f3Smrg * outputs are interpolated is the minimum of the vertex output precision and 12597ec681f3Smrg * the fragment input precision, with the exception that for highp, 12607ec681f3Smrg * implementations do not have to support full IEEE 754 precision." (9.1 "Input 12617ec681f3Smrg * Output Matching by Name in Linked Programs") 12627ec681f3Smrg * 12637ec681f3Smrg * To implement this, when linking shaders we will take the minimum precision 12647ec681f3Smrg * qualifier (allowing drivers to interpolate at lower precision). For 12657ec681f3Smrg * input/output between non-fragment stages (e.g. VERTEX to GEOMETRY), the spec 12667ec681f3Smrg * requires we use the *last* specified precision if there is a conflict. 12677ec681f3Smrg * 12687ec681f3Smrg * Precisions are ordered as (NONE, HIGH, MEDIUM, LOW). If either precision is 12697ec681f3Smrg * NONE, we'll return the other precision, since there is no conflict. 12707ec681f3Smrg * Otherwise for fragment interpolation, we'll pick the smallest of (HIGH, 12717ec681f3Smrg * MEDIUM, LOW) by picking the maximum of the raw values - note the ordering is 12727ec681f3Smrg * "backwards". For non-fragment stages, we'll pick the latter precision to 12737ec681f3Smrg * comply with the spec. (Note that the order matters.) 12747ec681f3Smrg * 12757ec681f3Smrg * For streamout, "Variables declared with lowp or mediump precision are 12767ec681f3Smrg * promoted to highp before being written." (12.2 "Transform Feedback", p. 341 12777ec681f3Smrg * of OpenGL ES 3.2 specification). So drivers should promote them 12787ec681f3Smrg * the transform feedback memory store, but not the output store. 12797ec681f3Smrg */ 12807ec681f3Smrg 12817ec681f3Smrgstatic unsigned 12827ec681f3Smrgnir_link_precision(unsigned producer, unsigned consumer, bool fs) 12837ec681f3Smrg{ 12847ec681f3Smrg if (producer == GLSL_PRECISION_NONE) 12857ec681f3Smrg return consumer; 12867ec681f3Smrg else if (consumer == GLSL_PRECISION_NONE) 12877ec681f3Smrg return producer; 12887ec681f3Smrg else 12897ec681f3Smrg return fs ? MAX2(producer, consumer) : consumer; 12907ec681f3Smrg} 12917ec681f3Smrg 12927ec681f3Smrgvoid 12937ec681f3Smrgnir_link_varying_precision(nir_shader *producer, nir_shader *consumer) 12947ec681f3Smrg{ 12957ec681f3Smrg bool frag = consumer->info.stage == MESA_SHADER_FRAGMENT; 12967ec681f3Smrg 12977ec681f3Smrg nir_foreach_shader_out_variable(producer_var, producer) { 12987ec681f3Smrg /* Skip if the slot is not assigned */ 12997ec681f3Smrg if (producer_var->data.location < 0) 13007ec681f3Smrg continue; 13017ec681f3Smrg 13027ec681f3Smrg nir_variable *consumer_var = nir_find_variable_with_location(consumer, 13037ec681f3Smrg nir_var_shader_in, producer_var->data.location); 13047ec681f3Smrg 13057ec681f3Smrg /* Skip if the variable will be eliminated */ 13067ec681f3Smrg if (!consumer_var) 13077ec681f3Smrg continue; 13087ec681f3Smrg 13097ec681f3Smrg /* Now we have a pair of variables. Let's pick the smaller precision. */ 13107ec681f3Smrg unsigned precision_1 = producer_var->data.precision; 13117ec681f3Smrg unsigned precision_2 = consumer_var->data.precision; 13127ec681f3Smrg unsigned minimum = nir_link_precision(precision_1, precision_2, frag); 13137ec681f3Smrg 13147ec681f3Smrg /* Propagate the new precision */ 13157ec681f3Smrg producer_var->data.precision = consumer_var->data.precision = minimum; 13167ec681f3Smrg } 13177ec681f3Smrg} 13187ec681f3Smrg 13197e102996Smayabool 13207e102996Smayanir_link_opt_varyings(nir_shader *producer, nir_shader *consumer) 13217e102996Smaya{ 13227e102996Smaya /* TODO: Add support for more shader stage combinations */ 13237e102996Smaya if (consumer->info.stage != MESA_SHADER_FRAGMENT || 13247e102996Smaya (producer->info.stage != MESA_SHADER_VERTEX && 13257e102996Smaya producer->info.stage != MESA_SHADER_TESS_EVAL)) 13267e102996Smaya return false; 13277e102996Smaya 13287e102996Smaya bool progress = false; 13297e102996Smaya 13307e102996Smaya nir_function_impl *impl = nir_shader_get_entrypoint(producer); 13317e102996Smaya 13327e102996Smaya struct hash_table *varying_values = _mesa_pointer_hash_table_create(NULL); 13337e102996Smaya 13347e102996Smaya /* If we find a store in the last block of the producer we can be sure this 13357e102996Smaya * is the only possible value for this output. 13367e102996Smaya */ 13377e102996Smaya nir_block *last_block = nir_impl_last_block(impl); 13387e102996Smaya nir_foreach_instr_reverse(instr, last_block) { 13397e102996Smaya if (instr->type != nir_instr_type_intrinsic) 13407e102996Smaya continue; 13417e102996Smaya 13427e102996Smaya nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 13437e102996Smaya 13447e102996Smaya if (intr->intrinsic != nir_intrinsic_store_deref) 13457e102996Smaya continue; 13467e102996Smaya 13477e102996Smaya nir_deref_instr *out_deref = nir_src_as_deref(intr->src[0]); 13487ec681f3Smrg if (!nir_deref_mode_is(out_deref, nir_var_shader_out)) 13497e102996Smaya continue; 13507e102996Smaya 13517e102996Smaya nir_variable *out_var = nir_deref_instr_get_variable(out_deref); 13527e102996Smaya if (!can_replace_varying(out_var)) 13537e102996Smaya continue; 13547e102996Smaya 13557ec681f3Smrg nir_ssa_scalar uni_scalar; 13567ec681f3Smrg nir_ssa_def *ssa = intr->src[1].ssa; 13577ec681f3Smrg if (ssa->parent_instr->type == nir_instr_type_load_const) { 13587ec681f3Smrg progress |= replace_varying_input_by_constant_load(consumer, intr); 13597ec681f3Smrg } else if (is_direct_uniform_load(ssa, &uni_scalar)) { 13607ec681f3Smrg progress |= replace_varying_input_by_uniform_load(consumer, intr, 13617ec681f3Smrg &uni_scalar); 13627e102996Smaya } else { 13637e102996Smaya struct hash_entry *entry = 13647ec681f3Smrg _mesa_hash_table_search(varying_values, ssa); 13657e102996Smaya if (entry) { 13667e102996Smaya progress |= replace_duplicate_input(consumer, 13677e102996Smaya (nir_variable *) entry->data, 13687e102996Smaya intr); 13697e102996Smaya } else { 13707e102996Smaya nir_variable *in_var = get_matching_input_var(consumer, out_var); 13717e102996Smaya if (in_var) { 13727ec681f3Smrg _mesa_hash_table_insert(varying_values, ssa, in_var); 13737e102996Smaya } 13747e102996Smaya } 13757e102996Smaya } 13767e102996Smaya } 13777e102996Smaya 13787e102996Smaya _mesa_hash_table_destroy(varying_values, NULL); 13797e102996Smaya 13807e102996Smaya return progress; 13817e102996Smaya} 13827ec681f3Smrg 13837ec681f3Smrg/* TODO any better helper somewhere to sort a list? */ 13847ec681f3Smrg 13857ec681f3Smrgstatic void 13867ec681f3Smrginsert_sorted(struct exec_list *var_list, nir_variable *new_var) 13877ec681f3Smrg{ 13887ec681f3Smrg nir_foreach_variable_in_list(var, var_list) { 13897ec681f3Smrg if (var->data.location > new_var->data.location) { 13907ec681f3Smrg exec_node_insert_node_before(&var->node, &new_var->node); 13917ec681f3Smrg return; 13927ec681f3Smrg } 13937ec681f3Smrg } 13947ec681f3Smrg exec_list_push_tail(var_list, &new_var->node); 13957ec681f3Smrg} 13967ec681f3Smrg 13977ec681f3Smrgstatic void 13987ec681f3Smrgsort_varyings(nir_shader *shader, nir_variable_mode mode, 13997ec681f3Smrg struct exec_list *sorted_list) 14007ec681f3Smrg{ 14017ec681f3Smrg exec_list_make_empty(sorted_list); 14027ec681f3Smrg nir_foreach_variable_with_modes_safe(var, shader, mode) { 14037ec681f3Smrg exec_node_remove(&var->node); 14047ec681f3Smrg insert_sorted(sorted_list, var); 14057ec681f3Smrg } 14067ec681f3Smrg} 14077ec681f3Smrg 14087ec681f3Smrgvoid 14097ec681f3Smrgnir_assign_io_var_locations(nir_shader *shader, nir_variable_mode mode, 14107ec681f3Smrg unsigned *size, gl_shader_stage stage) 14117ec681f3Smrg{ 14127ec681f3Smrg unsigned location = 0; 14137ec681f3Smrg unsigned assigned_locations[VARYING_SLOT_TESS_MAX]; 14147ec681f3Smrg uint64_t processed_locs[2] = {0}; 14157ec681f3Smrg 14167ec681f3Smrg struct exec_list io_vars; 14177ec681f3Smrg sort_varyings(shader, mode, &io_vars); 14187ec681f3Smrg 14197ec681f3Smrg int UNUSED last_loc = 0; 14207ec681f3Smrg bool last_partial = false; 14217ec681f3Smrg nir_foreach_variable_in_list(var, &io_vars) { 14227ec681f3Smrg const struct glsl_type *type = var->type; 14237ec681f3Smrg if (nir_is_arrayed_io(var, stage)) { 14247ec681f3Smrg assert(glsl_type_is_array(type)); 14257ec681f3Smrg type = glsl_get_array_element(type); 14267ec681f3Smrg } 14277ec681f3Smrg 14287ec681f3Smrg int base; 14297ec681f3Smrg if (var->data.mode == nir_var_shader_in && stage == MESA_SHADER_VERTEX) 14307ec681f3Smrg base = VERT_ATTRIB_GENERIC0; 14317ec681f3Smrg else if (var->data.mode == nir_var_shader_out && 14327ec681f3Smrg stage == MESA_SHADER_FRAGMENT) 14337ec681f3Smrg base = FRAG_RESULT_DATA0; 14347ec681f3Smrg else 14357ec681f3Smrg base = VARYING_SLOT_VAR0; 14367ec681f3Smrg 14377ec681f3Smrg unsigned var_size, driver_size; 14387ec681f3Smrg if (var->data.compact) { 14397ec681f3Smrg /* If we are inside a partial compact, 14407ec681f3Smrg * don't allow another compact to be in this slot 14417ec681f3Smrg * if it starts at component 0. 14427ec681f3Smrg */ 14437ec681f3Smrg if (last_partial && var->data.location_frac == 0) { 14447ec681f3Smrg location++; 14457ec681f3Smrg } 14467ec681f3Smrg 14477ec681f3Smrg /* compact variables must be arrays of scalars */ 14487ec681f3Smrg assert(!var->data.per_view); 14497ec681f3Smrg assert(glsl_type_is_array(type)); 14507ec681f3Smrg assert(glsl_type_is_scalar(glsl_get_array_element(type))); 14517ec681f3Smrg unsigned start = 4 * location + var->data.location_frac; 14527ec681f3Smrg unsigned end = start + glsl_get_length(type); 14537ec681f3Smrg var_size = driver_size = end / 4 - location; 14547ec681f3Smrg last_partial = end % 4 != 0; 14557ec681f3Smrg } else { 14567ec681f3Smrg /* Compact variables bypass the normal varying compacting pass, 14577ec681f3Smrg * which means they cannot be in the same vec4 slot as a normal 14587ec681f3Smrg * variable. If part of the current slot is taken up by a compact 14597ec681f3Smrg * variable, we need to go to the next one. 14607ec681f3Smrg */ 14617ec681f3Smrg if (last_partial) { 14627ec681f3Smrg location++; 14637ec681f3Smrg last_partial = false; 14647ec681f3Smrg } 14657ec681f3Smrg 14667ec681f3Smrg /* per-view variables have an extra array dimension, which is ignored 14677ec681f3Smrg * when counting user-facing slots (var->data.location), but *not* 14687ec681f3Smrg * with driver slots (var->data.driver_location). That is, each user 14697ec681f3Smrg * slot maps to multiple driver slots. 14707ec681f3Smrg */ 14717ec681f3Smrg driver_size = glsl_count_attribute_slots(type, false); 14727ec681f3Smrg if (var->data.per_view) { 14737ec681f3Smrg assert(glsl_type_is_array(type)); 14747ec681f3Smrg var_size = 14757ec681f3Smrg glsl_count_attribute_slots(glsl_get_array_element(type), false); 14767ec681f3Smrg } else { 14777ec681f3Smrg var_size = driver_size; 14787ec681f3Smrg } 14797ec681f3Smrg } 14807ec681f3Smrg 14817ec681f3Smrg /* Builtins don't allow component packing so we only need to worry about 14827ec681f3Smrg * user defined varyings sharing the same location. 14837ec681f3Smrg */ 14847ec681f3Smrg bool processed = false; 14857ec681f3Smrg if (var->data.location >= base) { 14867ec681f3Smrg unsigned glsl_location = var->data.location - base; 14877ec681f3Smrg 14887ec681f3Smrg for (unsigned i = 0; i < var_size; i++) { 14897ec681f3Smrg if (processed_locs[var->data.index] & 14907ec681f3Smrg ((uint64_t)1 << (glsl_location + i))) 14917ec681f3Smrg processed = true; 14927ec681f3Smrg else 14937ec681f3Smrg processed_locs[var->data.index] |= 14947ec681f3Smrg ((uint64_t)1 << (glsl_location + i)); 14957ec681f3Smrg } 14967ec681f3Smrg } 14977ec681f3Smrg 14987ec681f3Smrg /* Because component packing allows varyings to share the same location 14997ec681f3Smrg * we may have already have processed this location. 15007ec681f3Smrg */ 15017ec681f3Smrg if (processed) { 15027ec681f3Smrg /* TODO handle overlapping per-view variables */ 15037ec681f3Smrg assert(!var->data.per_view); 15047ec681f3Smrg unsigned driver_location = assigned_locations[var->data.location]; 15057ec681f3Smrg var->data.driver_location = driver_location; 15067ec681f3Smrg 15077ec681f3Smrg /* An array may be packed such that is crosses multiple other arrays 15087ec681f3Smrg * or variables, we need to make sure we have allocated the elements 15097ec681f3Smrg * consecutively if the previously proccessed var was shorter than 15107ec681f3Smrg * the current array we are processing. 15117ec681f3Smrg * 15127ec681f3Smrg * NOTE: The code below assumes the var list is ordered in ascending 15137ec681f3Smrg * location order. 15147ec681f3Smrg */ 15157ec681f3Smrg assert(last_loc <= var->data.location); 15167ec681f3Smrg last_loc = var->data.location; 15177ec681f3Smrg unsigned last_slot_location = driver_location + var_size; 15187ec681f3Smrg if (last_slot_location > location) { 15197ec681f3Smrg unsigned num_unallocated_slots = last_slot_location - location; 15207ec681f3Smrg unsigned first_unallocated_slot = var_size - num_unallocated_slots; 15217ec681f3Smrg for (unsigned i = first_unallocated_slot; i < var_size; i++) { 15227ec681f3Smrg assigned_locations[var->data.location + i] = location; 15237ec681f3Smrg location++; 15247ec681f3Smrg } 15257ec681f3Smrg } 15267ec681f3Smrg continue; 15277ec681f3Smrg } 15287ec681f3Smrg 15297ec681f3Smrg for (unsigned i = 0; i < var_size; i++) { 15307ec681f3Smrg assigned_locations[var->data.location + i] = location + i; 15317ec681f3Smrg } 15327ec681f3Smrg 15337ec681f3Smrg var->data.driver_location = location; 15347ec681f3Smrg location += driver_size; 15357ec681f3Smrg } 15367ec681f3Smrg 15377ec681f3Smrg if (last_partial) 15387ec681f3Smrg location++; 15397ec681f3Smrg 15407ec681f3Smrg exec_list_append(&shader->variables, &io_vars); 15417ec681f3Smrg *size = location; 15427ec681f3Smrg} 15437ec681f3Smrg 15447ec681f3Smrgstatic uint64_t 15457ec681f3Smrgget_linked_variable_location(unsigned location, bool patch) 15467ec681f3Smrg{ 15477ec681f3Smrg if (!patch) 15487ec681f3Smrg return location; 15497ec681f3Smrg 15507ec681f3Smrg /* Reserve locations 0...3 for special patch variables 15517ec681f3Smrg * like tess factors and bounding boxes, and the generic patch 15527ec681f3Smrg * variables will come after them. 15537ec681f3Smrg */ 15547ec681f3Smrg if (location >= VARYING_SLOT_PATCH0) 15557ec681f3Smrg return location - VARYING_SLOT_PATCH0 + 4; 15567ec681f3Smrg else if (location >= VARYING_SLOT_TESS_LEVEL_OUTER && 15577ec681f3Smrg location <= VARYING_SLOT_BOUNDING_BOX1) 15587ec681f3Smrg return location - VARYING_SLOT_TESS_LEVEL_OUTER; 15597ec681f3Smrg else 15607ec681f3Smrg unreachable("Unsupported variable in get_linked_variable_location."); 15617ec681f3Smrg} 15627ec681f3Smrg 15637ec681f3Smrgstatic uint64_t 15647ec681f3Smrgget_linked_variable_io_mask(nir_variable *variable, gl_shader_stage stage) 15657ec681f3Smrg{ 15667ec681f3Smrg const struct glsl_type *type = variable->type; 15677ec681f3Smrg 15687ec681f3Smrg if (nir_is_arrayed_io(variable, stage)) { 15697ec681f3Smrg assert(glsl_type_is_array(type)); 15707ec681f3Smrg type = glsl_get_array_element(type); 15717ec681f3Smrg } 15727ec681f3Smrg 15737ec681f3Smrg unsigned slots = glsl_count_attribute_slots(type, false); 15747ec681f3Smrg if (variable->data.compact) { 15757ec681f3Smrg unsigned component_count = variable->data.location_frac + glsl_get_length(type); 15767ec681f3Smrg slots = DIV_ROUND_UP(component_count, 4); 15777ec681f3Smrg } 15787ec681f3Smrg 15797ec681f3Smrg uint64_t mask = u_bit_consecutive64(0, slots); 15807ec681f3Smrg return mask; 15817ec681f3Smrg} 15827ec681f3Smrg 15837ec681f3Smrgnir_linked_io_var_info 15847ec681f3Smrgnir_assign_linked_io_var_locations(nir_shader *producer, nir_shader *consumer) 15857ec681f3Smrg{ 15867ec681f3Smrg assert(producer); 15877ec681f3Smrg assert(consumer); 15887ec681f3Smrg 15897ec681f3Smrg uint64_t producer_output_mask = 0; 15907ec681f3Smrg uint64_t producer_patch_output_mask = 0; 15917ec681f3Smrg 15927ec681f3Smrg nir_foreach_shader_out_variable(variable, producer) { 15937ec681f3Smrg uint64_t mask = get_linked_variable_io_mask(variable, producer->info.stage); 15947ec681f3Smrg uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch); 15957ec681f3Smrg 15967ec681f3Smrg if (variable->data.patch) 15977ec681f3Smrg producer_patch_output_mask |= mask << loc; 15987ec681f3Smrg else 15997ec681f3Smrg producer_output_mask |= mask << loc; 16007ec681f3Smrg } 16017ec681f3Smrg 16027ec681f3Smrg uint64_t consumer_input_mask = 0; 16037ec681f3Smrg uint64_t consumer_patch_input_mask = 0; 16047ec681f3Smrg 16057ec681f3Smrg nir_foreach_shader_in_variable(variable, consumer) { 16067ec681f3Smrg uint64_t mask = get_linked_variable_io_mask(variable, consumer->info.stage); 16077ec681f3Smrg uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch); 16087ec681f3Smrg 16097ec681f3Smrg if (variable->data.patch) 16107ec681f3Smrg consumer_patch_input_mask |= mask << loc; 16117ec681f3Smrg else 16127ec681f3Smrg consumer_input_mask |= mask << loc; 16137ec681f3Smrg } 16147ec681f3Smrg 16157ec681f3Smrg uint64_t io_mask = producer_output_mask | consumer_input_mask; 16167ec681f3Smrg uint64_t patch_io_mask = producer_patch_output_mask | consumer_patch_input_mask; 16177ec681f3Smrg 16187ec681f3Smrg nir_foreach_shader_out_variable(variable, producer) { 16197ec681f3Smrg uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch); 16207ec681f3Smrg 16217ec681f3Smrg if (variable->data.patch) 16227ec681f3Smrg variable->data.driver_location = util_bitcount64(patch_io_mask & u_bit_consecutive64(0, loc)); 16237ec681f3Smrg else 16247ec681f3Smrg variable->data.driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, loc)); 16257ec681f3Smrg } 16267ec681f3Smrg 16277ec681f3Smrg nir_foreach_shader_in_variable(variable, consumer) { 16287ec681f3Smrg uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch); 16297ec681f3Smrg 16307ec681f3Smrg if (variable->data.patch) 16317ec681f3Smrg variable->data.driver_location = util_bitcount64(patch_io_mask & u_bit_consecutive64(0, loc)); 16327ec681f3Smrg else 16337ec681f3Smrg variable->data.driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, loc)); 16347ec681f3Smrg } 16357ec681f3Smrg 16367ec681f3Smrg nir_linked_io_var_info result = { 16377ec681f3Smrg .num_linked_io_vars = util_bitcount64(io_mask), 16387ec681f3Smrg .num_linked_patch_io_vars = util_bitcount64(patch_io_mask), 16397ec681f3Smrg }; 16407ec681f3Smrg 16417ec681f3Smrg return result; 16427ec681f3Smrg} 1643