101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2012 Intel Corporation 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 2101e04c3fSmrg * DEALINGS IN THE SOFTWARE. 2201e04c3fSmrg */ 2301e04c3fSmrg 2401e04c3fSmrg/** 2501e04c3fSmrg * \file link_varyings.cpp 2601e04c3fSmrg * 2701e04c3fSmrg * Linker functions related specifically to linking varyings between shader 2801e04c3fSmrg * stages. 2901e04c3fSmrg */ 3001e04c3fSmrg 3101e04c3fSmrg 3201e04c3fSmrg#include "main/errors.h" 3301e04c3fSmrg#include "main/mtypes.h" 3401e04c3fSmrg#include "glsl_symbol_table.h" 3501e04c3fSmrg#include "glsl_parser_extras.h" 3601e04c3fSmrg#include "ir_optimization.h" 3701e04c3fSmrg#include "linker.h" 3801e04c3fSmrg#include "link_varyings.h" 3901e04c3fSmrg#include "main/macros.h" 4001e04c3fSmrg#include "util/hash_table.h" 4101e04c3fSmrg#include "util/u_math.h" 4201e04c3fSmrg#include "program.h" 4301e04c3fSmrg 4401e04c3fSmrg 4501e04c3fSmrg/** 4601e04c3fSmrg * Get the varying type stripped of the outermost array if we're processing 4701e04c3fSmrg * a stage whose varyings are arrays indexed by a vertex number (such as 4801e04c3fSmrg * geometry shader inputs). 4901e04c3fSmrg */ 5001e04c3fSmrgstatic const glsl_type * 5101e04c3fSmrgget_varying_type(const ir_variable *var, gl_shader_stage stage) 5201e04c3fSmrg{ 5301e04c3fSmrg const glsl_type *type = var->type; 5401e04c3fSmrg 5501e04c3fSmrg if (!var->data.patch && 5601e04c3fSmrg ((var->data.mode == ir_var_shader_out && 5701e04c3fSmrg stage == MESA_SHADER_TESS_CTRL) || 5801e04c3fSmrg (var->data.mode == ir_var_shader_in && 5901e04c3fSmrg (stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_TESS_EVAL || 6001e04c3fSmrg stage == MESA_SHADER_GEOMETRY)))) { 6101e04c3fSmrg assert(type->is_array()); 6201e04c3fSmrg type = type->fields.array; 6301e04c3fSmrg } 6401e04c3fSmrg 6501e04c3fSmrg return type; 6601e04c3fSmrg} 6701e04c3fSmrg 687ec681f3Smrgstatic bool 697ec681f3Smrgvarying_has_user_specified_location(const ir_variable *var) 707ec681f3Smrg{ 717ec681f3Smrg return var->data.explicit_location && 727ec681f3Smrg var->data.location >= VARYING_SLOT_VAR0; 737ec681f3Smrg} 747ec681f3Smrg 7501e04c3fSmrgstatic void 7601e04c3fSmrgcreate_xfb_varying_names(void *mem_ctx, const glsl_type *t, char **name, 7701e04c3fSmrg size_t name_length, unsigned *count, 7801e04c3fSmrg const char *ifc_member_name, 7901e04c3fSmrg const glsl_type *ifc_member_t, char ***varying_names) 8001e04c3fSmrg{ 8101e04c3fSmrg if (t->is_interface()) { 8201e04c3fSmrg size_t new_length = name_length; 8301e04c3fSmrg 8401e04c3fSmrg assert(ifc_member_name && ifc_member_t); 8501e04c3fSmrg ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", ifc_member_name); 8601e04c3fSmrg 8701e04c3fSmrg create_xfb_varying_names(mem_ctx, ifc_member_t, name, new_length, count, 8801e04c3fSmrg NULL, NULL, varying_names); 897e102996Smaya } else if (t->is_struct()) { 9001e04c3fSmrg for (unsigned i = 0; i < t->length; i++) { 9101e04c3fSmrg const char *field = t->fields.structure[i].name; 9201e04c3fSmrg size_t new_length = name_length; 9301e04c3fSmrg 9401e04c3fSmrg ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", field); 9501e04c3fSmrg 9601e04c3fSmrg create_xfb_varying_names(mem_ctx, t->fields.structure[i].type, name, 9701e04c3fSmrg new_length, count, NULL, NULL, 9801e04c3fSmrg varying_names); 9901e04c3fSmrg } 1007e102996Smaya } else if (t->without_array()->is_struct() || 10101e04c3fSmrg t->without_array()->is_interface() || 10201e04c3fSmrg (t->is_array() && t->fields.array->is_array())) { 10301e04c3fSmrg for (unsigned i = 0; i < t->length; i++) { 10401e04c3fSmrg size_t new_length = name_length; 10501e04c3fSmrg 10601e04c3fSmrg /* Append the subscript to the current variable name */ 10701e04c3fSmrg ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i); 10801e04c3fSmrg 10901e04c3fSmrg create_xfb_varying_names(mem_ctx, t->fields.array, name, new_length, 11001e04c3fSmrg count, ifc_member_name, ifc_member_t, 11101e04c3fSmrg varying_names); 11201e04c3fSmrg } 11301e04c3fSmrg } else { 11401e04c3fSmrg (*varying_names)[(*count)++] = ralloc_strdup(mem_ctx, *name); 11501e04c3fSmrg } 11601e04c3fSmrg} 11701e04c3fSmrg 11801e04c3fSmrgstatic bool 11901e04c3fSmrgprocess_xfb_layout_qualifiers(void *mem_ctx, const gl_linked_shader *sh, 12001e04c3fSmrg struct gl_shader_program *prog, 12101e04c3fSmrg unsigned *num_tfeedback_decls, 12201e04c3fSmrg char ***varying_names) 12301e04c3fSmrg{ 12401e04c3fSmrg bool has_xfb_qualifiers = false; 12501e04c3fSmrg 12601e04c3fSmrg /* We still need to enable transform feedback mode even if xfb_stride is 12701e04c3fSmrg * only applied to a global out. Also we don't bother to propagate 12801e04c3fSmrg * xfb_stride to interface block members so this will catch that case also. 12901e04c3fSmrg */ 13001e04c3fSmrg for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) { 13101e04c3fSmrg if (prog->TransformFeedback.BufferStride[j]) { 13201e04c3fSmrg has_xfb_qualifiers = true; 13301e04c3fSmrg break; 13401e04c3fSmrg } 13501e04c3fSmrg } 13601e04c3fSmrg 13701e04c3fSmrg foreach_in_list(ir_instruction, node, sh->ir) { 13801e04c3fSmrg ir_variable *var = node->as_variable(); 13901e04c3fSmrg if (!var || var->data.mode != ir_var_shader_out) 14001e04c3fSmrg continue; 14101e04c3fSmrg 14201e04c3fSmrg /* From the ARB_enhanced_layouts spec: 14301e04c3fSmrg * 14401e04c3fSmrg * "Any shader making any static use (after preprocessing) of any of 14501e04c3fSmrg * these *xfb_* qualifiers will cause the shader to be in a 14601e04c3fSmrg * transform feedback capturing mode and hence responsible for 14701e04c3fSmrg * describing the transform feedback setup. This mode will capture 14801e04c3fSmrg * any output selected by *xfb_offset*, directly or indirectly, to 14901e04c3fSmrg * a transform feedback buffer." 15001e04c3fSmrg */ 15101e04c3fSmrg if (var->data.explicit_xfb_buffer || var->data.explicit_xfb_stride) { 15201e04c3fSmrg has_xfb_qualifiers = true; 15301e04c3fSmrg } 15401e04c3fSmrg 15501e04c3fSmrg if (var->data.explicit_xfb_offset) { 15601e04c3fSmrg *num_tfeedback_decls += var->type->varying_count(); 15701e04c3fSmrg has_xfb_qualifiers = true; 15801e04c3fSmrg } 15901e04c3fSmrg } 16001e04c3fSmrg 16101e04c3fSmrg if (*num_tfeedback_decls == 0) 16201e04c3fSmrg return has_xfb_qualifiers; 16301e04c3fSmrg 16401e04c3fSmrg unsigned i = 0; 16501e04c3fSmrg *varying_names = ralloc_array(mem_ctx, char *, *num_tfeedback_decls); 16601e04c3fSmrg foreach_in_list(ir_instruction, node, sh->ir) { 16701e04c3fSmrg ir_variable *var = node->as_variable(); 16801e04c3fSmrg if (!var || var->data.mode != ir_var_shader_out) 16901e04c3fSmrg continue; 17001e04c3fSmrg 17101e04c3fSmrg if (var->data.explicit_xfb_offset) { 17201e04c3fSmrg char *name; 17301e04c3fSmrg const glsl_type *type, *member_type; 17401e04c3fSmrg 17501e04c3fSmrg if (var->data.from_named_ifc_block) { 17601e04c3fSmrg type = var->get_interface_type(); 17701e04c3fSmrg 17801e04c3fSmrg /* Find the member type before it was altered by lowering */ 17901e04c3fSmrg const glsl_type *type_wa = type->without_array(); 18001e04c3fSmrg member_type = 18101e04c3fSmrg type_wa->fields.structure[type_wa->field_index(var->name)].type; 18201e04c3fSmrg name = ralloc_strdup(NULL, type_wa->name); 18301e04c3fSmrg } else { 18401e04c3fSmrg type = var->type; 18501e04c3fSmrg member_type = NULL; 18601e04c3fSmrg name = ralloc_strdup(NULL, var->name); 18701e04c3fSmrg } 18801e04c3fSmrg create_xfb_varying_names(mem_ctx, type, &name, strlen(name), &i, 18901e04c3fSmrg var->name, member_type, varying_names); 19001e04c3fSmrg ralloc_free(name); 19101e04c3fSmrg } 19201e04c3fSmrg } 19301e04c3fSmrg 19401e04c3fSmrg assert(i == *num_tfeedback_decls); 19501e04c3fSmrg return has_xfb_qualifiers; 19601e04c3fSmrg} 19701e04c3fSmrg 19801e04c3fSmrg/** 19901e04c3fSmrg * Validate the types and qualifiers of an output from one stage against the 20001e04c3fSmrg * matching input to another stage. 20101e04c3fSmrg */ 20201e04c3fSmrgstatic void 20301e04c3fSmrgcross_validate_types_and_qualifiers(struct gl_context *ctx, 20401e04c3fSmrg struct gl_shader_program *prog, 20501e04c3fSmrg const ir_variable *input, 20601e04c3fSmrg const ir_variable *output, 20701e04c3fSmrg gl_shader_stage consumer_stage, 20801e04c3fSmrg gl_shader_stage producer_stage) 20901e04c3fSmrg{ 21001e04c3fSmrg /* Check that the types match between stages. 21101e04c3fSmrg */ 21201e04c3fSmrg const glsl_type *type_to_match = input->type; 21301e04c3fSmrg 21401e04c3fSmrg /* VS -> GS, VS -> TCS, VS -> TES, TES -> GS */ 21501e04c3fSmrg const bool extra_array_level = (producer_stage == MESA_SHADER_VERTEX && 21601e04c3fSmrg consumer_stage != MESA_SHADER_FRAGMENT) || 21701e04c3fSmrg consumer_stage == MESA_SHADER_GEOMETRY; 21801e04c3fSmrg if (extra_array_level) { 21901e04c3fSmrg assert(type_to_match->is_array()); 22001e04c3fSmrg type_to_match = type_to_match->fields.array; 22101e04c3fSmrg } 22201e04c3fSmrg 22301e04c3fSmrg if (type_to_match != output->type) { 2247e102996Smaya if (output->type->is_struct()) { 2257e102996Smaya /* Structures across shader stages can have different name 2267e102996Smaya * and considered to match in type if and only if structure 2277e102996Smaya * members match in name, type, qualification, and declaration 2287ec681f3Smrg * order. The precision doesn’t need to match. 2297e102996Smaya */ 2307ec681f3Smrg if (!output->type->record_compare(type_to_match, 2317ec681f3Smrg false, /* match_name */ 2327ec681f3Smrg true, /* match_locations */ 2337ec681f3Smrg false /* match_precision */)) { 2347e102996Smaya linker_error(prog, 2357e102996Smaya "%s shader output `%s' declared as struct `%s', " 2367e102996Smaya "doesn't match in type with %s shader input " 2377e102996Smaya "declared as struct `%s'\n", 2387e102996Smaya _mesa_shader_stage_to_string(producer_stage), 2397e102996Smaya output->name, 2407e102996Smaya output->type->name, 2417e102996Smaya _mesa_shader_stage_to_string(consumer_stage), 2427e102996Smaya input->type->name); 2437e102996Smaya } 2447e102996Smaya } else if (!output->type->is_array() || !is_gl_identifier(output->name)) { 2457e102996Smaya /* There is a bit of a special case for gl_TexCoord. This 2467e102996Smaya * built-in is unsized by default. Applications that variable 2477e102996Smaya * access it must redeclare it with a size. There is some 2487e102996Smaya * language in the GLSL spec that implies the fragment shader 2497e102996Smaya * and vertex shader do not have to agree on this size. Other 2507e102996Smaya * driver behave this way, and one or two applications seem to 2517e102996Smaya * rely on it. 2527e102996Smaya * 2537e102996Smaya * Neither declaration needs to be modified here because the array 2547e102996Smaya * sizes are fixed later when update_array_sizes is called. 2557e102996Smaya * 2567e102996Smaya * From page 48 (page 54 of the PDF) of the GLSL 1.10 spec: 2577e102996Smaya * 2587e102996Smaya * "Unlike user-defined varying variables, the built-in 2597e102996Smaya * varying variables don't have a strict one-to-one 2607e102996Smaya * correspondence between the vertex language and the 2617e102996Smaya * fragment language." 2627e102996Smaya */ 26301e04c3fSmrg linker_error(prog, 26401e04c3fSmrg "%s shader output `%s' declared as type `%s', " 26501e04c3fSmrg "but %s shader input declared as type `%s'\n", 26601e04c3fSmrg _mesa_shader_stage_to_string(producer_stage), 26701e04c3fSmrg output->name, 26801e04c3fSmrg output->type->name, 26901e04c3fSmrg _mesa_shader_stage_to_string(consumer_stage), 27001e04c3fSmrg input->type->name); 27101e04c3fSmrg return; 27201e04c3fSmrg } 27301e04c3fSmrg } 27401e04c3fSmrg 27501e04c3fSmrg /* Check that all of the qualifiers match between stages. 27601e04c3fSmrg */ 27701e04c3fSmrg 27801e04c3fSmrg /* According to the OpenGL and OpenGLES GLSL specs, the centroid qualifier 27901e04c3fSmrg * should match until OpenGL 4.3 and OpenGLES 3.1. The OpenGLES 3.0 28001e04c3fSmrg * conformance test suite does not verify that the qualifiers must match. 28101e04c3fSmrg * The deqp test suite expects the opposite (OpenGLES 3.1) behavior for 28201e04c3fSmrg * OpenGLES 3.0 drivers, so we relax the checking in all cases. 28301e04c3fSmrg */ 28401e04c3fSmrg if (false /* always skip the centroid check */ && 28501e04c3fSmrg prog->data->Version < (prog->IsES ? 310 : 430) && 28601e04c3fSmrg input->data.centroid != output->data.centroid) { 28701e04c3fSmrg linker_error(prog, 28801e04c3fSmrg "%s shader output `%s' %s centroid qualifier, " 28901e04c3fSmrg "but %s shader input %s centroid qualifier\n", 29001e04c3fSmrg _mesa_shader_stage_to_string(producer_stage), 29101e04c3fSmrg output->name, 29201e04c3fSmrg (output->data.centroid) ? "has" : "lacks", 29301e04c3fSmrg _mesa_shader_stage_to_string(consumer_stage), 29401e04c3fSmrg (input->data.centroid) ? "has" : "lacks"); 29501e04c3fSmrg return; 29601e04c3fSmrg } 29701e04c3fSmrg 29801e04c3fSmrg if (input->data.sample != output->data.sample) { 29901e04c3fSmrg linker_error(prog, 30001e04c3fSmrg "%s shader output `%s' %s sample qualifier, " 30101e04c3fSmrg "but %s shader input %s sample qualifier\n", 30201e04c3fSmrg _mesa_shader_stage_to_string(producer_stage), 30301e04c3fSmrg output->name, 30401e04c3fSmrg (output->data.sample) ? "has" : "lacks", 30501e04c3fSmrg _mesa_shader_stage_to_string(consumer_stage), 30601e04c3fSmrg (input->data.sample) ? "has" : "lacks"); 30701e04c3fSmrg return; 30801e04c3fSmrg } 30901e04c3fSmrg 31001e04c3fSmrg if (input->data.patch != output->data.patch) { 31101e04c3fSmrg linker_error(prog, 31201e04c3fSmrg "%s shader output `%s' %s patch qualifier, " 31301e04c3fSmrg "but %s shader input %s patch qualifier\n", 31401e04c3fSmrg _mesa_shader_stage_to_string(producer_stage), 31501e04c3fSmrg output->name, 31601e04c3fSmrg (output->data.patch) ? "has" : "lacks", 31701e04c3fSmrg _mesa_shader_stage_to_string(consumer_stage), 31801e04c3fSmrg (input->data.patch) ? "has" : "lacks"); 31901e04c3fSmrg return; 32001e04c3fSmrg } 32101e04c3fSmrg 3227ec681f3Smrg /* The GLSL 4.20 and GLSL ES 3.00 specifications say: 32301e04c3fSmrg * 32401e04c3fSmrg * "As only outputs need be declared with invariant, an output from 32501e04c3fSmrg * one shader stage will still match an input of a subsequent stage 32601e04c3fSmrg * without the input being declared as invariant." 32701e04c3fSmrg * 3287ec681f3Smrg * while GLSL 4.10 says: 32901e04c3fSmrg * 33001e04c3fSmrg * "For variables leaving one shader and coming into another shader, 33101e04c3fSmrg * the invariant keyword has to be used in both shaders, or a link 33201e04c3fSmrg * error will result." 33301e04c3fSmrg * 33401e04c3fSmrg * and GLSL ES 1.00 section 4.6.4 "Invariance and Linking" says: 33501e04c3fSmrg * 33601e04c3fSmrg * "The invariance of varyings that are declared in both the vertex 33701e04c3fSmrg * and fragment shaders must match." 33801e04c3fSmrg */ 339993e1d59Smrg if (input->data.explicit_invariant != output->data.explicit_invariant && 3407ec681f3Smrg prog->data->Version < (prog->IsES ? 300 : 420)) { 34101e04c3fSmrg linker_error(prog, 34201e04c3fSmrg "%s shader output `%s' %s invariant qualifier, " 34301e04c3fSmrg "but %s shader input %s invariant qualifier\n", 34401e04c3fSmrg _mesa_shader_stage_to_string(producer_stage), 34501e04c3fSmrg output->name, 346993e1d59Smrg (output->data.explicit_invariant) ? "has" : "lacks", 34701e04c3fSmrg _mesa_shader_stage_to_string(consumer_stage), 348993e1d59Smrg (input->data.explicit_invariant) ? "has" : "lacks"); 34901e04c3fSmrg return; 35001e04c3fSmrg } 35101e04c3fSmrg 35201e04c3fSmrg /* GLSL >= 4.40 removes text requiring interpolation qualifiers 35301e04c3fSmrg * to match cross stage, they must only match within the same stage. 35401e04c3fSmrg * 35501e04c3fSmrg * From page 84 (page 90 of the PDF) of the GLSL 4.40 spec: 35601e04c3fSmrg * 35701e04c3fSmrg * "It is a link-time error if, within the same stage, the interpolation 35801e04c3fSmrg * qualifiers of variables of the same name do not match. 35901e04c3fSmrg * 36001e04c3fSmrg * Section 4.3.9 (Interpolation) of the GLSL ES 3.00 spec says: 36101e04c3fSmrg * 36201e04c3fSmrg * "When no interpolation qualifier is present, smooth interpolation 36301e04c3fSmrg * is used." 36401e04c3fSmrg * 36501e04c3fSmrg * So we match variables where one is smooth and the other has no explicit 36601e04c3fSmrg * qualifier. 36701e04c3fSmrg */ 36801e04c3fSmrg unsigned input_interpolation = input->data.interpolation; 36901e04c3fSmrg unsigned output_interpolation = output->data.interpolation; 37001e04c3fSmrg if (prog->IsES) { 37101e04c3fSmrg if (input_interpolation == INTERP_MODE_NONE) 37201e04c3fSmrg input_interpolation = INTERP_MODE_SMOOTH; 37301e04c3fSmrg if (output_interpolation == INTERP_MODE_NONE) 37401e04c3fSmrg output_interpolation = INTERP_MODE_SMOOTH; 37501e04c3fSmrg } 37601e04c3fSmrg if (input_interpolation != output_interpolation && 37701e04c3fSmrg prog->data->Version < 440) { 37801e04c3fSmrg if (!ctx->Const.AllowGLSLCrossStageInterpolationMismatch) { 37901e04c3fSmrg linker_error(prog, 38001e04c3fSmrg "%s shader output `%s' specifies %s " 38101e04c3fSmrg "interpolation qualifier, " 38201e04c3fSmrg "but %s shader input specifies %s " 38301e04c3fSmrg "interpolation qualifier\n", 38401e04c3fSmrg _mesa_shader_stage_to_string(producer_stage), 38501e04c3fSmrg output->name, 38601e04c3fSmrg interpolation_string(output->data.interpolation), 38701e04c3fSmrg _mesa_shader_stage_to_string(consumer_stage), 38801e04c3fSmrg interpolation_string(input->data.interpolation)); 38901e04c3fSmrg return; 39001e04c3fSmrg } else { 39101e04c3fSmrg linker_warning(prog, 39201e04c3fSmrg "%s shader output `%s' specifies %s " 39301e04c3fSmrg "interpolation qualifier, " 39401e04c3fSmrg "but %s shader input specifies %s " 39501e04c3fSmrg "interpolation qualifier\n", 39601e04c3fSmrg _mesa_shader_stage_to_string(producer_stage), 39701e04c3fSmrg output->name, 39801e04c3fSmrg interpolation_string(output->data.interpolation), 39901e04c3fSmrg _mesa_shader_stage_to_string(consumer_stage), 40001e04c3fSmrg interpolation_string(input->data.interpolation)); 40101e04c3fSmrg } 40201e04c3fSmrg } 40301e04c3fSmrg} 40401e04c3fSmrg 40501e04c3fSmrg/** 40601e04c3fSmrg * Validate front and back color outputs against single color input 40701e04c3fSmrg */ 40801e04c3fSmrgstatic void 40901e04c3fSmrgcross_validate_front_and_back_color(struct gl_context *ctx, 41001e04c3fSmrg struct gl_shader_program *prog, 41101e04c3fSmrg const ir_variable *input, 41201e04c3fSmrg const ir_variable *front_color, 41301e04c3fSmrg const ir_variable *back_color, 41401e04c3fSmrg gl_shader_stage consumer_stage, 41501e04c3fSmrg gl_shader_stage producer_stage) 41601e04c3fSmrg{ 41701e04c3fSmrg if (front_color != NULL && front_color->data.assigned) 41801e04c3fSmrg cross_validate_types_and_qualifiers(ctx, prog, input, front_color, 41901e04c3fSmrg consumer_stage, producer_stage); 42001e04c3fSmrg 42101e04c3fSmrg if (back_color != NULL && back_color->data.assigned) 42201e04c3fSmrg cross_validate_types_and_qualifiers(ctx, prog, input, back_color, 42301e04c3fSmrg consumer_stage, producer_stage); 42401e04c3fSmrg} 42501e04c3fSmrg 42601e04c3fSmrgstatic unsigned 42701e04c3fSmrgcompute_variable_location_slot(ir_variable *var, gl_shader_stage stage) 42801e04c3fSmrg{ 42901e04c3fSmrg unsigned location_start = VARYING_SLOT_VAR0; 43001e04c3fSmrg 43101e04c3fSmrg switch (stage) { 43201e04c3fSmrg case MESA_SHADER_VERTEX: 43301e04c3fSmrg if (var->data.mode == ir_var_shader_in) 43401e04c3fSmrg location_start = VERT_ATTRIB_GENERIC0; 43501e04c3fSmrg break; 43601e04c3fSmrg case MESA_SHADER_TESS_CTRL: 43701e04c3fSmrg case MESA_SHADER_TESS_EVAL: 43801e04c3fSmrg if (var->data.patch) 43901e04c3fSmrg location_start = VARYING_SLOT_PATCH0; 44001e04c3fSmrg break; 44101e04c3fSmrg case MESA_SHADER_FRAGMENT: 44201e04c3fSmrg if (var->data.mode == ir_var_shader_out) 44301e04c3fSmrg location_start = FRAG_RESULT_DATA0; 44401e04c3fSmrg break; 44501e04c3fSmrg default: 44601e04c3fSmrg break; 44701e04c3fSmrg } 44801e04c3fSmrg 44901e04c3fSmrg return var->data.location - location_start; 45001e04c3fSmrg} 45101e04c3fSmrg 45201e04c3fSmrgstruct explicit_location_info { 45301e04c3fSmrg ir_variable *var; 4547e102996Smaya bool base_type_is_integer; 4557e102996Smaya unsigned base_type_bit_size; 45601e04c3fSmrg unsigned interpolation; 45701e04c3fSmrg bool centroid; 45801e04c3fSmrg bool sample; 45901e04c3fSmrg bool patch; 46001e04c3fSmrg}; 46101e04c3fSmrg 46201e04c3fSmrgstatic bool 46301e04c3fSmrgcheck_location_aliasing(struct explicit_location_info explicit_locations[][4], 46401e04c3fSmrg ir_variable *var, 46501e04c3fSmrg unsigned location, 46601e04c3fSmrg unsigned component, 46701e04c3fSmrg unsigned location_limit, 46801e04c3fSmrg const glsl_type *type, 46901e04c3fSmrg unsigned interpolation, 47001e04c3fSmrg bool centroid, 47101e04c3fSmrg bool sample, 47201e04c3fSmrg bool patch, 47301e04c3fSmrg gl_shader_program *prog, 47401e04c3fSmrg gl_shader_stage stage) 47501e04c3fSmrg{ 47601e04c3fSmrg unsigned last_comp; 4777e102996Smaya unsigned base_type_bit_size; 4787e102996Smaya const glsl_type *type_without_array = type->without_array(); 4797e102996Smaya const bool base_type_is_integer = 4807e102996Smaya glsl_base_type_is_integer(type_without_array->base_type); 4817e102996Smaya const bool is_struct = type_without_array->is_struct(); 4827e102996Smaya if (is_struct) { 4837e102996Smaya /* structs don't have a defined underlying base type so just treat all 4847e102996Smaya * component slots as used and set the bit size to 0. If there is 4857e102996Smaya * location aliasing, we'll fail anyway later. 48601e04c3fSmrg */ 48701e04c3fSmrg last_comp = 4; 4887e102996Smaya base_type_bit_size = 0; 48901e04c3fSmrg } else { 4907e102996Smaya unsigned dmul = type_without_array->is_64bit() ? 2 : 1; 4917e102996Smaya last_comp = component + type_without_array->vector_elements * dmul; 4927e102996Smaya base_type_bit_size = 4937e102996Smaya glsl_base_type_get_bit_size(type_without_array->base_type); 49401e04c3fSmrg } 49501e04c3fSmrg 49601e04c3fSmrg while (location < location_limit) { 49701e04c3fSmrg unsigned comp = 0; 49801e04c3fSmrg while (comp < 4) { 49901e04c3fSmrg struct explicit_location_info *info = 50001e04c3fSmrg &explicit_locations[location][comp]; 50101e04c3fSmrg 50201e04c3fSmrg if (info->var) { 5037e102996Smaya if (info->var->type->without_array()->is_struct() || is_struct) { 5047e102996Smaya /* Structs cannot share location since they are incompatible 5057e102996Smaya * with any other underlying numerical type. 5067e102996Smaya */ 5077e102996Smaya linker_error(prog, 5087e102996Smaya "%s shader has multiple %sputs sharing the " 5097e102996Smaya "same location that don't have the same " 5107e102996Smaya "underlying numerical type. Struct variable '%s', " 5117e102996Smaya "location %u\n", 5127e102996Smaya _mesa_shader_stage_to_string(stage), 5137e102996Smaya var->data.mode == ir_var_shader_in ? "in" : "out", 5147e102996Smaya is_struct ? var->name : info->var->name, 5157e102996Smaya location); 5167e102996Smaya return false; 5177e102996Smaya } else if (comp >= component && comp < last_comp) { 5187e102996Smaya /* Component aliasing is not allowed */ 51901e04c3fSmrg linker_error(prog, 52001e04c3fSmrg "%s shader has multiple %sputs explicitly " 52101e04c3fSmrg "assigned to location %d and component %d\n", 52201e04c3fSmrg _mesa_shader_stage_to_string(stage), 52301e04c3fSmrg var->data.mode == ir_var_shader_in ? "in" : "out", 52401e04c3fSmrg location, comp); 52501e04c3fSmrg return false; 52601e04c3fSmrg } else { 5277e102996Smaya /* From the OpenGL 4.60.5 spec, section 4.4.1 Input Layout 5287e102996Smaya * Qualifiers, Page 67, (Location aliasing): 5297e102996Smaya * 5307e102996Smaya * " Further, when location aliasing, the aliases sharing the 5317e102996Smaya * location must have the same underlying numerical type 5327e102996Smaya * and bit width (floating-point or integer, 32-bit versus 5337e102996Smaya * 64-bit, etc.) and the same auxiliary storage and 5347e102996Smaya * interpolation qualification." 5357e102996Smaya */ 5367e102996Smaya 5377e102996Smaya /* If the underlying numerical type isn't integer, implicitly 5387e102996Smaya * it will be float or else we would have failed by now. 53901e04c3fSmrg */ 5407e102996Smaya if (info->base_type_is_integer != base_type_is_integer) { 5417e102996Smaya linker_error(prog, 5427e102996Smaya "%s shader has multiple %sputs sharing the " 5437e102996Smaya "same location that don't have the same " 5447e102996Smaya "underlying numerical type. Location %u " 5457e102996Smaya "component %u.\n", 5467e102996Smaya _mesa_shader_stage_to_string(stage), 5477e102996Smaya var->data.mode == ir_var_shader_in ? 5487e102996Smaya "in" : "out", location, comp); 5497e102996Smaya return false; 5507e102996Smaya } 5517e102996Smaya 5527e102996Smaya if (info->base_type_bit_size != base_type_bit_size) { 55301e04c3fSmrg linker_error(prog, 5547e102996Smaya "%s shader has multiple %sputs sharing the " 5557e102996Smaya "same location that don't have the same " 5567e102996Smaya "underlying numerical bit size. Location %u " 5577e102996Smaya "component %u.\n", 5587e102996Smaya _mesa_shader_stage_to_string(stage), 5597e102996Smaya var->data.mode == ir_var_shader_in ? 5607e102996Smaya "in" : "out", location, comp); 56101e04c3fSmrg return false; 56201e04c3fSmrg } 56301e04c3fSmrg 56401e04c3fSmrg if (info->interpolation != interpolation) { 56501e04c3fSmrg linker_error(prog, 5667e102996Smaya "%s shader has multiple %sputs sharing the " 5677e102996Smaya "same location that don't have the same " 5687e102996Smaya "interpolation qualification. Location %u " 5697e102996Smaya "component %u.\n", 57001e04c3fSmrg _mesa_shader_stage_to_string(stage), 57101e04c3fSmrg var->data.mode == ir_var_shader_in ? 5727e102996Smaya "in" : "out", location, comp); 57301e04c3fSmrg return false; 57401e04c3fSmrg } 57501e04c3fSmrg 57601e04c3fSmrg if (info->centroid != centroid || 57701e04c3fSmrg info->sample != sample || 57801e04c3fSmrg info->patch != patch) { 57901e04c3fSmrg linker_error(prog, 5807e102996Smaya "%s shader has multiple %sputs sharing the " 5817e102996Smaya "same location that don't have the same " 5827e102996Smaya "auxiliary storage qualification. Location %u " 5837e102996Smaya "component %u.\n", 58401e04c3fSmrg _mesa_shader_stage_to_string(stage), 58501e04c3fSmrg var->data.mode == ir_var_shader_in ? 5867e102996Smaya "in" : "out", location, comp); 58701e04c3fSmrg return false; 58801e04c3fSmrg } 58901e04c3fSmrg } 59001e04c3fSmrg } else if (comp >= component && comp < last_comp) { 59101e04c3fSmrg info->var = var; 5927e102996Smaya info->base_type_is_integer = base_type_is_integer; 5937e102996Smaya info->base_type_bit_size = base_type_bit_size; 59401e04c3fSmrg info->interpolation = interpolation; 59501e04c3fSmrg info->centroid = centroid; 59601e04c3fSmrg info->sample = sample; 59701e04c3fSmrg info->patch = patch; 59801e04c3fSmrg } 59901e04c3fSmrg 60001e04c3fSmrg comp++; 60101e04c3fSmrg 60201e04c3fSmrg /* We need to do some special handling for doubles as dvec3 and 60301e04c3fSmrg * dvec4 consume two consecutive locations. We don't need to 60401e04c3fSmrg * worry about components beginning at anything other than 0 as 60501e04c3fSmrg * the spec does not allow this for dvec3 and dvec4. 60601e04c3fSmrg */ 60701e04c3fSmrg if (comp == 4 && last_comp > 4) { 60801e04c3fSmrg last_comp = last_comp - 4; 60901e04c3fSmrg /* Bump location index and reset the component index */ 61001e04c3fSmrg location++; 61101e04c3fSmrg comp = 0; 61201e04c3fSmrg component = 0; 61301e04c3fSmrg } 61401e04c3fSmrg } 61501e04c3fSmrg 61601e04c3fSmrg location++; 61701e04c3fSmrg } 61801e04c3fSmrg 61901e04c3fSmrg return true; 62001e04c3fSmrg} 62101e04c3fSmrg 62201e04c3fSmrgstatic bool 62301e04c3fSmrgvalidate_explicit_variable_location(struct gl_context *ctx, 62401e04c3fSmrg struct explicit_location_info explicit_locations[][4], 62501e04c3fSmrg ir_variable *var, 62601e04c3fSmrg gl_shader_program *prog, 62701e04c3fSmrg gl_linked_shader *sh) 62801e04c3fSmrg{ 62901e04c3fSmrg const glsl_type *type = get_varying_type(var, sh->Stage); 63001e04c3fSmrg unsigned num_elements = type->count_attribute_slots(false); 63101e04c3fSmrg unsigned idx = compute_variable_location_slot(var, sh->Stage); 63201e04c3fSmrg unsigned slot_limit = idx + num_elements; 63301e04c3fSmrg 63401e04c3fSmrg /* Vertex shader inputs and fragment shader outputs are validated in 63501e04c3fSmrg * assign_attribute_or_color_locations() so we should not attempt to 63601e04c3fSmrg * validate them again here. 63701e04c3fSmrg */ 63801e04c3fSmrg unsigned slot_max; 63901e04c3fSmrg if (var->data.mode == ir_var_shader_out) { 64001e04c3fSmrg assert(sh->Stage != MESA_SHADER_FRAGMENT); 64101e04c3fSmrg slot_max = 64201e04c3fSmrg ctx->Const.Program[sh->Stage].MaxOutputComponents / 4; 64301e04c3fSmrg } else { 64401e04c3fSmrg assert(var->data.mode == ir_var_shader_in); 64501e04c3fSmrg assert(sh->Stage != MESA_SHADER_VERTEX); 64601e04c3fSmrg slot_max = 64701e04c3fSmrg ctx->Const.Program[sh->Stage].MaxInputComponents / 4; 64801e04c3fSmrg } 64901e04c3fSmrg 65001e04c3fSmrg if (slot_limit > slot_max) { 65101e04c3fSmrg linker_error(prog, 65201e04c3fSmrg "Invalid location %u in %s shader\n", 65301e04c3fSmrg idx, _mesa_shader_stage_to_string(sh->Stage)); 65401e04c3fSmrg return false; 65501e04c3fSmrg } 65601e04c3fSmrg 65701e04c3fSmrg const glsl_type *type_without_array = type->without_array(); 65801e04c3fSmrg if (type_without_array->is_interface()) { 65901e04c3fSmrg for (unsigned i = 0; i < type_without_array->length; i++) { 66001e04c3fSmrg glsl_struct_field *field = &type_without_array->fields.structure[i]; 66101e04c3fSmrg unsigned field_location = field->location - 66201e04c3fSmrg (field->patch ? VARYING_SLOT_PATCH0 : VARYING_SLOT_VAR0); 6637ec681f3Smrg unsigned field_slots = field->type->count_attribute_slots(false); 66401e04c3fSmrg if (!check_location_aliasing(explicit_locations, var, 66501e04c3fSmrg field_location, 6667ec681f3Smrg 0, 6677ec681f3Smrg field_location + field_slots, 66801e04c3fSmrg field->type, 66901e04c3fSmrg field->interpolation, 67001e04c3fSmrg field->centroid, 67101e04c3fSmrg field->sample, 67201e04c3fSmrg field->patch, 67301e04c3fSmrg prog, sh->Stage)) { 67401e04c3fSmrg return false; 67501e04c3fSmrg } 67601e04c3fSmrg } 67701e04c3fSmrg } else if (!check_location_aliasing(explicit_locations, var, 67801e04c3fSmrg idx, var->data.location_frac, 67901e04c3fSmrg slot_limit, type, 68001e04c3fSmrg var->data.interpolation, 68101e04c3fSmrg var->data.centroid, 68201e04c3fSmrg var->data.sample, 68301e04c3fSmrg var->data.patch, 68401e04c3fSmrg prog, sh->Stage)) { 68501e04c3fSmrg return false; 68601e04c3fSmrg } 68701e04c3fSmrg 68801e04c3fSmrg return true; 68901e04c3fSmrg} 69001e04c3fSmrg 69101e04c3fSmrg/** 69201e04c3fSmrg * Validate explicit locations for the inputs to the first stage and the 6937e102996Smaya * outputs of the last stage in a program, if those are not the VS and FS 6947e102996Smaya * shaders. 69501e04c3fSmrg */ 69601e04c3fSmrgvoid 6977e102996Smayavalidate_first_and_last_interface_explicit_locations(struct gl_context *ctx, 6987e102996Smaya struct gl_shader_program *prog, 6997e102996Smaya gl_shader_stage first_stage, 7007e102996Smaya gl_shader_stage last_stage) 70101e04c3fSmrg{ 70201e04c3fSmrg /* VS inputs and FS outputs are validated in 70301e04c3fSmrg * assign_attribute_or_color_locations() 70401e04c3fSmrg */ 70501e04c3fSmrg bool validate_first_stage = first_stage != MESA_SHADER_VERTEX; 70601e04c3fSmrg bool validate_last_stage = last_stage != MESA_SHADER_FRAGMENT; 70701e04c3fSmrg if (!validate_first_stage && !validate_last_stage) 70801e04c3fSmrg return; 70901e04c3fSmrg 71001e04c3fSmrg struct explicit_location_info explicit_locations[MAX_VARYING][4]; 71101e04c3fSmrg 71201e04c3fSmrg gl_shader_stage stages[2] = { first_stage, last_stage }; 71301e04c3fSmrg bool validate_stage[2] = { validate_first_stage, validate_last_stage }; 71401e04c3fSmrg ir_variable_mode var_direction[2] = { ir_var_shader_in, ir_var_shader_out }; 71501e04c3fSmrg 71601e04c3fSmrg for (unsigned i = 0; i < 2; i++) { 71701e04c3fSmrg if (!validate_stage[i]) 71801e04c3fSmrg continue; 71901e04c3fSmrg 72001e04c3fSmrg gl_shader_stage stage = stages[i]; 72101e04c3fSmrg 72201e04c3fSmrg gl_linked_shader *sh = prog->_LinkedShaders[stage]; 72301e04c3fSmrg assert(sh); 72401e04c3fSmrg 72501e04c3fSmrg memset(explicit_locations, 0, sizeof(explicit_locations)); 72601e04c3fSmrg 72701e04c3fSmrg foreach_in_list(ir_instruction, node, sh->ir) { 72801e04c3fSmrg ir_variable *const var = node->as_variable(); 72901e04c3fSmrg 73001e04c3fSmrg if (var == NULL || 73101e04c3fSmrg !var->data.explicit_location || 73201e04c3fSmrg var->data.location < VARYING_SLOT_VAR0 || 73301e04c3fSmrg var->data.mode != var_direction[i]) 73401e04c3fSmrg continue; 73501e04c3fSmrg 73601e04c3fSmrg if (!validate_explicit_variable_location( 73701e04c3fSmrg ctx, explicit_locations, var, prog, sh)) { 73801e04c3fSmrg return; 73901e04c3fSmrg } 74001e04c3fSmrg } 74101e04c3fSmrg } 74201e04c3fSmrg} 74301e04c3fSmrg 7447ec681f3Smrg/** 7457ec681f3Smrg * Check if we should force input / output matching between shader 7467ec681f3Smrg * interfaces. 7477ec681f3Smrg * 7487ec681f3Smrg * Section 4.3.4 (Inputs) of the GLSL 4.10 specifications say: 7497ec681f3Smrg * 7507ec681f3Smrg * "Only the input variables that are actually read need to be 7517ec681f3Smrg * written by the previous stage; it is allowed to have 7527ec681f3Smrg * superfluous declarations of input variables." 7537ec681f3Smrg * 7547ec681f3Smrg * However it's not defined anywhere as to how we should handle 7557ec681f3Smrg * inputs that are not written in the previous stage and it's not 7567ec681f3Smrg * clear what "actually read" means. 7577ec681f3Smrg * 7587ec681f3Smrg * The GLSL 4.20 spec however is much clearer: 7597ec681f3Smrg * 7607ec681f3Smrg * "Only the input variables that are statically read need to 7617ec681f3Smrg * be written by the previous stage; it is allowed to have 7627ec681f3Smrg * superfluous declarations of input variables." 7637ec681f3Smrg * 7647ec681f3Smrg * It also has a table that states it is an error to statically 7657ec681f3Smrg * read an input that is not defined in the previous stage. While 7667ec681f3Smrg * it is not an error to not statically write to the output (it 7677ec681f3Smrg * just needs to be defined to not be an error). 7687ec681f3Smrg * 7697ec681f3Smrg * The text in the GLSL 4.20 spec was an attempt to clarify the 7707ec681f3Smrg * previous spec iterations. However given the difference in spec 7717ec681f3Smrg * and that some applications seem to depend on not erroring when 7727ec681f3Smrg * the input is not actually read in control flow we only apply 7737ec681f3Smrg * this rule to GLSL 4.20 and higher. GLSL 4.10 shaders have been 7747ec681f3Smrg * seen in the wild that depend on the less strict interpretation. 7757ec681f3Smrg */ 7767ec681f3Smrgstatic bool 7777ec681f3Smrgstatic_input_output_matching(struct gl_shader_program *prog) 7787ec681f3Smrg{ 7797ec681f3Smrg return prog->data->Version >= (prog->IsES ? 0 : 420); 7807ec681f3Smrg} 7817ec681f3Smrg 78201e04c3fSmrg/** 78301e04c3fSmrg * Validate that outputs from one stage match inputs of another 78401e04c3fSmrg */ 78501e04c3fSmrgvoid 78601e04c3fSmrgcross_validate_outputs_to_inputs(struct gl_context *ctx, 78701e04c3fSmrg struct gl_shader_program *prog, 78801e04c3fSmrg gl_linked_shader *producer, 78901e04c3fSmrg gl_linked_shader *consumer) 79001e04c3fSmrg{ 79101e04c3fSmrg glsl_symbol_table parameters; 7927e102996Smaya struct explicit_location_info output_explicit_locations[MAX_VARYING][4] = {}; 7937e102996Smaya struct explicit_location_info input_explicit_locations[MAX_VARYING][4] = {}; 79401e04c3fSmrg 79501e04c3fSmrg /* Find all shader outputs in the "producer" stage. 79601e04c3fSmrg */ 79701e04c3fSmrg foreach_in_list(ir_instruction, node, producer->ir) { 79801e04c3fSmrg ir_variable *const var = node->as_variable(); 79901e04c3fSmrg 80001e04c3fSmrg if (var == NULL || var->data.mode != ir_var_shader_out) 80101e04c3fSmrg continue; 80201e04c3fSmrg 80301e04c3fSmrg if (!var->data.explicit_location 80401e04c3fSmrg || var->data.location < VARYING_SLOT_VAR0) 80501e04c3fSmrg parameters.add_variable(var); 80601e04c3fSmrg else { 80701e04c3fSmrg /* User-defined varyings with explicit locations are handled 80801e04c3fSmrg * differently because they do not need to have matching names. 80901e04c3fSmrg */ 81001e04c3fSmrg if (!validate_explicit_variable_location(ctx, 8117e102996Smaya output_explicit_locations, 81201e04c3fSmrg var, prog, producer)) { 81301e04c3fSmrg return; 81401e04c3fSmrg } 81501e04c3fSmrg } 81601e04c3fSmrg } 81701e04c3fSmrg 81801e04c3fSmrg 81901e04c3fSmrg /* Find all shader inputs in the "consumer" stage. Any variables that have 82001e04c3fSmrg * matching outputs already in the symbol table must have the same type and 82101e04c3fSmrg * qualifiers. 82201e04c3fSmrg * 82301e04c3fSmrg * Exception: if the consumer is the geometry shader, then the inputs 82401e04c3fSmrg * should be arrays and the type of the array element should match the type 82501e04c3fSmrg * of the corresponding producer output. 82601e04c3fSmrg */ 82701e04c3fSmrg foreach_in_list(ir_instruction, node, consumer->ir) { 82801e04c3fSmrg ir_variable *const input = node->as_variable(); 82901e04c3fSmrg 83001e04c3fSmrg if (input == NULL || input->data.mode != ir_var_shader_in) 83101e04c3fSmrg continue; 83201e04c3fSmrg 83301e04c3fSmrg if (strcmp(input->name, "gl_Color") == 0 && input->data.used) { 83401e04c3fSmrg const ir_variable *const front_color = 83501e04c3fSmrg parameters.get_variable("gl_FrontColor"); 83601e04c3fSmrg 83701e04c3fSmrg const ir_variable *const back_color = 83801e04c3fSmrg parameters.get_variable("gl_BackColor"); 83901e04c3fSmrg 84001e04c3fSmrg cross_validate_front_and_back_color(ctx, prog, input, 84101e04c3fSmrg front_color, back_color, 84201e04c3fSmrg consumer->Stage, producer->Stage); 84301e04c3fSmrg } else if (strcmp(input->name, "gl_SecondaryColor") == 0 && input->data.used) { 84401e04c3fSmrg const ir_variable *const front_color = 84501e04c3fSmrg parameters.get_variable("gl_FrontSecondaryColor"); 84601e04c3fSmrg 84701e04c3fSmrg const ir_variable *const back_color = 84801e04c3fSmrg parameters.get_variable("gl_BackSecondaryColor"); 84901e04c3fSmrg 85001e04c3fSmrg cross_validate_front_and_back_color(ctx, prog, input, 85101e04c3fSmrg front_color, back_color, 85201e04c3fSmrg consumer->Stage, producer->Stage); 85301e04c3fSmrg } else { 85401e04c3fSmrg /* The rules for connecting inputs and outputs change in the presence 85501e04c3fSmrg * of explicit locations. In this case, we no longer care about the 85601e04c3fSmrg * names of the variables. Instead, we care only about the 85701e04c3fSmrg * explicitly assigned location. 85801e04c3fSmrg */ 85901e04c3fSmrg ir_variable *output = NULL; 86001e04c3fSmrg if (input->data.explicit_location 86101e04c3fSmrg && input->data.location >= VARYING_SLOT_VAR0) { 86201e04c3fSmrg 86301e04c3fSmrg const glsl_type *type = get_varying_type(input, consumer->Stage); 86401e04c3fSmrg unsigned num_elements = type->count_attribute_slots(false); 86501e04c3fSmrg unsigned idx = 86601e04c3fSmrg compute_variable_location_slot(input, consumer->Stage); 86701e04c3fSmrg unsigned slot_limit = idx + num_elements; 86801e04c3fSmrg 8697e102996Smaya if (!validate_explicit_variable_location(ctx, 8707e102996Smaya input_explicit_locations, 8717e102996Smaya input, prog, consumer)) { 8727e102996Smaya return; 8737e102996Smaya } 8747e102996Smaya 87501e04c3fSmrg while (idx < slot_limit) { 87601e04c3fSmrg if (idx >= MAX_VARYING) { 87701e04c3fSmrg linker_error(prog, 87801e04c3fSmrg "Invalid location %u in %s shader\n", idx, 87901e04c3fSmrg _mesa_shader_stage_to_string(consumer->Stage)); 88001e04c3fSmrg return; 88101e04c3fSmrg } 88201e04c3fSmrg 8837e102996Smaya output = output_explicit_locations[idx][input->data.location_frac].var; 88401e04c3fSmrg 885993e1d59Smrg if (output == NULL) { 886993e1d59Smrg /* A linker failure should only happen when there is no 887993e1d59Smrg * output declaration and there is Static Use of the 888993e1d59Smrg * declared input. 889993e1d59Smrg */ 8907ec681f3Smrg if (input->data.used && static_input_output_matching(prog)) { 891993e1d59Smrg linker_error(prog, 892993e1d59Smrg "%s shader input `%s' with explicit location " 893993e1d59Smrg "has no matching output\n", 894993e1d59Smrg _mesa_shader_stage_to_string(consumer->Stage), 895993e1d59Smrg input->name); 896993e1d59Smrg break; 897993e1d59Smrg } 898993e1d59Smrg } else if (input->data.location != output->data.location) { 89901e04c3fSmrg linker_error(prog, 90001e04c3fSmrg "%s shader input `%s' with explicit location " 90101e04c3fSmrg "has no matching output\n", 90201e04c3fSmrg _mesa_shader_stage_to_string(consumer->Stage), 90301e04c3fSmrg input->name); 90401e04c3fSmrg break; 90501e04c3fSmrg } 90601e04c3fSmrg idx++; 90701e04c3fSmrg } 90801e04c3fSmrg } else { 90901e04c3fSmrg output = parameters.get_variable(input->name); 91001e04c3fSmrg } 91101e04c3fSmrg 91201e04c3fSmrg if (output != NULL) { 91301e04c3fSmrg /* Interface blocks have their own validation elsewhere so don't 91401e04c3fSmrg * try validating them here. 91501e04c3fSmrg */ 91601e04c3fSmrg if (!(input->get_interface_type() && 91701e04c3fSmrg output->get_interface_type())) 91801e04c3fSmrg cross_validate_types_and_qualifiers(ctx, prog, input, output, 91901e04c3fSmrg consumer->Stage, 92001e04c3fSmrg producer->Stage); 92101e04c3fSmrg } else { 92201e04c3fSmrg /* Check for input vars with unmatched output vars in prev stage 92301e04c3fSmrg * taking into account that interface blocks could have a matching 92401e04c3fSmrg * output but with different name, so we ignore them. 92501e04c3fSmrg */ 92601e04c3fSmrg assert(!input->data.assigned); 92701e04c3fSmrg if (input->data.used && !input->get_interface_type() && 9287ec681f3Smrg !input->data.explicit_location && 9297ec681f3Smrg static_input_output_matching(prog)) 93001e04c3fSmrg linker_error(prog, 93101e04c3fSmrg "%s shader input `%s' " 93201e04c3fSmrg "has no matching output in the previous stage\n", 93301e04c3fSmrg _mesa_shader_stage_to_string(consumer->Stage), 93401e04c3fSmrg input->name); 93501e04c3fSmrg } 93601e04c3fSmrg } 93701e04c3fSmrg } 93801e04c3fSmrg} 93901e04c3fSmrg 94001e04c3fSmrg/** 94101e04c3fSmrg * Demote shader inputs and outputs that are not used in other stages, and 94201e04c3fSmrg * remove them via dead code elimination. 94301e04c3fSmrg */ 94401e04c3fSmrgstatic void 94501e04c3fSmrgremove_unused_shader_inputs_and_outputs(bool is_separate_shader_object, 94601e04c3fSmrg gl_linked_shader *sh, 94701e04c3fSmrg enum ir_variable_mode mode) 94801e04c3fSmrg{ 94901e04c3fSmrg if (is_separate_shader_object) 95001e04c3fSmrg return; 95101e04c3fSmrg 95201e04c3fSmrg foreach_in_list(ir_instruction, node, sh->ir) { 95301e04c3fSmrg ir_variable *const var = node->as_variable(); 95401e04c3fSmrg 95501e04c3fSmrg if (var == NULL || var->data.mode != int(mode)) 95601e04c3fSmrg continue; 95701e04c3fSmrg 95801e04c3fSmrg /* A shader 'in' or 'out' variable is only really an input or output if 95901e04c3fSmrg * its value is used by other shader stages. This will cause the 96001e04c3fSmrg * variable to have a location assigned. 96101e04c3fSmrg */ 96201e04c3fSmrg if (var->data.is_unmatched_generic_inout && !var->data.is_xfb_only) { 96301e04c3fSmrg assert(var->data.mode != ir_var_temporary); 96401e04c3fSmrg 96501e04c3fSmrg /* Assign zeros to demoted inputs to allow more optimizations. */ 96601e04c3fSmrg if (var->data.mode == ir_var_shader_in && !var->constant_value) 96701e04c3fSmrg var->constant_value = ir_constant::zero(var, var->type); 96801e04c3fSmrg 96901e04c3fSmrg var->data.mode = ir_var_auto; 97001e04c3fSmrg } 97101e04c3fSmrg } 97201e04c3fSmrg 97301e04c3fSmrg /* Eliminate code that is now dead due to unused inputs/outputs being 97401e04c3fSmrg * demoted. 97501e04c3fSmrg */ 97601e04c3fSmrg while (do_dead_code(sh->ir, false)) 97701e04c3fSmrg ; 97801e04c3fSmrg 97901e04c3fSmrg} 98001e04c3fSmrg 98101e04c3fSmrg/** 98201e04c3fSmrg * Initialize this object based on a string that was passed to 98301e04c3fSmrg * glTransformFeedbackVaryings. 98401e04c3fSmrg * 98501e04c3fSmrg * If the input is mal-formed, this call still succeeds, but it sets 98601e04c3fSmrg * this->var_name to a mal-formed input, so tfeedback_decl::find_output_var() 98701e04c3fSmrg * will fail to find any matching variable. 98801e04c3fSmrg */ 98901e04c3fSmrgvoid 99001e04c3fSmrgtfeedback_decl::init(struct gl_context *ctx, const void *mem_ctx, 99101e04c3fSmrg const char *input) 99201e04c3fSmrg{ 99301e04c3fSmrg /* We don't have to be pedantic about what is a valid GLSL variable name, 99401e04c3fSmrg * because any variable with an invalid name can't exist in the IR anyway. 99501e04c3fSmrg */ 99601e04c3fSmrg 99701e04c3fSmrg this->location = -1; 99801e04c3fSmrg this->orig_name = input; 99901e04c3fSmrg this->lowered_builtin_array_variable = none; 100001e04c3fSmrg this->skip_components = 0; 100101e04c3fSmrg this->next_buffer_separator = false; 100201e04c3fSmrg this->matched_candidate = NULL; 100301e04c3fSmrg this->stream_id = 0; 100401e04c3fSmrg this->buffer = 0; 100501e04c3fSmrg this->offset = 0; 100601e04c3fSmrg 100701e04c3fSmrg if (ctx->Extensions.ARB_transform_feedback3) { 100801e04c3fSmrg /* Parse gl_NextBuffer. */ 100901e04c3fSmrg if (strcmp(input, "gl_NextBuffer") == 0) { 101001e04c3fSmrg this->next_buffer_separator = true; 101101e04c3fSmrg return; 101201e04c3fSmrg } 101301e04c3fSmrg 101401e04c3fSmrg /* Parse gl_SkipComponents. */ 101501e04c3fSmrg if (strcmp(input, "gl_SkipComponents1") == 0) 101601e04c3fSmrg this->skip_components = 1; 101701e04c3fSmrg else if (strcmp(input, "gl_SkipComponents2") == 0) 101801e04c3fSmrg this->skip_components = 2; 101901e04c3fSmrg else if (strcmp(input, "gl_SkipComponents3") == 0) 102001e04c3fSmrg this->skip_components = 3; 102101e04c3fSmrg else if (strcmp(input, "gl_SkipComponents4") == 0) 102201e04c3fSmrg this->skip_components = 4; 102301e04c3fSmrg 102401e04c3fSmrg if (this->skip_components) 102501e04c3fSmrg return; 102601e04c3fSmrg } 102701e04c3fSmrg 102801e04c3fSmrg /* Parse a declaration. */ 102901e04c3fSmrg const char *base_name_end; 10307ec681f3Smrg long subscript = parse_program_resource_name(input, strlen(input), 10317ec681f3Smrg &base_name_end); 103201e04c3fSmrg this->var_name = ralloc_strndup(mem_ctx, input, base_name_end - input); 103301e04c3fSmrg if (this->var_name == NULL) { 103401e04c3fSmrg _mesa_error_no_memory(__func__); 103501e04c3fSmrg return; 103601e04c3fSmrg } 103701e04c3fSmrg 103801e04c3fSmrg if (subscript >= 0) { 103901e04c3fSmrg this->array_subscript = subscript; 104001e04c3fSmrg this->is_subscripted = true; 104101e04c3fSmrg } else { 104201e04c3fSmrg this->is_subscripted = false; 104301e04c3fSmrg } 104401e04c3fSmrg 104501e04c3fSmrg /* For drivers that lower gl_ClipDistance to gl_ClipDistanceMESA, this 104601e04c3fSmrg * class must behave specially to account for the fact that gl_ClipDistance 104701e04c3fSmrg * is converted from a float[8] to a vec4[2]. 104801e04c3fSmrg */ 104901e04c3fSmrg if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerCombinedClipCullDistance && 105001e04c3fSmrg strcmp(this->var_name, "gl_ClipDistance") == 0) { 105101e04c3fSmrg this->lowered_builtin_array_variable = clip_distance; 105201e04c3fSmrg } 105301e04c3fSmrg if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerCombinedClipCullDistance && 105401e04c3fSmrg strcmp(this->var_name, "gl_CullDistance") == 0) { 105501e04c3fSmrg this->lowered_builtin_array_variable = cull_distance; 105601e04c3fSmrg } 105701e04c3fSmrg 105801e04c3fSmrg if (ctx->Const.LowerTessLevel && 105901e04c3fSmrg (strcmp(this->var_name, "gl_TessLevelOuter") == 0)) 106001e04c3fSmrg this->lowered_builtin_array_variable = tess_level_outer; 106101e04c3fSmrg if (ctx->Const.LowerTessLevel && 106201e04c3fSmrg (strcmp(this->var_name, "gl_TessLevelInner") == 0)) 106301e04c3fSmrg this->lowered_builtin_array_variable = tess_level_inner; 106401e04c3fSmrg} 106501e04c3fSmrg 106601e04c3fSmrg 106701e04c3fSmrg/** 106801e04c3fSmrg * Determine whether two tfeedback_decl objects refer to the same variable and 106901e04c3fSmrg * array index (if applicable). 107001e04c3fSmrg */ 107101e04c3fSmrgbool 107201e04c3fSmrgtfeedback_decl::is_same(const tfeedback_decl &x, const tfeedback_decl &y) 107301e04c3fSmrg{ 107401e04c3fSmrg assert(x.is_varying() && y.is_varying()); 107501e04c3fSmrg 107601e04c3fSmrg if (strcmp(x.var_name, y.var_name) != 0) 107701e04c3fSmrg return false; 107801e04c3fSmrg if (x.is_subscripted != y.is_subscripted) 107901e04c3fSmrg return false; 108001e04c3fSmrg if (x.is_subscripted && x.array_subscript != y.array_subscript) 108101e04c3fSmrg return false; 108201e04c3fSmrg return true; 108301e04c3fSmrg} 108401e04c3fSmrg 108501e04c3fSmrg 108601e04c3fSmrg/** 108701e04c3fSmrg * Assign a location and stream ID for this tfeedback_decl object based on the 108801e04c3fSmrg * transform feedback candidate found by find_candidate. 108901e04c3fSmrg * 109001e04c3fSmrg * If an error occurs, the error is reported through linker_error() and false 109101e04c3fSmrg * is returned. 109201e04c3fSmrg */ 109301e04c3fSmrgbool 109401e04c3fSmrgtfeedback_decl::assign_location(struct gl_context *ctx, 109501e04c3fSmrg struct gl_shader_program *prog) 109601e04c3fSmrg{ 109701e04c3fSmrg assert(this->is_varying()); 109801e04c3fSmrg 109901e04c3fSmrg unsigned fine_location 110001e04c3fSmrg = this->matched_candidate->toplevel_var->data.location * 4 110101e04c3fSmrg + this->matched_candidate->toplevel_var->data.location_frac 11027ec681f3Smrg + this->matched_candidate->struct_offset_floats; 110301e04c3fSmrg const unsigned dmul = 110401e04c3fSmrg this->matched_candidate->type->without_array()->is_64bit() ? 2 : 1; 110501e04c3fSmrg 110601e04c3fSmrg if (this->matched_candidate->type->is_array()) { 110701e04c3fSmrg /* Array variable */ 110801e04c3fSmrg const unsigned matrix_cols = 110901e04c3fSmrg this->matched_candidate->type->fields.array->matrix_columns; 111001e04c3fSmrg const unsigned vector_elements = 111101e04c3fSmrg this->matched_candidate->type->fields.array->vector_elements; 111201e04c3fSmrg unsigned actual_array_size; 111301e04c3fSmrg switch (this->lowered_builtin_array_variable) { 111401e04c3fSmrg case clip_distance: 111501e04c3fSmrg actual_array_size = prog->last_vert_prog ? 111601e04c3fSmrg prog->last_vert_prog->info.clip_distance_array_size : 0; 111701e04c3fSmrg break; 111801e04c3fSmrg case cull_distance: 111901e04c3fSmrg actual_array_size = prog->last_vert_prog ? 112001e04c3fSmrg prog->last_vert_prog->info.cull_distance_array_size : 0; 112101e04c3fSmrg break; 112201e04c3fSmrg case tess_level_outer: 112301e04c3fSmrg actual_array_size = 4; 112401e04c3fSmrg break; 112501e04c3fSmrg case tess_level_inner: 112601e04c3fSmrg actual_array_size = 2; 112701e04c3fSmrg break; 112801e04c3fSmrg case none: 112901e04c3fSmrg default: 113001e04c3fSmrg actual_array_size = this->matched_candidate->type->array_size(); 113101e04c3fSmrg break; 113201e04c3fSmrg } 113301e04c3fSmrg 113401e04c3fSmrg if (this->is_subscripted) { 113501e04c3fSmrg /* Check array bounds. */ 113601e04c3fSmrg if (this->array_subscript >= actual_array_size) { 113701e04c3fSmrg linker_error(prog, "Transform feedback varying %s has index " 113801e04c3fSmrg "%i, but the array size is %u.", 113901e04c3fSmrg this->orig_name, this->array_subscript, 114001e04c3fSmrg actual_array_size); 114101e04c3fSmrg return false; 114201e04c3fSmrg } 114301e04c3fSmrg unsigned array_elem_size = this->lowered_builtin_array_variable ? 114401e04c3fSmrg 1 : vector_elements * matrix_cols * dmul; 114501e04c3fSmrg fine_location += array_elem_size * this->array_subscript; 114601e04c3fSmrg this->size = 1; 114701e04c3fSmrg } else { 114801e04c3fSmrg this->size = actual_array_size; 114901e04c3fSmrg } 115001e04c3fSmrg this->vector_elements = vector_elements; 115101e04c3fSmrg this->matrix_columns = matrix_cols; 115201e04c3fSmrg if (this->lowered_builtin_array_variable) 115301e04c3fSmrg this->type = GL_FLOAT; 115401e04c3fSmrg else 115501e04c3fSmrg this->type = this->matched_candidate->type->fields.array->gl_type; 115601e04c3fSmrg } else { 115701e04c3fSmrg /* Regular variable (scalar, vector, or matrix) */ 115801e04c3fSmrg if (this->is_subscripted) { 115901e04c3fSmrg linker_error(prog, "Transform feedback varying %s requested, " 116001e04c3fSmrg "but %s is not an array.", 116101e04c3fSmrg this->orig_name, this->var_name); 116201e04c3fSmrg return false; 116301e04c3fSmrg } 116401e04c3fSmrg this->size = 1; 116501e04c3fSmrg this->vector_elements = this->matched_candidate->type->vector_elements; 116601e04c3fSmrg this->matrix_columns = this->matched_candidate->type->matrix_columns; 116701e04c3fSmrg this->type = this->matched_candidate->type->gl_type; 116801e04c3fSmrg } 116901e04c3fSmrg this->location = fine_location / 4; 117001e04c3fSmrg this->location_frac = fine_location % 4; 117101e04c3fSmrg 117201e04c3fSmrg /* From GL_EXT_transform_feedback: 117301e04c3fSmrg * A program will fail to link if: 117401e04c3fSmrg * 117501e04c3fSmrg * * the total number of components to capture in any varying 117601e04c3fSmrg * variable in <varyings> is greater than the constant 117701e04c3fSmrg * MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS_EXT and the 117801e04c3fSmrg * buffer mode is SEPARATE_ATTRIBS_EXT; 117901e04c3fSmrg */ 118001e04c3fSmrg if (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS && 118101e04c3fSmrg this->num_components() > 118201e04c3fSmrg ctx->Const.MaxTransformFeedbackSeparateComponents) { 118301e04c3fSmrg linker_error(prog, "Transform feedback varying %s exceeds " 118401e04c3fSmrg "MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS.", 118501e04c3fSmrg this->orig_name); 118601e04c3fSmrg return false; 118701e04c3fSmrg } 118801e04c3fSmrg 118901e04c3fSmrg /* Only transform feedback varyings can be assigned to non-zero streams, 119001e04c3fSmrg * so assign the stream id here. 119101e04c3fSmrg */ 119201e04c3fSmrg this->stream_id = this->matched_candidate->toplevel_var->data.stream; 119301e04c3fSmrg 119401e04c3fSmrg unsigned array_offset = this->array_subscript * 4 * dmul; 11957ec681f3Smrg unsigned struct_offset = this->matched_candidate->xfb_offset_floats * 4; 119601e04c3fSmrg this->buffer = this->matched_candidate->toplevel_var->data.xfb_buffer; 119701e04c3fSmrg this->offset = this->matched_candidate->toplevel_var->data.offset + 119801e04c3fSmrg array_offset + struct_offset; 119901e04c3fSmrg 120001e04c3fSmrg return true; 120101e04c3fSmrg} 120201e04c3fSmrg 120301e04c3fSmrg 120401e04c3fSmrgunsigned 120501e04c3fSmrgtfeedback_decl::get_num_outputs() const 120601e04c3fSmrg{ 120701e04c3fSmrg if (!this->is_varying()) { 120801e04c3fSmrg return 0; 120901e04c3fSmrg } 12107ec681f3Smrg 12117ec681f3Smrg if (varying_has_user_specified_location(this->matched_candidate->toplevel_var)) { 12127ec681f3Smrg unsigned dmul = this->is_64bit() ? 2 : 1; 12137ec681f3Smrg unsigned rows_per_element = DIV_ROUND_UP(this->vector_elements * dmul, 4); 12147ec681f3Smrg return this->size * this->matrix_columns * rows_per_element; 12157ec681f3Smrg } else { 12167ec681f3Smrg return (this->num_components() + this->location_frac + 3) / 4; 12177ec681f3Smrg } 121801e04c3fSmrg} 121901e04c3fSmrg 122001e04c3fSmrg 122101e04c3fSmrg/** 122201e04c3fSmrg * Update gl_transform_feedback_info to reflect this tfeedback_decl. 122301e04c3fSmrg * 122401e04c3fSmrg * If an error occurs, the error is reported through linker_error() and false 122501e04c3fSmrg * is returned. 122601e04c3fSmrg */ 122701e04c3fSmrgbool 122801e04c3fSmrgtfeedback_decl::store(struct gl_context *ctx, struct gl_shader_program *prog, 122901e04c3fSmrg struct gl_transform_feedback_info *info, 123001e04c3fSmrg unsigned buffer, unsigned buffer_index, 12317e102996Smaya const unsigned max_outputs, 12327e102996Smaya BITSET_WORD *used_components[MAX_FEEDBACK_BUFFERS], 12337ec681f3Smrg bool *explicit_stride, unsigned *max_member_alignment, 12347ec681f3Smrg bool has_xfb_qualifiers, const void* mem_ctx) const 123501e04c3fSmrg{ 123601e04c3fSmrg unsigned xfb_offset = 0; 123701e04c3fSmrg unsigned size = this->size; 123801e04c3fSmrg /* Handle gl_SkipComponents. */ 123901e04c3fSmrg if (this->skip_components) { 124001e04c3fSmrg info->Buffers[buffer].Stride += this->skip_components; 124101e04c3fSmrg size = this->skip_components; 124201e04c3fSmrg goto store_varying; 124301e04c3fSmrg } 124401e04c3fSmrg 124501e04c3fSmrg if (this->next_buffer_separator) { 124601e04c3fSmrg size = 0; 124701e04c3fSmrg goto store_varying; 124801e04c3fSmrg } 124901e04c3fSmrg 125001e04c3fSmrg if (has_xfb_qualifiers) { 125101e04c3fSmrg xfb_offset = this->offset / 4; 125201e04c3fSmrg } else { 125301e04c3fSmrg xfb_offset = info->Buffers[buffer].Stride; 125401e04c3fSmrg } 125501e04c3fSmrg info->Varyings[info->NumVarying].Offset = xfb_offset * 4; 125601e04c3fSmrg 125701e04c3fSmrg { 125801e04c3fSmrg unsigned location = this->location; 125901e04c3fSmrg unsigned location_frac = this->location_frac; 126001e04c3fSmrg unsigned num_components = this->num_components(); 12617e102996Smaya 12627e102996Smaya /* From GL_EXT_transform_feedback: 12637e102996Smaya * 12647e102996Smaya * " A program will fail to link if: 12657e102996Smaya * 12667e102996Smaya * * the total number of components to capture is greater than the 12677e102996Smaya * constant MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS_EXT 12687e102996Smaya * and the buffer mode is INTERLEAVED_ATTRIBS_EXT." 12697e102996Smaya * 12707e102996Smaya * From GL_ARB_enhanced_layouts: 12717e102996Smaya * 12727e102996Smaya * " The resulting stride (implicit or explicit) must be less than or 12737e102996Smaya * equal to the implementation-dependent constant 12747e102996Smaya * gl_MaxTransformFeedbackInterleavedComponents." 12757e102996Smaya */ 12767e102996Smaya if ((prog->TransformFeedback.BufferMode == GL_INTERLEAVED_ATTRIBS || 12777e102996Smaya has_xfb_qualifiers) && 12787e102996Smaya xfb_offset + num_components > 12797e102996Smaya ctx->Const.MaxTransformFeedbackInterleavedComponents) { 12807e102996Smaya linker_error(prog, 12817e102996Smaya "The MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS " 12827e102996Smaya "limit has been exceeded."); 12837e102996Smaya return false; 12847e102996Smaya } 12857e102996Smaya 12867e102996Smaya /* From the OpenGL 4.60.5 spec, section 4.4.2. Output Layout Qualifiers, 12877e102996Smaya * Page 76, (Transform Feedback Layout Qualifiers): 12887e102996Smaya * 12897e102996Smaya * " No aliasing in output buffers is allowed: It is a compile-time or 12907e102996Smaya * link-time error to specify variables with overlapping transform 12917e102996Smaya * feedback offsets." 12927e102996Smaya */ 12937e102996Smaya const unsigned max_components = 12947e102996Smaya ctx->Const.MaxTransformFeedbackInterleavedComponents; 12957e102996Smaya const unsigned first_component = xfb_offset; 12967e102996Smaya const unsigned last_component = xfb_offset + num_components - 1; 12977e102996Smaya const unsigned start_word = BITSET_BITWORD(first_component); 12987e102996Smaya const unsigned end_word = BITSET_BITWORD(last_component); 12997e102996Smaya BITSET_WORD *used; 13007e102996Smaya assert(last_component < max_components); 13017e102996Smaya 13027e102996Smaya if (!used_components[buffer]) { 13037e102996Smaya used_components[buffer] = 13047e102996Smaya rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(max_components)); 13057e102996Smaya } 13067e102996Smaya used = used_components[buffer]; 13077e102996Smaya 13087e102996Smaya for (unsigned word = start_word; word <= end_word; word++) { 13097e102996Smaya unsigned start_range = 0; 13107e102996Smaya unsigned end_range = BITSET_WORDBITS - 1; 13117e102996Smaya 13127e102996Smaya if (word == start_word) 13137e102996Smaya start_range = first_component % BITSET_WORDBITS; 13147e102996Smaya 13157e102996Smaya if (word == end_word) 13167e102996Smaya end_range = last_component % BITSET_WORDBITS; 13177e102996Smaya 13187e102996Smaya if (used[word] & BITSET_RANGE(start_range, end_range)) { 13197e102996Smaya linker_error(prog, 13207e102996Smaya "variable '%s', xfb_offset (%d) is causing aliasing.", 13217e102996Smaya this->orig_name, xfb_offset * 4); 13227e102996Smaya return false; 13237e102996Smaya } 13247e102996Smaya used[word] |= BITSET_RANGE(start_range, end_range); 13257e102996Smaya } 13267e102996Smaya 13277ec681f3Smrg const unsigned type_num_components = 13287ec681f3Smrg this->vector_elements * (this->is_64bit() ? 2 : 1); 13297ec681f3Smrg unsigned current_type_components_left = type_num_components; 13307ec681f3Smrg 133101e04c3fSmrg while (num_components > 0) { 13327ec681f3Smrg unsigned output_size = 0; 13337ec681f3Smrg 13347ec681f3Smrg /* From GL_ARB_enhanced_layouts: 13357ec681f3Smrg * 13367ec681f3Smrg * "When an attribute variable declared using an array type is bound to 13377ec681f3Smrg * generic attribute index <i>, the active array elements are assigned to 13387ec681f3Smrg * consecutive generic attributes beginning with generic attribute <i>. The 13397ec681f3Smrg * number of attributes and components assigned to each element are 13407ec681f3Smrg * determined according to the data type of array elements and "component" 13417ec681f3Smrg * layout qualifier (if any) specified in the declaration of the array." 13427ec681f3Smrg * 13437ec681f3Smrg * "When an attribute variable declared using a matrix type is bound to a 13447ec681f3Smrg * generic attribute index <i>, its values are taken from consecutive generic 13457ec681f3Smrg * attributes beginning with generic attribute <i>. Such matrices are 13467ec681f3Smrg * treated as an array of column vectors with values taken from the generic 13477ec681f3Smrg * attributes. 13487ec681f3Smrg * This means there may be gaps in the varyings we are taking values from." 13497ec681f3Smrg * 13507ec681f3Smrg * Examples: 13517ec681f3Smrg * 13527ec681f3Smrg * | layout(location=0) dvec3[2] a; | layout(location=4) vec2[4] b; | 13537ec681f3Smrg * | | | 13547ec681f3Smrg * | 32b 32b 32b 32b | 32b 32b 32b 32b | 13557ec681f3Smrg * | 0 X X Y Y | 4 X Y 0 0 | 13567ec681f3Smrg * | 1 Z Z 0 0 | 5 X Y 0 0 | 13577ec681f3Smrg * | 2 X X Y Y | 6 X Y 0 0 | 13587ec681f3Smrg * | 3 Z Z 0 0 | 7 X Y 0 0 | 13597ec681f3Smrg * 13607ec681f3Smrg */ 13617ec681f3Smrg if (varying_has_user_specified_location(this->matched_candidate->toplevel_var)) { 13627ec681f3Smrg output_size = MIN3(num_components, current_type_components_left, 4); 13637ec681f3Smrg current_type_components_left -= output_size; 13647ec681f3Smrg if (current_type_components_left == 0) { 13657ec681f3Smrg current_type_components_left = type_num_components; 13667ec681f3Smrg } 13677ec681f3Smrg } else { 13687ec681f3Smrg output_size = MIN2(num_components, 4 - location_frac); 13697ec681f3Smrg } 13707ec681f3Smrg 137101e04c3fSmrg assert((info->NumOutputs == 0 && max_outputs == 0) || 137201e04c3fSmrg info->NumOutputs < max_outputs); 137301e04c3fSmrg 137401e04c3fSmrg /* From the ARB_enhanced_layouts spec: 137501e04c3fSmrg * 137601e04c3fSmrg * "If such a block member or variable is not written during a shader 137701e04c3fSmrg * invocation, the buffer contents at the assigned offset will be 137801e04c3fSmrg * undefined. Even if there are no static writes to a variable or 137901e04c3fSmrg * member that is assigned a transform feedback offset, the space is 138001e04c3fSmrg * still allocated in the buffer and still affects the stride." 138101e04c3fSmrg */ 138201e04c3fSmrg if (this->is_varying_written()) { 138301e04c3fSmrg info->Outputs[info->NumOutputs].ComponentOffset = location_frac; 138401e04c3fSmrg info->Outputs[info->NumOutputs].OutputRegister = location; 138501e04c3fSmrg info->Outputs[info->NumOutputs].NumComponents = output_size; 138601e04c3fSmrg info->Outputs[info->NumOutputs].StreamId = stream_id; 138701e04c3fSmrg info->Outputs[info->NumOutputs].OutputBuffer = buffer; 138801e04c3fSmrg info->Outputs[info->NumOutputs].DstOffset = xfb_offset; 138901e04c3fSmrg ++info->NumOutputs; 139001e04c3fSmrg } 139101e04c3fSmrg info->Buffers[buffer].Stream = this->stream_id; 139201e04c3fSmrg xfb_offset += output_size; 139301e04c3fSmrg 139401e04c3fSmrg num_components -= output_size; 139501e04c3fSmrg location++; 139601e04c3fSmrg location_frac = 0; 139701e04c3fSmrg } 139801e04c3fSmrg } 139901e04c3fSmrg 140001e04c3fSmrg if (explicit_stride && explicit_stride[buffer]) { 140101e04c3fSmrg if (this->is_64bit() && info->Buffers[buffer].Stride % 2) { 140201e04c3fSmrg linker_error(prog, "invalid qualifier xfb_stride=%d must be a " 140301e04c3fSmrg "multiple of 8 as its applied to a type that is or " 140401e04c3fSmrg "contains a double.", 140501e04c3fSmrg info->Buffers[buffer].Stride * 4); 140601e04c3fSmrg return false; 140701e04c3fSmrg } 140801e04c3fSmrg 1409993e1d59Smrg if (xfb_offset > info->Buffers[buffer].Stride) { 141001e04c3fSmrg linker_error(prog, "xfb_offset (%d) overflows xfb_stride (%d) for " 141101e04c3fSmrg "buffer (%d)", xfb_offset * 4, 141201e04c3fSmrg info->Buffers[buffer].Stride * 4, buffer); 141301e04c3fSmrg return false; 141401e04c3fSmrg } 141501e04c3fSmrg } else { 14167ec681f3Smrg if (max_member_alignment && has_xfb_qualifiers) { 14177ec681f3Smrg max_member_alignment[buffer] = MAX2(max_member_alignment[buffer], 14187ec681f3Smrg this->is_64bit() ? 2 : 1); 14197ec681f3Smrg info->Buffers[buffer].Stride = ALIGN(xfb_offset, 14207ec681f3Smrg max_member_alignment[buffer]); 14217ec681f3Smrg } else { 14227ec681f3Smrg info->Buffers[buffer].Stride = xfb_offset; 14237ec681f3Smrg } 142401e04c3fSmrg } 142501e04c3fSmrg 142601e04c3fSmrg store_varying: 142701e04c3fSmrg info->Varyings[info->NumVarying].Name = ralloc_strdup(prog, 142801e04c3fSmrg this->orig_name); 142901e04c3fSmrg info->Varyings[info->NumVarying].Type = this->type; 143001e04c3fSmrg info->Varyings[info->NumVarying].Size = size; 143101e04c3fSmrg info->Varyings[info->NumVarying].BufferIndex = buffer_index; 143201e04c3fSmrg info->NumVarying++; 143301e04c3fSmrg info->Buffers[buffer].NumVaryings++; 143401e04c3fSmrg 143501e04c3fSmrg return true; 143601e04c3fSmrg} 143701e04c3fSmrg 143801e04c3fSmrg 143901e04c3fSmrgconst tfeedback_candidate * 144001e04c3fSmrgtfeedback_decl::find_candidate(gl_shader_program *prog, 144101e04c3fSmrg hash_table *tfeedback_candidates) 144201e04c3fSmrg{ 144301e04c3fSmrg const char *name = this->var_name; 144401e04c3fSmrg switch (this->lowered_builtin_array_variable) { 144501e04c3fSmrg case none: 144601e04c3fSmrg name = this->var_name; 144701e04c3fSmrg break; 144801e04c3fSmrg case clip_distance: 144901e04c3fSmrg name = "gl_ClipDistanceMESA"; 145001e04c3fSmrg break; 145101e04c3fSmrg case cull_distance: 145201e04c3fSmrg name = "gl_CullDistanceMESA"; 145301e04c3fSmrg break; 145401e04c3fSmrg case tess_level_outer: 145501e04c3fSmrg name = "gl_TessLevelOuterMESA"; 145601e04c3fSmrg break; 145701e04c3fSmrg case tess_level_inner: 145801e04c3fSmrg name = "gl_TessLevelInnerMESA"; 145901e04c3fSmrg break; 146001e04c3fSmrg } 146101e04c3fSmrg hash_entry *entry = _mesa_hash_table_search(tfeedback_candidates, name); 146201e04c3fSmrg 146301e04c3fSmrg this->matched_candidate = entry ? 146401e04c3fSmrg (const tfeedback_candidate *) entry->data : NULL; 146501e04c3fSmrg 146601e04c3fSmrg if (!this->matched_candidate) { 146701e04c3fSmrg /* From GL_EXT_transform_feedback: 146801e04c3fSmrg * A program will fail to link if: 146901e04c3fSmrg * 147001e04c3fSmrg * * any variable name specified in the <varyings> array is not 147101e04c3fSmrg * declared as an output in the geometry shader (if present) or 147201e04c3fSmrg * the vertex shader (if no geometry shader is present); 147301e04c3fSmrg */ 147401e04c3fSmrg linker_error(prog, "Transform feedback varying %s undeclared.", 147501e04c3fSmrg this->orig_name); 147601e04c3fSmrg } 147701e04c3fSmrg 147801e04c3fSmrg return this->matched_candidate; 147901e04c3fSmrg} 148001e04c3fSmrg 14817ec681f3Smrg/** 14827ec681f3Smrg * Force a candidate over the previously matched one. It happens when a new 14837ec681f3Smrg * varying needs to be created to match the xfb declaration, for example, 14847ec681f3Smrg * to fullfil an alignment criteria. 14857ec681f3Smrg */ 14867ec681f3Smrgvoid 14877ec681f3Smrgtfeedback_decl::set_lowered_candidate(const tfeedback_candidate *candidate) 14887ec681f3Smrg{ 14897ec681f3Smrg this->matched_candidate = candidate; 14907ec681f3Smrg 14917ec681f3Smrg /* The subscript part is no longer relevant */ 14927ec681f3Smrg this->is_subscripted = false; 14937ec681f3Smrg this->array_subscript = 0; 14947ec681f3Smrg} 14957ec681f3Smrg 149601e04c3fSmrg 149701e04c3fSmrg/** 149801e04c3fSmrg * Parse all the transform feedback declarations that were passed to 149901e04c3fSmrg * glTransformFeedbackVaryings() and store them in tfeedback_decl objects. 150001e04c3fSmrg * 150101e04c3fSmrg * If an error occurs, the error is reported through linker_error() and false 150201e04c3fSmrg * is returned. 150301e04c3fSmrg */ 150401e04c3fSmrgstatic bool 150501e04c3fSmrgparse_tfeedback_decls(struct gl_context *ctx, struct gl_shader_program *prog, 150601e04c3fSmrg const void *mem_ctx, unsigned num_names, 150701e04c3fSmrg char **varying_names, tfeedback_decl *decls) 150801e04c3fSmrg{ 150901e04c3fSmrg for (unsigned i = 0; i < num_names; ++i) { 151001e04c3fSmrg decls[i].init(ctx, mem_ctx, varying_names[i]); 151101e04c3fSmrg 151201e04c3fSmrg if (!decls[i].is_varying()) 151301e04c3fSmrg continue; 151401e04c3fSmrg 151501e04c3fSmrg /* From GL_EXT_transform_feedback: 151601e04c3fSmrg * A program will fail to link if: 151701e04c3fSmrg * 151801e04c3fSmrg * * any two entries in the <varyings> array specify the same varying 151901e04c3fSmrg * variable; 152001e04c3fSmrg * 152101e04c3fSmrg * We interpret this to mean "any two entries in the <varyings> array 152201e04c3fSmrg * specify the same varying variable and array index", since transform 152301e04c3fSmrg * feedback of arrays would be useless otherwise. 152401e04c3fSmrg */ 152501e04c3fSmrg for (unsigned j = 0; j < i; ++j) { 152601e04c3fSmrg if (decls[j].is_varying()) { 152701e04c3fSmrg if (tfeedback_decl::is_same(decls[i], decls[j])) { 152801e04c3fSmrg linker_error(prog, "Transform feedback varying %s specified " 152901e04c3fSmrg "more than once.", varying_names[i]); 153001e04c3fSmrg return false; 153101e04c3fSmrg } 153201e04c3fSmrg } 153301e04c3fSmrg } 153401e04c3fSmrg } 153501e04c3fSmrg return true; 153601e04c3fSmrg} 153701e04c3fSmrg 153801e04c3fSmrg 153901e04c3fSmrgstatic int 154001e04c3fSmrgcmp_xfb_offset(const void * x_generic, const void * y_generic) 154101e04c3fSmrg{ 154201e04c3fSmrg tfeedback_decl *x = (tfeedback_decl *) x_generic; 154301e04c3fSmrg tfeedback_decl *y = (tfeedback_decl *) y_generic; 154401e04c3fSmrg 154501e04c3fSmrg if (x->get_buffer() != y->get_buffer()) 154601e04c3fSmrg return x->get_buffer() - y->get_buffer(); 154701e04c3fSmrg return x->get_offset() - y->get_offset(); 154801e04c3fSmrg} 154901e04c3fSmrg 155001e04c3fSmrg/** 155101e04c3fSmrg * Store transform feedback location assignments into 155201e04c3fSmrg * prog->sh.LinkedTransformFeedback based on the data stored in 155301e04c3fSmrg * tfeedback_decls. 155401e04c3fSmrg * 155501e04c3fSmrg * If an error occurs, the error is reported through linker_error() and false 155601e04c3fSmrg * is returned. 155701e04c3fSmrg */ 155801e04c3fSmrgstatic bool 155901e04c3fSmrgstore_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog, 156001e04c3fSmrg unsigned num_tfeedback_decls, 15617e102996Smaya tfeedback_decl *tfeedback_decls, bool has_xfb_qualifiers, 15627e102996Smaya const void *mem_ctx) 156301e04c3fSmrg{ 156401e04c3fSmrg if (!prog->last_vert_prog) 156501e04c3fSmrg return true; 156601e04c3fSmrg 156701e04c3fSmrg /* Make sure MaxTransformFeedbackBuffers is less than 32 so the bitmask for 156801e04c3fSmrg * tracking the number of buffers doesn't overflow. 156901e04c3fSmrg */ 157001e04c3fSmrg assert(ctx->Const.MaxTransformFeedbackBuffers < 32); 157101e04c3fSmrg 157201e04c3fSmrg bool separate_attribs_mode = 157301e04c3fSmrg prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS; 157401e04c3fSmrg 157501e04c3fSmrg struct gl_program *xfb_prog = prog->last_vert_prog; 157601e04c3fSmrg xfb_prog->sh.LinkedTransformFeedback = 157701e04c3fSmrg rzalloc(xfb_prog, struct gl_transform_feedback_info); 157801e04c3fSmrg 157901e04c3fSmrg /* The xfb_offset qualifier does not have to be used in increasing order 158001e04c3fSmrg * however some drivers expect to receive the list of transform feedback 158101e04c3fSmrg * declarations in order so sort it now for convenience. 158201e04c3fSmrg */ 158301e04c3fSmrg if (has_xfb_qualifiers) { 158401e04c3fSmrg qsort(tfeedback_decls, num_tfeedback_decls, sizeof(*tfeedback_decls), 158501e04c3fSmrg cmp_xfb_offset); 158601e04c3fSmrg } 158701e04c3fSmrg 158801e04c3fSmrg xfb_prog->sh.LinkedTransformFeedback->Varyings = 158901e04c3fSmrg rzalloc_array(xfb_prog, struct gl_transform_feedback_varying_info, 159001e04c3fSmrg num_tfeedback_decls); 159101e04c3fSmrg 159201e04c3fSmrg unsigned num_outputs = 0; 159301e04c3fSmrg for (unsigned i = 0; i < num_tfeedback_decls; ++i) { 159401e04c3fSmrg if (tfeedback_decls[i].is_varying_written()) 159501e04c3fSmrg num_outputs += tfeedback_decls[i].get_num_outputs(); 159601e04c3fSmrg } 159701e04c3fSmrg 159801e04c3fSmrg xfb_prog->sh.LinkedTransformFeedback->Outputs = 159901e04c3fSmrg rzalloc_array(xfb_prog, struct gl_transform_feedback_output, 160001e04c3fSmrg num_outputs); 160101e04c3fSmrg 160201e04c3fSmrg unsigned num_buffers = 0; 160301e04c3fSmrg unsigned buffers = 0; 16047e102996Smaya BITSET_WORD *used_components[MAX_FEEDBACK_BUFFERS] = {}; 160501e04c3fSmrg 160601e04c3fSmrg if (!has_xfb_qualifiers && separate_attribs_mode) { 160701e04c3fSmrg /* GL_SEPARATE_ATTRIBS */ 160801e04c3fSmrg for (unsigned i = 0; i < num_tfeedback_decls; ++i) { 160901e04c3fSmrg if (!tfeedback_decls[i].store(ctx, prog, 161001e04c3fSmrg xfb_prog->sh.LinkedTransformFeedback, 161101e04c3fSmrg num_buffers, num_buffers, num_outputs, 16127ec681f3Smrg used_components, NULL, NULL, 16137e102996Smaya has_xfb_qualifiers, mem_ctx)) 161401e04c3fSmrg return false; 161501e04c3fSmrg 161601e04c3fSmrg buffers |= 1 << num_buffers; 161701e04c3fSmrg num_buffers++; 161801e04c3fSmrg } 161901e04c3fSmrg } 162001e04c3fSmrg else { 162101e04c3fSmrg /* GL_INVERLEAVED_ATTRIBS */ 162201e04c3fSmrg int buffer_stream_id = -1; 162301e04c3fSmrg unsigned buffer = 162401e04c3fSmrg num_tfeedback_decls ? tfeedback_decls[0].get_buffer() : 0; 162501e04c3fSmrg bool explicit_stride[MAX_FEEDBACK_BUFFERS] = { false }; 16267ec681f3Smrg unsigned max_member_alignment[MAX_FEEDBACK_BUFFERS] = { 1, 1, 1, 1 }; 162701e04c3fSmrg /* Apply any xfb_stride global qualifiers */ 162801e04c3fSmrg if (has_xfb_qualifiers) { 162901e04c3fSmrg for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) { 163001e04c3fSmrg if (prog->TransformFeedback.BufferStride[j]) { 163101e04c3fSmrg explicit_stride[j] = true; 163201e04c3fSmrg xfb_prog->sh.LinkedTransformFeedback->Buffers[j].Stride = 163301e04c3fSmrg prog->TransformFeedback.BufferStride[j] / 4; 163401e04c3fSmrg } 163501e04c3fSmrg } 163601e04c3fSmrg } 163701e04c3fSmrg 163801e04c3fSmrg for (unsigned i = 0; i < num_tfeedback_decls; ++i) { 163901e04c3fSmrg if (has_xfb_qualifiers && 164001e04c3fSmrg buffer != tfeedback_decls[i].get_buffer()) { 164101e04c3fSmrg /* we have moved to the next buffer so reset stream id */ 164201e04c3fSmrg buffer_stream_id = -1; 164301e04c3fSmrg num_buffers++; 164401e04c3fSmrg } 164501e04c3fSmrg 164601e04c3fSmrg if (tfeedback_decls[i].is_next_buffer_separator()) { 164701e04c3fSmrg if (!tfeedback_decls[i].store(ctx, prog, 164801e04c3fSmrg xfb_prog->sh.LinkedTransformFeedback, 164901e04c3fSmrg buffer, num_buffers, num_outputs, 16507e102996Smaya used_components, explicit_stride, 16517ec681f3Smrg max_member_alignment, 16527ec681f3Smrg has_xfb_qualifiers, 16537ec681f3Smrg mem_ctx)) 165401e04c3fSmrg return false; 165501e04c3fSmrg num_buffers++; 165601e04c3fSmrg buffer_stream_id = -1; 165701e04c3fSmrg continue; 165801e04c3fSmrg } 165901e04c3fSmrg 166001e04c3fSmrg if (has_xfb_qualifiers) { 166101e04c3fSmrg buffer = tfeedback_decls[i].get_buffer(); 166201e04c3fSmrg } else { 166301e04c3fSmrg buffer = num_buffers; 166401e04c3fSmrg } 166501e04c3fSmrg 166601e04c3fSmrg if (tfeedback_decls[i].is_varying()) { 166701e04c3fSmrg if (buffer_stream_id == -1) { 166801e04c3fSmrg /* First varying writing to this buffer: remember its stream */ 166901e04c3fSmrg buffer_stream_id = (int) tfeedback_decls[i].get_stream_id(); 167001e04c3fSmrg 167101e04c3fSmrg /* Only mark a buffer as active when there is a varying 167201e04c3fSmrg * attached to it. This behaviour is based on a revised version 167301e04c3fSmrg * of section 13.2.2 of the GL 4.6 spec. 167401e04c3fSmrg */ 167501e04c3fSmrg buffers |= 1 << buffer; 167601e04c3fSmrg } else if (buffer_stream_id != 167701e04c3fSmrg (int) tfeedback_decls[i].get_stream_id()) { 167801e04c3fSmrg /* Varying writes to the same buffer from a different stream */ 167901e04c3fSmrg linker_error(prog, 168001e04c3fSmrg "Transform feedback can't capture varyings belonging " 168101e04c3fSmrg "to different vertex streams in a single buffer. " 168201e04c3fSmrg "Varying %s writes to buffer from stream %u, other " 168301e04c3fSmrg "varyings in the same buffer write from stream %u.", 168401e04c3fSmrg tfeedback_decls[i].name(), 168501e04c3fSmrg tfeedback_decls[i].get_stream_id(), 168601e04c3fSmrg buffer_stream_id); 168701e04c3fSmrg return false; 168801e04c3fSmrg } 168901e04c3fSmrg } 169001e04c3fSmrg 169101e04c3fSmrg if (!tfeedback_decls[i].store(ctx, prog, 169201e04c3fSmrg xfb_prog->sh.LinkedTransformFeedback, 169301e04c3fSmrg buffer, num_buffers, num_outputs, 16947e102996Smaya used_components, explicit_stride, 16957ec681f3Smrg max_member_alignment, 16967ec681f3Smrg has_xfb_qualifiers, 16977ec681f3Smrg mem_ctx)) 169801e04c3fSmrg return false; 169901e04c3fSmrg } 170001e04c3fSmrg } 170101e04c3fSmrg 170201e04c3fSmrg assert(xfb_prog->sh.LinkedTransformFeedback->NumOutputs == num_outputs); 170301e04c3fSmrg 170401e04c3fSmrg xfb_prog->sh.LinkedTransformFeedback->ActiveBuffers = buffers; 170501e04c3fSmrg return true; 170601e04c3fSmrg} 170701e04c3fSmrg 170801e04c3fSmrgnamespace { 170901e04c3fSmrg 171001e04c3fSmrg/** 171101e04c3fSmrg * Data structure recording the relationship between outputs of one shader 171201e04c3fSmrg * stage (the "producer") and inputs of another (the "consumer"). 171301e04c3fSmrg */ 171401e04c3fSmrgclass varying_matches 171501e04c3fSmrg{ 171601e04c3fSmrgpublic: 17177ec681f3Smrg varying_matches(bool disable_varying_packing, 17187ec681f3Smrg bool disable_xfb_packing, 17197ec681f3Smrg bool xfb_enabled, 172001e04c3fSmrg bool enhanced_layouts_enabled, 172101e04c3fSmrg gl_shader_stage producer_stage, 172201e04c3fSmrg gl_shader_stage consumer_stage); 172301e04c3fSmrg ~varying_matches(); 172401e04c3fSmrg void record(ir_variable *producer_var, ir_variable *consumer_var); 172501e04c3fSmrg unsigned assign_locations(struct gl_shader_program *prog, 172601e04c3fSmrg uint8_t components[], 172701e04c3fSmrg uint64_t reserved_slots); 172801e04c3fSmrg void store_locations() const; 172901e04c3fSmrg 173001e04c3fSmrgprivate: 173101e04c3fSmrg bool is_varying_packing_safe(const glsl_type *type, 173201e04c3fSmrg const ir_variable *var) const; 173301e04c3fSmrg 173401e04c3fSmrg /** 173501e04c3fSmrg * If true, this driver disables varying packing, so all varyings need to 173601e04c3fSmrg * be aligned on slot boundaries, and take up a number of slots equal to 173701e04c3fSmrg * their number of matrix columns times their array size. 173801e04c3fSmrg * 173901e04c3fSmrg * Packing may also be disabled because our current packing method is not 174001e04c3fSmrg * safe in SSO or versions of OpenGL where interpolation qualifiers are not 174101e04c3fSmrg * guaranteed to match across stages. 174201e04c3fSmrg */ 174301e04c3fSmrg const bool disable_varying_packing; 174401e04c3fSmrg 17457ec681f3Smrg /** 17467ec681f3Smrg * If true, this driver disables packing for varyings used by transform 17477ec681f3Smrg * feedback. 17487ec681f3Smrg */ 17497ec681f3Smrg const bool disable_xfb_packing; 17507ec681f3Smrg 175101e04c3fSmrg /** 175201e04c3fSmrg * If true, this driver has transform feedback enabled. The transform 17537ec681f3Smrg * feedback code usually requires at least some packing be done even 17547ec681f3Smrg * when varying packing is disabled, fortunately where transform feedback 17557ec681f3Smrg * requires packing it's safe to override the disabled setting. See 175601e04c3fSmrg * is_varying_packing_safe(). 175701e04c3fSmrg */ 175801e04c3fSmrg const bool xfb_enabled; 175901e04c3fSmrg 176001e04c3fSmrg const bool enhanced_layouts_enabled; 176101e04c3fSmrg 176201e04c3fSmrg /** 176301e04c3fSmrg * Enum representing the order in which varyings are packed within a 176401e04c3fSmrg * packing class. 176501e04c3fSmrg * 176601e04c3fSmrg * Currently we pack vec4's first, then vec2's, then scalar values, then 176701e04c3fSmrg * vec3's. This order ensures that the only vectors that are at risk of 176801e04c3fSmrg * having to be "double parked" (split between two adjacent varying slots) 176901e04c3fSmrg * are the vec3's. 177001e04c3fSmrg */ 177101e04c3fSmrg enum packing_order_enum { 177201e04c3fSmrg PACKING_ORDER_VEC4, 177301e04c3fSmrg PACKING_ORDER_VEC2, 177401e04c3fSmrg PACKING_ORDER_SCALAR, 177501e04c3fSmrg PACKING_ORDER_VEC3, 177601e04c3fSmrg }; 177701e04c3fSmrg 177801e04c3fSmrg static unsigned compute_packing_class(const ir_variable *var); 177901e04c3fSmrg static packing_order_enum compute_packing_order(const ir_variable *var); 178001e04c3fSmrg static int match_comparator(const void *x_generic, const void *y_generic); 178101e04c3fSmrg static int xfb_comparator(const void *x_generic, const void *y_generic); 17827ec681f3Smrg static int not_xfb_comparator(const void *x_generic, const void *y_generic); 178301e04c3fSmrg 178401e04c3fSmrg /** 178501e04c3fSmrg * Structure recording the relationship between a single producer output 178601e04c3fSmrg * and a single consumer input. 178701e04c3fSmrg */ 178801e04c3fSmrg struct match { 178901e04c3fSmrg /** 179001e04c3fSmrg * Packing class for this varying, computed by compute_packing_class(). 179101e04c3fSmrg */ 179201e04c3fSmrg unsigned packing_class; 179301e04c3fSmrg 179401e04c3fSmrg /** 179501e04c3fSmrg * Packing order for this varying, computed by compute_packing_order(). 179601e04c3fSmrg */ 179701e04c3fSmrg packing_order_enum packing_order; 179801e04c3fSmrg 179901e04c3fSmrg /** 180001e04c3fSmrg * The output variable in the producer stage. 180101e04c3fSmrg */ 180201e04c3fSmrg ir_variable *producer_var; 180301e04c3fSmrg 180401e04c3fSmrg /** 180501e04c3fSmrg * The input variable in the consumer stage. 180601e04c3fSmrg */ 180701e04c3fSmrg ir_variable *consumer_var; 180801e04c3fSmrg 180901e04c3fSmrg /** 181001e04c3fSmrg * The location which has been assigned for this varying. This is 181101e04c3fSmrg * expressed in multiples of a float, with the first generic varying 181201e04c3fSmrg * (i.e. the one referred to by VARYING_SLOT_VAR0) represented by the 181301e04c3fSmrg * value 0. 181401e04c3fSmrg */ 181501e04c3fSmrg unsigned generic_location; 181601e04c3fSmrg } *matches; 181701e04c3fSmrg 181801e04c3fSmrg /** 181901e04c3fSmrg * The number of elements in the \c matches array that are currently in 182001e04c3fSmrg * use. 182101e04c3fSmrg */ 182201e04c3fSmrg unsigned num_matches; 182301e04c3fSmrg 182401e04c3fSmrg /** 182501e04c3fSmrg * The number of elements that were set aside for the \c matches array when 182601e04c3fSmrg * it was allocated. 182701e04c3fSmrg */ 182801e04c3fSmrg unsigned matches_capacity; 182901e04c3fSmrg 183001e04c3fSmrg gl_shader_stage producer_stage; 183101e04c3fSmrg gl_shader_stage consumer_stage; 183201e04c3fSmrg}; 183301e04c3fSmrg 183401e04c3fSmrg} /* anonymous namespace */ 183501e04c3fSmrg 183601e04c3fSmrgvarying_matches::varying_matches(bool disable_varying_packing, 18377ec681f3Smrg bool disable_xfb_packing, 183801e04c3fSmrg bool xfb_enabled, 183901e04c3fSmrg bool enhanced_layouts_enabled, 184001e04c3fSmrg gl_shader_stage producer_stage, 184101e04c3fSmrg gl_shader_stage consumer_stage) 184201e04c3fSmrg : disable_varying_packing(disable_varying_packing), 18437ec681f3Smrg disable_xfb_packing(disable_xfb_packing), 184401e04c3fSmrg xfb_enabled(xfb_enabled), 184501e04c3fSmrg enhanced_layouts_enabled(enhanced_layouts_enabled), 184601e04c3fSmrg producer_stage(producer_stage), 184701e04c3fSmrg consumer_stage(consumer_stage) 184801e04c3fSmrg{ 184901e04c3fSmrg /* Note: this initial capacity is rather arbitrarily chosen to be large 185001e04c3fSmrg * enough for many cases without wasting an unreasonable amount of space. 185101e04c3fSmrg * varying_matches::record() will resize the array if there are more than 185201e04c3fSmrg * this number of varyings. 185301e04c3fSmrg */ 185401e04c3fSmrg this->matches_capacity = 8; 185501e04c3fSmrg this->matches = (match *) 185601e04c3fSmrg malloc(sizeof(*this->matches) * this->matches_capacity); 185701e04c3fSmrg this->num_matches = 0; 185801e04c3fSmrg} 185901e04c3fSmrg 186001e04c3fSmrg 186101e04c3fSmrgvarying_matches::~varying_matches() 186201e04c3fSmrg{ 186301e04c3fSmrg free(this->matches); 186401e04c3fSmrg} 186501e04c3fSmrg 186601e04c3fSmrg 186701e04c3fSmrg/** 186801e04c3fSmrg * Packing is always safe on individual arrays, structures, and matrices. It 186901e04c3fSmrg * is also safe if the varying is only used for transform feedback. 187001e04c3fSmrg */ 187101e04c3fSmrgbool 187201e04c3fSmrgvarying_matches::is_varying_packing_safe(const glsl_type *type, 187301e04c3fSmrg const ir_variable *var) const 187401e04c3fSmrg{ 187501e04c3fSmrg if (consumer_stage == MESA_SHADER_TESS_EVAL || 187601e04c3fSmrg consumer_stage == MESA_SHADER_TESS_CTRL || 187701e04c3fSmrg producer_stage == MESA_SHADER_TESS_CTRL) 187801e04c3fSmrg return false; 187901e04c3fSmrg 18807e102996Smaya return xfb_enabled && (type->is_array() || type->is_struct() || 188101e04c3fSmrg type->is_matrix() || var->data.is_xfb_only); 188201e04c3fSmrg} 188301e04c3fSmrg 188401e04c3fSmrg 188501e04c3fSmrg/** 188601e04c3fSmrg * Record the given producer/consumer variable pair in the list of variables 188701e04c3fSmrg * that should later be assigned locations. 188801e04c3fSmrg * 188901e04c3fSmrg * It is permissible for \c consumer_var to be NULL (this happens if a 189001e04c3fSmrg * variable is output by the producer and consumed by transform feedback, but 189101e04c3fSmrg * not consumed by the consumer). 189201e04c3fSmrg * 189301e04c3fSmrg * If \c producer_var has already been paired up with a consumer_var, or 189401e04c3fSmrg * producer_var is part of fixed pipeline functionality (and hence already has 189501e04c3fSmrg * a location assigned), this function has no effect. 189601e04c3fSmrg * 189701e04c3fSmrg * Note: as a side effect this function may change the interpolation type of 189801e04c3fSmrg * \c producer_var, but only when the change couldn't possibly affect 189901e04c3fSmrg * rendering. 190001e04c3fSmrg */ 190101e04c3fSmrgvoid 190201e04c3fSmrgvarying_matches::record(ir_variable *producer_var, ir_variable *consumer_var) 190301e04c3fSmrg{ 190401e04c3fSmrg assert(producer_var != NULL || consumer_var != NULL); 190501e04c3fSmrg 190601e04c3fSmrg if ((producer_var && (!producer_var->data.is_unmatched_generic_inout || 190701e04c3fSmrg producer_var->data.explicit_location)) || 190801e04c3fSmrg (consumer_var && (!consumer_var->data.is_unmatched_generic_inout || 190901e04c3fSmrg consumer_var->data.explicit_location))) { 191001e04c3fSmrg /* Either a location already exists for this variable (since it is part 191101e04c3fSmrg * of fixed functionality), or it has already been recorded as part of a 191201e04c3fSmrg * previous match. 191301e04c3fSmrg */ 191401e04c3fSmrg return; 191501e04c3fSmrg } 191601e04c3fSmrg 191701e04c3fSmrg bool needs_flat_qualifier = consumer_var == NULL && 191801e04c3fSmrg (producer_var->type->contains_integer() || 191901e04c3fSmrg producer_var->type->contains_double()); 192001e04c3fSmrg 192101e04c3fSmrg if (!disable_varying_packing && 19227ec681f3Smrg (!disable_xfb_packing || producer_var == NULL || !producer_var->data.is_xfb) && 192301e04c3fSmrg (needs_flat_qualifier || 192401e04c3fSmrg (consumer_stage != MESA_SHADER_NONE && consumer_stage != MESA_SHADER_FRAGMENT))) { 192501e04c3fSmrg /* Since this varying is not being consumed by the fragment shader, its 192601e04c3fSmrg * interpolation type varying cannot possibly affect rendering. 192701e04c3fSmrg * Also, this variable is non-flat and is (or contains) an integer 192801e04c3fSmrg * or a double. 192901e04c3fSmrg * If the consumer stage is unknown, don't modify the interpolation 193001e04c3fSmrg * type as it could affect rendering later with separate shaders. 193101e04c3fSmrg * 193201e04c3fSmrg * lower_packed_varyings requires all integer varyings to flat, 193301e04c3fSmrg * regardless of where they appear. We can trivially satisfy that 193401e04c3fSmrg * requirement by changing the interpolation type to flat here. 193501e04c3fSmrg */ 193601e04c3fSmrg if (producer_var) { 193701e04c3fSmrg producer_var->data.centroid = false; 193801e04c3fSmrg producer_var->data.sample = false; 193901e04c3fSmrg producer_var->data.interpolation = INTERP_MODE_FLAT; 194001e04c3fSmrg } 194101e04c3fSmrg 194201e04c3fSmrg if (consumer_var) { 194301e04c3fSmrg consumer_var->data.centroid = false; 194401e04c3fSmrg consumer_var->data.sample = false; 194501e04c3fSmrg consumer_var->data.interpolation = INTERP_MODE_FLAT; 194601e04c3fSmrg } 194701e04c3fSmrg } 194801e04c3fSmrg 194901e04c3fSmrg if (this->num_matches == this->matches_capacity) { 195001e04c3fSmrg this->matches_capacity *= 2; 195101e04c3fSmrg this->matches = (match *) 195201e04c3fSmrg realloc(this->matches, 195301e04c3fSmrg sizeof(*this->matches) * this->matches_capacity); 195401e04c3fSmrg } 195501e04c3fSmrg 195601e04c3fSmrg /* We must use the consumer to compute the packing class because in GL4.4+ 195701e04c3fSmrg * there is no guarantee interpolation qualifiers will match across stages. 195801e04c3fSmrg * 195901e04c3fSmrg * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.30 spec: 196001e04c3fSmrg * 196101e04c3fSmrg * "The type and presence of interpolation qualifiers of variables with 196201e04c3fSmrg * the same name declared in all linked shaders for the same cross-stage 196301e04c3fSmrg * interface must match, otherwise the link command will fail. 196401e04c3fSmrg * 196501e04c3fSmrg * When comparing an output from one stage to an input of a subsequent 196601e04c3fSmrg * stage, the input and output don't match if their interpolation 196701e04c3fSmrg * qualifiers (or lack thereof) are not the same." 196801e04c3fSmrg * 196901e04c3fSmrg * This text was also in at least revison 7 of the 4.40 spec but is no 197001e04c3fSmrg * longer in revision 9 and not in the 4.50 spec. 197101e04c3fSmrg */ 197201e04c3fSmrg const ir_variable *const var = (consumer_var != NULL) 197301e04c3fSmrg ? consumer_var : producer_var; 197401e04c3fSmrg 197501e04c3fSmrg if (producer_var && consumer_var && 197601e04c3fSmrg consumer_var->data.must_be_shader_input) { 197701e04c3fSmrg producer_var->data.must_be_shader_input = 1; 197801e04c3fSmrg } 197901e04c3fSmrg 198001e04c3fSmrg this->matches[this->num_matches].packing_class 198101e04c3fSmrg = this->compute_packing_class(var); 198201e04c3fSmrg this->matches[this->num_matches].packing_order 198301e04c3fSmrg = this->compute_packing_order(var); 198401e04c3fSmrg 198501e04c3fSmrg this->matches[this->num_matches].producer_var = producer_var; 198601e04c3fSmrg this->matches[this->num_matches].consumer_var = consumer_var; 198701e04c3fSmrg this->num_matches++; 198801e04c3fSmrg if (producer_var) 198901e04c3fSmrg producer_var->data.is_unmatched_generic_inout = 0; 199001e04c3fSmrg if (consumer_var) 199101e04c3fSmrg consumer_var->data.is_unmatched_generic_inout = 0; 199201e04c3fSmrg} 199301e04c3fSmrg 199401e04c3fSmrg 199501e04c3fSmrg/** 199601e04c3fSmrg * Choose locations for all of the variable matches that were previously 199701e04c3fSmrg * passed to varying_matches::record(). 199801e04c3fSmrg * \param components returns array[slot] of number of components used 199901e04c3fSmrg * per slot (1, 2, 3 or 4) 200001e04c3fSmrg * \param reserved_slots bitmask indicating which varying slots are already 200101e04c3fSmrg * allocated 200201e04c3fSmrg * \return number of slots (4-element vectors) allocated 200301e04c3fSmrg */ 200401e04c3fSmrgunsigned 200501e04c3fSmrgvarying_matches::assign_locations(struct gl_shader_program *prog, 200601e04c3fSmrg uint8_t components[], 200701e04c3fSmrg uint64_t reserved_slots) 200801e04c3fSmrg{ 200901e04c3fSmrg /* If packing has been disabled then we cannot safely sort the varyings by 201001e04c3fSmrg * class as it may mean we are using a version of OpenGL where 201101e04c3fSmrg * interpolation qualifiers are not guaranteed to be matching across 201201e04c3fSmrg * shaders, sorting in this case could result in mismatching shader 201301e04c3fSmrg * interfaces. 201401e04c3fSmrg * When packing is disabled the sort orders varyings used by transform 201501e04c3fSmrg * feedback first, but also depends on *undefined behaviour* of qsort to 201601e04c3fSmrg * reverse the order of the varyings. See: xfb_comparator(). 20177ec681f3Smrg * 20187ec681f3Smrg * If packing is only disabled for xfb varyings (mutually exclusive with 20197ec681f3Smrg * disable_varying_packing), we then group varyings depending on if they 20207ec681f3Smrg * are captured for transform feedback. The same *undefined behaviour* is 20217ec681f3Smrg * taken advantage of. 202201e04c3fSmrg */ 20237ec681f3Smrg if (this->disable_varying_packing) { 202401e04c3fSmrg /* Only sort varyings that are only used by transform feedback. */ 202501e04c3fSmrg qsort(this->matches, this->num_matches, sizeof(*this->matches), 202601e04c3fSmrg &varying_matches::xfb_comparator); 20277ec681f3Smrg } else if (this->disable_xfb_packing) { 20287ec681f3Smrg /* Only sort varyings that are NOT used by transform feedback. */ 20297ec681f3Smrg qsort(this->matches, this->num_matches, sizeof(*this->matches), 20307ec681f3Smrg &varying_matches::not_xfb_comparator); 20317ec681f3Smrg } else { 20327ec681f3Smrg /* Sort varying matches into an order that makes them easy to pack. */ 20337ec681f3Smrg qsort(this->matches, this->num_matches, sizeof(*this->matches), 20347ec681f3Smrg &varying_matches::match_comparator); 203501e04c3fSmrg } 203601e04c3fSmrg 203701e04c3fSmrg unsigned generic_location = 0; 203801e04c3fSmrg unsigned generic_patch_location = MAX_VARYING*4; 20397ec681f3Smrg bool previous_var_xfb = false; 204001e04c3fSmrg bool previous_var_xfb_only = false; 204101e04c3fSmrg unsigned previous_packing_class = ~0u; 204201e04c3fSmrg 204301e04c3fSmrg /* For tranform feedback separate mode, we know the number of attributes 204401e04c3fSmrg * is <= the number of buffers. So packing isn't critical. In fact, 204501e04c3fSmrg * packing vec3 attributes can cause trouble because splitting a vec3 204601e04c3fSmrg * effectively creates an additional transform feedback output. The 204701e04c3fSmrg * extra TFB output may exceed device driver limits. 204801e04c3fSmrg */ 204901e04c3fSmrg const bool dont_pack_vec3 = 205001e04c3fSmrg (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS && 205101e04c3fSmrg prog->TransformFeedback.NumVarying > 0); 205201e04c3fSmrg 205301e04c3fSmrg for (unsigned i = 0; i < this->num_matches; i++) { 205401e04c3fSmrg unsigned *location = &generic_location; 205501e04c3fSmrg const ir_variable *var; 205601e04c3fSmrg const glsl_type *type; 205701e04c3fSmrg bool is_vertex_input = false; 205801e04c3fSmrg 205901e04c3fSmrg if (matches[i].consumer_var) { 206001e04c3fSmrg var = matches[i].consumer_var; 206101e04c3fSmrg type = get_varying_type(var, consumer_stage); 206201e04c3fSmrg if (consumer_stage == MESA_SHADER_VERTEX) 206301e04c3fSmrg is_vertex_input = true; 206401e04c3fSmrg } else { 206501e04c3fSmrg var = matches[i].producer_var; 206601e04c3fSmrg type = get_varying_type(var, producer_stage); 206701e04c3fSmrg } 206801e04c3fSmrg 206901e04c3fSmrg if (var->data.patch) 207001e04c3fSmrg location = &generic_patch_location; 207101e04c3fSmrg 207201e04c3fSmrg /* Advance to the next slot if this varying has a different packing 207301e04c3fSmrg * class than the previous one, and we're not already on a slot 207401e04c3fSmrg * boundary. 207501e04c3fSmrg * 20767ec681f3Smrg * Also advance if varying packing is disabled for transform feedback, 20777ec681f3Smrg * and previous or current varying is used for transform feedback. 20787ec681f3Smrg * 207901e04c3fSmrg * Also advance to the next slot if packing is disabled. This makes sure 208001e04c3fSmrg * we don't assign varyings the same locations which is possible 208101e04c3fSmrg * because we still pack individual arrays, records and matrices even 208201e04c3fSmrg * when packing is disabled. Note we don't advance to the next slot if 208301e04c3fSmrg * we can pack varyings together that are only used for transform 208401e04c3fSmrg * feedback. 208501e04c3fSmrg */ 208601e04c3fSmrg if (var->data.must_be_shader_input || 20877ec681f3Smrg (this->disable_xfb_packing && 20887ec681f3Smrg (previous_var_xfb || var->data.is_xfb)) || 208901e04c3fSmrg (this->disable_varying_packing && 209001e04c3fSmrg !(previous_var_xfb_only && var->data.is_xfb_only)) || 209101e04c3fSmrg (previous_packing_class != this->matches[i].packing_class) || 209201e04c3fSmrg (this->matches[i].packing_order == PACKING_ORDER_VEC3 && 209301e04c3fSmrg dont_pack_vec3)) { 209401e04c3fSmrg *location = ALIGN(*location, 4); 209501e04c3fSmrg } 209601e04c3fSmrg 20977ec681f3Smrg previous_var_xfb = var->data.is_xfb; 209801e04c3fSmrg previous_var_xfb_only = var->data.is_xfb_only; 209901e04c3fSmrg previous_packing_class = this->matches[i].packing_class; 210001e04c3fSmrg 210101e04c3fSmrg /* The number of components taken up by this variable. For vertex shader 210201e04c3fSmrg * inputs, we use the number of slots * 4, as they have different 210301e04c3fSmrg * counting rules. 210401e04c3fSmrg */ 21057ec681f3Smrg unsigned num_components = 0; 21067ec681f3Smrg if (is_vertex_input) { 21077ec681f3Smrg num_components = type->count_attribute_slots(is_vertex_input) * 4; 21087ec681f3Smrg } else { 21097ec681f3Smrg if ((this->disable_varying_packing && 21107ec681f3Smrg !is_varying_packing_safe(type, var)) || 21117ec681f3Smrg (this->disable_xfb_packing && var->data.is_xfb && 21127ec681f3Smrg !(type->is_array() || type->is_struct() || type->is_matrix())) || 21137ec681f3Smrg var->data.must_be_shader_input) { 21147ec681f3Smrg num_components = type->count_attribute_slots(false) * 4; 21157ec681f3Smrg } else { 21167ec681f3Smrg num_components = type->component_slots_aligned(*location); 21177ec681f3Smrg } 21187ec681f3Smrg } 211901e04c3fSmrg 212001e04c3fSmrg /* The last slot for this variable, inclusive. */ 212101e04c3fSmrg unsigned slot_end = *location + num_components - 1; 212201e04c3fSmrg 212301e04c3fSmrg /* FIXME: We could be smarter in the below code and loop back over 212401e04c3fSmrg * trying to fill any locations that we skipped because we couldn't pack 212501e04c3fSmrg * the varying between an explicit location. For now just let the user 212601e04c3fSmrg * hit the linking error if we run out of room and suggest they use 212701e04c3fSmrg * explicit locations. 212801e04c3fSmrg */ 212901e04c3fSmrg while (slot_end < MAX_VARYING * 4u) { 213001e04c3fSmrg const unsigned slots = (slot_end / 4u) - (*location / 4u) + 1; 213101e04c3fSmrg const uint64_t slot_mask = ((1ull << slots) - 1) << (*location / 4u); 213201e04c3fSmrg 213301e04c3fSmrg assert(slots > 0); 213401e04c3fSmrg 213501e04c3fSmrg if ((reserved_slots & slot_mask) == 0) { 213601e04c3fSmrg break; 213701e04c3fSmrg } 213801e04c3fSmrg 213901e04c3fSmrg *location = ALIGN(*location + 1, 4); 214001e04c3fSmrg slot_end = *location + num_components - 1; 214101e04c3fSmrg } 214201e04c3fSmrg 214301e04c3fSmrg if (!var->data.patch && slot_end >= MAX_VARYING * 4u) { 214401e04c3fSmrg linker_error(prog, "insufficient contiguous locations available for " 214501e04c3fSmrg "%s it is possible an array or struct could not be " 214601e04c3fSmrg "packed between varyings with explicit locations. Try " 214701e04c3fSmrg "using an explicit location for arrays and structs.", 214801e04c3fSmrg var->name); 214901e04c3fSmrg } 215001e04c3fSmrg 215101e04c3fSmrg if (slot_end < MAX_VARYINGS_INCL_PATCH * 4u) { 215201e04c3fSmrg for (unsigned j = *location / 4u; j < slot_end / 4u; j++) 215301e04c3fSmrg components[j] = 4; 215401e04c3fSmrg components[slot_end / 4u] = (slot_end & 3) + 1; 215501e04c3fSmrg } 215601e04c3fSmrg 215701e04c3fSmrg this->matches[i].generic_location = *location; 215801e04c3fSmrg 215901e04c3fSmrg *location = slot_end + 1; 216001e04c3fSmrg } 216101e04c3fSmrg 216201e04c3fSmrg return (generic_location + 3) / 4; 216301e04c3fSmrg} 216401e04c3fSmrg 216501e04c3fSmrg 216601e04c3fSmrg/** 216701e04c3fSmrg * Update the producer and consumer shaders to reflect the locations 216801e04c3fSmrg * assignments that were made by varying_matches::assign_locations(). 216901e04c3fSmrg */ 217001e04c3fSmrgvoid 217101e04c3fSmrgvarying_matches::store_locations() const 217201e04c3fSmrg{ 217301e04c3fSmrg /* Check is location needs to be packed with lower_packed_varyings() or if 217401e04c3fSmrg * we can just use ARB_enhanced_layouts packing. 217501e04c3fSmrg */ 21767ec681f3Smrg bool pack_loc[MAX_VARYINGS_INCL_PATCH] = {}; 217701e04c3fSmrg const glsl_type *loc_type[MAX_VARYINGS_INCL_PATCH][4] = { {NULL, NULL} }; 217801e04c3fSmrg 217901e04c3fSmrg for (unsigned i = 0; i < this->num_matches; i++) { 218001e04c3fSmrg ir_variable *producer_var = this->matches[i].producer_var; 218101e04c3fSmrg ir_variable *consumer_var = this->matches[i].consumer_var; 218201e04c3fSmrg unsigned generic_location = this->matches[i].generic_location; 218301e04c3fSmrg unsigned slot = generic_location / 4; 218401e04c3fSmrg unsigned offset = generic_location % 4; 218501e04c3fSmrg 218601e04c3fSmrg if (producer_var) { 218701e04c3fSmrg producer_var->data.location = VARYING_SLOT_VAR0 + slot; 218801e04c3fSmrg producer_var->data.location_frac = offset; 218901e04c3fSmrg } 219001e04c3fSmrg 219101e04c3fSmrg if (consumer_var) { 219201e04c3fSmrg assert(consumer_var->data.location == -1); 219301e04c3fSmrg consumer_var->data.location = VARYING_SLOT_VAR0 + slot; 219401e04c3fSmrg consumer_var->data.location_frac = offset; 219501e04c3fSmrg } 219601e04c3fSmrg 219701e04c3fSmrg /* Find locations suitable for native packing via 219801e04c3fSmrg * ARB_enhanced_layouts. 219901e04c3fSmrg */ 220001e04c3fSmrg if (producer_var && consumer_var) { 220101e04c3fSmrg if (enhanced_layouts_enabled) { 220201e04c3fSmrg const glsl_type *type = 220301e04c3fSmrg get_varying_type(producer_var, producer_stage); 22047e102996Smaya if (type->is_array() || type->is_matrix() || type->is_struct() || 22057ec681f3Smrg type->is_64bit()) { 220601e04c3fSmrg unsigned comp_slots = type->component_slots() + offset; 220701e04c3fSmrg unsigned slots = comp_slots / 4; 220801e04c3fSmrg if (comp_slots % 4) 220901e04c3fSmrg slots += 1; 221001e04c3fSmrg 221101e04c3fSmrg for (unsigned j = 0; j < slots; j++) { 221201e04c3fSmrg pack_loc[slot + j] = true; 221301e04c3fSmrg } 221401e04c3fSmrg } else if (offset + type->vector_elements > 4) { 221501e04c3fSmrg pack_loc[slot] = true; 221601e04c3fSmrg pack_loc[slot + 1] = true; 221701e04c3fSmrg } else { 221801e04c3fSmrg loc_type[slot][offset] = type; 221901e04c3fSmrg } 222001e04c3fSmrg } 222101e04c3fSmrg } 222201e04c3fSmrg } 222301e04c3fSmrg 222401e04c3fSmrg /* Attempt to use ARB_enhanced_layouts for more efficient packing if 222501e04c3fSmrg * suitable. 222601e04c3fSmrg */ 222701e04c3fSmrg if (enhanced_layouts_enabled) { 222801e04c3fSmrg for (unsigned i = 0; i < this->num_matches; i++) { 222901e04c3fSmrg ir_variable *producer_var = this->matches[i].producer_var; 223001e04c3fSmrg ir_variable *consumer_var = this->matches[i].consumer_var; 223101e04c3fSmrg unsigned generic_location = this->matches[i].generic_location; 223201e04c3fSmrg unsigned slot = generic_location / 4; 223301e04c3fSmrg 223401e04c3fSmrg if (pack_loc[slot] || !producer_var || !consumer_var) 223501e04c3fSmrg continue; 223601e04c3fSmrg 223701e04c3fSmrg const glsl_type *type = 223801e04c3fSmrg get_varying_type(producer_var, producer_stage); 223901e04c3fSmrg bool type_match = true; 224001e04c3fSmrg for (unsigned j = 0; j < 4; j++) { 224101e04c3fSmrg if (loc_type[slot][j]) { 224201e04c3fSmrg if (type->base_type != loc_type[slot][j]->base_type) 224301e04c3fSmrg type_match = false; 224401e04c3fSmrg } 224501e04c3fSmrg } 224601e04c3fSmrg 224701e04c3fSmrg if (type_match) { 224801e04c3fSmrg producer_var->data.explicit_location = 1; 224901e04c3fSmrg consumer_var->data.explicit_location = 1; 225001e04c3fSmrg producer_var->data.explicit_component = 1; 225101e04c3fSmrg consumer_var->data.explicit_component = 1; 225201e04c3fSmrg } 225301e04c3fSmrg } 225401e04c3fSmrg } 225501e04c3fSmrg} 225601e04c3fSmrg 225701e04c3fSmrg 225801e04c3fSmrg/** 225901e04c3fSmrg * Compute the "packing class" of the given varying. This is an unsigned 226001e04c3fSmrg * integer with the property that two variables in the same packing class can 226101e04c3fSmrg * be safely backed into the same vec4. 226201e04c3fSmrg */ 226301e04c3fSmrgunsigned 226401e04c3fSmrgvarying_matches::compute_packing_class(const ir_variable *var) 226501e04c3fSmrg{ 226601e04c3fSmrg /* Without help from the back-end, there is no way to pack together 226701e04c3fSmrg * variables with different interpolation types, because 226801e04c3fSmrg * lower_packed_varyings must choose exactly one interpolation type for 226901e04c3fSmrg * each packed varying it creates. 227001e04c3fSmrg * 227101e04c3fSmrg * However, we can safely pack together floats, ints, and uints, because: 227201e04c3fSmrg * 227301e04c3fSmrg * - varyings of base type "int" and "uint" must use the "flat" 227401e04c3fSmrg * interpolation type, which can only occur in GLSL 1.30 and above. 227501e04c3fSmrg * 227601e04c3fSmrg * - On platforms that support GLSL 1.30 and above, lower_packed_varyings 227701e04c3fSmrg * can store flat floats as ints without losing any information (using 227801e04c3fSmrg * the ir_unop_bitcast_* opcodes). 227901e04c3fSmrg * 228001e04c3fSmrg * Therefore, the packing class depends only on the interpolation type. 228101e04c3fSmrg */ 228201e04c3fSmrg const unsigned interp = var->is_interpolation_flat() 228301e04c3fSmrg ? unsigned(INTERP_MODE_FLAT) : var->data.interpolation; 228401e04c3fSmrg 228501e04c3fSmrg assert(interp < (1 << 3)); 228601e04c3fSmrg 228701e04c3fSmrg const unsigned packing_class = (interp << 0) | 228801e04c3fSmrg (var->data.centroid << 3) | 228901e04c3fSmrg (var->data.sample << 4) | 229001e04c3fSmrg (var->data.patch << 5) | 229101e04c3fSmrg (var->data.must_be_shader_input << 6); 229201e04c3fSmrg 229301e04c3fSmrg return packing_class; 229401e04c3fSmrg} 229501e04c3fSmrg 229601e04c3fSmrg 229701e04c3fSmrg/** 229801e04c3fSmrg * Compute the "packing order" of the given varying. This is a sort key we 229901e04c3fSmrg * use to determine when to attempt to pack the given varying relative to 230001e04c3fSmrg * other varyings in the same packing class. 230101e04c3fSmrg */ 230201e04c3fSmrgvarying_matches::packing_order_enum 230301e04c3fSmrgvarying_matches::compute_packing_order(const ir_variable *var) 230401e04c3fSmrg{ 230501e04c3fSmrg const glsl_type *element_type = var->type; 230601e04c3fSmrg 230701e04c3fSmrg while (element_type->is_array()) { 230801e04c3fSmrg element_type = element_type->fields.array; 230901e04c3fSmrg } 231001e04c3fSmrg 231101e04c3fSmrg switch (element_type->component_slots() % 4) { 231201e04c3fSmrg case 1: return PACKING_ORDER_SCALAR; 231301e04c3fSmrg case 2: return PACKING_ORDER_VEC2; 231401e04c3fSmrg case 3: return PACKING_ORDER_VEC3; 231501e04c3fSmrg case 0: return PACKING_ORDER_VEC4; 231601e04c3fSmrg default: 231701e04c3fSmrg assert(!"Unexpected value of vector_elements"); 231801e04c3fSmrg return PACKING_ORDER_VEC4; 231901e04c3fSmrg } 232001e04c3fSmrg} 232101e04c3fSmrg 232201e04c3fSmrg 232301e04c3fSmrg/** 232401e04c3fSmrg * Comparison function passed to qsort() to sort varyings by packing_class and 232501e04c3fSmrg * then by packing_order. 232601e04c3fSmrg */ 232701e04c3fSmrgint 232801e04c3fSmrgvarying_matches::match_comparator(const void *x_generic, const void *y_generic) 232901e04c3fSmrg{ 233001e04c3fSmrg const match *x = (const match *) x_generic; 233101e04c3fSmrg const match *y = (const match *) y_generic; 233201e04c3fSmrg 233301e04c3fSmrg if (x->packing_class != y->packing_class) 233401e04c3fSmrg return x->packing_class - y->packing_class; 233501e04c3fSmrg return x->packing_order - y->packing_order; 233601e04c3fSmrg} 233701e04c3fSmrg 233801e04c3fSmrg 233901e04c3fSmrg/** 234001e04c3fSmrg * Comparison function passed to qsort() to sort varyings used only by 234101e04c3fSmrg * transform feedback when packing of other varyings is disabled. 234201e04c3fSmrg */ 234301e04c3fSmrgint 234401e04c3fSmrgvarying_matches::xfb_comparator(const void *x_generic, const void *y_generic) 234501e04c3fSmrg{ 234601e04c3fSmrg const match *x = (const match *) x_generic; 234701e04c3fSmrg 234801e04c3fSmrg if (x->producer_var != NULL && x->producer_var->data.is_xfb_only) 234901e04c3fSmrg return match_comparator(x_generic, y_generic); 235001e04c3fSmrg 235101e04c3fSmrg /* FIXME: When the comparator returns 0 it means the elements being 235201e04c3fSmrg * compared are equivalent. However the qsort documentation says: 235301e04c3fSmrg * 235401e04c3fSmrg * "The order of equivalent elements is undefined." 235501e04c3fSmrg * 235601e04c3fSmrg * In practice the sort ends up reversing the order of the varyings which 235701e04c3fSmrg * means locations are also assigned in this reversed order and happens to 235801e04c3fSmrg * be what we want. This is also whats happening in 235901e04c3fSmrg * varying_matches::match_comparator(). 236001e04c3fSmrg */ 236101e04c3fSmrg return 0; 236201e04c3fSmrg} 236301e04c3fSmrg 236401e04c3fSmrg 23657ec681f3Smrg/** 23667ec681f3Smrg * Comparison function passed to qsort() to sort varyings NOT used by 23677ec681f3Smrg * transform feedback when packing of xfb varyings is disabled. 23687ec681f3Smrg */ 23697ec681f3Smrgint 23707ec681f3Smrgvarying_matches::not_xfb_comparator(const void *x_generic, const void *y_generic) 23717ec681f3Smrg{ 23727ec681f3Smrg const match *x = (const match *) x_generic; 23737ec681f3Smrg 23747ec681f3Smrg if (x->producer_var != NULL && !x->producer_var->data.is_xfb) 23757ec681f3Smrg return match_comparator(x_generic, y_generic); 23767ec681f3Smrg 23777ec681f3Smrg /* FIXME: When the comparator returns 0 it means the elements being 23787ec681f3Smrg * compared are equivalent. However the qsort documentation says: 23797ec681f3Smrg * 23807ec681f3Smrg * "The order of equivalent elements is undefined." 23817ec681f3Smrg * 23827ec681f3Smrg * In practice the sort ends up reversing the order of the varyings which 23837ec681f3Smrg * means locations are also assigned in this reversed order and happens to 23847ec681f3Smrg * be what we want. This is also whats happening in 23857ec681f3Smrg * varying_matches::match_comparator(). 23867ec681f3Smrg */ 23877ec681f3Smrg return 0; 23887ec681f3Smrg} 23897ec681f3Smrg 23907ec681f3Smrg 239101e04c3fSmrg/** 239201e04c3fSmrg * Is the given variable a varying variable to be counted against the 239301e04c3fSmrg * limit in ctx->Const.MaxVarying? 239401e04c3fSmrg * This includes variables such as texcoords, colors and generic 239501e04c3fSmrg * varyings, but excludes variables such as gl_FrontFacing and gl_FragCoord. 239601e04c3fSmrg */ 239701e04c3fSmrgstatic bool 239801e04c3fSmrgvar_counts_against_varying_limit(gl_shader_stage stage, const ir_variable *var) 239901e04c3fSmrg{ 240001e04c3fSmrg /* Only fragment shaders will take a varying variable as an input */ 240101e04c3fSmrg if (stage == MESA_SHADER_FRAGMENT && 240201e04c3fSmrg var->data.mode == ir_var_shader_in) { 240301e04c3fSmrg switch (var->data.location) { 240401e04c3fSmrg case VARYING_SLOT_POS: 240501e04c3fSmrg case VARYING_SLOT_FACE: 240601e04c3fSmrg case VARYING_SLOT_PNTC: 240701e04c3fSmrg return false; 240801e04c3fSmrg default: 240901e04c3fSmrg return true; 241001e04c3fSmrg } 241101e04c3fSmrg } 241201e04c3fSmrg return false; 241301e04c3fSmrg} 241401e04c3fSmrg 241501e04c3fSmrg 241601e04c3fSmrg/** 241701e04c3fSmrg * Visitor class that generates tfeedback_candidate structs describing all 241801e04c3fSmrg * possible targets of transform feedback. 241901e04c3fSmrg * 242001e04c3fSmrg * tfeedback_candidate structs are stored in the hash table 242101e04c3fSmrg * tfeedback_candidates, which is passed to the constructor. This hash table 242201e04c3fSmrg * maps varying names to instances of the tfeedback_candidate struct. 242301e04c3fSmrg */ 242401e04c3fSmrgclass tfeedback_candidate_generator : public program_resource_visitor 242501e04c3fSmrg{ 242601e04c3fSmrgpublic: 242701e04c3fSmrg tfeedback_candidate_generator(void *mem_ctx, 2428993e1d59Smrg hash_table *tfeedback_candidates, 2429993e1d59Smrg gl_shader_stage stage) 243001e04c3fSmrg : mem_ctx(mem_ctx), 243101e04c3fSmrg tfeedback_candidates(tfeedback_candidates), 2432993e1d59Smrg stage(stage), 243301e04c3fSmrg toplevel_var(NULL), 24347ec681f3Smrg varying_floats(0), 24357ec681f3Smrg xfb_offset_floats(0) 243601e04c3fSmrg { 243701e04c3fSmrg } 243801e04c3fSmrg 243901e04c3fSmrg void process(ir_variable *var) 244001e04c3fSmrg { 244101e04c3fSmrg /* All named varying interface blocks should be flattened by now */ 244201e04c3fSmrg assert(!var->is_interface_instance()); 2443993e1d59Smrg assert(var->data.mode == ir_var_shader_out); 244401e04c3fSmrg 244501e04c3fSmrg this->toplevel_var = var; 244601e04c3fSmrg this->varying_floats = 0; 24477ec681f3Smrg this->xfb_offset_floats = 0; 2448993e1d59Smrg const glsl_type *t = 2449993e1d59Smrg var->data.from_named_ifc_block ? var->get_interface_type() : var->type; 2450993e1d59Smrg if (!var->data.patch && stage == MESA_SHADER_TESS_CTRL) { 2451993e1d59Smrg assert(t->is_array()); 2452993e1d59Smrg t = t->fields.array; 2453993e1d59Smrg } 2454993e1d59Smrg program_resource_visitor::process(var, t, false); 245501e04c3fSmrg } 245601e04c3fSmrg 245701e04c3fSmrgprivate: 245801e04c3fSmrg virtual void visit_field(const glsl_type *type, const char *name, 245901e04c3fSmrg bool /* row_major */, 246001e04c3fSmrg const glsl_type * /* record_type */, 246101e04c3fSmrg const enum glsl_interface_packing, 246201e04c3fSmrg bool /* last_field */) 246301e04c3fSmrg { 24647e102996Smaya assert(!type->without_array()->is_struct()); 246501e04c3fSmrg assert(!type->without_array()->is_interface()); 246601e04c3fSmrg 246701e04c3fSmrg tfeedback_candidate *candidate 246801e04c3fSmrg = rzalloc(this->mem_ctx, tfeedback_candidate); 246901e04c3fSmrg candidate->toplevel_var = this->toplevel_var; 247001e04c3fSmrg candidate->type = type; 24717ec681f3Smrg 24727ec681f3Smrg if (type->without_array()->is_64bit()) { 24737ec681f3Smrg /* From ARB_gpu_shader_fp64: 24747ec681f3Smrg * 24757ec681f3Smrg * If any variable captured in transform feedback has double-precision 24767ec681f3Smrg * components, the practical requirements for defined behavior are: 24777ec681f3Smrg * ... 24787ec681f3Smrg * (c) each double-precision variable captured must be aligned to a 24797ec681f3Smrg * multiple of eight bytes relative to the beginning of a vertex. 24807ec681f3Smrg */ 24817ec681f3Smrg this->xfb_offset_floats = ALIGN(this->xfb_offset_floats, 2); 24827ec681f3Smrg /* 64-bit members of structs are also aligned. */ 24837ec681f3Smrg this->varying_floats = ALIGN(this->varying_floats, 2); 24847ec681f3Smrg } 24857ec681f3Smrg 24867ec681f3Smrg candidate->xfb_offset_floats = this->xfb_offset_floats; 24877ec681f3Smrg candidate->struct_offset_floats = this->varying_floats; 24887ec681f3Smrg 24897ec681f3Smrg _mesa_hash_table_insert(this->tfeedback_candidates, 24907ec681f3Smrg ralloc_strdup(this->mem_ctx, name), 24917ec681f3Smrg candidate); 24927ec681f3Smrg 24937ec681f3Smrg const unsigned component_slots = type->component_slots(); 24947ec681f3Smrg 24957ec681f3Smrg if (varying_has_user_specified_location(this->toplevel_var)) { 24967ec681f3Smrg this->varying_floats += type->count_attribute_slots(false) * 4; 24977ec681f3Smrg } else { 24987ec681f3Smrg this->varying_floats += component_slots; 24997ec681f3Smrg } 25007ec681f3Smrg 25017ec681f3Smrg this->xfb_offset_floats += component_slots; 250201e04c3fSmrg } 250301e04c3fSmrg 250401e04c3fSmrg /** 250501e04c3fSmrg * Memory context used to allocate hash table keys and values. 250601e04c3fSmrg */ 250701e04c3fSmrg void * const mem_ctx; 250801e04c3fSmrg 250901e04c3fSmrg /** 251001e04c3fSmrg * Hash table in which tfeedback_candidate objects should be stored. 251101e04c3fSmrg */ 251201e04c3fSmrg hash_table * const tfeedback_candidates; 251301e04c3fSmrg 2514993e1d59Smrg gl_shader_stage stage; 2515993e1d59Smrg 251601e04c3fSmrg /** 251701e04c3fSmrg * Pointer to the toplevel variable that is being traversed. 251801e04c3fSmrg */ 251901e04c3fSmrg ir_variable *toplevel_var; 252001e04c3fSmrg 252101e04c3fSmrg /** 252201e04c3fSmrg * Total number of varying floats that have been visited so far. This is 252301e04c3fSmrg * used to determine the offset to each varying within the toplevel 252401e04c3fSmrg * variable. 252501e04c3fSmrg */ 252601e04c3fSmrg unsigned varying_floats; 25277ec681f3Smrg 25287ec681f3Smrg /** 25297ec681f3Smrg * Offset within the xfb. Counted in floats. 25307ec681f3Smrg */ 25317ec681f3Smrg unsigned xfb_offset_floats; 253201e04c3fSmrg}; 253301e04c3fSmrg 253401e04c3fSmrg 253501e04c3fSmrgnamespace linker { 253601e04c3fSmrg 253701e04c3fSmrgvoid 253801e04c3fSmrgpopulate_consumer_input_sets(void *mem_ctx, exec_list *ir, 253901e04c3fSmrg hash_table *consumer_inputs, 254001e04c3fSmrg hash_table *consumer_interface_inputs, 254101e04c3fSmrg ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX]) 254201e04c3fSmrg{ 254301e04c3fSmrg memset(consumer_inputs_with_locations, 254401e04c3fSmrg 0, 254501e04c3fSmrg sizeof(consumer_inputs_with_locations[0]) * VARYING_SLOT_TESS_MAX); 254601e04c3fSmrg 254701e04c3fSmrg foreach_in_list(ir_instruction, node, ir) { 254801e04c3fSmrg ir_variable *const input_var = node->as_variable(); 254901e04c3fSmrg 255001e04c3fSmrg if (input_var != NULL && input_var->data.mode == ir_var_shader_in) { 255101e04c3fSmrg /* All interface blocks should have been lowered by this point */ 255201e04c3fSmrg assert(!input_var->type->is_interface()); 255301e04c3fSmrg 255401e04c3fSmrg if (input_var->data.explicit_location) { 255501e04c3fSmrg /* assign_varying_locations only cares about finding the 255601e04c3fSmrg * ir_variable at the start of a contiguous location block. 255701e04c3fSmrg * 255801e04c3fSmrg * - For !producer, consumer_inputs_with_locations isn't used. 255901e04c3fSmrg * 256001e04c3fSmrg * - For !consumer, consumer_inputs_with_locations is empty. 256101e04c3fSmrg * 256201e04c3fSmrg * For consumer && producer, if you were trying to set some 256301e04c3fSmrg * ir_variable to the middle of a location block on the other side 256401e04c3fSmrg * of producer/consumer, cross_validate_outputs_to_inputs() should 256501e04c3fSmrg * be link-erroring due to either type mismatch or location 256601e04c3fSmrg * overlaps. If the variables do match up, then they've got a 256701e04c3fSmrg * matching data.location and you only looked at 256801e04c3fSmrg * consumer_inputs_with_locations[var->data.location], not any 256901e04c3fSmrg * following entries for the array/structure. 257001e04c3fSmrg */ 257101e04c3fSmrg consumer_inputs_with_locations[input_var->data.location] = 257201e04c3fSmrg input_var; 257301e04c3fSmrg } else if (input_var->get_interface_type() != NULL) { 257401e04c3fSmrg char *const iface_field_name = 257501e04c3fSmrg ralloc_asprintf(mem_ctx, "%s.%s", 257601e04c3fSmrg input_var->get_interface_type()->without_array()->name, 257701e04c3fSmrg input_var->name); 257801e04c3fSmrg _mesa_hash_table_insert(consumer_interface_inputs, 257901e04c3fSmrg iface_field_name, input_var); 258001e04c3fSmrg } else { 258101e04c3fSmrg _mesa_hash_table_insert(consumer_inputs, 258201e04c3fSmrg ralloc_strdup(mem_ctx, input_var->name), 258301e04c3fSmrg input_var); 258401e04c3fSmrg } 258501e04c3fSmrg } 258601e04c3fSmrg } 258701e04c3fSmrg} 258801e04c3fSmrg 258901e04c3fSmrg/** 259001e04c3fSmrg * Find a variable from the consumer that "matches" the specified variable 259101e04c3fSmrg * 259201e04c3fSmrg * This function only finds inputs with names that match. There is no 259301e04c3fSmrg * validation (here) that the types, etc. are compatible. 259401e04c3fSmrg */ 259501e04c3fSmrgir_variable * 259601e04c3fSmrgget_matching_input(void *mem_ctx, 259701e04c3fSmrg const ir_variable *output_var, 259801e04c3fSmrg hash_table *consumer_inputs, 259901e04c3fSmrg hash_table *consumer_interface_inputs, 260001e04c3fSmrg ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX]) 260101e04c3fSmrg{ 260201e04c3fSmrg ir_variable *input_var; 260301e04c3fSmrg 260401e04c3fSmrg if (output_var->data.explicit_location) { 260501e04c3fSmrg input_var = consumer_inputs_with_locations[output_var->data.location]; 260601e04c3fSmrg } else if (output_var->get_interface_type() != NULL) { 260701e04c3fSmrg char *const iface_field_name = 260801e04c3fSmrg ralloc_asprintf(mem_ctx, "%s.%s", 260901e04c3fSmrg output_var->get_interface_type()->without_array()->name, 261001e04c3fSmrg output_var->name); 261101e04c3fSmrg hash_entry *entry = _mesa_hash_table_search(consumer_interface_inputs, iface_field_name); 261201e04c3fSmrg input_var = entry ? (ir_variable *) entry->data : NULL; 261301e04c3fSmrg } else { 261401e04c3fSmrg hash_entry *entry = _mesa_hash_table_search(consumer_inputs, output_var->name); 261501e04c3fSmrg input_var = entry ? (ir_variable *) entry->data : NULL; 261601e04c3fSmrg } 261701e04c3fSmrg 261801e04c3fSmrg return (input_var == NULL || input_var->data.mode != ir_var_shader_in) 261901e04c3fSmrg ? NULL : input_var; 262001e04c3fSmrg} 262101e04c3fSmrg 262201e04c3fSmrg} 262301e04c3fSmrg 262401e04c3fSmrgstatic int 262501e04c3fSmrgio_variable_cmp(const void *_a, const void *_b) 262601e04c3fSmrg{ 262701e04c3fSmrg const ir_variable *const a = *(const ir_variable **) _a; 262801e04c3fSmrg const ir_variable *const b = *(const ir_variable **) _b; 262901e04c3fSmrg 263001e04c3fSmrg if (a->data.explicit_location && b->data.explicit_location) 263101e04c3fSmrg return b->data.location - a->data.location; 263201e04c3fSmrg 263301e04c3fSmrg if (a->data.explicit_location && !b->data.explicit_location) 263401e04c3fSmrg return 1; 263501e04c3fSmrg 263601e04c3fSmrg if (!a->data.explicit_location && b->data.explicit_location) 263701e04c3fSmrg return -1; 263801e04c3fSmrg 263901e04c3fSmrg return -strcmp(a->name, b->name); 264001e04c3fSmrg} 264101e04c3fSmrg 264201e04c3fSmrg/** 264301e04c3fSmrg * Sort the shader IO variables into canonical order 264401e04c3fSmrg */ 264501e04c3fSmrgstatic void 264601e04c3fSmrgcanonicalize_shader_io(exec_list *ir, enum ir_variable_mode io_mode) 264701e04c3fSmrg{ 264801e04c3fSmrg ir_variable *var_table[MAX_PROGRAM_OUTPUTS * 4]; 264901e04c3fSmrg unsigned num_variables = 0; 265001e04c3fSmrg 265101e04c3fSmrg foreach_in_list(ir_instruction, node, ir) { 265201e04c3fSmrg ir_variable *const var = node->as_variable(); 265301e04c3fSmrg 265401e04c3fSmrg if (var == NULL || var->data.mode != io_mode) 265501e04c3fSmrg continue; 265601e04c3fSmrg 265701e04c3fSmrg /* If we have already encountered more I/O variables that could 265801e04c3fSmrg * successfully link, bail. 265901e04c3fSmrg */ 266001e04c3fSmrg if (num_variables == ARRAY_SIZE(var_table)) 266101e04c3fSmrg return; 266201e04c3fSmrg 266301e04c3fSmrg var_table[num_variables++] = var; 266401e04c3fSmrg } 266501e04c3fSmrg 266601e04c3fSmrg if (num_variables == 0) 266701e04c3fSmrg return; 266801e04c3fSmrg 266901e04c3fSmrg /* Sort the list in reverse order (io_variable_cmp handles this). Later 267001e04c3fSmrg * we're going to push the variables on to the IR list as a stack, so we 267101e04c3fSmrg * want the last variable (in canonical order) to be first in the list. 267201e04c3fSmrg */ 267301e04c3fSmrg qsort(var_table, num_variables, sizeof(var_table[0]), io_variable_cmp); 267401e04c3fSmrg 267501e04c3fSmrg /* Remove the variable from it's current location in the IR, and put it at 267601e04c3fSmrg * the front. 267701e04c3fSmrg */ 267801e04c3fSmrg for (unsigned i = 0; i < num_variables; i++) { 267901e04c3fSmrg var_table[i]->remove(); 268001e04c3fSmrg ir->push_head(var_table[i]); 268101e04c3fSmrg } 268201e04c3fSmrg} 268301e04c3fSmrg 268401e04c3fSmrg/** 268501e04c3fSmrg * Generate a bitfield map of the explicit locations for shader varyings. 268601e04c3fSmrg * 268701e04c3fSmrg * Note: For Tessellation shaders we are sitting right on the limits of the 268801e04c3fSmrg * 64 bit map. Per-vertex and per-patch both have separate location domains 268901e04c3fSmrg * with a max of MAX_VARYING. 269001e04c3fSmrg */ 269101e04c3fSmrgstatic uint64_t 269201e04c3fSmrgreserved_varying_slot(struct gl_linked_shader *stage, 269301e04c3fSmrg ir_variable_mode io_mode) 269401e04c3fSmrg{ 269501e04c3fSmrg assert(io_mode == ir_var_shader_in || io_mode == ir_var_shader_out); 269601e04c3fSmrg /* Avoid an overflow of the returned value */ 269701e04c3fSmrg assert(MAX_VARYINGS_INCL_PATCH <= 64); 269801e04c3fSmrg 269901e04c3fSmrg uint64_t slots = 0; 270001e04c3fSmrg int var_slot; 270101e04c3fSmrg 270201e04c3fSmrg if (!stage) 270301e04c3fSmrg return slots; 270401e04c3fSmrg 270501e04c3fSmrg foreach_in_list(ir_instruction, node, stage->ir) { 270601e04c3fSmrg ir_variable *const var = node->as_variable(); 270701e04c3fSmrg 270801e04c3fSmrg if (var == NULL || var->data.mode != io_mode || 270901e04c3fSmrg !var->data.explicit_location || 271001e04c3fSmrg var->data.location < VARYING_SLOT_VAR0) 271101e04c3fSmrg continue; 271201e04c3fSmrg 271301e04c3fSmrg var_slot = var->data.location - VARYING_SLOT_VAR0; 271401e04c3fSmrg 271501e04c3fSmrg unsigned num_elements = get_varying_type(var, stage->Stage) 271601e04c3fSmrg ->count_attribute_slots(io_mode == ir_var_shader_in && 271701e04c3fSmrg stage->Stage == MESA_SHADER_VERTEX); 271801e04c3fSmrg for (unsigned i = 0; i < num_elements; i++) { 271901e04c3fSmrg if (var_slot >= 0 && var_slot < MAX_VARYINGS_INCL_PATCH) 272001e04c3fSmrg slots |= UINT64_C(1) << var_slot; 272101e04c3fSmrg var_slot += 1; 272201e04c3fSmrg } 272301e04c3fSmrg } 272401e04c3fSmrg 272501e04c3fSmrg return slots; 272601e04c3fSmrg} 272701e04c3fSmrg 272801e04c3fSmrg 272901e04c3fSmrg/** 273001e04c3fSmrg * Assign locations for all variables that are produced in one pipeline stage 273101e04c3fSmrg * (the "producer") and consumed in the next stage (the "consumer"). 273201e04c3fSmrg * 273301e04c3fSmrg * Variables produced by the producer may also be consumed by transform 273401e04c3fSmrg * feedback. 273501e04c3fSmrg * 273601e04c3fSmrg * \param num_tfeedback_decls is the number of declarations indicating 273701e04c3fSmrg * variables that may be consumed by transform feedback. 273801e04c3fSmrg * 273901e04c3fSmrg * \param tfeedback_decls is a pointer to an array of tfeedback_decl objects 274001e04c3fSmrg * representing the result of parsing the strings passed to 274101e04c3fSmrg * glTransformFeedbackVaryings(). assign_location() will be called for 274201e04c3fSmrg * each of these objects that matches one of the outputs of the 274301e04c3fSmrg * producer. 274401e04c3fSmrg * 274501e04c3fSmrg * When num_tfeedback_decls is nonzero, it is permissible for the consumer to 274601e04c3fSmrg * be NULL. In this case, varying locations are assigned solely based on the 274701e04c3fSmrg * requirements of transform feedback. 274801e04c3fSmrg */ 274901e04c3fSmrgstatic bool 275001e04c3fSmrgassign_varying_locations(struct gl_context *ctx, 275101e04c3fSmrg void *mem_ctx, 275201e04c3fSmrg struct gl_shader_program *prog, 275301e04c3fSmrg gl_linked_shader *producer, 275401e04c3fSmrg gl_linked_shader *consumer, 275501e04c3fSmrg unsigned num_tfeedback_decls, 275601e04c3fSmrg tfeedback_decl *tfeedback_decls, 275701e04c3fSmrg const uint64_t reserved_slots) 275801e04c3fSmrg{ 275901e04c3fSmrg /* Tessellation shaders treat inputs and outputs as shared memory and can 276001e04c3fSmrg * access inputs and outputs of other invocations. 276101e04c3fSmrg * Therefore, they can't be lowered to temps easily (and definitely not 276201e04c3fSmrg * efficiently). 276301e04c3fSmrg */ 276401e04c3fSmrg bool unpackable_tess = 276501e04c3fSmrg (consumer && consumer->Stage == MESA_SHADER_TESS_EVAL) || 276601e04c3fSmrg (consumer && consumer->Stage == MESA_SHADER_TESS_CTRL) || 276701e04c3fSmrg (producer && producer->Stage == MESA_SHADER_TESS_CTRL); 276801e04c3fSmrg 276901e04c3fSmrg /* Transform feedback code assumes varying arrays are packed, so if the 277001e04c3fSmrg * driver has disabled varying packing, make sure to at least enable 27717ec681f3Smrg * packing required by transform feedback. See below for exception. 277201e04c3fSmrg */ 277301e04c3fSmrg bool xfb_enabled = 277401e04c3fSmrg ctx->Extensions.EXT_transform_feedback && !unpackable_tess; 277501e04c3fSmrg 27767ec681f3Smrg /* Some drivers actually requires packing to be explicitly disabled 27777ec681f3Smrg * for varyings used by transform feedback. 27787ec681f3Smrg */ 27797ec681f3Smrg bool disable_xfb_packing = 27807ec681f3Smrg ctx->Const.DisableTransformFeedbackPacking; 27817ec681f3Smrg 278201e04c3fSmrg /* Disable packing on outward facing interfaces for SSO because in ES we 278301e04c3fSmrg * need to retain the unpacked varying information for draw time 278401e04c3fSmrg * validation. 278501e04c3fSmrg * 278601e04c3fSmrg * Packing is still enabled on individual arrays, structs, and matrices as 278701e04c3fSmrg * these are required by the transform feedback code and it is still safe 278801e04c3fSmrg * to do so. We also enable packing when a varying is only used for 278901e04c3fSmrg * transform feedback and its not a SSO. 279001e04c3fSmrg */ 279101e04c3fSmrg bool disable_varying_packing = 279201e04c3fSmrg ctx->Const.DisableVaryingPacking || unpackable_tess; 279301e04c3fSmrg if (prog->SeparateShader && (producer == NULL || consumer == NULL)) 279401e04c3fSmrg disable_varying_packing = true; 279501e04c3fSmrg 27967ec681f3Smrg varying_matches matches(disable_varying_packing, 27977ec681f3Smrg disable_xfb_packing, 27987ec681f3Smrg xfb_enabled, 279901e04c3fSmrg ctx->Extensions.ARB_enhanced_layouts, 280001e04c3fSmrg producer ? producer->Stage : MESA_SHADER_NONE, 280101e04c3fSmrg consumer ? consumer->Stage : MESA_SHADER_NONE); 28027ec681f3Smrg void *hash_table_ctx = ralloc_context(NULL); 280301e04c3fSmrg hash_table *tfeedback_candidates = 28047ec681f3Smrg _mesa_hash_table_create(hash_table_ctx, _mesa_hash_string, 280501e04c3fSmrg _mesa_key_string_equal); 280601e04c3fSmrg hash_table *consumer_inputs = 28077ec681f3Smrg _mesa_hash_table_create(hash_table_ctx, _mesa_hash_string, 280801e04c3fSmrg _mesa_key_string_equal); 280901e04c3fSmrg hash_table *consumer_interface_inputs = 28107ec681f3Smrg _mesa_hash_table_create(hash_table_ctx, _mesa_hash_string, 281101e04c3fSmrg _mesa_key_string_equal); 281201e04c3fSmrg ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX] = { 281301e04c3fSmrg NULL, 281401e04c3fSmrg }; 281501e04c3fSmrg 281601e04c3fSmrg unsigned consumer_vertices = 0; 281701e04c3fSmrg if (consumer && consumer->Stage == MESA_SHADER_GEOMETRY) 281801e04c3fSmrg consumer_vertices = prog->Geom.VerticesIn; 281901e04c3fSmrg 282001e04c3fSmrg /* Operate in a total of four passes. 282101e04c3fSmrg * 282201e04c3fSmrg * 1. Sort inputs / outputs into a canonical order. This is necessary so 282301e04c3fSmrg * that inputs / outputs of separable shaders will be assigned 282401e04c3fSmrg * predictable locations regardless of the order in which declarations 282501e04c3fSmrg * appeared in the shader source. 282601e04c3fSmrg * 282701e04c3fSmrg * 2. Assign locations for any matching inputs and outputs. 282801e04c3fSmrg * 282901e04c3fSmrg * 3. Mark output variables in the producer that do not have locations as 283001e04c3fSmrg * not being outputs. This lets the optimizer eliminate them. 283101e04c3fSmrg * 283201e04c3fSmrg * 4. Mark input variables in the consumer that do not have locations as 283301e04c3fSmrg * not being inputs. This lets the optimizer eliminate them. 283401e04c3fSmrg */ 283501e04c3fSmrg if (consumer) 283601e04c3fSmrg canonicalize_shader_io(consumer->ir, ir_var_shader_in); 283701e04c3fSmrg 283801e04c3fSmrg if (producer) 283901e04c3fSmrg canonicalize_shader_io(producer->ir, ir_var_shader_out); 284001e04c3fSmrg 284101e04c3fSmrg if (consumer) 284201e04c3fSmrg linker::populate_consumer_input_sets(mem_ctx, consumer->ir, 284301e04c3fSmrg consumer_inputs, 284401e04c3fSmrg consumer_interface_inputs, 284501e04c3fSmrg consumer_inputs_with_locations); 284601e04c3fSmrg 284701e04c3fSmrg if (producer) { 284801e04c3fSmrg foreach_in_list(ir_instruction, node, producer->ir) { 284901e04c3fSmrg ir_variable *const output_var = node->as_variable(); 285001e04c3fSmrg 285101e04c3fSmrg if (output_var == NULL || output_var->data.mode != ir_var_shader_out) 285201e04c3fSmrg continue; 285301e04c3fSmrg 285401e04c3fSmrg /* Only geometry shaders can use non-zero streams */ 285501e04c3fSmrg assert(output_var->data.stream == 0 || 285601e04c3fSmrg (output_var->data.stream < MAX_VERTEX_STREAMS && 285701e04c3fSmrg producer->Stage == MESA_SHADER_GEOMETRY)); 285801e04c3fSmrg 285901e04c3fSmrg if (num_tfeedback_decls > 0) { 2860993e1d59Smrg tfeedback_candidate_generator g(mem_ctx, tfeedback_candidates, producer->Stage); 2861993e1d59Smrg /* From OpenGL 4.6 (Core Profile) spec, section 11.1.2.1 2862993e1d59Smrg * ("Vertex Shader Variables / Output Variables") 2863993e1d59Smrg * 2864993e1d59Smrg * "Each program object can specify a set of output variables from 2865993e1d59Smrg * one shader to be recorded in transform feedback mode (see 2866993e1d59Smrg * section 13.3). The variables that can be recorded are those 2867993e1d59Smrg * emitted by the first active shader, in order, from the 2868993e1d59Smrg * following list: 2869993e1d59Smrg * 2870993e1d59Smrg * * geometry shader 2871993e1d59Smrg * * tessellation evaluation shader 2872993e1d59Smrg * * tessellation control shader 2873993e1d59Smrg * * vertex shader" 2874993e1d59Smrg * 2875993e1d59Smrg * But on OpenGL ES 3.2, section 11.1.2.1 ("Vertex Shader 2876993e1d59Smrg * Variables / Output Variables") tessellation control shader is 2877993e1d59Smrg * not included in the stages list. 2878993e1d59Smrg */ 2879993e1d59Smrg if (!prog->IsES || producer->Stage != MESA_SHADER_TESS_CTRL) { 2880993e1d59Smrg g.process(output_var); 2881993e1d59Smrg } 288201e04c3fSmrg } 288301e04c3fSmrg 288401e04c3fSmrg ir_variable *const input_var = 288501e04c3fSmrg linker::get_matching_input(mem_ctx, output_var, consumer_inputs, 288601e04c3fSmrg consumer_interface_inputs, 288701e04c3fSmrg consumer_inputs_with_locations); 288801e04c3fSmrg 288901e04c3fSmrg /* If a matching input variable was found, add this output (and the 289001e04c3fSmrg * input) to the set. If this is a separable program and there is no 289101e04c3fSmrg * consumer stage, add the output. 289201e04c3fSmrg * 289301e04c3fSmrg * Always add TCS outputs. They are shared by all invocations 289401e04c3fSmrg * within a patch and can be used as shared memory. 289501e04c3fSmrg */ 289601e04c3fSmrg if (input_var || (prog->SeparateShader && consumer == NULL) || 289701e04c3fSmrg producer->Stage == MESA_SHADER_TESS_CTRL) { 289801e04c3fSmrg matches.record(output_var, input_var); 289901e04c3fSmrg } 290001e04c3fSmrg 290101e04c3fSmrg /* Only stream 0 outputs can be consumed in the next stage */ 290201e04c3fSmrg if (input_var && output_var->data.stream != 0) { 290301e04c3fSmrg linker_error(prog, "output %s is assigned to stream=%d but " 290401e04c3fSmrg "is linked to an input, which requires stream=0", 290501e04c3fSmrg output_var->name, output_var->data.stream); 29067ec681f3Smrg ralloc_free(hash_table_ctx); 290701e04c3fSmrg return false; 290801e04c3fSmrg } 290901e04c3fSmrg } 291001e04c3fSmrg } else { 291101e04c3fSmrg /* If there's no producer stage, then this must be a separable program. 291201e04c3fSmrg * For example, we may have a program that has just a fragment shader. 291301e04c3fSmrg * Later this program will be used with some arbitrary vertex (or 291401e04c3fSmrg * geometry) shader program. This means that locations must be assigned 291501e04c3fSmrg * for all the inputs. 291601e04c3fSmrg */ 291701e04c3fSmrg foreach_in_list(ir_instruction, node, consumer->ir) { 291801e04c3fSmrg ir_variable *const input_var = node->as_variable(); 291901e04c3fSmrg if (input_var && input_var->data.mode == ir_var_shader_in) { 292001e04c3fSmrg matches.record(NULL, input_var); 292101e04c3fSmrg } 292201e04c3fSmrg } 292301e04c3fSmrg } 292401e04c3fSmrg 292501e04c3fSmrg for (unsigned i = 0; i < num_tfeedback_decls; ++i) { 292601e04c3fSmrg if (!tfeedback_decls[i].is_varying()) 292701e04c3fSmrg continue; 292801e04c3fSmrg 292901e04c3fSmrg const tfeedback_candidate *matched_candidate 293001e04c3fSmrg = tfeedback_decls[i].find_candidate(prog, tfeedback_candidates); 293101e04c3fSmrg 293201e04c3fSmrg if (matched_candidate == NULL) { 29337ec681f3Smrg ralloc_free(hash_table_ctx); 293401e04c3fSmrg return false; 293501e04c3fSmrg } 293601e04c3fSmrg 29377ec681f3Smrg /* There are two situations where a new output varying is needed: 29387ec681f3Smrg * 29397ec681f3Smrg * - If varying packing is disabled for xfb and the current declaration 29407ec681f3Smrg * is subscripting an array, whether the subscript is aligned or not. 29417ec681f3Smrg * to preserve the rest of the array for the consumer. 29427ec681f3Smrg * 29437ec681f3Smrg * - If a builtin variable needs to be copied to a new variable 29447ec681f3Smrg * before its content is modified by another lowering pass (e.g. 29457ec681f3Smrg * \c gl_Position is transformed by \c nir_lower_viewport_transform). 29467ec681f3Smrg */ 29477ec681f3Smrg const bool lowered = 29487ec681f3Smrg (disable_xfb_packing && tfeedback_decls[i].subscripted()) || 29497ec681f3Smrg (matched_candidate->toplevel_var->data.explicit_location && 29507ec681f3Smrg matched_candidate->toplevel_var->data.location < VARYING_SLOT_VAR0 && 29517ec681f3Smrg (!consumer || consumer->Stage == MESA_SHADER_FRAGMENT) && 29527ec681f3Smrg (ctx->Const.ShaderCompilerOptions[producer->Stage].LowerBuiltinVariablesXfb & 29537ec681f3Smrg BITFIELD_BIT(matched_candidate->toplevel_var->data.location))); 29547ec681f3Smrg 29557ec681f3Smrg if (lowered) { 29567ec681f3Smrg ir_variable *new_var; 29577ec681f3Smrg tfeedback_candidate *new_candidate = NULL; 29587ec681f3Smrg 29597ec681f3Smrg new_var = lower_xfb_varying(mem_ctx, producer, tfeedback_decls[i].name()); 29607ec681f3Smrg if (new_var == NULL) { 29617ec681f3Smrg ralloc_free(hash_table_ctx); 29627ec681f3Smrg return false; 29637ec681f3Smrg } 29647ec681f3Smrg 29657ec681f3Smrg /* Create new candidate and replace matched_candidate */ 29667ec681f3Smrg new_candidate = rzalloc(mem_ctx, tfeedback_candidate); 29677ec681f3Smrg new_candidate->toplevel_var = new_var; 29687ec681f3Smrg new_candidate->toplevel_var->data.is_unmatched_generic_inout = 1; 29697ec681f3Smrg new_candidate->type = new_var->type; 29707ec681f3Smrg new_candidate->struct_offset_floats = 0; 29717ec681f3Smrg new_candidate->xfb_offset_floats = 0; 29727ec681f3Smrg _mesa_hash_table_insert(tfeedback_candidates, 29737ec681f3Smrg ralloc_strdup(mem_ctx, new_var->name), 29747ec681f3Smrg new_candidate); 29757ec681f3Smrg 29767ec681f3Smrg tfeedback_decls[i].set_lowered_candidate(new_candidate); 29777ec681f3Smrg matched_candidate = new_candidate; 29787ec681f3Smrg } 29797ec681f3Smrg 29807ec681f3Smrg /* Mark as xfb varying */ 29817ec681f3Smrg matched_candidate->toplevel_var->data.is_xfb = 1; 29827ec681f3Smrg 298301e04c3fSmrg /* Mark xfb varyings as always active */ 298401e04c3fSmrg matched_candidate->toplevel_var->data.always_active_io = 1; 298501e04c3fSmrg 298601e04c3fSmrg /* Mark any corresponding inputs as always active also. We must do this 298701e04c3fSmrg * because we have a NIR pass that lowers vectors to scalars and another 298801e04c3fSmrg * that removes unused varyings. 298901e04c3fSmrg * We don't split varyings marked as always active because there is no 299001e04c3fSmrg * point in doing so. This means we need to mark both sides of the 299101e04c3fSmrg * interface as always active otherwise we will have a mismatch and 299201e04c3fSmrg * start removing things we shouldn't. 299301e04c3fSmrg */ 299401e04c3fSmrg ir_variable *const input_var = 299501e04c3fSmrg linker::get_matching_input(mem_ctx, matched_candidate->toplevel_var, 299601e04c3fSmrg consumer_inputs, 299701e04c3fSmrg consumer_interface_inputs, 299801e04c3fSmrg consumer_inputs_with_locations); 29997ec681f3Smrg if (input_var) { 30007ec681f3Smrg input_var->data.is_xfb = 1; 300101e04c3fSmrg input_var->data.always_active_io = 1; 30027ec681f3Smrg } 300301e04c3fSmrg 300401e04c3fSmrg if (matched_candidate->toplevel_var->data.is_unmatched_generic_inout) { 300501e04c3fSmrg matched_candidate->toplevel_var->data.is_xfb_only = 1; 300601e04c3fSmrg matches.record(matched_candidate->toplevel_var, NULL); 300701e04c3fSmrg } 300801e04c3fSmrg } 300901e04c3fSmrg 301001e04c3fSmrg uint8_t components[MAX_VARYINGS_INCL_PATCH] = {0}; 301101e04c3fSmrg const unsigned slots_used = matches.assign_locations( 301201e04c3fSmrg prog, components, reserved_slots); 301301e04c3fSmrg matches.store_locations(); 301401e04c3fSmrg 301501e04c3fSmrg for (unsigned i = 0; i < num_tfeedback_decls; ++i) { 301601e04c3fSmrg if (tfeedback_decls[i].is_varying()) { 301701e04c3fSmrg if (!tfeedback_decls[i].assign_location(ctx, prog)) { 30187ec681f3Smrg ralloc_free(hash_table_ctx); 301901e04c3fSmrg return false; 302001e04c3fSmrg } 302101e04c3fSmrg } 302201e04c3fSmrg } 30237ec681f3Smrg ralloc_free(hash_table_ctx); 302401e04c3fSmrg 302501e04c3fSmrg if (consumer && producer) { 302601e04c3fSmrg foreach_in_list(ir_instruction, node, consumer->ir) { 302701e04c3fSmrg ir_variable *const var = node->as_variable(); 302801e04c3fSmrg 302901e04c3fSmrg if (var && var->data.mode == ir_var_shader_in && 303001e04c3fSmrg var->data.is_unmatched_generic_inout) { 303101e04c3fSmrg if (!prog->IsES && prog->data->Version <= 120) { 303201e04c3fSmrg /* On page 25 (page 31 of the PDF) of the GLSL 1.20 spec: 303301e04c3fSmrg * 303401e04c3fSmrg * Only those varying variables used (i.e. read) in 303501e04c3fSmrg * the fragment shader executable must be written to 303601e04c3fSmrg * by the vertex shader executable; declaring 303701e04c3fSmrg * superfluous varying variables in a vertex shader is 303801e04c3fSmrg * permissible. 303901e04c3fSmrg * 304001e04c3fSmrg * We interpret this text as meaning that the VS must 304101e04c3fSmrg * write the variable for the FS to read it. See 304201e04c3fSmrg * "glsl1-varying read but not written" in piglit. 304301e04c3fSmrg */ 304401e04c3fSmrg linker_error(prog, "%s shader varying %s not written " 304501e04c3fSmrg "by %s shader\n.", 304601e04c3fSmrg _mesa_shader_stage_to_string(consumer->Stage), 304701e04c3fSmrg var->name, 304801e04c3fSmrg _mesa_shader_stage_to_string(producer->Stage)); 304901e04c3fSmrg } else { 305001e04c3fSmrg linker_warning(prog, "%s shader varying %s not written " 305101e04c3fSmrg "by %s shader\n.", 305201e04c3fSmrg _mesa_shader_stage_to_string(consumer->Stage), 305301e04c3fSmrg var->name, 305401e04c3fSmrg _mesa_shader_stage_to_string(producer->Stage)); 305501e04c3fSmrg } 305601e04c3fSmrg } 305701e04c3fSmrg } 305801e04c3fSmrg 305901e04c3fSmrg /* Now that validation is done its safe to remove unused varyings. As 306001e04c3fSmrg * we have both a producer and consumer its safe to remove unused 306101e04c3fSmrg * varyings even if the program is a SSO because the stages are being 306201e04c3fSmrg * linked together i.e. we have a multi-stage SSO. 306301e04c3fSmrg */ 306401e04c3fSmrg remove_unused_shader_inputs_and_outputs(false, producer, 306501e04c3fSmrg ir_var_shader_out); 306601e04c3fSmrg remove_unused_shader_inputs_and_outputs(false, consumer, 306701e04c3fSmrg ir_var_shader_in); 306801e04c3fSmrg } 306901e04c3fSmrg 307001e04c3fSmrg if (producer) { 307101e04c3fSmrg lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_out, 307201e04c3fSmrg 0, producer, disable_varying_packing, 30737ec681f3Smrg disable_xfb_packing, xfb_enabled); 307401e04c3fSmrg } 307501e04c3fSmrg 307601e04c3fSmrg if (consumer) { 307701e04c3fSmrg lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_in, 30787ec681f3Smrg consumer_vertices, consumer, disable_varying_packing, 30797ec681f3Smrg disable_xfb_packing, xfb_enabled); 308001e04c3fSmrg } 308101e04c3fSmrg 308201e04c3fSmrg return true; 308301e04c3fSmrg} 308401e04c3fSmrg 308501e04c3fSmrgstatic bool 308601e04c3fSmrgcheck_against_output_limit(struct gl_context *ctx, 308701e04c3fSmrg struct gl_shader_program *prog, 308801e04c3fSmrg gl_linked_shader *producer, 308901e04c3fSmrg unsigned num_explicit_locations) 309001e04c3fSmrg{ 309101e04c3fSmrg unsigned output_vectors = num_explicit_locations; 309201e04c3fSmrg 309301e04c3fSmrg foreach_in_list(ir_instruction, node, producer->ir) { 309401e04c3fSmrg ir_variable *const var = node->as_variable(); 309501e04c3fSmrg 309601e04c3fSmrg if (var && !var->data.explicit_location && 309701e04c3fSmrg var->data.mode == ir_var_shader_out && 309801e04c3fSmrg var_counts_against_varying_limit(producer->Stage, var)) { 309901e04c3fSmrg /* outputs for fragment shader can't be doubles */ 310001e04c3fSmrg output_vectors += var->type->count_attribute_slots(false); 310101e04c3fSmrg } 310201e04c3fSmrg } 310301e04c3fSmrg 310401e04c3fSmrg assert(producer->Stage != MESA_SHADER_FRAGMENT); 310501e04c3fSmrg unsigned max_output_components = 310601e04c3fSmrg ctx->Const.Program[producer->Stage].MaxOutputComponents; 310701e04c3fSmrg 310801e04c3fSmrg const unsigned output_components = output_vectors * 4; 310901e04c3fSmrg if (output_components > max_output_components) { 311001e04c3fSmrg if (ctx->API == API_OPENGLES2 || prog->IsES) 311101e04c3fSmrg linker_error(prog, "%s shader uses too many output vectors " 311201e04c3fSmrg "(%u > %u)\n", 311301e04c3fSmrg _mesa_shader_stage_to_string(producer->Stage), 311401e04c3fSmrg output_vectors, 311501e04c3fSmrg max_output_components / 4); 311601e04c3fSmrg else 311701e04c3fSmrg linker_error(prog, "%s shader uses too many output components " 311801e04c3fSmrg "(%u > %u)\n", 311901e04c3fSmrg _mesa_shader_stage_to_string(producer->Stage), 312001e04c3fSmrg output_components, 312101e04c3fSmrg max_output_components); 312201e04c3fSmrg 312301e04c3fSmrg return false; 312401e04c3fSmrg } 312501e04c3fSmrg 312601e04c3fSmrg return true; 312701e04c3fSmrg} 312801e04c3fSmrg 312901e04c3fSmrgstatic bool 313001e04c3fSmrgcheck_against_input_limit(struct gl_context *ctx, 313101e04c3fSmrg struct gl_shader_program *prog, 313201e04c3fSmrg gl_linked_shader *consumer, 313301e04c3fSmrg unsigned num_explicit_locations) 313401e04c3fSmrg{ 313501e04c3fSmrg unsigned input_vectors = num_explicit_locations; 313601e04c3fSmrg 313701e04c3fSmrg foreach_in_list(ir_instruction, node, consumer->ir) { 313801e04c3fSmrg ir_variable *const var = node->as_variable(); 313901e04c3fSmrg 314001e04c3fSmrg if (var && !var->data.explicit_location && 314101e04c3fSmrg var->data.mode == ir_var_shader_in && 314201e04c3fSmrg var_counts_against_varying_limit(consumer->Stage, var)) { 314301e04c3fSmrg /* vertex inputs aren't varying counted */ 314401e04c3fSmrg input_vectors += var->type->count_attribute_slots(false); 314501e04c3fSmrg } 314601e04c3fSmrg } 314701e04c3fSmrg 314801e04c3fSmrg assert(consumer->Stage != MESA_SHADER_VERTEX); 314901e04c3fSmrg unsigned max_input_components = 315001e04c3fSmrg ctx->Const.Program[consumer->Stage].MaxInputComponents; 315101e04c3fSmrg 315201e04c3fSmrg const unsigned input_components = input_vectors * 4; 315301e04c3fSmrg if (input_components > max_input_components) { 315401e04c3fSmrg if (ctx->API == API_OPENGLES2 || prog->IsES) 315501e04c3fSmrg linker_error(prog, "%s shader uses too many input vectors " 315601e04c3fSmrg "(%u > %u)\n", 315701e04c3fSmrg _mesa_shader_stage_to_string(consumer->Stage), 315801e04c3fSmrg input_vectors, 315901e04c3fSmrg max_input_components / 4); 316001e04c3fSmrg else 316101e04c3fSmrg linker_error(prog, "%s shader uses too many input components " 316201e04c3fSmrg "(%u > %u)\n", 316301e04c3fSmrg _mesa_shader_stage_to_string(consumer->Stage), 316401e04c3fSmrg input_components, 316501e04c3fSmrg max_input_components); 316601e04c3fSmrg 316701e04c3fSmrg return false; 316801e04c3fSmrg } 316901e04c3fSmrg 317001e04c3fSmrg return true; 317101e04c3fSmrg} 317201e04c3fSmrg 317301e04c3fSmrgbool 317401e04c3fSmrglink_varyings(struct gl_shader_program *prog, unsigned first, unsigned last, 317501e04c3fSmrg struct gl_context *ctx, void *mem_ctx) 317601e04c3fSmrg{ 317701e04c3fSmrg bool has_xfb_qualifiers = false; 317801e04c3fSmrg unsigned num_tfeedback_decls = 0; 317901e04c3fSmrg char **varying_names = NULL; 318001e04c3fSmrg tfeedback_decl *tfeedback_decls = NULL; 318101e04c3fSmrg 318201e04c3fSmrg /* From the ARB_enhanced_layouts spec: 318301e04c3fSmrg * 318401e04c3fSmrg * "If the shader used to record output variables for transform feedback 318501e04c3fSmrg * varyings uses the "xfb_buffer", "xfb_offset", or "xfb_stride" layout 318601e04c3fSmrg * qualifiers, the values specified by TransformFeedbackVaryings are 318701e04c3fSmrg * ignored, and the set of variables captured for transform feedback is 318801e04c3fSmrg * instead derived from the specified layout qualifiers." 318901e04c3fSmrg */ 319001e04c3fSmrg for (int i = MESA_SHADER_FRAGMENT - 1; i >= 0; i--) { 319101e04c3fSmrg /* Find last stage before fragment shader */ 319201e04c3fSmrg if (prog->_LinkedShaders[i]) { 319301e04c3fSmrg has_xfb_qualifiers = 319401e04c3fSmrg process_xfb_layout_qualifiers(mem_ctx, prog->_LinkedShaders[i], 319501e04c3fSmrg prog, &num_tfeedback_decls, 319601e04c3fSmrg &varying_names); 319701e04c3fSmrg break; 319801e04c3fSmrg } 319901e04c3fSmrg } 320001e04c3fSmrg 320101e04c3fSmrg if (!has_xfb_qualifiers) { 320201e04c3fSmrg num_tfeedback_decls = prog->TransformFeedback.NumVarying; 320301e04c3fSmrg varying_names = prog->TransformFeedback.VaryingNames; 320401e04c3fSmrg } 320501e04c3fSmrg 320601e04c3fSmrg if (num_tfeedback_decls != 0) { 320701e04c3fSmrg /* From GL_EXT_transform_feedback: 320801e04c3fSmrg * A program will fail to link if: 320901e04c3fSmrg * 321001e04c3fSmrg * * the <count> specified by TransformFeedbackVaryingsEXT is 321101e04c3fSmrg * non-zero, but the program object has no vertex or geometry 321201e04c3fSmrg * shader; 321301e04c3fSmrg */ 321401e04c3fSmrg if (first >= MESA_SHADER_FRAGMENT) { 321501e04c3fSmrg linker_error(prog, "Transform feedback varyings specified, but " 321601e04c3fSmrg "no vertex, tessellation, or geometry shader is " 321701e04c3fSmrg "present.\n"); 321801e04c3fSmrg return false; 321901e04c3fSmrg } 322001e04c3fSmrg 322101e04c3fSmrg tfeedback_decls = rzalloc_array(mem_ctx, tfeedback_decl, 322201e04c3fSmrg num_tfeedback_decls); 322301e04c3fSmrg if (!parse_tfeedback_decls(ctx, prog, mem_ctx, num_tfeedback_decls, 322401e04c3fSmrg varying_names, tfeedback_decls)) 322501e04c3fSmrg return false; 322601e04c3fSmrg } 322701e04c3fSmrg 322801e04c3fSmrg /* If there is no fragment shader we need to set transform feedback. 322901e04c3fSmrg * 323001e04c3fSmrg * For SSO we also need to assign output locations. We assign them here 323101e04c3fSmrg * because we need to do it for both single stage programs and multi stage 323201e04c3fSmrg * programs. 323301e04c3fSmrg */ 323401e04c3fSmrg if (last < MESA_SHADER_FRAGMENT && 323501e04c3fSmrg (num_tfeedback_decls != 0 || prog->SeparateShader)) { 323601e04c3fSmrg const uint64_t reserved_out_slots = 323701e04c3fSmrg reserved_varying_slot(prog->_LinkedShaders[last], ir_var_shader_out); 323801e04c3fSmrg if (!assign_varying_locations(ctx, mem_ctx, prog, 323901e04c3fSmrg prog->_LinkedShaders[last], NULL, 324001e04c3fSmrg num_tfeedback_decls, tfeedback_decls, 324101e04c3fSmrg reserved_out_slots)) 324201e04c3fSmrg return false; 324301e04c3fSmrg } 324401e04c3fSmrg 324501e04c3fSmrg if (last <= MESA_SHADER_FRAGMENT) { 324601e04c3fSmrg /* Remove unused varyings from the first/last stage unless SSO */ 324701e04c3fSmrg remove_unused_shader_inputs_and_outputs(prog->SeparateShader, 324801e04c3fSmrg prog->_LinkedShaders[first], 324901e04c3fSmrg ir_var_shader_in); 325001e04c3fSmrg remove_unused_shader_inputs_and_outputs(prog->SeparateShader, 325101e04c3fSmrg prog->_LinkedShaders[last], 325201e04c3fSmrg ir_var_shader_out); 325301e04c3fSmrg 325401e04c3fSmrg /* If the program is made up of only a single stage */ 325501e04c3fSmrg if (first == last) { 325601e04c3fSmrg gl_linked_shader *const sh = prog->_LinkedShaders[last]; 325701e04c3fSmrg 325801e04c3fSmrg do_dead_builtin_varyings(ctx, NULL, sh, 0, NULL); 325901e04c3fSmrg do_dead_builtin_varyings(ctx, sh, NULL, num_tfeedback_decls, 326001e04c3fSmrg tfeedback_decls); 326101e04c3fSmrg 326201e04c3fSmrg if (prog->SeparateShader) { 326301e04c3fSmrg const uint64_t reserved_slots = 326401e04c3fSmrg reserved_varying_slot(sh, ir_var_shader_in); 326501e04c3fSmrg 326601e04c3fSmrg /* Assign input locations for SSO, output locations are already 326701e04c3fSmrg * assigned. 326801e04c3fSmrg */ 326901e04c3fSmrg if (!assign_varying_locations(ctx, mem_ctx, prog, 327001e04c3fSmrg NULL /* producer */, 327101e04c3fSmrg sh /* consumer */, 327201e04c3fSmrg 0 /* num_tfeedback_decls */, 327301e04c3fSmrg NULL /* tfeedback_decls */, 327401e04c3fSmrg reserved_slots)) 327501e04c3fSmrg return false; 327601e04c3fSmrg } 327701e04c3fSmrg } else { 327801e04c3fSmrg /* Linking the stages in the opposite order (from fragment to vertex) 327901e04c3fSmrg * ensures that inter-shader outputs written to in an earlier stage 328001e04c3fSmrg * are eliminated if they are (transitively) not used in a later 328101e04c3fSmrg * stage. 328201e04c3fSmrg */ 328301e04c3fSmrg int next = last; 328401e04c3fSmrg for (int i = next - 1; i >= 0; i--) { 328501e04c3fSmrg if (prog->_LinkedShaders[i] == NULL && i != 0) 328601e04c3fSmrg continue; 328701e04c3fSmrg 328801e04c3fSmrg gl_linked_shader *const sh_i = prog->_LinkedShaders[i]; 328901e04c3fSmrg gl_linked_shader *const sh_next = prog->_LinkedShaders[next]; 329001e04c3fSmrg 329101e04c3fSmrg const uint64_t reserved_out_slots = 329201e04c3fSmrg reserved_varying_slot(sh_i, ir_var_shader_out); 329301e04c3fSmrg const uint64_t reserved_in_slots = 329401e04c3fSmrg reserved_varying_slot(sh_next, ir_var_shader_in); 329501e04c3fSmrg 329601e04c3fSmrg do_dead_builtin_varyings(ctx, sh_i, sh_next, 329701e04c3fSmrg next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0, 329801e04c3fSmrg tfeedback_decls); 329901e04c3fSmrg 330001e04c3fSmrg if (!assign_varying_locations(ctx, mem_ctx, prog, sh_i, sh_next, 330101e04c3fSmrg next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0, 330201e04c3fSmrg tfeedback_decls, 330301e04c3fSmrg reserved_out_slots | reserved_in_slots)) 330401e04c3fSmrg return false; 330501e04c3fSmrg 330601e04c3fSmrg /* This must be done after all dead varyings are eliminated. */ 330701e04c3fSmrg if (sh_i != NULL) { 330801e04c3fSmrg unsigned slots_used = util_bitcount64(reserved_out_slots); 330901e04c3fSmrg if (!check_against_output_limit(ctx, prog, sh_i, slots_used)) { 331001e04c3fSmrg return false; 331101e04c3fSmrg } 331201e04c3fSmrg } 331301e04c3fSmrg 331401e04c3fSmrg unsigned slots_used = util_bitcount64(reserved_in_slots); 331501e04c3fSmrg if (!check_against_input_limit(ctx, prog, sh_next, slots_used)) 331601e04c3fSmrg return false; 331701e04c3fSmrg 331801e04c3fSmrg next = i; 331901e04c3fSmrg } 332001e04c3fSmrg } 332101e04c3fSmrg } 332201e04c3fSmrg 332301e04c3fSmrg if (!store_tfeedback_info(ctx, prog, num_tfeedback_decls, tfeedback_decls, 33247e102996Smaya has_xfb_qualifiers, mem_ctx)) 332501e04c3fSmrg return false; 332601e04c3fSmrg 332701e04c3fSmrg return true; 332801e04c3fSmrg} 3329