/*
 * Copyright © 2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file link_varyings.cpp
 *
 * Linker functions related specifically to linking varyings between shader
 * stages.
29b8e80941Smrg */ 30b8e80941Smrg 31b8e80941Smrg 32b8e80941Smrg#include "main/errors.h" 33b8e80941Smrg#include "main/mtypes.h" 34b8e80941Smrg#include "glsl_symbol_table.h" 35b8e80941Smrg#include "glsl_parser_extras.h" 36b8e80941Smrg#include "ir_optimization.h" 37b8e80941Smrg#include "linker.h" 38b8e80941Smrg#include "link_varyings.h" 39b8e80941Smrg#include "main/macros.h" 40b8e80941Smrg#include "util/hash_table.h" 41b8e80941Smrg#include "util/u_math.h" 42b8e80941Smrg#include "program.h" 43b8e80941Smrg 44b8e80941Smrg 45b8e80941Smrg/** 46b8e80941Smrg * Get the varying type stripped of the outermost array if we're processing 47b8e80941Smrg * a stage whose varyings are arrays indexed by a vertex number (such as 48b8e80941Smrg * geometry shader inputs). 49b8e80941Smrg */ 50b8e80941Smrgstatic const glsl_type * 51b8e80941Smrgget_varying_type(const ir_variable *var, gl_shader_stage stage) 52b8e80941Smrg{ 53b8e80941Smrg const glsl_type *type = var->type; 54b8e80941Smrg 55b8e80941Smrg if (!var->data.patch && 56b8e80941Smrg ((var->data.mode == ir_var_shader_out && 57b8e80941Smrg stage == MESA_SHADER_TESS_CTRL) || 58b8e80941Smrg (var->data.mode == ir_var_shader_in && 59b8e80941Smrg (stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_TESS_EVAL || 60b8e80941Smrg stage == MESA_SHADER_GEOMETRY)))) { 61b8e80941Smrg assert(type->is_array()); 62b8e80941Smrg type = type->fields.array; 63b8e80941Smrg } 64b8e80941Smrg 65b8e80941Smrg return type; 66b8e80941Smrg} 67b8e80941Smrg 68b8e80941Smrgstatic void 69b8e80941Smrgcreate_xfb_varying_names(void *mem_ctx, const glsl_type *t, char **name, 70b8e80941Smrg size_t name_length, unsigned *count, 71b8e80941Smrg const char *ifc_member_name, 72b8e80941Smrg const glsl_type *ifc_member_t, char ***varying_names) 73b8e80941Smrg{ 74b8e80941Smrg if (t->is_interface()) { 75b8e80941Smrg size_t new_length = name_length; 76b8e80941Smrg 77b8e80941Smrg assert(ifc_member_name && ifc_member_t); 78b8e80941Smrg ralloc_asprintf_rewrite_tail(name, &new_length, 
".%s", ifc_member_name); 79b8e80941Smrg 80b8e80941Smrg create_xfb_varying_names(mem_ctx, ifc_member_t, name, new_length, count, 81b8e80941Smrg NULL, NULL, varying_names); 82b8e80941Smrg } else if (t->is_struct()) { 83b8e80941Smrg for (unsigned i = 0; i < t->length; i++) { 84b8e80941Smrg const char *field = t->fields.structure[i].name; 85b8e80941Smrg size_t new_length = name_length; 86b8e80941Smrg 87b8e80941Smrg ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", field); 88b8e80941Smrg 89b8e80941Smrg create_xfb_varying_names(mem_ctx, t->fields.structure[i].type, name, 90b8e80941Smrg new_length, count, NULL, NULL, 91b8e80941Smrg varying_names); 92b8e80941Smrg } 93b8e80941Smrg } else if (t->without_array()->is_struct() || 94b8e80941Smrg t->without_array()->is_interface() || 95b8e80941Smrg (t->is_array() && t->fields.array->is_array())) { 96b8e80941Smrg for (unsigned i = 0; i < t->length; i++) { 97b8e80941Smrg size_t new_length = name_length; 98b8e80941Smrg 99b8e80941Smrg /* Append the subscript to the current variable name */ 100b8e80941Smrg ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i); 101b8e80941Smrg 102b8e80941Smrg create_xfb_varying_names(mem_ctx, t->fields.array, name, new_length, 103b8e80941Smrg count, ifc_member_name, ifc_member_t, 104b8e80941Smrg varying_names); 105b8e80941Smrg } 106b8e80941Smrg } else { 107b8e80941Smrg (*varying_names)[(*count)++] = ralloc_strdup(mem_ctx, *name); 108b8e80941Smrg } 109b8e80941Smrg} 110b8e80941Smrg 111b8e80941Smrgstatic bool 112b8e80941Smrgprocess_xfb_layout_qualifiers(void *mem_ctx, const gl_linked_shader *sh, 113b8e80941Smrg struct gl_shader_program *prog, 114b8e80941Smrg unsigned *num_tfeedback_decls, 115b8e80941Smrg char ***varying_names) 116b8e80941Smrg{ 117b8e80941Smrg bool has_xfb_qualifiers = false; 118b8e80941Smrg 119b8e80941Smrg /* We still need to enable transform feedback mode even if xfb_stride is 120b8e80941Smrg * only applied to a global out. 
Also we don't bother to propagate 121b8e80941Smrg * xfb_stride to interface block members so this will catch that case also. 122b8e80941Smrg */ 123b8e80941Smrg for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) { 124b8e80941Smrg if (prog->TransformFeedback.BufferStride[j]) { 125b8e80941Smrg has_xfb_qualifiers = true; 126b8e80941Smrg break; 127b8e80941Smrg } 128b8e80941Smrg } 129b8e80941Smrg 130b8e80941Smrg foreach_in_list(ir_instruction, node, sh->ir) { 131b8e80941Smrg ir_variable *var = node->as_variable(); 132b8e80941Smrg if (!var || var->data.mode != ir_var_shader_out) 133b8e80941Smrg continue; 134b8e80941Smrg 135b8e80941Smrg /* From the ARB_enhanced_layouts spec: 136b8e80941Smrg * 137b8e80941Smrg * "Any shader making any static use (after preprocessing) of any of 138b8e80941Smrg * these *xfb_* qualifiers will cause the shader to be in a 139b8e80941Smrg * transform feedback capturing mode and hence responsible for 140b8e80941Smrg * describing the transform feedback setup. This mode will capture 141b8e80941Smrg * any output selected by *xfb_offset*, directly or indirectly, to 142b8e80941Smrg * a transform feedback buffer." 
143b8e80941Smrg */ 144b8e80941Smrg if (var->data.explicit_xfb_buffer || var->data.explicit_xfb_stride) { 145b8e80941Smrg has_xfb_qualifiers = true; 146b8e80941Smrg } 147b8e80941Smrg 148b8e80941Smrg if (var->data.explicit_xfb_offset) { 149b8e80941Smrg *num_tfeedback_decls += var->type->varying_count(); 150b8e80941Smrg has_xfb_qualifiers = true; 151b8e80941Smrg } 152b8e80941Smrg } 153b8e80941Smrg 154b8e80941Smrg if (*num_tfeedback_decls == 0) 155b8e80941Smrg return has_xfb_qualifiers; 156b8e80941Smrg 157b8e80941Smrg unsigned i = 0; 158b8e80941Smrg *varying_names = ralloc_array(mem_ctx, char *, *num_tfeedback_decls); 159b8e80941Smrg foreach_in_list(ir_instruction, node, sh->ir) { 160b8e80941Smrg ir_variable *var = node->as_variable(); 161b8e80941Smrg if (!var || var->data.mode != ir_var_shader_out) 162b8e80941Smrg continue; 163b8e80941Smrg 164b8e80941Smrg if (var->data.explicit_xfb_offset) { 165b8e80941Smrg char *name; 166b8e80941Smrg const glsl_type *type, *member_type; 167b8e80941Smrg 168b8e80941Smrg if (var->data.from_named_ifc_block) { 169b8e80941Smrg type = var->get_interface_type(); 170b8e80941Smrg 171b8e80941Smrg /* Find the member type before it was altered by lowering */ 172b8e80941Smrg const glsl_type *type_wa = type->without_array(); 173b8e80941Smrg member_type = 174b8e80941Smrg type_wa->fields.structure[type_wa->field_index(var->name)].type; 175b8e80941Smrg name = ralloc_strdup(NULL, type_wa->name); 176b8e80941Smrg } else { 177b8e80941Smrg type = var->type; 178b8e80941Smrg member_type = NULL; 179b8e80941Smrg name = ralloc_strdup(NULL, var->name); 180b8e80941Smrg } 181b8e80941Smrg create_xfb_varying_names(mem_ctx, type, &name, strlen(name), &i, 182b8e80941Smrg var->name, member_type, varying_names); 183b8e80941Smrg ralloc_free(name); 184b8e80941Smrg } 185b8e80941Smrg } 186b8e80941Smrg 187b8e80941Smrg assert(i == *num_tfeedback_decls); 188b8e80941Smrg return has_xfb_qualifiers; 189b8e80941Smrg} 190b8e80941Smrg 191b8e80941Smrg/** 192b8e80941Smrg * Validate 
the types and qualifiers of an output from one stage against the 193b8e80941Smrg * matching input to another stage. 194b8e80941Smrg */ 195b8e80941Smrgstatic void 196b8e80941Smrgcross_validate_types_and_qualifiers(struct gl_context *ctx, 197b8e80941Smrg struct gl_shader_program *prog, 198b8e80941Smrg const ir_variable *input, 199b8e80941Smrg const ir_variable *output, 200b8e80941Smrg gl_shader_stage consumer_stage, 201b8e80941Smrg gl_shader_stage producer_stage) 202b8e80941Smrg{ 203b8e80941Smrg /* Check that the types match between stages. 204b8e80941Smrg */ 205b8e80941Smrg const glsl_type *type_to_match = input->type; 206b8e80941Smrg 207b8e80941Smrg /* VS -> GS, VS -> TCS, VS -> TES, TES -> GS */ 208b8e80941Smrg const bool extra_array_level = (producer_stage == MESA_SHADER_VERTEX && 209b8e80941Smrg consumer_stage != MESA_SHADER_FRAGMENT) || 210b8e80941Smrg consumer_stage == MESA_SHADER_GEOMETRY; 211b8e80941Smrg if (extra_array_level) { 212b8e80941Smrg assert(type_to_match->is_array()); 213b8e80941Smrg type_to_match = type_to_match->fields.array; 214b8e80941Smrg } 215b8e80941Smrg 216b8e80941Smrg if (type_to_match != output->type) { 217b8e80941Smrg if (output->type->is_struct()) { 218b8e80941Smrg /* Structures across shader stages can have different name 219b8e80941Smrg * and considered to match in type if and only if structure 220b8e80941Smrg * members match in name, type, qualification, and declaration 221b8e80941Smrg * order. 
222b8e80941Smrg */ 223b8e80941Smrg if (!output->type->record_compare(type_to_match, false, true)) { 224b8e80941Smrg linker_error(prog, 225b8e80941Smrg "%s shader output `%s' declared as struct `%s', " 226b8e80941Smrg "doesn't match in type with %s shader input " 227b8e80941Smrg "declared as struct `%s'\n", 228b8e80941Smrg _mesa_shader_stage_to_string(producer_stage), 229b8e80941Smrg output->name, 230b8e80941Smrg output->type->name, 231b8e80941Smrg _mesa_shader_stage_to_string(consumer_stage), 232b8e80941Smrg input->type->name); 233b8e80941Smrg } 234b8e80941Smrg } else if (!output->type->is_array() || !is_gl_identifier(output->name)) { 235b8e80941Smrg /* There is a bit of a special case for gl_TexCoord. This 236b8e80941Smrg * built-in is unsized by default. Applications that variable 237b8e80941Smrg * access it must redeclare it with a size. There is some 238b8e80941Smrg * language in the GLSL spec that implies the fragment shader 239b8e80941Smrg * and vertex shader do not have to agree on this size. Other 240b8e80941Smrg * driver behave this way, and one or two applications seem to 241b8e80941Smrg * rely on it. 242b8e80941Smrg * 243b8e80941Smrg * Neither declaration needs to be modified here because the array 244b8e80941Smrg * sizes are fixed later when update_array_sizes is called. 245b8e80941Smrg * 246b8e80941Smrg * From page 48 (page 54 of the PDF) of the GLSL 1.10 spec: 247b8e80941Smrg * 248b8e80941Smrg * "Unlike user-defined varying variables, the built-in 249b8e80941Smrg * varying variables don't have a strict one-to-one 250b8e80941Smrg * correspondence between the vertex language and the 251b8e80941Smrg * fragment language." 
252b8e80941Smrg */ 253b8e80941Smrg linker_error(prog, 254b8e80941Smrg "%s shader output `%s' declared as type `%s', " 255b8e80941Smrg "but %s shader input declared as type `%s'\n", 256b8e80941Smrg _mesa_shader_stage_to_string(producer_stage), 257b8e80941Smrg output->name, 258b8e80941Smrg output->type->name, 259b8e80941Smrg _mesa_shader_stage_to_string(consumer_stage), 260b8e80941Smrg input->type->name); 261b8e80941Smrg return; 262b8e80941Smrg } 263b8e80941Smrg } 264b8e80941Smrg 265b8e80941Smrg /* Check that all of the qualifiers match between stages. 266b8e80941Smrg */ 267b8e80941Smrg 268b8e80941Smrg /* According to the OpenGL and OpenGLES GLSL specs, the centroid qualifier 269b8e80941Smrg * should match until OpenGL 4.3 and OpenGLES 3.1. The OpenGLES 3.0 270b8e80941Smrg * conformance test suite does not verify that the qualifiers must match. 271b8e80941Smrg * The deqp test suite expects the opposite (OpenGLES 3.1) behavior for 272b8e80941Smrg * OpenGLES 3.0 drivers, so we relax the checking in all cases. 273b8e80941Smrg */ 274b8e80941Smrg if (false /* always skip the centroid check */ && 275b8e80941Smrg prog->data->Version < (prog->IsES ? 310 : 430) && 276b8e80941Smrg input->data.centroid != output->data.centroid) { 277b8e80941Smrg linker_error(prog, 278b8e80941Smrg "%s shader output `%s' %s centroid qualifier, " 279b8e80941Smrg "but %s shader input %s centroid qualifier\n", 280b8e80941Smrg _mesa_shader_stage_to_string(producer_stage), 281b8e80941Smrg output->name, 282b8e80941Smrg (output->data.centroid) ? "has" : "lacks", 283b8e80941Smrg _mesa_shader_stage_to_string(consumer_stage), 284b8e80941Smrg (input->data.centroid) ? 
"has" : "lacks"); 285b8e80941Smrg return; 286b8e80941Smrg } 287b8e80941Smrg 288b8e80941Smrg if (input->data.sample != output->data.sample) { 289b8e80941Smrg linker_error(prog, 290b8e80941Smrg "%s shader output `%s' %s sample qualifier, " 291b8e80941Smrg "but %s shader input %s sample qualifier\n", 292b8e80941Smrg _mesa_shader_stage_to_string(producer_stage), 293b8e80941Smrg output->name, 294b8e80941Smrg (output->data.sample) ? "has" : "lacks", 295b8e80941Smrg _mesa_shader_stage_to_string(consumer_stage), 296b8e80941Smrg (input->data.sample) ? "has" : "lacks"); 297b8e80941Smrg return; 298b8e80941Smrg } 299b8e80941Smrg 300b8e80941Smrg if (input->data.patch != output->data.patch) { 301b8e80941Smrg linker_error(prog, 302b8e80941Smrg "%s shader output `%s' %s patch qualifier, " 303b8e80941Smrg "but %s shader input %s patch qualifier\n", 304b8e80941Smrg _mesa_shader_stage_to_string(producer_stage), 305b8e80941Smrg output->name, 306b8e80941Smrg (output->data.patch) ? "has" : "lacks", 307b8e80941Smrg _mesa_shader_stage_to_string(consumer_stage), 308b8e80941Smrg (input->data.patch) ? "has" : "lacks"); 309b8e80941Smrg return; 310b8e80941Smrg } 311b8e80941Smrg 312b8e80941Smrg /* The GLSL 4.30 and GLSL ES 3.00 specifications say: 313b8e80941Smrg * 314b8e80941Smrg * "As only outputs need be declared with invariant, an output from 315b8e80941Smrg * one shader stage will still match an input of a subsequent stage 316b8e80941Smrg * without the input being declared as invariant." 317b8e80941Smrg * 318b8e80941Smrg * while GLSL 4.20 says: 319b8e80941Smrg * 320b8e80941Smrg * "For variables leaving one shader and coming into another shader, 321b8e80941Smrg * the invariant keyword has to be used in both shaders, or a link 322b8e80941Smrg * error will result." 
323b8e80941Smrg * 324b8e80941Smrg * and GLSL ES 1.00 section 4.6.4 "Invariance and Linking" says: 325b8e80941Smrg * 326b8e80941Smrg * "The invariance of varyings that are declared in both the vertex 327b8e80941Smrg * and fragment shaders must match." 328b8e80941Smrg */ 329b8e80941Smrg if (input->data.explicit_invariant != output->data.explicit_invariant && 330b8e80941Smrg prog->data->Version < (prog->IsES ? 300 : 430)) { 331b8e80941Smrg linker_error(prog, 332b8e80941Smrg "%s shader output `%s' %s invariant qualifier, " 333b8e80941Smrg "but %s shader input %s invariant qualifier\n", 334b8e80941Smrg _mesa_shader_stage_to_string(producer_stage), 335b8e80941Smrg output->name, 336b8e80941Smrg (output->data.explicit_invariant) ? "has" : "lacks", 337b8e80941Smrg _mesa_shader_stage_to_string(consumer_stage), 338b8e80941Smrg (input->data.explicit_invariant) ? "has" : "lacks"); 339b8e80941Smrg return; 340b8e80941Smrg } 341b8e80941Smrg 342b8e80941Smrg /* GLSL >= 4.40 removes text requiring interpolation qualifiers 343b8e80941Smrg * to match cross stage, they must only match within the same stage. 344b8e80941Smrg * 345b8e80941Smrg * From page 84 (page 90 of the PDF) of the GLSL 4.40 spec: 346b8e80941Smrg * 347b8e80941Smrg * "It is a link-time error if, within the same stage, the interpolation 348b8e80941Smrg * qualifiers of variables of the same name do not match. 349b8e80941Smrg * 350b8e80941Smrg * Section 4.3.9 (Interpolation) of the GLSL ES 3.00 spec says: 351b8e80941Smrg * 352b8e80941Smrg * "When no interpolation qualifier is present, smooth interpolation 353b8e80941Smrg * is used." 354b8e80941Smrg * 355b8e80941Smrg * So we match variables where one is smooth and the other has no explicit 356b8e80941Smrg * qualifier. 
357b8e80941Smrg */ 358b8e80941Smrg unsigned input_interpolation = input->data.interpolation; 359b8e80941Smrg unsigned output_interpolation = output->data.interpolation; 360b8e80941Smrg if (prog->IsES) { 361b8e80941Smrg if (input_interpolation == INTERP_MODE_NONE) 362b8e80941Smrg input_interpolation = INTERP_MODE_SMOOTH; 363b8e80941Smrg if (output_interpolation == INTERP_MODE_NONE) 364b8e80941Smrg output_interpolation = INTERP_MODE_SMOOTH; 365b8e80941Smrg } 366b8e80941Smrg if (input_interpolation != output_interpolation && 367b8e80941Smrg prog->data->Version < 440) { 368b8e80941Smrg if (!ctx->Const.AllowGLSLCrossStageInterpolationMismatch) { 369b8e80941Smrg linker_error(prog, 370b8e80941Smrg "%s shader output `%s' specifies %s " 371b8e80941Smrg "interpolation qualifier, " 372b8e80941Smrg "but %s shader input specifies %s " 373b8e80941Smrg "interpolation qualifier\n", 374b8e80941Smrg _mesa_shader_stage_to_string(producer_stage), 375b8e80941Smrg output->name, 376b8e80941Smrg interpolation_string(output->data.interpolation), 377b8e80941Smrg _mesa_shader_stage_to_string(consumer_stage), 378b8e80941Smrg interpolation_string(input->data.interpolation)); 379b8e80941Smrg return; 380b8e80941Smrg } else { 381b8e80941Smrg linker_warning(prog, 382b8e80941Smrg "%s shader output `%s' specifies %s " 383b8e80941Smrg "interpolation qualifier, " 384b8e80941Smrg "but %s shader input specifies %s " 385b8e80941Smrg "interpolation qualifier\n", 386b8e80941Smrg _mesa_shader_stage_to_string(producer_stage), 387b8e80941Smrg output->name, 388b8e80941Smrg interpolation_string(output->data.interpolation), 389b8e80941Smrg _mesa_shader_stage_to_string(consumer_stage), 390b8e80941Smrg interpolation_string(input->data.interpolation)); 391b8e80941Smrg } 392b8e80941Smrg } 393b8e80941Smrg} 394b8e80941Smrg 395b8e80941Smrg/** 396b8e80941Smrg * Validate front and back color outputs against single color input 397b8e80941Smrg */ 398b8e80941Smrgstatic void 
399b8e80941Smrgcross_validate_front_and_back_color(struct gl_context *ctx, 400b8e80941Smrg struct gl_shader_program *prog, 401b8e80941Smrg const ir_variable *input, 402b8e80941Smrg const ir_variable *front_color, 403b8e80941Smrg const ir_variable *back_color, 404b8e80941Smrg gl_shader_stage consumer_stage, 405b8e80941Smrg gl_shader_stage producer_stage) 406b8e80941Smrg{ 407b8e80941Smrg if (front_color != NULL && front_color->data.assigned) 408b8e80941Smrg cross_validate_types_and_qualifiers(ctx, prog, input, front_color, 409b8e80941Smrg consumer_stage, producer_stage); 410b8e80941Smrg 411b8e80941Smrg if (back_color != NULL && back_color->data.assigned) 412b8e80941Smrg cross_validate_types_and_qualifiers(ctx, prog, input, back_color, 413b8e80941Smrg consumer_stage, producer_stage); 414b8e80941Smrg} 415b8e80941Smrg 416b8e80941Smrgstatic unsigned 417b8e80941Smrgcompute_variable_location_slot(ir_variable *var, gl_shader_stage stage) 418b8e80941Smrg{ 419b8e80941Smrg unsigned location_start = VARYING_SLOT_VAR0; 420b8e80941Smrg 421b8e80941Smrg switch (stage) { 422b8e80941Smrg case MESA_SHADER_VERTEX: 423b8e80941Smrg if (var->data.mode == ir_var_shader_in) 424b8e80941Smrg location_start = VERT_ATTRIB_GENERIC0; 425b8e80941Smrg break; 426b8e80941Smrg case MESA_SHADER_TESS_CTRL: 427b8e80941Smrg case MESA_SHADER_TESS_EVAL: 428b8e80941Smrg if (var->data.patch) 429b8e80941Smrg location_start = VARYING_SLOT_PATCH0; 430b8e80941Smrg break; 431b8e80941Smrg case MESA_SHADER_FRAGMENT: 432b8e80941Smrg if (var->data.mode == ir_var_shader_out) 433b8e80941Smrg location_start = FRAG_RESULT_DATA0; 434b8e80941Smrg break; 435b8e80941Smrg default: 436b8e80941Smrg break; 437b8e80941Smrg } 438b8e80941Smrg 439b8e80941Smrg return var->data.location - location_start; 440b8e80941Smrg} 441b8e80941Smrg 442b8e80941Smrgstruct explicit_location_info { 443b8e80941Smrg ir_variable *var; 444b8e80941Smrg bool base_type_is_integer; 445b8e80941Smrg unsigned base_type_bit_size; 446b8e80941Smrg unsigned 
interpolation; 447b8e80941Smrg bool centroid; 448b8e80941Smrg bool sample; 449b8e80941Smrg bool patch; 450b8e80941Smrg}; 451b8e80941Smrg 452b8e80941Smrgstatic bool 453b8e80941Smrgcheck_location_aliasing(struct explicit_location_info explicit_locations[][4], 454b8e80941Smrg ir_variable *var, 455b8e80941Smrg unsigned location, 456b8e80941Smrg unsigned component, 457b8e80941Smrg unsigned location_limit, 458b8e80941Smrg const glsl_type *type, 459b8e80941Smrg unsigned interpolation, 460b8e80941Smrg bool centroid, 461b8e80941Smrg bool sample, 462b8e80941Smrg bool patch, 463b8e80941Smrg gl_shader_program *prog, 464b8e80941Smrg gl_shader_stage stage) 465b8e80941Smrg{ 466b8e80941Smrg unsigned last_comp; 467b8e80941Smrg unsigned base_type_bit_size; 468b8e80941Smrg const glsl_type *type_without_array = type->without_array(); 469b8e80941Smrg const bool base_type_is_integer = 470b8e80941Smrg glsl_base_type_is_integer(type_without_array->base_type); 471b8e80941Smrg const bool is_struct = type_without_array->is_struct(); 472b8e80941Smrg if (is_struct) { 473b8e80941Smrg /* structs don't have a defined underlying base type so just treat all 474b8e80941Smrg * component slots as used and set the bit size to 0. If there is 475b8e80941Smrg * location aliasing, we'll fail anyway later. 476b8e80941Smrg */ 477b8e80941Smrg last_comp = 4; 478b8e80941Smrg base_type_bit_size = 0; 479b8e80941Smrg } else { 480b8e80941Smrg unsigned dmul = type_without_array->is_64bit() ? 
2 : 1; 481b8e80941Smrg last_comp = component + type_without_array->vector_elements * dmul; 482b8e80941Smrg base_type_bit_size = 483b8e80941Smrg glsl_base_type_get_bit_size(type_without_array->base_type); 484b8e80941Smrg } 485b8e80941Smrg 486b8e80941Smrg while (location < location_limit) { 487b8e80941Smrg unsigned comp = 0; 488b8e80941Smrg while (comp < 4) { 489b8e80941Smrg struct explicit_location_info *info = 490b8e80941Smrg &explicit_locations[location][comp]; 491b8e80941Smrg 492b8e80941Smrg if (info->var) { 493b8e80941Smrg if (info->var->type->without_array()->is_struct() || is_struct) { 494b8e80941Smrg /* Structs cannot share location since they are incompatible 495b8e80941Smrg * with any other underlying numerical type. 496b8e80941Smrg */ 497b8e80941Smrg linker_error(prog, 498b8e80941Smrg "%s shader has multiple %sputs sharing the " 499b8e80941Smrg "same location that don't have the same " 500b8e80941Smrg "underlying numerical type. Struct variable '%s', " 501b8e80941Smrg "location %u\n", 502b8e80941Smrg _mesa_shader_stage_to_string(stage), 503b8e80941Smrg var->data.mode == ir_var_shader_in ? "in" : "out", 504b8e80941Smrg is_struct ? var->name : info->var->name, 505b8e80941Smrg location); 506b8e80941Smrg return false; 507b8e80941Smrg } else if (comp >= component && comp < last_comp) { 508b8e80941Smrg /* Component aliasing is not allowed */ 509b8e80941Smrg linker_error(prog, 510b8e80941Smrg "%s shader has multiple %sputs explicitly " 511b8e80941Smrg "assigned to location %d and component %d\n", 512b8e80941Smrg _mesa_shader_stage_to_string(stage), 513b8e80941Smrg var->data.mode == ir_var_shader_in ? 
"in" : "out", 514b8e80941Smrg location, comp); 515b8e80941Smrg return false; 516b8e80941Smrg } else { 517b8e80941Smrg /* From the OpenGL 4.60.5 spec, section 4.4.1 Input Layout 518b8e80941Smrg * Qualifiers, Page 67, (Location aliasing): 519b8e80941Smrg * 520b8e80941Smrg * " Further, when location aliasing, the aliases sharing the 521b8e80941Smrg * location must have the same underlying numerical type 522b8e80941Smrg * and bit width (floating-point or integer, 32-bit versus 523b8e80941Smrg * 64-bit, etc.) and the same auxiliary storage and 524b8e80941Smrg * interpolation qualification." 525b8e80941Smrg */ 526b8e80941Smrg 527b8e80941Smrg /* If the underlying numerical type isn't integer, implicitly 528b8e80941Smrg * it will be float or else we would have failed by now. 529b8e80941Smrg */ 530b8e80941Smrg if (info->base_type_is_integer != base_type_is_integer) { 531b8e80941Smrg linker_error(prog, 532b8e80941Smrg "%s shader has multiple %sputs sharing the " 533b8e80941Smrg "same location that don't have the same " 534b8e80941Smrg "underlying numerical type. Location %u " 535b8e80941Smrg "component %u.\n", 536b8e80941Smrg _mesa_shader_stage_to_string(stage), 537b8e80941Smrg var->data.mode == ir_var_shader_in ? 538b8e80941Smrg "in" : "out", location, comp); 539b8e80941Smrg return false; 540b8e80941Smrg } 541b8e80941Smrg 542b8e80941Smrg if (info->base_type_bit_size != base_type_bit_size) { 543b8e80941Smrg linker_error(prog, 544b8e80941Smrg "%s shader has multiple %sputs sharing the " 545b8e80941Smrg "same location that don't have the same " 546b8e80941Smrg "underlying numerical bit size. Location %u " 547b8e80941Smrg "component %u.\n", 548b8e80941Smrg _mesa_shader_stage_to_string(stage), 549b8e80941Smrg var->data.mode == ir_var_shader_in ? 
550b8e80941Smrg "in" : "out", location, comp); 551b8e80941Smrg return false; 552b8e80941Smrg } 553b8e80941Smrg 554b8e80941Smrg if (info->interpolation != interpolation) { 555b8e80941Smrg linker_error(prog, 556b8e80941Smrg "%s shader has multiple %sputs sharing the " 557b8e80941Smrg "same location that don't have the same " 558b8e80941Smrg "interpolation qualification. Location %u " 559b8e80941Smrg "component %u.\n", 560b8e80941Smrg _mesa_shader_stage_to_string(stage), 561b8e80941Smrg var->data.mode == ir_var_shader_in ? 562b8e80941Smrg "in" : "out", location, comp); 563b8e80941Smrg return false; 564b8e80941Smrg } 565b8e80941Smrg 566b8e80941Smrg if (info->centroid != centroid || 567b8e80941Smrg info->sample != sample || 568b8e80941Smrg info->patch != patch) { 569b8e80941Smrg linker_error(prog, 570b8e80941Smrg "%s shader has multiple %sputs sharing the " 571b8e80941Smrg "same location that don't have the same " 572b8e80941Smrg "auxiliary storage qualification. Location %u " 573b8e80941Smrg "component %u.\n", 574b8e80941Smrg _mesa_shader_stage_to_string(stage), 575b8e80941Smrg var->data.mode == ir_var_shader_in ? 576b8e80941Smrg "in" : "out", location, comp); 577b8e80941Smrg return false; 578b8e80941Smrg } 579b8e80941Smrg } 580b8e80941Smrg } else if (comp >= component && comp < last_comp) { 581b8e80941Smrg info->var = var; 582b8e80941Smrg info->base_type_is_integer = base_type_is_integer; 583b8e80941Smrg info->base_type_bit_size = base_type_bit_size; 584b8e80941Smrg info->interpolation = interpolation; 585b8e80941Smrg info->centroid = centroid; 586b8e80941Smrg info->sample = sample; 587b8e80941Smrg info->patch = patch; 588b8e80941Smrg } 589b8e80941Smrg 590b8e80941Smrg comp++; 591b8e80941Smrg 592b8e80941Smrg /* We need to do some special handling for doubles as dvec3 and 593b8e80941Smrg * dvec4 consume two consecutive locations. 
We don't need to 594b8e80941Smrg * worry about components beginning at anything other than 0 as 595b8e80941Smrg * the spec does not allow this for dvec3 and dvec4. 596b8e80941Smrg */ 597b8e80941Smrg if (comp == 4 && last_comp > 4) { 598b8e80941Smrg last_comp = last_comp - 4; 599b8e80941Smrg /* Bump location index and reset the component index */ 600b8e80941Smrg location++; 601b8e80941Smrg comp = 0; 602b8e80941Smrg component = 0; 603b8e80941Smrg } 604b8e80941Smrg } 605b8e80941Smrg 606b8e80941Smrg location++; 607b8e80941Smrg } 608b8e80941Smrg 609b8e80941Smrg return true; 610b8e80941Smrg} 611b8e80941Smrg 612b8e80941Smrgstatic bool 613b8e80941Smrgvalidate_explicit_variable_location(struct gl_context *ctx, 614b8e80941Smrg struct explicit_location_info explicit_locations[][4], 615b8e80941Smrg ir_variable *var, 616b8e80941Smrg gl_shader_program *prog, 617b8e80941Smrg gl_linked_shader *sh) 618b8e80941Smrg{ 619b8e80941Smrg const glsl_type *type = get_varying_type(var, sh->Stage); 620b8e80941Smrg unsigned num_elements = type->count_attribute_slots(false); 621b8e80941Smrg unsigned idx = compute_variable_location_slot(var, sh->Stage); 622b8e80941Smrg unsigned slot_limit = idx + num_elements; 623b8e80941Smrg 624b8e80941Smrg /* Vertex shader inputs and fragment shader outputs are validated in 625b8e80941Smrg * assign_attribute_or_color_locations() so we should not attempt to 626b8e80941Smrg * validate them again here. 
627b8e80941Smrg */ 628b8e80941Smrg unsigned slot_max; 629b8e80941Smrg if (var->data.mode == ir_var_shader_out) { 630b8e80941Smrg assert(sh->Stage != MESA_SHADER_FRAGMENT); 631b8e80941Smrg slot_max = 632b8e80941Smrg ctx->Const.Program[sh->Stage].MaxOutputComponents / 4; 633b8e80941Smrg } else { 634b8e80941Smrg assert(var->data.mode == ir_var_shader_in); 635b8e80941Smrg assert(sh->Stage != MESA_SHADER_VERTEX); 636b8e80941Smrg slot_max = 637b8e80941Smrg ctx->Const.Program[sh->Stage].MaxInputComponents / 4; 638b8e80941Smrg } 639b8e80941Smrg 640b8e80941Smrg if (slot_limit > slot_max) { 641b8e80941Smrg linker_error(prog, 642b8e80941Smrg "Invalid location %u in %s shader\n", 643b8e80941Smrg idx, _mesa_shader_stage_to_string(sh->Stage)); 644b8e80941Smrg return false; 645b8e80941Smrg } 646b8e80941Smrg 647b8e80941Smrg const glsl_type *type_without_array = type->without_array(); 648b8e80941Smrg if (type_without_array->is_interface()) { 649b8e80941Smrg for (unsigned i = 0; i < type_without_array->length; i++) { 650b8e80941Smrg glsl_struct_field *field = &type_without_array->fields.structure[i]; 651b8e80941Smrg unsigned field_location = field->location - 652b8e80941Smrg (field->patch ? 
VARYING_SLOT_PATCH0 : VARYING_SLOT_VAR0); 653b8e80941Smrg if (!check_location_aliasing(explicit_locations, var, 654b8e80941Smrg field_location, 655b8e80941Smrg 0, field_location + 1, 656b8e80941Smrg field->type, 657b8e80941Smrg field->interpolation, 658b8e80941Smrg field->centroid, 659b8e80941Smrg field->sample, 660b8e80941Smrg field->patch, 661b8e80941Smrg prog, sh->Stage)) { 662b8e80941Smrg return false; 663b8e80941Smrg } 664b8e80941Smrg } 665b8e80941Smrg } else if (!check_location_aliasing(explicit_locations, var, 666b8e80941Smrg idx, var->data.location_frac, 667b8e80941Smrg slot_limit, type, 668b8e80941Smrg var->data.interpolation, 669b8e80941Smrg var->data.centroid, 670b8e80941Smrg var->data.sample, 671b8e80941Smrg var->data.patch, 672b8e80941Smrg prog, sh->Stage)) { 673b8e80941Smrg return false; 674b8e80941Smrg } 675b8e80941Smrg 676b8e80941Smrg return true; 677b8e80941Smrg} 678b8e80941Smrg 679b8e80941Smrg/** 680b8e80941Smrg * Validate explicit locations for the inputs to the first stage and the 681b8e80941Smrg * outputs of the last stage in a program, if those are not the VS and FS 682b8e80941Smrg * shaders. 
683b8e80941Smrg */ 684b8e80941Smrgvoid 685b8e80941Smrgvalidate_first_and_last_interface_explicit_locations(struct gl_context *ctx, 686b8e80941Smrg struct gl_shader_program *prog, 687b8e80941Smrg gl_shader_stage first_stage, 688b8e80941Smrg gl_shader_stage last_stage) 689b8e80941Smrg{ 690b8e80941Smrg /* VS inputs and FS outputs are validated in 691b8e80941Smrg * assign_attribute_or_color_locations() 692b8e80941Smrg */ 693b8e80941Smrg bool validate_first_stage = first_stage != MESA_SHADER_VERTEX; 694b8e80941Smrg bool validate_last_stage = last_stage != MESA_SHADER_FRAGMENT; 695b8e80941Smrg if (!validate_first_stage && !validate_last_stage) 696b8e80941Smrg return; 697b8e80941Smrg 698b8e80941Smrg struct explicit_location_info explicit_locations[MAX_VARYING][4]; 699b8e80941Smrg 700b8e80941Smrg gl_shader_stage stages[2] = { first_stage, last_stage }; 701b8e80941Smrg bool validate_stage[2] = { validate_first_stage, validate_last_stage }; 702b8e80941Smrg ir_variable_mode var_direction[2] = { ir_var_shader_in, ir_var_shader_out }; 703b8e80941Smrg 704b8e80941Smrg for (unsigned i = 0; i < 2; i++) { 705b8e80941Smrg if (!validate_stage[i]) 706b8e80941Smrg continue; 707b8e80941Smrg 708b8e80941Smrg gl_shader_stage stage = stages[i]; 709b8e80941Smrg 710b8e80941Smrg gl_linked_shader *sh = prog->_LinkedShaders[stage]; 711b8e80941Smrg assert(sh); 712b8e80941Smrg 713b8e80941Smrg memset(explicit_locations, 0, sizeof(explicit_locations)); 714b8e80941Smrg 715b8e80941Smrg foreach_in_list(ir_instruction, node, sh->ir) { 716b8e80941Smrg ir_variable *const var = node->as_variable(); 717b8e80941Smrg 718b8e80941Smrg if (var == NULL || 719b8e80941Smrg !var->data.explicit_location || 720b8e80941Smrg var->data.location < VARYING_SLOT_VAR0 || 721b8e80941Smrg var->data.mode != var_direction[i]) 722b8e80941Smrg continue; 723b8e80941Smrg 724b8e80941Smrg if (!validate_explicit_variable_location( 725b8e80941Smrg ctx, explicit_locations, var, prog, sh)) { 726b8e80941Smrg return; 727b8e80941Smrg } 
728b8e80941Smrg } 729b8e80941Smrg } 730b8e80941Smrg} 731b8e80941Smrg 732b8e80941Smrg/** 733b8e80941Smrg * Validate that outputs from one stage match inputs of another 734b8e80941Smrg */ 735b8e80941Smrgvoid 736b8e80941Smrgcross_validate_outputs_to_inputs(struct gl_context *ctx, 737b8e80941Smrg struct gl_shader_program *prog, 738b8e80941Smrg gl_linked_shader *producer, 739b8e80941Smrg gl_linked_shader *consumer) 740b8e80941Smrg{ 741b8e80941Smrg glsl_symbol_table parameters; 742b8e80941Smrg struct explicit_location_info output_explicit_locations[MAX_VARYING][4] = {}; 743b8e80941Smrg struct explicit_location_info input_explicit_locations[MAX_VARYING][4] = {}; 744b8e80941Smrg 745b8e80941Smrg /* Find all shader outputs in the "producer" stage. 746b8e80941Smrg */ 747b8e80941Smrg foreach_in_list(ir_instruction, node, producer->ir) { 748b8e80941Smrg ir_variable *const var = node->as_variable(); 749b8e80941Smrg 750b8e80941Smrg if (var == NULL || var->data.mode != ir_var_shader_out) 751b8e80941Smrg continue; 752b8e80941Smrg 753b8e80941Smrg if (!var->data.explicit_location 754b8e80941Smrg || var->data.location < VARYING_SLOT_VAR0) 755b8e80941Smrg parameters.add_variable(var); 756b8e80941Smrg else { 757b8e80941Smrg /* User-defined varyings with explicit locations are handled 758b8e80941Smrg * differently because they do not need to have matching names. 759b8e80941Smrg */ 760b8e80941Smrg if (!validate_explicit_variable_location(ctx, 761b8e80941Smrg output_explicit_locations, 762b8e80941Smrg var, prog, producer)) { 763b8e80941Smrg return; 764b8e80941Smrg } 765b8e80941Smrg } 766b8e80941Smrg } 767b8e80941Smrg 768b8e80941Smrg 769b8e80941Smrg /* Find all shader inputs in the "consumer" stage. Any variables that have 770b8e80941Smrg * matching outputs already in the symbol table must have the same type and 771b8e80941Smrg * qualifiers. 
772b8e80941Smrg * 773b8e80941Smrg * Exception: if the consumer is the geometry shader, then the inputs 774b8e80941Smrg * should be arrays and the type of the array element should match the type 775b8e80941Smrg * of the corresponding producer output. 776b8e80941Smrg */ 777b8e80941Smrg foreach_in_list(ir_instruction, node, consumer->ir) { 778b8e80941Smrg ir_variable *const input = node->as_variable(); 779b8e80941Smrg 780b8e80941Smrg if (input == NULL || input->data.mode != ir_var_shader_in) 781b8e80941Smrg continue; 782b8e80941Smrg 783b8e80941Smrg if (strcmp(input->name, "gl_Color") == 0 && input->data.used) { 784b8e80941Smrg const ir_variable *const front_color = 785b8e80941Smrg parameters.get_variable("gl_FrontColor"); 786b8e80941Smrg 787b8e80941Smrg const ir_variable *const back_color = 788b8e80941Smrg parameters.get_variable("gl_BackColor"); 789b8e80941Smrg 790b8e80941Smrg cross_validate_front_and_back_color(ctx, prog, input, 791b8e80941Smrg front_color, back_color, 792b8e80941Smrg consumer->Stage, producer->Stage); 793b8e80941Smrg } else if (strcmp(input->name, "gl_SecondaryColor") == 0 && input->data.used) { 794b8e80941Smrg const ir_variable *const front_color = 795b8e80941Smrg parameters.get_variable("gl_FrontSecondaryColor"); 796b8e80941Smrg 797b8e80941Smrg const ir_variable *const back_color = 798b8e80941Smrg parameters.get_variable("gl_BackSecondaryColor"); 799b8e80941Smrg 800b8e80941Smrg cross_validate_front_and_back_color(ctx, prog, input, 801b8e80941Smrg front_color, back_color, 802b8e80941Smrg consumer->Stage, producer->Stage); 803b8e80941Smrg } else { 804b8e80941Smrg /* The rules for connecting inputs and outputs change in the presence 805b8e80941Smrg * of explicit locations. In this case, we no longer care about the 806b8e80941Smrg * names of the variables. Instead, we care only about the 807b8e80941Smrg * explicitly assigned location. 
808b8e80941Smrg */ 809b8e80941Smrg ir_variable *output = NULL; 810b8e80941Smrg if (input->data.explicit_location 811b8e80941Smrg && input->data.location >= VARYING_SLOT_VAR0) { 812b8e80941Smrg 813b8e80941Smrg const glsl_type *type = get_varying_type(input, consumer->Stage); 814b8e80941Smrg unsigned num_elements = type->count_attribute_slots(false); 815b8e80941Smrg unsigned idx = 816b8e80941Smrg compute_variable_location_slot(input, consumer->Stage); 817b8e80941Smrg unsigned slot_limit = idx + num_elements; 818b8e80941Smrg 819b8e80941Smrg if (!validate_explicit_variable_location(ctx, 820b8e80941Smrg input_explicit_locations, 821b8e80941Smrg input, prog, consumer)) { 822b8e80941Smrg return; 823b8e80941Smrg } 824b8e80941Smrg 825b8e80941Smrg while (idx < slot_limit) { 826b8e80941Smrg if (idx >= MAX_VARYING) { 827b8e80941Smrg linker_error(prog, 828b8e80941Smrg "Invalid location %u in %s shader\n", idx, 829b8e80941Smrg _mesa_shader_stage_to_string(consumer->Stage)); 830b8e80941Smrg return; 831b8e80941Smrg } 832b8e80941Smrg 833b8e80941Smrg output = output_explicit_locations[idx][input->data.location_frac].var; 834b8e80941Smrg 835b8e80941Smrg if (output == NULL) { 836b8e80941Smrg /* A linker failure should only happen when there is no 837b8e80941Smrg * output declaration and there is Static Use of the 838b8e80941Smrg * declared input. 
839b8e80941Smrg */ 840b8e80941Smrg if (input->data.used) { 841b8e80941Smrg linker_error(prog, 842b8e80941Smrg "%s shader input `%s' with explicit location " 843b8e80941Smrg "has no matching output\n", 844b8e80941Smrg _mesa_shader_stage_to_string(consumer->Stage), 845b8e80941Smrg input->name); 846b8e80941Smrg break; 847b8e80941Smrg } 848b8e80941Smrg } else if (input->data.location != output->data.location) { 849b8e80941Smrg linker_error(prog, 850b8e80941Smrg "%s shader input `%s' with explicit location " 851b8e80941Smrg "has no matching output\n", 852b8e80941Smrg _mesa_shader_stage_to_string(consumer->Stage), 853b8e80941Smrg input->name); 854b8e80941Smrg break; 855b8e80941Smrg } 856b8e80941Smrg idx++; 857b8e80941Smrg } 858b8e80941Smrg } else { 859b8e80941Smrg output = parameters.get_variable(input->name); 860b8e80941Smrg } 861b8e80941Smrg 862b8e80941Smrg if (output != NULL) { 863b8e80941Smrg /* Interface blocks have their own validation elsewhere so don't 864b8e80941Smrg * try validating them here. 865b8e80941Smrg */ 866b8e80941Smrg if (!(input->get_interface_type() && 867b8e80941Smrg output->get_interface_type())) 868b8e80941Smrg cross_validate_types_and_qualifiers(ctx, prog, input, output, 869b8e80941Smrg consumer->Stage, 870b8e80941Smrg producer->Stage); 871b8e80941Smrg } else { 872b8e80941Smrg /* Check for input vars with unmatched output vars in prev stage 873b8e80941Smrg * taking into account that interface blocks could have a matching 874b8e80941Smrg * output but with different name, so we ignore them. 
875b8e80941Smrg */ 876b8e80941Smrg assert(!input->data.assigned); 877b8e80941Smrg if (input->data.used && !input->get_interface_type() && 878b8e80941Smrg !input->data.explicit_location) 879b8e80941Smrg linker_error(prog, 880b8e80941Smrg "%s shader input `%s' " 881b8e80941Smrg "has no matching output in the previous stage\n", 882b8e80941Smrg _mesa_shader_stage_to_string(consumer->Stage), 883b8e80941Smrg input->name); 884b8e80941Smrg } 885b8e80941Smrg } 886b8e80941Smrg } 887b8e80941Smrg} 888b8e80941Smrg 889b8e80941Smrg/** 890b8e80941Smrg * Demote shader inputs and outputs that are not used in other stages, and 891b8e80941Smrg * remove them via dead code elimination. 892b8e80941Smrg */ 893b8e80941Smrgstatic void 894b8e80941Smrgremove_unused_shader_inputs_and_outputs(bool is_separate_shader_object, 895b8e80941Smrg gl_linked_shader *sh, 896b8e80941Smrg enum ir_variable_mode mode) 897b8e80941Smrg{ 898b8e80941Smrg if (is_separate_shader_object) 899b8e80941Smrg return; 900b8e80941Smrg 901b8e80941Smrg foreach_in_list(ir_instruction, node, sh->ir) { 902b8e80941Smrg ir_variable *const var = node->as_variable(); 903b8e80941Smrg 904b8e80941Smrg if (var == NULL || var->data.mode != int(mode)) 905b8e80941Smrg continue; 906b8e80941Smrg 907b8e80941Smrg /* A shader 'in' or 'out' variable is only really an input or output if 908b8e80941Smrg * its value is used by other shader stages. This will cause the 909b8e80941Smrg * variable to have a location assigned. 910b8e80941Smrg */ 911b8e80941Smrg if (var->data.is_unmatched_generic_inout && !var->data.is_xfb_only) { 912b8e80941Smrg assert(var->data.mode != ir_var_temporary); 913b8e80941Smrg 914b8e80941Smrg /* Assign zeros to demoted inputs to allow more optimizations. 
*/ 915b8e80941Smrg if (var->data.mode == ir_var_shader_in && !var->constant_value) 916b8e80941Smrg var->constant_value = ir_constant::zero(var, var->type); 917b8e80941Smrg 918b8e80941Smrg var->data.mode = ir_var_auto; 919b8e80941Smrg } 920b8e80941Smrg } 921b8e80941Smrg 922b8e80941Smrg /* Eliminate code that is now dead due to unused inputs/outputs being 923b8e80941Smrg * demoted. 924b8e80941Smrg */ 925b8e80941Smrg while (do_dead_code(sh->ir, false)) 926b8e80941Smrg ; 927b8e80941Smrg 928b8e80941Smrg} 929b8e80941Smrg 930b8e80941Smrg/** 931b8e80941Smrg * Initialize this object based on a string that was passed to 932b8e80941Smrg * glTransformFeedbackVaryings. 933b8e80941Smrg * 934b8e80941Smrg * If the input is mal-formed, this call still succeeds, but it sets 935b8e80941Smrg * this->var_name to a mal-formed input, so tfeedback_decl::find_output_var() 936b8e80941Smrg * will fail to find any matching variable. 937b8e80941Smrg */ 938b8e80941Smrgvoid 939b8e80941Smrgtfeedback_decl::init(struct gl_context *ctx, const void *mem_ctx, 940b8e80941Smrg const char *input) 941b8e80941Smrg{ 942b8e80941Smrg /* We don't have to be pedantic about what is a valid GLSL variable name, 943b8e80941Smrg * because any variable with an invalid name can't exist in the IR anyway. 944b8e80941Smrg */ 945b8e80941Smrg 946b8e80941Smrg this->location = -1; 947b8e80941Smrg this->orig_name = input; 948b8e80941Smrg this->lowered_builtin_array_variable = none; 949b8e80941Smrg this->skip_components = 0; 950b8e80941Smrg this->next_buffer_separator = false; 951b8e80941Smrg this->matched_candidate = NULL; 952b8e80941Smrg this->stream_id = 0; 953b8e80941Smrg this->buffer = 0; 954b8e80941Smrg this->offset = 0; 955b8e80941Smrg 956b8e80941Smrg if (ctx->Extensions.ARB_transform_feedback3) { 957b8e80941Smrg /* Parse gl_NextBuffer. 
*/ 958b8e80941Smrg if (strcmp(input, "gl_NextBuffer") == 0) { 959b8e80941Smrg this->next_buffer_separator = true; 960b8e80941Smrg return; 961b8e80941Smrg } 962b8e80941Smrg 963b8e80941Smrg /* Parse gl_SkipComponents. */ 964b8e80941Smrg if (strcmp(input, "gl_SkipComponents1") == 0) 965b8e80941Smrg this->skip_components = 1; 966b8e80941Smrg else if (strcmp(input, "gl_SkipComponents2") == 0) 967b8e80941Smrg this->skip_components = 2; 968b8e80941Smrg else if (strcmp(input, "gl_SkipComponents3") == 0) 969b8e80941Smrg this->skip_components = 3; 970b8e80941Smrg else if (strcmp(input, "gl_SkipComponents4") == 0) 971b8e80941Smrg this->skip_components = 4; 972b8e80941Smrg 973b8e80941Smrg if (this->skip_components) 974b8e80941Smrg return; 975b8e80941Smrg } 976b8e80941Smrg 977b8e80941Smrg /* Parse a declaration. */ 978b8e80941Smrg const char *base_name_end; 979b8e80941Smrg long subscript = parse_program_resource_name(input, &base_name_end); 980b8e80941Smrg this->var_name = ralloc_strndup(mem_ctx, input, base_name_end - input); 981b8e80941Smrg if (this->var_name == NULL) { 982b8e80941Smrg _mesa_error_no_memory(__func__); 983b8e80941Smrg return; 984b8e80941Smrg } 985b8e80941Smrg 986b8e80941Smrg if (subscript >= 0) { 987b8e80941Smrg this->array_subscript = subscript; 988b8e80941Smrg this->is_subscripted = true; 989b8e80941Smrg } else { 990b8e80941Smrg this->is_subscripted = false; 991b8e80941Smrg } 992b8e80941Smrg 993b8e80941Smrg /* For drivers that lower gl_ClipDistance to gl_ClipDistanceMESA, this 994b8e80941Smrg * class must behave specially to account for the fact that gl_ClipDistance 995b8e80941Smrg * is converted from a float[8] to a vec4[2]. 
996b8e80941Smrg */ 997b8e80941Smrg if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerCombinedClipCullDistance && 998b8e80941Smrg strcmp(this->var_name, "gl_ClipDistance") == 0) { 999b8e80941Smrg this->lowered_builtin_array_variable = clip_distance; 1000b8e80941Smrg } 1001b8e80941Smrg if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerCombinedClipCullDistance && 1002b8e80941Smrg strcmp(this->var_name, "gl_CullDistance") == 0) { 1003b8e80941Smrg this->lowered_builtin_array_variable = cull_distance; 1004b8e80941Smrg } 1005b8e80941Smrg 1006b8e80941Smrg if (ctx->Const.LowerTessLevel && 1007b8e80941Smrg (strcmp(this->var_name, "gl_TessLevelOuter") == 0)) 1008b8e80941Smrg this->lowered_builtin_array_variable = tess_level_outer; 1009b8e80941Smrg if (ctx->Const.LowerTessLevel && 1010b8e80941Smrg (strcmp(this->var_name, "gl_TessLevelInner") == 0)) 1011b8e80941Smrg this->lowered_builtin_array_variable = tess_level_inner; 1012b8e80941Smrg} 1013b8e80941Smrg 1014b8e80941Smrg 1015b8e80941Smrg/** 1016b8e80941Smrg * Determine whether two tfeedback_decl objects refer to the same variable and 1017b8e80941Smrg * array index (if applicable). 1018b8e80941Smrg */ 1019b8e80941Smrgbool 1020b8e80941Smrgtfeedback_decl::is_same(const tfeedback_decl &x, const tfeedback_decl &y) 1021b8e80941Smrg{ 1022b8e80941Smrg assert(x.is_varying() && y.is_varying()); 1023b8e80941Smrg 1024b8e80941Smrg if (strcmp(x.var_name, y.var_name) != 0) 1025b8e80941Smrg return false; 1026b8e80941Smrg if (x.is_subscripted != y.is_subscripted) 1027b8e80941Smrg return false; 1028b8e80941Smrg if (x.is_subscripted && x.array_subscript != y.array_subscript) 1029b8e80941Smrg return false; 1030b8e80941Smrg return true; 1031b8e80941Smrg} 1032b8e80941Smrg 1033b8e80941Smrg 1034b8e80941Smrg/** 1035b8e80941Smrg * Assign a location and stream ID for this tfeedback_decl object based on the 1036b8e80941Smrg * transform feedback candidate found by find_candidate. 
1037b8e80941Smrg * 1038b8e80941Smrg * If an error occurs, the error is reported through linker_error() and false 1039b8e80941Smrg * is returned. 1040b8e80941Smrg */ 1041b8e80941Smrgbool 1042b8e80941Smrgtfeedback_decl::assign_location(struct gl_context *ctx, 1043b8e80941Smrg struct gl_shader_program *prog) 1044b8e80941Smrg{ 1045b8e80941Smrg assert(this->is_varying()); 1046b8e80941Smrg 1047b8e80941Smrg unsigned fine_location 1048b8e80941Smrg = this->matched_candidate->toplevel_var->data.location * 4 1049b8e80941Smrg + this->matched_candidate->toplevel_var->data.location_frac 1050b8e80941Smrg + this->matched_candidate->offset; 1051b8e80941Smrg const unsigned dmul = 1052b8e80941Smrg this->matched_candidate->type->without_array()->is_64bit() ? 2 : 1; 1053b8e80941Smrg 1054b8e80941Smrg if (this->matched_candidate->type->is_array()) { 1055b8e80941Smrg /* Array variable */ 1056b8e80941Smrg const unsigned matrix_cols = 1057b8e80941Smrg this->matched_candidate->type->fields.array->matrix_columns; 1058b8e80941Smrg const unsigned vector_elements = 1059b8e80941Smrg this->matched_candidate->type->fields.array->vector_elements; 1060b8e80941Smrg unsigned actual_array_size; 1061b8e80941Smrg switch (this->lowered_builtin_array_variable) { 1062b8e80941Smrg case clip_distance: 1063b8e80941Smrg actual_array_size = prog->last_vert_prog ? 1064b8e80941Smrg prog->last_vert_prog->info.clip_distance_array_size : 0; 1065b8e80941Smrg break; 1066b8e80941Smrg case cull_distance: 1067b8e80941Smrg actual_array_size = prog->last_vert_prog ? 
1068b8e80941Smrg prog->last_vert_prog->info.cull_distance_array_size : 0; 1069b8e80941Smrg break; 1070b8e80941Smrg case tess_level_outer: 1071b8e80941Smrg actual_array_size = 4; 1072b8e80941Smrg break; 1073b8e80941Smrg case tess_level_inner: 1074b8e80941Smrg actual_array_size = 2; 1075b8e80941Smrg break; 1076b8e80941Smrg case none: 1077b8e80941Smrg default: 1078b8e80941Smrg actual_array_size = this->matched_candidate->type->array_size(); 1079b8e80941Smrg break; 1080b8e80941Smrg } 1081b8e80941Smrg 1082b8e80941Smrg if (this->is_subscripted) { 1083b8e80941Smrg /* Check array bounds. */ 1084b8e80941Smrg if (this->array_subscript >= actual_array_size) { 1085b8e80941Smrg linker_error(prog, "Transform feedback varying %s has index " 1086b8e80941Smrg "%i, but the array size is %u.", 1087b8e80941Smrg this->orig_name, this->array_subscript, 1088b8e80941Smrg actual_array_size); 1089b8e80941Smrg return false; 1090b8e80941Smrg } 1091b8e80941Smrg unsigned array_elem_size = this->lowered_builtin_array_variable ? 
1092b8e80941Smrg 1 : vector_elements * matrix_cols * dmul; 1093b8e80941Smrg fine_location += array_elem_size * this->array_subscript; 1094b8e80941Smrg this->size = 1; 1095b8e80941Smrg } else { 1096b8e80941Smrg this->size = actual_array_size; 1097b8e80941Smrg } 1098b8e80941Smrg this->vector_elements = vector_elements; 1099b8e80941Smrg this->matrix_columns = matrix_cols; 1100b8e80941Smrg if (this->lowered_builtin_array_variable) 1101b8e80941Smrg this->type = GL_FLOAT; 1102b8e80941Smrg else 1103b8e80941Smrg this->type = this->matched_candidate->type->fields.array->gl_type; 1104b8e80941Smrg } else { 1105b8e80941Smrg /* Regular variable (scalar, vector, or matrix) */ 1106b8e80941Smrg if (this->is_subscripted) { 1107b8e80941Smrg linker_error(prog, "Transform feedback varying %s requested, " 1108b8e80941Smrg "but %s is not an array.", 1109b8e80941Smrg this->orig_name, this->var_name); 1110b8e80941Smrg return false; 1111b8e80941Smrg } 1112b8e80941Smrg this->size = 1; 1113b8e80941Smrg this->vector_elements = this->matched_candidate->type->vector_elements; 1114b8e80941Smrg this->matrix_columns = this->matched_candidate->type->matrix_columns; 1115b8e80941Smrg this->type = this->matched_candidate->type->gl_type; 1116b8e80941Smrg } 1117b8e80941Smrg this->location = fine_location / 4; 1118b8e80941Smrg this->location_frac = fine_location % 4; 1119b8e80941Smrg 1120b8e80941Smrg /* From GL_EXT_transform_feedback: 1121b8e80941Smrg * A program will fail to link if: 1122b8e80941Smrg * 1123b8e80941Smrg * * the total number of components to capture in any varying 1124b8e80941Smrg * variable in <varyings> is greater than the constant 1125b8e80941Smrg * MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS_EXT and the 1126b8e80941Smrg * buffer mode is SEPARATE_ATTRIBS_EXT; 1127b8e80941Smrg */ 1128b8e80941Smrg if (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS && 1129b8e80941Smrg this->num_components() > 1130b8e80941Smrg ctx->Const.MaxTransformFeedbackSeparateComponents) { 1131b8e80941Smrg 
linker_error(prog, "Transform feedback varying %s exceeds " 1132b8e80941Smrg "MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS.", 1133b8e80941Smrg this->orig_name); 1134b8e80941Smrg return false; 1135b8e80941Smrg } 1136b8e80941Smrg 1137b8e80941Smrg /* Only transform feedback varyings can be assigned to non-zero streams, 1138b8e80941Smrg * so assign the stream id here. 1139b8e80941Smrg */ 1140b8e80941Smrg this->stream_id = this->matched_candidate->toplevel_var->data.stream; 1141b8e80941Smrg 1142b8e80941Smrg unsigned array_offset = this->array_subscript * 4 * dmul; 1143b8e80941Smrg unsigned struct_offset = this->matched_candidate->offset * 4 * dmul; 1144b8e80941Smrg this->buffer = this->matched_candidate->toplevel_var->data.xfb_buffer; 1145b8e80941Smrg this->offset = this->matched_candidate->toplevel_var->data.offset + 1146b8e80941Smrg array_offset + struct_offset; 1147b8e80941Smrg 1148b8e80941Smrg return true; 1149b8e80941Smrg} 1150b8e80941Smrg 1151b8e80941Smrg 1152b8e80941Smrgunsigned 1153b8e80941Smrgtfeedback_decl::get_num_outputs() const 1154b8e80941Smrg{ 1155b8e80941Smrg if (!this->is_varying()) { 1156b8e80941Smrg return 0; 1157b8e80941Smrg } 1158b8e80941Smrg return (this->num_components() + this->location_frac + 3)/4; 1159b8e80941Smrg} 1160b8e80941Smrg 1161b8e80941Smrg 1162b8e80941Smrg/** 1163b8e80941Smrg * Update gl_transform_feedback_info to reflect this tfeedback_decl. 1164b8e80941Smrg * 1165b8e80941Smrg * If an error occurs, the error is reported through linker_error() and false 1166b8e80941Smrg * is returned. 
1167b8e80941Smrg */ 1168b8e80941Smrgbool 1169b8e80941Smrgtfeedback_decl::store(struct gl_context *ctx, struct gl_shader_program *prog, 1170b8e80941Smrg struct gl_transform_feedback_info *info, 1171b8e80941Smrg unsigned buffer, unsigned buffer_index, 1172b8e80941Smrg const unsigned max_outputs, 1173b8e80941Smrg BITSET_WORD *used_components[MAX_FEEDBACK_BUFFERS], 1174b8e80941Smrg bool *explicit_stride, bool has_xfb_qualifiers, 1175b8e80941Smrg const void* mem_ctx) const 1176b8e80941Smrg{ 1177b8e80941Smrg unsigned xfb_offset = 0; 1178b8e80941Smrg unsigned size = this->size; 1179b8e80941Smrg /* Handle gl_SkipComponents. */ 1180b8e80941Smrg if (this->skip_components) { 1181b8e80941Smrg info->Buffers[buffer].Stride += this->skip_components; 1182b8e80941Smrg size = this->skip_components; 1183b8e80941Smrg goto store_varying; 1184b8e80941Smrg } 1185b8e80941Smrg 1186b8e80941Smrg if (this->next_buffer_separator) { 1187b8e80941Smrg size = 0; 1188b8e80941Smrg goto store_varying; 1189b8e80941Smrg } 1190b8e80941Smrg 1191b8e80941Smrg if (has_xfb_qualifiers) { 1192b8e80941Smrg xfb_offset = this->offset / 4; 1193b8e80941Smrg } else { 1194b8e80941Smrg xfb_offset = info->Buffers[buffer].Stride; 1195b8e80941Smrg } 1196b8e80941Smrg info->Varyings[info->NumVarying].Offset = xfb_offset * 4; 1197b8e80941Smrg 1198b8e80941Smrg { 1199b8e80941Smrg unsigned location = this->location; 1200b8e80941Smrg unsigned location_frac = this->location_frac; 1201b8e80941Smrg unsigned num_components = this->num_components(); 1202b8e80941Smrg 1203b8e80941Smrg /* From GL_EXT_transform_feedback: 1204b8e80941Smrg * 1205b8e80941Smrg * " A program will fail to link if: 1206b8e80941Smrg * 1207b8e80941Smrg * * the total number of components to capture is greater than the 1208b8e80941Smrg * constant MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS_EXT 1209b8e80941Smrg * and the buffer mode is INTERLEAVED_ATTRIBS_EXT." 
1210b8e80941Smrg * 1211b8e80941Smrg * From GL_ARB_enhanced_layouts: 1212b8e80941Smrg * 1213b8e80941Smrg * " The resulting stride (implicit or explicit) must be less than or 1214b8e80941Smrg * equal to the implementation-dependent constant 1215b8e80941Smrg * gl_MaxTransformFeedbackInterleavedComponents." 1216b8e80941Smrg */ 1217b8e80941Smrg if ((prog->TransformFeedback.BufferMode == GL_INTERLEAVED_ATTRIBS || 1218b8e80941Smrg has_xfb_qualifiers) && 1219b8e80941Smrg xfb_offset + num_components > 1220b8e80941Smrg ctx->Const.MaxTransformFeedbackInterleavedComponents) { 1221b8e80941Smrg linker_error(prog, 1222b8e80941Smrg "The MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS " 1223b8e80941Smrg "limit has been exceeded."); 1224b8e80941Smrg return false; 1225b8e80941Smrg } 1226b8e80941Smrg 1227b8e80941Smrg /* From the OpenGL 4.60.5 spec, section 4.4.2. Output Layout Qualifiers, 1228b8e80941Smrg * Page 76, (Transform Feedback Layout Qualifiers): 1229b8e80941Smrg * 1230b8e80941Smrg * " No aliasing in output buffers is allowed: It is a compile-time or 1231b8e80941Smrg * link-time error to specify variables with overlapping transform 1232b8e80941Smrg * feedback offsets." 
1233b8e80941Smrg */ 1234b8e80941Smrg const unsigned max_components = 1235b8e80941Smrg ctx->Const.MaxTransformFeedbackInterleavedComponents; 1236b8e80941Smrg const unsigned first_component = xfb_offset; 1237b8e80941Smrg const unsigned last_component = xfb_offset + num_components - 1; 1238b8e80941Smrg const unsigned start_word = BITSET_BITWORD(first_component); 1239b8e80941Smrg const unsigned end_word = BITSET_BITWORD(last_component); 1240b8e80941Smrg BITSET_WORD *used; 1241b8e80941Smrg assert(last_component < max_components); 1242b8e80941Smrg 1243b8e80941Smrg if (!used_components[buffer]) { 1244b8e80941Smrg used_components[buffer] = 1245b8e80941Smrg rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(max_components)); 1246b8e80941Smrg } 1247b8e80941Smrg used = used_components[buffer]; 1248b8e80941Smrg 1249b8e80941Smrg for (unsigned word = start_word; word <= end_word; word++) { 1250b8e80941Smrg unsigned start_range = 0; 1251b8e80941Smrg unsigned end_range = BITSET_WORDBITS - 1; 1252b8e80941Smrg 1253b8e80941Smrg if (word == start_word) 1254b8e80941Smrg start_range = first_component % BITSET_WORDBITS; 1255b8e80941Smrg 1256b8e80941Smrg if (word == end_word) 1257b8e80941Smrg end_range = last_component % BITSET_WORDBITS; 1258b8e80941Smrg 1259b8e80941Smrg if (used[word] & BITSET_RANGE(start_range, end_range)) { 1260b8e80941Smrg linker_error(prog, 1261b8e80941Smrg "variable '%s', xfb_offset (%d) is causing aliasing.", 1262b8e80941Smrg this->orig_name, xfb_offset * 4); 1263b8e80941Smrg return false; 1264b8e80941Smrg } 1265b8e80941Smrg used[word] |= BITSET_RANGE(start_range, end_range); 1266b8e80941Smrg } 1267b8e80941Smrg 1268b8e80941Smrg while (num_components > 0) { 1269b8e80941Smrg unsigned output_size = MIN2(num_components, 4 - location_frac); 1270b8e80941Smrg assert((info->NumOutputs == 0 && max_outputs == 0) || 1271b8e80941Smrg info->NumOutputs < max_outputs); 1272b8e80941Smrg 1273b8e80941Smrg /* From the ARB_enhanced_layouts spec: 1274b8e80941Smrg * 1275b8e80941Smrg * "If 
such a block member or variable is not written during a shader 1276b8e80941Smrg * invocation, the buffer contents at the assigned offset will be 1277b8e80941Smrg * undefined. Even if there are no static writes to a variable or 1278b8e80941Smrg * member that is assigned a transform feedback offset, the space is 1279b8e80941Smrg * still allocated in the buffer and still affects the stride." 1280b8e80941Smrg */ 1281b8e80941Smrg if (this->is_varying_written()) { 1282b8e80941Smrg info->Outputs[info->NumOutputs].ComponentOffset = location_frac; 1283b8e80941Smrg info->Outputs[info->NumOutputs].OutputRegister = location; 1284b8e80941Smrg info->Outputs[info->NumOutputs].NumComponents = output_size; 1285b8e80941Smrg info->Outputs[info->NumOutputs].StreamId = stream_id; 1286b8e80941Smrg info->Outputs[info->NumOutputs].OutputBuffer = buffer; 1287b8e80941Smrg info->Outputs[info->NumOutputs].DstOffset = xfb_offset; 1288b8e80941Smrg ++info->NumOutputs; 1289b8e80941Smrg } 1290b8e80941Smrg info->Buffers[buffer].Stream = this->stream_id; 1291b8e80941Smrg xfb_offset += output_size; 1292b8e80941Smrg 1293b8e80941Smrg num_components -= output_size; 1294b8e80941Smrg location++; 1295b8e80941Smrg location_frac = 0; 1296b8e80941Smrg } 1297b8e80941Smrg } 1298b8e80941Smrg 1299b8e80941Smrg if (explicit_stride && explicit_stride[buffer]) { 1300b8e80941Smrg if (this->is_64bit() && info->Buffers[buffer].Stride % 2) { 1301b8e80941Smrg linker_error(prog, "invalid qualifier xfb_stride=%d must be a " 1302b8e80941Smrg "multiple of 8 as its applied to a type that is or " 1303b8e80941Smrg "contains a double.", 1304b8e80941Smrg info->Buffers[buffer].Stride * 4); 1305b8e80941Smrg return false; 1306b8e80941Smrg } 1307b8e80941Smrg 1308b8e80941Smrg if (xfb_offset > info->Buffers[buffer].Stride) { 1309b8e80941Smrg linker_error(prog, "xfb_offset (%d) overflows xfb_stride (%d) for " 1310b8e80941Smrg "buffer (%d)", xfb_offset * 4, 1311b8e80941Smrg info->Buffers[buffer].Stride * 4, buffer); 1312b8e80941Smrg 
return false; 1313b8e80941Smrg } 1314b8e80941Smrg } else { 1315b8e80941Smrg info->Buffers[buffer].Stride = xfb_offset; 1316b8e80941Smrg } 1317b8e80941Smrg 1318b8e80941Smrg store_varying: 1319b8e80941Smrg info->Varyings[info->NumVarying].Name = ralloc_strdup(prog, 1320b8e80941Smrg this->orig_name); 1321b8e80941Smrg info->Varyings[info->NumVarying].Type = this->type; 1322b8e80941Smrg info->Varyings[info->NumVarying].Size = size; 1323b8e80941Smrg info->Varyings[info->NumVarying].BufferIndex = buffer_index; 1324b8e80941Smrg info->NumVarying++; 1325b8e80941Smrg info->Buffers[buffer].NumVaryings++; 1326b8e80941Smrg 1327b8e80941Smrg return true; 1328b8e80941Smrg} 1329b8e80941Smrg 1330b8e80941Smrg 1331b8e80941Smrgconst tfeedback_candidate * 1332b8e80941Smrgtfeedback_decl::find_candidate(gl_shader_program *prog, 1333b8e80941Smrg hash_table *tfeedback_candidates) 1334b8e80941Smrg{ 1335b8e80941Smrg const char *name = this->var_name; 1336b8e80941Smrg switch (this->lowered_builtin_array_variable) { 1337b8e80941Smrg case none: 1338b8e80941Smrg name = this->var_name; 1339b8e80941Smrg break; 1340b8e80941Smrg case clip_distance: 1341b8e80941Smrg name = "gl_ClipDistanceMESA"; 1342b8e80941Smrg break; 1343b8e80941Smrg case cull_distance: 1344b8e80941Smrg name = "gl_CullDistanceMESA"; 1345b8e80941Smrg break; 1346b8e80941Smrg case tess_level_outer: 1347b8e80941Smrg name = "gl_TessLevelOuterMESA"; 1348b8e80941Smrg break; 1349b8e80941Smrg case tess_level_inner: 1350b8e80941Smrg name = "gl_TessLevelInnerMESA"; 1351b8e80941Smrg break; 1352b8e80941Smrg } 1353b8e80941Smrg hash_entry *entry = _mesa_hash_table_search(tfeedback_candidates, name); 1354b8e80941Smrg 1355b8e80941Smrg this->matched_candidate = entry ? 
1356b8e80941Smrg (const tfeedback_candidate *) entry->data : NULL; 1357b8e80941Smrg 1358b8e80941Smrg if (!this->matched_candidate) { 1359b8e80941Smrg /* From GL_EXT_transform_feedback: 1360b8e80941Smrg * A program will fail to link if: 1361b8e80941Smrg * 1362b8e80941Smrg * * any variable name specified in the <varyings> array is not 1363b8e80941Smrg * declared as an output in the geometry shader (if present) or 1364b8e80941Smrg * the vertex shader (if no geometry shader is present); 1365b8e80941Smrg */ 1366b8e80941Smrg linker_error(prog, "Transform feedback varying %s undeclared.", 1367b8e80941Smrg this->orig_name); 1368b8e80941Smrg } 1369b8e80941Smrg 1370b8e80941Smrg return this->matched_candidate; 1371b8e80941Smrg} 1372b8e80941Smrg 1373b8e80941Smrg 1374b8e80941Smrg/** 1375b8e80941Smrg * Parse all the transform feedback declarations that were passed to 1376b8e80941Smrg * glTransformFeedbackVaryings() and store them in tfeedback_decl objects. 1377b8e80941Smrg * 1378b8e80941Smrg * If an error occurs, the error is reported through linker_error() and false 1379b8e80941Smrg * is returned. 
1380b8e80941Smrg */ 1381b8e80941Smrgstatic bool 1382b8e80941Smrgparse_tfeedback_decls(struct gl_context *ctx, struct gl_shader_program *prog, 1383b8e80941Smrg const void *mem_ctx, unsigned num_names, 1384b8e80941Smrg char **varying_names, tfeedback_decl *decls) 1385b8e80941Smrg{ 1386b8e80941Smrg for (unsigned i = 0; i < num_names; ++i) { 1387b8e80941Smrg decls[i].init(ctx, mem_ctx, varying_names[i]); 1388b8e80941Smrg 1389b8e80941Smrg if (!decls[i].is_varying()) 1390b8e80941Smrg continue; 1391b8e80941Smrg 1392b8e80941Smrg /* From GL_EXT_transform_feedback: 1393b8e80941Smrg * A program will fail to link if: 1394b8e80941Smrg * 1395b8e80941Smrg * * any two entries in the <varyings> array specify the same varying 1396b8e80941Smrg * variable; 1397b8e80941Smrg * 1398b8e80941Smrg * We interpret this to mean "any two entries in the <varyings> array 1399b8e80941Smrg * specify the same varying variable and array index", since transform 1400b8e80941Smrg * feedback of arrays would be useless otherwise. 
1401b8e80941Smrg */ 1402b8e80941Smrg for (unsigned j = 0; j < i; ++j) { 1403b8e80941Smrg if (decls[j].is_varying()) { 1404b8e80941Smrg if (tfeedback_decl::is_same(decls[i], decls[j])) { 1405b8e80941Smrg linker_error(prog, "Transform feedback varying %s specified " 1406b8e80941Smrg "more than once.", varying_names[i]); 1407b8e80941Smrg return false; 1408b8e80941Smrg } 1409b8e80941Smrg } 1410b8e80941Smrg } 1411b8e80941Smrg } 1412b8e80941Smrg return true; 1413b8e80941Smrg} 1414b8e80941Smrg 1415b8e80941Smrg 1416b8e80941Smrgstatic int 1417b8e80941Smrgcmp_xfb_offset(const void * x_generic, const void * y_generic) 1418b8e80941Smrg{ 1419b8e80941Smrg tfeedback_decl *x = (tfeedback_decl *) x_generic; 1420b8e80941Smrg tfeedback_decl *y = (tfeedback_decl *) y_generic; 1421b8e80941Smrg 1422b8e80941Smrg if (x->get_buffer() != y->get_buffer()) 1423b8e80941Smrg return x->get_buffer() - y->get_buffer(); 1424b8e80941Smrg return x->get_offset() - y->get_offset(); 1425b8e80941Smrg} 1426b8e80941Smrg 1427b8e80941Smrg/** 1428b8e80941Smrg * Store transform feedback location assignments into 1429b8e80941Smrg * prog->sh.LinkedTransformFeedback based on the data stored in 1430b8e80941Smrg * tfeedback_decls. 1431b8e80941Smrg * 1432b8e80941Smrg * If an error occurs, the error is reported through linker_error() and false 1433b8e80941Smrg * is returned. 1434b8e80941Smrg */ 1435b8e80941Smrgstatic bool 1436b8e80941Smrgstore_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog, 1437b8e80941Smrg unsigned num_tfeedback_decls, 1438b8e80941Smrg tfeedback_decl *tfeedback_decls, bool has_xfb_qualifiers, 1439b8e80941Smrg const void *mem_ctx) 1440b8e80941Smrg{ 1441b8e80941Smrg if (!prog->last_vert_prog) 1442b8e80941Smrg return true; 1443b8e80941Smrg 1444b8e80941Smrg /* Make sure MaxTransformFeedbackBuffers is less than 32 so the bitmask for 1445b8e80941Smrg * tracking the number of buffers doesn't overflow. 
1446b8e80941Smrg */ 1447b8e80941Smrg assert(ctx->Const.MaxTransformFeedbackBuffers < 32); 1448b8e80941Smrg 1449b8e80941Smrg bool separate_attribs_mode = 1450b8e80941Smrg prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS; 1451b8e80941Smrg 1452b8e80941Smrg struct gl_program *xfb_prog = prog->last_vert_prog; 1453b8e80941Smrg xfb_prog->sh.LinkedTransformFeedback = 1454b8e80941Smrg rzalloc(xfb_prog, struct gl_transform_feedback_info); 1455b8e80941Smrg 1456b8e80941Smrg /* The xfb_offset qualifier does not have to be used in increasing order 1457b8e80941Smrg * however some drivers expect to receive the list of transform feedback 1458b8e80941Smrg * declarations in order so sort it now for convenience. 1459b8e80941Smrg */ 1460b8e80941Smrg if (has_xfb_qualifiers) { 1461b8e80941Smrg qsort(tfeedback_decls, num_tfeedback_decls, sizeof(*tfeedback_decls), 1462b8e80941Smrg cmp_xfb_offset); 1463b8e80941Smrg } 1464b8e80941Smrg 1465b8e80941Smrg xfb_prog->sh.LinkedTransformFeedback->Varyings = 1466b8e80941Smrg rzalloc_array(xfb_prog, struct gl_transform_feedback_varying_info, 1467b8e80941Smrg num_tfeedback_decls); 1468b8e80941Smrg 1469b8e80941Smrg unsigned num_outputs = 0; 1470b8e80941Smrg for (unsigned i = 0; i < num_tfeedback_decls; ++i) { 1471b8e80941Smrg if (tfeedback_decls[i].is_varying_written()) 1472b8e80941Smrg num_outputs += tfeedback_decls[i].get_num_outputs(); 1473b8e80941Smrg } 1474b8e80941Smrg 1475b8e80941Smrg xfb_prog->sh.LinkedTransformFeedback->Outputs = 1476b8e80941Smrg rzalloc_array(xfb_prog, struct gl_transform_feedback_output, 1477b8e80941Smrg num_outputs); 1478b8e80941Smrg 1479b8e80941Smrg unsigned num_buffers = 0; 1480b8e80941Smrg unsigned buffers = 0; 1481b8e80941Smrg BITSET_WORD *used_components[MAX_FEEDBACK_BUFFERS] = {}; 1482b8e80941Smrg 1483b8e80941Smrg if (!has_xfb_qualifiers && separate_attribs_mode) { 1484b8e80941Smrg /* GL_SEPARATE_ATTRIBS */ 1485b8e80941Smrg for (unsigned i = 0; i < num_tfeedback_decls; ++i) { 1486b8e80941Smrg if 
(!tfeedback_decls[i].store(ctx, prog, 1487b8e80941Smrg xfb_prog->sh.LinkedTransformFeedback, 1488b8e80941Smrg num_buffers, num_buffers, num_outputs, 1489b8e80941Smrg used_components, NULL, 1490b8e80941Smrg has_xfb_qualifiers, mem_ctx)) 1491b8e80941Smrg return false; 1492b8e80941Smrg 1493b8e80941Smrg buffers |= 1 << num_buffers; 1494b8e80941Smrg num_buffers++; 1495b8e80941Smrg } 1496b8e80941Smrg } 1497b8e80941Smrg else { 1498b8e80941Smrg /* GL_INVERLEAVED_ATTRIBS */ 1499b8e80941Smrg int buffer_stream_id = -1; 1500b8e80941Smrg unsigned buffer = 1501b8e80941Smrg num_tfeedback_decls ? tfeedback_decls[0].get_buffer() : 0; 1502b8e80941Smrg bool explicit_stride[MAX_FEEDBACK_BUFFERS] = { false }; 1503b8e80941Smrg 1504b8e80941Smrg /* Apply any xfb_stride global qualifiers */ 1505b8e80941Smrg if (has_xfb_qualifiers) { 1506b8e80941Smrg for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) { 1507b8e80941Smrg if (prog->TransformFeedback.BufferStride[j]) { 1508b8e80941Smrg explicit_stride[j] = true; 1509b8e80941Smrg xfb_prog->sh.LinkedTransformFeedback->Buffers[j].Stride = 1510b8e80941Smrg prog->TransformFeedback.BufferStride[j] / 4; 1511b8e80941Smrg } 1512b8e80941Smrg } 1513b8e80941Smrg } 1514b8e80941Smrg 1515b8e80941Smrg for (unsigned i = 0; i < num_tfeedback_decls; ++i) { 1516b8e80941Smrg if (has_xfb_qualifiers && 1517b8e80941Smrg buffer != tfeedback_decls[i].get_buffer()) { 1518b8e80941Smrg /* we have moved to the next buffer so reset stream id */ 1519b8e80941Smrg buffer_stream_id = -1; 1520b8e80941Smrg num_buffers++; 1521b8e80941Smrg } 1522b8e80941Smrg 1523b8e80941Smrg if (tfeedback_decls[i].is_next_buffer_separator()) { 1524b8e80941Smrg if (!tfeedback_decls[i].store(ctx, prog, 1525b8e80941Smrg xfb_prog->sh.LinkedTransformFeedback, 1526b8e80941Smrg buffer, num_buffers, num_outputs, 1527b8e80941Smrg used_components, explicit_stride, 1528b8e80941Smrg has_xfb_qualifiers, mem_ctx)) 1529b8e80941Smrg return false; 1530b8e80941Smrg num_buffers++; 1531b8e80941Smrg buffer_stream_id = 
-1; 1532b8e80941Smrg continue; 1533b8e80941Smrg } 1534b8e80941Smrg 1535b8e80941Smrg if (has_xfb_qualifiers) { 1536b8e80941Smrg buffer = tfeedback_decls[i].get_buffer(); 1537b8e80941Smrg } else { 1538b8e80941Smrg buffer = num_buffers; 1539b8e80941Smrg } 1540b8e80941Smrg 1541b8e80941Smrg if (tfeedback_decls[i].is_varying()) { 1542b8e80941Smrg if (buffer_stream_id == -1) { 1543b8e80941Smrg /* First varying writing to this buffer: remember its stream */ 1544b8e80941Smrg buffer_stream_id = (int) tfeedback_decls[i].get_stream_id(); 1545b8e80941Smrg 1546b8e80941Smrg /* Only mark a buffer as active when there is a varying 1547b8e80941Smrg * attached to it. This behaviour is based on a revised version 1548b8e80941Smrg * of section 13.2.2 of the GL 4.6 spec. 1549b8e80941Smrg */ 1550b8e80941Smrg buffers |= 1 << buffer; 1551b8e80941Smrg } else if (buffer_stream_id != 1552b8e80941Smrg (int) tfeedback_decls[i].get_stream_id()) { 1553b8e80941Smrg /* Varying writes to the same buffer from a different stream */ 1554b8e80941Smrg linker_error(prog, 1555b8e80941Smrg "Transform feedback can't capture varyings belonging " 1556b8e80941Smrg "to different vertex streams in a single buffer. 
" 1557b8e80941Smrg "Varying %s writes to buffer from stream %u, other " 1558b8e80941Smrg "varyings in the same buffer write from stream %u.", 1559b8e80941Smrg tfeedback_decls[i].name(), 1560b8e80941Smrg tfeedback_decls[i].get_stream_id(), 1561b8e80941Smrg buffer_stream_id); 1562b8e80941Smrg return false; 1563b8e80941Smrg } 1564b8e80941Smrg } 1565b8e80941Smrg 1566b8e80941Smrg if (!tfeedback_decls[i].store(ctx, prog, 1567b8e80941Smrg xfb_prog->sh.LinkedTransformFeedback, 1568b8e80941Smrg buffer, num_buffers, num_outputs, 1569b8e80941Smrg used_components, explicit_stride, 1570b8e80941Smrg has_xfb_qualifiers, mem_ctx)) 1571b8e80941Smrg return false; 1572b8e80941Smrg } 1573b8e80941Smrg } 1574b8e80941Smrg 1575b8e80941Smrg assert(xfb_prog->sh.LinkedTransformFeedback->NumOutputs == num_outputs); 1576b8e80941Smrg 1577b8e80941Smrg xfb_prog->sh.LinkedTransformFeedback->ActiveBuffers = buffers; 1578b8e80941Smrg return true; 1579b8e80941Smrg} 1580b8e80941Smrg 1581b8e80941Smrgnamespace { 1582b8e80941Smrg 1583b8e80941Smrg/** 1584b8e80941Smrg * Data structure recording the relationship between outputs of one shader 1585b8e80941Smrg * stage (the "producer") and inputs of another (the "consumer"). 
1586b8e80941Smrg */ 1587b8e80941Smrgclass varying_matches 1588b8e80941Smrg{ 1589b8e80941Smrgpublic: 1590b8e80941Smrg varying_matches(bool disable_varying_packing, bool xfb_enabled, 1591b8e80941Smrg bool enhanced_layouts_enabled, 1592b8e80941Smrg gl_shader_stage producer_stage, 1593b8e80941Smrg gl_shader_stage consumer_stage); 1594b8e80941Smrg ~varying_matches(); 1595b8e80941Smrg void record(ir_variable *producer_var, ir_variable *consumer_var); 1596b8e80941Smrg unsigned assign_locations(struct gl_shader_program *prog, 1597b8e80941Smrg uint8_t components[], 1598b8e80941Smrg uint64_t reserved_slots); 1599b8e80941Smrg void store_locations() const; 1600b8e80941Smrg 1601b8e80941Smrgprivate: 1602b8e80941Smrg bool is_varying_packing_safe(const glsl_type *type, 1603b8e80941Smrg const ir_variable *var) const; 1604b8e80941Smrg 1605b8e80941Smrg /** 1606b8e80941Smrg * If true, this driver disables varying packing, so all varyings need to 1607b8e80941Smrg * be aligned on slot boundaries, and take up a number of slots equal to 1608b8e80941Smrg * their number of matrix columns times their array size. 1609b8e80941Smrg * 1610b8e80941Smrg * Packing may also be disabled because our current packing method is not 1611b8e80941Smrg * safe in SSO or versions of OpenGL where interpolation qualifiers are not 1612b8e80941Smrg * guaranteed to match across stages. 1613b8e80941Smrg */ 1614b8e80941Smrg const bool disable_varying_packing; 1615b8e80941Smrg 1616b8e80941Smrg /** 1617b8e80941Smrg * If true, this driver has transform feedback enabled. The transform 1618b8e80941Smrg * feedback code requires at least some packing be done even when varying 1619b8e80941Smrg * packing is disabled, fortunately where transform feedback requires 1620b8e80941Smrg * packing it's safe to override the disabled setting. See 1621b8e80941Smrg * is_varying_packing_safe(). 
1622b8e80941Smrg */ 1623b8e80941Smrg const bool xfb_enabled; 1624b8e80941Smrg 1625b8e80941Smrg const bool enhanced_layouts_enabled; 1626b8e80941Smrg 1627b8e80941Smrg /** 1628b8e80941Smrg * Enum representing the order in which varyings are packed within a 1629b8e80941Smrg * packing class. 1630b8e80941Smrg * 1631b8e80941Smrg * Currently we pack vec4's first, then vec2's, then scalar values, then 1632b8e80941Smrg * vec3's. This order ensures that the only vectors that are at risk of 1633b8e80941Smrg * having to be "double parked" (split between two adjacent varying slots) 1634b8e80941Smrg * are the vec3's. 1635b8e80941Smrg */ 1636b8e80941Smrg enum packing_order_enum { 1637b8e80941Smrg PACKING_ORDER_VEC4, 1638b8e80941Smrg PACKING_ORDER_VEC2, 1639b8e80941Smrg PACKING_ORDER_SCALAR, 1640b8e80941Smrg PACKING_ORDER_VEC3, 1641b8e80941Smrg }; 1642b8e80941Smrg 1643b8e80941Smrg static unsigned compute_packing_class(const ir_variable *var); 1644b8e80941Smrg static packing_order_enum compute_packing_order(const ir_variable *var); 1645b8e80941Smrg static int match_comparator(const void *x_generic, const void *y_generic); 1646b8e80941Smrg static int xfb_comparator(const void *x_generic, const void *y_generic); 1647b8e80941Smrg 1648b8e80941Smrg /** 1649b8e80941Smrg * Structure recording the relationship between a single producer output 1650b8e80941Smrg * and a single consumer input. 1651b8e80941Smrg */ 1652b8e80941Smrg struct match { 1653b8e80941Smrg /** 1654b8e80941Smrg * Packing class for this varying, computed by compute_packing_class(). 1655b8e80941Smrg */ 1656b8e80941Smrg unsigned packing_class; 1657b8e80941Smrg 1658b8e80941Smrg /** 1659b8e80941Smrg * Packing order for this varying, computed by compute_packing_order(). 1660b8e80941Smrg */ 1661b8e80941Smrg packing_order_enum packing_order; 1662b8e80941Smrg unsigned num_components; 1663b8e80941Smrg 1664b8e80941Smrg /** 1665b8e80941Smrg * The output variable in the producer stage. 
1666b8e80941Smrg */ 1667b8e80941Smrg ir_variable *producer_var; 1668b8e80941Smrg 1669b8e80941Smrg /** 1670b8e80941Smrg * The input variable in the consumer stage. 1671b8e80941Smrg */ 1672b8e80941Smrg ir_variable *consumer_var; 1673b8e80941Smrg 1674b8e80941Smrg /** 1675b8e80941Smrg * The location which has been assigned for this varying. This is 1676b8e80941Smrg * expressed in multiples of a float, with the first generic varying 1677b8e80941Smrg * (i.e. the one referred to by VARYING_SLOT_VAR0) represented by the 1678b8e80941Smrg * value 0. 1679b8e80941Smrg */ 1680b8e80941Smrg unsigned generic_location; 1681b8e80941Smrg } *matches; 1682b8e80941Smrg 1683b8e80941Smrg /** 1684b8e80941Smrg * The number of elements in the \c matches array that are currently in 1685b8e80941Smrg * use. 1686b8e80941Smrg */ 1687b8e80941Smrg unsigned num_matches; 1688b8e80941Smrg 1689b8e80941Smrg /** 1690b8e80941Smrg * The number of elements that were set aside for the \c matches array when 1691b8e80941Smrg * it was allocated. 
1692b8e80941Smrg */ 1693b8e80941Smrg unsigned matches_capacity; 1694b8e80941Smrg 1695b8e80941Smrg gl_shader_stage producer_stage; 1696b8e80941Smrg gl_shader_stage consumer_stage; 1697b8e80941Smrg}; 1698b8e80941Smrg 1699b8e80941Smrg} /* anonymous namespace */ 1700b8e80941Smrg 1701b8e80941Smrgvarying_matches::varying_matches(bool disable_varying_packing, 1702b8e80941Smrg bool xfb_enabled, 1703b8e80941Smrg bool enhanced_layouts_enabled, 1704b8e80941Smrg gl_shader_stage producer_stage, 1705b8e80941Smrg gl_shader_stage consumer_stage) 1706b8e80941Smrg : disable_varying_packing(disable_varying_packing), 1707b8e80941Smrg xfb_enabled(xfb_enabled), 1708b8e80941Smrg enhanced_layouts_enabled(enhanced_layouts_enabled), 1709b8e80941Smrg producer_stage(producer_stage), 1710b8e80941Smrg consumer_stage(consumer_stage) 1711b8e80941Smrg{ 1712b8e80941Smrg /* Note: this initial capacity is rather arbitrarily chosen to be large 1713b8e80941Smrg * enough for many cases without wasting an unreasonable amount of space. 1714b8e80941Smrg * varying_matches::record() will resize the array if there are more than 1715b8e80941Smrg * this number of varyings. 1716b8e80941Smrg */ 1717b8e80941Smrg this->matches_capacity = 8; 1718b8e80941Smrg this->matches = (match *) 1719b8e80941Smrg malloc(sizeof(*this->matches) * this->matches_capacity); 1720b8e80941Smrg this->num_matches = 0; 1721b8e80941Smrg} 1722b8e80941Smrg 1723b8e80941Smrg 1724b8e80941Smrgvarying_matches::~varying_matches() 1725b8e80941Smrg{ 1726b8e80941Smrg free(this->matches); 1727b8e80941Smrg} 1728b8e80941Smrg 1729b8e80941Smrg 1730b8e80941Smrg/** 1731b8e80941Smrg * Packing is always safe on individual arrays, structures, and matrices. It 1732b8e80941Smrg * is also safe if the varying is only used for transform feedback. 
1733b8e80941Smrg */ 1734b8e80941Smrgbool 1735b8e80941Smrgvarying_matches::is_varying_packing_safe(const glsl_type *type, 1736b8e80941Smrg const ir_variable *var) const 1737b8e80941Smrg{ 1738b8e80941Smrg if (consumer_stage == MESA_SHADER_TESS_EVAL || 1739b8e80941Smrg consumer_stage == MESA_SHADER_TESS_CTRL || 1740b8e80941Smrg producer_stage == MESA_SHADER_TESS_CTRL) 1741b8e80941Smrg return false; 1742b8e80941Smrg 1743b8e80941Smrg return xfb_enabled && (type->is_array() || type->is_struct() || 1744b8e80941Smrg type->is_matrix() || var->data.is_xfb_only); 1745b8e80941Smrg} 1746b8e80941Smrg 1747b8e80941Smrg 1748b8e80941Smrg/** 1749b8e80941Smrg * Record the given producer/consumer variable pair in the list of variables 1750b8e80941Smrg * that should later be assigned locations. 1751b8e80941Smrg * 1752b8e80941Smrg * It is permissible for \c consumer_var to be NULL (this happens if a 1753b8e80941Smrg * variable is output by the producer and consumed by transform feedback, but 1754b8e80941Smrg * not consumed by the consumer). 1755b8e80941Smrg * 1756b8e80941Smrg * If \c producer_var has already been paired up with a consumer_var, or 1757b8e80941Smrg * producer_var is part of fixed pipeline functionality (and hence already has 1758b8e80941Smrg * a location assigned), this function has no effect. 1759b8e80941Smrg * 1760b8e80941Smrg * Note: as a side effect this function may change the interpolation type of 1761b8e80941Smrg * \c producer_var, but only when the change couldn't possibly affect 1762b8e80941Smrg * rendering. 
1763b8e80941Smrg */ 1764b8e80941Smrgvoid 1765b8e80941Smrgvarying_matches::record(ir_variable *producer_var, ir_variable *consumer_var) 1766b8e80941Smrg{ 1767b8e80941Smrg assert(producer_var != NULL || consumer_var != NULL); 1768b8e80941Smrg 1769b8e80941Smrg if ((producer_var && (!producer_var->data.is_unmatched_generic_inout || 1770b8e80941Smrg producer_var->data.explicit_location)) || 1771b8e80941Smrg (consumer_var && (!consumer_var->data.is_unmatched_generic_inout || 1772b8e80941Smrg consumer_var->data.explicit_location))) { 1773b8e80941Smrg /* Either a location already exists for this variable (since it is part 1774b8e80941Smrg * of fixed functionality), or it has already been recorded as part of a 1775b8e80941Smrg * previous match. 1776b8e80941Smrg */ 1777b8e80941Smrg return; 1778b8e80941Smrg } 1779b8e80941Smrg 1780b8e80941Smrg bool needs_flat_qualifier = consumer_var == NULL && 1781b8e80941Smrg (producer_var->type->contains_integer() || 1782b8e80941Smrg producer_var->type->contains_double()); 1783b8e80941Smrg 1784b8e80941Smrg if (!disable_varying_packing && 1785b8e80941Smrg (needs_flat_qualifier || 1786b8e80941Smrg (consumer_stage != MESA_SHADER_NONE && consumer_stage != MESA_SHADER_FRAGMENT))) { 1787b8e80941Smrg /* Since this varying is not being consumed by the fragment shader, its 1788b8e80941Smrg * interpolation type varying cannot possibly affect rendering. 1789b8e80941Smrg * Also, this variable is non-flat and is (or contains) an integer 1790b8e80941Smrg * or a double. 1791b8e80941Smrg * If the consumer stage is unknown, don't modify the interpolation 1792b8e80941Smrg * type as it could affect rendering later with separate shaders. 1793b8e80941Smrg * 1794b8e80941Smrg * lower_packed_varyings requires all integer varyings to flat, 1795b8e80941Smrg * regardless of where they appear. We can trivially satisfy that 1796b8e80941Smrg * requirement by changing the interpolation type to flat here. 
1797b8e80941Smrg */ 1798b8e80941Smrg if (producer_var) { 1799b8e80941Smrg producer_var->data.centroid = false; 1800b8e80941Smrg producer_var->data.sample = false; 1801b8e80941Smrg producer_var->data.interpolation = INTERP_MODE_FLAT; 1802b8e80941Smrg } 1803b8e80941Smrg 1804b8e80941Smrg if (consumer_var) { 1805b8e80941Smrg consumer_var->data.centroid = false; 1806b8e80941Smrg consumer_var->data.sample = false; 1807b8e80941Smrg consumer_var->data.interpolation = INTERP_MODE_FLAT; 1808b8e80941Smrg } 1809b8e80941Smrg } 1810b8e80941Smrg 1811b8e80941Smrg if (this->num_matches == this->matches_capacity) { 1812b8e80941Smrg this->matches_capacity *= 2; 1813b8e80941Smrg this->matches = (match *) 1814b8e80941Smrg realloc(this->matches, 1815b8e80941Smrg sizeof(*this->matches) * this->matches_capacity); 1816b8e80941Smrg } 1817b8e80941Smrg 1818b8e80941Smrg /* We must use the consumer to compute the packing class because in GL4.4+ 1819b8e80941Smrg * there is no guarantee interpolation qualifiers will match across stages. 1820b8e80941Smrg * 1821b8e80941Smrg * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.30 spec: 1822b8e80941Smrg * 1823b8e80941Smrg * "The type and presence of interpolation qualifiers of variables with 1824b8e80941Smrg * the same name declared in all linked shaders for the same cross-stage 1825b8e80941Smrg * interface must match, otherwise the link command will fail. 1826b8e80941Smrg * 1827b8e80941Smrg * When comparing an output from one stage to an input of a subsequent 1828b8e80941Smrg * stage, the input and output don't match if their interpolation 1829b8e80941Smrg * qualifiers (or lack thereof) are not the same." 1830b8e80941Smrg * 1831b8e80941Smrg * This text was also in at least revison 7 of the 4.40 spec but is no 1832b8e80941Smrg * longer in revision 9 and not in the 4.50 spec. 1833b8e80941Smrg */ 1834b8e80941Smrg const ir_variable *const var = (consumer_var != NULL) 1835b8e80941Smrg ? 
consumer_var : producer_var; 1836b8e80941Smrg const gl_shader_stage stage = (consumer_var != NULL) 1837b8e80941Smrg ? consumer_stage : producer_stage; 1838b8e80941Smrg const glsl_type *type = get_varying_type(var, stage); 1839b8e80941Smrg 1840b8e80941Smrg if (producer_var && consumer_var && 1841b8e80941Smrg consumer_var->data.must_be_shader_input) { 1842b8e80941Smrg producer_var->data.must_be_shader_input = 1; 1843b8e80941Smrg } 1844b8e80941Smrg 1845b8e80941Smrg this->matches[this->num_matches].packing_class 1846b8e80941Smrg = this->compute_packing_class(var); 1847b8e80941Smrg this->matches[this->num_matches].packing_order 1848b8e80941Smrg = this->compute_packing_order(var); 1849b8e80941Smrg if ((this->disable_varying_packing && !is_varying_packing_safe(type, var)) || 1850b8e80941Smrg var->data.must_be_shader_input) { 1851b8e80941Smrg unsigned slots = type->count_attribute_slots(false); 1852b8e80941Smrg this->matches[this->num_matches].num_components = slots * 4; 1853b8e80941Smrg } else { 1854b8e80941Smrg this->matches[this->num_matches].num_components 1855b8e80941Smrg = type->component_slots(); 1856b8e80941Smrg } 1857b8e80941Smrg 1858b8e80941Smrg this->matches[this->num_matches].producer_var = producer_var; 1859b8e80941Smrg this->matches[this->num_matches].consumer_var = consumer_var; 1860b8e80941Smrg this->num_matches++; 1861b8e80941Smrg if (producer_var) 1862b8e80941Smrg producer_var->data.is_unmatched_generic_inout = 0; 1863b8e80941Smrg if (consumer_var) 1864b8e80941Smrg consumer_var->data.is_unmatched_generic_inout = 0; 1865b8e80941Smrg} 1866b8e80941Smrg 1867b8e80941Smrg 1868b8e80941Smrg/** 1869b8e80941Smrg * Choose locations for all of the variable matches that were previously 1870b8e80941Smrg * passed to varying_matches::record(). 
1871b8e80941Smrg * \param components returns array[slot] of number of components used 1872b8e80941Smrg * per slot (1, 2, 3 or 4) 1873b8e80941Smrg * \param reserved_slots bitmask indicating which varying slots are already 1874b8e80941Smrg * allocated 1875b8e80941Smrg * \return number of slots (4-element vectors) allocated 1876b8e80941Smrg */ 1877b8e80941Smrgunsigned 1878b8e80941Smrgvarying_matches::assign_locations(struct gl_shader_program *prog, 1879b8e80941Smrg uint8_t components[], 1880b8e80941Smrg uint64_t reserved_slots) 1881b8e80941Smrg{ 1882b8e80941Smrg /* If packing has been disabled then we cannot safely sort the varyings by 1883b8e80941Smrg * class as it may mean we are using a version of OpenGL where 1884b8e80941Smrg * interpolation qualifiers are not guaranteed to be matching across 1885b8e80941Smrg * shaders, sorting in this case could result in mismatching shader 1886b8e80941Smrg * interfaces. 1887b8e80941Smrg * When packing is disabled the sort orders varyings used by transform 1888b8e80941Smrg * feedback first, but also depends on *undefined behaviour* of qsort to 1889b8e80941Smrg * reverse the order of the varyings. See: xfb_comparator(). 1890b8e80941Smrg */ 1891b8e80941Smrg if (!this->disable_varying_packing) { 1892b8e80941Smrg /* Sort varying matches into an order that makes them easy to pack. */ 1893b8e80941Smrg qsort(this->matches, this->num_matches, sizeof(*this->matches), 1894b8e80941Smrg &varying_matches::match_comparator); 1895b8e80941Smrg } else { 1896b8e80941Smrg /* Only sort varyings that are only used by transform feedback. 
*/ 1897b8e80941Smrg qsort(this->matches, this->num_matches, sizeof(*this->matches), 1898b8e80941Smrg &varying_matches::xfb_comparator); 1899b8e80941Smrg } 1900b8e80941Smrg 1901b8e80941Smrg unsigned generic_location = 0; 1902b8e80941Smrg unsigned generic_patch_location = MAX_VARYING*4; 1903b8e80941Smrg bool previous_var_xfb_only = false; 1904b8e80941Smrg unsigned previous_packing_class = ~0u; 1905b8e80941Smrg 1906b8e80941Smrg /* For tranform feedback separate mode, we know the number of attributes 1907b8e80941Smrg * is <= the number of buffers. So packing isn't critical. In fact, 1908b8e80941Smrg * packing vec3 attributes can cause trouble because splitting a vec3 1909b8e80941Smrg * effectively creates an additional transform feedback output. The 1910b8e80941Smrg * extra TFB output may exceed device driver limits. 1911b8e80941Smrg */ 1912b8e80941Smrg const bool dont_pack_vec3 = 1913b8e80941Smrg (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS && 1914b8e80941Smrg prog->TransformFeedback.NumVarying > 0); 1915b8e80941Smrg 1916b8e80941Smrg for (unsigned i = 0; i < this->num_matches; i++) { 1917b8e80941Smrg unsigned *location = &generic_location; 1918b8e80941Smrg const ir_variable *var; 1919b8e80941Smrg const glsl_type *type; 1920b8e80941Smrg bool is_vertex_input = false; 1921b8e80941Smrg 1922b8e80941Smrg if (matches[i].consumer_var) { 1923b8e80941Smrg var = matches[i].consumer_var; 1924b8e80941Smrg type = get_varying_type(var, consumer_stage); 1925b8e80941Smrg if (consumer_stage == MESA_SHADER_VERTEX) 1926b8e80941Smrg is_vertex_input = true; 1927b8e80941Smrg } else { 1928b8e80941Smrg var = matches[i].producer_var; 1929b8e80941Smrg type = get_varying_type(var, producer_stage); 1930b8e80941Smrg } 1931b8e80941Smrg 1932b8e80941Smrg if (var->data.patch) 1933b8e80941Smrg location = &generic_patch_location; 1934b8e80941Smrg 1935b8e80941Smrg /* Advance to the next slot if this varying has a different packing 1936b8e80941Smrg * class than the previous one, and we're 
not already on a slot 1937b8e80941Smrg * boundary. 1938b8e80941Smrg * 1939b8e80941Smrg * Also advance to the next slot if packing is disabled. This makes sure 1940b8e80941Smrg * we don't assign varyings the same locations which is possible 1941b8e80941Smrg * because we still pack individual arrays, records and matrices even 1942b8e80941Smrg * when packing is disabled. Note we don't advance to the next slot if 1943b8e80941Smrg * we can pack varyings together that are only used for transform 1944b8e80941Smrg * feedback. 1945b8e80941Smrg */ 1946b8e80941Smrg if (var->data.must_be_shader_input || 1947b8e80941Smrg (this->disable_varying_packing && 1948b8e80941Smrg !(previous_var_xfb_only && var->data.is_xfb_only)) || 1949b8e80941Smrg (previous_packing_class != this->matches[i].packing_class) || 1950b8e80941Smrg (this->matches[i].packing_order == PACKING_ORDER_VEC3 && 1951b8e80941Smrg dont_pack_vec3)) { 1952b8e80941Smrg *location = ALIGN(*location, 4); 1953b8e80941Smrg } 1954b8e80941Smrg 1955b8e80941Smrg previous_var_xfb_only = var->data.is_xfb_only; 1956b8e80941Smrg previous_packing_class = this->matches[i].packing_class; 1957b8e80941Smrg 1958b8e80941Smrg /* The number of components taken up by this variable. For vertex shader 1959b8e80941Smrg * inputs, we use the number of slots * 4, as they have different 1960b8e80941Smrg * counting rules. 1961b8e80941Smrg */ 1962b8e80941Smrg unsigned num_components = is_vertex_input ? 1963b8e80941Smrg type->count_attribute_slots(is_vertex_input) * 4 : 1964b8e80941Smrg this->matches[i].num_components; 1965b8e80941Smrg 1966b8e80941Smrg /* The last slot for this variable, inclusive. */ 1967b8e80941Smrg unsigned slot_end = *location + num_components - 1; 1968b8e80941Smrg 1969b8e80941Smrg /* FIXME: We could be smarter in the below code and loop back over 1970b8e80941Smrg * trying to fill any locations that we skipped because we couldn't pack 1971b8e80941Smrg * the varying between an explicit location. 
For now just let the user 1972b8e80941Smrg * hit the linking error if we run out of room and suggest they use 1973b8e80941Smrg * explicit locations. 1974b8e80941Smrg */ 1975b8e80941Smrg while (slot_end < MAX_VARYING * 4u) { 1976b8e80941Smrg const unsigned slots = (slot_end / 4u) - (*location / 4u) + 1; 1977b8e80941Smrg const uint64_t slot_mask = ((1ull << slots) - 1) << (*location / 4u); 1978b8e80941Smrg 1979b8e80941Smrg assert(slots > 0); 1980b8e80941Smrg 1981b8e80941Smrg if ((reserved_slots & slot_mask) == 0) { 1982b8e80941Smrg break; 1983b8e80941Smrg } 1984b8e80941Smrg 1985b8e80941Smrg *location = ALIGN(*location + 1, 4); 1986b8e80941Smrg slot_end = *location + num_components - 1; 1987b8e80941Smrg } 1988b8e80941Smrg 1989b8e80941Smrg if (!var->data.patch && slot_end >= MAX_VARYING * 4u) { 1990b8e80941Smrg linker_error(prog, "insufficient contiguous locations available for " 1991b8e80941Smrg "%s it is possible an array or struct could not be " 1992b8e80941Smrg "packed between varyings with explicit locations. Try " 1993b8e80941Smrg "using an explicit location for arrays and structs.", 1994b8e80941Smrg var->name); 1995b8e80941Smrg } 1996b8e80941Smrg 1997b8e80941Smrg if (slot_end < MAX_VARYINGS_INCL_PATCH * 4u) { 1998b8e80941Smrg for (unsigned j = *location / 4u; j < slot_end / 4u; j++) 1999b8e80941Smrg components[j] = 4; 2000b8e80941Smrg components[slot_end / 4u] = (slot_end & 3) + 1; 2001b8e80941Smrg } 2002b8e80941Smrg 2003b8e80941Smrg this->matches[i].generic_location = *location; 2004b8e80941Smrg 2005b8e80941Smrg *location = slot_end + 1; 2006b8e80941Smrg } 2007b8e80941Smrg 2008b8e80941Smrg return (generic_location + 3) / 4; 2009b8e80941Smrg} 2010b8e80941Smrg 2011b8e80941Smrg 2012b8e80941Smrg/** 2013b8e80941Smrg * Update the producer and consumer shaders to reflect the locations 2014b8e80941Smrg * assignments that were made by varying_matches::assign_locations(). 
2015b8e80941Smrg */ 2016b8e80941Smrgvoid 2017b8e80941Smrgvarying_matches::store_locations() const 2018b8e80941Smrg{ 2019b8e80941Smrg /* Check is location needs to be packed with lower_packed_varyings() or if 2020b8e80941Smrg * we can just use ARB_enhanced_layouts packing. 2021b8e80941Smrg */ 2022b8e80941Smrg bool pack_loc[MAX_VARYINGS_INCL_PATCH] = { 0 }; 2023b8e80941Smrg const glsl_type *loc_type[MAX_VARYINGS_INCL_PATCH][4] = { {NULL, NULL} }; 2024b8e80941Smrg 2025b8e80941Smrg for (unsigned i = 0; i < this->num_matches; i++) { 2026b8e80941Smrg ir_variable *producer_var = this->matches[i].producer_var; 2027b8e80941Smrg ir_variable *consumer_var = this->matches[i].consumer_var; 2028b8e80941Smrg unsigned generic_location = this->matches[i].generic_location; 2029b8e80941Smrg unsigned slot = generic_location / 4; 2030b8e80941Smrg unsigned offset = generic_location % 4; 2031b8e80941Smrg 2032b8e80941Smrg if (producer_var) { 2033b8e80941Smrg producer_var->data.location = VARYING_SLOT_VAR0 + slot; 2034b8e80941Smrg producer_var->data.location_frac = offset; 2035b8e80941Smrg } 2036b8e80941Smrg 2037b8e80941Smrg if (consumer_var) { 2038b8e80941Smrg assert(consumer_var->data.location == -1); 2039b8e80941Smrg consumer_var->data.location = VARYING_SLOT_VAR0 + slot; 2040b8e80941Smrg consumer_var->data.location_frac = offset; 2041b8e80941Smrg } 2042b8e80941Smrg 2043b8e80941Smrg /* Find locations suitable for native packing via 2044b8e80941Smrg * ARB_enhanced_layouts. 
2045b8e80941Smrg */ 2046b8e80941Smrg if (producer_var && consumer_var) { 2047b8e80941Smrg if (enhanced_layouts_enabled) { 2048b8e80941Smrg const glsl_type *type = 2049b8e80941Smrg get_varying_type(producer_var, producer_stage); 2050b8e80941Smrg if (type->is_array() || type->is_matrix() || type->is_struct() || 2051b8e80941Smrg type->is_double()) { 2052b8e80941Smrg unsigned comp_slots = type->component_slots() + offset; 2053b8e80941Smrg unsigned slots = comp_slots / 4; 2054b8e80941Smrg if (comp_slots % 4) 2055b8e80941Smrg slots += 1; 2056b8e80941Smrg 2057b8e80941Smrg for (unsigned j = 0; j < slots; j++) { 2058b8e80941Smrg pack_loc[slot + j] = true; 2059b8e80941Smrg } 2060b8e80941Smrg } else if (offset + type->vector_elements > 4) { 2061b8e80941Smrg pack_loc[slot] = true; 2062b8e80941Smrg pack_loc[slot + 1] = true; 2063b8e80941Smrg } else { 2064b8e80941Smrg loc_type[slot][offset] = type; 2065b8e80941Smrg } 2066b8e80941Smrg } 2067b8e80941Smrg } 2068b8e80941Smrg } 2069b8e80941Smrg 2070b8e80941Smrg /* Attempt to use ARB_enhanced_layouts for more efficient packing if 2071b8e80941Smrg * suitable. 
2072b8e80941Smrg */ 2073b8e80941Smrg if (enhanced_layouts_enabled) { 2074b8e80941Smrg for (unsigned i = 0; i < this->num_matches; i++) { 2075b8e80941Smrg ir_variable *producer_var = this->matches[i].producer_var; 2076b8e80941Smrg ir_variable *consumer_var = this->matches[i].consumer_var; 2077b8e80941Smrg unsigned generic_location = this->matches[i].generic_location; 2078b8e80941Smrg unsigned slot = generic_location / 4; 2079b8e80941Smrg 2080b8e80941Smrg if (pack_loc[slot] || !producer_var || !consumer_var) 2081b8e80941Smrg continue; 2082b8e80941Smrg 2083b8e80941Smrg const glsl_type *type = 2084b8e80941Smrg get_varying_type(producer_var, producer_stage); 2085b8e80941Smrg bool type_match = true; 2086b8e80941Smrg for (unsigned j = 0; j < 4; j++) { 2087b8e80941Smrg if (loc_type[slot][j]) { 2088b8e80941Smrg if (type->base_type != loc_type[slot][j]->base_type) 2089b8e80941Smrg type_match = false; 2090b8e80941Smrg } 2091b8e80941Smrg } 2092b8e80941Smrg 2093b8e80941Smrg if (type_match) { 2094b8e80941Smrg producer_var->data.explicit_location = 1; 2095b8e80941Smrg consumer_var->data.explicit_location = 1; 2096b8e80941Smrg producer_var->data.explicit_component = 1; 2097b8e80941Smrg consumer_var->data.explicit_component = 1; 2098b8e80941Smrg } 2099b8e80941Smrg } 2100b8e80941Smrg } 2101b8e80941Smrg} 2102b8e80941Smrg 2103b8e80941Smrg 2104b8e80941Smrg/** 2105b8e80941Smrg * Compute the "packing class" of the given varying. This is an unsigned 2106b8e80941Smrg * integer with the property that two variables in the same packing class can 2107b8e80941Smrg * be safely backed into the same vec4. 
2108b8e80941Smrg */ 2109b8e80941Smrgunsigned 2110b8e80941Smrgvarying_matches::compute_packing_class(const ir_variable *var) 2111b8e80941Smrg{ 2112b8e80941Smrg /* Without help from the back-end, there is no way to pack together 2113b8e80941Smrg * variables with different interpolation types, because 2114b8e80941Smrg * lower_packed_varyings must choose exactly one interpolation type for 2115b8e80941Smrg * each packed varying it creates. 2116b8e80941Smrg * 2117b8e80941Smrg * However, we can safely pack together floats, ints, and uints, because: 2118b8e80941Smrg * 2119b8e80941Smrg * - varyings of base type "int" and "uint" must use the "flat" 2120b8e80941Smrg * interpolation type, which can only occur in GLSL 1.30 and above. 2121b8e80941Smrg * 2122b8e80941Smrg * - On platforms that support GLSL 1.30 and above, lower_packed_varyings 2123b8e80941Smrg * can store flat floats as ints without losing any information (using 2124b8e80941Smrg * the ir_unop_bitcast_* opcodes). 2125b8e80941Smrg * 2126b8e80941Smrg * Therefore, the packing class depends only on the interpolation type. 2127b8e80941Smrg */ 2128b8e80941Smrg const unsigned interp = var->is_interpolation_flat() 2129b8e80941Smrg ? unsigned(INTERP_MODE_FLAT) : var->data.interpolation; 2130b8e80941Smrg 2131b8e80941Smrg assert(interp < (1 << 3)); 2132b8e80941Smrg 2133b8e80941Smrg const unsigned packing_class = (interp << 0) | 2134b8e80941Smrg (var->data.centroid << 3) | 2135b8e80941Smrg (var->data.sample << 4) | 2136b8e80941Smrg (var->data.patch << 5) | 2137b8e80941Smrg (var->data.must_be_shader_input << 6); 2138b8e80941Smrg 2139b8e80941Smrg return packing_class; 2140b8e80941Smrg} 2141b8e80941Smrg 2142b8e80941Smrg 2143b8e80941Smrg/** 2144b8e80941Smrg * Compute the "packing order" of the given varying. This is a sort key we 2145b8e80941Smrg * use to determine when to attempt to pack the given varying relative to 2146b8e80941Smrg * other varyings in the same packing class. 
2147b8e80941Smrg */ 2148b8e80941Smrgvarying_matches::packing_order_enum 2149b8e80941Smrgvarying_matches::compute_packing_order(const ir_variable *var) 2150b8e80941Smrg{ 2151b8e80941Smrg const glsl_type *element_type = var->type; 2152b8e80941Smrg 2153b8e80941Smrg while (element_type->is_array()) { 2154b8e80941Smrg element_type = element_type->fields.array; 2155b8e80941Smrg } 2156b8e80941Smrg 2157b8e80941Smrg switch (element_type->component_slots() % 4) { 2158b8e80941Smrg case 1: return PACKING_ORDER_SCALAR; 2159b8e80941Smrg case 2: return PACKING_ORDER_VEC2; 2160b8e80941Smrg case 3: return PACKING_ORDER_VEC3; 2161b8e80941Smrg case 0: return PACKING_ORDER_VEC4; 2162b8e80941Smrg default: 2163b8e80941Smrg assert(!"Unexpected value of vector_elements"); 2164b8e80941Smrg return PACKING_ORDER_VEC4; 2165b8e80941Smrg } 2166b8e80941Smrg} 2167b8e80941Smrg 2168b8e80941Smrg 2169b8e80941Smrg/** 2170b8e80941Smrg * Comparison function passed to qsort() to sort varyings by packing_class and 2171b8e80941Smrg * then by packing_order. 2172b8e80941Smrg */ 2173b8e80941Smrgint 2174b8e80941Smrgvarying_matches::match_comparator(const void *x_generic, const void *y_generic) 2175b8e80941Smrg{ 2176b8e80941Smrg const match *x = (const match *) x_generic; 2177b8e80941Smrg const match *y = (const match *) y_generic; 2178b8e80941Smrg 2179b8e80941Smrg if (x->packing_class != y->packing_class) 2180b8e80941Smrg return x->packing_class - y->packing_class; 2181b8e80941Smrg return x->packing_order - y->packing_order; 2182b8e80941Smrg} 2183b8e80941Smrg 2184b8e80941Smrg 2185b8e80941Smrg/** 2186b8e80941Smrg * Comparison function passed to qsort() to sort varyings used only by 2187b8e80941Smrg * transform feedback when packing of other varyings is disabled. 
2188b8e80941Smrg */ 2189b8e80941Smrgint 2190b8e80941Smrgvarying_matches::xfb_comparator(const void *x_generic, const void *y_generic) 2191b8e80941Smrg{ 2192b8e80941Smrg const match *x = (const match *) x_generic; 2193b8e80941Smrg 2194b8e80941Smrg if (x->producer_var != NULL && x->producer_var->data.is_xfb_only) 2195b8e80941Smrg return match_comparator(x_generic, y_generic); 2196b8e80941Smrg 2197b8e80941Smrg /* FIXME: When the comparator returns 0 it means the elements being 2198b8e80941Smrg * compared are equivalent. However the qsort documentation says: 2199b8e80941Smrg * 2200b8e80941Smrg * "The order of equivalent elements is undefined." 2201b8e80941Smrg * 2202b8e80941Smrg * In practice the sort ends up reversing the order of the varyings which 2203b8e80941Smrg * means locations are also assigned in this reversed order and happens to 2204b8e80941Smrg * be what we want. This is also whats happening in 2205b8e80941Smrg * varying_matches::match_comparator(). 2206b8e80941Smrg */ 2207b8e80941Smrg return 0; 2208b8e80941Smrg} 2209b8e80941Smrg 2210b8e80941Smrg 2211b8e80941Smrg/** 2212b8e80941Smrg * Is the given variable a varying variable to be counted against the 2213b8e80941Smrg * limit in ctx->Const.MaxVarying? 2214b8e80941Smrg * This includes variables such as texcoords, colors and generic 2215b8e80941Smrg * varyings, but excludes variables such as gl_FrontFacing and gl_FragCoord. 
2216b8e80941Smrg */ 2217b8e80941Smrgstatic bool 2218b8e80941Smrgvar_counts_against_varying_limit(gl_shader_stage stage, const ir_variable *var) 2219b8e80941Smrg{ 2220b8e80941Smrg /* Only fragment shaders will take a varying variable as an input */ 2221b8e80941Smrg if (stage == MESA_SHADER_FRAGMENT && 2222b8e80941Smrg var->data.mode == ir_var_shader_in) { 2223b8e80941Smrg switch (var->data.location) { 2224b8e80941Smrg case VARYING_SLOT_POS: 2225b8e80941Smrg case VARYING_SLOT_FACE: 2226b8e80941Smrg case VARYING_SLOT_PNTC: 2227b8e80941Smrg return false; 2228b8e80941Smrg default: 2229b8e80941Smrg return true; 2230b8e80941Smrg } 2231b8e80941Smrg } 2232b8e80941Smrg return false; 2233b8e80941Smrg} 2234b8e80941Smrg 2235b8e80941Smrg 2236b8e80941Smrg/** 2237b8e80941Smrg * Visitor class that generates tfeedback_candidate structs describing all 2238b8e80941Smrg * possible targets of transform feedback. 2239b8e80941Smrg * 2240b8e80941Smrg * tfeedback_candidate structs are stored in the hash table 2241b8e80941Smrg * tfeedback_candidates, which is passed to the constructor. This hash table 2242b8e80941Smrg * maps varying names to instances of the tfeedback_candidate struct. 
2243b8e80941Smrg */ 2244b8e80941Smrgclass tfeedback_candidate_generator : public program_resource_visitor 2245b8e80941Smrg{ 2246b8e80941Smrgpublic: 2247b8e80941Smrg tfeedback_candidate_generator(void *mem_ctx, 2248b8e80941Smrg hash_table *tfeedback_candidates, 2249b8e80941Smrg gl_shader_stage stage) 2250b8e80941Smrg : mem_ctx(mem_ctx), 2251b8e80941Smrg tfeedback_candidates(tfeedback_candidates), 2252b8e80941Smrg stage(stage), 2253b8e80941Smrg toplevel_var(NULL), 2254b8e80941Smrg varying_floats(0) 2255b8e80941Smrg { 2256b8e80941Smrg } 2257b8e80941Smrg 2258b8e80941Smrg void process(ir_variable *var) 2259b8e80941Smrg { 2260b8e80941Smrg /* All named varying interface blocks should be flattened by now */ 2261b8e80941Smrg assert(!var->is_interface_instance()); 2262b8e80941Smrg assert(var->data.mode == ir_var_shader_out); 2263b8e80941Smrg 2264b8e80941Smrg this->toplevel_var = var; 2265b8e80941Smrg this->varying_floats = 0; 2266b8e80941Smrg const glsl_type *t = 2267b8e80941Smrg var->data.from_named_ifc_block ? 
var->get_interface_type() : var->type; 2268b8e80941Smrg if (!var->data.patch && stage == MESA_SHADER_TESS_CTRL) { 2269b8e80941Smrg assert(t->is_array()); 2270b8e80941Smrg t = t->fields.array; 2271b8e80941Smrg } 2272b8e80941Smrg program_resource_visitor::process(var, t, false); 2273b8e80941Smrg } 2274b8e80941Smrg 2275b8e80941Smrgprivate: 2276b8e80941Smrg virtual void visit_field(const glsl_type *type, const char *name, 2277b8e80941Smrg bool /* row_major */, 2278b8e80941Smrg const glsl_type * /* record_type */, 2279b8e80941Smrg const enum glsl_interface_packing, 2280b8e80941Smrg bool /* last_field */) 2281b8e80941Smrg { 2282b8e80941Smrg assert(!type->without_array()->is_struct()); 2283b8e80941Smrg assert(!type->without_array()->is_interface()); 2284b8e80941Smrg 2285b8e80941Smrg tfeedback_candidate *candidate 2286b8e80941Smrg = rzalloc(this->mem_ctx, tfeedback_candidate); 2287b8e80941Smrg candidate->toplevel_var = this->toplevel_var; 2288b8e80941Smrg candidate->type = type; 2289b8e80941Smrg candidate->offset = this->varying_floats; 2290b8e80941Smrg _mesa_hash_table_insert(this->tfeedback_candidates, 2291b8e80941Smrg ralloc_strdup(this->mem_ctx, name), 2292b8e80941Smrg candidate); 2293b8e80941Smrg this->varying_floats += type->component_slots(); 2294b8e80941Smrg } 2295b8e80941Smrg 2296b8e80941Smrg /** 2297b8e80941Smrg * Memory context used to allocate hash table keys and values. 2298b8e80941Smrg */ 2299b8e80941Smrg void * const mem_ctx; 2300b8e80941Smrg 2301b8e80941Smrg /** 2302b8e80941Smrg * Hash table in which tfeedback_candidate objects should be stored. 2303b8e80941Smrg */ 2304b8e80941Smrg hash_table * const tfeedback_candidates; 2305b8e80941Smrg 2306b8e80941Smrg gl_shader_stage stage; 2307b8e80941Smrg 2308b8e80941Smrg /** 2309b8e80941Smrg * Pointer to the toplevel variable that is being traversed. 
2310b8e80941Smrg */ 2311b8e80941Smrg ir_variable *toplevel_var; 2312b8e80941Smrg 2313b8e80941Smrg /** 2314b8e80941Smrg * Total number of varying floats that have been visited so far. This is 2315b8e80941Smrg * used to determine the offset to each varying within the toplevel 2316b8e80941Smrg * variable. 2317b8e80941Smrg */ 2318b8e80941Smrg unsigned varying_floats; 2319b8e80941Smrg}; 2320b8e80941Smrg 2321b8e80941Smrg 2322b8e80941Smrgnamespace linker { 2323b8e80941Smrg 2324b8e80941Smrgvoid 2325b8e80941Smrgpopulate_consumer_input_sets(void *mem_ctx, exec_list *ir, 2326b8e80941Smrg hash_table *consumer_inputs, 2327b8e80941Smrg hash_table *consumer_interface_inputs, 2328b8e80941Smrg ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX]) 2329b8e80941Smrg{ 2330b8e80941Smrg memset(consumer_inputs_with_locations, 2331b8e80941Smrg 0, 2332b8e80941Smrg sizeof(consumer_inputs_with_locations[0]) * VARYING_SLOT_TESS_MAX); 2333b8e80941Smrg 2334b8e80941Smrg foreach_in_list(ir_instruction, node, ir) { 2335b8e80941Smrg ir_variable *const input_var = node->as_variable(); 2336b8e80941Smrg 2337b8e80941Smrg if (input_var != NULL && input_var->data.mode == ir_var_shader_in) { 2338b8e80941Smrg /* All interface blocks should have been lowered by this point */ 2339b8e80941Smrg assert(!input_var->type->is_interface()); 2340b8e80941Smrg 2341b8e80941Smrg if (input_var->data.explicit_location) { 2342b8e80941Smrg /* assign_varying_locations only cares about finding the 2343b8e80941Smrg * ir_variable at the start of a contiguous location block. 2344b8e80941Smrg * 2345b8e80941Smrg * - For !producer, consumer_inputs_with_locations isn't used. 2346b8e80941Smrg * 2347b8e80941Smrg * - For !consumer, consumer_inputs_with_locations is empty. 
2348b8e80941Smrg * 2349b8e80941Smrg * For consumer && producer, if you were trying to set some 2350b8e80941Smrg * ir_variable to the middle of a location block on the other side 2351b8e80941Smrg * of producer/consumer, cross_validate_outputs_to_inputs() should 2352b8e80941Smrg * be link-erroring due to either type mismatch or location 2353b8e80941Smrg * overlaps. If the variables do match up, then they've got a 2354b8e80941Smrg * matching data.location and you only looked at 2355b8e80941Smrg * consumer_inputs_with_locations[var->data.location], not any 2356b8e80941Smrg * following entries for the array/structure. 2357b8e80941Smrg */ 2358b8e80941Smrg consumer_inputs_with_locations[input_var->data.location] = 2359b8e80941Smrg input_var; 2360b8e80941Smrg } else if (input_var->get_interface_type() != NULL) { 2361b8e80941Smrg char *const iface_field_name = 2362b8e80941Smrg ralloc_asprintf(mem_ctx, "%s.%s", 2363b8e80941Smrg input_var->get_interface_type()->without_array()->name, 2364b8e80941Smrg input_var->name); 2365b8e80941Smrg _mesa_hash_table_insert(consumer_interface_inputs, 2366b8e80941Smrg iface_field_name, input_var); 2367b8e80941Smrg } else { 2368b8e80941Smrg _mesa_hash_table_insert(consumer_inputs, 2369b8e80941Smrg ralloc_strdup(mem_ctx, input_var->name), 2370b8e80941Smrg input_var); 2371b8e80941Smrg } 2372b8e80941Smrg } 2373b8e80941Smrg } 2374b8e80941Smrg} 2375b8e80941Smrg 2376b8e80941Smrg/** 2377b8e80941Smrg * Find a variable from the consumer that "matches" the specified variable 2378b8e80941Smrg * 2379b8e80941Smrg * This function only finds inputs with names that match. There is no 2380b8e80941Smrg * validation (here) that the types, etc. are compatible. 
2381b8e80941Smrg */ 2382b8e80941Smrgir_variable * 2383b8e80941Smrgget_matching_input(void *mem_ctx, 2384b8e80941Smrg const ir_variable *output_var, 2385b8e80941Smrg hash_table *consumer_inputs, 2386b8e80941Smrg hash_table *consumer_interface_inputs, 2387b8e80941Smrg ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX]) 2388b8e80941Smrg{ 2389b8e80941Smrg ir_variable *input_var; 2390b8e80941Smrg 2391b8e80941Smrg if (output_var->data.explicit_location) { 2392b8e80941Smrg input_var = consumer_inputs_with_locations[output_var->data.location]; 2393b8e80941Smrg } else if (output_var->get_interface_type() != NULL) { 2394b8e80941Smrg char *const iface_field_name = 2395b8e80941Smrg ralloc_asprintf(mem_ctx, "%s.%s", 2396b8e80941Smrg output_var->get_interface_type()->without_array()->name, 2397b8e80941Smrg output_var->name); 2398b8e80941Smrg hash_entry *entry = _mesa_hash_table_search(consumer_interface_inputs, iface_field_name); 2399b8e80941Smrg input_var = entry ? (ir_variable *) entry->data : NULL; 2400b8e80941Smrg } else { 2401b8e80941Smrg hash_entry *entry = _mesa_hash_table_search(consumer_inputs, output_var->name); 2402b8e80941Smrg input_var = entry ? (ir_variable *) entry->data : NULL; 2403b8e80941Smrg } 2404b8e80941Smrg 2405b8e80941Smrg return (input_var == NULL || input_var->data.mode != ir_var_shader_in) 2406b8e80941Smrg ? 
NULL : input_var; 2407b8e80941Smrg} 2408b8e80941Smrg 2409b8e80941Smrg} 2410b8e80941Smrg 2411b8e80941Smrgstatic int 2412b8e80941Smrgio_variable_cmp(const void *_a, const void *_b) 2413b8e80941Smrg{ 2414b8e80941Smrg const ir_variable *const a = *(const ir_variable **) _a; 2415b8e80941Smrg const ir_variable *const b = *(const ir_variable **) _b; 2416b8e80941Smrg 2417b8e80941Smrg if (a->data.explicit_location && b->data.explicit_location) 2418b8e80941Smrg return b->data.location - a->data.location; 2419b8e80941Smrg 2420b8e80941Smrg if (a->data.explicit_location && !b->data.explicit_location) 2421b8e80941Smrg return 1; 2422b8e80941Smrg 2423b8e80941Smrg if (!a->data.explicit_location && b->data.explicit_location) 2424b8e80941Smrg return -1; 2425b8e80941Smrg 2426b8e80941Smrg return -strcmp(a->name, b->name); 2427b8e80941Smrg} 2428b8e80941Smrg 2429b8e80941Smrg/** 2430b8e80941Smrg * Sort the shader IO variables into canonical order 2431b8e80941Smrg */ 2432b8e80941Smrgstatic void 2433b8e80941Smrgcanonicalize_shader_io(exec_list *ir, enum ir_variable_mode io_mode) 2434b8e80941Smrg{ 2435b8e80941Smrg ir_variable *var_table[MAX_PROGRAM_OUTPUTS * 4]; 2436b8e80941Smrg unsigned num_variables = 0; 2437b8e80941Smrg 2438b8e80941Smrg foreach_in_list(ir_instruction, node, ir) { 2439b8e80941Smrg ir_variable *const var = node->as_variable(); 2440b8e80941Smrg 2441b8e80941Smrg if (var == NULL || var->data.mode != io_mode) 2442b8e80941Smrg continue; 2443b8e80941Smrg 2444b8e80941Smrg /* If we have already encountered more I/O variables that could 2445b8e80941Smrg * successfully link, bail. 2446b8e80941Smrg */ 2447b8e80941Smrg if (num_variables == ARRAY_SIZE(var_table)) 2448b8e80941Smrg return; 2449b8e80941Smrg 2450b8e80941Smrg var_table[num_variables++] = var; 2451b8e80941Smrg } 2452b8e80941Smrg 2453b8e80941Smrg if (num_variables == 0) 2454b8e80941Smrg return; 2455b8e80941Smrg 2456b8e80941Smrg /* Sort the list in reverse order (io_variable_cmp handles this). 
Later 2457b8e80941Smrg * we're going to push the variables on to the IR list as a stack, so we 2458b8e80941Smrg * want the last variable (in canonical order) to be first in the list. 2459b8e80941Smrg */ 2460b8e80941Smrg qsort(var_table, num_variables, sizeof(var_table[0]), io_variable_cmp); 2461b8e80941Smrg 2462b8e80941Smrg /* Remove the variable from it's current location in the IR, and put it at 2463b8e80941Smrg * the front. 2464b8e80941Smrg */ 2465b8e80941Smrg for (unsigned i = 0; i < num_variables; i++) { 2466b8e80941Smrg var_table[i]->remove(); 2467b8e80941Smrg ir->push_head(var_table[i]); 2468b8e80941Smrg } 2469b8e80941Smrg} 2470b8e80941Smrg 2471b8e80941Smrg/** 2472b8e80941Smrg * Generate a bitfield map of the explicit locations for shader varyings. 2473b8e80941Smrg * 2474b8e80941Smrg * Note: For Tessellation shaders we are sitting right on the limits of the 2475b8e80941Smrg * 64 bit map. Per-vertex and per-patch both have separate location domains 2476b8e80941Smrg * with a max of MAX_VARYING. 
2477b8e80941Smrg */ 2478b8e80941Smrgstatic uint64_t 2479b8e80941Smrgreserved_varying_slot(struct gl_linked_shader *stage, 2480b8e80941Smrg ir_variable_mode io_mode) 2481b8e80941Smrg{ 2482b8e80941Smrg assert(io_mode == ir_var_shader_in || io_mode == ir_var_shader_out); 2483b8e80941Smrg /* Avoid an overflow of the returned value */ 2484b8e80941Smrg assert(MAX_VARYINGS_INCL_PATCH <= 64); 2485b8e80941Smrg 2486b8e80941Smrg uint64_t slots = 0; 2487b8e80941Smrg int var_slot; 2488b8e80941Smrg 2489b8e80941Smrg if (!stage) 2490b8e80941Smrg return slots; 2491b8e80941Smrg 2492b8e80941Smrg foreach_in_list(ir_instruction, node, stage->ir) { 2493b8e80941Smrg ir_variable *const var = node->as_variable(); 2494b8e80941Smrg 2495b8e80941Smrg if (var == NULL || var->data.mode != io_mode || 2496b8e80941Smrg !var->data.explicit_location || 2497b8e80941Smrg var->data.location < VARYING_SLOT_VAR0) 2498b8e80941Smrg continue; 2499b8e80941Smrg 2500b8e80941Smrg var_slot = var->data.location - VARYING_SLOT_VAR0; 2501b8e80941Smrg 2502b8e80941Smrg unsigned num_elements = get_varying_type(var, stage->Stage) 2503b8e80941Smrg ->count_attribute_slots(io_mode == ir_var_shader_in && 2504b8e80941Smrg stage->Stage == MESA_SHADER_VERTEX); 2505b8e80941Smrg for (unsigned i = 0; i < num_elements; i++) { 2506b8e80941Smrg if (var_slot >= 0 && var_slot < MAX_VARYINGS_INCL_PATCH) 2507b8e80941Smrg slots |= UINT64_C(1) << var_slot; 2508b8e80941Smrg var_slot += 1; 2509b8e80941Smrg } 2510b8e80941Smrg } 2511b8e80941Smrg 2512b8e80941Smrg return slots; 2513b8e80941Smrg} 2514b8e80941Smrg 2515b8e80941Smrg 2516b8e80941Smrg/** 2517b8e80941Smrg * Assign locations for all variables that are produced in one pipeline stage 2518b8e80941Smrg * (the "producer") and consumed in the next stage (the "consumer"). 2519b8e80941Smrg * 2520b8e80941Smrg * Variables produced by the producer may also be consumed by transform 2521b8e80941Smrg * feedback. 
2522b8e80941Smrg * 2523b8e80941Smrg * \param num_tfeedback_decls is the number of declarations indicating 2524b8e80941Smrg * variables that may be consumed by transform feedback. 2525b8e80941Smrg * 2526b8e80941Smrg * \param tfeedback_decls is a pointer to an array of tfeedback_decl objects 2527b8e80941Smrg * representing the result of parsing the strings passed to 2528b8e80941Smrg * glTransformFeedbackVaryings(). assign_location() will be called for 2529b8e80941Smrg * each of these objects that matches one of the outputs of the 2530b8e80941Smrg * producer. 2531b8e80941Smrg * 2532b8e80941Smrg * When num_tfeedback_decls is nonzero, it is permissible for the consumer to 2533b8e80941Smrg * be NULL. In this case, varying locations are assigned solely based on the 2534b8e80941Smrg * requirements of transform feedback. 2535b8e80941Smrg */ 2536b8e80941Smrgstatic bool 2537b8e80941Smrgassign_varying_locations(struct gl_context *ctx, 2538b8e80941Smrg void *mem_ctx, 2539b8e80941Smrg struct gl_shader_program *prog, 2540b8e80941Smrg gl_linked_shader *producer, 2541b8e80941Smrg gl_linked_shader *consumer, 2542b8e80941Smrg unsigned num_tfeedback_decls, 2543b8e80941Smrg tfeedback_decl *tfeedback_decls, 2544b8e80941Smrg const uint64_t reserved_slots) 2545b8e80941Smrg{ 2546b8e80941Smrg /* Tessellation shaders treat inputs and outputs as shared memory and can 2547b8e80941Smrg * access inputs and outputs of other invocations. 2548b8e80941Smrg * Therefore, they can't be lowered to temps easily (and definitely not 2549b8e80941Smrg * efficiently). 
2550b8e80941Smrg */ 2551b8e80941Smrg bool unpackable_tess = 2552b8e80941Smrg (consumer && consumer->Stage == MESA_SHADER_TESS_EVAL) || 2553b8e80941Smrg (consumer && consumer->Stage == MESA_SHADER_TESS_CTRL) || 2554b8e80941Smrg (producer && producer->Stage == MESA_SHADER_TESS_CTRL); 2555b8e80941Smrg 2556b8e80941Smrg /* Transform feedback code assumes varying arrays are packed, so if the 2557b8e80941Smrg * driver has disabled varying packing, make sure to at least enable 2558b8e80941Smrg * packing required by transform feedback. 2559b8e80941Smrg */ 2560b8e80941Smrg bool xfb_enabled = 2561b8e80941Smrg ctx->Extensions.EXT_transform_feedback && !unpackable_tess; 2562b8e80941Smrg 2563b8e80941Smrg /* Disable packing on outward facing interfaces for SSO because in ES we 2564b8e80941Smrg * need to retain the unpacked varying information for draw time 2565b8e80941Smrg * validation. 2566b8e80941Smrg * 2567b8e80941Smrg * Packing is still enabled on individual arrays, structs, and matrices as 2568b8e80941Smrg * these are required by the transform feedback code and it is still safe 2569b8e80941Smrg * to do so. We also enable packing when a varying is only used for 2570b8e80941Smrg * transform feedback and its not a SSO. 2571b8e80941Smrg */ 2572b8e80941Smrg bool disable_varying_packing = 2573b8e80941Smrg ctx->Const.DisableVaryingPacking || unpackable_tess; 2574b8e80941Smrg if (prog->SeparateShader && (producer == NULL || consumer == NULL)) 2575b8e80941Smrg disable_varying_packing = true; 2576b8e80941Smrg 2577b8e80941Smrg varying_matches matches(disable_varying_packing, xfb_enabled, 2578b8e80941Smrg ctx->Extensions.ARB_enhanced_layouts, 2579b8e80941Smrg producer ? producer->Stage : MESA_SHADER_NONE, 2580b8e80941Smrg consumer ? 
consumer->Stage : MESA_SHADER_NONE); 2581b8e80941Smrg hash_table *tfeedback_candidates = 2582b8e80941Smrg _mesa_hash_table_create(NULL, _mesa_key_hash_string, 2583b8e80941Smrg _mesa_key_string_equal); 2584b8e80941Smrg hash_table *consumer_inputs = 2585b8e80941Smrg _mesa_hash_table_create(NULL, _mesa_key_hash_string, 2586b8e80941Smrg _mesa_key_string_equal); 2587b8e80941Smrg hash_table *consumer_interface_inputs = 2588b8e80941Smrg _mesa_hash_table_create(NULL, _mesa_key_hash_string, 2589b8e80941Smrg _mesa_key_string_equal); 2590b8e80941Smrg ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX] = { 2591b8e80941Smrg NULL, 2592b8e80941Smrg }; 2593b8e80941Smrg 2594b8e80941Smrg unsigned consumer_vertices = 0; 2595b8e80941Smrg if (consumer && consumer->Stage == MESA_SHADER_GEOMETRY) 2596b8e80941Smrg consumer_vertices = prog->Geom.VerticesIn; 2597b8e80941Smrg 2598b8e80941Smrg /* Operate in a total of four passes. 2599b8e80941Smrg * 2600b8e80941Smrg * 1. Sort inputs / outputs into a canonical order. This is necessary so 2601b8e80941Smrg * that inputs / outputs of separable shaders will be assigned 2602b8e80941Smrg * predictable locations regardless of the order in which declarations 2603b8e80941Smrg * appeared in the shader source. 2604b8e80941Smrg * 2605b8e80941Smrg * 2. Assign locations for any matching inputs and outputs. 2606b8e80941Smrg * 2607b8e80941Smrg * 3. Mark output variables in the producer that do not have locations as 2608b8e80941Smrg * not being outputs. This lets the optimizer eliminate them. 2609b8e80941Smrg * 2610b8e80941Smrg * 4. Mark input variables in the consumer that do not have locations as 2611b8e80941Smrg * not being inputs. This lets the optimizer eliminate them. 
2612b8e80941Smrg */ 2613b8e80941Smrg if (consumer) 2614b8e80941Smrg canonicalize_shader_io(consumer->ir, ir_var_shader_in); 2615b8e80941Smrg 2616b8e80941Smrg if (producer) 2617b8e80941Smrg canonicalize_shader_io(producer->ir, ir_var_shader_out); 2618b8e80941Smrg 2619b8e80941Smrg if (consumer) 2620b8e80941Smrg linker::populate_consumer_input_sets(mem_ctx, consumer->ir, 2621b8e80941Smrg consumer_inputs, 2622b8e80941Smrg consumer_interface_inputs, 2623b8e80941Smrg consumer_inputs_with_locations); 2624b8e80941Smrg 2625b8e80941Smrg if (producer) { 2626b8e80941Smrg foreach_in_list(ir_instruction, node, producer->ir) { 2627b8e80941Smrg ir_variable *const output_var = node->as_variable(); 2628b8e80941Smrg 2629b8e80941Smrg if (output_var == NULL || output_var->data.mode != ir_var_shader_out) 2630b8e80941Smrg continue; 2631b8e80941Smrg 2632b8e80941Smrg /* Only geometry shaders can use non-zero streams */ 2633b8e80941Smrg assert(output_var->data.stream == 0 || 2634b8e80941Smrg (output_var->data.stream < MAX_VERTEX_STREAMS && 2635b8e80941Smrg producer->Stage == MESA_SHADER_GEOMETRY)); 2636b8e80941Smrg 2637b8e80941Smrg if (num_tfeedback_decls > 0) { 2638b8e80941Smrg tfeedback_candidate_generator g(mem_ctx, tfeedback_candidates, producer->Stage); 2639b8e80941Smrg /* From OpenGL 4.6 (Core Profile) spec, section 11.1.2.1 2640b8e80941Smrg * ("Vertex Shader Variables / Output Variables") 2641b8e80941Smrg * 2642b8e80941Smrg * "Each program object can specify a set of output variables from 2643b8e80941Smrg * one shader to be recorded in transform feedback mode (see 2644b8e80941Smrg * section 13.3). 
The variables that can be recorded are those 2645b8e80941Smrg * emitted by the first active shader, in order, from the 2646b8e80941Smrg * following list: 2647b8e80941Smrg * 2648b8e80941Smrg * * geometry shader 2649b8e80941Smrg * * tessellation evaluation shader 2650b8e80941Smrg * * tessellation control shader 2651b8e80941Smrg * * vertex shader" 2652b8e80941Smrg * 2653b8e80941Smrg * But on OpenGL ES 3.2, section 11.1.2.1 ("Vertex Shader 2654b8e80941Smrg * Variables / Output Variables") tessellation control shader is 2655b8e80941Smrg * not included in the stages list. 2656b8e80941Smrg */ 2657b8e80941Smrg if (!prog->IsES || producer->Stage != MESA_SHADER_TESS_CTRL) { 2658b8e80941Smrg g.process(output_var); 2659b8e80941Smrg } 2660b8e80941Smrg } 2661b8e80941Smrg 2662b8e80941Smrg ir_variable *const input_var = 2663b8e80941Smrg linker::get_matching_input(mem_ctx, output_var, consumer_inputs, 2664b8e80941Smrg consumer_interface_inputs, 2665b8e80941Smrg consumer_inputs_with_locations); 2666b8e80941Smrg 2667b8e80941Smrg /* If a matching input variable was found, add this output (and the 2668b8e80941Smrg * input) to the set. If this is a separable program and there is no 2669b8e80941Smrg * consumer stage, add the output. 2670b8e80941Smrg * 2671b8e80941Smrg * Always add TCS outputs. They are shared by all invocations 2672b8e80941Smrg * within a patch and can be used as shared memory. 
2673b8e80941Smrg */ 2674b8e80941Smrg if (input_var || (prog->SeparateShader && consumer == NULL) || 2675b8e80941Smrg producer->Stage == MESA_SHADER_TESS_CTRL) { 2676b8e80941Smrg matches.record(output_var, input_var); 2677b8e80941Smrg } 2678b8e80941Smrg 2679b8e80941Smrg /* Only stream 0 outputs can be consumed in the next stage */ 2680b8e80941Smrg if (input_var && output_var->data.stream != 0) { 2681b8e80941Smrg linker_error(prog, "output %s is assigned to stream=%d but " 2682b8e80941Smrg "is linked to an input, which requires stream=0", 2683b8e80941Smrg output_var->name, output_var->data.stream); 2684b8e80941Smrg return false; 2685b8e80941Smrg } 2686b8e80941Smrg } 2687b8e80941Smrg } else { 2688b8e80941Smrg /* If there's no producer stage, then this must be a separable program. 2689b8e80941Smrg * For example, we may have a program that has just a fragment shader. 2690b8e80941Smrg * Later this program will be used with some arbitrary vertex (or 2691b8e80941Smrg * geometry) shader program. This means that locations must be assigned 2692b8e80941Smrg * for all the inputs. 
2693b8e80941Smrg */ 2694b8e80941Smrg foreach_in_list(ir_instruction, node, consumer->ir) { 2695b8e80941Smrg ir_variable *const input_var = node->as_variable(); 2696b8e80941Smrg if (input_var && input_var->data.mode == ir_var_shader_in) { 2697b8e80941Smrg matches.record(NULL, input_var); 2698b8e80941Smrg } 2699b8e80941Smrg } 2700b8e80941Smrg } 2701b8e80941Smrg 2702b8e80941Smrg for (unsigned i = 0; i < num_tfeedback_decls; ++i) { 2703b8e80941Smrg if (!tfeedback_decls[i].is_varying()) 2704b8e80941Smrg continue; 2705b8e80941Smrg 2706b8e80941Smrg const tfeedback_candidate *matched_candidate 2707b8e80941Smrg = tfeedback_decls[i].find_candidate(prog, tfeedback_candidates); 2708b8e80941Smrg 2709b8e80941Smrg if (matched_candidate == NULL) { 2710b8e80941Smrg _mesa_hash_table_destroy(tfeedback_candidates, NULL); 2711b8e80941Smrg return false; 2712b8e80941Smrg } 2713b8e80941Smrg 2714b8e80941Smrg /* Mark xfb varyings as always active */ 2715b8e80941Smrg matched_candidate->toplevel_var->data.always_active_io = 1; 2716b8e80941Smrg 2717b8e80941Smrg /* Mark any corresponding inputs as always active also. We must do this 2718b8e80941Smrg * because we have a NIR pass that lowers vectors to scalars and another 2719b8e80941Smrg * that removes unused varyings. 2720b8e80941Smrg * We don't split varyings marked as always active because there is no 2721b8e80941Smrg * point in doing so. This means we need to mark both sides of the 2722b8e80941Smrg * interface as always active otherwise we will have a mismatch and 2723b8e80941Smrg * start removing things we shouldn't. 
2724b8e80941Smrg */ 2725b8e80941Smrg ir_variable *const input_var = 2726b8e80941Smrg linker::get_matching_input(mem_ctx, matched_candidate->toplevel_var, 2727b8e80941Smrg consumer_inputs, 2728b8e80941Smrg consumer_interface_inputs, 2729b8e80941Smrg consumer_inputs_with_locations); 2730b8e80941Smrg if (input_var) 2731b8e80941Smrg input_var->data.always_active_io = 1; 2732b8e80941Smrg 2733b8e80941Smrg if (matched_candidate->toplevel_var->data.is_unmatched_generic_inout) { 2734b8e80941Smrg matched_candidate->toplevel_var->data.is_xfb_only = 1; 2735b8e80941Smrg matches.record(matched_candidate->toplevel_var, NULL); 2736b8e80941Smrg } 2737b8e80941Smrg } 2738b8e80941Smrg 2739b8e80941Smrg _mesa_hash_table_destroy(consumer_inputs, NULL); 2740b8e80941Smrg _mesa_hash_table_destroy(consumer_interface_inputs, NULL); 2741b8e80941Smrg 2742b8e80941Smrg uint8_t components[MAX_VARYINGS_INCL_PATCH] = {0}; 2743b8e80941Smrg const unsigned slots_used = matches.assign_locations( 2744b8e80941Smrg prog, components, reserved_slots); 2745b8e80941Smrg matches.store_locations(); 2746b8e80941Smrg 2747b8e80941Smrg for (unsigned i = 0; i < num_tfeedback_decls; ++i) { 2748b8e80941Smrg if (tfeedback_decls[i].is_varying()) { 2749b8e80941Smrg if (!tfeedback_decls[i].assign_location(ctx, prog)) { 2750b8e80941Smrg _mesa_hash_table_destroy(tfeedback_candidates, NULL); 2751b8e80941Smrg return false; 2752b8e80941Smrg } 2753b8e80941Smrg } 2754b8e80941Smrg } 2755b8e80941Smrg _mesa_hash_table_destroy(tfeedback_candidates, NULL); 2756b8e80941Smrg 2757b8e80941Smrg if (consumer && producer) { 2758b8e80941Smrg foreach_in_list(ir_instruction, node, consumer->ir) { 2759b8e80941Smrg ir_variable *const var = node->as_variable(); 2760b8e80941Smrg 2761b8e80941Smrg if (var && var->data.mode == ir_var_shader_in && 2762b8e80941Smrg var->data.is_unmatched_generic_inout) { 2763b8e80941Smrg if (!prog->IsES && prog->data->Version <= 120) { 2764b8e80941Smrg /* On page 25 (page 31 of the PDF) of the GLSL 1.20 spec: 
2765b8e80941Smrg * 2766b8e80941Smrg * Only those varying variables used (i.e. read) in 2767b8e80941Smrg * the fragment shader executable must be written to 2768b8e80941Smrg * by the vertex shader executable; declaring 2769b8e80941Smrg * superfluous varying variables in a vertex shader is 2770b8e80941Smrg * permissible. 2771b8e80941Smrg * 2772b8e80941Smrg * We interpret this text as meaning that the VS must 2773b8e80941Smrg * write the variable for the FS to read it. See 2774b8e80941Smrg * "glsl1-varying read but not written" in piglit. 2775b8e80941Smrg */ 2776b8e80941Smrg linker_error(prog, "%s shader varying %s not written " 2777b8e80941Smrg "by %s shader\n.", 2778b8e80941Smrg _mesa_shader_stage_to_string(consumer->Stage), 2779b8e80941Smrg var->name, 2780b8e80941Smrg _mesa_shader_stage_to_string(producer->Stage)); 2781b8e80941Smrg } else { 2782b8e80941Smrg linker_warning(prog, "%s shader varying %s not written " 2783b8e80941Smrg "by %s shader\n.", 2784b8e80941Smrg _mesa_shader_stage_to_string(consumer->Stage), 2785b8e80941Smrg var->name, 2786b8e80941Smrg _mesa_shader_stage_to_string(producer->Stage)); 2787b8e80941Smrg } 2788b8e80941Smrg } 2789b8e80941Smrg } 2790b8e80941Smrg 2791b8e80941Smrg /* Now that validation is done its safe to remove unused varyings. As 2792b8e80941Smrg * we have both a producer and consumer its safe to remove unused 2793b8e80941Smrg * varyings even if the program is a SSO because the stages are being 2794b8e80941Smrg * linked together i.e. we have a multi-stage SSO. 
2795b8e80941Smrg */ 2796b8e80941Smrg remove_unused_shader_inputs_and_outputs(false, producer, 2797b8e80941Smrg ir_var_shader_out); 2798b8e80941Smrg remove_unused_shader_inputs_and_outputs(false, consumer, 2799b8e80941Smrg ir_var_shader_in); 2800b8e80941Smrg } 2801b8e80941Smrg 2802b8e80941Smrg if (producer) { 2803b8e80941Smrg lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_out, 2804b8e80941Smrg 0, producer, disable_varying_packing, 2805b8e80941Smrg xfb_enabled); 2806b8e80941Smrg } 2807b8e80941Smrg 2808b8e80941Smrg if (consumer) { 2809b8e80941Smrg lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_in, 2810b8e80941Smrg consumer_vertices, consumer, 2811b8e80941Smrg disable_varying_packing, xfb_enabled); 2812b8e80941Smrg } 2813b8e80941Smrg 2814b8e80941Smrg return true; 2815b8e80941Smrg} 2816b8e80941Smrg 2817b8e80941Smrgstatic bool 2818b8e80941Smrgcheck_against_output_limit(struct gl_context *ctx, 2819b8e80941Smrg struct gl_shader_program *prog, 2820b8e80941Smrg gl_linked_shader *producer, 2821b8e80941Smrg unsigned num_explicit_locations) 2822b8e80941Smrg{ 2823b8e80941Smrg unsigned output_vectors = num_explicit_locations; 2824b8e80941Smrg 2825b8e80941Smrg foreach_in_list(ir_instruction, node, producer->ir) { 2826b8e80941Smrg ir_variable *const var = node->as_variable(); 2827b8e80941Smrg 2828b8e80941Smrg if (var && !var->data.explicit_location && 2829b8e80941Smrg var->data.mode == ir_var_shader_out && 2830b8e80941Smrg var_counts_against_varying_limit(producer->Stage, var)) { 2831b8e80941Smrg /* outputs for fragment shader can't be doubles */ 2832b8e80941Smrg output_vectors += var->type->count_attribute_slots(false); 2833b8e80941Smrg } 2834b8e80941Smrg } 2835b8e80941Smrg 2836b8e80941Smrg assert(producer->Stage != MESA_SHADER_FRAGMENT); 2837b8e80941Smrg unsigned max_output_components = 2838b8e80941Smrg ctx->Const.Program[producer->Stage].MaxOutputComponents; 2839b8e80941Smrg 2840b8e80941Smrg const unsigned output_components = 
output_vectors * 4; 2841b8e80941Smrg if (output_components > max_output_components) { 2842b8e80941Smrg if (ctx->API == API_OPENGLES2 || prog->IsES) 2843b8e80941Smrg linker_error(prog, "%s shader uses too many output vectors " 2844b8e80941Smrg "(%u > %u)\n", 2845b8e80941Smrg _mesa_shader_stage_to_string(producer->Stage), 2846b8e80941Smrg output_vectors, 2847b8e80941Smrg max_output_components / 4); 2848b8e80941Smrg else 2849b8e80941Smrg linker_error(prog, "%s shader uses too many output components " 2850b8e80941Smrg "(%u > %u)\n", 2851b8e80941Smrg _mesa_shader_stage_to_string(producer->Stage), 2852b8e80941Smrg output_components, 2853b8e80941Smrg max_output_components); 2854b8e80941Smrg 2855b8e80941Smrg return false; 2856b8e80941Smrg } 2857b8e80941Smrg 2858b8e80941Smrg return true; 2859b8e80941Smrg} 2860b8e80941Smrg 2861b8e80941Smrgstatic bool 2862b8e80941Smrgcheck_against_input_limit(struct gl_context *ctx, 2863b8e80941Smrg struct gl_shader_program *prog, 2864b8e80941Smrg gl_linked_shader *consumer, 2865b8e80941Smrg unsigned num_explicit_locations) 2866b8e80941Smrg{ 2867b8e80941Smrg unsigned input_vectors = num_explicit_locations; 2868b8e80941Smrg 2869b8e80941Smrg foreach_in_list(ir_instruction, node, consumer->ir) { 2870b8e80941Smrg ir_variable *const var = node->as_variable(); 2871b8e80941Smrg 2872b8e80941Smrg if (var && !var->data.explicit_location && 2873b8e80941Smrg var->data.mode == ir_var_shader_in && 2874b8e80941Smrg var_counts_against_varying_limit(consumer->Stage, var)) { 2875b8e80941Smrg /* vertex inputs aren't varying counted */ 2876b8e80941Smrg input_vectors += var->type->count_attribute_slots(false); 2877b8e80941Smrg } 2878b8e80941Smrg } 2879b8e80941Smrg 2880b8e80941Smrg assert(consumer->Stage != MESA_SHADER_VERTEX); 2881b8e80941Smrg unsigned max_input_components = 2882b8e80941Smrg ctx->Const.Program[consumer->Stage].MaxInputComponents; 2883b8e80941Smrg 2884b8e80941Smrg const unsigned input_components = input_vectors * 4; 2885b8e80941Smrg if 
(input_components > max_input_components) { 2886b8e80941Smrg if (ctx->API == API_OPENGLES2 || prog->IsES) 2887b8e80941Smrg linker_error(prog, "%s shader uses too many input vectors " 2888b8e80941Smrg "(%u > %u)\n", 2889b8e80941Smrg _mesa_shader_stage_to_string(consumer->Stage), 2890b8e80941Smrg input_vectors, 2891b8e80941Smrg max_input_components / 4); 2892b8e80941Smrg else 2893b8e80941Smrg linker_error(prog, "%s shader uses too many input components " 2894b8e80941Smrg "(%u > %u)\n", 2895b8e80941Smrg _mesa_shader_stage_to_string(consumer->Stage), 2896b8e80941Smrg input_components, 2897b8e80941Smrg max_input_components); 2898b8e80941Smrg 2899b8e80941Smrg return false; 2900b8e80941Smrg } 2901b8e80941Smrg 2902b8e80941Smrg return true; 2903b8e80941Smrg} 2904b8e80941Smrg 2905b8e80941Smrgbool 2906b8e80941Smrglink_varyings(struct gl_shader_program *prog, unsigned first, unsigned last, 2907b8e80941Smrg struct gl_context *ctx, void *mem_ctx) 2908b8e80941Smrg{ 2909b8e80941Smrg bool has_xfb_qualifiers = false; 2910b8e80941Smrg unsigned num_tfeedback_decls = 0; 2911b8e80941Smrg char **varying_names = NULL; 2912b8e80941Smrg tfeedback_decl *tfeedback_decls = NULL; 2913b8e80941Smrg 2914b8e80941Smrg /* From the ARB_enhanced_layouts spec: 2915b8e80941Smrg * 2916b8e80941Smrg * "If the shader used to record output variables for transform feedback 2917b8e80941Smrg * varyings uses the "xfb_buffer", "xfb_offset", or "xfb_stride" layout 2918b8e80941Smrg * qualifiers, the values specified by TransformFeedbackVaryings are 2919b8e80941Smrg * ignored, and the set of variables captured for transform feedback is 2920b8e80941Smrg * instead derived from the specified layout qualifiers." 
2921b8e80941Smrg */ 2922b8e80941Smrg for (int i = MESA_SHADER_FRAGMENT - 1; i >= 0; i--) { 2923b8e80941Smrg /* Find last stage before fragment shader */ 2924b8e80941Smrg if (prog->_LinkedShaders[i]) { 2925b8e80941Smrg has_xfb_qualifiers = 2926b8e80941Smrg process_xfb_layout_qualifiers(mem_ctx, prog->_LinkedShaders[i], 2927b8e80941Smrg prog, &num_tfeedback_decls, 2928b8e80941Smrg &varying_names); 2929b8e80941Smrg break; 2930b8e80941Smrg } 2931b8e80941Smrg } 2932b8e80941Smrg 2933b8e80941Smrg if (!has_xfb_qualifiers) { 2934b8e80941Smrg num_tfeedback_decls = prog->TransformFeedback.NumVarying; 2935b8e80941Smrg varying_names = prog->TransformFeedback.VaryingNames; 2936b8e80941Smrg } 2937b8e80941Smrg 2938b8e80941Smrg if (num_tfeedback_decls != 0) { 2939b8e80941Smrg /* From GL_EXT_transform_feedback: 2940b8e80941Smrg * A program will fail to link if: 2941b8e80941Smrg * 2942b8e80941Smrg * * the <count> specified by TransformFeedbackVaryingsEXT is 2943b8e80941Smrg * non-zero, but the program object has no vertex or geometry 2944b8e80941Smrg * shader; 2945b8e80941Smrg */ 2946b8e80941Smrg if (first >= MESA_SHADER_FRAGMENT) { 2947b8e80941Smrg linker_error(prog, "Transform feedback varyings specified, but " 2948b8e80941Smrg "no vertex, tessellation, or geometry shader is " 2949b8e80941Smrg "present.\n"); 2950b8e80941Smrg return false; 2951b8e80941Smrg } 2952b8e80941Smrg 2953b8e80941Smrg tfeedback_decls = rzalloc_array(mem_ctx, tfeedback_decl, 2954b8e80941Smrg num_tfeedback_decls); 2955b8e80941Smrg if (!parse_tfeedback_decls(ctx, prog, mem_ctx, num_tfeedback_decls, 2956b8e80941Smrg varying_names, tfeedback_decls)) 2957b8e80941Smrg return false; 2958b8e80941Smrg } 2959b8e80941Smrg 2960b8e80941Smrg /* If there is no fragment shader we need to set transform feedback. 2961b8e80941Smrg * 2962b8e80941Smrg * For SSO we also need to assign output locations. 
We assign them here 2963b8e80941Smrg * because we need to do it for both single stage programs and multi stage 2964b8e80941Smrg * programs. 2965b8e80941Smrg */ 2966b8e80941Smrg if (last < MESA_SHADER_FRAGMENT && 2967b8e80941Smrg (num_tfeedback_decls != 0 || prog->SeparateShader)) { 2968b8e80941Smrg const uint64_t reserved_out_slots = 2969b8e80941Smrg reserved_varying_slot(prog->_LinkedShaders[last], ir_var_shader_out); 2970b8e80941Smrg if (!assign_varying_locations(ctx, mem_ctx, prog, 2971b8e80941Smrg prog->_LinkedShaders[last], NULL, 2972b8e80941Smrg num_tfeedback_decls, tfeedback_decls, 2973b8e80941Smrg reserved_out_slots)) 2974b8e80941Smrg return false; 2975b8e80941Smrg } 2976b8e80941Smrg 2977b8e80941Smrg if (last <= MESA_SHADER_FRAGMENT) { 2978b8e80941Smrg /* Remove unused varyings from the first/last stage unless SSO */ 2979b8e80941Smrg remove_unused_shader_inputs_and_outputs(prog->SeparateShader, 2980b8e80941Smrg prog->_LinkedShaders[first], 2981b8e80941Smrg ir_var_shader_in); 2982b8e80941Smrg remove_unused_shader_inputs_and_outputs(prog->SeparateShader, 2983b8e80941Smrg prog->_LinkedShaders[last], 2984b8e80941Smrg ir_var_shader_out); 2985b8e80941Smrg 2986b8e80941Smrg /* If the program is made up of only a single stage */ 2987b8e80941Smrg if (first == last) { 2988b8e80941Smrg gl_linked_shader *const sh = prog->_LinkedShaders[last]; 2989b8e80941Smrg 2990b8e80941Smrg do_dead_builtin_varyings(ctx, NULL, sh, 0, NULL); 2991b8e80941Smrg do_dead_builtin_varyings(ctx, sh, NULL, num_tfeedback_decls, 2992b8e80941Smrg tfeedback_decls); 2993b8e80941Smrg 2994b8e80941Smrg if (prog->SeparateShader) { 2995b8e80941Smrg const uint64_t reserved_slots = 2996b8e80941Smrg reserved_varying_slot(sh, ir_var_shader_in); 2997b8e80941Smrg 2998b8e80941Smrg /* Assign input locations for SSO, output locations are already 2999b8e80941Smrg * assigned. 
3000b8e80941Smrg */ 3001b8e80941Smrg if (!assign_varying_locations(ctx, mem_ctx, prog, 3002b8e80941Smrg NULL /* producer */, 3003b8e80941Smrg sh /* consumer */, 3004b8e80941Smrg 0 /* num_tfeedback_decls */, 3005b8e80941Smrg NULL /* tfeedback_decls */, 3006b8e80941Smrg reserved_slots)) 3007b8e80941Smrg return false; 3008b8e80941Smrg } 3009b8e80941Smrg } else { 3010b8e80941Smrg /* Linking the stages in the opposite order (from fragment to vertex) 3011b8e80941Smrg * ensures that inter-shader outputs written to in an earlier stage 3012b8e80941Smrg * are eliminated if they are (transitively) not used in a later 3013b8e80941Smrg * stage. 3014b8e80941Smrg */ 3015b8e80941Smrg int next = last; 3016b8e80941Smrg for (int i = next - 1; i >= 0; i--) { 3017b8e80941Smrg if (prog->_LinkedShaders[i] == NULL && i != 0) 3018b8e80941Smrg continue; 3019b8e80941Smrg 3020b8e80941Smrg gl_linked_shader *const sh_i = prog->_LinkedShaders[i]; 3021b8e80941Smrg gl_linked_shader *const sh_next = prog->_LinkedShaders[next]; 3022b8e80941Smrg 3023b8e80941Smrg const uint64_t reserved_out_slots = 3024b8e80941Smrg reserved_varying_slot(sh_i, ir_var_shader_out); 3025b8e80941Smrg const uint64_t reserved_in_slots = 3026b8e80941Smrg reserved_varying_slot(sh_next, ir_var_shader_in); 3027b8e80941Smrg 3028b8e80941Smrg do_dead_builtin_varyings(ctx, sh_i, sh_next, 3029b8e80941Smrg next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0, 3030b8e80941Smrg tfeedback_decls); 3031b8e80941Smrg 3032b8e80941Smrg if (!assign_varying_locations(ctx, mem_ctx, prog, sh_i, sh_next, 3033b8e80941Smrg next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0, 3034b8e80941Smrg tfeedback_decls, 3035b8e80941Smrg reserved_out_slots | reserved_in_slots)) 3036b8e80941Smrg return false; 3037b8e80941Smrg 3038b8e80941Smrg /* This must be done after all dead varyings are eliminated. 
*/ 3039b8e80941Smrg if (sh_i != NULL) { 3040b8e80941Smrg unsigned slots_used = util_bitcount64(reserved_out_slots); 3041b8e80941Smrg if (!check_against_output_limit(ctx, prog, sh_i, slots_used)) { 3042b8e80941Smrg return false; 3043b8e80941Smrg } 3044b8e80941Smrg } 3045b8e80941Smrg 3046b8e80941Smrg unsigned slots_used = util_bitcount64(reserved_in_slots); 3047b8e80941Smrg if (!check_against_input_limit(ctx, prog, sh_next, slots_used)) 3048b8e80941Smrg return false; 3049b8e80941Smrg 3050b8e80941Smrg next = i; 3051b8e80941Smrg } 3052b8e80941Smrg } 3053b8e80941Smrg } 3054b8e80941Smrg 3055b8e80941Smrg if (!store_tfeedback_info(ctx, prog, num_tfeedback_decls, tfeedback_decls, 3056b8e80941Smrg has_xfb_qualifiers, mem_ctx)) 3057b8e80941Smrg return false; 3058b8e80941Smrg 3059b8e80941Smrg return true; 3060b8e80941Smrg} 3061