link_varyings.cpp revision 7ec681f3
1/* 2 * Copyright © 2012 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24/** 25 * \file link_varyings.cpp 26 * 27 * Linker functions related specifically to linking varyings between shader 28 * stages. 29 */ 30 31 32#include "main/errors.h" 33#include "main/mtypes.h" 34#include "glsl_symbol_table.h" 35#include "glsl_parser_extras.h" 36#include "ir_optimization.h" 37#include "linker.h" 38#include "link_varyings.h" 39#include "main/macros.h" 40#include "util/hash_table.h" 41#include "util/u_math.h" 42#include "program.h" 43 44 45/** 46 * Get the varying type stripped of the outermost array if we're processing 47 * a stage whose varyings are arrays indexed by a vertex number (such as 48 * geometry shader inputs). 
 */
static const glsl_type *
get_varying_type(const ir_variable *var, gl_shader_stage stage)
{
   const glsl_type *type = var->type;

   /* TCS outputs and TCS/TES/GS inputs are per-vertex arrays; strip the
    * outer vertex-indexed array level.  Patch variables are not arrayed
    * per vertex, so they are left untouched.
    */
   if (!var->data.patch &&
       ((var->data.mode == ir_var_shader_out &&
         stage == MESA_SHADER_TESS_CTRL) ||
        (var->data.mode == ir_var_shader_in &&
         (stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_TESS_EVAL ||
          stage == MESA_SHADER_GEOMETRY)))) {
      assert(type->is_array());
      type = type->fields.array;
   }

   return type;
}

/**
 * Return true if the variable carries an explicit location qualifier that
 * places it in the user-defined varying range (VARYING_SLOT_VAR0 and up).
 */
static bool
varying_has_user_specified_location(const ir_variable *var)
{
   return var->data.explicit_location &&
      var->data.location >= VARYING_SLOT_VAR0;
}

/**
 * Recursively flatten a transform-feedback output of aggregate type into
 * the individual varying names that can be captured.
 *
 * \param name            in/out: the name accumulated so far; member and
 *                        array subscript suffixes are appended in place.
 * \param name_length     length of the valid prefix of *name.
 * \param count           in/out: index of the next free entry in
 *                        *varying_names.
 * \param ifc_member_name when \c t is an interface block, the name of the
 *                        single member being expanded (NULL otherwise).
 * \param ifc_member_t    type of that interface member (NULL otherwise).
 * \param varying_names   receives a ralloc'ed copy of every fully
 *                        flattened name.
 */
static void
create_xfb_varying_names(void *mem_ctx, const glsl_type *t, char **name,
                         size_t name_length, unsigned *count,
                         const char *ifc_member_name,
                         const glsl_type *ifc_member_t, char ***varying_names)
{
   if (t->is_interface()) {
      size_t new_length = name_length;

      assert(ifc_member_name && ifc_member_t);
      ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", ifc_member_name);

      create_xfb_varying_names(mem_ctx, ifc_member_t, name, new_length, count,
                               NULL, NULL, varying_names);
   } else if (t->is_struct()) {
      for (unsigned i = 0; i < t->length; i++) {
         const char *field = t->fields.structure[i].name;
         size_t new_length = name_length;

         ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", field);

         create_xfb_varying_names(mem_ctx, t->fields.structure[i].type, name,
                                  new_length, count, NULL, NULL,
                                  varying_names);
      }
   } else if (t->without_array()->is_struct() ||
              t->without_array()->is_interface() ||
              (t->is_array() && t->fields.array->is_array())) {
      for (unsigned i = 0; i < t->length; i++) {
         size_t new_length = name_length;

         /* Append the subscript to the current variable name */
         ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i);

         create_xfb_varying_names(mem_ctx, t->fields.array, name, new_length,
                                  count, ifc_member_name, ifc_member_t,
                                  varying_names);
      }
   } else {
      /* Leaf type: record the fully built name. */
      (*varying_names)[(*count)++] = ralloc_strdup(mem_ctx, *name);
   }
}

/**
 * Scan a shader's outputs for xfb_* layout qualifiers.
 *
 * Increments *num_tfeedback_decls by the number of varyings captured via
 * xfb_offset and fills *varying_names with their flattened names.  Returns
 * true if any xfb_* qualifier (or a program-level buffer stride) is in use,
 * i.e. the shader itself describes the transform feedback setup.
 */
static bool
process_xfb_layout_qualifiers(void *mem_ctx, const gl_linked_shader *sh,
                              struct gl_shader_program *prog,
                              unsigned *num_tfeedback_decls,
                              char ***varying_names)
{
   bool has_xfb_qualifiers = false;

   /* We still need to enable transform feedback mode even if xfb_stride is
    * only applied to a global out. Also we don't bother to propagate
    * xfb_stride to interface block members so this will catch that case also.
    */
   for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
      if (prog->TransformFeedback.BufferStride[j]) {
         has_xfb_qualifiers = true;
         break;
      }
   }

   foreach_in_list(ir_instruction, node, sh->ir) {
      ir_variable *var = node->as_variable();
      if (!var || var->data.mode != ir_var_shader_out)
         continue;

      /* From the ARB_enhanced_layouts spec:
       *
       *    "Any shader making any static use (after preprocessing) of any of
       *     these *xfb_* qualifiers will cause the shader to be in a
       *     transform feedback capturing mode and hence responsible for
       *     describing the transform feedback setup. This mode will capture
       *     any output selected by *xfb_offset*, directly or indirectly, to
       *     a transform feedback buffer."
       */
      if (var->data.explicit_xfb_buffer || var->data.explicit_xfb_stride) {
         has_xfb_qualifiers = true;
      }

      if (var->data.explicit_xfb_offset) {
         /* Every leaf varying inside this output becomes one capture decl. */
         *num_tfeedback_decls += var->type->varying_count();
         has_xfb_qualifiers = true;
      }
   }

   if (*num_tfeedback_decls == 0)
      return has_xfb_qualifiers;

   /* Second pass: build the flattened list of captured varying names. */
   unsigned i = 0;
   *varying_names = ralloc_array(mem_ctx, char *, *num_tfeedback_decls);
   foreach_in_list(ir_instruction, node, sh->ir) {
      ir_variable *var = node->as_variable();
      if (!var || var->data.mode != ir_var_shader_out)
         continue;

      if (var->data.explicit_xfb_offset) {
         char *name;
         const glsl_type *type, *member_type;

         if (var->data.from_named_ifc_block) {
            type = var->get_interface_type();

            /* Find the member type before it was altered by lowering */
            const glsl_type *type_wa = type->without_array();
            member_type =
               type_wa->fields.structure[type_wa->field_index(var->name)].type;
            name = ralloc_strdup(NULL, type_wa->name);
         } else {
            type = var->type;
            member_type = NULL;
            name = ralloc_strdup(NULL, var->name);
         }
         create_xfb_varying_names(mem_ctx, type, &name, strlen(name), &i,
                                  var->name, member_type, varying_names);
         ralloc_free(name);
      }
   }

   assert(i == *num_tfeedback_decls);
   return has_xfb_qualifiers;
}

/**
 * Validate the types and qualifiers of an output from one stage against the
 * matching input to another stage.
 */
static void
cross_validate_types_and_qualifiers(struct gl_context *ctx,
                                    struct gl_shader_program *prog,
                                    const ir_variable *input,
                                    const ir_variable *output,
                                    gl_shader_stage consumer_stage,
                                    gl_shader_stage producer_stage)
{
   /* Check that the types match between stages.
    */
   const glsl_type *type_to_match = input->type;

   /* VS -> GS, VS -> TCS, VS -> TES, TES -> GS */
   const bool extra_array_level = (producer_stage == MESA_SHADER_VERTEX &&
                                   consumer_stage != MESA_SHADER_FRAGMENT) ||
                                  consumer_stage == MESA_SHADER_GEOMETRY;
   if (extra_array_level) {
      /* The consumer sees per-vertex arrays; compare the element type. */
      assert(type_to_match->is_array());
      type_to_match = type_to_match->fields.array;
   }

   if (type_to_match != output->type) {
      if (output->type->is_struct()) {
         /* Structures across shader stages can have different name
          * and considered to match in type if and only if structure
          * members match in name, type, qualification, and declaration
          * order. The precision doesn't need to match.
          */
         if (!output->type->record_compare(type_to_match,
                                           false, /* match_name */
                                           true, /* match_locations */
                                           false /* match_precision */)) {
            linker_error(prog,
                         "%s shader output `%s' declared as struct `%s', "
                         "doesn't match in type with %s shader input "
                         "declared as struct `%s'\n",
                         _mesa_shader_stage_to_string(producer_stage),
                         output->name,
                         output->type->name,
                         _mesa_shader_stage_to_string(consumer_stage),
                         input->type->name);
         }
      } else if (!output->type->is_array() || !is_gl_identifier(output->name)) {
         /* There is a bit of a special case for gl_TexCoord. This
          * built-in is unsized by default. Applications that variable
          * access it must redeclare it with a size. There is some
          * language in the GLSL spec that implies the fragment shader
          * and vertex shader do not have to agree on this size. Other
          * driver behave this way, and one or two applications seem to
          * rely on it.
          *
          * Neither declaration needs to be modified here because the array
          * sizes are fixed later when update_array_sizes is called.
          *
          * From page 48 (page 54 of the PDF) of the GLSL 1.10 spec:
          *
          *    "Unlike user-defined varying variables, the built-in
          *     varying variables don't have a strict one-to-one
          *     correspondence between the vertex language and the
          *     fragment language."
          */
         linker_error(prog,
                      "%s shader output `%s' declared as type `%s', "
                      "but %s shader input declared as type `%s'\n",
                      _mesa_shader_stage_to_string(producer_stage),
                      output->name,
                      output->type->name,
                      _mesa_shader_stage_to_string(consumer_stage),
                      input->type->name);
         return;
      }
   }

   /* Check that all of the qualifiers match between stages.
    */

   /* According to the OpenGL and OpenGLES GLSL specs, the centroid qualifier
    * should match until OpenGL 4.3 and OpenGLES 3.1. The OpenGLES 3.0
    * conformance test suite does not verify that the qualifiers must match.
    * The deqp test suite expects the opposite (OpenGLES 3.1) behavior for
    * OpenGLES 3.0 drivers, so we relax the checking in all cases.
    */
   if (false /* always skip the centroid check */ &&
       prog->data->Version < (prog->IsES ? 310 : 430) &&
       input->data.centroid != output->data.centroid) {
      linker_error(prog,
                   "%s shader output `%s' %s centroid qualifier, "
                   "but %s shader input %s centroid qualifier\n",
                   _mesa_shader_stage_to_string(producer_stage),
                   output->name,
                   (output->data.centroid) ? "has" : "lacks",
                   _mesa_shader_stage_to_string(consumer_stage),
                   (input->data.centroid) ? "has" : "lacks");
      return;
   }

   if (input->data.sample != output->data.sample) {
      linker_error(prog,
                   "%s shader output `%s' %s sample qualifier, "
                   "but %s shader input %s sample qualifier\n",
                   _mesa_shader_stage_to_string(producer_stage),
                   output->name,
                   (output->data.sample) ? "has" : "lacks",
                   _mesa_shader_stage_to_string(consumer_stage),
                   (input->data.sample) ? "has" : "lacks");
      return;
   }

   if (input->data.patch != output->data.patch) {
      linker_error(prog,
                   "%s shader output `%s' %s patch qualifier, "
                   "but %s shader input %s patch qualifier\n",
                   _mesa_shader_stage_to_string(producer_stage),
                   output->name,
                   (output->data.patch) ? "has" : "lacks",
                   _mesa_shader_stage_to_string(consumer_stage),
                   (input->data.patch) ? "has" : "lacks");
      return;
   }

   /* The GLSL 4.20 and GLSL ES 3.00 specifications say:
    *
    *    "As only outputs need be declared with invariant, an output from
    *     one shader stage will still match an input of a subsequent stage
    *     without the input being declared as invariant."
    *
    * while GLSL 4.10 says:
    *
    *    "For variables leaving one shader and coming into another shader,
    *     the invariant keyword has to be used in both shaders, or a link
    *     error will result."
    *
    * and GLSL ES 1.00 section 4.6.4 "Invariance and Linking" says:
    *
    *    "The invariance of varyings that are declared in both the vertex
    *     and fragment shaders must match."
    */
   if (input->data.explicit_invariant != output->data.explicit_invariant &&
       prog->data->Version < (prog->IsES ? 300 : 420)) {
      linker_error(prog,
                   "%s shader output `%s' %s invariant qualifier, "
                   "but %s shader input %s invariant qualifier\n",
                   _mesa_shader_stage_to_string(producer_stage),
                   output->name,
                   (output->data.explicit_invariant) ? "has" : "lacks",
                   _mesa_shader_stage_to_string(consumer_stage),
                   (input->data.explicit_invariant) ? "has" : "lacks");
      return;
   }

   /* GLSL >= 4.40 removes text requiring interpolation qualifiers
    * to match cross stage, they must only match within the same stage.
    *
    * From page 84 (page 90 of the PDF) of the GLSL 4.40 spec:
    *
    *    "It is a link-time error if, within the same stage, the interpolation
    *     qualifiers of variables of the same name do not match.
    *
    * Section 4.3.9 (Interpolation) of the GLSL ES 3.00 spec says:
    *
    *    "When no interpolation qualifier is present, smooth interpolation
    *     is used."
    *
    * So we match variables where one is smooth and the other has no explicit
    * qualifier.
    */
   unsigned input_interpolation = input->data.interpolation;
   unsigned output_interpolation = output->data.interpolation;
   if (prog->IsES) {
      if (input_interpolation == INTERP_MODE_NONE)
         input_interpolation = INTERP_MODE_SMOOTH;
      if (output_interpolation == INTERP_MODE_NONE)
         output_interpolation = INTERP_MODE_SMOOTH;
   }
   if (input_interpolation != output_interpolation &&
       prog->data->Version < 440) {
      if (!ctx->Const.AllowGLSLCrossStageInterpolationMismatch) {
         linker_error(prog,
                      "%s shader output `%s' specifies %s "
                      "interpolation qualifier, "
                      "but %s shader input specifies %s "
                      "interpolation qualifier\n",
                      _mesa_shader_stage_to_string(producer_stage),
                      output->name,
                      interpolation_string(output->data.interpolation),
                      _mesa_shader_stage_to_string(consumer_stage),
                      interpolation_string(input->data.interpolation));
         return;
      } else {
         /* Driconf opt-in: downgrade the mismatch to a warning. */
         linker_warning(prog,
                        "%s shader output `%s' specifies %s "
                        "interpolation qualifier, "
                        "but %s shader input specifies %s "
                        "interpolation qualifier\n",
                        _mesa_shader_stage_to_string(producer_stage),
                        output->name,
                        interpolation_string(output->data.interpolation),
                        _mesa_shader_stage_to_string(consumer_stage),
                        interpolation_string(input->data.interpolation));
      }
   }
}

/**
 * Validate front and back color outputs against single color input
 */
static void
cross_validate_front_and_back_color(struct gl_context *ctx,
                                    struct gl_shader_program *prog,
                                    const ir_variable *input,
                                    const ir_variable *front_color,
                                    const ir_variable *back_color,
                                    gl_shader_stage consumer_stage,
                                    gl_shader_stage producer_stage)
{
   /* Only validate colors the producer actually writes. */
   if
      (front_color != NULL && front_color->data.assigned)
      cross_validate_types_and_qualifiers(ctx, prog, input, front_color,
                                          consumer_stage, producer_stage);

   if (back_color != NULL && back_color->data.assigned)
      cross_validate_types_and_qualifiers(ctx, prog, input, back_color,
                                          consumer_stage, producer_stage);
}

/**
 * Convert var->data.location to a zero-based slot index by subtracting the
 * first location of the range the variable belongs to (generic varyings by
 * default; VS inputs, patch varyings and FS data outputs have their own
 * base).
 */
static unsigned
compute_variable_location_slot(ir_variable *var, gl_shader_stage stage)
{
   unsigned location_start = VARYING_SLOT_VAR0;

   switch (stage) {
   case MESA_SHADER_VERTEX:
      if (var->data.mode == ir_var_shader_in)
         location_start = VERT_ATTRIB_GENERIC0;
      break;
   case MESA_SHADER_TESS_CTRL:
   case MESA_SHADER_TESS_EVAL:
      if (var->data.patch)
         location_start = VARYING_SLOT_PATCH0;
      break;
   case MESA_SHADER_FRAGMENT:
      if (var->data.mode == ir_var_shader_out)
         location_start = FRAG_RESULT_DATA0;
      break;
   default:
      break;
   }

   return var->data.location - location_start;
}

/* Per-slot bookkeeping used while validating explicit-location aliasing:
 * records which variable occupies a location/component and the properties
 * that all aliases of that slot must share.
 */
struct explicit_location_info {
   ir_variable *var;
   bool base_type_is_integer;
   unsigned base_type_bit_size;
   unsigned interpolation;
   bool centroid;
   bool sample;
   bool patch;
};

/**
 * Record \c var's use of the component range [component, last component)
 * over locations [location, location_limit), raising a linker error if it
 * aliases an already-recorded variable in an incompatible way: structs may
 * not share a location at all, components may not overlap, and aliases must
 * agree in base type, bit size, interpolation and auxiliary storage
 * qualification.  Returns false on any conflict.
 */
static bool
check_location_aliasing(struct explicit_location_info explicit_locations[][4],
                        ir_variable *var,
                        unsigned location,
                        unsigned component,
                        unsigned location_limit,
                        const glsl_type *type,
                        unsigned interpolation,
                        bool centroid,
                        bool sample,
                        bool patch,
                        gl_shader_program *prog,
                        gl_shader_stage stage)
{
   unsigned last_comp;
   unsigned base_type_bit_size;
   const glsl_type *type_without_array = type->without_array();
   const bool base_type_is_integer =
      glsl_base_type_is_integer(type_without_array->base_type);
   const bool is_struct = type_without_array->is_struct();
   if (is_struct) {
      /* structs don't have a defined underlying base type so just treat all
       * component slots as used and set the bit size to 0. If there is
       * location aliasing, we'll fail anyway later.
       */
      last_comp = 4;
      base_type_bit_size = 0;
   } else {
      /* 64-bit types consume two components per element. */
      unsigned dmul = type_without_array->is_64bit() ? 2 : 1;
      last_comp = component + type_without_array->vector_elements * dmul;
      base_type_bit_size =
         glsl_base_type_get_bit_size(type_without_array->base_type);
   }

   while (location < location_limit) {
      unsigned comp = 0;
      while (comp < 4) {
         struct explicit_location_info *info =
            &explicit_locations[location][comp];

         if (info->var) {
            if (info->var->type->without_array()->is_struct() || is_struct) {
               /* Structs cannot share location since they are incompatible
                * with any other underlying numerical type.
                */
               linker_error(prog,
                            "%s shader has multiple %sputs sharing the "
                            "same location that don't have the same "
                            "underlying numerical type. Struct variable '%s', "
                            "location %u\n",
                            _mesa_shader_stage_to_string(stage),
                            var->data.mode == ir_var_shader_in ? "in" : "out",
                            is_struct ? var->name : info->var->name,
                            location);
               return false;
            } else if (comp >= component && comp < last_comp) {
               /* Component aliasing is not allowed */
               linker_error(prog,
                            "%s shader has multiple %sputs explicitly "
                            "assigned to location %d and component %d\n",
                            _mesa_shader_stage_to_string(stage),
                            var->data.mode == ir_var_shader_in ? "in" : "out",
                            location, comp);
               return false;
            } else {
               /* From the OpenGL 4.60.5 spec, section 4.4.1 Input Layout
                * Qualifiers, Page 67, (Location aliasing):
                *
                *   " Further, when location aliasing, the aliases sharing the
                *     location must have the same underlying numerical type
                *     and bit width (floating-point or integer, 32-bit versus
                *     64-bit, etc.) and the same auxiliary storage and
                *     interpolation qualification."
                */

               /* If the underlying numerical type isn't integer, implicitly
                * it will be float or else we would have failed by now.
                */
               if (info->base_type_is_integer != base_type_is_integer) {
                  linker_error(prog,
                               "%s shader has multiple %sputs sharing the "
                               "same location that don't have the same "
                               "underlying numerical type. Location %u "
                               "component %u.\n",
                               _mesa_shader_stage_to_string(stage),
                               var->data.mode == ir_var_shader_in ?
                               "in" : "out", location, comp);
                  return false;
               }

               if (info->base_type_bit_size != base_type_bit_size) {
                  linker_error(prog,
                               "%s shader has multiple %sputs sharing the "
                               "same location that don't have the same "
                               "underlying numerical bit size. Location %u "
                               "component %u.\n",
                               _mesa_shader_stage_to_string(stage),
                               var->data.mode == ir_var_shader_in ?
                               "in" : "out", location, comp);
                  return false;
               }

               if (info->interpolation != interpolation) {
                  linker_error(prog,
                               "%s shader has multiple %sputs sharing the "
                               "same location that don't have the same "
                               "interpolation qualification. Location %u "
                               "component %u.\n",
                               _mesa_shader_stage_to_string(stage),
                               var->data.mode == ir_var_shader_in ?
                               "in" : "out", location, comp);
                  return false;
               }

               if (info->centroid != centroid ||
                   info->sample != sample ||
                   info->patch != patch) {
                  linker_error(prog,
                               "%s shader has multiple %sputs sharing the "
                               "same location that don't have the same "
                               "auxiliary storage qualification. Location %u "
                               "component %u.\n",
                               _mesa_shader_stage_to_string(stage),
                               var->data.mode == ir_var_shader_in ?
                               "in" : "out", location, comp);
                  return false;
               }
            }
         } else if (comp >= component && comp < last_comp) {
            /* Slot is free: claim it for this variable. */
            info->var = var;
            info->base_type_is_integer = base_type_is_integer;
            info->base_type_bit_size = base_type_bit_size;
            info->interpolation = interpolation;
            info->centroid = centroid;
            info->sample = sample;
            info->patch = patch;
         }

         comp++;

         /* We need to do some special handling for doubles as dvec3 and
          * dvec4 consume two consecutive locations. We don't need to
          * worry about components beginning at anything other than 0 as
          * the spec does not allow this for dvec3 and dvec4.
          */
         if (comp == 4 && last_comp > 4) {
            last_comp = last_comp - 4;
            /* Bump location index and reset the component index */
            location++;
            comp = 0;
            component = 0;
         }
      }

      location++;
   }

   return true;
}

/**
 * Check that an explicit-location varying fits within the stage's location
 * limits and does not illegally alias a previously recorded variable.
 */
static bool
validate_explicit_variable_location(struct gl_context *ctx,
                                    struct explicit_location_info explicit_locations[][4],
                                    ir_variable *var,
                                    gl_shader_program *prog,
                                    gl_linked_shader *sh)
{
   const glsl_type *type = get_varying_type(var, sh->Stage);
   unsigned num_elements = type->count_attribute_slots(false);
   unsigned idx = compute_variable_location_slot(var, sh->Stage);
   unsigned slot_limit = idx + num_elements;

   /* Vertex shader inputs and fragment shader outputs are validated in
    * assign_attribute_or_color_locations() so we should not attempt to
    * validate them again here.
    */
   unsigned slot_max;
   if (var->data.mode == ir_var_shader_out) {
      assert(sh->Stage != MESA_SHADER_FRAGMENT);
      slot_max =
         ctx->Const.Program[sh->Stage].MaxOutputComponents / 4;
   } else {
      assert(var->data.mode == ir_var_shader_in);
      assert(sh->Stage != MESA_SHADER_VERTEX);
      slot_max =
         ctx->Const.Program[sh->Stage].MaxInputComponents / 4;
   }

   if (slot_limit > slot_max) {
      linker_error(prog,
                   "Invalid location %u in %s shader\n",
                   idx, _mesa_shader_stage_to_string(sh->Stage));
      return false;
   }

   const glsl_type *type_without_array = type->without_array();
   if (type_without_array->is_interface()) {
      /* Interface blocks: validate each member against its own location. */
      for (unsigned i = 0; i < type_without_array->length; i++) {
         glsl_struct_field *field = &type_without_array->fields.structure[i];
         unsigned field_location = field->location -
            (field->patch ? VARYING_SLOT_PATCH0 : VARYING_SLOT_VAR0);
         unsigned field_slots = field->type->count_attribute_slots(false);
         if (!check_location_aliasing(explicit_locations, var,
                                      field_location,
                                      0,
                                      field_location + field_slots,
                                      field->type,
                                      field->interpolation,
                                      field->centroid,
                                      field->sample,
                                      field->patch,
                                      prog, sh->Stage)) {
            return false;
         }
      }
   } else if (!check_location_aliasing(explicit_locations, var,
                                       idx, var->data.location_frac,
                                       slot_limit, type,
                                       var->data.interpolation,
                                       var->data.centroid,
                                       var->data.sample,
                                       var->data.patch,
                                       prog, sh->Stage)) {
      return false;
   }

   return true;
}

/**
 * Validate explicit locations for the inputs to the first stage and the
 * outputs of the last stage in a program, if those are not the VS and FS
 * shaders.
 */
void
validate_first_and_last_interface_explicit_locations(struct gl_context *ctx,
                                                     struct gl_shader_program *prog,
                                                     gl_shader_stage first_stage,
                                                     gl_shader_stage last_stage)
{
   /* VS inputs and FS outputs are validated in
    * assign_attribute_or_color_locations()
    */
   bool validate_first_stage = first_stage != MESA_SHADER_VERTEX;
   bool validate_last_stage = last_stage != MESA_SHADER_FRAGMENT;
   if (!validate_first_stage && !validate_last_stage)
      return;

   struct explicit_location_info explicit_locations[MAX_VARYING][4];

   gl_shader_stage stages[2] = { first_stage, last_stage };
   bool validate_stage[2] = { validate_first_stage, validate_last_stage };
   ir_variable_mode var_direction[2] = { ir_var_shader_in, ir_var_shader_out };

   for (unsigned i = 0; i < 2; i++) {
      if (!validate_stage[i])
         continue;

      gl_shader_stage stage = stages[i];

      gl_linked_shader *sh = prog->_LinkedShaders[stage];
      assert(sh);

      /* Reset the slot table between the input pass and the output pass. */
      memset(explicit_locations, 0, sizeof(explicit_locations));

      foreach_in_list(ir_instruction, node, sh->ir) {
         ir_variable *const var = node->as_variable();

         if (var == NULL ||
             !var->data.explicit_location ||
             var->data.location < VARYING_SLOT_VAR0 ||
             var->data.mode != var_direction[i])
            continue;

         if (!validate_explicit_variable_location(
                  ctx, explicit_locations, var, prog, sh)) {
            return;
         }
      }
   }
}

/**
 * Check if we should force input / output matching between shader
 * interfaces.
 *
 * Section 4.3.4 (Inputs) of the GLSL 4.10 specifications say:
 *
 *   "Only the input variables that are actually read need to be
 *    written by the previous stage; it is allowed to have
 *    superfluous declarations of input variables."
 *
 * However it's not defined anywhere as to how we should handle
 * inputs that are not written in the previous stage and it's not
 * clear what "actually read" means.
 *
 * The GLSL 4.20 spec however is much clearer:
 *
 *   "Only the input variables that are statically read need to
 *    be written by the previous stage; it is allowed to have
 *    superfluous declarations of input variables."
 *
 * It also has a table that states it is an error to statically
 * read an input that is not defined in the previous stage. While
 * it is not an error to not statically write to the output (it
 * just needs to be defined to not be an error).
 *
 * The text in the GLSL 4.20 spec was an attempt to clarify the
 * previous spec iterations. However given the difference in spec
 * and that some applications seem to depend on not erroring when
 * the input is not actually read in control flow we only apply
 * this rule to GLSL 4.20 and higher. GLSL 4.10 shaders have been
 * seen in the wild that depend on the less strict interpretation.
 */
static bool
static_input_output_matching(struct gl_shader_program *prog)
{
   /* ES always applies the strict rule; desktop GL only from 4.20 up. */
   return prog->data->Version >= (prog->IsES ? 0 : 420);
}

/**
 * Validate that outputs from one stage match inputs of another
 */
void
cross_validate_outputs_to_inputs(struct gl_context *ctx,
                                 struct gl_shader_program *prog,
                                 gl_linked_shader *producer,
                                 gl_linked_shader *consumer)
{
   glsl_symbol_table parameters;
   struct explicit_location_info output_explicit_locations[MAX_VARYING][4] = {};
   struct explicit_location_info input_explicit_locations[MAX_VARYING][4] = {};

   /* Find all shader outputs in the "producer" stage.
    */
   foreach_in_list(ir_instruction, node, producer->ir) {
      ir_variable *const var = node->as_variable();

      if (var == NULL || var->data.mode != ir_var_shader_out)
         continue;

      if (!var->data.explicit_location
          || var->data.location < VARYING_SLOT_VAR0)
         parameters.add_variable(var);
      else {
         /* User-defined varyings with explicit locations are handled
          * differently because they do not need to have matching names.
          */
         if (!validate_explicit_variable_location(ctx,
                                                  output_explicit_locations,
                                                  var, prog, producer)) {
            return;
         }
      }
   }


   /* Find all shader inputs in the "consumer" stage. Any variables that have
    * matching outputs already in the symbol table must have the same type and
    * qualifiers.
    *
    * Exception: if the consumer is the geometry shader, then the inputs
    * should be arrays and the type of the array element should match the type
    * of the corresponding producer output.
    */
   foreach_in_list(ir_instruction, node, consumer->ir) {
      ir_variable *const input = node->as_variable();

      if (input == NULL || input->data.mode != ir_var_shader_in)
         continue;

      if (strcmp(input->name, "gl_Color") == 0 && input->data.used) {
         /* gl_Color is fed by either/both of the two-sided color outputs. */
         const ir_variable *const front_color =
            parameters.get_variable("gl_FrontColor");

         const ir_variable *const back_color =
            parameters.get_variable("gl_BackColor");

         cross_validate_front_and_back_color(ctx, prog, input,
                                             front_color, back_color,
                                             consumer->Stage, producer->Stage);
      } else if (strcmp(input->name, "gl_SecondaryColor") == 0 && input->data.used) {
         const ir_variable *const front_color =
            parameters.get_variable("gl_FrontSecondaryColor");

         const ir_variable *const back_color =
            parameters.get_variable("gl_BackSecondaryColor");

         cross_validate_front_and_back_color(ctx, prog, input,
                                             front_color, back_color,
                                             consumer->Stage, producer->Stage);
      } else {
         /* The rules for connecting inputs and outputs change in the presence
          * of explicit locations. In this case, we no longer care about the
          * names of the variables. Instead, we care only about the
          * explicitly assigned location.
          */
         ir_variable *output = NULL;
         if (input->data.explicit_location
             && input->data.location >= VARYING_SLOT_VAR0) {

            const glsl_type *type = get_varying_type(input, consumer->Stage);
            unsigned num_elements = type->count_attribute_slots(false);
            unsigned idx =
               compute_variable_location_slot(input, consumer->Stage);
            unsigned slot_limit = idx + num_elements;

            if (!validate_explicit_variable_location(ctx,
                                                     input_explicit_locations,
                                                     input, prog, consumer)) {
               return;
            }

            /* Walk every slot the input occupies and look for the producer
             * output recorded at the same location/component.
             */
            while (idx < slot_limit) {
               if (idx >= MAX_VARYING) {
                  linker_error(prog,
                               "Invalid location %u in %s shader\n", idx,
                               _mesa_shader_stage_to_string(consumer->Stage));
                  return;
               }

               output = output_explicit_locations[idx][input->data.location_frac].var;

               if (output == NULL) {
                  /* A linker failure should only happen when there is no
                   * output declaration and there is Static Use of the
                   * declared input.
                   */
                  if (input->data.used && static_input_output_matching(prog)) {
                     linker_error(prog,
                                  "%s shader input `%s' with explicit location "
                                  "has no matching output\n",
                                  _mesa_shader_stage_to_string(consumer->Stage),
                                  input->name);
                     break;
                  }
               } else if (input->data.location != output->data.location) {
                  linker_error(prog,
                               "%s shader input `%s' with explicit location "
                               "has no matching output\n",
                               _mesa_shader_stage_to_string(consumer->Stage),
                               input->name);
                  break;
               }
               idx++;
            }
         } else {
            /* No explicit location: match by name via the symbol table. */
            output = parameters.get_variable(input->name);
         }

         if (output != NULL) {
            /* Interface blocks have their own validation elsewhere so don't
             * try validating them here.
             */
            if (!(input->get_interface_type() &&
                  output->get_interface_type()))
               cross_validate_types_and_qualifiers(ctx, prog, input, output,
                                                   consumer->Stage,
                                                   producer->Stage);
         } else {
            /* Check for input vars with unmatched output vars in prev stage
             * taking into account that interface blocks could have a matching
             * output but with different name, so we ignore them.
             */
            assert(!input->data.assigned);
            if (input->data.used && !input->get_interface_type() &&
                !input->data.explicit_location &&
                static_input_output_matching(prog))
               linker_error(prog,
                            "%s shader input `%s' "
                            "has no matching output in the previous stage\n",
                            _mesa_shader_stage_to_string(consumer->Stage),
                            input->name);
         }
      }
   }
}

/**
 * Demote shader inputs and outputs that are not used in other stages, and
 * remove them via dead code elimination.
 */
static void
remove_unused_shader_inputs_and_outputs(bool is_separate_shader_object,
                                        gl_linked_shader *sh,
                                        enum ir_variable_mode mode)
{
   /* Separate shader objects may be combined with arbitrary other stages
    * later, so nothing can be considered unused here.
    */
   if (is_separate_shader_object)
      return;

   foreach_in_list(ir_instruction, node, sh->ir) {
      ir_variable *const var = node->as_variable();

      if (var == NULL || var->data.mode != int(mode))
         continue;

      /* A shader 'in' or 'out' variable is only really an input or output if
       * its value is used by other shader stages. This will cause the
       * variable to have a location assigned.
       */
      if (var->data.is_unmatched_generic_inout && !var->data.is_xfb_only) {
         assert(var->data.mode != ir_var_temporary);

         /* Assign zeros to demoted inputs to allow more optimizations. */
         if (var->data.mode == ir_var_shader_in && !var->constant_value)
            var->constant_value = ir_constant::zero(var, var->type);

         var->data.mode = ir_var_auto;
      }
   }

   /* Eliminate code that is now dead due to unused inputs/outputs being
    * demoted.
975 */ 976 while (do_dead_code(sh->ir, false)) 977 ; 978 979} 980 981/** 982 * Initialize this object based on a string that was passed to 983 * glTransformFeedbackVaryings. 984 * 985 * If the input is mal-formed, this call still succeeds, but it sets 986 * this->var_name to a mal-formed input, so tfeedback_decl::find_output_var() 987 * will fail to find any matching variable. 988 */ 989void 990tfeedback_decl::init(struct gl_context *ctx, const void *mem_ctx, 991 const char *input) 992{ 993 /* We don't have to be pedantic about what is a valid GLSL variable name, 994 * because any variable with an invalid name can't exist in the IR anyway. 995 */ 996 997 this->location = -1; 998 this->orig_name = input; 999 this->lowered_builtin_array_variable = none; 1000 this->skip_components = 0; 1001 this->next_buffer_separator = false; 1002 this->matched_candidate = NULL; 1003 this->stream_id = 0; 1004 this->buffer = 0; 1005 this->offset = 0; 1006 1007 if (ctx->Extensions.ARB_transform_feedback3) { 1008 /* Parse gl_NextBuffer. */ 1009 if (strcmp(input, "gl_NextBuffer") == 0) { 1010 this->next_buffer_separator = true; 1011 return; 1012 } 1013 1014 /* Parse gl_SkipComponents. */ 1015 if (strcmp(input, "gl_SkipComponents1") == 0) 1016 this->skip_components = 1; 1017 else if (strcmp(input, "gl_SkipComponents2") == 0) 1018 this->skip_components = 2; 1019 else if (strcmp(input, "gl_SkipComponents3") == 0) 1020 this->skip_components = 3; 1021 else if (strcmp(input, "gl_SkipComponents4") == 0) 1022 this->skip_components = 4; 1023 1024 if (this->skip_components) 1025 return; 1026 } 1027 1028 /* Parse a declaration. 
*/ 1029 const char *base_name_end; 1030 long subscript = parse_program_resource_name(input, strlen(input), 1031 &base_name_end); 1032 this->var_name = ralloc_strndup(mem_ctx, input, base_name_end - input); 1033 if (this->var_name == NULL) { 1034 _mesa_error_no_memory(__func__); 1035 return; 1036 } 1037 1038 if (subscript >= 0) { 1039 this->array_subscript = subscript; 1040 this->is_subscripted = true; 1041 } else { 1042 this->is_subscripted = false; 1043 } 1044 1045 /* For drivers that lower gl_ClipDistance to gl_ClipDistanceMESA, this 1046 * class must behave specially to account for the fact that gl_ClipDistance 1047 * is converted from a float[8] to a vec4[2]. 1048 */ 1049 if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerCombinedClipCullDistance && 1050 strcmp(this->var_name, "gl_ClipDistance") == 0) { 1051 this->lowered_builtin_array_variable = clip_distance; 1052 } 1053 if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerCombinedClipCullDistance && 1054 strcmp(this->var_name, "gl_CullDistance") == 0) { 1055 this->lowered_builtin_array_variable = cull_distance; 1056 } 1057 1058 if (ctx->Const.LowerTessLevel && 1059 (strcmp(this->var_name, "gl_TessLevelOuter") == 0)) 1060 this->lowered_builtin_array_variable = tess_level_outer; 1061 if (ctx->Const.LowerTessLevel && 1062 (strcmp(this->var_name, "gl_TessLevelInner") == 0)) 1063 this->lowered_builtin_array_variable = tess_level_inner; 1064} 1065 1066 1067/** 1068 * Determine whether two tfeedback_decl objects refer to the same variable and 1069 * array index (if applicable). 
 */
bool
tfeedback_decl::is_same(const tfeedback_decl &x, const tfeedback_decl &y)
{
   assert(x.is_varying() && y.is_varying());

   if (strcmp(x.var_name, y.var_name) != 0)
      return false;
   if (x.is_subscripted != y.is_subscripted)
      return false;
   if (x.is_subscripted && x.array_subscript != y.array_subscript)
      return false;
   return true;
}


/**
 * Assign a location and stream ID for this tfeedback_decl object based on the
 * transform feedback candidate found by find_candidate.
 *
 * If an error occurs, the error is reported through linker_error() and false
 * is returned.
 */
bool
tfeedback_decl::assign_location(struct gl_context *ctx,
                                struct gl_shader_program *prog)
{
   assert(this->is_varying());

   /* Location in units of single floats: slot * 4 + component, plus any
    * float offset accumulated from enclosing struct members.
    */
   unsigned fine_location
      = this->matched_candidate->toplevel_var->data.location * 4
      + this->matched_candidate->toplevel_var->data.location_frac
      + this->matched_candidate->struct_offset_floats;
   /* 64-bit types occupy two float components per element. */
   const unsigned dmul =
      this->matched_candidate->type->without_array()->is_64bit() ? 2 : 1;

   if (this->matched_candidate->type->is_array()) {
      /* Array variable */
      const unsigned matrix_cols =
         this->matched_candidate->type->fields.array->matrix_columns;
      const unsigned vector_elements =
         this->matched_candidate->type->fields.array->vector_elements;
      /* Lowered built-ins report the size of the original (pre-lowering)
       * float array, not the packed vec4-backed replacement.
       */
      unsigned actual_array_size;
      switch (this->lowered_builtin_array_variable) {
      case clip_distance:
         actual_array_size = prog->last_vert_prog ?
            prog->last_vert_prog->info.clip_distance_array_size : 0;
         break;
      case cull_distance:
         actual_array_size = prog->last_vert_prog ?
            prog->last_vert_prog->info.cull_distance_array_size : 0;
         break;
      case tess_level_outer:
         actual_array_size = 4;
         break;
      case tess_level_inner:
         actual_array_size = 2;
         break;
      case none:
      default:
         actual_array_size = this->matched_candidate->type->array_size();
         break;
      }

      if (this->is_subscripted) {
         /* Check array bounds. */
         if (this->array_subscript >= actual_array_size) {
            linker_error(prog, "Transform feedback varying %s has index "
                         "%i, but the array size is %u.",
                         this->orig_name, this->array_subscript,
                         actual_array_size);
            return false;
         }
         /* A lowered built-in element is a single float within a vec4,
          * otherwise an element spans the full matrix/vector footprint.
          */
         unsigned array_elem_size = this->lowered_builtin_array_variable ?
            1 : vector_elements * matrix_cols * dmul;
         fine_location += array_elem_size * this->array_subscript;
         this->size = 1;
      } else {
         this->size = actual_array_size;
      }
      this->vector_elements = vector_elements;
      this->matrix_columns = matrix_cols;
      if (this->lowered_builtin_array_variable)
         this->type = GL_FLOAT;
      else
         this->type = this->matched_candidate->type->fields.array->gl_type;
   } else {
      /* Regular variable (scalar, vector, or matrix) */
      if (this->is_subscripted) {
         linker_error(prog, "Transform feedback varying %s requested, "
                      "but %s is not an array.",
                      this->orig_name, this->var_name);
         return false;
      }
      this->size = 1;
      this->vector_elements = this->matched_candidate->type->vector_elements;
      this->matrix_columns = this->matched_candidate->type->matrix_columns;
      this->type = this->matched_candidate->type->gl_type;
   }
   this->location = fine_location / 4;
   this->location_frac = fine_location % 4;

   /* From GL_EXT_transform_feedback:
    *   A program will fail to link if:
    *
    *   * the total number of components to capture in any varying
    *     variable in <varyings> is greater than the constant
    *     MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS_EXT and the
    *     buffer mode is SEPARATE_ATTRIBS_EXT;
    */
   if (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS &&
       this->num_components() >
       ctx->Const.MaxTransformFeedbackSeparateComponents) {
      linker_error(prog, "Transform feedback varying %s exceeds "
                   "MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS.",
                   this->orig_name);
      return false;
   }

   /* Only transform feedback varyings can be assigned to non-zero streams,
    * so assign the stream id here.
    */
   this->stream_id = this->matched_candidate->toplevel_var->data.stream;

   unsigned array_offset = this->array_subscript * 4 * dmul;
   unsigned struct_offset = this->matched_candidate->xfb_offset_floats * 4;
   this->buffer = this->matched_candidate->toplevel_var->data.xfb_buffer;
   this->offset = this->matched_candidate->toplevel_var->data.offset +
                  array_offset + struct_offset;

   return true;
}


/**
 * Number of entries this declaration will add to the Outputs array of
 * gl_transform_feedback_info (0 if this decl is a separator or skip).
 *
 * With a user-specified location the count is derived from the declared
 * shape (array size * matrix columns * rows per element); otherwise it is
 * the component count rounded up to whole vec4 slots, accounting for the
 * starting component offset.
 */
unsigned
tfeedback_decl::get_num_outputs() const
{
   if (!this->is_varying()) {
      return 0;
   }

   if (varying_has_user_specified_location(this->matched_candidate->toplevel_var)) {
      unsigned dmul = this->is_64bit() ? 2 : 1;
      unsigned rows_per_element = DIV_ROUND_UP(this->vector_elements * dmul, 4);
      return this->size * this->matrix_columns * rows_per_element;
   } else {
      return (this->num_components() + this->location_frac + 3) / 4;
   }
}


/**
 * Update gl_transform_feedback_info to reflect this tfeedback_decl.
 *
 * If an error occurs, the error is reported through linker_error() and false
 * is returned.
 */
bool
tfeedback_decl::store(struct gl_context *ctx, struct gl_shader_program *prog,
                      struct gl_transform_feedback_info *info,
                      unsigned buffer, unsigned buffer_index,
                      const unsigned max_outputs,
                      BITSET_WORD *used_components[MAX_FEEDBACK_BUFFERS],
                      bool *explicit_stride, unsigned *max_member_alignment,
                      bool has_xfb_qualifiers, const void* mem_ctx) const
{
   unsigned xfb_offset = 0;
   unsigned size = this->size;
   /* Handle gl_SkipComponents: only advances the buffer stride; no Outputs
    * entries are generated, but a Varyings entry still records the skip.
    */
   if (this->skip_components) {
      info->Buffers[buffer].Stride += this->skip_components;
      size = this->skip_components;
      goto store_varying;
   }

   /* gl_NextBuffer separator: record a zero-size varying entry only. */
   if (this->next_buffer_separator) {
      size = 0;
      goto store_varying;
   }

   /* With explicit xfb_offset qualifiers the offset comes from the shader;
    * otherwise varyings are packed back-to-back at the current stride.
    * Both are tracked in float components here; *4 converts to bytes.
    */
   if (has_xfb_qualifiers) {
      xfb_offset = this->offset / 4;
   } else {
      xfb_offset = info->Buffers[buffer].Stride;
   }
   info->Varyings[info->NumVarying].Offset = xfb_offset * 4;

   {
      unsigned location = this->location;
      unsigned location_frac = this->location_frac;
      unsigned num_components = this->num_components();

      /* From GL_EXT_transform_feedback:
       *
       *   " A program will fail to link if:
       *
       *     * the total number of components to capture is greater than the
       *       constant MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS_EXT
       *       and the buffer mode is INTERLEAVED_ATTRIBS_EXT."
       *
       * From GL_ARB_enhanced_layouts:
       *
       *   " The resulting stride (implicit or explicit) must be less than or
       *     equal to the implementation-dependent constant
       *     gl_MaxTransformFeedbackInterleavedComponents."
       */
      if ((prog->TransformFeedback.BufferMode == GL_INTERLEAVED_ATTRIBS ||
           has_xfb_qualifiers) &&
          xfb_offset + num_components >
          ctx->Const.MaxTransformFeedbackInterleavedComponents) {
         linker_error(prog,
                      "The MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS "
                      "limit has been exceeded.");
         return false;
      }

      /* From the OpenGL 4.60.5 spec, section 4.4.2. Output Layout Qualifiers,
       * Page 76, (Transform Feedback Layout Qualifiers):
       *
       *   " No aliasing in output buffers is allowed: It is a compile-time or
       *     link-time error to specify variables with overlapping transform
       *     feedback offsets."
       *
       * Overlap detection is done with a per-buffer bitset, one bit per
       * float component, allocated lazily below.
       */
      const unsigned max_components =
         ctx->Const.MaxTransformFeedbackInterleavedComponents;
      const unsigned first_component = xfb_offset;
      const unsigned last_component = xfb_offset + num_components - 1;
      const unsigned start_word = BITSET_BITWORD(first_component);
      const unsigned end_word = BITSET_BITWORD(last_component);
      BITSET_WORD *used;
      assert(last_component < max_components);

      if (!used_components[buffer]) {
         used_components[buffer] =
            rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(max_components));
      }
      used = used_components[buffer];

      /* Mark the captured component range word by word, clipping the range
       * within the first and last words.
       */
      for (unsigned word = start_word; word <= end_word; word++) {
         unsigned start_range = 0;
         unsigned end_range = BITSET_WORDBITS - 1;

         if (word == start_word)
            start_range = first_component % BITSET_WORDBITS;

         if (word == end_word)
            end_range = last_component % BITSET_WORDBITS;

         if (used[word] & BITSET_RANGE(start_range, end_range)) {
            linker_error(prog,
                         "variable '%s', xfb_offset (%d) is causing aliasing.",
                         this->orig_name, xfb_offset * 4);
            return false;
         }
         used[word] |= BITSET_RANGE(start_range, end_range);
      }

      const unsigned type_num_components =
         this->vector_elements * (this->is_64bit() ? 2 : 1);
      unsigned current_type_components_left = type_num_components;

      /* Emit one Outputs entry per (partial) slot until all components of
       * this declaration are covered.
       */
      while (num_components > 0) {
         unsigned output_size = 0;

         /*  From GL_ARB_enhanced_layouts:
          *
          * "When an attribute variable declared using an array type is bound to
          * generic attribute index <i>, the active array elements are assigned to
          * consecutive generic attributes beginning with generic attribute <i>.  The
          * number of attributes and components assigned to each element are
          * determined according to the data type of array elements and "component"
          * layout qualifier (if any) specified in the declaration of the array."
          *
          * "When an attribute variable declared using a matrix type is bound to a
          * generic attribute index <i>, its values are taken from consecutive generic
          * attributes beginning with generic attribute <i>.  Such matrices are
          * treated as an array of column vectors with values taken from the generic
          * attributes.
          * This means there may be gaps in the varyings we are taking values from."
          *
          * Examples:
          *
          * | layout(location=0) dvec3[2] a; | layout(location=4) vec2[4] b; |
          * |                                |                               |
          * |        32b  32b  32b  32b      |        32b  32b  32b  32b     |
          * |      0  X    X    Y    Y       |      4  X    Y    0    0      |
          * |      1  Z    Z    0    0       |      5  X    Y    0    0      |
          * |      2  X    X    Y    Y       |      6  X    Y    0    0      |
          * |      3  Z    Z    0    0       |      7  X    Y    0    0      |
          *
          */
         if (varying_has_user_specified_location(this->matched_candidate->toplevel_var)) {
            output_size = MIN3(num_components, current_type_components_left, 4);
            current_type_components_left -= output_size;
            if (current_type_components_left == 0) {
               current_type_components_left = type_num_components;
            }
         } else {
            output_size = MIN2(num_components, 4 - location_frac);
         }

         assert((info->NumOutputs == 0 && max_outputs == 0) ||
                info->NumOutputs < max_outputs);

         /* From the ARB_enhanced_layouts spec:
          *
          *    "If such a block member or variable is not written during a shader
          *    invocation, the buffer contents at the assigned offset will be
          *    undefined.  Even if there are no static writes to a variable or
          *    member that is assigned a transform feedback offset, the space is
          *    still allocated in the buffer and still affects the stride."
          *
          * So unwritten varyings advance xfb_offset below but do not produce
          * an Outputs entry.
          */
         if (this->is_varying_written()) {
            info->Outputs[info->NumOutputs].ComponentOffset = location_frac;
            info->Outputs[info->NumOutputs].OutputRegister = location;
            info->Outputs[info->NumOutputs].NumComponents = output_size;
            info->Outputs[info->NumOutputs].StreamId = stream_id;
            info->Outputs[info->NumOutputs].OutputBuffer = buffer;
            info->Outputs[info->NumOutputs].DstOffset = xfb_offset;
            ++info->NumOutputs;
         }
         info->Buffers[buffer].Stream = this->stream_id;
         xfb_offset += output_size;

         num_components -= output_size;
         location++;
         location_frac = 0;
      }
   }

   if (explicit_stride && explicit_stride[buffer]) {
      /* Explicit xfb_stride: validate alignment and that we fit inside it. */
      if (this->is_64bit() && info->Buffers[buffer].Stride % 2) {
         linker_error(prog, "invalid qualifier xfb_stride=%d must be a "
                      "multiple of 8 as its applied to a type that is or "
                      "contains a double.",
                      info->Buffers[buffer].Stride * 4);
         return false;
      }

      if (xfb_offset > info->Buffers[buffer].Stride) {
         linker_error(prog, "xfb_offset (%d) overflows xfb_stride (%d) for "
                      "buffer (%d)", xfb_offset * 4,
                      info->Buffers[buffer].Stride * 4, buffer);
         return false;
      }
   } else {
      /* Implicit stride: grow it to cover this varying, aligned to the
       * largest member alignment seen so far when xfb qualifiers are used.
       */
      if (max_member_alignment && has_xfb_qualifiers) {
         max_member_alignment[buffer] = MAX2(max_member_alignment[buffer],
                                             this->is_64bit() ? 2 : 1);
         info->Buffers[buffer].Stride = ALIGN(xfb_offset,
                                              max_member_alignment[buffer]);
      } else {
         info->Buffers[buffer].Stride = xfb_offset;
      }
   }

 store_varying:
   info->Varyings[info->NumVarying].Name = ralloc_strdup(prog,
                                                         this->orig_name);
   info->Varyings[info->NumVarying].Type = this->type;
   info->Varyings[info->NumVarying].Size = size;
   info->Varyings[info->NumVarying].BufferIndex = buffer_index;
   info->NumVarying++;
   info->Buffers[buffer].NumVaryings++;

   return true;
}


/**
 * Look up the transform feedback candidate matching this declaration's
 * variable name, accounting for lowered built-in arrays which are stored
 * under their *MESA replacement names.  Reports a linker error when no
 * candidate exists, as required by GL_EXT_transform_feedback.
 */
const tfeedback_candidate *
tfeedback_decl::find_candidate(gl_shader_program *prog,
                               hash_table *tfeedback_candidates)
{
   const char *name = this->var_name;
   switch (this->lowered_builtin_array_variable) {
   case none:
      name = this->var_name;
      break;
   case clip_distance:
      name = "gl_ClipDistanceMESA";
      break;
   case cull_distance:
      name = "gl_CullDistanceMESA";
      break;
   case tess_level_outer:
      name = "gl_TessLevelOuterMESA";
      break;
   case tess_level_inner:
      name = "gl_TessLevelInnerMESA";
      break;
   }
   hash_entry *entry = _mesa_hash_table_search(tfeedback_candidates, name);

   this->matched_candidate = entry ?
         (const tfeedback_candidate *) entry->data : NULL;

   if (!this->matched_candidate) {
      /* From GL_EXT_transform_feedback:
       *   A program will fail to link if:
       *
       *   * any variable name specified in the <varyings> array is not
       *     declared as an output in the geometry shader (if present) or
       *     the vertex shader (if no geometry shader is present);
       */
      linker_error(prog, "Transform feedback varying %s undeclared.",
                   this->orig_name);
   }

   return this->matched_candidate;
}

/**
 * Force a candidate over the previously matched one.
   It happens when a new
 * varying needs to be created to match the xfb declaration, for example,
 * to fulfill an alignment criteria.
 */
void
tfeedback_decl::set_lowered_candidate(const tfeedback_candidate *candidate)
{
   this->matched_candidate = candidate;

   /* The subscript part is no longer relevant */
   this->is_subscripted = false;
   this->array_subscript = 0;
}


/**
 * Parse all the transform feedback declarations that were passed to
 * glTransformFeedbackVaryings() and store them in tfeedback_decl objects.
 *
 * If an error occurs, the error is reported through linker_error() and false
 * is returned.
 */
static bool
parse_tfeedback_decls(struct gl_context *ctx, struct gl_shader_program *prog,
                      const void *mem_ctx, unsigned num_names,
                      char **varying_names, tfeedback_decl *decls)
{
   for (unsigned i = 0; i < num_names; ++i) {
      decls[i].init(ctx, mem_ctx, varying_names[i]);

      /* Separators and skips can never collide with a varying. */
      if (!decls[i].is_varying())
         continue;

      /* From GL_EXT_transform_feedback:
       *   A program will fail to link if:
       *
       *   * any two entries in the <varyings> array specify the same varying
       *     variable;
       *
       * We interpret this to mean "any two entries in the <varyings> array
       * specify the same varying variable and array index", since transform
       * feedback of arrays would be useless otherwise.
       */
      for (unsigned j = 0; j < i; ++j) {
         if (decls[j].is_varying()) {
            if (tfeedback_decl::is_same(decls[i], decls[j])) {
               linker_error(prog, "Transform feedback varying %s specified "
                            "more than once.", varying_names[i]);
               return false;
            }
         }
      }
   }
   return true;
}


/**
 * qsort comparator ordering tfeedback_decls by buffer index first, then by
 * xfb offset within the buffer.
 */
static int
cmp_xfb_offset(const void * x_generic, const void * y_generic)
{
   tfeedback_decl *x = (tfeedback_decl *) x_generic;
   tfeedback_decl *y = (tfeedback_decl *) y_generic;

   if (x->get_buffer() != y->get_buffer())
      return x->get_buffer() - y->get_buffer();
   return x->get_offset() - y->get_offset();
}

/**
 * Store transform feedback location assignments into
 * prog->sh.LinkedTransformFeedback based on the data stored in
 * tfeedback_decls.
 *
 * If an error occurs, the error is reported through linker_error() and false
 * is returned.
 */
static bool
store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog,
                     unsigned num_tfeedback_decls,
                     tfeedback_decl *tfeedback_decls, bool has_xfb_qualifiers,
                     const void *mem_ctx)
{
   /* Nothing to capture without a vertex-pipeline program. */
   if (!prog->last_vert_prog)
      return true;

   /* Make sure MaxTransformFeedbackBuffers is less than 32 so the bitmask for
    * tracking the number of buffers doesn't overflow.
    */
   assert(ctx->Const.MaxTransformFeedbackBuffers < 32);

   bool separate_attribs_mode =
      prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS;

   struct gl_program *xfb_prog = prog->last_vert_prog;
   xfb_prog->sh.LinkedTransformFeedback =
      rzalloc(xfb_prog, struct gl_transform_feedback_info);

   /* The xfb_offset qualifier does not have to be used in increasing order
    * however some drivers expect to receive the list of transform feedback
    * declarations in order so sort it now for convenience.
    */
   if (has_xfb_qualifiers) {
      qsort(tfeedback_decls, num_tfeedback_decls, sizeof(*tfeedback_decls),
            cmp_xfb_offset);
   }

   xfb_prog->sh.LinkedTransformFeedback->Varyings =
      rzalloc_array(xfb_prog, struct gl_transform_feedback_varying_info,
                    num_tfeedback_decls);

   /* Pre-count Outputs entries so the array can be allocated up front. */
   unsigned num_outputs = 0;
   for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
      if (tfeedback_decls[i].is_varying_written())
         num_outputs += tfeedback_decls[i].get_num_outputs();
   }

   xfb_prog->sh.LinkedTransformFeedback->Outputs =
      rzalloc_array(xfb_prog, struct gl_transform_feedback_output,
                    num_outputs);

   unsigned num_buffers = 0;
   unsigned buffers = 0;   /* bitmask of buffers with at least one varying */
   BITSET_WORD *used_components[MAX_FEEDBACK_BUFFERS] = {};

   if (!has_xfb_qualifiers && separate_attribs_mode) {
      /* GL_SEPARATE_ATTRIBS: each declaration gets its own buffer. */
      for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
         if (!tfeedback_decls[i].store(ctx, prog,
                                       xfb_prog->sh.LinkedTransformFeedback,
                                       num_buffers, num_buffers, num_outputs,
                                       used_components, NULL, NULL,
                                       has_xfb_qualifiers, mem_ctx))
            return false;

         buffers |= 1 << num_buffers;
         num_buffers++;
      }
   }
   else {
      /* GL_INTERLEAVED_ATTRIBS */
      int buffer_stream_id = -1;
      unsigned buffer =
         num_tfeedback_decls ? tfeedback_decls[0].get_buffer() : 0;
      bool explicit_stride[MAX_FEEDBACK_BUFFERS] = { false };
      unsigned max_member_alignment[MAX_FEEDBACK_BUFFERS] = { 1, 1, 1, 1 };
      /* Apply any xfb_stride global qualifiers */
      if (has_xfb_qualifiers) {
         for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
            if (prog->TransformFeedback.BufferStride[j]) {
               explicit_stride[j] = true;
               xfb_prog->sh.LinkedTransformFeedback->Buffers[j].Stride =
                  prog->TransformFeedback.BufferStride[j] / 4;
            }
         }
      }

      for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
         if (has_xfb_qualifiers &&
             buffer != tfeedback_decls[i].get_buffer()) {
            /* we have moved to the next buffer so reset stream id */
            buffer_stream_id = -1;
            num_buffers++;
         }

         if (tfeedback_decls[i].is_next_buffer_separator()) {
            if (!tfeedback_decls[i].store(ctx, prog,
                                          xfb_prog->sh.LinkedTransformFeedback,
                                          buffer, num_buffers, num_outputs,
                                          used_components, explicit_stride,
                                          max_member_alignment,
                                          has_xfb_qualifiers,
                                          mem_ctx))
               return false;
            num_buffers++;
            buffer_stream_id = -1;
            continue;
         }

         if (has_xfb_qualifiers) {
            buffer = tfeedback_decls[i].get_buffer();
         } else {
            buffer = num_buffers;
         }

         if (tfeedback_decls[i].is_varying()) {
            if (buffer_stream_id == -1) {
               /* First varying writing to this buffer: remember its stream */
               buffer_stream_id = (int) tfeedback_decls[i].get_stream_id();

               /* Only mark a buffer as active when there is a varying
                * attached to it. This behaviour is based on a revised version
                * of section 13.2.2 of the GL 4.6 spec.
                */
               buffers |= 1 << buffer;
            } else if (buffer_stream_id !=
                       (int) tfeedback_decls[i].get_stream_id()) {
               /* Varying writes to the same buffer from a different stream */
               linker_error(prog,
                            "Transform feedback can't capture varyings belonging "
                            "to different vertex streams in a single buffer. "
                            "Varying %s writes to buffer from stream %u, other "
                            "varyings in the same buffer write from stream %u.",
                            tfeedback_decls[i].name(),
                            tfeedback_decls[i].get_stream_id(),
                            buffer_stream_id);
               return false;
            }
         }

         if (!tfeedback_decls[i].store(ctx, prog,
                                       xfb_prog->sh.LinkedTransformFeedback,
                                       buffer, num_buffers, num_outputs,
                                       used_components, explicit_stride,
                                       max_member_alignment,
                                       has_xfb_qualifiers,
                                       mem_ctx))
            return false;
      }
   }

   assert(xfb_prog->sh.LinkedTransformFeedback->NumOutputs == num_outputs);

   xfb_prog->sh.LinkedTransformFeedback->ActiveBuffers = buffers;
   return true;
}

namespace {

/**
 * Data structure recording the relationship between outputs of one shader
 * stage (the "producer") and inputs of another (the "consumer").
 */
class varying_matches
{
public:
   varying_matches(bool disable_varying_packing,
                   bool disable_xfb_packing,
                   bool xfb_enabled,
                   bool enhanced_layouts_enabled,
                   gl_shader_stage producer_stage,
                   gl_shader_stage consumer_stage);
   ~varying_matches();
   void record(ir_variable *producer_var, ir_variable *consumer_var);
   unsigned assign_locations(struct gl_shader_program *prog,
                             uint8_t components[],
                             uint64_t reserved_slots);
   void store_locations() const;

private:
   bool is_varying_packing_safe(const glsl_type *type,
                                const ir_variable *var) const;

   /**
    * If true, this driver disables varying packing, so all varyings need to
    * be aligned on slot boundaries, and take up a number of slots equal to
    * their number of matrix columns times their array size.
    *
    * Packing may also be disabled because our current packing method is not
    * safe in SSO or versions of OpenGL where interpolation qualifiers are not
    * guaranteed to match across stages.
    */
   const bool disable_varying_packing;

   /**
    * If true, this driver disables packing for varyings used by transform
    * feedback.
    */
   const bool disable_xfb_packing;

   /**
    * If true, this driver has transform feedback enabled. The transform
    * feedback code usually requires at least some packing be done even
    * when varying packing is disabled, fortunately where transform feedback
    * requires packing it's safe to override the disabled setting. See
    * is_varying_packing_safe().
    */
   const bool xfb_enabled;

   /** If true, GL_ARB_enhanced_layouts (explicit xfb layout) is enabled. */
   const bool enhanced_layouts_enabled;

   /**
    * Enum representing the order in which varyings are packed within a
    * packing class.
    *
    * Currently we pack vec4's first, then vec2's, then scalar values, then
    * vec3's. This order ensures that the only vectors that are at risk of
    * having to be "double parked" (split between two adjacent varying slots)
    * are the vec3's.
    */
   enum packing_order_enum {
      PACKING_ORDER_VEC4,
      PACKING_ORDER_VEC2,
      PACKING_ORDER_SCALAR,
      PACKING_ORDER_VEC3,
   };

   static unsigned compute_packing_class(const ir_variable *var);
   static packing_order_enum compute_packing_order(const ir_variable *var);
   static int match_comparator(const void *x_generic, const void *y_generic);
   static int xfb_comparator(const void *x_generic, const void *y_generic);
   static int not_xfb_comparator(const void *x_generic, const void *y_generic);

   /**
    * Structure recording the relationship between a single producer output
    * and a single consumer input.
    */
   struct match {
      /**
       * Packing class for this varying, computed by compute_packing_class().
       */
      unsigned packing_class;

      /**
       * Packing order for this varying, computed by compute_packing_order().
       */
      packing_order_enum packing_order;

      /**
       * The output variable in the producer stage.
       */
      ir_variable *producer_var;

      /**
       * The input variable in the consumer stage.
       */
      ir_variable *consumer_var;

      /**
       * The location which has been assigned for this varying.  This is
       * expressed in multiples of a float, with the first generic varying
       * (i.e. the one referred to by VARYING_SLOT_VAR0) represented by the
       * value 0.
       */
      unsigned generic_location;
   } *matches;

   /**
    * The number of elements in the \c matches array that are currently in
    * use.
    */
   unsigned num_matches;

   /**
    * The number of elements that were set aside for the \c matches array when
    * it was allocated.
    */
   unsigned matches_capacity;

   gl_shader_stage producer_stage;
   gl_shader_stage consumer_stage;
};

} /* anonymous namespace */

varying_matches::varying_matches(bool disable_varying_packing,
                                 bool disable_xfb_packing,
                                 bool xfb_enabled,
                                 bool enhanced_layouts_enabled,
                                 gl_shader_stage producer_stage,
                                 gl_shader_stage consumer_stage)
   : disable_varying_packing(disable_varying_packing),
     disable_xfb_packing(disable_xfb_packing),
     xfb_enabled(xfb_enabled),
     enhanced_layouts_enabled(enhanced_layouts_enabled),
     producer_stage(producer_stage),
     consumer_stage(consumer_stage)
{
   /* Note: this initial capacity is rather arbitrarily chosen to be large
    * enough for many cases without wasting an unreasonable amount of space.
    * varying_matches::record() will resize the array if there are more than
    * this number of varyings.
    */
   this->matches_capacity = 8;
   this->matches = (match *)
      malloc(sizeof(*this->matches) * this->matches_capacity);
   this->num_matches = 0;
}


varying_matches::~varying_matches()
{
   free(this->matches);
}


/**
 * Packing is always safe on individual arrays, structures, and matrices.  It
 * is also safe if the varying is only used for transform feedback.
 *
 * Tessellation stages are excluded: packing is never considered safe when
 * the producer or consumer is a tessellation control/evaluation shader.
 */
bool
varying_matches::is_varying_packing_safe(const glsl_type *type,
                                         const ir_variable *var) const
{
   if (consumer_stage == MESA_SHADER_TESS_EVAL ||
       consumer_stage == MESA_SHADER_TESS_CTRL ||
       producer_stage == MESA_SHADER_TESS_CTRL)
      return false;

   return xfb_enabled && (type->is_array() || type->is_struct() ||
                          type->is_matrix() || var->data.is_xfb_only);
}


/**
 * Record the given producer/consumer variable pair in the list of variables
 * that should later be assigned locations.
 *
 * It is permissible for \c consumer_var to be NULL (this happens if a
 * variable is output by the producer and consumed by transform feedback, but
 * not consumed by the consumer).
 *
 * If \c producer_var has already been paired up with a consumer_var, or
 * producer_var is part of fixed pipeline functionality (and hence already has
 * a location assigned), this function has no effect.
 *
 * Note: as a side effect this function may change the interpolation type of
 * \c producer_var, but only when the change couldn't possibly affect
 * rendering.
 */
void
varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
{
   assert(producer_var != NULL || consumer_var != NULL);

   if ((producer_var && (!producer_var->data.is_unmatched_generic_inout ||
       producer_var->data.explicit_location)) ||
       (consumer_var && (!consumer_var->data.is_unmatched_generic_inout ||
       consumer_var->data.explicit_location))) {
      /* Either a location already exists for this variable (since it is part
       * of fixed functionality), or it has already been recorded as part of a
       * previous match.
       */
      return;
   }

   /* Output captured only by transform feedback (no consumer) that carries
    * integer or double data must become flat below.
    */
   bool needs_flat_qualifier = consumer_var == NULL &&
      (producer_var->type->contains_integer() ||
       producer_var->type->contains_double());

   if (!disable_varying_packing &&
       (!disable_xfb_packing || producer_var == NULL || !producer_var->data.is_xfb) &&
       (needs_flat_qualifier ||
        (consumer_stage != MESA_SHADER_NONE && consumer_stage != MESA_SHADER_FRAGMENT))) {
      /* Since this varying is not being consumed by the fragment shader, its
       * interpolation type varying cannot possibly affect rendering.
       * Also, this variable is non-flat and is (or contains) an integer
       * or a double.
       * If the consumer stage is unknown, don't modify the interpolation
       * type as it could affect rendering later with separate shaders.
       *
       * lower_packed_varyings requires all integer varyings to flat,
       * regardless of where they appear.  We can trivially satisfy that
       * requirement by changing the interpolation type to flat here.
       */
      if (producer_var) {
         producer_var->data.centroid = false;
         producer_var->data.sample = false;
         producer_var->data.interpolation = INTERP_MODE_FLAT;
      }

      if (consumer_var) {
         consumer_var->data.centroid = false;
         consumer_var->data.sample = false;
         consumer_var->data.interpolation = INTERP_MODE_FLAT;
      }
   }

   /* Grow the match array geometrically when full.
    * NOTE(review): `p = realloc(p, …)` loses the original pointer if realloc
    * fails; like the ctor's malloc, failure handling appears to be
    * intentionally omitted — confirm against project convention.
    */
   if (this->num_matches == this->matches_capacity) {
      this->matches_capacity *= 2;
      this->matches = (match *)
         realloc(this->matches,
                 sizeof(*this->matches) * this->matches_capacity);
   }

   /* We must use the consumer to compute the packing class because in GL4.4+
    * there is no guarantee interpolation qualifiers will match across stages.
    *
    * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.30 spec:
    *
    *    "The type and presence of interpolation qualifiers of variables with
    *    the same name declared in all linked shaders for the same cross-stage
    *    interface must match, otherwise the link command will fail.
    *
    *    When comparing an output from one stage to an input of a subsequent
    *    stage, the input and output don't match if their interpolation
    *    qualifiers (or lack thereof) are not the same."
    *
    * This text was also in at least revison 7 of the 4.40 spec but is no
    * longer in revision 9 and not in the 4.50 spec.
    */
   const ir_variable *const var = (consumer_var != NULL)
      ? consumer_var : producer_var;

   if (producer_var && consumer_var &&
       consumer_var->data.must_be_shader_input) {
      producer_var->data.must_be_shader_input = 1;
   }

   this->matches[this->num_matches].packing_class
      = this->compute_packing_class(var);
   this->matches[this->num_matches].packing_order
      = this->compute_packing_order(var);

   this->matches[this->num_matches].producer_var = producer_var;
   this->matches[this->num_matches].consumer_var = consumer_var;
   this->num_matches++;
   /* Mark both sides as matched so a later record() call is a no-op. */
   if (producer_var)
      producer_var->data.is_unmatched_generic_inout = 0;
   if (consumer_var)
      consumer_var->data.is_unmatched_generic_inout = 0;
}


/**
 * Choose locations for all of the variable matches that were previously
 * passed to varying_matches::record().
 * \param components  returns array[slot] of number of components used
 *                    per slot (1, 2, 3 or 4)
 * \param reserved_slots  bitmask indicating which varying slots are already
 *                        allocated
 * \return number of slots (4-element vectors) allocated
 */
unsigned
varying_matches::assign_locations(struct gl_shader_program *prog,
                                  uint8_t components[],
                                  uint64_t reserved_slots)
{
   /* If packing has been disabled then we cannot safely sort the varyings by
    * class as it may mean we are using a version of OpenGL where
    * interpolation qualifiers are not guaranteed to be matching across
    * shaders, sorting in this case could result in mismatching shader
    * interfaces.
    * When packing is disabled the sort orders varyings used by transform
    * feedback first, but also depends on *undefined behaviour* of qsort to
    * reverse the order of the varyings. See: xfb_comparator().
    *
    * If packing is only disabled for xfb varyings (mutually exclusive with
    * disable_varying_packing), we then group varyings depending on if they
    * are captured for transform feedback. The same *undefined behaviour* is
    * taken advantage of.
    */
   if (this->disable_varying_packing) {
      /* Only sort varyings that are only used by transform feedback. */
      qsort(this->matches, this->num_matches, sizeof(*this->matches),
            &varying_matches::xfb_comparator);
   } else if (this->disable_xfb_packing) {
      /* Only sort varyings that are NOT used by transform feedback. */
      qsort(this->matches, this->num_matches, sizeof(*this->matches),
            &varying_matches::not_xfb_comparator);
   } else {
      /* Sort varying matches into an order that makes them easy to pack. */
      qsort(this->matches, this->num_matches, sizeof(*this->matches),
            &varying_matches::match_comparator);
   }

   /* Locations are tracked in scalar components (4 per vec4 slot); per-patch
    * varyings live in a separate domain starting at MAX_VARYING * 4.
    */
   unsigned generic_location = 0;
   unsigned generic_patch_location = MAX_VARYING*4;
   bool previous_var_xfb = false;
   bool previous_var_xfb_only = false;
   unsigned previous_packing_class = ~0u;

   /* For tranform feedback separate mode, we know the number of attributes
    * is <= the number of buffers.  So packing isn't critical.  In fact,
    * packing vec3 attributes can cause trouble because splitting a vec3
    * effectively creates an additional transform feedback output.  The
    * extra TFB output may exceed device driver limits.
    */
   const bool dont_pack_vec3 =
      (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS &&
       prog->TransformFeedback.NumVarying > 0);

   for (unsigned i = 0; i < this->num_matches; i++) {
      unsigned *location = &generic_location;
      const ir_variable *var;
      const glsl_type *type;
      bool is_vertex_input = false;

      /* Prefer the consumer's view of the varying when both exist (see the
       * packing-class comment in record()).
       */
      if (matches[i].consumer_var) {
         var = matches[i].consumer_var;
         type = get_varying_type(var, consumer_stage);
         if (consumer_stage == MESA_SHADER_VERTEX)
            is_vertex_input = true;
      } else {
         var = matches[i].producer_var;
         type = get_varying_type(var, producer_stage);
      }

      if (var->data.patch)
         location = &generic_patch_location;

      /* Advance to the next slot if this varying has a different packing
       * class than the previous one, and we're not already on a slot
       * boundary.
       *
       * Also advance if varying packing is disabled for transform feedback,
       * and previous or current varying is used for transform feedback.
       *
       * Also advance to the next slot if packing is disabled. This makes sure
       * we don't assign varyings the same locations which is possible
       * because we still pack individual arrays, records and matrices even
       * when packing is disabled. Note we don't advance to the next slot if
       * we can pack varyings together that are only used for transform
       * feedback.
       */
      if (var->data.must_be_shader_input ||
          (this->disable_xfb_packing &&
           (previous_var_xfb || var->data.is_xfb)) ||
          (this->disable_varying_packing &&
           !(previous_var_xfb_only && var->data.is_xfb_only)) ||
          (previous_packing_class != this->matches[i].packing_class) ||
          (this->matches[i].packing_order == PACKING_ORDER_VEC3 &&
           dont_pack_vec3)) {
         *location = ALIGN(*location, 4);
      }

      previous_var_xfb = var->data.is_xfb;
      previous_var_xfb_only = var->data.is_xfb_only;
      previous_packing_class = this->matches[i].packing_class;

      /* The number of components taken up by this variable. For vertex shader
       * inputs, we use the number of slots * 4, as they have different
       * counting rules.
       */
      unsigned num_components = 0;
      if (is_vertex_input) {
         num_components = type->count_attribute_slots(is_vertex_input) * 4;
      } else {
         if ((this->disable_varying_packing &&
              !is_varying_packing_safe(type, var)) ||
             (this->disable_xfb_packing && var->data.is_xfb &&
              !(type->is_array() || type->is_struct() || type->is_matrix())) ||
             var->data.must_be_shader_input) {
            num_components = type->count_attribute_slots(false) * 4;
         } else {
            num_components = type->component_slots_aligned(*location);
         }
      }

      /* The last slot for this variable, inclusive. */
      unsigned slot_end = *location + num_components - 1;

      /* FIXME: We could be smarter in the below code and loop back over
       * trying to fill any locations that we skipped because we couldn't pack
       * the varying between an explicit location. For now just let the user
       * hit the linking error if we run out of room and suggest they use
       * explicit locations.
       */
      while (slot_end < MAX_VARYING * 4u) {
         const unsigned slots = (slot_end / 4u) - (*location / 4u) + 1;
         const uint64_t slot_mask = ((1ull << slots) - 1) << (*location / 4u);

         assert(slots > 0);

         /* Stop once the candidate range does not collide with any slot
          * reserved by an explicit location; otherwise skip to the next
          * vec4 boundary and retry.
          */
         if ((reserved_slots & slot_mask) == 0) {
            break;
         }

         *location = ALIGN(*location + 1, 4);
         slot_end = *location + num_components - 1;
      }

      if (!var->data.patch && slot_end >= MAX_VARYING * 4u) {
         linker_error(prog, "insufficient contiguous locations available for "
                      "%s it is possible an array or struct could not be "
                      "packed between varyings with explicit locations. Try "
                      "using an explicit location for arrays and structs.",
                      var->name);
      }

      /* Record per-slot component usage for the caller. */
      if (slot_end < MAX_VARYINGS_INCL_PATCH * 4u) {
         for (unsigned j = *location / 4u; j < slot_end / 4u; j++)
            components[j] = 4;
         components[slot_end / 4u] = (slot_end & 3) + 1;
      }

      this->matches[i].generic_location = *location;

      *location = slot_end + 1;
   }

   /* Round up to whole vec4 slots. */
   return (generic_location + 3) / 4;
}


/**
 * Update the producer and consumer shaders to reflect the locations
 * assignments that were made by varying_matches::assign_locations().
 */
void
varying_matches::store_locations() const
{
   /* Check is location needs to be packed with lower_packed_varyings() or if
    * we can just use ARB_enhanced_layouts packing.
    */
   bool pack_loc[MAX_VARYINGS_INCL_PATCH] = {};
   const glsl_type *loc_type[MAX_VARYINGS_INCL_PATCH][4] = { {NULL, NULL} };

   for (unsigned i = 0; i < this->num_matches; i++) {
      ir_variable *producer_var = this->matches[i].producer_var;
      ir_variable *consumer_var = this->matches[i].consumer_var;
      unsigned generic_location = this->matches[i].generic_location;
      unsigned slot = generic_location / 4;
      unsigned offset = generic_location % 4;

      if (producer_var) {
         producer_var->data.location = VARYING_SLOT_VAR0 + slot;
         producer_var->data.location_frac = offset;
      }

      if (consumer_var) {
         assert(consumer_var->data.location == -1);
         consumer_var->data.location = VARYING_SLOT_VAR0 + slot;
         consumer_var->data.location_frac = offset;
      }

      /* Find locations suitable for native packing via
       * ARB_enhanced_layouts.
       */
      if (producer_var && consumer_var) {
         if (enhanced_layouts_enabled) {
            const glsl_type *type =
               get_varying_type(producer_var, producer_stage);
            if (type->is_array() || type->is_matrix() || type->is_struct() ||
                type->is_64bit()) {
               /* Composite and 64-bit types must go through
                * lower_packed_varyings(); mark every slot they cover.
                */
               unsigned comp_slots = type->component_slots() + offset;
               unsigned slots = comp_slots / 4;
               if (comp_slots % 4)
                  slots += 1;

               for (unsigned j = 0; j < slots; j++) {
                  pack_loc[slot + j] = true;
               }
            } else if (offset + type->vector_elements > 4) {
               /* Vector straddles a vec4 boundary: both slots need the
                * lowering pass.
                */
               pack_loc[slot] = true;
               pack_loc[slot + 1] = true;
            } else {
               /* Remember the type occupying this slot/component so the
                * second pass can check base-type compatibility.
                */
               loc_type[slot][offset] = type;
            }
         }
      }
   }

   /* Attempt to use ARB_enhanced_layouts for more efficient packing if
    * suitable.
    */
   if (enhanced_layouts_enabled) {
      for (unsigned i = 0; i < this->num_matches; i++) {
         ir_variable *producer_var = this->matches[i].producer_var;
         ir_variable *consumer_var = this->matches[i].consumer_var;
         unsigned generic_location = this->matches[i].generic_location;
         unsigned slot = generic_location / 4;

         if (pack_loc[slot] || !producer_var || !consumer_var)
            continue;

         const glsl_type *type =
            get_varying_type(producer_var, producer_stage);
         bool type_match = true;
         /* All varyings sharing the slot must have the same base type for
          * native (enhanced-layouts) packing to be used.
          */
         for (unsigned j = 0; j < 4; j++) {
            if (loc_type[slot][j]) {
               if (type->base_type != loc_type[slot][j]->base_type)
                  type_match = false;
            }
         }

         if (type_match) {
            producer_var->data.explicit_location = 1;
            consumer_var->data.explicit_location = 1;
            producer_var->data.explicit_component = 1;
            consumer_var->data.explicit_component = 1;
         }
      }
   }
}


/**
 * Compute the "packing class" of the given varying.  This is an unsigned
 * integer with the property that two variables in the same packing class can
 * be safely backed into the same vec4.
 */
unsigned
varying_matches::compute_packing_class(const ir_variable *var)
{
   /* Without help from the back-end, there is no way to pack together
    * variables with different interpolation types, because
    * lower_packed_varyings must choose exactly one interpolation type for
    * each packed varying it creates.
    *
    * However, we can safely pack together floats, ints, and uints, because:
    *
    * - varyings of base type "int" and "uint" must use the "flat"
    *   interpolation type, which can only occur in GLSL 1.30 and above.
    *
    * - On platforms that support GLSL 1.30 and above, lower_packed_varyings
    *   can store flat floats as ints without losing any information (using
    *   the ir_unop_bitcast_* opcodes).
    *
    * Therefore, the packing class depends only on the interpolation type.
    */
   const unsigned interp = var->is_interpolation_flat()
      ? unsigned(INTERP_MODE_FLAT) : var->data.interpolation;

   assert(interp < (1 << 3));

   /* Pack the qualifiers into disjoint bitfields: interpolation (3 bits),
    * then centroid, sample, patch and must_be_shader_input flags.
    */
   const unsigned packing_class = (interp << 0) |
                                  (var->data.centroid << 3) |
                                  (var->data.sample << 4) |
                                  (var->data.patch << 5) |
                                  (var->data.must_be_shader_input << 6);

   return packing_class;
}


/**
 * Compute the "packing order" of the given varying.  This is a sort key we
 * use to determine when to attempt to pack the given varying relative to
 * other varyings in the same packing class.
 */
varying_matches::packing_order_enum
varying_matches::compute_packing_order(const ir_variable *var)
{
   /* Strip arrays so the key reflects the element type's footprint. */
   const glsl_type *element_type = var->type;

   while (element_type->is_array()) {
      element_type = element_type->fields.array;
   }

   switch (element_type->component_slots() % 4) {
   case 1: return PACKING_ORDER_SCALAR;
   case 2: return PACKING_ORDER_VEC2;
   case 3: return PACKING_ORDER_VEC3;
   case 0: return PACKING_ORDER_VEC4;
   default:
      /* % 4 can only yield 0-3, so this is unreachable.  NOTE(review): the
       * assert message refers to vector_elements although the switch is on
       * component_slots() % 4.
       */
      assert(!"Unexpected value of vector_elements");
      return PACKING_ORDER_VEC4;
   }
}


/**
 * Comparison function passed to qsort() to sort varyings by packing_class and
 * then by packing_order.
 */
int
varying_matches::match_comparator(const void *x_generic, const void *y_generic)
{
   const match *x = (const match *) x_generic;
   const match *y = (const match *) y_generic;

   if (x->packing_class != y->packing_class)
      return x->packing_class - y->packing_class;
   return x->packing_order - y->packing_order;
}


/**
 * Comparison function passed to qsort() to sort varyings used only by
 * transform feedback when packing of other varyings is disabled.
 */
int
varying_matches::xfb_comparator(const void *x_generic, const void *y_generic)
{
   const match *x = (const match *) x_generic;

   /* Only xfb-only varyings are actually ordered; everything else compares
    * equal (see FIXME below).
    */
   if (x->producer_var != NULL && x->producer_var->data.is_xfb_only)
      return match_comparator(x_generic, y_generic);

   /* FIXME: When the comparator returns 0 it means the elements being
    * compared are equivalent. However the qsort documentation says:
    *
    *    "The order of equivalent elements is undefined."
    *
    * In practice the sort ends up reversing the order of the varyings which
    * means locations are also assigned in this reversed order and happens to
    * be what we want. This is also whats happening in
    * varying_matches::match_comparator().
    */
   return 0;
}


/**
 * Comparison function passed to qsort() to sort varyings NOT used by
 * transform feedback when packing of xfb varyings is disabled.
 */
int
varying_matches::not_xfb_comparator(const void *x_generic, const void *y_generic)
{
   const match *x = (const match *) x_generic;

   if (x->producer_var != NULL && !x->producer_var->data.is_xfb)
      return match_comparator(x_generic, y_generic);

   /* FIXME: When the comparator returns 0 it means the elements being
    * compared are equivalent. However the qsort documentation says:
    *
    *    "The order of equivalent elements is undefined."
    *
    * In practice the sort ends up reversing the order of the varyings which
    * means locations are also assigned in this reversed order and happens to
    * be what we want. This is also whats happening in
    * varying_matches::match_comparator().
    */
   return 0;
}


/**
 * Is the given variable a varying variable to be counted against the
 * limit in ctx->Const.MaxVarying?
 * This includes variables such as texcoords, colors and generic
 * varyings, but excludes variables such as gl_FrontFacing and gl_FragCoord.
 */
static bool
var_counts_against_varying_limit(gl_shader_stage stage, const ir_variable *var)
{
   /* Only fragment shaders will take a varying variable as an input */
   if (stage == MESA_SHADER_FRAGMENT &&
       var->data.mode == ir_var_shader_in) {
      switch (var->data.location) {
      case VARYING_SLOT_POS:
      case VARYING_SLOT_FACE:
      case VARYING_SLOT_PNTC:
         /* Built-ins delivered by fixed function don't count. */
         return false;
      default:
         return true;
      }
   }
   return false;
}


/**
 * Visitor class that generates tfeedback_candidate structs describing all
 * possible targets of transform feedback.
 *
 * tfeedback_candidate structs are stored in the hash table
 * tfeedback_candidates, which is passed to the constructor.  This hash table
 * maps varying names to instances of the tfeedback_candidate struct.
 */
class tfeedback_candidate_generator : public program_resource_visitor
{
public:
   tfeedback_candidate_generator(void *mem_ctx,
                                 hash_table *tfeedback_candidates,
                                 gl_shader_stage stage)
      : mem_ctx(mem_ctx),
        tfeedback_candidates(tfeedback_candidates),
        stage(stage),
        toplevel_var(NULL),
        varying_floats(0),
        xfb_offset_floats(0)
   {
   }

   /**
    * Walk one producer output variable and record a tfeedback_candidate for
    * each leaf field it contains.
    */
   void process(ir_variable *var)
   {
      /* All named varying interface blocks should be flattened by now */
      assert(!var->is_interface_instance());
      assert(var->data.mode == ir_var_shader_out);

      this->toplevel_var = var;
      this->varying_floats = 0;
      this->xfb_offset_floats = 0;
      const glsl_type *t =
         var->data.from_named_ifc_block ? var->get_interface_type() : var->type;
      /* TCS per-vertex outputs are arrays indexed by vertex; strip the
       * outer array before visiting fields.
       */
      if (!var->data.patch && stage == MESA_SHADER_TESS_CTRL) {
         assert(t->is_array());
         t = t->fields.array;
      }
      program_resource_visitor::process(var, t, false);
   }

private:
   /* Called by program_resource_visitor for every leaf (non-struct,
    * non-interface) field of the variable being processed.
    */
   virtual void visit_field(const glsl_type *type, const char *name,
                            bool /* row_major */,
                            const glsl_type * /* record_type */,
                            const enum glsl_interface_packing,
                            bool /* last_field */)
   {
      assert(!type->without_array()->is_struct());
      assert(!type->without_array()->is_interface());

      tfeedback_candidate *candidate
         = rzalloc(this->mem_ctx, tfeedback_candidate);
      candidate->toplevel_var = this->toplevel_var;
      candidate->type = type;

      if (type->without_array()->is_64bit()) {
         /* From ARB_gpu_shader_fp64:
          *
          *   If any variable captured in transform feedback has double-precision
          *   components, the practical requirements for defined behavior are:
          *   ...
          *   (c) each double-precision variable captured must be aligned to a
          *       multiple of eight bytes relative to the beginning of a vertex.
          */
         this->xfb_offset_floats = ALIGN(this->xfb_offset_floats, 2);
         /* 64-bit members of structs are also aligned. */
         this->varying_floats = ALIGN(this->varying_floats, 2);
      }

      candidate->xfb_offset_floats = this->xfb_offset_floats;
      candidate->struct_offset_floats = this->varying_floats;

      _mesa_hash_table_insert(this->tfeedback_candidates,
                              ralloc_strdup(this->mem_ctx, name),
                              candidate);

      const unsigned component_slots = type->component_slots();

      /* Explicitly-located varyings occupy whole slots, so advance by full
       * vec4s; otherwise advance by the packed component count.
       */
      if (varying_has_user_specified_location(this->toplevel_var)) {
         this->varying_floats += type->count_attribute_slots(false) * 4;
      } else {
         this->varying_floats += component_slots;
      }

      this->xfb_offset_floats += component_slots;
   }

   /**
    * Memory context used to allocate hash table keys and values.
    */
   void * const mem_ctx;

   /**
    * Hash table in which tfeedback_candidate objects should be stored.
    */
   hash_table * const tfeedback_candidates;

   /** Shader stage whose outputs are being visited. */
   gl_shader_stage stage;

   /**
    * Pointer to the toplevel variable that is being traversed.
    */
   ir_variable *toplevel_var;

   /**
    * Total number of varying floats that have been visited so far.  This is
    * used to determine the offset to each varying within the toplevel
    * variable.
    */
   unsigned varying_floats;

   /**
    * Offset within the xfb. Counted in floats.
    */
   unsigned xfb_offset_floats;
};


namespace linker {

/**
 * Index the consumer stage's input variables three ways: by explicit
 * location, by "interface.field" name, and by plain name, for fast lookup
 * in get_matching_input().
 */
void
populate_consumer_input_sets(void *mem_ctx, exec_list *ir,
                             hash_table *consumer_inputs,
                             hash_table *consumer_interface_inputs,
                             ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])
{
   memset(consumer_inputs_with_locations,
          0,
          sizeof(consumer_inputs_with_locations[0]) * VARYING_SLOT_TESS_MAX);

   foreach_in_list(ir_instruction, node, ir) {
      ir_variable *const input_var = node->as_variable();

      if (input_var != NULL && input_var->data.mode == ir_var_shader_in) {
         /* All interface blocks should have been lowered by this point */
         assert(!input_var->type->is_interface());

         if (input_var->data.explicit_location) {
            /* assign_varying_locations only cares about finding the
             * ir_variable at the start of a contiguous location block.
             *
             *     - For !producer, consumer_inputs_with_locations isn't used.
             *
             *     - For !consumer, consumer_inputs_with_locations is empty.
             *
             * For consumer && producer, if you were trying to set some
             * ir_variable to the middle of a location block on the other side
             * of producer/consumer, cross_validate_outputs_to_inputs() should
             * be link-erroring due to either type mismatch or location
             * overlaps.  If the variables do match up, then they've got a
             * matching data.location and you only looked at
             * consumer_inputs_with_locations[var->data.location], not any
             * following entries for the array/structure.
             */
            consumer_inputs_with_locations[input_var->data.location] =
               input_var;
         } else if (input_var->get_interface_type() != NULL) {
            char *const iface_field_name =
               ralloc_asprintf(mem_ctx, "%s.%s",
                  input_var->get_interface_type()->without_array()->name,
                  input_var->name);
            _mesa_hash_table_insert(consumer_interface_inputs,
                                    iface_field_name, input_var);
         } else {
            _mesa_hash_table_insert(consumer_inputs,
                                    ralloc_strdup(mem_ctx, input_var->name),
                                    input_var);
         }
      }
   }
}

/**
 * Find a variable from the consumer that "matches" the specified variable
 *
 * This function only finds inputs with names that match.  There is no
 * validation (here) that the types, etc. are compatible.
 */
ir_variable *
get_matching_input(void *mem_ctx,
                   const ir_variable *output_var,
                   hash_table *consumer_inputs,
                   hash_table *consumer_interface_inputs,
                   ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])
{
   ir_variable *input_var;

   /* Lookup strategy mirrors the three indexes built by
    * populate_consumer_input_sets().
    */
   if (output_var->data.explicit_location) {
      input_var = consumer_inputs_with_locations[output_var->data.location];
   } else if (output_var->get_interface_type() != NULL) {
      char *const iface_field_name =
         ralloc_asprintf(mem_ctx, "%s.%s",
            output_var->get_interface_type()->without_array()->name,
            output_var->name);
      hash_entry *entry = _mesa_hash_table_search(consumer_interface_inputs, iface_field_name);
      input_var = entry ? (ir_variable *) entry->data : NULL;
   } else {
      hash_entry *entry = _mesa_hash_table_search(consumer_inputs, output_var->name);
      input_var = entry ? (ir_variable *) entry->data : NULL;
   }

   /* Only a genuine shader input counts as a match. */
   return (input_var == NULL || input_var->data.mode != ir_var_shader_in)
      ? NULL : input_var;
}

}

/**
 * qsort comparator producing the *reverse* of canonical I/O order:
 * explicit locations descending first, then non-explicit variables in
 * reverse alphabetical order (see canonicalize_shader_io for why).
 */
static int
io_variable_cmp(const void *_a, const void *_b)
{
   const ir_variable *const a = *(const ir_variable **) _a;
   const ir_variable *const b = *(const ir_variable **) _b;

   if (a->data.explicit_location && b->data.explicit_location)
      return b->data.location - a->data.location;

   if (a->data.explicit_location && !b->data.explicit_location)
      return 1;

   if (!a->data.explicit_location && b->data.explicit_location)
      return -1;

   return -strcmp(a->name, b->name);
}

/**
 * Sort the shader IO variables into canonical order
 */
static void
canonicalize_shader_io(exec_list *ir, enum ir_variable_mode io_mode)
{
   ir_variable *var_table[MAX_PROGRAM_OUTPUTS * 4];
   unsigned num_variables = 0;

   foreach_in_list(ir_instruction, node, ir) {
      ir_variable *const var = node->as_variable();

      if (var == NULL || var->data.mode != io_mode)
         continue;

      /* If we have already encountered more I/O variables that could
       * successfully link, bail.
       */
      if (num_variables == ARRAY_SIZE(var_table))
         return;

      var_table[num_variables++] = var;
   }

   if (num_variables == 0)
      return;

   /* Sort the list in reverse order (io_variable_cmp handles this).  Later
    * we're going to push the variables on to the IR list as a stack, so we
    * want the last variable (in canonical order) to be first in the list.
    */
   qsort(var_table, num_variables, sizeof(var_table[0]), io_variable_cmp);

   /* Remove the variable from it's current location in the IR, and put it at
    * the front.
    */
   for (unsigned i = 0; i < num_variables; i++) {
      var_table[i]->remove();
      ir->push_head(var_table[i]);
   }
}

/**
 * Generate a bitfield map of the explicit locations for shader varyings.
 *
 * Note: For Tessellation shaders we are sitting right on the limits of the
 * 64 bit map. Per-vertex and per-patch both have separate location domains
 * with a max of MAX_VARYING.
 */
static uint64_t
reserved_varying_slot(struct gl_linked_shader *stage,
                      ir_variable_mode io_mode)
{
   assert(io_mode == ir_var_shader_in || io_mode == ir_var_shader_out);
   /* Avoid an overflow of the returned value */
   assert(MAX_VARYINGS_INCL_PATCH <= 64);

   uint64_t slots = 0;
   int var_slot;

   if (!stage)
      return slots;

   foreach_in_list(ir_instruction, node, stage->ir) {
      ir_variable *const var = node->as_variable();

      /* Only generic varyings with an explicit location reserve slots. */
      if (var == NULL || var->data.mode != io_mode ||
          !var->data.explicit_location ||
          var->data.location < VARYING_SLOT_VAR0)
         continue;

      var_slot = var->data.location - VARYING_SLOT_VAR0;

      unsigned num_elements = get_varying_type(var, stage->Stage)
         ->count_attribute_slots(io_mode == ir_var_shader_in &&
                                 stage->Stage == MESA_SHADER_VERTEX);
      for (unsigned i = 0; i < num_elements; i++) {
         /* Bounds check guards against out-of-range shifts for variables
          * whose elements extend past the map.
          */
         if (var_slot >= 0 && var_slot < MAX_VARYINGS_INCL_PATCH)
            slots |= UINT64_C(1) << var_slot;
         var_slot += 1;
      }
   }

   return slots;
}


/**
 * Assign locations for all variables that are produced in one pipeline stage
 * (the "producer") and consumed in the next stage (the "consumer").
 *
 * Variables produced by the producer may also be consumed by transform
 * feedback.
 *
 * \param num_tfeedback_decls is the number of declarations indicating
 *        variables that may be consumed by transform feedback.
 *
 * \param tfeedback_decls is a pointer to an array of tfeedback_decl objects
 *        representing the result of parsing the strings passed to
 *        glTransformFeedbackVaryings().  assign_location() will be called for
 *        each of these objects that matches one of the outputs of the
 *        producer.
 *
 * When num_tfeedback_decls is nonzero, it is permissible for the consumer to
 * be NULL.  In this case, varying locations are assigned solely based on the
 * requirements of transform feedback.
 */
static bool
assign_varying_locations(struct gl_context *ctx,
                         void *mem_ctx,
                         struct gl_shader_program *prog,
                         gl_linked_shader *producer,
                         gl_linked_shader *consumer,
                         unsigned num_tfeedback_decls,
                         tfeedback_decl *tfeedback_decls,
                         const uint64_t reserved_slots)
{
   /* Tessellation shaders treat inputs and outputs as shared memory and can
    * access inputs and outputs of other invocations.
    * Therefore, they can't be lowered to temps easily (and definitely not
    * efficiently).
    */
   bool unpackable_tess =
      (consumer && consumer->Stage == MESA_SHADER_TESS_EVAL) ||
      (consumer && consumer->Stage == MESA_SHADER_TESS_CTRL) ||
      (producer && producer->Stage == MESA_SHADER_TESS_CTRL);

   /* Transform feedback code assumes varying arrays are packed, so if the
    * driver has disabled varying packing, make sure to at least enable
    * packing required by transform feedback.  See below for exception.
    */
   bool xfb_enabled =
      ctx->Extensions.EXT_transform_feedback && !unpackable_tess;

   /* Some drivers actually requires packing to be explicitly disabled
    * for varyings used by transform feedback.
    */
   bool disable_xfb_packing =
      ctx->Const.DisableTransformFeedbackPacking;

   /* Disable packing on outward facing interfaces for SSO because in ES we
    * need to retain the unpacked varying information for draw time
    * validation.
    *
    * Packing is still enabled on individual arrays, structs, and matrices as
    * these are required by the transform feedback code and it is still safe
    * to do so.  We also enable packing when a varying is only used for
    * transform feedback and its not a SSO.
    */
   bool disable_varying_packing =
      ctx->Const.DisableVaryingPacking || unpackable_tess;
   if (prog->SeparateShader && (producer == NULL || consumer == NULL))
      disable_varying_packing = true;

   varying_matches matches(disable_varying_packing,
                           disable_xfb_packing,
                           xfb_enabled,
                           ctx->Extensions.ARB_enhanced_layouts,
                           producer ? producer->Stage : MESA_SHADER_NONE,
                           consumer ? consumer->Stage : MESA_SHADER_NONE);

   /* Local ralloc context for the three hash tables below; it is freed on
    * every exit path of this function (both error returns and success).
    */
   void *hash_table_ctx = ralloc_context(NULL);
   hash_table *tfeedback_candidates =
      _mesa_hash_table_create(hash_table_ctx, _mesa_hash_string,
                              _mesa_key_string_equal);
   hash_table *consumer_inputs =
      _mesa_hash_table_create(hash_table_ctx, _mesa_hash_string,
                              _mesa_key_string_equal);
   hash_table *consumer_interface_inputs =
      _mesa_hash_table_create(hash_table_ctx, _mesa_hash_string,
                              _mesa_key_string_equal);
   /* Direct lookup table for consumer inputs that carry an explicit
    * location, indexed by varying slot.
    */
   ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX] = {
      NULL,
   };

   /* Geometry-shader inputs are per-vertex arrays; the vertex count feeds
    * lower_packed_varyings() for the consumer side at the end.
    */
   unsigned consumer_vertices = 0;
   if (consumer && consumer->Stage == MESA_SHADER_GEOMETRY)
      consumer_vertices = prog->Geom.VerticesIn;

   /* Operate in a total of four passes.
    *
    * 1. Sort inputs / outputs into a canonical order.  This is necessary so
    *    that inputs / outputs of separable shaders will be assigned
    *    predictable locations regardless of the order in which declarations
    *    appeared in the shader source.
    *
    * 2. Assign locations for any matching inputs and outputs.
    *
    * 3. Mark output variables in the producer that do not have locations as
    *    not being outputs.  This lets the optimizer eliminate them.
    *
    * 4. Mark input variables in the consumer that do not have locations as
    *    not being inputs.  This lets the optimizer eliminate them.
    */
   if (consumer)
      canonicalize_shader_io(consumer->ir, ir_var_shader_in);

   if (producer)
      canonicalize_shader_io(producer->ir, ir_var_shader_out);

   if (consumer)
      linker::populate_consumer_input_sets(mem_ctx, consumer->ir,
                                           consumer_inputs,
                                           consumer_interface_inputs,
                                           consumer_inputs_with_locations);

   if (producer) {
      foreach_in_list(ir_instruction, node, producer->ir) {
         ir_variable *const output_var = node->as_variable();

         if (output_var == NULL || output_var->data.mode != ir_var_shader_out)
            continue;

         /* Only geometry shaders can use non-zero streams */
         assert(output_var->data.stream == 0 ||
                (output_var->data.stream < MAX_VERTEX_STREAMS &&
                 producer->Stage == MESA_SHADER_GEOMETRY));

         if (num_tfeedback_decls > 0) {
            tfeedback_candidate_generator g(mem_ctx, tfeedback_candidates, producer->Stage);
            /* From OpenGL 4.6 (Core Profile) spec, section 11.1.2.1
             * ("Vertex Shader Variables / Output Variables")
             *
             * "Each program object can specify a set of output variables from
             * one shader to be recorded in transform feedback mode (see
             * section 13.3).  The variables that can be recorded are those
             * emitted by the first active shader, in order, from the
             * following list:
             *
             *  * geometry shader
             *  * tessellation evaluation shader
             *  * tessellation control shader
             *  * vertex shader"
             *
             * But on OpenGL ES 3.2, section 11.1.2.1 ("Vertex Shader
             * Variables / Output Variables") tessellation control shader is
             * not included in the stages list.
             */
            if (!prog->IsES || producer->Stage != MESA_SHADER_TESS_CTRL) {
               g.process(output_var);
            }
         }

         ir_variable *const input_var =
            linker::get_matching_input(mem_ctx, output_var, consumer_inputs,
                                       consumer_interface_inputs,
                                       consumer_inputs_with_locations);

         /* If a matching input variable was found, add this output (and the
          * input) to the set.  If this is a separable program and there is no
          * consumer stage, add the output.
          *
          * Always add TCS outputs.  They are shared by all invocations
          * within a patch and can be used as shared memory.
          */
         if (input_var || (prog->SeparateShader && consumer == NULL) ||
             producer->Stage == MESA_SHADER_TESS_CTRL) {
            matches.record(output_var, input_var);
         }

         /* Only stream 0 outputs can be consumed in the next stage */
         if (input_var && output_var->data.stream != 0) {
            linker_error(prog, "output %s is assigned to stream=%d but "
                         "is linked to an input, which requires stream=0",
                         output_var->name, output_var->data.stream);
            ralloc_free(hash_table_ctx);
            return false;
         }
      }
   } else {
      /* If there's no producer stage, then this must be a separable program.
       * For example, we may have a program that has just a fragment shader.
       * Later this program will be used with some arbitrary vertex (or
       * geometry) shader program.  This means that locations must be assigned
       * for all the inputs.
       */
      foreach_in_list(ir_instruction, node, consumer->ir) {
         ir_variable *const input_var = node->as_variable();
         if (input_var && input_var->data.mode == ir_var_shader_in) {
            matches.record(NULL, input_var);
         }
      }
   }

   /* Match each transform feedback declaration against the candidate outputs
    * collected above, lowering builtins / subscripted arrays to fresh
    * varyings where required.
    */
   for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
      if (!tfeedback_decls[i].is_varying())
         continue;

      const tfeedback_candidate *matched_candidate
         = tfeedback_decls[i].find_candidate(prog, tfeedback_candidates);

      /* find_candidate() reports the linker error itself on failure. */
      if (matched_candidate == NULL) {
         ralloc_free(hash_table_ctx);
         return false;
      }

      /* There are two situations where a new output varying is needed:
       *
       *  - If varying packing is disabled for xfb and the current declaration
       *    is subscripting an array, whether the subscript is aligned or not.
       *    to preserve the rest of the array for the consumer.
       *
       *  - If a builtin variable needs to be copied to a new variable
       *    before its content is modified by another lowering pass (e.g.
       *    \c gl_Position is transformed by \c nir_lower_viewport_transform).
       */
      const bool lowered =
         (disable_xfb_packing && tfeedback_decls[i].subscripted()) ||
         (matched_candidate->toplevel_var->data.explicit_location &&
          matched_candidate->toplevel_var->data.location < VARYING_SLOT_VAR0 &&
          (!consumer || consumer->Stage == MESA_SHADER_FRAGMENT) &&
          (ctx->Const.ShaderCompilerOptions[producer->Stage].LowerBuiltinVariablesXfb &
             BITFIELD_BIT(matched_candidate->toplevel_var->data.location)));

      if (lowered) {
         ir_variable *new_var;
         tfeedback_candidate *new_candidate = NULL;

         new_var = lower_xfb_varying(mem_ctx, producer, tfeedback_decls[i].name());
         if (new_var == NULL) {
            ralloc_free(hash_table_ctx);
            return false;
         }

         /* Create new candidate and replace matched_candidate */
         new_candidate = rzalloc(mem_ctx, tfeedback_candidate);
         new_candidate->toplevel_var = new_var;
         new_candidate->toplevel_var->data.is_unmatched_generic_inout = 1;
         new_candidate->type = new_var->type;
         new_candidate->struct_offset_floats = 0;
         new_candidate->xfb_offset_floats = 0;
         _mesa_hash_table_insert(tfeedback_candidates,
                                 ralloc_strdup(mem_ctx, new_var->name),
                                 new_candidate);

         tfeedback_decls[i].set_lowered_candidate(new_candidate);
         matched_candidate = new_candidate;
      }

      /* Mark as xfb varying */
      matched_candidate->toplevel_var->data.is_xfb = 1;

      /* Mark xfb varyings as always active */
      matched_candidate->toplevel_var->data.always_active_io = 1;

      /* Mark any corresponding inputs as always active also.  We must do this
       * because we have a NIR pass that lowers vectors to scalars and another
       * that removes unused varyings.
       * We don't split varyings marked as always active because there is no
       * point in doing so.  This means we need to mark both sides of the
       * interface as always active otherwise we will have a mismatch and
       * start removing things we shouldn't.
       */
      ir_variable *const input_var =
         linker::get_matching_input(mem_ctx, matched_candidate->toplevel_var,
                                    consumer_inputs,
                                    consumer_interface_inputs,
                                    consumer_inputs_with_locations);
      if (input_var) {
         input_var->data.is_xfb = 1;
         input_var->data.always_active_io = 1;
      }

      /* An output captured only by transform feedback (no consumer match)
       * still needs a location; record it so assign_locations() sees it.
       */
      if (matched_candidate->toplevel_var->data.is_unmatched_generic_inout) {
         matched_candidate->toplevel_var->data.is_xfb_only = 1;
         matches.record(matched_candidate->toplevel_var, NULL);
      }
   }

   /* Assign slots/components for everything recorded, honoring slots already
    * reserved by explicit locations, then write them back to the variables.
    */
   uint8_t components[MAX_VARYINGS_INCL_PATCH] = {0};
   const unsigned slots_used = matches.assign_locations(
         prog, components, reserved_slots);
   matches.store_locations();

   for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
      if (tfeedback_decls[i].is_varying()) {
         if (!tfeedback_decls[i].assign_location(ctx, prog)) {
            ralloc_free(hash_table_ctx);
            return false;
         }
      }
   }
   ralloc_free(hash_table_ctx);

   if (consumer && producer) {
      foreach_in_list(ir_instruction, node, consumer->ir) {
         ir_variable *const var = node->as_variable();

         if (var && var->data.mode == ir_var_shader_in &&
             var->data.is_unmatched_generic_inout) {
            if (!prog->IsES && prog->data->Version <= 120) {
               /* On page 25 (page 31 of the PDF) of the GLSL 1.20 spec:
                *
                *     Only those varying variables used (i.e. read) in
                *     the fragment shader executable must be written to
                *     by the vertex shader executable; declaring
                *     superfluous varying variables in a vertex shader is
                *     permissible.
                *
                * We interpret this text as meaning that the VS must
                * write the variable for the FS to read it.  See
                * "glsl1-varying read but not written" in piglit.
                */
               /* NOTE(review): the "\n." at the end of this format string
                * looks transposed (likely meant ".\n"); kept as-is since
                * changing a runtime message is a behavior change.
                */
               linker_error(prog, "%s shader varying %s not written "
                            "by %s shader\n.",
                            _mesa_shader_stage_to_string(consumer->Stage),
                            var->name,
                            _mesa_shader_stage_to_string(producer->Stage));
            } else {
               linker_warning(prog, "%s shader varying %s not written "
                              "by %s shader\n.",
                              _mesa_shader_stage_to_string(consumer->Stage),
                              var->name,
                              _mesa_shader_stage_to_string(producer->Stage));
            }
         }
      }

      /* Now that validation is done its safe to remove unused varyings.  As
       * we have both a producer and consumer its safe to remove unused
       * varyings even if the program is a SSO because the stages are being
       * linked together i.e. we have a multi-stage SSO.
       */
      remove_unused_shader_inputs_and_outputs(false, producer,
                                              ir_var_shader_out);
      remove_unused_shader_inputs_and_outputs(false, consumer,
                                              ir_var_shader_in);
   }

   if (producer) {
      lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_out,
                            0, producer, disable_varying_packing,
                            disable_xfb_packing, xfb_enabled);
   }

   if (consumer) {
      lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_in,
                            consumer_vertices, consumer, disable_varying_packing,
                            disable_xfb_packing, xfb_enabled);
   }

   return true;
}

/**
 * Verify that the producer stage does not exceed the implementation's
 * output-component limit.
 *
 * \param num_explicit_locations number of output slots already consumed by
 *        variables with explicit locations (counted by the caller); counted
 *        in vec4 slots.
 *
 * \return false (with a linker error recorded) on limit overflow, true
 *         otherwise.
 */
static bool
check_against_output_limit(struct gl_context *ctx,
                           struct gl_shader_program *prog,
                           gl_linked_shader *producer,
                           unsigned num_explicit_locations)
{
   unsigned output_vectors = num_explicit_locations;

   foreach_in_list(ir_instruction, node, producer->ir) {
      ir_variable *const var = node->as_variable();

      /* Explicit-location variables are already in num_explicit_locations;
       * only count the rest.
       */
      if (var && !var->data.explicit_location &&
          var->data.mode == ir_var_shader_out &&
          var_counts_against_varying_limit(producer->Stage, var)) {
         /* outputs for fragment shader can't be doubles */
         output_vectors += var->type->count_attribute_slots(false);
      }
   }

   assert(producer->Stage != MESA_SHADER_FRAGMENT);
   unsigned max_output_components =
      ctx->Const.Program[producer->Stage].MaxOutputComponents;

   /* One slot is four components; the ES error message reports vec4 counts,
    * the desktop message reports raw components.
    */
   const unsigned output_components = output_vectors * 4;
   if (output_components > max_output_components) {
      if (ctx->API == API_OPENGLES2 || prog->IsES)
         linker_error(prog, "%s shader uses too many output vectors "
                      "(%u > %u)\n",
                      _mesa_shader_stage_to_string(producer->Stage),
                      output_vectors,
                      max_output_components / 4);
      else
         linker_error(prog, "%s shader uses too many output components "
                      "(%u > %u)\n",
                      _mesa_shader_stage_to_string(producer->Stage),
                      output_components,
                      max_output_components);

      return false;
   }

   return true;
}

/**
 * Verify that the consumer stage does not exceed the implementation's
 * input-component limit.  Mirror image of check_against_output_limit().
 *
 * \param num_explicit_locations number of input slots already consumed by
 *        variables with explicit locations; counted in vec4 slots.
 *
 * \return false (with a linker error recorded) on limit overflow, true
 *         otherwise.
 */
static bool
check_against_input_limit(struct gl_context *ctx,
                          struct gl_shader_program *prog,
                          gl_linked_shader *consumer,
                          unsigned num_explicit_locations)
{
   unsigned input_vectors = num_explicit_locations;

   foreach_in_list(ir_instruction, node, consumer->ir) {
      ir_variable *const var = node->as_variable();

      if (var && !var->data.explicit_location &&
          var->data.mode == ir_var_shader_in &&
          var_counts_against_varying_limit(consumer->Stage, var)) {
         /* vertex inputs aren't varying counted */
         input_vectors += var->type->count_attribute_slots(false);
      }
   }

   assert(consumer->Stage != MESA_SHADER_VERTEX);
   unsigned max_input_components =
      ctx->Const.Program[consumer->Stage].MaxInputComponents;

   const unsigned input_components = input_vectors * 4;
   if (input_components > max_input_components) {
      if (ctx->API == API_OPENGLES2 || prog->IsES)
         linker_error(prog, "%s shader uses too many input vectors "
                      "(%u > %u)\n",
                      _mesa_shader_stage_to_string(consumer->Stage),
                      input_vectors,
                      max_input_components / 4);
      else
         linker_error(prog, "%s shader uses too many input components "
                      "(%u > %u)\n",
                      _mesa_shader_stage_to_string(consumer->Stage),
                      input_components,
                      max_input_components);

      return false;
   }

   return true;
}

/**
 * Top-level entry point for linking varyings across all stages of a program.
 *
 * Gathers the transform feedback declarations (either from xfb_* layout
 * qualifiers or from glTransformFeedbackVaryings), then walks the stage
 * chain from \p last down to \p first assigning varying locations between
 * each producer/consumer pair, removing dead varyings, and enforcing the
 * input/output component limits.  Finally stores the transform feedback
 * info on the program.
 *
 * \param first index of the first linked stage in prog->_LinkedShaders
 * \param last  index of the last linked stage in prog->_LinkedShaders
 *
 * \return false (with a linker error recorded) on failure.
 */
bool
link_varyings(struct gl_shader_program *prog, unsigned first, unsigned last,
              struct gl_context *ctx, void *mem_ctx)
{
   bool has_xfb_qualifiers = false;
   unsigned num_tfeedback_decls = 0;
   char **varying_names = NULL;
   tfeedback_decl *tfeedback_decls = NULL;

   /* From the ARB_enhanced_layouts spec:
    *
    *    "If the shader used to record output variables for transform feedback
    *    varyings uses the "xfb_buffer", "xfb_offset", or "xfb_stride" layout
    *    qualifiers, the values specified by TransformFeedbackVaryings are
    *    ignored, and the set of variables captured for transform feedback is
    *    instead derived from the specified layout qualifiers."
    */
   for (int i = MESA_SHADER_FRAGMENT - 1; i >= 0; i--) {
      /* Find last stage before fragment shader */
      if (prog->_LinkedShaders[i]) {
         has_xfb_qualifiers =
            process_xfb_layout_qualifiers(mem_ctx, prog->_LinkedShaders[i],
                                          prog, &num_tfeedback_decls,
                                          &varying_names);
         break;
      }
   }

   /* No qualifiers: fall back to the names from TransformFeedbackVaryings. */
   if (!has_xfb_qualifiers) {
      num_tfeedback_decls = prog->TransformFeedback.NumVarying;
      varying_names = prog->TransformFeedback.VaryingNames;
   }

   if (num_tfeedback_decls != 0) {
      /* From GL_EXT_transform_feedback:
       *   A program will fail to link if:
       *
       *   * the <count> specified by TransformFeedbackVaryingsEXT is
       *     non-zero, but the program object has no vertex or geometry
       *     shader;
       */
      if (first >= MESA_SHADER_FRAGMENT) {
         linker_error(prog, "Transform feedback varyings specified, but "
                      "no vertex, tessellation, or geometry shader is "
                      "present.\n");
         return false;
      }

      tfeedback_decls = rzalloc_array(mem_ctx, tfeedback_decl,
                                      num_tfeedback_decls);
      if (!parse_tfeedback_decls(ctx, prog, mem_ctx, num_tfeedback_decls,
                                 varying_names, tfeedback_decls))
         return false;
   }

   /* If there is no fragment shader we need to set transform feedback.
    *
    * For SSO we also need to assign output locations.  We assign them here
    * because we need to do it for both single stage programs and multi stage
    * programs.
    */
   if (last < MESA_SHADER_FRAGMENT &&
       (num_tfeedback_decls != 0 || prog->SeparateShader)) {
      const uint64_t reserved_out_slots =
         reserved_varying_slot(prog->_LinkedShaders[last], ir_var_shader_out);
      if (!assign_varying_locations(ctx, mem_ctx, prog,
                                    prog->_LinkedShaders[last], NULL,
                                    num_tfeedback_decls, tfeedback_decls,
                                    reserved_out_slots))
         return false;
   }

   if (last <= MESA_SHADER_FRAGMENT) {
      /* Remove unused varyings from the first/last stage unless SSO */
      remove_unused_shader_inputs_and_outputs(prog->SeparateShader,
                                              prog->_LinkedShaders[first],
                                              ir_var_shader_in);
      remove_unused_shader_inputs_and_outputs(prog->SeparateShader,
                                              prog->_LinkedShaders[last],
                                              ir_var_shader_out);

      /* If the program is made up of only a single stage */
      if (first == last) {
         gl_linked_shader *const sh = prog->_LinkedShaders[last];

         do_dead_builtin_varyings(ctx, NULL, sh, 0, NULL);
         do_dead_builtin_varyings(ctx, sh, NULL, num_tfeedback_decls,
                                  tfeedback_decls);

         if (prog->SeparateShader) {
            const uint64_t reserved_slots =
               reserved_varying_slot(sh, ir_var_shader_in);

            /* Assign input locations for SSO, output locations are already
             * assigned.
             */
            if (!assign_varying_locations(ctx, mem_ctx, prog,
                                          NULL /* producer */,
                                          sh /* consumer */,
                                          0 /* num_tfeedback_decls */,
                                          NULL /* tfeedback_decls */,
                                          reserved_slots))
               return false;
         }
      } else {
         /* Linking the stages in the opposite order (from fragment to vertex)
          * ensures that inter-shader outputs written to in an earlier stage
          * are eliminated if they are (transitively) not used in a later
          * stage.
          */
         int next = last;
         for (int i = next - 1; i >= 0; i--) {
            /* i == 0 is deliberately not skipped even when NULL, so a
             * separable program missing its first stage still has the
             * consumer's inputs processed (sh_i may be NULL below).
             */
            if (prog->_LinkedShaders[i] == NULL && i != 0)
               continue;

            gl_linked_shader *const sh_i = prog->_LinkedShaders[i];
            gl_linked_shader *const sh_next = prog->_LinkedShaders[next];

            const uint64_t reserved_out_slots =
               reserved_varying_slot(sh_i, ir_var_shader_out);
            const uint64_t reserved_in_slots =
               reserved_varying_slot(sh_next, ir_var_shader_in);

            /* Transform feedback only applies when the consumer is the
             * fragment shader (i.e. the producer is the last pre-FS stage).
             */
            do_dead_builtin_varyings(ctx, sh_i, sh_next,
                      next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0,
                      tfeedback_decls);

            if (!assign_varying_locations(ctx, mem_ctx, prog, sh_i, sh_next,
                      next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0,
                      tfeedback_decls,
                      reserved_out_slots | reserved_in_slots))
               return false;

            /* This must be done after all dead varyings are eliminated. */
            if (sh_i != NULL) {
               unsigned slots_used = util_bitcount64(reserved_out_slots);
               if (!check_against_output_limit(ctx, prog, sh_i, slots_used)) {
                  return false;
               }
            }

            unsigned slots_used = util_bitcount64(reserved_in_slots);
            if (!check_against_input_limit(ctx, prog, sh_next, slots_used))
               return false;

            next = i;
         }
      }
   }

   if (!store_tfeedback_info(ctx, prog, num_tfeedback_decls, tfeedback_decls,
                             has_xfb_qualifiers, mem_ctx))
      return false;

   return true;
}