link_varyings.cpp revision 7ec681f3
1/*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24/**
25 * \file link_varyings.cpp
26 *
27 * Linker functions related specifically to linking varyings between shader
28 * stages.
29 */
30
31
32#include "main/errors.h"
33#include "main/mtypes.h"
34#include "glsl_symbol_table.h"
35#include "glsl_parser_extras.h"
36#include "ir_optimization.h"
37#include "linker.h"
38#include "link_varyings.h"
39#include "main/macros.h"
40#include "util/hash_table.h"
41#include "util/u_math.h"
42#include "program.h"
43
44
45/**
46 * Get the varying type stripped of the outermost array if we're processing
47 * a stage whose varyings are arrays indexed by a vertex number (such as
48 * geometry shader inputs).
49 */
50static const glsl_type *
51get_varying_type(const ir_variable *var, gl_shader_stage stage)
52{
53   const glsl_type *type = var->type;
54
55   if (!var->data.patch &&
56       ((var->data.mode == ir_var_shader_out &&
57         stage == MESA_SHADER_TESS_CTRL) ||
58        (var->data.mode == ir_var_shader_in &&
59         (stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_TESS_EVAL ||
60          stage == MESA_SHADER_GEOMETRY)))) {
61      assert(type->is_array());
62      type = type->fields.array;
63   }
64
65   return type;
66}
67
68static bool
69varying_has_user_specified_location(const ir_variable *var)
70{
71   return var->data.explicit_location &&
72      var->data.location >= VARYING_SLOT_VAR0;
73}
74
75static void
76create_xfb_varying_names(void *mem_ctx, const glsl_type *t, char **name,
77                         size_t name_length, unsigned *count,
78                         const char *ifc_member_name,
79                         const glsl_type *ifc_member_t, char ***varying_names)
80{
81   if (t->is_interface()) {
82      size_t new_length = name_length;
83
84      assert(ifc_member_name && ifc_member_t);
85      ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", ifc_member_name);
86
87      create_xfb_varying_names(mem_ctx, ifc_member_t, name, new_length, count,
88                               NULL, NULL, varying_names);
89   } else if (t->is_struct()) {
90      for (unsigned i = 0; i < t->length; i++) {
91         const char *field = t->fields.structure[i].name;
92         size_t new_length = name_length;
93
94         ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", field);
95
96         create_xfb_varying_names(mem_ctx, t->fields.structure[i].type, name,
97                                  new_length, count, NULL, NULL,
98                                  varying_names);
99      }
100   } else if (t->without_array()->is_struct() ||
101              t->without_array()->is_interface() ||
102              (t->is_array() && t->fields.array->is_array())) {
103      for (unsigned i = 0; i < t->length; i++) {
104         size_t new_length = name_length;
105
106         /* Append the subscript to the current variable name */
107         ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i);
108
109         create_xfb_varying_names(mem_ctx, t->fields.array, name, new_length,
110                                  count, ifc_member_name, ifc_member_t,
111                                  varying_names);
112      }
113   } else {
114      (*varying_names)[(*count)++] = ralloc_strdup(mem_ctx, *name);
115   }
116}
117
118static bool
119process_xfb_layout_qualifiers(void *mem_ctx, const gl_linked_shader *sh,
120                              struct gl_shader_program *prog,
121                              unsigned *num_tfeedback_decls,
122                              char ***varying_names)
123{
124   bool has_xfb_qualifiers = false;
125
126   /* We still need to enable transform feedback mode even if xfb_stride is
127    * only applied to a global out. Also we don't bother to propagate
128    * xfb_stride to interface block members so this will catch that case also.
129    */
130   for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
131      if (prog->TransformFeedback.BufferStride[j]) {
132         has_xfb_qualifiers = true;
133         break;
134      }
135   }
136
137   foreach_in_list(ir_instruction, node, sh->ir) {
138      ir_variable *var = node->as_variable();
139      if (!var || var->data.mode != ir_var_shader_out)
140         continue;
141
142      /* From the ARB_enhanced_layouts spec:
143       *
144       *    "Any shader making any static use (after preprocessing) of any of
145       *     these *xfb_* qualifiers will cause the shader to be in a
146       *     transform feedback capturing mode and hence responsible for
147       *     describing the transform feedback setup.  This mode will capture
148       *     any output selected by *xfb_offset*, directly or indirectly, to
149       *     a transform feedback buffer."
150       */
151      if (var->data.explicit_xfb_buffer || var->data.explicit_xfb_stride) {
152         has_xfb_qualifiers = true;
153      }
154
155      if (var->data.explicit_xfb_offset) {
156         *num_tfeedback_decls += var->type->varying_count();
157         has_xfb_qualifiers = true;
158      }
159   }
160
161   if (*num_tfeedback_decls == 0)
162      return has_xfb_qualifiers;
163
164   unsigned i = 0;
165   *varying_names = ralloc_array(mem_ctx, char *, *num_tfeedback_decls);
166   foreach_in_list(ir_instruction, node, sh->ir) {
167      ir_variable *var = node->as_variable();
168      if (!var || var->data.mode != ir_var_shader_out)
169         continue;
170
171      if (var->data.explicit_xfb_offset) {
172         char *name;
173         const glsl_type *type, *member_type;
174
175         if (var->data.from_named_ifc_block) {
176            type = var->get_interface_type();
177
178            /* Find the member type before it was altered by lowering */
179            const glsl_type *type_wa = type->without_array();
180            member_type =
181               type_wa->fields.structure[type_wa->field_index(var->name)].type;
182            name = ralloc_strdup(NULL, type_wa->name);
183         } else {
184            type = var->type;
185            member_type = NULL;
186            name = ralloc_strdup(NULL, var->name);
187         }
188         create_xfb_varying_names(mem_ctx, type, &name, strlen(name), &i,
189                                  var->name, member_type, varying_names);
190         ralloc_free(name);
191      }
192   }
193
194   assert(i == *num_tfeedback_decls);
195   return has_xfb_qualifiers;
196}
197
198/**
199 * Validate the types and qualifiers of an output from one stage against the
200 * matching input to another stage.
201 */
202static void
203cross_validate_types_and_qualifiers(struct gl_context *ctx,
204                                    struct gl_shader_program *prog,
205                                    const ir_variable *input,
206                                    const ir_variable *output,
207                                    gl_shader_stage consumer_stage,
208                                    gl_shader_stage producer_stage)
209{
210   /* Check that the types match between stages.
211    */
212   const glsl_type *type_to_match = input->type;
213
214   /* VS -> GS, VS -> TCS, VS -> TES, TES -> GS */
215   const bool extra_array_level = (producer_stage == MESA_SHADER_VERTEX &&
216                                   consumer_stage != MESA_SHADER_FRAGMENT) ||
217                                  consumer_stage == MESA_SHADER_GEOMETRY;
218   if (extra_array_level) {
219      assert(type_to_match->is_array());
220      type_to_match = type_to_match->fields.array;
221   }
222
223   if (type_to_match != output->type) {
224      if (output->type->is_struct()) {
225         /* Structures across shader stages can have different name
226          * and considered to match in type if and only if structure
227          * members match in name, type, qualification, and declaration
228          * order. The precision doesn’t need to match.
229          */
230         if (!output->type->record_compare(type_to_match,
231                                           false, /* match_name */
232                                           true, /* match_locations */
233                                           false /* match_precision */)) {
234            linker_error(prog,
235                  "%s shader output `%s' declared as struct `%s', "
236                  "doesn't match in type with %s shader input "
237                  "declared as struct `%s'\n",
238                  _mesa_shader_stage_to_string(producer_stage),
239                  output->name,
240                  output->type->name,
241                  _mesa_shader_stage_to_string(consumer_stage),
242                  input->type->name);
243         }
244      } else if (!output->type->is_array() || !is_gl_identifier(output->name)) {
245         /* There is a bit of a special case for gl_TexCoord.  This
246          * built-in is unsized by default.  Applications that variable
247          * access it must redeclare it with a size.  There is some
248          * language in the GLSL spec that implies the fragment shader
249          * and vertex shader do not have to agree on this size.  Other
250          * driver behave this way, and one or two applications seem to
251          * rely on it.
252          *
253          * Neither declaration needs to be modified here because the array
254          * sizes are fixed later when update_array_sizes is called.
255          *
256          * From page 48 (page 54 of the PDF) of the GLSL 1.10 spec:
257          *
258          *     "Unlike user-defined varying variables, the built-in
259          *     varying variables don't have a strict one-to-one
260          *     correspondence between the vertex language and the
261          *     fragment language."
262          */
263         linker_error(prog,
264                      "%s shader output `%s' declared as type `%s', "
265                      "but %s shader input declared as type `%s'\n",
266                      _mesa_shader_stage_to_string(producer_stage),
267                      output->name,
268                      output->type->name,
269                      _mesa_shader_stage_to_string(consumer_stage),
270                      input->type->name);
271         return;
272      }
273   }
274
275   /* Check that all of the qualifiers match between stages.
276    */
277
278   /* According to the OpenGL and OpenGLES GLSL specs, the centroid qualifier
279    * should match until OpenGL 4.3 and OpenGLES 3.1. The OpenGLES 3.0
280    * conformance test suite does not verify that the qualifiers must match.
281    * The deqp test suite expects the opposite (OpenGLES 3.1) behavior for
282    * OpenGLES 3.0 drivers, so we relax the checking in all cases.
283    */
284   if (false /* always skip the centroid check */ &&
285       prog->data->Version < (prog->IsES ? 310 : 430) &&
286       input->data.centroid != output->data.centroid) {
287      linker_error(prog,
288                   "%s shader output `%s' %s centroid qualifier, "
289                   "but %s shader input %s centroid qualifier\n",
290                   _mesa_shader_stage_to_string(producer_stage),
291                   output->name,
292                   (output->data.centroid) ? "has" : "lacks",
293                   _mesa_shader_stage_to_string(consumer_stage),
294                   (input->data.centroid) ? "has" : "lacks");
295      return;
296   }
297
298   if (input->data.sample != output->data.sample) {
299      linker_error(prog,
300                   "%s shader output `%s' %s sample qualifier, "
301                   "but %s shader input %s sample qualifier\n",
302                   _mesa_shader_stage_to_string(producer_stage),
303                   output->name,
304                   (output->data.sample) ? "has" : "lacks",
305                   _mesa_shader_stage_to_string(consumer_stage),
306                   (input->data.sample) ? "has" : "lacks");
307      return;
308   }
309
310   if (input->data.patch != output->data.patch) {
311      linker_error(prog,
312                   "%s shader output `%s' %s patch qualifier, "
313                   "but %s shader input %s patch qualifier\n",
314                   _mesa_shader_stage_to_string(producer_stage),
315                   output->name,
316                   (output->data.patch) ? "has" : "lacks",
317                   _mesa_shader_stage_to_string(consumer_stage),
318                   (input->data.patch) ? "has" : "lacks");
319      return;
320   }
321
322   /* The GLSL 4.20 and GLSL ES 3.00 specifications say:
323    *
324    *    "As only outputs need be declared with invariant, an output from
325    *     one shader stage will still match an input of a subsequent stage
326    *     without the input being declared as invariant."
327    *
328    * while GLSL 4.10 says:
329    *
330    *    "For variables leaving one shader and coming into another shader,
331    *     the invariant keyword has to be used in both shaders, or a link
332    *     error will result."
333    *
334    * and GLSL ES 1.00 section 4.6.4 "Invariance and Linking" says:
335    *
336    *    "The invariance of varyings that are declared in both the vertex
337    *     and fragment shaders must match."
338    */
339   if (input->data.explicit_invariant != output->data.explicit_invariant &&
340       prog->data->Version < (prog->IsES ? 300 : 420)) {
341      linker_error(prog,
342                   "%s shader output `%s' %s invariant qualifier, "
343                   "but %s shader input %s invariant qualifier\n",
344                   _mesa_shader_stage_to_string(producer_stage),
345                   output->name,
346                   (output->data.explicit_invariant) ? "has" : "lacks",
347                   _mesa_shader_stage_to_string(consumer_stage),
348                   (input->data.explicit_invariant) ? "has" : "lacks");
349      return;
350   }
351
352   /* GLSL >= 4.40 removes text requiring interpolation qualifiers
353    * to match cross stage, they must only match within the same stage.
354    *
355    * From page 84 (page 90 of the PDF) of the GLSL 4.40 spec:
356    *
357    *     "It is a link-time error if, within the same stage, the interpolation
358    *     qualifiers of variables of the same name do not match.
359    *
360    * Section 4.3.9 (Interpolation) of the GLSL ES 3.00 spec says:
361    *
362    *    "When no interpolation qualifier is present, smooth interpolation
363    *    is used."
364    *
365    * So we match variables where one is smooth and the other has no explicit
366    * qualifier.
367    */
368   unsigned input_interpolation = input->data.interpolation;
369   unsigned output_interpolation = output->data.interpolation;
370   if (prog->IsES) {
371      if (input_interpolation == INTERP_MODE_NONE)
372         input_interpolation = INTERP_MODE_SMOOTH;
373      if (output_interpolation == INTERP_MODE_NONE)
374         output_interpolation = INTERP_MODE_SMOOTH;
375   }
376   if (input_interpolation != output_interpolation &&
377       prog->data->Version < 440) {
378      if (!ctx->Const.AllowGLSLCrossStageInterpolationMismatch) {
379         linker_error(prog,
380                      "%s shader output `%s' specifies %s "
381                      "interpolation qualifier, "
382                      "but %s shader input specifies %s "
383                      "interpolation qualifier\n",
384                      _mesa_shader_stage_to_string(producer_stage),
385                      output->name,
386                      interpolation_string(output->data.interpolation),
387                      _mesa_shader_stage_to_string(consumer_stage),
388                      interpolation_string(input->data.interpolation));
389         return;
390      } else {
391         linker_warning(prog,
392                        "%s shader output `%s' specifies %s "
393                        "interpolation qualifier, "
394                        "but %s shader input specifies %s "
395                        "interpolation qualifier\n",
396                        _mesa_shader_stage_to_string(producer_stage),
397                        output->name,
398                        interpolation_string(output->data.interpolation),
399                        _mesa_shader_stage_to_string(consumer_stage),
400                        interpolation_string(input->data.interpolation));
401      }
402   }
403}
404
405/**
406 * Validate front and back color outputs against single color input
407 */
408static void
409cross_validate_front_and_back_color(struct gl_context *ctx,
410                                    struct gl_shader_program *prog,
411                                    const ir_variable *input,
412                                    const ir_variable *front_color,
413                                    const ir_variable *back_color,
414                                    gl_shader_stage consumer_stage,
415                                    gl_shader_stage producer_stage)
416{
417   if (front_color != NULL && front_color->data.assigned)
418      cross_validate_types_and_qualifiers(ctx, prog, input, front_color,
419                                          consumer_stage, producer_stage);
420
421   if (back_color != NULL && back_color->data.assigned)
422      cross_validate_types_and_qualifiers(ctx, prog, input, back_color,
423                                          consumer_stage, producer_stage);
424}
425
426static unsigned
427compute_variable_location_slot(ir_variable *var, gl_shader_stage stage)
428{
429   unsigned location_start = VARYING_SLOT_VAR0;
430
431   switch (stage) {
432      case MESA_SHADER_VERTEX:
433         if (var->data.mode == ir_var_shader_in)
434            location_start = VERT_ATTRIB_GENERIC0;
435         break;
436      case MESA_SHADER_TESS_CTRL:
437      case MESA_SHADER_TESS_EVAL:
438         if (var->data.patch)
439            location_start = VARYING_SLOT_PATCH0;
440         break;
441      case MESA_SHADER_FRAGMENT:
442         if (var->data.mode == ir_var_shader_out)
443            location_start = FRAG_RESULT_DATA0;
444         break;
445      default:
446         break;
447   }
448
449   return var->data.location - location_start;
450}
451
/**
 * Bookkeeping for one component of one explicitly assigned location.
 *
 * Entries are filled in by check_location_aliasing(), which treats a NULL
 * \c var as "this component has not been claimed yet".
 */
struct explicit_location_info {
   /* Variable that claimed this component, or NULL while it is free. */
   ir_variable *var;
   /* Whether the claiming type's base type (arrays stripped) is integer. */
   bool base_type_is_integer;
   /* Bit size of the claiming type's base type; 0 is stored for structs. */
   unsigned base_type_bit_size;
   /* Interpolation qualifier recorded for the claiming variable/member. */
   unsigned interpolation;
   /* Auxiliary storage qualifiers recorded for the claiming variable. */
   bool centroid;
   bool sample;
   bool patch;
};
461
/**
 * Record a variable (or interface block member) in the explicit-location
 * table and check it against the entries already recorded there.
 *
 * Locations in the range [\p location, \p location_limit) are visited, and
 * for each of them all four components:
 *
 * - a free component inside [\p component, last_comp) is claimed for
 *   \p var together with the type and qualifier information passed in;
 * - an occupied component is an error if either party is a struct, or if
 *   it lies inside [\p component, last_comp);
 * - any other occupied component of the same location must agree on
 *   integer-ness, bit size, interpolation and centroid/sample/patch.
 *
 * \param explicit_locations  table indexed by [location][component]
 * \param var                 variable recorded in claimed entries
 * \param location            first location to visit
 * \param component           first component used within a location
 * \param location_limit      one past the last location to visit
 * \param type                type used to derive component usage
 * \param prog                program that linker errors are reported on
 * \param stage               stage named in the error messages
 *
 * \return false as soon as a linker error has been reported, true otherwise
 */
static bool
check_location_aliasing(struct explicit_location_info explicit_locations[][4],
                        ir_variable *var,
                        unsigned location,
                        unsigned component,
                        unsigned location_limit,
                        const glsl_type *type,
                        unsigned interpolation,
                        bool centroid,
                        bool sample,
                        bool patch,
                        gl_shader_program *prog,
                        gl_shader_stage stage)
{
   unsigned last_comp;
   unsigned base_type_bit_size;
   const glsl_type *type_without_array = type->without_array();
   const bool base_type_is_integer =
      glsl_base_type_is_integer(type_without_array->base_type);
   const bool is_struct = type_without_array->is_struct();
   if (is_struct) {
      /* structs don't have a defined underlying base type so just treat all
       * component slots as used and set the bit size to 0. If there is
       * location aliasing, we'll fail anyway later.
       */
      last_comp = 4;
      base_type_bit_size = 0;
   } else {
      /* 64-bit types occupy two components per vector element. */
      unsigned dmul = type_without_array->is_64bit() ? 2 : 1;
      last_comp = component + type_without_array->vector_elements * dmul;
      base_type_bit_size =
         glsl_base_type_get_bit_size(type_without_array->base_type);
   }

   while (location < location_limit) {
      unsigned comp = 0;
      while (comp < 4) {
         struct explicit_location_info *info =
            &explicit_locations[location][comp];

         if (info->var) {
            if (info->var->type->without_array()->is_struct() || is_struct) {
               /* Structs cannot share location since they are incompatible
                * with any other underlying numerical type.
                */
               linker_error(prog,
                            "%s shader has multiple %sputs sharing the "
                            "same location that don't have the same "
                            "underlying numerical type. Struct variable '%s', "
                            "location %u\n",
                            _mesa_shader_stage_to_string(stage),
                            var->data.mode == ir_var_shader_in ? "in" : "out",
                            is_struct ? var->name : info->var->name,
                            location);
               return false;
            } else if (comp >= component && comp < last_comp) {
               /* Component aliasing is not allowed */
               linker_error(prog,
                            "%s shader has multiple %sputs explicitly "
                            "assigned to location %d and component %d\n",
                            _mesa_shader_stage_to_string(stage),
                            var->data.mode == ir_var_shader_in ? "in" : "out",
                            location, comp);
               return false;
            } else {
               /* From the OpenGL 4.60.5 spec, section 4.4.1 Input Layout
                * Qualifiers, Page 67, (Location aliasing):
                *
                *   " Further, when location aliasing, the aliases sharing the
                *     location must have the same underlying numerical type
                *     and bit width (floating-point or integer, 32-bit versus
                *     64-bit, etc.) and the same auxiliary storage and
                *     interpolation qualification."
                */

               /* If the underlying numerical type isn't integer, implicitly
                * it will be float or else we would have failed by now.
                */
               if (info->base_type_is_integer != base_type_is_integer) {
                  linker_error(prog,
                               "%s shader has multiple %sputs sharing the "
                               "same location that don't have the same "
                               "underlying numerical type. Location %u "
                               "component %u.\n",
                               _mesa_shader_stage_to_string(stage),
                               var->data.mode == ir_var_shader_in ?
                               "in" : "out", location, comp);
                  return false;
               }

               if (info->base_type_bit_size != base_type_bit_size) {
                  linker_error(prog,
                               "%s shader has multiple %sputs sharing the "
                               "same location that don't have the same "
                               "underlying numerical bit size. Location %u "
                               "component %u.\n",
                               _mesa_shader_stage_to_string(stage),
                               var->data.mode == ir_var_shader_in ?
                               "in" : "out", location, comp);
                  return false;
               }

               if (info->interpolation != interpolation) {
                  linker_error(prog,
                               "%s shader has multiple %sputs sharing the "
                               "same location that don't have the same "
                               "interpolation qualification. Location %u "
                               "component %u.\n",
                               _mesa_shader_stage_to_string(stage),
                               var->data.mode == ir_var_shader_in ?
                               "in" : "out", location, comp);
                  return false;
               }

               if (info->centroid != centroid ||
                   info->sample != sample ||
                   info->patch != patch) {
                  linker_error(prog,
                               "%s shader has multiple %sputs sharing the "
                               "same location that don't have the same "
                               "auxiliary storage qualification. Location %u "
                               "component %u.\n",
                               _mesa_shader_stage_to_string(stage),
                               var->data.mode == ir_var_shader_in ?
                               "in" : "out", location, comp);
                  return false;
               }
            }
         } else if (comp >= component && comp < last_comp) {
            /* Free component inside the range used by this variable:
             * claim it and remember what it was claimed with.
             */
            info->var = var;
            info->base_type_is_integer = base_type_is_integer;
            info->base_type_bit_size = base_type_bit_size;
            info->interpolation = interpolation;
            info->centroid = centroid;
            info->sample = sample;
            info->patch = patch;
         }

         comp++;

         /* We need to do some special handling for doubles as dvec3 and
          * dvec4 consume two consecutive locations. We don't need to
          * worry about components beginning at anything other than 0 as
          * the spec does not allow this for dvec3 and dvec4.
          */
         if (comp == 4 && last_comp > 4) {
            /* Carry the components that did not fit over to the next
             * location; the inner loop then continues there.
             */
            last_comp = last_comp - 4;
            /* Bump location index and reset the component index */
            location++;
            comp = 0;
            component = 0;
         }
      }

      location++;
   }

   return true;
}
621
622static bool
623validate_explicit_variable_location(struct gl_context *ctx,
624                                    struct explicit_location_info explicit_locations[][4],
625                                    ir_variable *var,
626                                    gl_shader_program *prog,
627                                    gl_linked_shader *sh)
628{
629   const glsl_type *type = get_varying_type(var, sh->Stage);
630   unsigned num_elements = type->count_attribute_slots(false);
631   unsigned idx = compute_variable_location_slot(var, sh->Stage);
632   unsigned slot_limit = idx + num_elements;
633
634   /* Vertex shader inputs and fragment shader outputs are validated in
635    * assign_attribute_or_color_locations() so we should not attempt to
636    * validate them again here.
637    */
638   unsigned slot_max;
639   if (var->data.mode == ir_var_shader_out) {
640      assert(sh->Stage != MESA_SHADER_FRAGMENT);
641      slot_max =
642         ctx->Const.Program[sh->Stage].MaxOutputComponents / 4;
643   } else {
644      assert(var->data.mode == ir_var_shader_in);
645      assert(sh->Stage != MESA_SHADER_VERTEX);
646      slot_max =
647         ctx->Const.Program[sh->Stage].MaxInputComponents / 4;
648   }
649
650   if (slot_limit > slot_max) {
651      linker_error(prog,
652                   "Invalid location %u in %s shader\n",
653                   idx, _mesa_shader_stage_to_string(sh->Stage));
654      return false;
655   }
656
657   const glsl_type *type_without_array = type->without_array();
658   if (type_without_array->is_interface()) {
659      for (unsigned i = 0; i < type_without_array->length; i++) {
660         glsl_struct_field *field = &type_without_array->fields.structure[i];
661         unsigned field_location = field->location -
662            (field->patch ? VARYING_SLOT_PATCH0 : VARYING_SLOT_VAR0);
663         unsigned field_slots = field->type->count_attribute_slots(false);
664         if (!check_location_aliasing(explicit_locations, var,
665                                      field_location,
666                                      0,
667                                      field_location + field_slots,
668                                      field->type,
669                                      field->interpolation,
670                                      field->centroid,
671                                      field->sample,
672                                      field->patch,
673                                      prog, sh->Stage)) {
674            return false;
675         }
676      }
677   } else if (!check_location_aliasing(explicit_locations, var,
678                                       idx, var->data.location_frac,
679                                       slot_limit, type,
680                                       var->data.interpolation,
681                                       var->data.centroid,
682                                       var->data.sample,
683                                       var->data.patch,
684                                       prog, sh->Stage)) {
685      return false;
686   }
687
688   return true;
689}
690
691/**
692 * Validate explicit locations for the inputs to the first stage and the
693 * outputs of the last stage in a program, if those are not the VS and FS
694 * shaders.
695 */
696void
697validate_first_and_last_interface_explicit_locations(struct gl_context *ctx,
698                                                     struct gl_shader_program *prog,
699                                                     gl_shader_stage first_stage,
700                                                     gl_shader_stage last_stage)
701{
702   /* VS inputs and FS outputs are validated in
703    * assign_attribute_or_color_locations()
704    */
705   bool validate_first_stage = first_stage != MESA_SHADER_VERTEX;
706   bool validate_last_stage = last_stage != MESA_SHADER_FRAGMENT;
707   if (!validate_first_stage && !validate_last_stage)
708      return;
709
710   struct explicit_location_info explicit_locations[MAX_VARYING][4];
711
712   gl_shader_stage stages[2] = { first_stage, last_stage };
713   bool validate_stage[2] = { validate_first_stage, validate_last_stage };
714   ir_variable_mode var_direction[2] = { ir_var_shader_in, ir_var_shader_out };
715
716   for (unsigned i = 0; i < 2; i++) {
717      if (!validate_stage[i])
718         continue;
719
720      gl_shader_stage stage = stages[i];
721
722      gl_linked_shader *sh = prog->_LinkedShaders[stage];
723      assert(sh);
724
725      memset(explicit_locations, 0, sizeof(explicit_locations));
726
727      foreach_in_list(ir_instruction, node, sh->ir) {
728         ir_variable *const var = node->as_variable();
729
730         if (var == NULL ||
731             !var->data.explicit_location ||
732             var->data.location < VARYING_SLOT_VAR0 ||
733             var->data.mode != var_direction[i])
734            continue;
735
736         if (!validate_explicit_variable_location(
737               ctx, explicit_locations, var, prog, sh)) {
738            return;
739         }
740      }
741   }
742}
743
744/**
745 * Check if we should force input / output matching between shader
746 * interfaces.
747 *
748 * Section 4.3.4 (Inputs) of the GLSL 4.10 specifications say:
749 *
750 *   "Only the input variables that are actually read need to be
751 *    written by the previous stage; it is allowed to have
752 *    superfluous declarations of input variables."
753 *
754 * However it's not defined anywhere as to how we should handle
755 * inputs that are not written in the previous stage and it's not
756 * clear what "actually read" means.
757 *
758 * The GLSL 4.20 spec however is much clearer:
759 *
760 *    "Only the input variables that are statically read need to
761 *     be written by the previous stage; it is allowed to have
762 *     superfluous declarations of input variables."
763 *
764 * It also has a table that states it is an error to statically
765 * read an input that is not defined in the previous stage. While
766 * it is not an error to not statically write to the output (it
767 * just needs to be defined to not be an error).
768 *
769 * The text in the GLSL 4.20 spec was an attempt to clarify the
770 * previous spec iterations. However given the difference in spec
771 * and that some applications seem to depend on not erroring when
772 * the input is not actually read in control flow we only apply
773 * this rule to GLSL 4.20 and higher. GLSL 4.10 shaders have been
774 * seen in the wild that depend on the less strict interpretation.
775 */
776static bool
777static_input_output_matching(struct gl_shader_program *prog)
778{
779   return prog->data->Version >= (prog->IsES ? 0 : 420);
780}
781
782/**
783 * Validate that outputs from one stage match inputs of another
784 */
785void
786cross_validate_outputs_to_inputs(struct gl_context *ctx,
787                                 struct gl_shader_program *prog,
788                                 gl_linked_shader *producer,
789                                 gl_linked_shader *consumer)
790{
791   glsl_symbol_table parameters;
792   struct explicit_location_info output_explicit_locations[MAX_VARYING][4] = {};
793   struct explicit_location_info input_explicit_locations[MAX_VARYING][4] = {};
794
795   /* Find all shader outputs in the "producer" stage.
796    */
797   foreach_in_list(ir_instruction, node, producer->ir) {
798      ir_variable *const var = node->as_variable();
799
800      if (var == NULL || var->data.mode != ir_var_shader_out)
801         continue;
802
803      if (!var->data.explicit_location
804          || var->data.location < VARYING_SLOT_VAR0)
805         parameters.add_variable(var);
806      else {
807         /* User-defined varyings with explicit locations are handled
808          * differently because they do not need to have matching names.
809          */
810         if (!validate_explicit_variable_location(ctx,
811                                                  output_explicit_locations,
812                                                  var, prog, producer)) {
813            return;
814         }
815      }
816   }
817
818
819   /* Find all shader inputs in the "consumer" stage.  Any variables that have
820    * matching outputs already in the symbol table must have the same type and
821    * qualifiers.
822    *
823    * Exception: if the consumer is the geometry shader, then the inputs
824    * should be arrays and the type of the array element should match the type
825    * of the corresponding producer output.
826    */
827   foreach_in_list(ir_instruction, node, consumer->ir) {
828      ir_variable *const input = node->as_variable();
829
830      if (input == NULL || input->data.mode != ir_var_shader_in)
831         continue;
832
833      if (strcmp(input->name, "gl_Color") == 0 && input->data.used) {
834         const ir_variable *const front_color =
835            parameters.get_variable("gl_FrontColor");
836
837         const ir_variable *const back_color =
838            parameters.get_variable("gl_BackColor");
839
840         cross_validate_front_and_back_color(ctx, prog, input,
841                                             front_color, back_color,
842                                             consumer->Stage, producer->Stage);
843      } else if (strcmp(input->name, "gl_SecondaryColor") == 0 && input->data.used) {
844         const ir_variable *const front_color =
845            parameters.get_variable("gl_FrontSecondaryColor");
846
847         const ir_variable *const back_color =
848            parameters.get_variable("gl_BackSecondaryColor");
849
850         cross_validate_front_and_back_color(ctx, prog, input,
851                                             front_color, back_color,
852                                             consumer->Stage, producer->Stage);
853      } else {
854         /* The rules for connecting inputs and outputs change in the presence
855          * of explicit locations.  In this case, we no longer care about the
856          * names of the variables.  Instead, we care only about the
857          * explicitly assigned location.
858          */
859         ir_variable *output = NULL;
860         if (input->data.explicit_location
861             && input->data.location >= VARYING_SLOT_VAR0) {
862
863            const glsl_type *type = get_varying_type(input, consumer->Stage);
864            unsigned num_elements = type->count_attribute_slots(false);
865            unsigned idx =
866               compute_variable_location_slot(input, consumer->Stage);
867            unsigned slot_limit = idx + num_elements;
868
869            if (!validate_explicit_variable_location(ctx,
870                                                     input_explicit_locations,
871                                                     input, prog, consumer)) {
872               return;
873            }
874
875            while (idx < slot_limit) {
876               if (idx >= MAX_VARYING) {
877                  linker_error(prog,
878                               "Invalid location %u in %s shader\n", idx,
879                               _mesa_shader_stage_to_string(consumer->Stage));
880                  return;
881               }
882
883               output = output_explicit_locations[idx][input->data.location_frac].var;
884
885               if (output == NULL) {
886                  /* A linker failure should only happen when there is no
887                   * output declaration and there is Static Use of the
888                   * declared input.
889                   */
890                  if (input->data.used && static_input_output_matching(prog)) {
891                     linker_error(prog,
892                                  "%s shader input `%s' with explicit location "
893                                  "has no matching output\n",
894                                  _mesa_shader_stage_to_string(consumer->Stage),
895                                  input->name);
896                     break;
897                  }
898               } else if (input->data.location != output->data.location) {
899                  linker_error(prog,
900                               "%s shader input `%s' with explicit location "
901                               "has no matching output\n",
902                               _mesa_shader_stage_to_string(consumer->Stage),
903                               input->name);
904                  break;
905               }
906               idx++;
907            }
908         } else {
909            output = parameters.get_variable(input->name);
910         }
911
912         if (output != NULL) {
913            /* Interface blocks have their own validation elsewhere so don't
914             * try validating them here.
915             */
916            if (!(input->get_interface_type() &&
917                  output->get_interface_type()))
918               cross_validate_types_and_qualifiers(ctx, prog, input, output,
919                                                   consumer->Stage,
920                                                   producer->Stage);
921         } else {
922            /* Check for input vars with unmatched output vars in prev stage
923             * taking into account that interface blocks could have a matching
924             * output but with different name, so we ignore them.
925             */
926            assert(!input->data.assigned);
927            if (input->data.used && !input->get_interface_type() &&
928                !input->data.explicit_location &&
929                static_input_output_matching(prog))
930               linker_error(prog,
931                            "%s shader input `%s' "
932                            "has no matching output in the previous stage\n",
933                            _mesa_shader_stage_to_string(consumer->Stage),
934                            input->name);
935         }
936      }
937   }
938}
939
940/**
941 * Demote shader inputs and outputs that are not used in other stages, and
942 * remove them via dead code elimination.
943 */
944static void
945remove_unused_shader_inputs_and_outputs(bool is_separate_shader_object,
946                                        gl_linked_shader *sh,
947                                        enum ir_variable_mode mode)
948{
949   if (is_separate_shader_object)
950      return;
951
952   foreach_in_list(ir_instruction, node, sh->ir) {
953      ir_variable *const var = node->as_variable();
954
955      if (var == NULL || var->data.mode != int(mode))
956         continue;
957
958      /* A shader 'in' or 'out' variable is only really an input or output if
959       * its value is used by other shader stages. This will cause the
960       * variable to have a location assigned.
961       */
962      if (var->data.is_unmatched_generic_inout && !var->data.is_xfb_only) {
963         assert(var->data.mode != ir_var_temporary);
964
965         /* Assign zeros to demoted inputs to allow more optimizations. */
966         if (var->data.mode == ir_var_shader_in && !var->constant_value)
967            var->constant_value = ir_constant::zero(var, var->type);
968
969         var->data.mode = ir_var_auto;
970      }
971   }
972
973   /* Eliminate code that is now dead due to unused inputs/outputs being
974    * demoted.
975    */
976   while (do_dead_code(sh->ir, false))
977      ;
978
979}
980
981/**
982 * Initialize this object based on a string that was passed to
983 * glTransformFeedbackVaryings.
984 *
985 * If the input is mal-formed, this call still succeeds, but it sets
986 * this->var_name to a mal-formed input, so tfeedback_decl::find_output_var()
987 * will fail to find any matching variable.
988 */
989void
990tfeedback_decl::init(struct gl_context *ctx, const void *mem_ctx,
991                     const char *input)
992{
993   /* We don't have to be pedantic about what is a valid GLSL variable name,
994    * because any variable with an invalid name can't exist in the IR anyway.
995    */
996
997   this->location = -1;
998   this->orig_name = input;
999   this->lowered_builtin_array_variable = none;
1000   this->skip_components = 0;
1001   this->next_buffer_separator = false;
1002   this->matched_candidate = NULL;
1003   this->stream_id = 0;
1004   this->buffer = 0;
1005   this->offset = 0;
1006
1007   if (ctx->Extensions.ARB_transform_feedback3) {
1008      /* Parse gl_NextBuffer. */
1009      if (strcmp(input, "gl_NextBuffer") == 0) {
1010         this->next_buffer_separator = true;
1011         return;
1012      }
1013
1014      /* Parse gl_SkipComponents. */
1015      if (strcmp(input, "gl_SkipComponents1") == 0)
1016         this->skip_components = 1;
1017      else if (strcmp(input, "gl_SkipComponents2") == 0)
1018         this->skip_components = 2;
1019      else if (strcmp(input, "gl_SkipComponents3") == 0)
1020         this->skip_components = 3;
1021      else if (strcmp(input, "gl_SkipComponents4") == 0)
1022         this->skip_components = 4;
1023
1024      if (this->skip_components)
1025         return;
1026   }
1027
1028   /* Parse a declaration. */
1029   const char *base_name_end;
1030   long subscript = parse_program_resource_name(input, strlen(input),
1031                                                &base_name_end);
1032   this->var_name = ralloc_strndup(mem_ctx, input, base_name_end - input);
1033   if (this->var_name == NULL) {
1034      _mesa_error_no_memory(__func__);
1035      return;
1036   }
1037
1038   if (subscript >= 0) {
1039      this->array_subscript = subscript;
1040      this->is_subscripted = true;
1041   } else {
1042      this->is_subscripted = false;
1043   }
1044
1045   /* For drivers that lower gl_ClipDistance to gl_ClipDistanceMESA, this
1046    * class must behave specially to account for the fact that gl_ClipDistance
1047    * is converted from a float[8] to a vec4[2].
1048    */
1049   if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerCombinedClipCullDistance &&
1050       strcmp(this->var_name, "gl_ClipDistance") == 0) {
1051      this->lowered_builtin_array_variable = clip_distance;
1052   }
1053   if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerCombinedClipCullDistance &&
1054       strcmp(this->var_name, "gl_CullDistance") == 0) {
1055      this->lowered_builtin_array_variable = cull_distance;
1056   }
1057
1058   if (ctx->Const.LowerTessLevel &&
1059       (strcmp(this->var_name, "gl_TessLevelOuter") == 0))
1060      this->lowered_builtin_array_variable = tess_level_outer;
1061   if (ctx->Const.LowerTessLevel &&
1062       (strcmp(this->var_name, "gl_TessLevelInner") == 0))
1063      this->lowered_builtin_array_variable = tess_level_inner;
1064}
1065
1066
1067/**
1068 * Determine whether two tfeedback_decl objects refer to the same variable and
1069 * array index (if applicable).
1070 */
1071bool
1072tfeedback_decl::is_same(const tfeedback_decl &x, const tfeedback_decl &y)
1073{
1074   assert(x.is_varying() && y.is_varying());
1075
1076   if (strcmp(x.var_name, y.var_name) != 0)
1077      return false;
1078   if (x.is_subscripted != y.is_subscripted)
1079      return false;
1080   if (x.is_subscripted && x.array_subscript != y.array_subscript)
1081      return false;
1082   return true;
1083}
1084
1085
1086/**
1087 * Assign a location and stream ID for this tfeedback_decl object based on the
1088 * transform feedback candidate found by find_candidate.
1089 *
1090 * If an error occurs, the error is reported through linker_error() and false
1091 * is returned.
1092 */
1093bool
1094tfeedback_decl::assign_location(struct gl_context *ctx,
1095                                struct gl_shader_program *prog)
1096{
1097   assert(this->is_varying());
1098
1099   unsigned fine_location
1100      = this->matched_candidate->toplevel_var->data.location * 4
1101      + this->matched_candidate->toplevel_var->data.location_frac
1102      + this->matched_candidate->struct_offset_floats;
1103   const unsigned dmul =
1104      this->matched_candidate->type->without_array()->is_64bit() ? 2 : 1;
1105
1106   if (this->matched_candidate->type->is_array()) {
1107      /* Array variable */
1108      const unsigned matrix_cols =
1109         this->matched_candidate->type->fields.array->matrix_columns;
1110      const unsigned vector_elements =
1111         this->matched_candidate->type->fields.array->vector_elements;
1112      unsigned actual_array_size;
1113      switch (this->lowered_builtin_array_variable) {
1114      case clip_distance:
1115         actual_array_size = prog->last_vert_prog ?
1116            prog->last_vert_prog->info.clip_distance_array_size : 0;
1117         break;
1118      case cull_distance:
1119         actual_array_size = prog->last_vert_prog ?
1120            prog->last_vert_prog->info.cull_distance_array_size : 0;
1121         break;
1122      case tess_level_outer:
1123         actual_array_size = 4;
1124         break;
1125      case tess_level_inner:
1126         actual_array_size = 2;
1127         break;
1128      case none:
1129      default:
1130         actual_array_size = this->matched_candidate->type->array_size();
1131         break;
1132      }
1133
1134      if (this->is_subscripted) {
1135         /* Check array bounds. */
1136         if (this->array_subscript >= actual_array_size) {
1137            linker_error(prog, "Transform feedback varying %s has index "
1138                         "%i, but the array size is %u.",
1139                         this->orig_name, this->array_subscript,
1140                         actual_array_size);
1141            return false;
1142         }
1143         unsigned array_elem_size = this->lowered_builtin_array_variable ?
1144            1 : vector_elements * matrix_cols * dmul;
1145         fine_location += array_elem_size * this->array_subscript;
1146         this->size = 1;
1147      } else {
1148         this->size = actual_array_size;
1149      }
1150      this->vector_elements = vector_elements;
1151      this->matrix_columns = matrix_cols;
1152      if (this->lowered_builtin_array_variable)
1153         this->type = GL_FLOAT;
1154      else
1155         this->type = this->matched_candidate->type->fields.array->gl_type;
1156   } else {
1157      /* Regular variable (scalar, vector, or matrix) */
1158      if (this->is_subscripted) {
1159         linker_error(prog, "Transform feedback varying %s requested, "
1160                      "but %s is not an array.",
1161                      this->orig_name, this->var_name);
1162         return false;
1163      }
1164      this->size = 1;
1165      this->vector_elements = this->matched_candidate->type->vector_elements;
1166      this->matrix_columns = this->matched_candidate->type->matrix_columns;
1167      this->type = this->matched_candidate->type->gl_type;
1168   }
1169   this->location = fine_location / 4;
1170   this->location_frac = fine_location % 4;
1171
1172   /* From GL_EXT_transform_feedback:
1173    *   A program will fail to link if:
1174    *
1175    *   * the total number of components to capture in any varying
1176    *     variable in <varyings> is greater than the constant
1177    *     MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS_EXT and the
1178    *     buffer mode is SEPARATE_ATTRIBS_EXT;
1179    */
1180   if (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS &&
1181       this->num_components() >
1182       ctx->Const.MaxTransformFeedbackSeparateComponents) {
1183      linker_error(prog, "Transform feedback varying %s exceeds "
1184                   "MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS.",
1185                   this->orig_name);
1186      return false;
1187   }
1188
1189   /* Only transform feedback varyings can be assigned to non-zero streams,
1190    * so assign the stream id here.
1191    */
1192   this->stream_id = this->matched_candidate->toplevel_var->data.stream;
1193
1194   unsigned array_offset = this->array_subscript * 4 * dmul;
1195   unsigned struct_offset = this->matched_candidate->xfb_offset_floats * 4;
1196   this->buffer = this->matched_candidate->toplevel_var->data.xfb_buffer;
1197   this->offset = this->matched_candidate->toplevel_var->data.offset +
1198      array_offset + struct_offset;
1199
1200   return true;
1201}
1202
1203
1204unsigned
1205tfeedback_decl::get_num_outputs() const
1206{
1207   if (!this->is_varying()) {
1208      return 0;
1209   }
1210
1211   if (varying_has_user_specified_location(this->matched_candidate->toplevel_var)) {
1212      unsigned dmul = this->is_64bit() ? 2 : 1;
1213      unsigned rows_per_element = DIV_ROUND_UP(this->vector_elements * dmul, 4);
1214      return this->size * this->matrix_columns * rows_per_element;
1215   } else {
1216      return (this->num_components() + this->location_frac + 3) / 4;
1217   }
1218}
1219
1220
1221/**
1222 * Update gl_transform_feedback_info to reflect this tfeedback_decl.
1223 *
1224 * If an error occurs, the error is reported through linker_error() and false
1225 * is returned.
1226 */
1227bool
1228tfeedback_decl::store(struct gl_context *ctx, struct gl_shader_program *prog,
1229                      struct gl_transform_feedback_info *info,
1230                      unsigned buffer, unsigned buffer_index,
1231                      const unsigned max_outputs,
1232                      BITSET_WORD *used_components[MAX_FEEDBACK_BUFFERS],
1233                      bool *explicit_stride, unsigned *max_member_alignment,
1234                      bool has_xfb_qualifiers, const void* mem_ctx) const
1235{
1236   unsigned xfb_offset = 0;
1237   unsigned size = this->size;
1238   /* Handle gl_SkipComponents. */
1239   if (this->skip_components) {
1240      info->Buffers[buffer].Stride += this->skip_components;
1241      size = this->skip_components;
1242      goto store_varying;
1243   }
1244
1245   if (this->next_buffer_separator) {
1246      size = 0;
1247      goto store_varying;
1248   }
1249
1250   if (has_xfb_qualifiers) {
1251      xfb_offset = this->offset / 4;
1252   } else {
1253      xfb_offset = info->Buffers[buffer].Stride;
1254   }
1255   info->Varyings[info->NumVarying].Offset = xfb_offset * 4;
1256
1257   {
1258      unsigned location = this->location;
1259      unsigned location_frac = this->location_frac;
1260      unsigned num_components = this->num_components();
1261
1262      /* From GL_EXT_transform_feedback:
1263       *
1264       *   " A program will fail to link if:
1265       *
1266       *       * the total number of components to capture is greater than the
1267       *         constant MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS_EXT
1268       *         and the buffer mode is INTERLEAVED_ATTRIBS_EXT."
1269       *
1270       * From GL_ARB_enhanced_layouts:
1271       *
1272       *   " The resulting stride (implicit or explicit) must be less than or
1273       *     equal to the implementation-dependent constant
1274       *     gl_MaxTransformFeedbackInterleavedComponents."
1275       */
1276      if ((prog->TransformFeedback.BufferMode == GL_INTERLEAVED_ATTRIBS ||
1277           has_xfb_qualifiers) &&
1278          xfb_offset + num_components >
1279          ctx->Const.MaxTransformFeedbackInterleavedComponents) {
1280         linker_error(prog,
1281                      "The MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS "
1282                      "limit has been exceeded.");
1283         return false;
1284      }
1285
1286      /* From the OpenGL 4.60.5 spec, section 4.4.2. Output Layout Qualifiers,
1287       * Page 76, (Transform Feedback Layout Qualifiers):
1288       *
1289       *   " No aliasing in output buffers is allowed: It is a compile-time or
1290       *     link-time error to specify variables with overlapping transform
1291       *     feedback offsets."
1292       */
1293      const unsigned max_components =
1294         ctx->Const.MaxTransformFeedbackInterleavedComponents;
1295      const unsigned first_component = xfb_offset;
1296      const unsigned last_component = xfb_offset + num_components - 1;
1297      const unsigned start_word = BITSET_BITWORD(first_component);
1298      const unsigned end_word = BITSET_BITWORD(last_component);
1299      BITSET_WORD *used;
1300      assert(last_component < max_components);
1301
1302      if (!used_components[buffer]) {
1303         used_components[buffer] =
1304            rzalloc_array(mem_ctx, BITSET_WORD, BITSET_WORDS(max_components));
1305      }
1306      used = used_components[buffer];
1307
1308      for (unsigned word = start_word; word <= end_word; word++) {
1309         unsigned start_range = 0;
1310         unsigned end_range = BITSET_WORDBITS - 1;
1311
1312         if (word == start_word)
1313            start_range = first_component % BITSET_WORDBITS;
1314
1315         if (word == end_word)
1316            end_range = last_component % BITSET_WORDBITS;
1317
1318         if (used[word] & BITSET_RANGE(start_range, end_range)) {
1319            linker_error(prog,
1320                         "variable '%s', xfb_offset (%d) is causing aliasing.",
1321                         this->orig_name, xfb_offset * 4);
1322            return false;
1323         }
1324         used[word] |= BITSET_RANGE(start_range, end_range);
1325      }
1326
1327      const unsigned type_num_components =
1328         this->vector_elements * (this->is_64bit() ? 2 : 1);
1329      unsigned current_type_components_left = type_num_components;
1330
1331      while (num_components > 0) {
1332         unsigned output_size = 0;
1333
1334         /*  From GL_ARB_enhanced_layouts:
1335          *
1336          * "When an attribute variable declared using an array type is bound to
1337          * generic attribute index <i>, the active array elements are assigned to
1338          * consecutive generic attributes beginning with generic attribute <i>.  The
1339          * number of attributes and components assigned to each element are
1340          * determined according to the data type of array elements and "component"
1341          * layout qualifier (if any) specified in the declaration of the array."
1342          *
1343          * "When an attribute variable declared using a matrix type is bound to a
1344          * generic attribute index <i>, its values are taken from consecutive generic
1345          * attributes beginning with generic attribute <i>.  Such matrices are
1346          * treated as an array of column vectors with values taken from the generic
1347          * attributes.
1348          * This means there may be gaps in the varyings we are taking values from."
1349          *
1350          * Examples:
1351          *
1352          * | layout(location=0) dvec3[2] a; | layout(location=4) vec2[4] b; |
1353          * |                                |                               |
1354          * |        32b 32b 32b 32b         |        32b 32b 32b 32b        |
1355          * |      0  X   X   Y   Y          |      4  X   Y   0   0         |
1356          * |      1  Z   Z   0   0          |      5  X   Y   0   0         |
1357          * |      2  X   X   Y   Y          |      6  X   Y   0   0         |
1358          * |      3  Z   Z   0   0          |      7  X   Y   0   0         |
1359          *
1360          */
1361         if (varying_has_user_specified_location(this->matched_candidate->toplevel_var)) {
1362            output_size = MIN3(num_components, current_type_components_left, 4);
1363            current_type_components_left -= output_size;
1364            if (current_type_components_left == 0) {
1365               current_type_components_left = type_num_components;
1366            }
1367         } else {
1368            output_size = MIN2(num_components, 4 - location_frac);
1369         }
1370
1371         assert((info->NumOutputs == 0 && max_outputs == 0) ||
1372                info->NumOutputs < max_outputs);
1373
1374         /* From the ARB_enhanced_layouts spec:
1375          *
1376          *    "If such a block member or variable is not written during a shader
1377          *    invocation, the buffer contents at the assigned offset will be
1378          *    undefined.  Even if there are no static writes to a variable or
1379          *    member that is assigned a transform feedback offset, the space is
1380          *    still allocated in the buffer and still affects the stride."
1381          */
1382         if (this->is_varying_written()) {
1383            info->Outputs[info->NumOutputs].ComponentOffset = location_frac;
1384            info->Outputs[info->NumOutputs].OutputRegister = location;
1385            info->Outputs[info->NumOutputs].NumComponents = output_size;
1386            info->Outputs[info->NumOutputs].StreamId = stream_id;
1387            info->Outputs[info->NumOutputs].OutputBuffer = buffer;
1388            info->Outputs[info->NumOutputs].DstOffset = xfb_offset;
1389            ++info->NumOutputs;
1390         }
1391         info->Buffers[buffer].Stream = this->stream_id;
1392         xfb_offset += output_size;
1393
1394         num_components -= output_size;
1395         location++;
1396         location_frac = 0;
1397      }
1398   }
1399
1400   if (explicit_stride && explicit_stride[buffer]) {
1401      if (this->is_64bit() && info->Buffers[buffer].Stride % 2) {
1402         linker_error(prog, "invalid qualifier xfb_stride=%d must be a "
1403                      "multiple of 8 as its applied to a type that is or "
1404                      "contains a double.",
1405                      info->Buffers[buffer].Stride * 4);
1406         return false;
1407      }
1408
1409      if (xfb_offset > info->Buffers[buffer].Stride) {
1410         linker_error(prog, "xfb_offset (%d) overflows xfb_stride (%d) for "
1411                      "buffer (%d)", xfb_offset * 4,
1412                      info->Buffers[buffer].Stride * 4, buffer);
1413         return false;
1414      }
1415   } else {
1416      if (max_member_alignment && has_xfb_qualifiers) {
1417         max_member_alignment[buffer] = MAX2(max_member_alignment[buffer],
1418                                             this->is_64bit() ? 2 : 1);
1419         info->Buffers[buffer].Stride = ALIGN(xfb_offset,
1420                                              max_member_alignment[buffer]);
1421      } else {
1422         info->Buffers[buffer].Stride = xfb_offset;
1423      }
1424   }
1425
1426 store_varying:
1427   info->Varyings[info->NumVarying].Name = ralloc_strdup(prog,
1428                                                         this->orig_name);
1429   info->Varyings[info->NumVarying].Type = this->type;
1430   info->Varyings[info->NumVarying].Size = size;
1431   info->Varyings[info->NumVarying].BufferIndex = buffer_index;
1432   info->NumVarying++;
1433   info->Buffers[buffer].NumVaryings++;
1434
1435   return true;
1436}
1437
1438
1439const tfeedback_candidate *
1440tfeedback_decl::find_candidate(gl_shader_program *prog,
1441                               hash_table *tfeedback_candidates)
1442{
1443   const char *name = this->var_name;
1444   switch (this->lowered_builtin_array_variable) {
1445   case none:
1446      name = this->var_name;
1447      break;
1448   case clip_distance:
1449      name = "gl_ClipDistanceMESA";
1450      break;
1451   case cull_distance:
1452      name = "gl_CullDistanceMESA";
1453      break;
1454   case tess_level_outer:
1455      name = "gl_TessLevelOuterMESA";
1456      break;
1457   case tess_level_inner:
1458      name = "gl_TessLevelInnerMESA";
1459      break;
1460   }
1461   hash_entry *entry = _mesa_hash_table_search(tfeedback_candidates, name);
1462
1463   this->matched_candidate = entry ?
1464         (const tfeedback_candidate *) entry->data : NULL;
1465
1466   if (!this->matched_candidate) {
1467      /* From GL_EXT_transform_feedback:
1468       *   A program will fail to link if:
1469       *
1470       *   * any variable name specified in the <varyings> array is not
1471       *     declared as an output in the geometry shader (if present) or
1472       *     the vertex shader (if no geometry shader is present);
1473       */
1474      linker_error(prog, "Transform feedback varying %s undeclared.",
1475                   this->orig_name);
1476   }
1477
1478   return this->matched_candidate;
1479}
1480
1481/**
1482 * Force a candidate over the previously matched one. It happens when a new
1483 * varying needs to be created to match the xfb declaration, for example,
1484 * to fullfil an alignment criteria.
1485 */
1486void
1487tfeedback_decl::set_lowered_candidate(const tfeedback_candidate *candidate)
1488{
1489   this->matched_candidate = candidate;
1490
1491   /* The subscript part is no longer relevant */
1492   this->is_subscripted = false;
1493   this->array_subscript = 0;
1494}
1495
1496
1497/**
1498 * Parse all the transform feedback declarations that were passed to
1499 * glTransformFeedbackVaryings() and store them in tfeedback_decl objects.
1500 *
1501 * If an error occurs, the error is reported through linker_error() and false
1502 * is returned.
1503 */
1504static bool
1505parse_tfeedback_decls(struct gl_context *ctx, struct gl_shader_program *prog,
1506                      const void *mem_ctx, unsigned num_names,
1507                      char **varying_names, tfeedback_decl *decls)
1508{
1509   for (unsigned i = 0; i < num_names; ++i) {
1510      decls[i].init(ctx, mem_ctx, varying_names[i]);
1511
1512      if (!decls[i].is_varying())
1513         continue;
1514
1515      /* From GL_EXT_transform_feedback:
1516       *   A program will fail to link if:
1517       *
1518       *   * any two entries in the <varyings> array specify the same varying
1519       *     variable;
1520       *
1521       * We interpret this to mean "any two entries in the <varyings> array
1522       * specify the same varying variable and array index", since transform
1523       * feedback of arrays would be useless otherwise.
1524       */
1525      for (unsigned j = 0; j < i; ++j) {
1526         if (decls[j].is_varying()) {
1527            if (tfeedback_decl::is_same(decls[i], decls[j])) {
1528               linker_error(prog, "Transform feedback varying %s specified "
1529                            "more than once.", varying_names[i]);
1530               return false;
1531            }
1532         }
1533      }
1534   }
1535   return true;
1536}
1537
1538
1539static int
1540cmp_xfb_offset(const void * x_generic, const void * y_generic)
1541{
1542   tfeedback_decl *x = (tfeedback_decl *) x_generic;
1543   tfeedback_decl *y = (tfeedback_decl *) y_generic;
1544
1545   if (x->get_buffer() != y->get_buffer())
1546      return x->get_buffer() - y->get_buffer();
1547   return x->get_offset() - y->get_offset();
1548}
1549
1550/**
1551 * Store transform feedback location assignments into
1552 * prog->sh.LinkedTransformFeedback based on the data stored in
1553 * tfeedback_decls.
1554 *
1555 * If an error occurs, the error is reported through linker_error() and false
1556 * is returned.
1557 */
1558static bool
1559store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog,
1560                     unsigned num_tfeedback_decls,
1561                     tfeedback_decl *tfeedback_decls, bool has_xfb_qualifiers,
1562                     const void *mem_ctx)
1563{
1564   if (!prog->last_vert_prog)
1565      return true;
1566
1567   /* Make sure MaxTransformFeedbackBuffers is less than 32 so the bitmask for
1568    * tracking the number of buffers doesn't overflow.
1569    */
1570   assert(ctx->Const.MaxTransformFeedbackBuffers < 32);
1571
1572   bool separate_attribs_mode =
1573      prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS;
1574
1575   struct gl_program *xfb_prog = prog->last_vert_prog;
1576   xfb_prog->sh.LinkedTransformFeedback =
1577      rzalloc(xfb_prog, struct gl_transform_feedback_info);
1578
1579   /* The xfb_offset qualifier does not have to be used in increasing order
1580    * however some drivers expect to receive the list of transform feedback
1581    * declarations in order so sort it now for convenience.
1582    */
1583   if (has_xfb_qualifiers) {
1584      qsort(tfeedback_decls, num_tfeedback_decls, sizeof(*tfeedback_decls),
1585            cmp_xfb_offset);
1586   }
1587
1588   xfb_prog->sh.LinkedTransformFeedback->Varyings =
1589      rzalloc_array(xfb_prog, struct gl_transform_feedback_varying_info,
1590                    num_tfeedback_decls);
1591
1592   unsigned num_outputs = 0;
1593   for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
1594      if (tfeedback_decls[i].is_varying_written())
1595         num_outputs += tfeedback_decls[i].get_num_outputs();
1596   }
1597
1598   xfb_prog->sh.LinkedTransformFeedback->Outputs =
1599      rzalloc_array(xfb_prog, struct gl_transform_feedback_output,
1600                    num_outputs);
1601
1602   unsigned num_buffers = 0;
1603   unsigned buffers = 0;
1604   BITSET_WORD *used_components[MAX_FEEDBACK_BUFFERS] = {};
1605
1606   if (!has_xfb_qualifiers && separate_attribs_mode) {
1607      /* GL_SEPARATE_ATTRIBS */
1608      for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
1609         if (!tfeedback_decls[i].store(ctx, prog,
1610                                       xfb_prog->sh.LinkedTransformFeedback,
1611                                       num_buffers, num_buffers, num_outputs,
1612                                       used_components, NULL, NULL,
1613                                       has_xfb_qualifiers, mem_ctx))
1614            return false;
1615
1616         buffers |= 1 << num_buffers;
1617         num_buffers++;
1618      }
1619   }
1620   else {
1621      /* GL_INVERLEAVED_ATTRIBS */
1622      int buffer_stream_id = -1;
1623      unsigned buffer =
1624         num_tfeedback_decls ? tfeedback_decls[0].get_buffer() : 0;
1625      bool explicit_stride[MAX_FEEDBACK_BUFFERS] = { false };
1626      unsigned max_member_alignment[MAX_FEEDBACK_BUFFERS] = { 1, 1, 1, 1 };
1627      /* Apply any xfb_stride global qualifiers */
1628      if (has_xfb_qualifiers) {
1629         for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
1630            if (prog->TransformFeedback.BufferStride[j]) {
1631               explicit_stride[j] = true;
1632               xfb_prog->sh.LinkedTransformFeedback->Buffers[j].Stride =
1633                  prog->TransformFeedback.BufferStride[j] / 4;
1634            }
1635         }
1636      }
1637
1638      for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
1639         if (has_xfb_qualifiers &&
1640             buffer != tfeedback_decls[i].get_buffer()) {
1641            /* we have moved to the next buffer so reset stream id */
1642            buffer_stream_id = -1;
1643            num_buffers++;
1644         }
1645
1646         if (tfeedback_decls[i].is_next_buffer_separator()) {
1647            if (!tfeedback_decls[i].store(ctx, prog,
1648                                          xfb_prog->sh.LinkedTransformFeedback,
1649                                          buffer, num_buffers, num_outputs,
1650                                          used_components, explicit_stride,
1651                                          max_member_alignment,
1652                                          has_xfb_qualifiers,
1653                                          mem_ctx))
1654               return false;
1655            num_buffers++;
1656            buffer_stream_id = -1;
1657            continue;
1658         }
1659
1660         if (has_xfb_qualifiers) {
1661            buffer = tfeedback_decls[i].get_buffer();
1662         } else {
1663            buffer = num_buffers;
1664         }
1665
1666         if (tfeedback_decls[i].is_varying()) {
1667            if (buffer_stream_id == -1)  {
1668               /* First varying writing to this buffer: remember its stream */
1669               buffer_stream_id = (int) tfeedback_decls[i].get_stream_id();
1670
1671               /* Only mark a buffer as active when there is a varying
1672                * attached to it. This behaviour is based on a revised version
1673                * of section 13.2.2 of the GL 4.6 spec.
1674                */
1675               buffers |= 1 << buffer;
1676            } else if (buffer_stream_id !=
1677                       (int) tfeedback_decls[i].get_stream_id()) {
1678               /* Varying writes to the same buffer from a different stream */
1679               linker_error(prog,
1680                            "Transform feedback can't capture varyings belonging "
1681                            "to different vertex streams in a single buffer. "
1682                            "Varying %s writes to buffer from stream %u, other "
1683                            "varyings in the same buffer write from stream %u.",
1684                            tfeedback_decls[i].name(),
1685                            tfeedback_decls[i].get_stream_id(),
1686                            buffer_stream_id);
1687               return false;
1688            }
1689         }
1690
1691         if (!tfeedback_decls[i].store(ctx, prog,
1692                                       xfb_prog->sh.LinkedTransformFeedback,
1693                                       buffer, num_buffers, num_outputs,
1694                                       used_components, explicit_stride,
1695                                       max_member_alignment,
1696                                       has_xfb_qualifiers,
1697                                       mem_ctx))
1698            return false;
1699      }
1700   }
1701
1702   assert(xfb_prog->sh.LinkedTransformFeedback->NumOutputs == num_outputs);
1703
1704   xfb_prog->sh.LinkedTransformFeedback->ActiveBuffers = buffers;
1705   return true;
1706}
1707
1708namespace {
1709
1710/**
1711 * Data structure recording the relationship between outputs of one shader
1712 * stage (the "producer") and inputs of another (the "consumer").
1713 */
1714class varying_matches
1715{
1716public:
1717   varying_matches(bool disable_varying_packing,
1718                   bool disable_xfb_packing,
1719                   bool xfb_enabled,
1720                   bool enhanced_layouts_enabled,
1721                   gl_shader_stage producer_stage,
1722                   gl_shader_stage consumer_stage);
1723   ~varying_matches();
1724   void record(ir_variable *producer_var, ir_variable *consumer_var);
1725   unsigned assign_locations(struct gl_shader_program *prog,
1726                             uint8_t components[],
1727                             uint64_t reserved_slots);
1728   void store_locations() const;
1729
1730private:
1731   bool is_varying_packing_safe(const glsl_type *type,
1732                                const ir_variable *var) const;
1733
1734   /**
1735    * If true, this driver disables varying packing, so all varyings need to
1736    * be aligned on slot boundaries, and take up a number of slots equal to
1737    * their number of matrix columns times their array size.
1738    *
1739    * Packing may also be disabled because our current packing method is not
1740    * safe in SSO or versions of OpenGL where interpolation qualifiers are not
1741    * guaranteed to match across stages.
1742    */
1743   const bool disable_varying_packing;
1744
1745   /**
1746    * If true, this driver disables packing for varyings used by transform
1747    * feedback.
1748    */
1749   const bool disable_xfb_packing;
1750
1751   /**
1752    * If true, this driver has transform feedback enabled. The transform
1753    * feedback code usually requires at least some packing be done even
1754    * when varying packing is disabled, fortunately where transform feedback
1755    * requires packing it's safe to override the disabled setting. See
1756    * is_varying_packing_safe().
1757    */
1758   const bool xfb_enabled;
1759
1760   const bool enhanced_layouts_enabled;
1761
1762   /**
1763    * Enum representing the order in which varyings are packed within a
1764    * packing class.
1765    *
1766    * Currently we pack vec4's first, then vec2's, then scalar values, then
1767    * vec3's.  This order ensures that the only vectors that are at risk of
1768    * having to be "double parked" (split between two adjacent varying slots)
1769    * are the vec3's.
1770    */
1771   enum packing_order_enum {
1772      PACKING_ORDER_VEC4,
1773      PACKING_ORDER_VEC2,
1774      PACKING_ORDER_SCALAR,
1775      PACKING_ORDER_VEC3,
1776   };
1777
1778   static unsigned compute_packing_class(const ir_variable *var);
1779   static packing_order_enum compute_packing_order(const ir_variable *var);
1780   static int match_comparator(const void *x_generic, const void *y_generic);
1781   static int xfb_comparator(const void *x_generic, const void *y_generic);
1782   static int not_xfb_comparator(const void *x_generic, const void *y_generic);
1783
1784   /**
1785    * Structure recording the relationship between a single producer output
1786    * and a single consumer input.
1787    */
1788   struct match {
1789      /**
1790       * Packing class for this varying, computed by compute_packing_class().
1791       */
1792      unsigned packing_class;
1793
1794      /**
1795       * Packing order for this varying, computed by compute_packing_order().
1796       */
1797      packing_order_enum packing_order;
1798
1799      /**
1800       * The output variable in the producer stage.
1801       */
1802      ir_variable *producer_var;
1803
1804      /**
1805       * The input variable in the consumer stage.
1806       */
1807      ir_variable *consumer_var;
1808
1809      /**
1810       * The location which has been assigned for this varying.  This is
1811       * expressed in multiples of a float, with the first generic varying
1812       * (i.e. the one referred to by VARYING_SLOT_VAR0) represented by the
1813       * value 0.
1814       */
1815      unsigned generic_location;
1816   } *matches;
1817
1818   /**
1819    * The number of elements in the \c matches array that are currently in
1820    * use.
1821    */
1822   unsigned num_matches;
1823
1824   /**
1825    * The number of elements that were set aside for the \c matches array when
1826    * it was allocated.
1827    */
1828   unsigned matches_capacity;
1829
1830   gl_shader_stage producer_stage;
1831   gl_shader_stage consumer_stage;
1832};
1833
1834} /* anonymous namespace */
1835
1836varying_matches::varying_matches(bool disable_varying_packing,
1837                                 bool disable_xfb_packing,
1838                                 bool xfb_enabled,
1839                                 bool enhanced_layouts_enabled,
1840                                 gl_shader_stage producer_stage,
1841                                 gl_shader_stage consumer_stage)
1842   : disable_varying_packing(disable_varying_packing),
1843     disable_xfb_packing(disable_xfb_packing),
1844     xfb_enabled(xfb_enabled),
1845     enhanced_layouts_enabled(enhanced_layouts_enabled),
1846     producer_stage(producer_stage),
1847     consumer_stage(consumer_stage)
1848{
1849   /* Note: this initial capacity is rather arbitrarily chosen to be large
1850    * enough for many cases without wasting an unreasonable amount of space.
1851    * varying_matches::record() will resize the array if there are more than
1852    * this number of varyings.
1853    */
1854   this->matches_capacity = 8;
1855   this->matches = (match *)
1856      malloc(sizeof(*this->matches) * this->matches_capacity);
1857   this->num_matches = 0;
1858}
1859
1860
1861varying_matches::~varying_matches()
1862{
1863   free(this->matches);
1864}
1865
1866
1867/**
1868 * Packing is always safe on individual arrays, structures, and matrices. It
1869 * is also safe if the varying is only used for transform feedback.
1870 */
1871bool
1872varying_matches::is_varying_packing_safe(const glsl_type *type,
1873                                         const ir_variable *var) const
1874{
1875   if (consumer_stage == MESA_SHADER_TESS_EVAL ||
1876       consumer_stage == MESA_SHADER_TESS_CTRL ||
1877       producer_stage == MESA_SHADER_TESS_CTRL)
1878      return false;
1879
1880   return xfb_enabled && (type->is_array() || type->is_struct() ||
1881                          type->is_matrix() || var->data.is_xfb_only);
1882}
1883
1884
1885/**
1886 * Record the given producer/consumer variable pair in the list of variables
1887 * that should later be assigned locations.
1888 *
1889 * It is permissible for \c consumer_var to be NULL (this happens if a
1890 * variable is output by the producer and consumed by transform feedback, but
1891 * not consumed by the consumer).
1892 *
1893 * If \c producer_var has already been paired up with a consumer_var, or
1894 * producer_var is part of fixed pipeline functionality (and hence already has
1895 * a location assigned), this function has no effect.
1896 *
1897 * Note: as a side effect this function may change the interpolation type of
1898 * \c producer_var, but only when the change couldn't possibly affect
1899 * rendering.
1900 */
1901void
1902varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
1903{
1904   assert(producer_var != NULL || consumer_var != NULL);
1905
1906   if ((producer_var && (!producer_var->data.is_unmatched_generic_inout ||
1907       producer_var->data.explicit_location)) ||
1908       (consumer_var && (!consumer_var->data.is_unmatched_generic_inout ||
1909       consumer_var->data.explicit_location))) {
1910      /* Either a location already exists for this variable (since it is part
1911       * of fixed functionality), or it has already been recorded as part of a
1912       * previous match.
1913       */
1914      return;
1915   }
1916
1917   bool needs_flat_qualifier = consumer_var == NULL &&
1918      (producer_var->type->contains_integer() ||
1919       producer_var->type->contains_double());
1920
1921   if (!disable_varying_packing &&
1922       (!disable_xfb_packing || producer_var  == NULL || !producer_var->data.is_xfb) &&
1923       (needs_flat_qualifier ||
1924        (consumer_stage != MESA_SHADER_NONE && consumer_stage != MESA_SHADER_FRAGMENT))) {
1925      /* Since this varying is not being consumed by the fragment shader, its
1926       * interpolation type varying cannot possibly affect rendering.
1927       * Also, this variable is non-flat and is (or contains) an integer
1928       * or a double.
1929       * If the consumer stage is unknown, don't modify the interpolation
1930       * type as it could affect rendering later with separate shaders.
1931       *
1932       * lower_packed_varyings requires all integer varyings to flat,
1933       * regardless of where they appear.  We can trivially satisfy that
1934       * requirement by changing the interpolation type to flat here.
1935       */
1936      if (producer_var) {
1937         producer_var->data.centroid = false;
1938         producer_var->data.sample = false;
1939         producer_var->data.interpolation = INTERP_MODE_FLAT;
1940      }
1941
1942      if (consumer_var) {
1943         consumer_var->data.centroid = false;
1944         consumer_var->data.sample = false;
1945         consumer_var->data.interpolation = INTERP_MODE_FLAT;
1946      }
1947   }
1948
1949   if (this->num_matches == this->matches_capacity) {
1950      this->matches_capacity *= 2;
1951      this->matches = (match *)
1952         realloc(this->matches,
1953                 sizeof(*this->matches) * this->matches_capacity);
1954   }
1955
1956   /* We must use the consumer to compute the packing class because in GL4.4+
1957    * there is no guarantee interpolation qualifiers will match across stages.
1958    *
1959    * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.30 spec:
1960    *
1961    *    "The type and presence of interpolation qualifiers of variables with
1962    *    the same name declared in all linked shaders for the same cross-stage
1963    *    interface must match, otherwise the link command will fail.
1964    *
1965    *    When comparing an output from one stage to an input of a subsequent
1966    *    stage, the input and output don't match if their interpolation
1967    *    qualifiers (or lack thereof) are not the same."
1968    *
1969    * This text was also in at least revison 7 of the 4.40 spec but is no
1970    * longer in revision 9 and not in the 4.50 spec.
1971    */
1972   const ir_variable *const var = (consumer_var != NULL)
1973      ? consumer_var : producer_var;
1974
1975   if (producer_var && consumer_var &&
1976       consumer_var->data.must_be_shader_input) {
1977      producer_var->data.must_be_shader_input = 1;
1978   }
1979
1980   this->matches[this->num_matches].packing_class
1981      = this->compute_packing_class(var);
1982   this->matches[this->num_matches].packing_order
1983      = this->compute_packing_order(var);
1984
1985   this->matches[this->num_matches].producer_var = producer_var;
1986   this->matches[this->num_matches].consumer_var = consumer_var;
1987   this->num_matches++;
1988   if (producer_var)
1989      producer_var->data.is_unmatched_generic_inout = 0;
1990   if (consumer_var)
1991      consumer_var->data.is_unmatched_generic_inout = 0;
1992}
1993
1994
1995/**
1996 * Choose locations for all of the variable matches that were previously
1997 * passed to varying_matches::record().
1998 * \param components  returns array[slot] of number of components used
1999 *                    per slot (1, 2, 3 or 4)
2000 * \param reserved_slots  bitmask indicating which varying slots are already
2001 *                        allocated
2002 * \return number of slots (4-element vectors) allocated
2003 */
2004unsigned
2005varying_matches::assign_locations(struct gl_shader_program *prog,
2006                                  uint8_t components[],
2007                                  uint64_t reserved_slots)
2008{
2009   /* If packing has been disabled then we cannot safely sort the varyings by
2010    * class as it may mean we are using a version of OpenGL where
2011    * interpolation qualifiers are not guaranteed to be matching across
2012    * shaders, sorting in this case could result in mismatching shader
2013    * interfaces.
2014    * When packing is disabled the sort orders varyings used by transform
2015    * feedback first, but also depends on *undefined behaviour* of qsort to
2016    * reverse the order of the varyings. See: xfb_comparator().
2017    *
2018    * If packing is only disabled for xfb varyings (mutually exclusive with
2019    * disable_varying_packing), we then group varyings depending on if they
2020    * are captured for transform feedback. The same *undefined behaviour* is
2021    * taken advantage of.
2022    */
2023   if (this->disable_varying_packing) {
2024      /* Only sort varyings that are only used by transform feedback. */
2025      qsort(this->matches, this->num_matches, sizeof(*this->matches),
2026            &varying_matches::xfb_comparator);
2027   } else if (this->disable_xfb_packing) {
2028      /* Only sort varyings that are NOT used by transform feedback. */
2029      qsort(this->matches, this->num_matches, sizeof(*this->matches),
2030            &varying_matches::not_xfb_comparator);
2031   } else {
2032      /* Sort varying matches into an order that makes them easy to pack. */
2033      qsort(this->matches, this->num_matches, sizeof(*this->matches),
2034            &varying_matches::match_comparator);
2035   }
2036
2037   unsigned generic_location = 0;
2038   unsigned generic_patch_location = MAX_VARYING*4;
2039   bool previous_var_xfb = false;
2040   bool previous_var_xfb_only = false;
2041   unsigned previous_packing_class = ~0u;
2042
2043   /* For tranform feedback separate mode, we know the number of attributes
2044    * is <= the number of buffers.  So packing isn't critical.  In fact,
2045    * packing vec3 attributes can cause trouble because splitting a vec3
2046    * effectively creates an additional transform feedback output.  The
2047    * extra TFB output may exceed device driver limits.
2048    */
2049   const bool dont_pack_vec3 =
2050      (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS &&
2051       prog->TransformFeedback.NumVarying > 0);
2052
2053   for (unsigned i = 0; i < this->num_matches; i++) {
2054      unsigned *location = &generic_location;
2055      const ir_variable *var;
2056      const glsl_type *type;
2057      bool is_vertex_input = false;
2058
2059      if (matches[i].consumer_var) {
2060         var = matches[i].consumer_var;
2061         type = get_varying_type(var, consumer_stage);
2062         if (consumer_stage == MESA_SHADER_VERTEX)
2063            is_vertex_input = true;
2064      } else {
2065         var = matches[i].producer_var;
2066         type = get_varying_type(var, producer_stage);
2067      }
2068
2069      if (var->data.patch)
2070         location = &generic_patch_location;
2071
2072      /* Advance to the next slot if this varying has a different packing
2073       * class than the previous one, and we're not already on a slot
2074       * boundary.
2075       *
2076       * Also advance if varying packing is disabled for transform feedback,
2077       * and previous or current varying is used for transform feedback.
2078       *
2079       * Also advance to the next slot if packing is disabled. This makes sure
2080       * we don't assign varyings the same locations which is possible
2081       * because we still pack individual arrays, records and matrices even
2082       * when packing is disabled. Note we don't advance to the next slot if
2083       * we can pack varyings together that are only used for transform
2084       * feedback.
2085       */
2086      if (var->data.must_be_shader_input ||
2087          (this->disable_xfb_packing &&
2088           (previous_var_xfb || var->data.is_xfb)) ||
2089          (this->disable_varying_packing &&
2090           !(previous_var_xfb_only && var->data.is_xfb_only)) ||
2091          (previous_packing_class != this->matches[i].packing_class) ||
2092          (this->matches[i].packing_order == PACKING_ORDER_VEC3 &&
2093           dont_pack_vec3)) {
2094         *location = ALIGN(*location, 4);
2095      }
2096
2097      previous_var_xfb = var->data.is_xfb;
2098      previous_var_xfb_only = var->data.is_xfb_only;
2099      previous_packing_class = this->matches[i].packing_class;
2100
2101      /* The number of components taken up by this variable. For vertex shader
2102       * inputs, we use the number of slots * 4, as they have different
2103       * counting rules.
2104       */
2105      unsigned num_components = 0;
2106      if (is_vertex_input) {
2107         num_components = type->count_attribute_slots(is_vertex_input) * 4;
2108      } else {
2109         if ((this->disable_varying_packing &&
2110              !is_varying_packing_safe(type, var)) ||
2111              (this->disable_xfb_packing && var->data.is_xfb &&
2112               !(type->is_array() || type->is_struct() || type->is_matrix())) ||
2113             var->data.must_be_shader_input) {
2114            num_components = type->count_attribute_slots(false) * 4;
2115         } else {
2116            num_components = type->component_slots_aligned(*location);
2117         }
2118      }
2119
2120      /* The last slot for this variable, inclusive. */
2121      unsigned slot_end = *location + num_components - 1;
2122
2123      /* FIXME: We could be smarter in the below code and loop back over
2124       * trying to fill any locations that we skipped because we couldn't pack
2125       * the varying between an explicit location. For now just let the user
2126       * hit the linking error if we run out of room and suggest they use
2127       * explicit locations.
2128       */
2129      while (slot_end < MAX_VARYING * 4u) {
2130         const unsigned slots = (slot_end / 4u) - (*location / 4u) + 1;
2131         const uint64_t slot_mask = ((1ull << slots) - 1) << (*location / 4u);
2132
2133         assert(slots > 0);
2134
2135         if ((reserved_slots & slot_mask) == 0) {
2136            break;
2137         }
2138
2139         *location = ALIGN(*location + 1, 4);
2140         slot_end = *location + num_components - 1;
2141      }
2142
2143      if (!var->data.patch && slot_end >= MAX_VARYING * 4u) {
2144         linker_error(prog, "insufficient contiguous locations available for "
2145                      "%s it is possible an array or struct could not be "
2146                      "packed between varyings with explicit locations. Try "
2147                      "using an explicit location for arrays and structs.",
2148                      var->name);
2149      }
2150
2151      if (slot_end < MAX_VARYINGS_INCL_PATCH * 4u) {
2152         for (unsigned j = *location / 4u; j < slot_end / 4u; j++)
2153            components[j] = 4;
2154         components[slot_end / 4u] = (slot_end & 3) + 1;
2155      }
2156
2157      this->matches[i].generic_location = *location;
2158
2159      *location = slot_end + 1;
2160   }
2161
2162   return (generic_location + 3) / 4;
2163}
2164
2165
2166/**
2167 * Update the producer and consumer shaders to reflect the locations
2168 * assignments that were made by varying_matches::assign_locations().
2169 */
2170void
2171varying_matches::store_locations() const
2172{
2173   /* Check is location needs to be packed with lower_packed_varyings() or if
2174    * we can just use ARB_enhanced_layouts packing.
2175    */
2176   bool pack_loc[MAX_VARYINGS_INCL_PATCH] = {};
2177   const glsl_type *loc_type[MAX_VARYINGS_INCL_PATCH][4] = { {NULL, NULL} };
2178
2179   for (unsigned i = 0; i < this->num_matches; i++) {
2180      ir_variable *producer_var = this->matches[i].producer_var;
2181      ir_variable *consumer_var = this->matches[i].consumer_var;
2182      unsigned generic_location = this->matches[i].generic_location;
2183      unsigned slot = generic_location / 4;
2184      unsigned offset = generic_location % 4;
2185
2186      if (producer_var) {
2187         producer_var->data.location = VARYING_SLOT_VAR0 + slot;
2188         producer_var->data.location_frac = offset;
2189      }
2190
2191      if (consumer_var) {
2192         assert(consumer_var->data.location == -1);
2193         consumer_var->data.location = VARYING_SLOT_VAR0 + slot;
2194         consumer_var->data.location_frac = offset;
2195      }
2196
2197      /* Find locations suitable for native packing via
2198       * ARB_enhanced_layouts.
2199       */
2200      if (producer_var && consumer_var) {
2201         if (enhanced_layouts_enabled) {
2202            const glsl_type *type =
2203               get_varying_type(producer_var, producer_stage);
2204            if (type->is_array() || type->is_matrix() || type->is_struct() ||
2205                type->is_64bit()) {
2206               unsigned comp_slots = type->component_slots() + offset;
2207               unsigned slots = comp_slots / 4;
2208               if (comp_slots % 4)
2209                  slots += 1;
2210
2211               for (unsigned j = 0; j < slots; j++) {
2212                  pack_loc[slot + j] = true;
2213               }
2214            } else if (offset + type->vector_elements > 4) {
2215               pack_loc[slot] = true;
2216               pack_loc[slot + 1] = true;
2217            } else {
2218               loc_type[slot][offset] = type;
2219            }
2220         }
2221      }
2222   }
2223
2224   /* Attempt to use ARB_enhanced_layouts for more efficient packing if
2225    * suitable.
2226    */
2227   if (enhanced_layouts_enabled) {
2228      for (unsigned i = 0; i < this->num_matches; i++) {
2229         ir_variable *producer_var = this->matches[i].producer_var;
2230         ir_variable *consumer_var = this->matches[i].consumer_var;
2231         unsigned generic_location = this->matches[i].generic_location;
2232         unsigned slot = generic_location / 4;
2233
2234         if (pack_loc[slot] || !producer_var || !consumer_var)
2235            continue;
2236
2237         const glsl_type *type =
2238            get_varying_type(producer_var, producer_stage);
2239         bool type_match = true;
2240         for (unsigned j = 0; j < 4; j++) {
2241            if (loc_type[slot][j]) {
2242               if (type->base_type != loc_type[slot][j]->base_type)
2243                  type_match = false;
2244            }
2245         }
2246
2247         if (type_match) {
2248            producer_var->data.explicit_location = 1;
2249            consumer_var->data.explicit_location = 1;
2250            producer_var->data.explicit_component = 1;
2251            consumer_var->data.explicit_component = 1;
2252         }
2253      }
2254   }
2255}
2256
2257
2258/**
2259 * Compute the "packing class" of the given varying.  This is an unsigned
2260 * integer with the property that two variables in the same packing class can
2261 * be safely backed into the same vec4.
2262 */
2263unsigned
2264varying_matches::compute_packing_class(const ir_variable *var)
2265{
2266   /* Without help from the back-end, there is no way to pack together
2267    * variables with different interpolation types, because
2268    * lower_packed_varyings must choose exactly one interpolation type for
2269    * each packed varying it creates.
2270    *
2271    * However, we can safely pack together floats, ints, and uints, because:
2272    *
2273    * - varyings of base type "int" and "uint" must use the "flat"
2274    *   interpolation type, which can only occur in GLSL 1.30 and above.
2275    *
2276    * - On platforms that support GLSL 1.30 and above, lower_packed_varyings
2277    *   can store flat floats as ints without losing any information (using
2278    *   the ir_unop_bitcast_* opcodes).
2279    *
2280    * Therefore, the packing class depends only on the interpolation type.
2281    */
2282   const unsigned interp = var->is_interpolation_flat()
2283      ? unsigned(INTERP_MODE_FLAT) : var->data.interpolation;
2284
2285   assert(interp < (1 << 3));
2286
2287   const unsigned packing_class = (interp << 0) |
2288                                  (var->data.centroid << 3) |
2289                                  (var->data.sample << 4) |
2290                                  (var->data.patch << 5) |
2291                                  (var->data.must_be_shader_input << 6);
2292
2293   return packing_class;
2294}
2295
2296
2297/**
2298 * Compute the "packing order" of the given varying.  This is a sort key we
2299 * use to determine when to attempt to pack the given varying relative to
2300 * other varyings in the same packing class.
2301 */
2302varying_matches::packing_order_enum
2303varying_matches::compute_packing_order(const ir_variable *var)
2304{
2305   const glsl_type *element_type = var->type;
2306
2307   while (element_type->is_array()) {
2308      element_type = element_type->fields.array;
2309   }
2310
2311   switch (element_type->component_slots() % 4) {
2312   case 1: return PACKING_ORDER_SCALAR;
2313   case 2: return PACKING_ORDER_VEC2;
2314   case 3: return PACKING_ORDER_VEC3;
2315   case 0: return PACKING_ORDER_VEC4;
2316   default:
2317      assert(!"Unexpected value of vector_elements");
2318      return PACKING_ORDER_VEC4;
2319   }
2320}
2321
2322
2323/**
2324 * Comparison function passed to qsort() to sort varyings by packing_class and
2325 * then by packing_order.
2326 */
2327int
2328varying_matches::match_comparator(const void *x_generic, const void *y_generic)
2329{
2330   const match *x = (const match *) x_generic;
2331   const match *y = (const match *) y_generic;
2332
2333   if (x->packing_class != y->packing_class)
2334      return x->packing_class - y->packing_class;
2335   return x->packing_order - y->packing_order;
2336}
2337
2338
2339/**
2340 * Comparison function passed to qsort() to sort varyings used only by
2341 * transform feedback when packing of other varyings is disabled.
2342 */
2343int
2344varying_matches::xfb_comparator(const void *x_generic, const void *y_generic)
2345{
2346   const match *x = (const match *) x_generic;
2347
2348   if (x->producer_var != NULL && x->producer_var->data.is_xfb_only)
2349      return match_comparator(x_generic, y_generic);
2350
2351   /* FIXME: When the comparator returns 0 it means the elements being
2352    * compared are equivalent. However the qsort documentation says:
2353    *
2354    *    "The order of equivalent elements is undefined."
2355    *
2356    * In practice the sort ends up reversing the order of the varyings which
2357    * means locations are also assigned in this reversed order and happens to
2358    * be what we want. This is also whats happening in
2359    * varying_matches::match_comparator().
2360    */
2361   return 0;
2362}
2363
2364
2365/**
2366 * Comparison function passed to qsort() to sort varyings NOT used by
2367 * transform feedback when packing of xfb varyings is disabled.
2368 */
2369int
2370varying_matches::not_xfb_comparator(const void *x_generic, const void *y_generic)
2371{
2372   const match *x = (const match *) x_generic;
2373
2374   if (x->producer_var != NULL && !x->producer_var->data.is_xfb)
2375      return match_comparator(x_generic, y_generic);
2376
2377   /* FIXME: When the comparator returns 0 it means the elements being
2378    * compared are equivalent. However the qsort documentation says:
2379    *
2380    *    "The order of equivalent elements is undefined."
2381    *
2382    * In practice the sort ends up reversing the order of the varyings which
2383    * means locations are also assigned in this reversed order and happens to
2384    * be what we want. This is also whats happening in
2385    * varying_matches::match_comparator().
2386    */
2387   return 0;
2388}
2389
2390
2391/**
2392 * Is the given variable a varying variable to be counted against the
2393 * limit in ctx->Const.MaxVarying?
2394 * This includes variables such as texcoords, colors and generic
2395 * varyings, but excludes variables such as gl_FrontFacing and gl_FragCoord.
2396 */
2397static bool
2398var_counts_against_varying_limit(gl_shader_stage stage, const ir_variable *var)
2399{
2400   /* Only fragment shaders will take a varying variable as an input */
2401   if (stage == MESA_SHADER_FRAGMENT &&
2402       var->data.mode == ir_var_shader_in) {
2403      switch (var->data.location) {
2404      case VARYING_SLOT_POS:
2405      case VARYING_SLOT_FACE:
2406      case VARYING_SLOT_PNTC:
2407         return false;
2408      default:
2409         return true;
2410      }
2411   }
2412   return false;
2413}
2414
2415
2416/**
2417 * Visitor class that generates tfeedback_candidate structs describing all
2418 * possible targets of transform feedback.
2419 *
2420 * tfeedback_candidate structs are stored in the hash table
2421 * tfeedback_candidates, which is passed to the constructor.  This hash table
2422 * maps varying names to instances of the tfeedback_candidate struct.
2423 */
2424class tfeedback_candidate_generator : public program_resource_visitor
2425{
2426public:
2427   tfeedback_candidate_generator(void *mem_ctx,
2428                                 hash_table *tfeedback_candidates,
2429                                 gl_shader_stage stage)
2430      : mem_ctx(mem_ctx),
2431        tfeedback_candidates(tfeedback_candidates),
2432        stage(stage),
2433        toplevel_var(NULL),
2434        varying_floats(0),
2435        xfb_offset_floats(0)
2436   {
2437   }
2438
2439   void process(ir_variable *var)
2440   {
2441      /* All named varying interface blocks should be flattened by now */
2442      assert(!var->is_interface_instance());
2443      assert(var->data.mode == ir_var_shader_out);
2444
2445      this->toplevel_var = var;
2446      this->varying_floats = 0;
2447      this->xfb_offset_floats = 0;
2448      const glsl_type *t =
2449         var->data.from_named_ifc_block ? var->get_interface_type() : var->type;
2450      if (!var->data.patch && stage == MESA_SHADER_TESS_CTRL) {
2451         assert(t->is_array());
2452         t = t->fields.array;
2453      }
2454      program_resource_visitor::process(var, t, false);
2455   }
2456
2457private:
2458   virtual void visit_field(const glsl_type *type, const char *name,
2459                            bool /* row_major */,
2460                            const glsl_type * /* record_type */,
2461                            const enum glsl_interface_packing,
2462                            bool /* last_field */)
2463   {
2464      assert(!type->without_array()->is_struct());
2465      assert(!type->without_array()->is_interface());
2466
2467      tfeedback_candidate *candidate
2468         = rzalloc(this->mem_ctx, tfeedback_candidate);
2469      candidate->toplevel_var = this->toplevel_var;
2470      candidate->type = type;
2471
2472      if (type->without_array()->is_64bit()) {
2473         /*  From ARB_gpu_shader_fp64:
2474          *
2475          * If any variable captured in transform feedback has double-precision
2476          * components, the practical requirements for defined behavior are:
2477          *     ...
2478          * (c) each double-precision variable captured must be aligned to a
2479          *     multiple of eight bytes relative to the beginning of a vertex.
2480          */
2481         this->xfb_offset_floats = ALIGN(this->xfb_offset_floats, 2);
2482         /* 64-bit members of structs are also aligned. */
2483         this->varying_floats = ALIGN(this->varying_floats, 2);
2484      }
2485
2486      candidate->xfb_offset_floats = this->xfb_offset_floats;
2487      candidate->struct_offset_floats = this->varying_floats;
2488
2489       _mesa_hash_table_insert(this->tfeedback_candidates,
2490                               ralloc_strdup(this->mem_ctx, name),
2491                               candidate);
2492
2493      const unsigned component_slots = type->component_slots();
2494
2495      if (varying_has_user_specified_location(this->toplevel_var)) {
2496         this->varying_floats += type->count_attribute_slots(false) * 4;
2497      } else {
2498         this->varying_floats += component_slots;
2499      }
2500
2501      this->xfb_offset_floats += component_slots;
2502   }
2503
2504   /**
2505    * Memory context used to allocate hash table keys and values.
2506    */
2507   void * const mem_ctx;
2508
2509   /**
2510    * Hash table in which tfeedback_candidate objects should be stored.
2511    */
2512   hash_table * const tfeedback_candidates;
2513
2514   gl_shader_stage stage;
2515
2516   /**
2517    * Pointer to the toplevel variable that is being traversed.
2518    */
2519   ir_variable *toplevel_var;
2520
2521   /**
2522    * Total number of varying floats that have been visited so far.  This is
2523    * used to determine the offset to each varying within the toplevel
2524    * variable.
2525    */
2526   unsigned varying_floats;
2527
2528   /**
2529    * Offset within the xfb. Counted in floats.
2530    */
2531   unsigned xfb_offset_floats;
2532};
2533
2534
2535namespace linker {
2536
2537void
2538populate_consumer_input_sets(void *mem_ctx, exec_list *ir,
2539                             hash_table *consumer_inputs,
2540                             hash_table *consumer_interface_inputs,
2541                             ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])
2542{
2543   memset(consumer_inputs_with_locations,
2544          0,
2545          sizeof(consumer_inputs_with_locations[0]) * VARYING_SLOT_TESS_MAX);
2546
2547   foreach_in_list(ir_instruction, node, ir) {
2548      ir_variable *const input_var = node->as_variable();
2549
2550      if (input_var != NULL && input_var->data.mode == ir_var_shader_in) {
2551         /* All interface blocks should have been lowered by this point */
2552         assert(!input_var->type->is_interface());
2553
2554         if (input_var->data.explicit_location) {
2555            /* assign_varying_locations only cares about finding the
2556             * ir_variable at the start of a contiguous location block.
2557             *
2558             *     - For !producer, consumer_inputs_with_locations isn't used.
2559             *
2560             *     - For !consumer, consumer_inputs_with_locations is empty.
2561             *
2562             * For consumer && producer, if you were trying to set some
2563             * ir_variable to the middle of a location block on the other side
2564             * of producer/consumer, cross_validate_outputs_to_inputs() should
2565             * be link-erroring due to either type mismatch or location
2566             * overlaps.  If the variables do match up, then they've got a
2567             * matching data.location and you only looked at
2568             * consumer_inputs_with_locations[var->data.location], not any
2569             * following entries for the array/structure.
2570             */
2571            consumer_inputs_with_locations[input_var->data.location] =
2572               input_var;
2573         } else if (input_var->get_interface_type() != NULL) {
2574            char *const iface_field_name =
2575               ralloc_asprintf(mem_ctx, "%s.%s",
2576                  input_var->get_interface_type()->without_array()->name,
2577                  input_var->name);
2578            _mesa_hash_table_insert(consumer_interface_inputs,
2579                                    iface_field_name, input_var);
2580         } else {
2581            _mesa_hash_table_insert(consumer_inputs,
2582                                    ralloc_strdup(mem_ctx, input_var->name),
2583                                    input_var);
2584         }
2585      }
2586   }
2587}
2588
2589/**
2590 * Find a variable from the consumer that "matches" the specified variable
2591 *
2592 * This function only finds inputs with names that match.  There is no
2593 * validation (here) that the types, etc. are compatible.
2594 */
2595ir_variable *
2596get_matching_input(void *mem_ctx,
2597                   const ir_variable *output_var,
2598                   hash_table *consumer_inputs,
2599                   hash_table *consumer_interface_inputs,
2600                   ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])
2601{
2602   ir_variable *input_var;
2603
2604   if (output_var->data.explicit_location) {
2605      input_var = consumer_inputs_with_locations[output_var->data.location];
2606   } else if (output_var->get_interface_type() != NULL) {
2607      char *const iface_field_name =
2608         ralloc_asprintf(mem_ctx, "%s.%s",
2609            output_var->get_interface_type()->without_array()->name,
2610            output_var->name);
2611      hash_entry *entry = _mesa_hash_table_search(consumer_interface_inputs, iface_field_name);
2612      input_var = entry ? (ir_variable *) entry->data : NULL;
2613   } else {
2614      hash_entry *entry = _mesa_hash_table_search(consumer_inputs, output_var->name);
2615      input_var = entry ? (ir_variable *) entry->data : NULL;
2616   }
2617
2618   return (input_var == NULL || input_var->data.mode != ir_var_shader_in)
2619      ? NULL : input_var;
2620}
2621
2622}
2623
2624static int
2625io_variable_cmp(const void *_a, const void *_b)
2626{
2627   const ir_variable *const a = *(const ir_variable **) _a;
2628   const ir_variable *const b = *(const ir_variable **) _b;
2629
2630   if (a->data.explicit_location && b->data.explicit_location)
2631      return b->data.location - a->data.location;
2632
2633   if (a->data.explicit_location && !b->data.explicit_location)
2634      return 1;
2635
2636   if (!a->data.explicit_location && b->data.explicit_location)
2637      return -1;
2638
2639   return -strcmp(a->name, b->name);
2640}
2641
2642/**
2643 * Sort the shader IO variables into canonical order
2644 */
2645static void
2646canonicalize_shader_io(exec_list *ir, enum ir_variable_mode io_mode)
2647{
2648   ir_variable *var_table[MAX_PROGRAM_OUTPUTS * 4];
2649   unsigned num_variables = 0;
2650
2651   foreach_in_list(ir_instruction, node, ir) {
2652      ir_variable *const var = node->as_variable();
2653
2654      if (var == NULL || var->data.mode != io_mode)
2655         continue;
2656
2657      /* If we have already encountered more I/O variables that could
2658       * successfully link, bail.
2659       */
2660      if (num_variables == ARRAY_SIZE(var_table))
2661         return;
2662
2663      var_table[num_variables++] = var;
2664   }
2665
2666   if (num_variables == 0)
2667      return;
2668
2669   /* Sort the list in reverse order (io_variable_cmp handles this).  Later
2670    * we're going to push the variables on to the IR list as a stack, so we
2671    * want the last variable (in canonical order) to be first in the list.
2672    */
2673   qsort(var_table, num_variables, sizeof(var_table[0]), io_variable_cmp);
2674
2675   /* Remove the variable from it's current location in the IR, and put it at
2676    * the front.
2677    */
2678   for (unsigned i = 0; i < num_variables; i++) {
2679      var_table[i]->remove();
2680      ir->push_head(var_table[i]);
2681   }
2682}
2683
2684/**
2685 * Generate a bitfield map of the explicit locations for shader varyings.
2686 *
2687 * Note: For Tessellation shaders we are sitting right on the limits of the
2688 * 64 bit map. Per-vertex and per-patch both have separate location domains
2689 * with a max of MAX_VARYING.
2690 */
2691static uint64_t
2692reserved_varying_slot(struct gl_linked_shader *stage,
2693                      ir_variable_mode io_mode)
2694{
2695   assert(io_mode == ir_var_shader_in || io_mode == ir_var_shader_out);
2696   /* Avoid an overflow of the returned value */
2697   assert(MAX_VARYINGS_INCL_PATCH <= 64);
2698
2699   uint64_t slots = 0;
2700   int var_slot;
2701
2702   if (!stage)
2703      return slots;
2704
2705   foreach_in_list(ir_instruction, node, stage->ir) {
2706      ir_variable *const var = node->as_variable();
2707
2708      if (var == NULL || var->data.mode != io_mode ||
2709          !var->data.explicit_location ||
2710          var->data.location < VARYING_SLOT_VAR0)
2711         continue;
2712
2713      var_slot = var->data.location - VARYING_SLOT_VAR0;
2714
2715      unsigned num_elements = get_varying_type(var, stage->Stage)
2716         ->count_attribute_slots(io_mode == ir_var_shader_in &&
2717                                 stage->Stage == MESA_SHADER_VERTEX);
2718      for (unsigned i = 0; i < num_elements; i++) {
2719         if (var_slot >= 0 && var_slot < MAX_VARYINGS_INCL_PATCH)
2720            slots |= UINT64_C(1) << var_slot;
2721         var_slot += 1;
2722      }
2723   }
2724
2725   return slots;
2726}
2727
2728
2729/**
2730 * Assign locations for all variables that are produced in one pipeline stage
2731 * (the "producer") and consumed in the next stage (the "consumer").
2732 *
2733 * Variables produced by the producer may also be consumed by transform
2734 * feedback.
2735 *
2736 * \param num_tfeedback_decls is the number of declarations indicating
2737 *        variables that may be consumed by transform feedback.
2738 *
2739 * \param tfeedback_decls is a pointer to an array of tfeedback_decl objects
2740 *        representing the result of parsing the strings passed to
2741 *        glTransformFeedbackVaryings().  assign_location() will be called for
2742 *        each of these objects that matches one of the outputs of the
2743 *        producer.
2744 *
2745 * When num_tfeedback_decls is nonzero, it is permissible for the consumer to
2746 * be NULL.  In this case, varying locations are assigned solely based on the
2747 * requirements of transform feedback.
2748 */
2749static bool
2750assign_varying_locations(struct gl_context *ctx,
2751                         void *mem_ctx,
2752                         struct gl_shader_program *prog,
2753                         gl_linked_shader *producer,
2754                         gl_linked_shader *consumer,
2755                         unsigned num_tfeedback_decls,
2756                         tfeedback_decl *tfeedback_decls,
2757                         const uint64_t reserved_slots)
2758{
2759   /* Tessellation shaders treat inputs and outputs as shared memory and can
2760    * access inputs and outputs of other invocations.
2761    * Therefore, they can't be lowered to temps easily (and definitely not
2762    * efficiently).
2763    */
2764   bool unpackable_tess =
2765      (consumer && consumer->Stage == MESA_SHADER_TESS_EVAL) ||
2766      (consumer && consumer->Stage == MESA_SHADER_TESS_CTRL) ||
2767      (producer && producer->Stage == MESA_SHADER_TESS_CTRL);
2768
2769   /* Transform feedback code assumes varying arrays are packed, so if the
2770    * driver has disabled varying packing, make sure to at least enable
2771    * packing required by transform feedback. See below for exception.
2772    */
2773   bool xfb_enabled =
2774      ctx->Extensions.EXT_transform_feedback && !unpackable_tess;
2775
2776   /* Some drivers actually requires packing to be explicitly disabled
2777    * for varyings used by transform feedback.
2778    */
2779   bool disable_xfb_packing =
2780      ctx->Const.DisableTransformFeedbackPacking;
2781
2782   /* Disable packing on outward facing interfaces for SSO because in ES we
2783    * need to retain the unpacked varying information for draw time
2784    * validation.
2785    *
2786    * Packing is still enabled on individual arrays, structs, and matrices as
2787    * these are required by the transform feedback code and it is still safe
2788    * to do so. We also enable packing when a varying is only used for
2789    * transform feedback and its not a SSO.
2790    */
2791   bool disable_varying_packing =
2792      ctx->Const.DisableVaryingPacking || unpackable_tess;
2793   if (prog->SeparateShader && (producer == NULL || consumer == NULL))
2794      disable_varying_packing = true;
2795
2796   varying_matches matches(disable_varying_packing,
2797                           disable_xfb_packing,
2798                           xfb_enabled,
2799                           ctx->Extensions.ARB_enhanced_layouts,
2800                           producer ? producer->Stage : MESA_SHADER_NONE,
2801                           consumer ? consumer->Stage : MESA_SHADER_NONE);
2802   void *hash_table_ctx = ralloc_context(NULL);
2803   hash_table *tfeedback_candidates =
2804         _mesa_hash_table_create(hash_table_ctx, _mesa_hash_string,
2805                                 _mesa_key_string_equal);
2806   hash_table *consumer_inputs =
2807         _mesa_hash_table_create(hash_table_ctx, _mesa_hash_string,
2808                                 _mesa_key_string_equal);
2809   hash_table *consumer_interface_inputs =
2810         _mesa_hash_table_create(hash_table_ctx, _mesa_hash_string,
2811                                 _mesa_key_string_equal);
2812   ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX] = {
2813      NULL,
2814   };
2815
2816   unsigned consumer_vertices = 0;
2817   if (consumer && consumer->Stage == MESA_SHADER_GEOMETRY)
2818      consumer_vertices = prog->Geom.VerticesIn;
2819
2820   /* Operate in a total of four passes.
2821    *
2822    * 1. Sort inputs / outputs into a canonical order.  This is necessary so
2823    *    that inputs / outputs of separable shaders will be assigned
2824    *    predictable locations regardless of the order in which declarations
2825    *    appeared in the shader source.
2826    *
2827    * 2. Assign locations for any matching inputs and outputs.
2828    *
2829    * 3. Mark output variables in the producer that do not have locations as
2830    *    not being outputs.  This lets the optimizer eliminate them.
2831    *
2832    * 4. Mark input variables in the consumer that do not have locations as
2833    *    not being inputs.  This lets the optimizer eliminate them.
2834    */
2835   if (consumer)
2836      canonicalize_shader_io(consumer->ir, ir_var_shader_in);
2837
2838   if (producer)
2839      canonicalize_shader_io(producer->ir, ir_var_shader_out);
2840
2841   if (consumer)
2842      linker::populate_consumer_input_sets(mem_ctx, consumer->ir,
2843                                           consumer_inputs,
2844                                           consumer_interface_inputs,
2845                                           consumer_inputs_with_locations);
2846
2847   if (producer) {
2848      foreach_in_list(ir_instruction, node, producer->ir) {
2849         ir_variable *const output_var = node->as_variable();
2850
2851         if (output_var == NULL || output_var->data.mode != ir_var_shader_out)
2852            continue;
2853
2854         /* Only geometry shaders can use non-zero streams */
2855         assert(output_var->data.stream == 0 ||
2856                (output_var->data.stream < MAX_VERTEX_STREAMS &&
2857                 producer->Stage == MESA_SHADER_GEOMETRY));
2858
2859         if (num_tfeedback_decls > 0) {
2860            tfeedback_candidate_generator g(mem_ctx, tfeedback_candidates, producer->Stage);
2861            /* From OpenGL 4.6 (Core Profile) spec, section 11.1.2.1
2862             * ("Vertex Shader Variables / Output Variables")
2863             *
2864             * "Each program object can specify a set of output variables from
2865             * one shader to be recorded in transform feedback mode (see
2866             * section 13.3). The variables that can be recorded are those
2867             * emitted by the first active shader, in order, from the
2868             * following list:
2869             *
2870             *  * geometry shader
2871             *  * tessellation evaluation shader
2872             *  * tessellation control shader
2873             *  * vertex shader"
2874             *
2875             * But on OpenGL ES 3.2, section 11.1.2.1 ("Vertex Shader
2876             * Variables / Output Variables") tessellation control shader is
2877             * not included in the stages list.
2878             */
2879            if (!prog->IsES || producer->Stage != MESA_SHADER_TESS_CTRL) {
2880               g.process(output_var);
2881            }
2882         }
2883
2884         ir_variable *const input_var =
2885            linker::get_matching_input(mem_ctx, output_var, consumer_inputs,
2886                                       consumer_interface_inputs,
2887                                       consumer_inputs_with_locations);
2888
2889         /* If a matching input variable was found, add this output (and the
2890          * input) to the set.  If this is a separable program and there is no
2891          * consumer stage, add the output.
2892          *
2893          * Always add TCS outputs. They are shared by all invocations
2894          * within a patch and can be used as shared memory.
2895          */
2896         if (input_var || (prog->SeparateShader && consumer == NULL) ||
2897             producer->Stage == MESA_SHADER_TESS_CTRL) {
2898            matches.record(output_var, input_var);
2899         }
2900
2901         /* Only stream 0 outputs can be consumed in the next stage */
2902         if (input_var && output_var->data.stream != 0) {
2903            linker_error(prog, "output %s is assigned to stream=%d but "
2904                         "is linked to an input, which requires stream=0",
2905                         output_var->name, output_var->data.stream);
2906            ralloc_free(hash_table_ctx);
2907            return false;
2908         }
2909      }
2910   } else {
2911      /* If there's no producer stage, then this must be a separable program.
2912       * For example, we may have a program that has just a fragment shader.
2913       * Later this program will be used with some arbitrary vertex (or
2914       * geometry) shader program.  This means that locations must be assigned
2915       * for all the inputs.
2916       */
2917      foreach_in_list(ir_instruction, node, consumer->ir) {
2918         ir_variable *const input_var = node->as_variable();
2919         if (input_var && input_var->data.mode == ir_var_shader_in) {
2920            matches.record(NULL, input_var);
2921         }
2922      }
2923   }
2924
2925   for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
2926      if (!tfeedback_decls[i].is_varying())
2927         continue;
2928
2929      const tfeedback_candidate *matched_candidate
2930         = tfeedback_decls[i].find_candidate(prog, tfeedback_candidates);
2931
2932      if (matched_candidate == NULL) {
2933         ralloc_free(hash_table_ctx);
2934         return false;
2935      }
2936
2937      /* There are two situations where a new output varying is needed:
2938       *
2939       *  - If varying packing is disabled for xfb and the current declaration
2940       *    is subscripting an array, whether the subscript is aligned or not.
2941       *    to preserve the rest of the array for the consumer.
2942       *
2943       *  - If a builtin variable needs to be copied to a new variable
2944       *    before its content is modified by another lowering pass (e.g.
2945       *    \c gl_Position is transformed by \c nir_lower_viewport_transform).
2946       */
2947      const bool lowered =
2948         (disable_xfb_packing && tfeedback_decls[i].subscripted()) ||
2949         (matched_candidate->toplevel_var->data.explicit_location &&
2950          matched_candidate->toplevel_var->data.location < VARYING_SLOT_VAR0 &&
2951          (!consumer || consumer->Stage == MESA_SHADER_FRAGMENT) &&
2952          (ctx->Const.ShaderCompilerOptions[producer->Stage].LowerBuiltinVariablesXfb &
2953              BITFIELD_BIT(matched_candidate->toplevel_var->data.location)));
2954
2955      if (lowered) {
2956         ir_variable *new_var;
2957         tfeedback_candidate *new_candidate = NULL;
2958
2959         new_var = lower_xfb_varying(mem_ctx, producer, tfeedback_decls[i].name());
2960         if (new_var == NULL) {
2961            ralloc_free(hash_table_ctx);
2962            return false;
2963         }
2964
2965         /* Create new candidate and replace matched_candidate */
2966         new_candidate = rzalloc(mem_ctx, tfeedback_candidate);
2967         new_candidate->toplevel_var = new_var;
2968         new_candidate->toplevel_var->data.is_unmatched_generic_inout = 1;
2969         new_candidate->type = new_var->type;
2970         new_candidate->struct_offset_floats = 0;
2971         new_candidate->xfb_offset_floats = 0;
2972         _mesa_hash_table_insert(tfeedback_candidates,
2973                                 ralloc_strdup(mem_ctx, new_var->name),
2974                                 new_candidate);
2975
2976         tfeedback_decls[i].set_lowered_candidate(new_candidate);
2977         matched_candidate = new_candidate;
2978      }
2979
2980      /* Mark as xfb varying */
2981      matched_candidate->toplevel_var->data.is_xfb = 1;
2982
2983      /* Mark xfb varyings as always active */
2984      matched_candidate->toplevel_var->data.always_active_io = 1;
2985
2986      /* Mark any corresponding inputs as always active also. We must do this
2987       * because we have a NIR pass that lowers vectors to scalars and another
2988       * that removes unused varyings.
2989       * We don't split varyings marked as always active because there is no
2990       * point in doing so. This means we need to mark both sides of the
2991       * interface as always active otherwise we will have a mismatch and
2992       * start removing things we shouldn't.
2993       */
2994      ir_variable *const input_var =
2995         linker::get_matching_input(mem_ctx, matched_candidate->toplevel_var,
2996                                    consumer_inputs,
2997                                    consumer_interface_inputs,
2998                                    consumer_inputs_with_locations);
2999      if (input_var) {
3000         input_var->data.is_xfb = 1;
3001         input_var->data.always_active_io = 1;
3002      }
3003
3004      if (matched_candidate->toplevel_var->data.is_unmatched_generic_inout) {
3005         matched_candidate->toplevel_var->data.is_xfb_only = 1;
3006         matches.record(matched_candidate->toplevel_var, NULL);
3007      }
3008   }
3009
3010   uint8_t components[MAX_VARYINGS_INCL_PATCH] = {0};
3011   const unsigned slots_used = matches.assign_locations(
3012         prog, components, reserved_slots);
3013   matches.store_locations();
3014
3015   for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
3016      if (tfeedback_decls[i].is_varying()) {
3017         if (!tfeedback_decls[i].assign_location(ctx, prog)) {
3018            ralloc_free(hash_table_ctx);
3019            return false;
3020         }
3021      }
3022   }
3023   ralloc_free(hash_table_ctx);
3024
3025   if (consumer && producer) {
3026      foreach_in_list(ir_instruction, node, consumer->ir) {
3027         ir_variable *const var = node->as_variable();
3028
3029         if (var && var->data.mode == ir_var_shader_in &&
3030             var->data.is_unmatched_generic_inout) {
3031            if (!prog->IsES && prog->data->Version <= 120) {
3032               /* On page 25 (page 31 of the PDF) of the GLSL 1.20 spec:
3033                *
3034                *     Only those varying variables used (i.e. read) in
3035                *     the fragment shader executable must be written to
3036                *     by the vertex shader executable; declaring
3037                *     superfluous varying variables in a vertex shader is
3038                *     permissible.
3039                *
3040                * We interpret this text as meaning that the VS must
3041                * write the variable for the FS to read it.  See
3042                * "glsl1-varying read but not written" in piglit.
3043                */
3044               linker_error(prog, "%s shader varying %s not written "
3045                            "by %s shader\n.",
3046                            _mesa_shader_stage_to_string(consumer->Stage),
3047                            var->name,
3048                            _mesa_shader_stage_to_string(producer->Stage));
3049            } else {
3050               linker_warning(prog, "%s shader varying %s not written "
3051                              "by %s shader\n.",
3052                              _mesa_shader_stage_to_string(consumer->Stage),
3053                              var->name,
3054                              _mesa_shader_stage_to_string(producer->Stage));
3055            }
3056         }
3057      }
3058
3059      /* Now that validation is done its safe to remove unused varyings. As
3060       * we have both a producer and consumer its safe to remove unused
3061       * varyings even if the program is a SSO because the stages are being
3062       * linked together i.e. we have a multi-stage SSO.
3063       */
3064      remove_unused_shader_inputs_and_outputs(false, producer,
3065                                              ir_var_shader_out);
3066      remove_unused_shader_inputs_and_outputs(false, consumer,
3067                                              ir_var_shader_in);
3068   }
3069
3070   if (producer) {
3071      lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_out,
3072                            0, producer, disable_varying_packing,
3073                            disable_xfb_packing, xfb_enabled);
3074   }
3075
3076   if (consumer) {
3077      lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_in,
3078                            consumer_vertices, consumer, disable_varying_packing,
3079                            disable_xfb_packing, xfb_enabled);
3080   }
3081
3082   return true;
3083}
3084
3085static bool
3086check_against_output_limit(struct gl_context *ctx,
3087                           struct gl_shader_program *prog,
3088                           gl_linked_shader *producer,
3089                           unsigned num_explicit_locations)
3090{
3091   unsigned output_vectors = num_explicit_locations;
3092
3093   foreach_in_list(ir_instruction, node, producer->ir) {
3094      ir_variable *const var = node->as_variable();
3095
3096      if (var && !var->data.explicit_location &&
3097          var->data.mode == ir_var_shader_out &&
3098          var_counts_against_varying_limit(producer->Stage, var)) {
3099         /* outputs for fragment shader can't be doubles */
3100         output_vectors += var->type->count_attribute_slots(false);
3101      }
3102   }
3103
3104   assert(producer->Stage != MESA_SHADER_FRAGMENT);
3105   unsigned max_output_components =
3106      ctx->Const.Program[producer->Stage].MaxOutputComponents;
3107
3108   const unsigned output_components = output_vectors * 4;
3109   if (output_components > max_output_components) {
3110      if (ctx->API == API_OPENGLES2 || prog->IsES)
3111         linker_error(prog, "%s shader uses too many output vectors "
3112                      "(%u > %u)\n",
3113                      _mesa_shader_stage_to_string(producer->Stage),
3114                      output_vectors,
3115                      max_output_components / 4);
3116      else
3117         linker_error(prog, "%s shader uses too many output components "
3118                      "(%u > %u)\n",
3119                      _mesa_shader_stage_to_string(producer->Stage),
3120                      output_components,
3121                      max_output_components);
3122
3123      return false;
3124   }
3125
3126   return true;
3127}
3128
3129static bool
3130check_against_input_limit(struct gl_context *ctx,
3131                          struct gl_shader_program *prog,
3132                          gl_linked_shader *consumer,
3133                          unsigned num_explicit_locations)
3134{
3135   unsigned input_vectors = num_explicit_locations;
3136
3137   foreach_in_list(ir_instruction, node, consumer->ir) {
3138      ir_variable *const var = node->as_variable();
3139
3140      if (var && !var->data.explicit_location &&
3141          var->data.mode == ir_var_shader_in &&
3142          var_counts_against_varying_limit(consumer->Stage, var)) {
3143         /* vertex inputs aren't varying counted */
3144         input_vectors += var->type->count_attribute_slots(false);
3145      }
3146   }
3147
3148   assert(consumer->Stage != MESA_SHADER_VERTEX);
3149   unsigned max_input_components =
3150      ctx->Const.Program[consumer->Stage].MaxInputComponents;
3151
3152   const unsigned input_components = input_vectors * 4;
3153   if (input_components > max_input_components) {
3154      if (ctx->API == API_OPENGLES2 || prog->IsES)
3155         linker_error(prog, "%s shader uses too many input vectors "
3156                      "(%u > %u)\n",
3157                      _mesa_shader_stage_to_string(consumer->Stage),
3158                      input_vectors,
3159                      max_input_components / 4);
3160      else
3161         linker_error(prog, "%s shader uses too many input components "
3162                      "(%u > %u)\n",
3163                      _mesa_shader_stage_to_string(consumer->Stage),
3164                      input_components,
3165                      max_input_components);
3166
3167      return false;
3168   }
3169
3170   return true;
3171}
3172
3173bool
3174link_varyings(struct gl_shader_program *prog, unsigned first, unsigned last,
3175              struct gl_context *ctx, void *mem_ctx)
3176{
3177   bool has_xfb_qualifiers = false;
3178   unsigned num_tfeedback_decls = 0;
3179   char **varying_names = NULL;
3180   tfeedback_decl *tfeedback_decls = NULL;
3181
3182   /* From the ARB_enhanced_layouts spec:
3183    *
3184    *    "If the shader used to record output variables for transform feedback
3185    *    varyings uses the "xfb_buffer", "xfb_offset", or "xfb_stride" layout
3186    *    qualifiers, the values specified by TransformFeedbackVaryings are
3187    *    ignored, and the set of variables captured for transform feedback is
3188    *    instead derived from the specified layout qualifiers."
3189    */
3190   for (int i = MESA_SHADER_FRAGMENT - 1; i >= 0; i--) {
3191      /* Find last stage before fragment shader */
3192      if (prog->_LinkedShaders[i]) {
3193         has_xfb_qualifiers =
3194            process_xfb_layout_qualifiers(mem_ctx, prog->_LinkedShaders[i],
3195                                          prog, &num_tfeedback_decls,
3196                                          &varying_names);
3197         break;
3198      }
3199   }
3200
3201   if (!has_xfb_qualifiers) {
3202      num_tfeedback_decls = prog->TransformFeedback.NumVarying;
3203      varying_names = prog->TransformFeedback.VaryingNames;
3204   }
3205
3206   if (num_tfeedback_decls != 0) {
3207      /* From GL_EXT_transform_feedback:
3208       *   A program will fail to link if:
3209       *
3210       *   * the <count> specified by TransformFeedbackVaryingsEXT is
3211       *     non-zero, but the program object has no vertex or geometry
3212       *     shader;
3213       */
3214      if (first >= MESA_SHADER_FRAGMENT) {
3215         linker_error(prog, "Transform feedback varyings specified, but "
3216                      "no vertex, tessellation, or geometry shader is "
3217                      "present.\n");
3218         return false;
3219      }
3220
3221      tfeedback_decls = rzalloc_array(mem_ctx, tfeedback_decl,
3222                                      num_tfeedback_decls);
3223      if (!parse_tfeedback_decls(ctx, prog, mem_ctx, num_tfeedback_decls,
3224                                 varying_names, tfeedback_decls))
3225         return false;
3226   }
3227
3228   /* If there is no fragment shader we need to set transform feedback.
3229    *
3230    * For SSO we also need to assign output locations.  We assign them here
3231    * because we need to do it for both single stage programs and multi stage
3232    * programs.
3233    */
3234   if (last < MESA_SHADER_FRAGMENT &&
3235       (num_tfeedback_decls != 0 || prog->SeparateShader)) {
3236      const uint64_t reserved_out_slots =
3237         reserved_varying_slot(prog->_LinkedShaders[last], ir_var_shader_out);
3238      if (!assign_varying_locations(ctx, mem_ctx, prog,
3239                                    prog->_LinkedShaders[last], NULL,
3240                                    num_tfeedback_decls, tfeedback_decls,
3241                                    reserved_out_slots))
3242         return false;
3243   }
3244
3245   if (last <= MESA_SHADER_FRAGMENT) {
3246      /* Remove unused varyings from the first/last stage unless SSO */
3247      remove_unused_shader_inputs_and_outputs(prog->SeparateShader,
3248                                              prog->_LinkedShaders[first],
3249                                              ir_var_shader_in);
3250      remove_unused_shader_inputs_and_outputs(prog->SeparateShader,
3251                                              prog->_LinkedShaders[last],
3252                                              ir_var_shader_out);
3253
3254      /* If the program is made up of only a single stage */
3255      if (first == last) {
3256         gl_linked_shader *const sh = prog->_LinkedShaders[last];
3257
3258         do_dead_builtin_varyings(ctx, NULL, sh, 0, NULL);
3259         do_dead_builtin_varyings(ctx, sh, NULL, num_tfeedback_decls,
3260                                  tfeedback_decls);
3261
3262         if (prog->SeparateShader) {
3263            const uint64_t reserved_slots =
3264               reserved_varying_slot(sh, ir_var_shader_in);
3265
3266            /* Assign input locations for SSO, output locations are already
3267             * assigned.
3268             */
3269            if (!assign_varying_locations(ctx, mem_ctx, prog,
3270                                          NULL /* producer */,
3271                                          sh /* consumer */,
3272                                          0 /* num_tfeedback_decls */,
3273                                          NULL /* tfeedback_decls */,
3274                                          reserved_slots))
3275               return false;
3276         }
3277      } else {
3278         /* Linking the stages in the opposite order (from fragment to vertex)
3279          * ensures that inter-shader outputs written to in an earlier stage
3280          * are eliminated if they are (transitively) not used in a later
3281          * stage.
3282          */
3283         int next = last;
3284         for (int i = next - 1; i >= 0; i--) {
3285            if (prog->_LinkedShaders[i] == NULL && i != 0)
3286               continue;
3287
3288            gl_linked_shader *const sh_i = prog->_LinkedShaders[i];
3289            gl_linked_shader *const sh_next = prog->_LinkedShaders[next];
3290
3291            const uint64_t reserved_out_slots =
3292               reserved_varying_slot(sh_i, ir_var_shader_out);
3293            const uint64_t reserved_in_slots =
3294               reserved_varying_slot(sh_next, ir_var_shader_in);
3295
3296            do_dead_builtin_varyings(ctx, sh_i, sh_next,
3297                      next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0,
3298                      tfeedback_decls);
3299
3300            if (!assign_varying_locations(ctx, mem_ctx, prog, sh_i, sh_next,
3301                      next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0,
3302                      tfeedback_decls,
3303                      reserved_out_slots | reserved_in_slots))
3304               return false;
3305
3306            /* This must be done after all dead varyings are eliminated. */
3307            if (sh_i != NULL) {
3308               unsigned slots_used = util_bitcount64(reserved_out_slots);
3309               if (!check_against_output_limit(ctx, prog, sh_i, slots_used)) {
3310                  return false;
3311               }
3312            }
3313
3314            unsigned slots_used = util_bitcount64(reserved_in_slots);
3315            if (!check_against_input_limit(ctx, prog, sh_next, slots_used))
3316               return false;
3317
3318            next = i;
3319         }
3320      }
3321   }
3322
3323   if (!store_tfeedback_info(ctx, prog, num_tfeedback_decls, tfeedback_decls,
3324                             has_xfb_qualifiers, mem_ctx))
3325      return false;
3326
3327   return true;
3328}
3329