link_varyings.cpp revision 993e1d59
1/*
2 * Copyright © 2012 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 */
23
24/**
25 * \file link_varyings.cpp
26 *
27 * Linker functions related specifically to linking varyings between shader
28 * stages.
29 */
30
31
32#include "main/errors.h"
33#include "main/mtypes.h"
34#include "glsl_symbol_table.h"
35#include "glsl_parser_extras.h"
36#include "ir_optimization.h"
37#include "linker.h"
38#include "link_varyings.h"
39#include "main/macros.h"
40#include "util/hash_table.h"
41#include "util/u_math.h"
42#include "program.h"
43
44
45/**
46 * Get the varying type stripped of the outermost array if we're processing
47 * a stage whose varyings are arrays indexed by a vertex number (such as
48 * geometry shader inputs).
49 */
50static const glsl_type *
51get_varying_type(const ir_variable *var, gl_shader_stage stage)
52{
53   const glsl_type *type = var->type;
54
55   if (!var->data.patch &&
56       ((var->data.mode == ir_var_shader_out &&
57         stage == MESA_SHADER_TESS_CTRL) ||
58        (var->data.mode == ir_var_shader_in &&
59         (stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_TESS_EVAL ||
60          stage == MESA_SHADER_GEOMETRY)))) {
61      assert(type->is_array());
62      type = type->fields.array;
63   }
64
65   return type;
66}
67
68static void
69create_xfb_varying_names(void *mem_ctx, const glsl_type *t, char **name,
70                         size_t name_length, unsigned *count,
71                         const char *ifc_member_name,
72                         const glsl_type *ifc_member_t, char ***varying_names)
73{
74   if (t->is_interface()) {
75      size_t new_length = name_length;
76
77      assert(ifc_member_name && ifc_member_t);
78      ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", ifc_member_name);
79
80      create_xfb_varying_names(mem_ctx, ifc_member_t, name, new_length, count,
81                               NULL, NULL, varying_names);
82   } else if (t->is_record()) {
83      for (unsigned i = 0; i < t->length; i++) {
84         const char *field = t->fields.structure[i].name;
85         size_t new_length = name_length;
86
87         ralloc_asprintf_rewrite_tail(name, &new_length, ".%s", field);
88
89         create_xfb_varying_names(mem_ctx, t->fields.structure[i].type, name,
90                                  new_length, count, NULL, NULL,
91                                  varying_names);
92      }
93   } else if (t->without_array()->is_record() ||
94              t->without_array()->is_interface() ||
95              (t->is_array() && t->fields.array->is_array())) {
96      for (unsigned i = 0; i < t->length; i++) {
97         size_t new_length = name_length;
98
99         /* Append the subscript to the current variable name */
100         ralloc_asprintf_rewrite_tail(name, &new_length, "[%u]", i);
101
102         create_xfb_varying_names(mem_ctx, t->fields.array, name, new_length,
103                                  count, ifc_member_name, ifc_member_t,
104                                  varying_names);
105      }
106   } else {
107      (*varying_names)[(*count)++] = ralloc_strdup(mem_ctx, *name);
108   }
109}
110
111static bool
112process_xfb_layout_qualifiers(void *mem_ctx, const gl_linked_shader *sh,
113                              struct gl_shader_program *prog,
114                              unsigned *num_tfeedback_decls,
115                              char ***varying_names)
116{
117   bool has_xfb_qualifiers = false;
118
119   /* We still need to enable transform feedback mode even if xfb_stride is
120    * only applied to a global out. Also we don't bother to propagate
121    * xfb_stride to interface block members so this will catch that case also.
122    */
123   for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
124      if (prog->TransformFeedback.BufferStride[j]) {
125         has_xfb_qualifiers = true;
126         break;
127      }
128   }
129
130   foreach_in_list(ir_instruction, node, sh->ir) {
131      ir_variable *var = node->as_variable();
132      if (!var || var->data.mode != ir_var_shader_out)
133         continue;
134
135      /* From the ARB_enhanced_layouts spec:
136       *
137       *    "Any shader making any static use (after preprocessing) of any of
138       *     these *xfb_* qualifiers will cause the shader to be in a
139       *     transform feedback capturing mode and hence responsible for
140       *     describing the transform feedback setup.  This mode will capture
141       *     any output selected by *xfb_offset*, directly or indirectly, to
142       *     a transform feedback buffer."
143       */
144      if (var->data.explicit_xfb_buffer || var->data.explicit_xfb_stride) {
145         has_xfb_qualifiers = true;
146      }
147
148      if (var->data.explicit_xfb_offset) {
149         *num_tfeedback_decls += var->type->varying_count();
150         has_xfb_qualifiers = true;
151      }
152   }
153
154   if (*num_tfeedback_decls == 0)
155      return has_xfb_qualifiers;
156
157   unsigned i = 0;
158   *varying_names = ralloc_array(mem_ctx, char *, *num_tfeedback_decls);
159   foreach_in_list(ir_instruction, node, sh->ir) {
160      ir_variable *var = node->as_variable();
161      if (!var || var->data.mode != ir_var_shader_out)
162         continue;
163
164      if (var->data.explicit_xfb_offset) {
165         char *name;
166         const glsl_type *type, *member_type;
167
168         if (var->data.from_named_ifc_block) {
169            type = var->get_interface_type();
170
171            /* Find the member type before it was altered by lowering */
172            const glsl_type *type_wa = type->without_array();
173            member_type =
174               type_wa->fields.structure[type_wa->field_index(var->name)].type;
175            name = ralloc_strdup(NULL, type_wa->name);
176         } else {
177            type = var->type;
178            member_type = NULL;
179            name = ralloc_strdup(NULL, var->name);
180         }
181         create_xfb_varying_names(mem_ctx, type, &name, strlen(name), &i,
182                                  var->name, member_type, varying_names);
183         ralloc_free(name);
184      }
185   }
186
187   assert(i == *num_tfeedback_decls);
188   return has_xfb_qualifiers;
189}
190
191/**
192 * Validate the types and qualifiers of an output from one stage against the
193 * matching input to another stage.
194 */
195static void
196cross_validate_types_and_qualifiers(struct gl_context *ctx,
197                                    struct gl_shader_program *prog,
198                                    const ir_variable *input,
199                                    const ir_variable *output,
200                                    gl_shader_stage consumer_stage,
201                                    gl_shader_stage producer_stage)
202{
203   /* Check that the types match between stages.
204    */
205   const glsl_type *type_to_match = input->type;
206
207   /* VS -> GS, VS -> TCS, VS -> TES, TES -> GS */
208   const bool extra_array_level = (producer_stage == MESA_SHADER_VERTEX &&
209                                   consumer_stage != MESA_SHADER_FRAGMENT) ||
210                                  consumer_stage == MESA_SHADER_GEOMETRY;
211   if (extra_array_level) {
212      assert(type_to_match->is_array());
213      type_to_match = type_to_match->fields.array;
214   }
215
216   if (type_to_match != output->type) {
217      /* There is a bit of a special case for gl_TexCoord.  This
218       * built-in is unsized by default.  Applications that variable
219       * access it must redeclare it with a size.  There is some
220       * language in the GLSL spec that implies the fragment shader
221       * and vertex shader do not have to agree on this size.  Other
222       * driver behave this way, and one or two applications seem to
223       * rely on it.
224       *
225       * Neither declaration needs to be modified here because the array
226       * sizes are fixed later when update_array_sizes is called.
227       *
228       * From page 48 (page 54 of the PDF) of the GLSL 1.10 spec:
229       *
230       *     "Unlike user-defined varying variables, the built-in
231       *     varying variables don't have a strict one-to-one
232       *     correspondence between the vertex language and the
233       *     fragment language."
234       */
235      if (!output->type->is_array() || !is_gl_identifier(output->name)) {
236         linker_error(prog,
237                      "%s shader output `%s' declared as type `%s', "
238                      "but %s shader input declared as type `%s'\n",
239                      _mesa_shader_stage_to_string(producer_stage),
240                      output->name,
241                      output->type->name,
242                      _mesa_shader_stage_to_string(consumer_stage),
243                      input->type->name);
244         return;
245      }
246   }
247
248   /* Check that all of the qualifiers match between stages.
249    */
250
251   /* According to the OpenGL and OpenGLES GLSL specs, the centroid qualifier
252    * should match until OpenGL 4.3 and OpenGLES 3.1. The OpenGLES 3.0
253    * conformance test suite does not verify that the qualifiers must match.
254    * The deqp test suite expects the opposite (OpenGLES 3.1) behavior for
255    * OpenGLES 3.0 drivers, so we relax the checking in all cases.
256    */
257   if (false /* always skip the centroid check */ &&
258       prog->data->Version < (prog->IsES ? 310 : 430) &&
259       input->data.centroid != output->data.centroid) {
260      linker_error(prog,
261                   "%s shader output `%s' %s centroid qualifier, "
262                   "but %s shader input %s centroid qualifier\n",
263                   _mesa_shader_stage_to_string(producer_stage),
264                   output->name,
265                   (output->data.centroid) ? "has" : "lacks",
266                   _mesa_shader_stage_to_string(consumer_stage),
267                   (input->data.centroid) ? "has" : "lacks");
268      return;
269   }
270
271   if (input->data.sample != output->data.sample) {
272      linker_error(prog,
273                   "%s shader output `%s' %s sample qualifier, "
274                   "but %s shader input %s sample qualifier\n",
275                   _mesa_shader_stage_to_string(producer_stage),
276                   output->name,
277                   (output->data.sample) ? "has" : "lacks",
278                   _mesa_shader_stage_to_string(consumer_stage),
279                   (input->data.sample) ? "has" : "lacks");
280      return;
281   }
282
283   if (input->data.patch != output->data.patch) {
284      linker_error(prog,
285                   "%s shader output `%s' %s patch qualifier, "
286                   "but %s shader input %s patch qualifier\n",
287                   _mesa_shader_stage_to_string(producer_stage),
288                   output->name,
289                   (output->data.patch) ? "has" : "lacks",
290                   _mesa_shader_stage_to_string(consumer_stage),
291                   (input->data.patch) ? "has" : "lacks");
292      return;
293   }
294
295   /* The GLSL 4.30 and GLSL ES 3.00 specifications say:
296    *
297    *    "As only outputs need be declared with invariant, an output from
298    *     one shader stage will still match an input of a subsequent stage
299    *     without the input being declared as invariant."
300    *
301    * while GLSL 4.20 says:
302    *
303    *    "For variables leaving one shader and coming into another shader,
304    *     the invariant keyword has to be used in both shaders, or a link
305    *     error will result."
306    *
307    * and GLSL ES 1.00 section 4.6.4 "Invariance and Linking" says:
308    *
309    *    "The invariance of varyings that are declared in both the vertex
310    *     and fragment shaders must match."
311    */
312   if (input->data.explicit_invariant != output->data.explicit_invariant &&
313       prog->data->Version < (prog->IsES ? 300 : 430)) {
314      linker_error(prog,
315                   "%s shader output `%s' %s invariant qualifier, "
316                   "but %s shader input %s invariant qualifier\n",
317                   _mesa_shader_stage_to_string(producer_stage),
318                   output->name,
319                   (output->data.explicit_invariant) ? "has" : "lacks",
320                   _mesa_shader_stage_to_string(consumer_stage),
321                   (input->data.explicit_invariant) ? "has" : "lacks");
322      return;
323   }
324
325   /* GLSL >= 4.40 removes text requiring interpolation qualifiers
326    * to match cross stage, they must only match within the same stage.
327    *
328    * From page 84 (page 90 of the PDF) of the GLSL 4.40 spec:
329    *
330    *     "It is a link-time error if, within the same stage, the interpolation
331    *     qualifiers of variables of the same name do not match.
332    *
333    * Section 4.3.9 (Interpolation) of the GLSL ES 3.00 spec says:
334    *
335    *    "When no interpolation qualifier is present, smooth interpolation
336    *    is used."
337    *
338    * So we match variables where one is smooth and the other has no explicit
339    * qualifier.
340    */
341   unsigned input_interpolation = input->data.interpolation;
342   unsigned output_interpolation = output->data.interpolation;
343   if (prog->IsES) {
344      if (input_interpolation == INTERP_MODE_NONE)
345         input_interpolation = INTERP_MODE_SMOOTH;
346      if (output_interpolation == INTERP_MODE_NONE)
347         output_interpolation = INTERP_MODE_SMOOTH;
348   }
349   if (input_interpolation != output_interpolation &&
350       prog->data->Version < 440) {
351      if (!ctx->Const.AllowGLSLCrossStageInterpolationMismatch) {
352         linker_error(prog,
353                      "%s shader output `%s' specifies %s "
354                      "interpolation qualifier, "
355                      "but %s shader input specifies %s "
356                      "interpolation qualifier\n",
357                      _mesa_shader_stage_to_string(producer_stage),
358                      output->name,
359                      interpolation_string(output->data.interpolation),
360                      _mesa_shader_stage_to_string(consumer_stage),
361                      interpolation_string(input->data.interpolation));
362         return;
363      } else {
364         linker_warning(prog,
365                        "%s shader output `%s' specifies %s "
366                        "interpolation qualifier, "
367                        "but %s shader input specifies %s "
368                        "interpolation qualifier\n",
369                        _mesa_shader_stage_to_string(producer_stage),
370                        output->name,
371                        interpolation_string(output->data.interpolation),
372                        _mesa_shader_stage_to_string(consumer_stage),
373                        interpolation_string(input->data.interpolation));
374      }
375   }
376}
377
378/**
379 * Validate front and back color outputs against single color input
380 */
381static void
382cross_validate_front_and_back_color(struct gl_context *ctx,
383                                    struct gl_shader_program *prog,
384                                    const ir_variable *input,
385                                    const ir_variable *front_color,
386                                    const ir_variable *back_color,
387                                    gl_shader_stage consumer_stage,
388                                    gl_shader_stage producer_stage)
389{
390   if (front_color != NULL && front_color->data.assigned)
391      cross_validate_types_and_qualifiers(ctx, prog, input, front_color,
392                                          consumer_stage, producer_stage);
393
394   if (back_color != NULL && back_color->data.assigned)
395      cross_validate_types_and_qualifiers(ctx, prog, input, back_color,
396                                          consumer_stage, producer_stage);
397}
398
399static unsigned
400compute_variable_location_slot(ir_variable *var, gl_shader_stage stage)
401{
402   unsigned location_start = VARYING_SLOT_VAR0;
403
404   switch (stage) {
405      case MESA_SHADER_VERTEX:
406         if (var->data.mode == ir_var_shader_in)
407            location_start = VERT_ATTRIB_GENERIC0;
408         break;
409      case MESA_SHADER_TESS_CTRL:
410      case MESA_SHADER_TESS_EVAL:
411         if (var->data.patch)
412            location_start = VARYING_SLOT_PATCH0;
413         break;
414      case MESA_SHADER_FRAGMENT:
415         if (var->data.mode == ir_var_shader_out)
416            location_start = FRAG_RESULT_DATA0;
417         break;
418      default:
419         break;
420   }
421
422   return var->data.location - location_start;
423}
424
/**
 * Bookkeeping for one component of one explicit location.
 *
 * check_location_aliasing() fills an entry in when a variable claims the
 * component and compares later variables at the same location against it.
 */
struct explicit_location_info {
   /* Variable that claimed this component; NULL while it is unused. */
   ir_variable *var;
   /* Result of get_numerical_type() for the claiming variable's type. */
   unsigned numerical_type;
   /* Interpolation qualifier recorded for the claiming variable. */
   unsigned interpolation;
   /* Auxiliary storage qualifiers recorded for the claiming variable. */
   bool centroid;
   bool sample;
   bool patch;
};
433
434static inline unsigned
435get_numerical_type(const glsl_type *type)
436{
437   /* From the OpenGL 4.6 spec, section 4.4.1 Input Layout Qualifiers, Page 68,
438    * (Location aliasing):
439    *
440    *    "Further, when location aliasing, the aliases sharing the location
441    *     must have the same underlying numerical type  (floating-point or
442    *     integer)
443    */
444   if (type->is_float() || type->is_double())
445      return GLSL_TYPE_FLOAT;
446   return GLSL_TYPE_INT;
447}
448
449static bool
450check_location_aliasing(struct explicit_location_info explicit_locations[][4],
451                        ir_variable *var,
452                        unsigned location,
453                        unsigned component,
454                        unsigned location_limit,
455                        const glsl_type *type,
456                        unsigned interpolation,
457                        bool centroid,
458                        bool sample,
459                        bool patch,
460                        gl_shader_program *prog,
461                        gl_shader_stage stage)
462{
463   unsigned last_comp;
464   if (type->without_array()->is_record()) {
465      /* The component qualifier can't be used on structs so just treat
466       * all component slots as used.
467       */
468      last_comp = 4;
469   } else {
470      unsigned dmul = type->without_array()->is_64bit() ? 2 : 1;
471      last_comp = component + type->without_array()->vector_elements * dmul;
472   }
473
474   while (location < location_limit) {
475      unsigned comp = 0;
476      while (comp < 4) {
477         struct explicit_location_info *info =
478            &explicit_locations[location][comp];
479
480         if (info->var) {
481            /* Component aliasing is not alloed */
482            if (comp >= component && comp < last_comp) {
483               linker_error(prog,
484                            "%s shader has multiple %sputs explicitly "
485                            "assigned to location %d and component %d\n",
486                            _mesa_shader_stage_to_string(stage),
487                            var->data.mode == ir_var_shader_in ? "in" : "out",
488                            location, comp);
489               return false;
490            } else {
491               /* For all other used components we need to have matching
492                * types, interpolation and auxiliary storage
493                */
494               if (info->numerical_type !=
495                   get_numerical_type(type->without_array())) {
496                  linker_error(prog,
497                               "Varyings sharing the same location must "
498                               "have the same underlying numerical type. "
499                               "Location %u component %u\n",
500                               location, comp);
501                  return false;
502               }
503
504               if (info->interpolation != interpolation) {
505                  linker_error(prog,
506                               "%s shader has multiple %sputs at explicit "
507                               "location %u with different interpolation "
508                               "settings\n",
509                               _mesa_shader_stage_to_string(stage),
510                               var->data.mode == ir_var_shader_in ?
511                               "in" : "out", location);
512                  return false;
513               }
514
515               if (info->centroid != centroid ||
516                   info->sample != sample ||
517                   info->patch != patch) {
518                  linker_error(prog,
519                               "%s shader has multiple %sputs at explicit "
520                               "location %u with different aux storage\n",
521                               _mesa_shader_stage_to_string(stage),
522                               var->data.mode == ir_var_shader_in ?
523                               "in" : "out", location);
524                  return false;
525               }
526            }
527         } else if (comp >= component && comp < last_comp) {
528            info->var = var;
529            info->numerical_type = get_numerical_type(type->without_array());
530            info->interpolation = interpolation;
531            info->centroid = centroid;
532            info->sample = sample;
533            info->patch = patch;
534         }
535
536         comp++;
537
538         /* We need to do some special handling for doubles as dvec3 and
539          * dvec4 consume two consecutive locations. We don't need to
540          * worry about components beginning at anything other than 0 as
541          * the spec does not allow this for dvec3 and dvec4.
542          */
543         if (comp == 4 && last_comp > 4) {
544            last_comp = last_comp - 4;
545            /* Bump location index and reset the component index */
546            location++;
547            comp = 0;
548            component = 0;
549         }
550      }
551
552      location++;
553   }
554
555   return true;
556}
557
558static bool
559validate_explicit_variable_location(struct gl_context *ctx,
560                                    struct explicit_location_info explicit_locations[][4],
561                                    ir_variable *var,
562                                    gl_shader_program *prog,
563                                    gl_linked_shader *sh)
564{
565   const glsl_type *type = get_varying_type(var, sh->Stage);
566   unsigned num_elements = type->count_attribute_slots(false);
567   unsigned idx = compute_variable_location_slot(var, sh->Stage);
568   unsigned slot_limit = idx + num_elements;
569
570   /* Vertex shader inputs and fragment shader outputs are validated in
571    * assign_attribute_or_color_locations() so we should not attempt to
572    * validate them again here.
573    */
574   unsigned slot_max;
575   if (var->data.mode == ir_var_shader_out) {
576      assert(sh->Stage != MESA_SHADER_FRAGMENT);
577      slot_max =
578         ctx->Const.Program[sh->Stage].MaxOutputComponents / 4;
579   } else {
580      assert(var->data.mode == ir_var_shader_in);
581      assert(sh->Stage != MESA_SHADER_VERTEX);
582      slot_max =
583         ctx->Const.Program[sh->Stage].MaxInputComponents / 4;
584   }
585
586   if (slot_limit > slot_max) {
587      linker_error(prog,
588                   "Invalid location %u in %s shader\n",
589                   idx, _mesa_shader_stage_to_string(sh->Stage));
590      return false;
591   }
592
593   const glsl_type *type_without_array = type->without_array();
594   if (type_without_array->is_interface()) {
595      for (unsigned i = 0; i < type_without_array->length; i++) {
596         glsl_struct_field *field = &type_without_array->fields.structure[i];
597         unsigned field_location = field->location -
598            (field->patch ? VARYING_SLOT_PATCH0 : VARYING_SLOT_VAR0);
599         if (!check_location_aliasing(explicit_locations, var,
600                                      field_location,
601                                      0, field_location + 1,
602                                      field->type,
603                                      field->interpolation,
604                                      field->centroid,
605                                      field->sample,
606                                      field->patch,
607                                      prog, sh->Stage)) {
608            return false;
609         }
610      }
611   } else if (!check_location_aliasing(explicit_locations, var,
612                                       idx, var->data.location_frac,
613                                       slot_limit, type,
614                                       var->data.interpolation,
615                                       var->data.centroid,
616                                       var->data.sample,
617                                       var->data.patch,
618                                       prog, sh->Stage)) {
619      return false;
620   }
621
622   return true;
623}
624
625/**
626 * Validate explicit locations for the inputs to the first stage and the
627 * outputs of the last stage in an SSO program (everything in between is
628 * validated in cross_validate_outputs_to_inputs).
629 */
630void
631validate_sso_explicit_locations(struct gl_context *ctx,
632                                struct gl_shader_program *prog,
633                                gl_shader_stage first_stage,
634                                gl_shader_stage last_stage)
635{
636   assert(prog->SeparateShader);
637
638   /* VS inputs and FS outputs are validated in
639    * assign_attribute_or_color_locations()
640    */
641   bool validate_first_stage = first_stage != MESA_SHADER_VERTEX;
642   bool validate_last_stage = last_stage != MESA_SHADER_FRAGMENT;
643   if (!validate_first_stage && !validate_last_stage)
644      return;
645
646   struct explicit_location_info explicit_locations[MAX_VARYING][4];
647
648   gl_shader_stage stages[2] = { first_stage, last_stage };
649   bool validate_stage[2] = { validate_first_stage, validate_last_stage };
650   ir_variable_mode var_direction[2] = { ir_var_shader_in, ir_var_shader_out };
651
652   for (unsigned i = 0; i < 2; i++) {
653      if (!validate_stage[i])
654         continue;
655
656      gl_shader_stage stage = stages[i];
657
658      gl_linked_shader *sh = prog->_LinkedShaders[stage];
659      assert(sh);
660
661      memset(explicit_locations, 0, sizeof(explicit_locations));
662
663      foreach_in_list(ir_instruction, node, sh->ir) {
664         ir_variable *const var = node->as_variable();
665
666         if (var == NULL ||
667             !var->data.explicit_location ||
668             var->data.location < VARYING_SLOT_VAR0 ||
669             var->data.mode != var_direction[i])
670            continue;
671
672         if (!validate_explicit_variable_location(
673               ctx, explicit_locations, var, prog, sh)) {
674            return;
675         }
676      }
677   }
678}
679
680/**
681 * Validate that outputs from one stage match inputs of another
682 */
683void
684cross_validate_outputs_to_inputs(struct gl_context *ctx,
685                                 struct gl_shader_program *prog,
686                                 gl_linked_shader *producer,
687                                 gl_linked_shader *consumer)
688{
689   glsl_symbol_table parameters;
690   struct explicit_location_info explicit_locations[MAX_VARYING][4] = { 0 };
691
692   /* Find all shader outputs in the "producer" stage.
693    */
694   foreach_in_list(ir_instruction, node, producer->ir) {
695      ir_variable *const var = node->as_variable();
696
697      if (var == NULL || var->data.mode != ir_var_shader_out)
698         continue;
699
700      if (!var->data.explicit_location
701          || var->data.location < VARYING_SLOT_VAR0)
702         parameters.add_variable(var);
703      else {
704         /* User-defined varyings with explicit locations are handled
705          * differently because they do not need to have matching names.
706          */
707         if (!validate_explicit_variable_location(ctx,
708                                                  explicit_locations,
709                                                  var, prog, producer)) {
710            return;
711         }
712      }
713   }
714
715
716   /* Find all shader inputs in the "consumer" stage.  Any variables that have
717    * matching outputs already in the symbol table must have the same type and
718    * qualifiers.
719    *
720    * Exception: if the consumer is the geometry shader, then the inputs
721    * should be arrays and the type of the array element should match the type
722    * of the corresponding producer output.
723    */
724   foreach_in_list(ir_instruction, node, consumer->ir) {
725      ir_variable *const input = node->as_variable();
726
727      if (input == NULL || input->data.mode != ir_var_shader_in)
728         continue;
729
730      if (strcmp(input->name, "gl_Color") == 0 && input->data.used) {
731         const ir_variable *const front_color =
732            parameters.get_variable("gl_FrontColor");
733
734         const ir_variable *const back_color =
735            parameters.get_variable("gl_BackColor");
736
737         cross_validate_front_and_back_color(ctx, prog, input,
738                                             front_color, back_color,
739                                             consumer->Stage, producer->Stage);
740      } else if (strcmp(input->name, "gl_SecondaryColor") == 0 && input->data.used) {
741         const ir_variable *const front_color =
742            parameters.get_variable("gl_FrontSecondaryColor");
743
744         const ir_variable *const back_color =
745            parameters.get_variable("gl_BackSecondaryColor");
746
747         cross_validate_front_and_back_color(ctx, prog, input,
748                                             front_color, back_color,
749                                             consumer->Stage, producer->Stage);
750      } else {
751         /* The rules for connecting inputs and outputs change in the presence
752          * of explicit locations.  In this case, we no longer care about the
753          * names of the variables.  Instead, we care only about the
754          * explicitly assigned location.
755          */
756         ir_variable *output = NULL;
757         if (input->data.explicit_location
758             && input->data.location >= VARYING_SLOT_VAR0) {
759
760            const glsl_type *type = get_varying_type(input, consumer->Stage);
761            unsigned num_elements = type->count_attribute_slots(false);
762            unsigned idx =
763               compute_variable_location_slot(input, consumer->Stage);
764            unsigned slot_limit = idx + num_elements;
765
766            while (idx < slot_limit) {
767               if (idx >= MAX_VARYING) {
768                  linker_error(prog,
769                               "Invalid location %u in %s shader\n", idx,
770                               _mesa_shader_stage_to_string(consumer->Stage));
771                  return;
772               }
773
774               output = explicit_locations[idx][input->data.location_frac].var;
775
776               if (output == NULL) {
777                  /* A linker failure should only happen when there is no
778                   * output declaration and there is Static Use of the
779                   * declared input.
780                   */
781                  if (input->data.used) {
782                     linker_error(prog,
783                                  "%s shader input `%s' with explicit location "
784                                  "has no matching output\n",
785                                  _mesa_shader_stage_to_string(consumer->Stage),
786                                  input->name);
787                     break;
788                  }
789               } else if (input->data.location != output->data.location) {
790                  linker_error(prog,
791                               "%s shader input `%s' with explicit location "
792                               "has no matching output\n",
793                               _mesa_shader_stage_to_string(consumer->Stage),
794                               input->name);
795                  break;
796               }
797               idx++;
798            }
799         } else {
800            output = parameters.get_variable(input->name);
801         }
802
803         if (output != NULL) {
804            /* Interface blocks have their own validation elsewhere so don't
805             * try validating them here.
806             */
807            if (!(input->get_interface_type() &&
808                  output->get_interface_type()))
809               cross_validate_types_and_qualifiers(ctx, prog, input, output,
810                                                   consumer->Stage,
811                                                   producer->Stage);
812         } else {
813            /* Check for input vars with unmatched output vars in prev stage
814             * taking into account that interface blocks could have a matching
815             * output but with different name, so we ignore them.
816             */
817            assert(!input->data.assigned);
818            if (input->data.used && !input->get_interface_type() &&
819                !input->data.explicit_location)
820               linker_error(prog,
821                            "%s shader input `%s' "
822                            "has no matching output in the previous stage\n",
823                            _mesa_shader_stage_to_string(consumer->Stage),
824                            input->name);
825         }
826      }
827   }
828}
829
830/**
831 * Demote shader inputs and outputs that are not used in other stages, and
832 * remove them via dead code elimination.
833 */
834static void
835remove_unused_shader_inputs_and_outputs(bool is_separate_shader_object,
836                                        gl_linked_shader *sh,
837                                        enum ir_variable_mode mode)
838{
839   if (is_separate_shader_object)
840      return;
841
842   foreach_in_list(ir_instruction, node, sh->ir) {
843      ir_variable *const var = node->as_variable();
844
845      if (var == NULL || var->data.mode != int(mode))
846         continue;
847
848      /* A shader 'in' or 'out' variable is only really an input or output if
849       * its value is used by other shader stages. This will cause the
850       * variable to have a location assigned.
851       */
852      if (var->data.is_unmatched_generic_inout && !var->data.is_xfb_only) {
853         assert(var->data.mode != ir_var_temporary);
854
855         /* Assign zeros to demoted inputs to allow more optimizations. */
856         if (var->data.mode == ir_var_shader_in && !var->constant_value)
857            var->constant_value = ir_constant::zero(var, var->type);
858
859         var->data.mode = ir_var_auto;
860      }
861   }
862
863   /* Eliminate code that is now dead due to unused inputs/outputs being
864    * demoted.
865    */
866   while (do_dead_code(sh->ir, false))
867      ;
868
869}
870
871/**
872 * Initialize this object based on a string that was passed to
873 * glTransformFeedbackVaryings.
874 *
875 * If the input is mal-formed, this call still succeeds, but it sets
876 * this->var_name to a mal-formed input, so tfeedback_decl::find_output_var()
877 * will fail to find any matching variable.
878 */
879void
880tfeedback_decl::init(struct gl_context *ctx, const void *mem_ctx,
881                     const char *input)
882{
883   /* We don't have to be pedantic about what is a valid GLSL variable name,
884    * because any variable with an invalid name can't exist in the IR anyway.
885    */
886
887   this->location = -1;
888   this->orig_name = input;
889   this->lowered_builtin_array_variable = none;
890   this->skip_components = 0;
891   this->next_buffer_separator = false;
892   this->matched_candidate = NULL;
893   this->stream_id = 0;
894   this->buffer = 0;
895   this->offset = 0;
896
897   if (ctx->Extensions.ARB_transform_feedback3) {
898      /* Parse gl_NextBuffer. */
899      if (strcmp(input, "gl_NextBuffer") == 0) {
900         this->next_buffer_separator = true;
901         return;
902      }
903
904      /* Parse gl_SkipComponents. */
905      if (strcmp(input, "gl_SkipComponents1") == 0)
906         this->skip_components = 1;
907      else if (strcmp(input, "gl_SkipComponents2") == 0)
908         this->skip_components = 2;
909      else if (strcmp(input, "gl_SkipComponents3") == 0)
910         this->skip_components = 3;
911      else if (strcmp(input, "gl_SkipComponents4") == 0)
912         this->skip_components = 4;
913
914      if (this->skip_components)
915         return;
916   }
917
918   /* Parse a declaration. */
919   const char *base_name_end;
920   long subscript = parse_program_resource_name(input, &base_name_end);
921   this->var_name = ralloc_strndup(mem_ctx, input, base_name_end - input);
922   if (this->var_name == NULL) {
923      _mesa_error_no_memory(__func__);
924      return;
925   }
926
927   if (subscript >= 0) {
928      this->array_subscript = subscript;
929      this->is_subscripted = true;
930   } else {
931      this->is_subscripted = false;
932   }
933
934   /* For drivers that lower gl_ClipDistance to gl_ClipDistanceMESA, this
935    * class must behave specially to account for the fact that gl_ClipDistance
936    * is converted from a float[8] to a vec4[2].
937    */
938   if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerCombinedClipCullDistance &&
939       strcmp(this->var_name, "gl_ClipDistance") == 0) {
940      this->lowered_builtin_array_variable = clip_distance;
941   }
942   if (ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].LowerCombinedClipCullDistance &&
943       strcmp(this->var_name, "gl_CullDistance") == 0) {
944      this->lowered_builtin_array_variable = cull_distance;
945   }
946
947   if (ctx->Const.LowerTessLevel &&
948       (strcmp(this->var_name, "gl_TessLevelOuter") == 0))
949      this->lowered_builtin_array_variable = tess_level_outer;
950   if (ctx->Const.LowerTessLevel &&
951       (strcmp(this->var_name, "gl_TessLevelInner") == 0))
952      this->lowered_builtin_array_variable = tess_level_inner;
953}
954
955
956/**
957 * Determine whether two tfeedback_decl objects refer to the same variable and
958 * array index (if applicable).
959 */
960bool
961tfeedback_decl::is_same(const tfeedback_decl &x, const tfeedback_decl &y)
962{
963   assert(x.is_varying() && y.is_varying());
964
965   if (strcmp(x.var_name, y.var_name) != 0)
966      return false;
967   if (x.is_subscripted != y.is_subscripted)
968      return false;
969   if (x.is_subscripted && x.array_subscript != y.array_subscript)
970      return false;
971   return true;
972}
973
974
975/**
976 * Assign a location and stream ID for this tfeedback_decl object based on the
977 * transform feedback candidate found by find_candidate.
978 *
979 * If an error occurs, the error is reported through linker_error() and false
980 * is returned.
981 */
982bool
983tfeedback_decl::assign_location(struct gl_context *ctx,
984                                struct gl_shader_program *prog)
985{
986   assert(this->is_varying());
987
988   unsigned fine_location
989      = this->matched_candidate->toplevel_var->data.location * 4
990      + this->matched_candidate->toplevel_var->data.location_frac
991      + this->matched_candidate->offset;
992   const unsigned dmul =
993      this->matched_candidate->type->without_array()->is_64bit() ? 2 : 1;
994
995   if (this->matched_candidate->type->is_array()) {
996      /* Array variable */
997      const unsigned matrix_cols =
998         this->matched_candidate->type->fields.array->matrix_columns;
999      const unsigned vector_elements =
1000         this->matched_candidate->type->fields.array->vector_elements;
1001      unsigned actual_array_size;
1002      switch (this->lowered_builtin_array_variable) {
1003      case clip_distance:
1004         actual_array_size = prog->last_vert_prog ?
1005            prog->last_vert_prog->info.clip_distance_array_size : 0;
1006         break;
1007      case cull_distance:
1008         actual_array_size = prog->last_vert_prog ?
1009            prog->last_vert_prog->info.cull_distance_array_size : 0;
1010         break;
1011      case tess_level_outer:
1012         actual_array_size = 4;
1013         break;
1014      case tess_level_inner:
1015         actual_array_size = 2;
1016         break;
1017      case none:
1018      default:
1019         actual_array_size = this->matched_candidate->type->array_size();
1020         break;
1021      }
1022
1023      if (this->is_subscripted) {
1024         /* Check array bounds. */
1025         if (this->array_subscript >= actual_array_size) {
1026            linker_error(prog, "Transform feedback varying %s has index "
1027                         "%i, but the array size is %u.",
1028                         this->orig_name, this->array_subscript,
1029                         actual_array_size);
1030            return false;
1031         }
1032         unsigned array_elem_size = this->lowered_builtin_array_variable ?
1033            1 : vector_elements * matrix_cols * dmul;
1034         fine_location += array_elem_size * this->array_subscript;
1035         this->size = 1;
1036      } else {
1037         this->size = actual_array_size;
1038      }
1039      this->vector_elements = vector_elements;
1040      this->matrix_columns = matrix_cols;
1041      if (this->lowered_builtin_array_variable)
1042         this->type = GL_FLOAT;
1043      else
1044         this->type = this->matched_candidate->type->fields.array->gl_type;
1045   } else {
1046      /* Regular variable (scalar, vector, or matrix) */
1047      if (this->is_subscripted) {
1048         linker_error(prog, "Transform feedback varying %s requested, "
1049                      "but %s is not an array.",
1050                      this->orig_name, this->var_name);
1051         return false;
1052      }
1053      this->size = 1;
1054      this->vector_elements = this->matched_candidate->type->vector_elements;
1055      this->matrix_columns = this->matched_candidate->type->matrix_columns;
1056      this->type = this->matched_candidate->type->gl_type;
1057   }
1058   this->location = fine_location / 4;
1059   this->location_frac = fine_location % 4;
1060
1061   /* From GL_EXT_transform_feedback:
1062    *   A program will fail to link if:
1063    *
1064    *   * the total number of components to capture in any varying
1065    *     variable in <varyings> is greater than the constant
1066    *     MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS_EXT and the
1067    *     buffer mode is SEPARATE_ATTRIBS_EXT;
1068    */
1069   if (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS &&
1070       this->num_components() >
1071       ctx->Const.MaxTransformFeedbackSeparateComponents) {
1072      linker_error(prog, "Transform feedback varying %s exceeds "
1073                   "MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS.",
1074                   this->orig_name);
1075      return false;
1076   }
1077
1078   /* Only transform feedback varyings can be assigned to non-zero streams,
1079    * so assign the stream id here.
1080    */
1081   this->stream_id = this->matched_candidate->toplevel_var->data.stream;
1082
1083   unsigned array_offset = this->array_subscript * 4 * dmul;
1084   unsigned struct_offset = this->matched_candidate->offset * 4 * dmul;
1085   this->buffer = this->matched_candidate->toplevel_var->data.xfb_buffer;
1086   this->offset = this->matched_candidate->toplevel_var->data.offset +
1087      array_offset + struct_offset;
1088
1089   return true;
1090}
1091
1092
1093unsigned
1094tfeedback_decl::get_num_outputs() const
1095{
1096   if (!this->is_varying()) {
1097      return 0;
1098   }
1099   return (this->num_components() + this->location_frac + 3)/4;
1100}
1101
1102
1103/**
1104 * Update gl_transform_feedback_info to reflect this tfeedback_decl.
1105 *
1106 * If an error occurs, the error is reported through linker_error() and false
1107 * is returned.
1108 */
1109bool
1110tfeedback_decl::store(struct gl_context *ctx, struct gl_shader_program *prog,
1111                      struct gl_transform_feedback_info *info,
1112                      unsigned buffer, unsigned buffer_index,
1113                      const unsigned max_outputs, bool *explicit_stride,
1114                      bool has_xfb_qualifiers) const
1115{
1116   unsigned xfb_offset = 0;
1117   unsigned size = this->size;
1118   /* Handle gl_SkipComponents. */
1119   if (this->skip_components) {
1120      info->Buffers[buffer].Stride += this->skip_components;
1121      size = this->skip_components;
1122      goto store_varying;
1123   }
1124
1125   if (this->next_buffer_separator) {
1126      size = 0;
1127      goto store_varying;
1128   }
1129
1130   if (has_xfb_qualifiers) {
1131      xfb_offset = this->offset / 4;
1132   } else {
1133      xfb_offset = info->Buffers[buffer].Stride;
1134   }
1135   info->Varyings[info->NumVarying].Offset = xfb_offset * 4;
1136
1137   {
1138      unsigned location = this->location;
1139      unsigned location_frac = this->location_frac;
1140      unsigned num_components = this->num_components();
1141      while (num_components > 0) {
1142         unsigned output_size = MIN2(num_components, 4 - location_frac);
1143         assert((info->NumOutputs == 0 && max_outputs == 0) ||
1144                info->NumOutputs < max_outputs);
1145
1146         /* From the ARB_enhanced_layouts spec:
1147          *
1148          *    "If such a block member or variable is not written during a shader
1149          *    invocation, the buffer contents at the assigned offset will be
1150          *    undefined.  Even if there are no static writes to a variable or
1151          *    member that is assigned a transform feedback offset, the space is
1152          *    still allocated in the buffer and still affects the stride."
1153          */
1154         if (this->is_varying_written()) {
1155            info->Outputs[info->NumOutputs].ComponentOffset = location_frac;
1156            info->Outputs[info->NumOutputs].OutputRegister = location;
1157            info->Outputs[info->NumOutputs].NumComponents = output_size;
1158            info->Outputs[info->NumOutputs].StreamId = stream_id;
1159            info->Outputs[info->NumOutputs].OutputBuffer = buffer;
1160            info->Outputs[info->NumOutputs].DstOffset = xfb_offset;
1161            ++info->NumOutputs;
1162         }
1163         info->Buffers[buffer].Stream = this->stream_id;
1164         xfb_offset += output_size;
1165
1166         num_components -= output_size;
1167         location++;
1168         location_frac = 0;
1169      }
1170   }
1171
1172   if (explicit_stride && explicit_stride[buffer]) {
1173      if (this->is_64bit() && info->Buffers[buffer].Stride % 2) {
1174         linker_error(prog, "invalid qualifier xfb_stride=%d must be a "
1175                      "multiple of 8 as its applied to a type that is or "
1176                      "contains a double.",
1177                      info->Buffers[buffer].Stride * 4);
1178         return false;
1179      }
1180
1181      if (xfb_offset > info->Buffers[buffer].Stride) {
1182         linker_error(prog, "xfb_offset (%d) overflows xfb_stride (%d) for "
1183                      "buffer (%d)", xfb_offset * 4,
1184                      info->Buffers[buffer].Stride * 4, buffer);
1185         return false;
1186      }
1187   } else {
1188      info->Buffers[buffer].Stride = xfb_offset;
1189   }
1190
1191   /* From GL_EXT_transform_feedback:
1192    *   A program will fail to link if:
1193    *
1194    *     * the total number of components to capture is greater than
1195    *       the constant MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS_EXT
1196    *       and the buffer mode is INTERLEAVED_ATTRIBS_EXT.
1197    *
1198    * From GL_ARB_enhanced_layouts:
1199    *
1200    *   "The resulting stride (implicit or explicit) must be less than or
1201    *   equal to the implementation-dependent constant
1202    *   gl_MaxTransformFeedbackInterleavedComponents."
1203    */
1204   if ((prog->TransformFeedback.BufferMode == GL_INTERLEAVED_ATTRIBS ||
1205        has_xfb_qualifiers) &&
1206       info->Buffers[buffer].Stride >
1207       ctx->Const.MaxTransformFeedbackInterleavedComponents) {
1208      linker_error(prog, "The MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS "
1209                   "limit has been exceeded.");
1210      return false;
1211   }
1212
1213 store_varying:
1214   info->Varyings[info->NumVarying].Name = ralloc_strdup(prog,
1215                                                         this->orig_name);
1216   info->Varyings[info->NumVarying].Type = this->type;
1217   info->Varyings[info->NumVarying].Size = size;
1218   info->Varyings[info->NumVarying].BufferIndex = buffer_index;
1219   info->NumVarying++;
1220   info->Buffers[buffer].NumVaryings++;
1221
1222   return true;
1223}
1224
1225
1226const tfeedback_candidate *
1227tfeedback_decl::find_candidate(gl_shader_program *prog,
1228                               hash_table *tfeedback_candidates)
1229{
1230   const char *name = this->var_name;
1231   switch (this->lowered_builtin_array_variable) {
1232   case none:
1233      name = this->var_name;
1234      break;
1235   case clip_distance:
1236      name = "gl_ClipDistanceMESA";
1237      break;
1238   case cull_distance:
1239      name = "gl_CullDistanceMESA";
1240      break;
1241   case tess_level_outer:
1242      name = "gl_TessLevelOuterMESA";
1243      break;
1244   case tess_level_inner:
1245      name = "gl_TessLevelInnerMESA";
1246      break;
1247   }
1248   hash_entry *entry = _mesa_hash_table_search(tfeedback_candidates, name);
1249
1250   this->matched_candidate = entry ?
1251         (const tfeedback_candidate *) entry->data : NULL;
1252
1253   if (!this->matched_candidate) {
1254      /* From GL_EXT_transform_feedback:
1255       *   A program will fail to link if:
1256       *
1257       *   * any variable name specified in the <varyings> array is not
1258       *     declared as an output in the geometry shader (if present) or
1259       *     the vertex shader (if no geometry shader is present);
1260       */
1261      linker_error(prog, "Transform feedback varying %s undeclared.",
1262                   this->orig_name);
1263   }
1264
1265   return this->matched_candidate;
1266}
1267
1268
1269/**
1270 * Parse all the transform feedback declarations that were passed to
1271 * glTransformFeedbackVaryings() and store them in tfeedback_decl objects.
1272 *
1273 * If an error occurs, the error is reported through linker_error() and false
1274 * is returned.
1275 */
1276static bool
1277parse_tfeedback_decls(struct gl_context *ctx, struct gl_shader_program *prog,
1278                      const void *mem_ctx, unsigned num_names,
1279                      char **varying_names, tfeedback_decl *decls)
1280{
1281   for (unsigned i = 0; i < num_names; ++i) {
1282      decls[i].init(ctx, mem_ctx, varying_names[i]);
1283
1284      if (!decls[i].is_varying())
1285         continue;
1286
1287      /* From GL_EXT_transform_feedback:
1288       *   A program will fail to link if:
1289       *
1290       *   * any two entries in the <varyings> array specify the same varying
1291       *     variable;
1292       *
1293       * We interpret this to mean "any two entries in the <varyings> array
1294       * specify the same varying variable and array index", since transform
1295       * feedback of arrays would be useless otherwise.
1296       */
1297      for (unsigned j = 0; j < i; ++j) {
1298         if (decls[j].is_varying()) {
1299            if (tfeedback_decl::is_same(decls[i], decls[j])) {
1300               linker_error(prog, "Transform feedback varying %s specified "
1301                            "more than once.", varying_names[i]);
1302               return false;
1303            }
1304         }
1305      }
1306   }
1307   return true;
1308}
1309
1310
1311static int
1312cmp_xfb_offset(const void * x_generic, const void * y_generic)
1313{
1314   tfeedback_decl *x = (tfeedback_decl *) x_generic;
1315   tfeedback_decl *y = (tfeedback_decl *) y_generic;
1316
1317   if (x->get_buffer() != y->get_buffer())
1318      return x->get_buffer() - y->get_buffer();
1319   return x->get_offset() - y->get_offset();
1320}
1321
1322/**
1323 * Store transform feedback location assignments into
1324 * prog->sh.LinkedTransformFeedback based on the data stored in
1325 * tfeedback_decls.
1326 *
1327 * If an error occurs, the error is reported through linker_error() and false
1328 * is returned.
1329 */
1330static bool
1331store_tfeedback_info(struct gl_context *ctx, struct gl_shader_program *prog,
1332                     unsigned num_tfeedback_decls,
1333                     tfeedback_decl *tfeedback_decls, bool has_xfb_qualifiers)
1334{
1335   if (!prog->last_vert_prog)
1336      return true;
1337
1338   /* Make sure MaxTransformFeedbackBuffers is less than 32 so the bitmask for
1339    * tracking the number of buffers doesn't overflow.
1340    */
1341   assert(ctx->Const.MaxTransformFeedbackBuffers < 32);
1342
1343   bool separate_attribs_mode =
1344      prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS;
1345
1346   struct gl_program *xfb_prog = prog->last_vert_prog;
1347   xfb_prog->sh.LinkedTransformFeedback =
1348      rzalloc(xfb_prog, struct gl_transform_feedback_info);
1349
1350   /* The xfb_offset qualifier does not have to be used in increasing order
1351    * however some drivers expect to receive the list of transform feedback
1352    * declarations in order so sort it now for convenience.
1353    */
1354   if (has_xfb_qualifiers) {
1355      qsort(tfeedback_decls, num_tfeedback_decls, sizeof(*tfeedback_decls),
1356            cmp_xfb_offset);
1357   }
1358
1359   xfb_prog->sh.LinkedTransformFeedback->Varyings =
1360      rzalloc_array(xfb_prog, struct gl_transform_feedback_varying_info,
1361                    num_tfeedback_decls);
1362
1363   unsigned num_outputs = 0;
1364   for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
1365      if (tfeedback_decls[i].is_varying_written())
1366         num_outputs += tfeedback_decls[i].get_num_outputs();
1367   }
1368
1369   xfb_prog->sh.LinkedTransformFeedback->Outputs =
1370      rzalloc_array(xfb_prog, struct gl_transform_feedback_output,
1371                    num_outputs);
1372
1373   unsigned num_buffers = 0;
1374   unsigned buffers = 0;
1375
1376   if (!has_xfb_qualifiers && separate_attribs_mode) {
1377      /* GL_SEPARATE_ATTRIBS */
1378      for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
1379         if (!tfeedback_decls[i].store(ctx, prog,
1380                                       xfb_prog->sh.LinkedTransformFeedback,
1381                                       num_buffers, num_buffers, num_outputs,
1382                                       NULL, has_xfb_qualifiers))
1383            return false;
1384
1385         buffers |= 1 << num_buffers;
1386         num_buffers++;
1387      }
1388   }
1389   else {
1390      /* GL_INVERLEAVED_ATTRIBS */
1391      int buffer_stream_id = -1;
1392      unsigned buffer =
1393         num_tfeedback_decls ? tfeedback_decls[0].get_buffer() : 0;
1394      bool explicit_stride[MAX_FEEDBACK_BUFFERS] = { false };
1395
1396      /* Apply any xfb_stride global qualifiers */
1397      if (has_xfb_qualifiers) {
1398         for (unsigned j = 0; j < MAX_FEEDBACK_BUFFERS; j++) {
1399            if (prog->TransformFeedback.BufferStride[j]) {
1400               explicit_stride[j] = true;
1401               xfb_prog->sh.LinkedTransformFeedback->Buffers[j].Stride =
1402                  prog->TransformFeedback.BufferStride[j] / 4;
1403            }
1404         }
1405      }
1406
1407      for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
1408         if (has_xfb_qualifiers &&
1409             buffer != tfeedback_decls[i].get_buffer()) {
1410            /* we have moved to the next buffer so reset stream id */
1411            buffer_stream_id = -1;
1412            num_buffers++;
1413         }
1414
1415         if (tfeedback_decls[i].is_next_buffer_separator()) {
1416            if (!tfeedback_decls[i].store(ctx, prog,
1417                                          xfb_prog->sh.LinkedTransformFeedback,
1418                                          buffer, num_buffers, num_outputs,
1419                                          explicit_stride, has_xfb_qualifiers))
1420               return false;
1421            num_buffers++;
1422            buffer_stream_id = -1;
1423            continue;
1424         }
1425
1426         if (has_xfb_qualifiers) {
1427            buffer = tfeedback_decls[i].get_buffer();
1428         } else {
1429            buffer = num_buffers;
1430         }
1431
1432         if (tfeedback_decls[i].is_varying()) {
1433            if (buffer_stream_id == -1)  {
1434               /* First varying writing to this buffer: remember its stream */
1435               buffer_stream_id = (int) tfeedback_decls[i].get_stream_id();
1436
1437               /* Only mark a buffer as active when there is a varying
1438                * attached to it. This behaviour is based on a revised version
1439                * of section 13.2.2 of the GL 4.6 spec.
1440                */
1441               buffers |= 1 << buffer;
1442            } else if (buffer_stream_id !=
1443                       (int) tfeedback_decls[i].get_stream_id()) {
1444               /* Varying writes to the same buffer from a different stream */
1445               linker_error(prog,
1446                            "Transform feedback can't capture varyings belonging "
1447                            "to different vertex streams in a single buffer. "
1448                            "Varying %s writes to buffer from stream %u, other "
1449                            "varyings in the same buffer write from stream %u.",
1450                            tfeedback_decls[i].name(),
1451                            tfeedback_decls[i].get_stream_id(),
1452                            buffer_stream_id);
1453               return false;
1454            }
1455         }
1456
1457         if (!tfeedback_decls[i].store(ctx, prog,
1458                                       xfb_prog->sh.LinkedTransformFeedback,
1459                                       buffer, num_buffers, num_outputs,
1460                                       explicit_stride, has_xfb_qualifiers))
1461            return false;
1462      }
1463   }
1464
1465   assert(xfb_prog->sh.LinkedTransformFeedback->NumOutputs == num_outputs);
1466
1467   xfb_prog->sh.LinkedTransformFeedback->ActiveBuffers = buffers;
1468   return true;
1469}
1470
1471namespace {
1472
1473/**
1474 * Data structure recording the relationship between outputs of one shader
1475 * stage (the "producer") and inputs of another (the "consumer").
1476 */
1477class varying_matches
1478{
1479public:
1480   varying_matches(bool disable_varying_packing, bool xfb_enabled,
1481                   bool enhanced_layouts_enabled,
1482                   gl_shader_stage producer_stage,
1483                   gl_shader_stage consumer_stage);
1484   ~varying_matches();
1485   void record(ir_variable *producer_var, ir_variable *consumer_var);
1486   unsigned assign_locations(struct gl_shader_program *prog,
1487                             uint8_t components[],
1488                             uint64_t reserved_slots);
1489   void store_locations() const;
1490
1491private:
1492   bool is_varying_packing_safe(const glsl_type *type,
1493                                const ir_variable *var) const;
1494
1495   /**
1496    * If true, this driver disables varying packing, so all varyings need to
1497    * be aligned on slot boundaries, and take up a number of slots equal to
1498    * their number of matrix columns times their array size.
1499    *
1500    * Packing may also be disabled because our current packing method is not
1501    * safe in SSO or versions of OpenGL where interpolation qualifiers are not
1502    * guaranteed to match across stages.
1503    */
1504   const bool disable_varying_packing;
1505
1506   /**
1507    * If true, this driver has transform feedback enabled. The transform
1508    * feedback code requires at least some packing be done even when varying
1509    * packing is disabled, fortunately where transform feedback requires
1510    * packing it's safe to override the disabled setting. See
1511    * is_varying_packing_safe().
1512    */
1513   const bool xfb_enabled;
1514
1515   const bool enhanced_layouts_enabled;
1516
1517   /**
1518    * Enum representing the order in which varyings are packed within a
1519    * packing class.
1520    *
1521    * Currently we pack vec4's first, then vec2's, then scalar values, then
1522    * vec3's.  This order ensures that the only vectors that are at risk of
1523    * having to be "double parked" (split between two adjacent varying slots)
1524    * are the vec3's.
1525    */
1526   enum packing_order_enum {
1527      PACKING_ORDER_VEC4,
1528      PACKING_ORDER_VEC2,
1529      PACKING_ORDER_SCALAR,
1530      PACKING_ORDER_VEC3,
1531   };
1532
1533   static unsigned compute_packing_class(const ir_variable *var);
1534   static packing_order_enum compute_packing_order(const ir_variable *var);
1535   static int match_comparator(const void *x_generic, const void *y_generic);
1536   static int xfb_comparator(const void *x_generic, const void *y_generic);
1537
   /**
    * Structure recording the relationship between a single producer output
    * and a single consumer input.
    *
    * Entries are appended by record(); either variable pointer (but not
    * both) may be NULL.
    */
   struct match {
      /**
       * Packing class for this varying, computed by compute_packing_class().
       */
      unsigned packing_class;

      /**
       * Packing order for this varying, computed by compute_packing_order().
       */
      packing_order_enum packing_order;

      /**
       * Number of components reserved for this varying, filled in by
       * record(): either the attribute slot count of the type times 4
       * (i.e. whole slots), or the type's component_slots() when the varying
       * may share a slot with others.
       */
      unsigned num_components;

      /**
       * The output variable in the producer stage.
       */
      ir_variable *producer_var;

      /**
       * The input variable in the consumer stage.
       */
      ir_variable *consumer_var;

      /**
       * The location which has been assigned for this varying.  This is
       * expressed in multiples of a float, with the first generic varying
       * (i.e. the one referred to by VARYING_SLOT_VAR0) represented by the
       * value 0.
       *
       * Not written by record(); it is filled in by assign_locations().
       */
      unsigned generic_location;
   } *matches;
1572
1573   /**
1574    * The number of elements in the \c matches array that are currently in
1575    * use.
1576    */
1577   unsigned num_matches;
1578
1579   /**
1580    * The number of elements that were set aside for the \c matches array when
1581    * it was allocated.
1582    */
1583   unsigned matches_capacity;
1584
1585   gl_shader_stage producer_stage;
1586   gl_shader_stage consumer_stage;
1587};
1588
1589} /* anonymous namespace */
1590
1591varying_matches::varying_matches(bool disable_varying_packing,
1592                                 bool xfb_enabled,
1593                                 bool enhanced_layouts_enabled,
1594                                 gl_shader_stage producer_stage,
1595                                 gl_shader_stage consumer_stage)
1596   : disable_varying_packing(disable_varying_packing),
1597     xfb_enabled(xfb_enabled),
1598     enhanced_layouts_enabled(enhanced_layouts_enabled),
1599     producer_stage(producer_stage),
1600     consumer_stage(consumer_stage)
1601{
1602   /* Note: this initial capacity is rather arbitrarily chosen to be large
1603    * enough for many cases without wasting an unreasonable amount of space.
1604    * varying_matches::record() will resize the array if there are more than
1605    * this number of varyings.
1606    */
1607   this->matches_capacity = 8;
1608   this->matches = (match *)
1609      malloc(sizeof(*this->matches) * this->matches_capacity);
1610   this->num_matches = 0;
1611}
1612
1613
1614varying_matches::~varying_matches()
1615{
1616   free(this->matches);
1617}
1618
1619
1620/**
1621 * Packing is always safe on individual arrays, structures, and matrices. It
1622 * is also safe if the varying is only used for transform feedback.
1623 */
1624bool
1625varying_matches::is_varying_packing_safe(const glsl_type *type,
1626                                         const ir_variable *var) const
1627{
1628   if (consumer_stage == MESA_SHADER_TESS_EVAL ||
1629       consumer_stage == MESA_SHADER_TESS_CTRL ||
1630       producer_stage == MESA_SHADER_TESS_CTRL)
1631      return false;
1632
1633   return xfb_enabled && (type->is_array() || type->is_record() ||
1634                          type->is_matrix() || var->data.is_xfb_only);
1635}
1636
1637
1638/**
1639 * Record the given producer/consumer variable pair in the list of variables
1640 * that should later be assigned locations.
1641 *
1642 * It is permissible for \c consumer_var to be NULL (this happens if a
1643 * variable is output by the producer and consumed by transform feedback, but
1644 * not consumed by the consumer).
1645 *
1646 * If \c producer_var has already been paired up with a consumer_var, or
1647 * producer_var is part of fixed pipeline functionality (and hence already has
1648 * a location assigned), this function has no effect.
1649 *
1650 * Note: as a side effect this function may change the interpolation type of
1651 * \c producer_var, but only when the change couldn't possibly affect
1652 * rendering.
1653 */
1654void
1655varying_matches::record(ir_variable *producer_var, ir_variable *consumer_var)
1656{
1657   assert(producer_var != NULL || consumer_var != NULL);
1658
1659   if ((producer_var && (!producer_var->data.is_unmatched_generic_inout ||
1660       producer_var->data.explicit_location)) ||
1661       (consumer_var && (!consumer_var->data.is_unmatched_generic_inout ||
1662       consumer_var->data.explicit_location))) {
1663      /* Either a location already exists for this variable (since it is part
1664       * of fixed functionality), or it has already been recorded as part of a
1665       * previous match.
1666       */
1667      return;
1668   }
1669
1670   bool needs_flat_qualifier = consumer_var == NULL &&
1671      (producer_var->type->contains_integer() ||
1672       producer_var->type->contains_double());
1673
1674   if (!disable_varying_packing &&
1675       (needs_flat_qualifier ||
1676        (consumer_stage != MESA_SHADER_NONE && consumer_stage != MESA_SHADER_FRAGMENT))) {
1677      /* Since this varying is not being consumed by the fragment shader, its
1678       * interpolation type varying cannot possibly affect rendering.
1679       * Also, this variable is non-flat and is (or contains) an integer
1680       * or a double.
1681       * If the consumer stage is unknown, don't modify the interpolation
1682       * type as it could affect rendering later with separate shaders.
1683       *
1684       * lower_packed_varyings requires all integer varyings to flat,
1685       * regardless of where they appear.  We can trivially satisfy that
1686       * requirement by changing the interpolation type to flat here.
1687       */
1688      if (producer_var) {
1689         producer_var->data.centroid = false;
1690         producer_var->data.sample = false;
1691         producer_var->data.interpolation = INTERP_MODE_FLAT;
1692      }
1693
1694      if (consumer_var) {
1695         consumer_var->data.centroid = false;
1696         consumer_var->data.sample = false;
1697         consumer_var->data.interpolation = INTERP_MODE_FLAT;
1698      }
1699   }
1700
1701   if (this->num_matches == this->matches_capacity) {
1702      this->matches_capacity *= 2;
1703      this->matches = (match *)
1704         realloc(this->matches,
1705                 sizeof(*this->matches) * this->matches_capacity);
1706   }
1707
1708   /* We must use the consumer to compute the packing class because in GL4.4+
1709    * there is no guarantee interpolation qualifiers will match across stages.
1710    *
1711    * From Section 4.5 (Interpolation Qualifiers) of the GLSL 4.30 spec:
1712    *
1713    *    "The type and presence of interpolation qualifiers of variables with
1714    *    the same name declared in all linked shaders for the same cross-stage
1715    *    interface must match, otherwise the link command will fail.
1716    *
1717    *    When comparing an output from one stage to an input of a subsequent
1718    *    stage, the input and output don't match if their interpolation
1719    *    qualifiers (or lack thereof) are not the same."
1720    *
1721    * This text was also in at least revison 7 of the 4.40 spec but is no
1722    * longer in revision 9 and not in the 4.50 spec.
1723    */
1724   const ir_variable *const var = (consumer_var != NULL)
1725      ? consumer_var : producer_var;
1726   const gl_shader_stage stage = (consumer_var != NULL)
1727      ? consumer_stage : producer_stage;
1728   const glsl_type *type = get_varying_type(var, stage);
1729
1730   if (producer_var && consumer_var &&
1731       consumer_var->data.must_be_shader_input) {
1732      producer_var->data.must_be_shader_input = 1;
1733   }
1734
1735   this->matches[this->num_matches].packing_class
1736      = this->compute_packing_class(var);
1737   this->matches[this->num_matches].packing_order
1738      = this->compute_packing_order(var);
1739   if ((this->disable_varying_packing && !is_varying_packing_safe(type, var)) ||
1740       var->data.must_be_shader_input) {
1741      unsigned slots = type->count_attribute_slots(false);
1742      this->matches[this->num_matches].num_components = slots * 4;
1743   } else {
1744      this->matches[this->num_matches].num_components
1745         = type->component_slots();
1746   }
1747
1748   this->matches[this->num_matches].producer_var = producer_var;
1749   this->matches[this->num_matches].consumer_var = consumer_var;
1750   this->num_matches++;
1751   if (producer_var)
1752      producer_var->data.is_unmatched_generic_inout = 0;
1753   if (consumer_var)
1754      consumer_var->data.is_unmatched_generic_inout = 0;
1755}
1756
1757
/**
 * Choose locations for all of the variable matches that were previously
 * passed to varying_matches::record().
 *
 * The matches are sorted first (with match_comparator() when packing is
 * enabled, with xfb_comparator() otherwise) and then walked in that order.
 * Each match receives a location, expressed in float components, which is
 * stored in its \c generic_location field.
 *
 * \param prog  program being linked; its transform feedback settings are
 *              consulted and link errors are reported against it
 * \param components  returns array[slot] of number of components used
 *                    per slot (1, 2, 3 or 4)
 * \param reserved_slots  bitmask indicating which varying slots are already
 *                        allocated
 * \return number of slots (4-element vectors) allocated; this is computed
 *         from the non-patch location counter only
 */
unsigned
varying_matches::assign_locations(struct gl_shader_program *prog,
                                  uint8_t components[],
                                  uint64_t reserved_slots)
{
   /* If packing has been disabled then we cannot safely sort the varyings by
    * class as it may mean we are using a version of OpenGL where
    * interpolation qualifiers are not guaranteed to be matching across
    * shaders, sorting in this case could result in mismatching shader
    * interfaces.
    * When packing is disabled the sort orders varyings used by transform
    * feedback first, but also depends on *undefined behaviour* of qsort to
    * reverse the order of the varyings. See: xfb_comparator().
    */
   if (!this->disable_varying_packing) {
      /* Sort varying matches into an order that makes them easy to pack. */
      qsort(this->matches, this->num_matches, sizeof(*this->matches),
            &varying_matches::match_comparator);
   } else {
      /* Only sort varyings that are only used by transform feedback. */
      qsort(this->matches, this->num_matches, sizeof(*this->matches),
            &varying_matches::xfb_comparator);
   }

   /* Two independent counters, both in units of one float component:
    * per-vertex varyings start at 0, while patch varyings are numbered from
    * a separate counter that starts right after the MAX_VARYING slots.
    */
   unsigned generic_location = 0;
   unsigned generic_patch_location = MAX_VARYING*4;
   bool previous_var_xfb_only = false;
   unsigned previous_packing_class = ~0u;

   /* For transform feedback separate mode, we know the number of attributes
    * is <= the number of buffers.  So packing isn't critical.  In fact,
    * packing vec3 attributes can cause trouble because splitting a vec3
    * effectively creates an additional transform feedback output.  The
    * extra TFB output may exceed device driver limits.
    */
   const bool dont_pack_vec3 =
      (prog->TransformFeedback.BufferMode == GL_SEPARATE_ATTRIBS &&
       prog->TransformFeedback.NumVarying > 0);

   for (unsigned i = 0; i < this->num_matches; i++) {
      /* Counter to allocate from; switched to the patch counter below. */
      unsigned *location = &generic_location;
      const ir_variable *var;
      const glsl_type *type;
      bool is_vertex_input = false;

      /* Prefer the consumer's variable when there is one, as record() does
       * when it computes the packing class.
       */
      if (matches[i].consumer_var) {
         var = matches[i].consumer_var;
         type = get_varying_type(var, consumer_stage);
         if (consumer_stage == MESA_SHADER_VERTEX)
            is_vertex_input = true;
      } else {
         var = matches[i].producer_var;
         type = get_varying_type(var, producer_stage);
      }

      if (var->data.patch)
         location = &generic_patch_location;

      /* Advance to the next slot if this varying has a different packing
       * class than the previous one, and we're not already on a slot
       * boundary.
       *
       * Also advance to the next slot if packing is disabled. This makes sure
       * we don't assign varyings the same locations which is possible
       * because we still pack individual arrays, records and matrices even
       * when packing is disabled. Note we don't advance to the next slot if
       * we can pack varyings together that are only used for transform
       * feedback.
       */
      if (var->data.must_be_shader_input ||
          (this->disable_varying_packing &&
           !(previous_var_xfb_only && var->data.is_xfb_only)) ||
          (previous_packing_class != this->matches[i].packing_class) ||
          (this->matches[i].packing_order == PACKING_ORDER_VEC3 &&
           dont_pack_vec3)) {
         *location = ALIGN(*location, 4);
      }

      previous_var_xfb_only = var->data.is_xfb_only;
      previous_packing_class = this->matches[i].packing_class;

      /* The number of components taken up by this variable. For vertex shader
       * inputs, we use the number of slots * 4, as they have different
       * counting rules.
       */
      unsigned num_components = is_vertex_input ?
         type->count_attribute_slots(is_vertex_input) * 4 :
         this->matches[i].num_components;

      /* The last slot for this variable, inclusive. */
      unsigned slot_end = *location + num_components - 1;

      /* FIXME: We could be smarter in the below code and loop back over
       * trying to fill any locations that we skipped because we couldn't pack
       * the varying between an explicit location. For now just let the user
       * hit the linking error if we run out of room and suggest they use
       * explicit locations.
       */
      while (slot_end < MAX_VARYING * 4u) {
         /* Number of vec4 slots touched by [*location, slot_end], and the
          * matching bits in the reserved_slots bitmask.
          */
         const unsigned slots = (slot_end / 4u) - (*location / 4u) + 1;
         const uint64_t slot_mask = ((1ull << slots) - 1) << (*location / 4u);

         assert(slots > 0);

         /* None of the slots is reserved: the variable fits here. */
         if ((reserved_slots & slot_mask) == 0) {
            break;
         }

         /* Otherwise retry from the start of the next slot. */
         *location = ALIGN(*location + 1, 4);
         slot_end = *location + num_components - 1;
      }

      /* Only non-patch varyings are reported here when they run past the
       * MAX_VARYING slots.
       */
      if (!var->data.patch && slot_end >= MAX_VARYING * 4u) {
         linker_error(prog, "insufficient contiguous locations available for "
                      "%s it is possible an array or struct could not be "
                      "packed between varyings with explicit locations. Try "
                      "using an explicit location for arrays and structs.",
                      var->name);
      }

      /* Record per-slot component usage, but only while the variable lies
       * entirely inside the slot range covered by MAX_VARYINGS_INCL_PATCH:
       * every slot before the last one is marked full, and the last one gets
       * the number of components used up to and including slot_end.
       */
      if (slot_end < MAX_VARYINGS_INCL_PATCH * 4u) {
         for (unsigned j = *location / 4u; j < slot_end / 4u; j++)
            components[j] = 4;
         components[slot_end / 4u] = (slot_end & 3) + 1;
      }

      this->matches[i].generic_location = *location;

      /* The next varying starts right after this one. */
      *location = slot_end + 1;
   }

   /* Round the per-vertex counter up to whole vec4 slots. */
   return (generic_location + 3) / 4;
}
1900
1901
1902/**
1903 * Update the producer and consumer shaders to reflect the locations
1904 * assignments that were made by varying_matches::assign_locations().
1905 */
1906void
1907varying_matches::store_locations() const
1908{
1909   /* Check is location needs to be packed with lower_packed_varyings() or if
1910    * we can just use ARB_enhanced_layouts packing.
1911    */
1912   bool pack_loc[MAX_VARYINGS_INCL_PATCH] = { 0 };
1913   const glsl_type *loc_type[MAX_VARYINGS_INCL_PATCH][4] = { {NULL, NULL} };
1914
1915   for (unsigned i = 0; i < this->num_matches; i++) {
1916      ir_variable *producer_var = this->matches[i].producer_var;
1917      ir_variable *consumer_var = this->matches[i].consumer_var;
1918      unsigned generic_location = this->matches[i].generic_location;
1919      unsigned slot = generic_location / 4;
1920      unsigned offset = generic_location % 4;
1921
1922      if (producer_var) {
1923         producer_var->data.location = VARYING_SLOT_VAR0 + slot;
1924         producer_var->data.location_frac = offset;
1925      }
1926
1927      if (consumer_var) {
1928         assert(consumer_var->data.location == -1);
1929         consumer_var->data.location = VARYING_SLOT_VAR0 + slot;
1930         consumer_var->data.location_frac = offset;
1931      }
1932
1933      /* Find locations suitable for native packing via
1934       * ARB_enhanced_layouts.
1935       */
1936      if (producer_var && consumer_var) {
1937         if (enhanced_layouts_enabled) {
1938            const glsl_type *type =
1939               get_varying_type(producer_var, producer_stage);
1940            if (type->is_array() || type->is_matrix() || type->is_record() ||
1941                type->is_double()) {
1942               unsigned comp_slots = type->component_slots() + offset;
1943               unsigned slots = comp_slots / 4;
1944               if (comp_slots % 4)
1945                  slots += 1;
1946
1947               for (unsigned j = 0; j < slots; j++) {
1948                  pack_loc[slot + j] = true;
1949               }
1950            } else if (offset + type->vector_elements > 4) {
1951               pack_loc[slot] = true;
1952               pack_loc[slot + 1] = true;
1953            } else {
1954               loc_type[slot][offset] = type;
1955            }
1956         }
1957      }
1958   }
1959
1960   /* Attempt to use ARB_enhanced_layouts for more efficient packing if
1961    * suitable.
1962    */
1963   if (enhanced_layouts_enabled) {
1964      for (unsigned i = 0; i < this->num_matches; i++) {
1965         ir_variable *producer_var = this->matches[i].producer_var;
1966         ir_variable *consumer_var = this->matches[i].consumer_var;
1967         unsigned generic_location = this->matches[i].generic_location;
1968         unsigned slot = generic_location / 4;
1969
1970         if (pack_loc[slot] || !producer_var || !consumer_var)
1971            continue;
1972
1973         const glsl_type *type =
1974            get_varying_type(producer_var, producer_stage);
1975         bool type_match = true;
1976         for (unsigned j = 0; j < 4; j++) {
1977            if (loc_type[slot][j]) {
1978               if (type->base_type != loc_type[slot][j]->base_type)
1979                  type_match = false;
1980            }
1981         }
1982
1983         if (type_match) {
1984            producer_var->data.explicit_location = 1;
1985            consumer_var->data.explicit_location = 1;
1986            producer_var->data.explicit_component = 1;
1987            consumer_var->data.explicit_component = 1;
1988         }
1989      }
1990   }
1991}
1992
1993
1994/**
1995 * Compute the "packing class" of the given varying.  This is an unsigned
1996 * integer with the property that two variables in the same packing class can
1997 * be safely backed into the same vec4.
1998 */
1999unsigned
2000varying_matches::compute_packing_class(const ir_variable *var)
2001{
2002   /* Without help from the back-end, there is no way to pack together
2003    * variables with different interpolation types, because
2004    * lower_packed_varyings must choose exactly one interpolation type for
2005    * each packed varying it creates.
2006    *
2007    * However, we can safely pack together floats, ints, and uints, because:
2008    *
2009    * - varyings of base type "int" and "uint" must use the "flat"
2010    *   interpolation type, which can only occur in GLSL 1.30 and above.
2011    *
2012    * - On platforms that support GLSL 1.30 and above, lower_packed_varyings
2013    *   can store flat floats as ints without losing any information (using
2014    *   the ir_unop_bitcast_* opcodes).
2015    *
2016    * Therefore, the packing class depends only on the interpolation type.
2017    */
2018   const unsigned interp = var->is_interpolation_flat()
2019      ? unsigned(INTERP_MODE_FLAT) : var->data.interpolation;
2020
2021   assert(interp < (1 << 3));
2022
2023   const unsigned packing_class = (interp << 0) |
2024                                  (var->data.centroid << 3) |
2025                                  (var->data.sample << 4) |
2026                                  (var->data.patch << 5) |
2027                                  (var->data.must_be_shader_input << 6);
2028
2029   return packing_class;
2030}
2031
2032
2033/**
2034 * Compute the "packing order" of the given varying.  This is a sort key we
2035 * use to determine when to attempt to pack the given varying relative to
2036 * other varyings in the same packing class.
2037 */
2038varying_matches::packing_order_enum
2039varying_matches::compute_packing_order(const ir_variable *var)
2040{
2041   const glsl_type *element_type = var->type;
2042
2043   while (element_type->is_array()) {
2044      element_type = element_type->fields.array;
2045   }
2046
2047   switch (element_type->component_slots() % 4) {
2048   case 1: return PACKING_ORDER_SCALAR;
2049   case 2: return PACKING_ORDER_VEC2;
2050   case 3: return PACKING_ORDER_VEC3;
2051   case 0: return PACKING_ORDER_VEC4;
2052   default:
2053      assert(!"Unexpected value of vector_elements");
2054      return PACKING_ORDER_VEC4;
2055   }
2056}
2057
2058
2059/**
2060 * Comparison function passed to qsort() to sort varyings by packing_class and
2061 * then by packing_order.
2062 */
2063int
2064varying_matches::match_comparator(const void *x_generic, const void *y_generic)
2065{
2066   const match *x = (const match *) x_generic;
2067   const match *y = (const match *) y_generic;
2068
2069   if (x->packing_class != y->packing_class)
2070      return x->packing_class - y->packing_class;
2071   return x->packing_order - y->packing_order;
2072}
2073
2074
2075/**
2076 * Comparison function passed to qsort() to sort varyings used only by
2077 * transform feedback when packing of other varyings is disabled.
2078 */
2079int
2080varying_matches::xfb_comparator(const void *x_generic, const void *y_generic)
2081{
2082   const match *x = (const match *) x_generic;
2083
2084   if (x->producer_var != NULL && x->producer_var->data.is_xfb_only)
2085      return match_comparator(x_generic, y_generic);
2086
2087   /* FIXME: When the comparator returns 0 it means the elements being
2088    * compared are equivalent. However the qsort documentation says:
2089    *
2090    *    "The order of equivalent elements is undefined."
2091    *
2092    * In practice the sort ends up reversing the order of the varyings which
2093    * means locations are also assigned in this reversed order and happens to
2094    * be what we want. This is also whats happening in
2095    * varying_matches::match_comparator().
2096    */
2097   return 0;
2098}
2099
2100
2101/**
2102 * Is the given variable a varying variable to be counted against the
2103 * limit in ctx->Const.MaxVarying?
2104 * This includes variables such as texcoords, colors and generic
2105 * varyings, but excludes variables such as gl_FrontFacing and gl_FragCoord.
2106 */
2107static bool
2108var_counts_against_varying_limit(gl_shader_stage stage, const ir_variable *var)
2109{
2110   /* Only fragment shaders will take a varying variable as an input */
2111   if (stage == MESA_SHADER_FRAGMENT &&
2112       var->data.mode == ir_var_shader_in) {
2113      switch (var->data.location) {
2114      case VARYING_SLOT_POS:
2115      case VARYING_SLOT_FACE:
2116      case VARYING_SLOT_PNTC:
2117         return false;
2118      default:
2119         return true;
2120      }
2121   }
2122   return false;
2123}
2124
2125
2126/**
2127 * Visitor class that generates tfeedback_candidate structs describing all
2128 * possible targets of transform feedback.
2129 *
2130 * tfeedback_candidate structs are stored in the hash table
2131 * tfeedback_candidates, which is passed to the constructor.  This hash table
2132 * maps varying names to instances of the tfeedback_candidate struct.
2133 */
2134class tfeedback_candidate_generator : public program_resource_visitor
2135{
2136public:
2137   tfeedback_candidate_generator(void *mem_ctx,
2138                                 hash_table *tfeedback_candidates,
2139                                 gl_shader_stage stage)
2140      : mem_ctx(mem_ctx),
2141        tfeedback_candidates(tfeedback_candidates),
2142        stage(stage),
2143        toplevel_var(NULL),
2144        varying_floats(0)
2145   {
2146   }
2147
2148   void process(ir_variable *var)
2149   {
2150      /* All named varying interface blocks should be flattened by now */
2151      assert(!var->is_interface_instance());
2152      assert(var->data.mode == ir_var_shader_out);
2153
2154      this->toplevel_var = var;
2155      this->varying_floats = 0;
2156      const glsl_type *t =
2157         var->data.from_named_ifc_block ? var->get_interface_type() : var->type;
2158      if (!var->data.patch && stage == MESA_SHADER_TESS_CTRL) {
2159         assert(t->is_array());
2160         t = t->fields.array;
2161      }
2162      program_resource_visitor::process(var, t, false);
2163   }
2164
2165private:
2166   virtual void visit_field(const glsl_type *type, const char *name,
2167                            bool /* row_major */,
2168                            const glsl_type * /* record_type */,
2169                            const enum glsl_interface_packing,
2170                            bool /* last_field */)
2171   {
2172      assert(!type->without_array()->is_record());
2173      assert(!type->without_array()->is_interface());
2174
2175      tfeedback_candidate *candidate
2176         = rzalloc(this->mem_ctx, tfeedback_candidate);
2177      candidate->toplevel_var = this->toplevel_var;
2178      candidate->type = type;
2179      candidate->offset = this->varying_floats;
2180      _mesa_hash_table_insert(this->tfeedback_candidates,
2181                              ralloc_strdup(this->mem_ctx, name),
2182                              candidate);
2183      this->varying_floats += type->component_slots();
2184   }
2185
2186   /**
2187    * Memory context used to allocate hash table keys and values.
2188    */
2189   void * const mem_ctx;
2190
2191   /**
2192    * Hash table in which tfeedback_candidate objects should be stored.
2193    */
2194   hash_table * const tfeedback_candidates;
2195
2196   gl_shader_stage stage;
2197
2198   /**
2199    * Pointer to the toplevel variable that is being traversed.
2200    */
2201   ir_variable *toplevel_var;
2202
2203   /**
2204    * Total number of varying floats that have been visited so far.  This is
2205    * used to determine the offset to each varying within the toplevel
2206    * variable.
2207    */
2208   unsigned varying_floats;
2209};
2210
2211
2212namespace linker {
2213
2214void
2215populate_consumer_input_sets(void *mem_ctx, exec_list *ir,
2216                             hash_table *consumer_inputs,
2217                             hash_table *consumer_interface_inputs,
2218                             ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])
2219{
2220   memset(consumer_inputs_with_locations,
2221          0,
2222          sizeof(consumer_inputs_with_locations[0]) * VARYING_SLOT_TESS_MAX);
2223
2224   foreach_in_list(ir_instruction, node, ir) {
2225      ir_variable *const input_var = node->as_variable();
2226
2227      if (input_var != NULL && input_var->data.mode == ir_var_shader_in) {
2228         /* All interface blocks should have been lowered by this point */
2229         assert(!input_var->type->is_interface());
2230
2231         if (input_var->data.explicit_location) {
2232            /* assign_varying_locations only cares about finding the
2233             * ir_variable at the start of a contiguous location block.
2234             *
2235             *     - For !producer, consumer_inputs_with_locations isn't used.
2236             *
2237             *     - For !consumer, consumer_inputs_with_locations is empty.
2238             *
2239             * For consumer && producer, if you were trying to set some
2240             * ir_variable to the middle of a location block on the other side
2241             * of producer/consumer, cross_validate_outputs_to_inputs() should
2242             * be link-erroring due to either type mismatch or location
2243             * overlaps.  If the variables do match up, then they've got a
2244             * matching data.location and you only looked at
2245             * consumer_inputs_with_locations[var->data.location], not any
2246             * following entries for the array/structure.
2247             */
2248            consumer_inputs_with_locations[input_var->data.location] =
2249               input_var;
2250         } else if (input_var->get_interface_type() != NULL) {
2251            char *const iface_field_name =
2252               ralloc_asprintf(mem_ctx, "%s.%s",
2253                  input_var->get_interface_type()->without_array()->name,
2254                  input_var->name);
2255            _mesa_hash_table_insert(consumer_interface_inputs,
2256                                    iface_field_name, input_var);
2257         } else {
2258            _mesa_hash_table_insert(consumer_inputs,
2259                                    ralloc_strdup(mem_ctx, input_var->name),
2260                                    input_var);
2261         }
2262      }
2263   }
2264}
2265
2266/**
2267 * Find a variable from the consumer that "matches" the specified variable
2268 *
2269 * This function only finds inputs with names that match.  There is no
2270 * validation (here) that the types, etc. are compatible.
2271 */
2272ir_variable *
2273get_matching_input(void *mem_ctx,
2274                   const ir_variable *output_var,
2275                   hash_table *consumer_inputs,
2276                   hash_table *consumer_interface_inputs,
2277                   ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX])
2278{
2279   ir_variable *input_var;
2280
2281   if (output_var->data.explicit_location) {
2282      input_var = consumer_inputs_with_locations[output_var->data.location];
2283   } else if (output_var->get_interface_type() != NULL) {
2284      char *const iface_field_name =
2285         ralloc_asprintf(mem_ctx, "%s.%s",
2286            output_var->get_interface_type()->without_array()->name,
2287            output_var->name);
2288      hash_entry *entry = _mesa_hash_table_search(consumer_interface_inputs, iface_field_name);
2289      input_var = entry ? (ir_variable *) entry->data : NULL;
2290   } else {
2291      hash_entry *entry = _mesa_hash_table_search(consumer_inputs, output_var->name);
2292      input_var = entry ? (ir_variable *) entry->data : NULL;
2293   }
2294
2295   return (input_var == NULL || input_var->data.mode != ir_var_shader_in)
2296      ? NULL : input_var;
2297}
2298
2299}
2300
2301static int
2302io_variable_cmp(const void *_a, const void *_b)
2303{
2304   const ir_variable *const a = *(const ir_variable **) _a;
2305   const ir_variable *const b = *(const ir_variable **) _b;
2306
2307   if (a->data.explicit_location && b->data.explicit_location)
2308      return b->data.location - a->data.location;
2309
2310   if (a->data.explicit_location && !b->data.explicit_location)
2311      return 1;
2312
2313   if (!a->data.explicit_location && b->data.explicit_location)
2314      return -1;
2315
2316   return -strcmp(a->name, b->name);
2317}
2318
2319/**
2320 * Sort the shader IO variables into canonical order
2321 */
2322static void
2323canonicalize_shader_io(exec_list *ir, enum ir_variable_mode io_mode)
2324{
2325   ir_variable *var_table[MAX_PROGRAM_OUTPUTS * 4];
2326   unsigned num_variables = 0;
2327
2328   foreach_in_list(ir_instruction, node, ir) {
2329      ir_variable *const var = node->as_variable();
2330
2331      if (var == NULL || var->data.mode != io_mode)
2332         continue;
2333
2334      /* If we have already encountered more I/O variables that could
2335       * successfully link, bail.
2336       */
2337      if (num_variables == ARRAY_SIZE(var_table))
2338         return;
2339
2340      var_table[num_variables++] = var;
2341   }
2342
2343   if (num_variables == 0)
2344      return;
2345
2346   /* Sort the list in reverse order (io_variable_cmp handles this).  Later
2347    * we're going to push the variables on to the IR list as a stack, so we
2348    * want the last variable (in canonical order) to be first in the list.
2349    */
2350   qsort(var_table, num_variables, sizeof(var_table[0]), io_variable_cmp);
2351
2352   /* Remove the variable from it's current location in the IR, and put it at
2353    * the front.
2354    */
2355   for (unsigned i = 0; i < num_variables; i++) {
2356      var_table[i]->remove();
2357      ir->push_head(var_table[i]);
2358   }
2359}
2360
2361/**
2362 * Generate a bitfield map of the explicit locations for shader varyings.
2363 *
2364 * Note: For Tessellation shaders we are sitting right on the limits of the
2365 * 64 bit map. Per-vertex and per-patch both have separate location domains
2366 * with a max of MAX_VARYING.
2367 */
2368static uint64_t
2369reserved_varying_slot(struct gl_linked_shader *stage,
2370                      ir_variable_mode io_mode)
2371{
2372   assert(io_mode == ir_var_shader_in || io_mode == ir_var_shader_out);
2373   /* Avoid an overflow of the returned value */
2374   assert(MAX_VARYINGS_INCL_PATCH <= 64);
2375
2376   uint64_t slots = 0;
2377   int var_slot;
2378
2379   if (!stage)
2380      return slots;
2381
2382   foreach_in_list(ir_instruction, node, stage->ir) {
2383      ir_variable *const var = node->as_variable();
2384
2385      if (var == NULL || var->data.mode != io_mode ||
2386          !var->data.explicit_location ||
2387          var->data.location < VARYING_SLOT_VAR0)
2388         continue;
2389
2390      var_slot = var->data.location - VARYING_SLOT_VAR0;
2391
2392      unsigned num_elements = get_varying_type(var, stage->Stage)
2393         ->count_attribute_slots(io_mode == ir_var_shader_in &&
2394                                 stage->Stage == MESA_SHADER_VERTEX);
2395      for (unsigned i = 0; i < num_elements; i++) {
2396         if (var_slot >= 0 && var_slot < MAX_VARYINGS_INCL_PATCH)
2397            slots |= UINT64_C(1) << var_slot;
2398         var_slot += 1;
2399      }
2400   }
2401
2402   return slots;
2403}
2404
2405
2406/**
2407 * Assign locations for all variables that are produced in one pipeline stage
2408 * (the "producer") and consumed in the next stage (the "consumer").
2409 *
2410 * Variables produced by the producer may also be consumed by transform
2411 * feedback.
2412 *
2413 * \param num_tfeedback_decls is the number of declarations indicating
2414 *        variables that may be consumed by transform feedback.
2415 *
2416 * \param tfeedback_decls is a pointer to an array of tfeedback_decl objects
2417 *        representing the result of parsing the strings passed to
2418 *        glTransformFeedbackVaryings().  assign_location() will be called for
2419 *        each of these objects that matches one of the outputs of the
2420 *        producer.
2421 *
2422 * When num_tfeedback_decls is nonzero, it is permissible for the consumer to
2423 * be NULL.  In this case, varying locations are assigned solely based on the
2424 * requirements of transform feedback.
2425 */
2426static bool
2427assign_varying_locations(struct gl_context *ctx,
2428                         void *mem_ctx,
2429                         struct gl_shader_program *prog,
2430                         gl_linked_shader *producer,
2431                         gl_linked_shader *consumer,
2432                         unsigned num_tfeedback_decls,
2433                         tfeedback_decl *tfeedback_decls,
2434                         const uint64_t reserved_slots)
2435{
2436   /* Tessellation shaders treat inputs and outputs as shared memory and can
2437    * access inputs and outputs of other invocations.
2438    * Therefore, they can't be lowered to temps easily (and definitely not
2439    * efficiently).
2440    */
2441   bool unpackable_tess =
2442      (consumer && consumer->Stage == MESA_SHADER_TESS_EVAL) ||
2443      (consumer && consumer->Stage == MESA_SHADER_TESS_CTRL) ||
2444      (producer && producer->Stage == MESA_SHADER_TESS_CTRL);
2445
2446   /* Transform feedback code assumes varying arrays are packed, so if the
2447    * driver has disabled varying packing, make sure to at least enable
2448    * packing required by transform feedback.
2449    */
2450   bool xfb_enabled =
2451      ctx->Extensions.EXT_transform_feedback && !unpackable_tess;
2452
2453   /* Disable packing on outward facing interfaces for SSO because in ES we
2454    * need to retain the unpacked varying information for draw time
2455    * validation.
2456    *
2457    * Packing is still enabled on individual arrays, structs, and matrices as
2458    * these are required by the transform feedback code and it is still safe
2459    * to do so. We also enable packing when a varying is only used for
2460    * transform feedback and its not a SSO.
2461    */
2462   bool disable_varying_packing =
2463      ctx->Const.DisableVaryingPacking || unpackable_tess;
2464   if (prog->SeparateShader && (producer == NULL || consumer == NULL))
2465      disable_varying_packing = true;
2466
2467   varying_matches matches(disable_varying_packing, xfb_enabled,
2468                           ctx->Extensions.ARB_enhanced_layouts,
2469                           producer ? producer->Stage : MESA_SHADER_NONE,
2470                           consumer ? consumer->Stage : MESA_SHADER_NONE);
2471   hash_table *tfeedback_candidates =
2472         _mesa_hash_table_create(NULL, _mesa_key_hash_string,
2473                                 _mesa_key_string_equal);
2474   hash_table *consumer_inputs =
2475         _mesa_hash_table_create(NULL, _mesa_key_hash_string,
2476                                 _mesa_key_string_equal);
2477   hash_table *consumer_interface_inputs =
2478         _mesa_hash_table_create(NULL, _mesa_key_hash_string,
2479                                 _mesa_key_string_equal);
2480   ir_variable *consumer_inputs_with_locations[VARYING_SLOT_TESS_MAX] = {
2481      NULL,
2482   };
2483
2484   unsigned consumer_vertices = 0;
2485   if (consumer && consumer->Stage == MESA_SHADER_GEOMETRY)
2486      consumer_vertices = prog->Geom.VerticesIn;
2487
2488   /* Operate in a total of four passes.
2489    *
2490    * 1. Sort inputs / outputs into a canonical order.  This is necessary so
2491    *    that inputs / outputs of separable shaders will be assigned
2492    *    predictable locations regardless of the order in which declarations
2493    *    appeared in the shader source.
2494    *
2495    * 2. Assign locations for any matching inputs and outputs.
2496    *
2497    * 3. Mark output variables in the producer that do not have locations as
2498    *    not being outputs.  This lets the optimizer eliminate them.
2499    *
2500    * 4. Mark input variables in the consumer that do not have locations as
2501    *    not being inputs.  This lets the optimizer eliminate them.
2502    */
2503   if (consumer)
2504      canonicalize_shader_io(consumer->ir, ir_var_shader_in);
2505
2506   if (producer)
2507      canonicalize_shader_io(producer->ir, ir_var_shader_out);
2508
2509   if (consumer)
2510      linker::populate_consumer_input_sets(mem_ctx, consumer->ir,
2511                                           consumer_inputs,
2512                                           consumer_interface_inputs,
2513                                           consumer_inputs_with_locations);
2514
2515   if (producer) {
2516      foreach_in_list(ir_instruction, node, producer->ir) {
2517         ir_variable *const output_var = node->as_variable();
2518
2519         if (output_var == NULL || output_var->data.mode != ir_var_shader_out)
2520            continue;
2521
2522         /* Only geometry shaders can use non-zero streams */
2523         assert(output_var->data.stream == 0 ||
2524                (output_var->data.stream < MAX_VERTEX_STREAMS &&
2525                 producer->Stage == MESA_SHADER_GEOMETRY));
2526
2527         if (num_tfeedback_decls > 0) {
2528            tfeedback_candidate_generator g(mem_ctx, tfeedback_candidates, producer->Stage);
2529            /* From OpenGL 4.6 (Core Profile) spec, section 11.1.2.1
2530             * ("Vertex Shader Variables / Output Variables")
2531             *
2532             * "Each program object can specify a set of output variables from
2533             * one shader to be recorded in transform feedback mode (see
2534             * section 13.3). The variables that can be recorded are those
2535             * emitted by the first active shader, in order, from the
2536             * following list:
2537             *
2538             *  * geometry shader
2539             *  * tessellation evaluation shader
2540             *  * tessellation control shader
2541             *  * vertex shader"
2542             *
2543             * But on OpenGL ES 3.2, section 11.1.2.1 ("Vertex Shader
2544             * Variables / Output Variables") tessellation control shader is
2545             * not included in the stages list.
2546             */
2547            if (!prog->IsES || producer->Stage != MESA_SHADER_TESS_CTRL) {
2548               g.process(output_var);
2549            }
2550         }
2551
2552         ir_variable *const input_var =
2553            linker::get_matching_input(mem_ctx, output_var, consumer_inputs,
2554                                       consumer_interface_inputs,
2555                                       consumer_inputs_with_locations);
2556
2557         /* If a matching input variable was found, add this output (and the
2558          * input) to the set.  If this is a separable program and there is no
2559          * consumer stage, add the output.
2560          *
2561          * Always add TCS outputs. They are shared by all invocations
2562          * within a patch and can be used as shared memory.
2563          */
2564         if (input_var || (prog->SeparateShader && consumer == NULL) ||
2565             producer->Stage == MESA_SHADER_TESS_CTRL) {
2566            matches.record(output_var, input_var);
2567         }
2568
2569         /* Only stream 0 outputs can be consumed in the next stage */
2570         if (input_var && output_var->data.stream != 0) {
2571            linker_error(prog, "output %s is assigned to stream=%d but "
2572                         "is linked to an input, which requires stream=0",
2573                         output_var->name, output_var->data.stream);
2574            return false;
2575         }
2576      }
2577   } else {
2578      /* If there's no producer stage, then this must be a separable program.
2579       * For example, we may have a program that has just a fragment shader.
2580       * Later this program will be used with some arbitrary vertex (or
2581       * geometry) shader program.  This means that locations must be assigned
2582       * for all the inputs.
2583       */
2584      foreach_in_list(ir_instruction, node, consumer->ir) {
2585         ir_variable *const input_var = node->as_variable();
2586         if (input_var && input_var->data.mode == ir_var_shader_in) {
2587            matches.record(NULL, input_var);
2588         }
2589      }
2590   }
2591
2592   for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
2593      if (!tfeedback_decls[i].is_varying())
2594         continue;
2595
2596      const tfeedback_candidate *matched_candidate
2597         = tfeedback_decls[i].find_candidate(prog, tfeedback_candidates);
2598
2599      if (matched_candidate == NULL) {
2600         _mesa_hash_table_destroy(tfeedback_candidates, NULL);
2601         return false;
2602      }
2603
2604      /* Mark xfb varyings as always active */
2605      matched_candidate->toplevel_var->data.always_active_io = 1;
2606
2607      /* Mark any corresponding inputs as always active also. We must do this
2608       * because we have a NIR pass that lowers vectors to scalars and another
2609       * that removes unused varyings.
2610       * We don't split varyings marked as always active because there is no
2611       * point in doing so. This means we need to mark both sides of the
2612       * interface as always active otherwise we will have a mismatch and
2613       * start removing things we shouldn't.
2614       */
2615      ir_variable *const input_var =
2616         linker::get_matching_input(mem_ctx, matched_candidate->toplevel_var,
2617                                    consumer_inputs,
2618                                    consumer_interface_inputs,
2619                                    consumer_inputs_with_locations);
2620      if (input_var)
2621         input_var->data.always_active_io = 1;
2622
2623      if (matched_candidate->toplevel_var->data.is_unmatched_generic_inout) {
2624         matched_candidate->toplevel_var->data.is_xfb_only = 1;
2625         matches.record(matched_candidate->toplevel_var, NULL);
2626      }
2627   }
2628
2629   _mesa_hash_table_destroy(consumer_inputs, NULL);
2630   _mesa_hash_table_destroy(consumer_interface_inputs, NULL);
2631
2632   uint8_t components[MAX_VARYINGS_INCL_PATCH] = {0};
2633   const unsigned slots_used = matches.assign_locations(
2634         prog, components, reserved_slots);
2635   matches.store_locations();
2636
2637   for (unsigned i = 0; i < num_tfeedback_decls; ++i) {
2638      if (tfeedback_decls[i].is_varying()) {
2639         if (!tfeedback_decls[i].assign_location(ctx, prog)) {
2640            _mesa_hash_table_destroy(tfeedback_candidates, NULL);
2641            return false;
2642         }
2643      }
2644   }
2645   _mesa_hash_table_destroy(tfeedback_candidates, NULL);
2646
2647   if (consumer && producer) {
2648      foreach_in_list(ir_instruction, node, consumer->ir) {
2649         ir_variable *const var = node->as_variable();
2650
2651         if (var && var->data.mode == ir_var_shader_in &&
2652             var->data.is_unmatched_generic_inout) {
2653            if (!prog->IsES && prog->data->Version <= 120) {
2654               /* On page 25 (page 31 of the PDF) of the GLSL 1.20 spec:
2655                *
2656                *     Only those varying variables used (i.e. read) in
2657                *     the fragment shader executable must be written to
2658                *     by the vertex shader executable; declaring
2659                *     superfluous varying variables in a vertex shader is
2660                *     permissible.
2661                *
2662                * We interpret this text as meaning that the VS must
2663                * write the variable for the FS to read it.  See
2664                * "glsl1-varying read but not written" in piglit.
2665                */
2666               linker_error(prog, "%s shader varying %s not written "
2667                            "by %s shader\n.",
2668                            _mesa_shader_stage_to_string(consumer->Stage),
2669                            var->name,
2670                            _mesa_shader_stage_to_string(producer->Stage));
2671            } else {
2672               linker_warning(prog, "%s shader varying %s not written "
2673                              "by %s shader\n.",
2674                              _mesa_shader_stage_to_string(consumer->Stage),
2675                              var->name,
2676                              _mesa_shader_stage_to_string(producer->Stage));
2677            }
2678         }
2679      }
2680
2681      /* Now that validation is done its safe to remove unused varyings. As
2682       * we have both a producer and consumer its safe to remove unused
2683       * varyings even if the program is a SSO because the stages are being
2684       * linked together i.e. we have a multi-stage SSO.
2685       */
2686      remove_unused_shader_inputs_and_outputs(false, producer,
2687                                              ir_var_shader_out);
2688      remove_unused_shader_inputs_and_outputs(false, consumer,
2689                                              ir_var_shader_in);
2690   }
2691
2692   if (producer) {
2693      lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_out,
2694                            0, producer, disable_varying_packing,
2695                            xfb_enabled);
2696   }
2697
2698   if (consumer) {
2699      lower_packed_varyings(mem_ctx, slots_used, components, ir_var_shader_in,
2700                            consumer_vertices, consumer,
2701                            disable_varying_packing, xfb_enabled);
2702   }
2703
2704   return true;
2705}
2706
2707static bool
2708check_against_output_limit(struct gl_context *ctx,
2709                           struct gl_shader_program *prog,
2710                           gl_linked_shader *producer,
2711                           unsigned num_explicit_locations)
2712{
2713   unsigned output_vectors = num_explicit_locations;
2714
2715   foreach_in_list(ir_instruction, node, producer->ir) {
2716      ir_variable *const var = node->as_variable();
2717
2718      if (var && !var->data.explicit_location &&
2719          var->data.mode == ir_var_shader_out &&
2720          var_counts_against_varying_limit(producer->Stage, var)) {
2721         /* outputs for fragment shader can't be doubles */
2722         output_vectors += var->type->count_attribute_slots(false);
2723      }
2724   }
2725
2726   assert(producer->Stage != MESA_SHADER_FRAGMENT);
2727   unsigned max_output_components =
2728      ctx->Const.Program[producer->Stage].MaxOutputComponents;
2729
2730   const unsigned output_components = output_vectors * 4;
2731   if (output_components > max_output_components) {
2732      if (ctx->API == API_OPENGLES2 || prog->IsES)
2733         linker_error(prog, "%s shader uses too many output vectors "
2734                      "(%u > %u)\n",
2735                      _mesa_shader_stage_to_string(producer->Stage),
2736                      output_vectors,
2737                      max_output_components / 4);
2738      else
2739         linker_error(prog, "%s shader uses too many output components "
2740                      "(%u > %u)\n",
2741                      _mesa_shader_stage_to_string(producer->Stage),
2742                      output_components,
2743                      max_output_components);
2744
2745      return false;
2746   }
2747
2748   return true;
2749}
2750
2751static bool
2752check_against_input_limit(struct gl_context *ctx,
2753                          struct gl_shader_program *prog,
2754                          gl_linked_shader *consumer,
2755                          unsigned num_explicit_locations)
2756{
2757   unsigned input_vectors = num_explicit_locations;
2758
2759   foreach_in_list(ir_instruction, node, consumer->ir) {
2760      ir_variable *const var = node->as_variable();
2761
2762      if (var && !var->data.explicit_location &&
2763          var->data.mode == ir_var_shader_in &&
2764          var_counts_against_varying_limit(consumer->Stage, var)) {
2765         /* vertex inputs aren't varying counted */
2766         input_vectors += var->type->count_attribute_slots(false);
2767      }
2768   }
2769
2770   assert(consumer->Stage != MESA_SHADER_VERTEX);
2771   unsigned max_input_components =
2772      ctx->Const.Program[consumer->Stage].MaxInputComponents;
2773
2774   const unsigned input_components = input_vectors * 4;
2775   if (input_components > max_input_components) {
2776      if (ctx->API == API_OPENGLES2 || prog->IsES)
2777         linker_error(prog, "%s shader uses too many input vectors "
2778                      "(%u > %u)\n",
2779                      _mesa_shader_stage_to_string(consumer->Stage),
2780                      input_vectors,
2781                      max_input_components / 4);
2782      else
2783         linker_error(prog, "%s shader uses too many input components "
2784                      "(%u > %u)\n",
2785                      _mesa_shader_stage_to_string(consumer->Stage),
2786                      input_components,
2787                      max_input_components);
2788
2789      return false;
2790   }
2791
2792   return true;
2793}
2794
2795bool
2796link_varyings(struct gl_shader_program *prog, unsigned first, unsigned last,
2797              struct gl_context *ctx, void *mem_ctx)
2798{
2799   bool has_xfb_qualifiers = false;
2800   unsigned num_tfeedback_decls = 0;
2801   char **varying_names = NULL;
2802   tfeedback_decl *tfeedback_decls = NULL;
2803
2804   /* From the ARB_enhanced_layouts spec:
2805    *
2806    *    "If the shader used to record output variables for transform feedback
2807    *    varyings uses the "xfb_buffer", "xfb_offset", or "xfb_stride" layout
2808    *    qualifiers, the values specified by TransformFeedbackVaryings are
2809    *    ignored, and the set of variables captured for transform feedback is
2810    *    instead derived from the specified layout qualifiers."
2811    */
2812   for (int i = MESA_SHADER_FRAGMENT - 1; i >= 0; i--) {
2813      /* Find last stage before fragment shader */
2814      if (prog->_LinkedShaders[i]) {
2815         has_xfb_qualifiers =
2816            process_xfb_layout_qualifiers(mem_ctx, prog->_LinkedShaders[i],
2817                                          prog, &num_tfeedback_decls,
2818                                          &varying_names);
2819         break;
2820      }
2821   }
2822
2823   if (!has_xfb_qualifiers) {
2824      num_tfeedback_decls = prog->TransformFeedback.NumVarying;
2825      varying_names = prog->TransformFeedback.VaryingNames;
2826   }
2827
2828   if (num_tfeedback_decls != 0) {
2829      /* From GL_EXT_transform_feedback:
2830       *   A program will fail to link if:
2831       *
2832       *   * the <count> specified by TransformFeedbackVaryingsEXT is
2833       *     non-zero, but the program object has no vertex or geometry
2834       *     shader;
2835       */
2836      if (first >= MESA_SHADER_FRAGMENT) {
2837         linker_error(prog, "Transform feedback varyings specified, but "
2838                      "no vertex, tessellation, or geometry shader is "
2839                      "present.\n");
2840         return false;
2841      }
2842
2843      tfeedback_decls = rzalloc_array(mem_ctx, tfeedback_decl,
2844                                      num_tfeedback_decls);
2845      if (!parse_tfeedback_decls(ctx, prog, mem_ctx, num_tfeedback_decls,
2846                                 varying_names, tfeedback_decls))
2847         return false;
2848   }
2849
2850   /* If there is no fragment shader we need to set transform feedback.
2851    *
2852    * For SSO we also need to assign output locations.  We assign them here
2853    * because we need to do it for both single stage programs and multi stage
2854    * programs.
2855    */
2856   if (last < MESA_SHADER_FRAGMENT &&
2857       (num_tfeedback_decls != 0 || prog->SeparateShader)) {
2858      const uint64_t reserved_out_slots =
2859         reserved_varying_slot(prog->_LinkedShaders[last], ir_var_shader_out);
2860      if (!assign_varying_locations(ctx, mem_ctx, prog,
2861                                    prog->_LinkedShaders[last], NULL,
2862                                    num_tfeedback_decls, tfeedback_decls,
2863                                    reserved_out_slots))
2864         return false;
2865   }
2866
2867   if (last <= MESA_SHADER_FRAGMENT) {
2868      /* Remove unused varyings from the first/last stage unless SSO */
2869      remove_unused_shader_inputs_and_outputs(prog->SeparateShader,
2870                                              prog->_LinkedShaders[first],
2871                                              ir_var_shader_in);
2872      remove_unused_shader_inputs_and_outputs(prog->SeparateShader,
2873                                              prog->_LinkedShaders[last],
2874                                              ir_var_shader_out);
2875
2876      /* If the program is made up of only a single stage */
2877      if (first == last) {
2878         gl_linked_shader *const sh = prog->_LinkedShaders[last];
2879
2880         do_dead_builtin_varyings(ctx, NULL, sh, 0, NULL);
2881         do_dead_builtin_varyings(ctx, sh, NULL, num_tfeedback_decls,
2882                                  tfeedback_decls);
2883
2884         if (prog->SeparateShader) {
2885            const uint64_t reserved_slots =
2886               reserved_varying_slot(sh, ir_var_shader_in);
2887
2888            /* Assign input locations for SSO, output locations are already
2889             * assigned.
2890             */
2891            if (!assign_varying_locations(ctx, mem_ctx, prog,
2892                                          NULL /* producer */,
2893                                          sh /* consumer */,
2894                                          0 /* num_tfeedback_decls */,
2895                                          NULL /* tfeedback_decls */,
2896                                          reserved_slots))
2897               return false;
2898         }
2899      } else {
2900         /* Linking the stages in the opposite order (from fragment to vertex)
2901          * ensures that inter-shader outputs written to in an earlier stage
2902          * are eliminated if they are (transitively) not used in a later
2903          * stage.
2904          */
2905         int next = last;
2906         for (int i = next - 1; i >= 0; i--) {
2907            if (prog->_LinkedShaders[i] == NULL && i != 0)
2908               continue;
2909
2910            gl_linked_shader *const sh_i = prog->_LinkedShaders[i];
2911            gl_linked_shader *const sh_next = prog->_LinkedShaders[next];
2912
2913            const uint64_t reserved_out_slots =
2914               reserved_varying_slot(sh_i, ir_var_shader_out);
2915            const uint64_t reserved_in_slots =
2916               reserved_varying_slot(sh_next, ir_var_shader_in);
2917
2918            do_dead_builtin_varyings(ctx, sh_i, sh_next,
2919                      next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0,
2920                      tfeedback_decls);
2921
2922            if (!assign_varying_locations(ctx, mem_ctx, prog, sh_i, sh_next,
2923                      next == MESA_SHADER_FRAGMENT ? num_tfeedback_decls : 0,
2924                      tfeedback_decls,
2925                      reserved_out_slots | reserved_in_slots))
2926               return false;
2927
2928            /* This must be done after all dead varyings are eliminated. */
2929            if (sh_i != NULL) {
2930               unsigned slots_used = util_bitcount64(reserved_out_slots);
2931               if (!check_against_output_limit(ctx, prog, sh_i, slots_used)) {
2932                  return false;
2933               }
2934            }
2935
2936            unsigned slots_used = util_bitcount64(reserved_in_slots);
2937            if (!check_against_input_limit(ctx, prog, sh_next, slots_used))
2938               return false;
2939
2940            next = i;
2941         }
2942      }
2943   }
2944
2945   if (!store_tfeedback_info(ctx, prog, num_tfeedback_decls, tfeedback_decls,
2946                             has_xfb_qualifiers))
2947      return false;
2948
2949   return true;
2950}
2951