1/*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "nir.h"
25#include "nir_builder.h"
26#include "util/set.h"
27#include "util/hash_table.h"
28
/* This file contains various little helpers for doing simple linking in
 * NIR.  Eventually, we'll probably want a full-blown varying packing
 * implementation in here.  Right now, it removes unused varyings, compacts
 * and packs the remaining components, and performs a few other simple
 * link-time optimizations.
 */
33
/**
 * Returns the bits in the inputs_read or outputs_written
 * bitfield corresponding to this variable.
 */
38static uint64_t
39get_variable_io_mask(nir_variable *var, gl_shader_stage stage)
40{
41   if (var->data.location < 0)
42      return 0;
43
44   unsigned location = var->data.patch ?
45      var->data.location - VARYING_SLOT_PATCH0 : var->data.location;
46
47   assert(var->data.mode == nir_var_shader_in ||
48          var->data.mode == nir_var_shader_out);
49   assert(var->data.location >= 0);
50
51   const struct glsl_type *type = var->type;
52   if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
53      assert(glsl_type_is_array(type));
54      type = glsl_get_array_element(type);
55   }
56
57   unsigned slots = glsl_count_attribute_slots(type, false);
58   return ((1ull << slots) - 1) << location;
59}
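
/* Illustrative example (not tied to any particular driver): a non-arrayed,
 * non-patch `vec4 foo[2]` vertex-shader output occupies two slots, so with
 * var->data.location == VARYING_SLOT_VAR1 this returns
 * ((1ull << 2) - 1) << VARYING_SLOT_VAR1, i.e. the bits for VAR1 and VAR2.
 * Patch varyings are counted relative to VARYING_SLOT_PATCH0.
 */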
60
61static bool
62is_non_generic_patch_var(nir_variable *var)
63{
64   return var->data.location == VARYING_SLOT_TESS_LEVEL_INNER ||
65          var->data.location == VARYING_SLOT_TESS_LEVEL_OUTER ||
66          var->data.location == VARYING_SLOT_BOUNDING_BOX0 ||
67          var->data.location == VARYING_SLOT_BOUNDING_BOX1;
68}
69
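/* Structs and interface blocks are conservatively treated as using all four
 * components of each slot; plain vectors and scalars report their real
 * component count.
 */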
70static uint8_t
71get_num_components(nir_variable *var)
72{
73   if (glsl_type_is_struct_or_ifc(glsl_without_array(var->type)))
74      return 4;
75
76   return glsl_get_vector_elements(glsl_without_array(var->type));
77}
78
79static void
80tcs_add_output_reads(nir_shader *shader, uint64_t *read, uint64_t *patches_read)
81{
82   nir_foreach_function(function, shader) {
83      if (!function->impl)
84         continue;
85
86      nir_foreach_block(block, function->impl) {
87         nir_foreach_instr(instr, block) {
88            if (instr->type != nir_instr_type_intrinsic)
89               continue;
90
91            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
92            if (intrin->intrinsic != nir_intrinsic_load_deref)
93               continue;
94
95            nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);
96            if (!nir_deref_mode_is(deref, nir_var_shader_out))
97               continue;
98
99            nir_variable *var = nir_deref_instr_get_variable(deref);
100            for (unsigned i = 0; i < get_num_components(var); i++) {
101               if (var->data.patch) {
102                  if (is_non_generic_patch_var(var))
103                     continue;
104
105                  patches_read[var->data.location_frac + i] |=
106                     get_variable_io_mask(var, shader->info.stage);
107               } else {
108                  read[var->data.location_frac + i] |=
109                     get_variable_io_mask(var, shader->info.stage);
110               }
111            }
112         }
113      }
114   }
115}
116
/**
 * Helper for removing unused shader I/O variables, by demoting them to global
 * variables (which may then be dead code eliminated).
 *
 * Example usage is:
 *
 * progress = nir_remove_unused_io_vars(producer, nir_var_shader_out,
 *                                      read, patches_read) ||
 *                                      progress;
 *
 * The "used" masks should be arrays of 4 uint64_ts (probably of VARYING_BIT_*),
 * one per possible .location_frac.  Note that for vector variables, only the
 * first channel (.location_frac) is examined for deciding if the variable
 * is used!
 */
132bool
133nir_remove_unused_io_vars(nir_shader *shader,
134                          nir_variable_mode mode,
135                          uint64_t *used_by_other_stage,
136                          uint64_t *used_by_other_stage_patches)
137{
138   bool progress = false;
139   uint64_t *used;
140
141   assert(mode == nir_var_shader_in || mode == nir_var_shader_out);
142
143   nir_foreach_variable_with_modes_safe(var, shader, mode) {
144      if (var->data.patch)
145         used = used_by_other_stage_patches;
146      else
147         used = used_by_other_stage;
148
149      if (var->data.location < VARYING_SLOT_VAR0 && var->data.location >= 0)
150         continue;
151
152      if (var->data.always_active_io)
153         continue;
154
155      if (var->data.explicit_xfb_buffer)
156         continue;
157
158      uint64_t other_stage = used[var->data.location_frac];
159
160      if (!(other_stage & get_variable_io_mask(var, shader->info.stage))) {
         /* This variable is not used by the other stage, so demote it to a
          * shader temporary (global); it can then be dead-code eliminated.
          */
162         var->data.location = 0;
163         var->data.mode = nir_var_shader_temp;
164
165         progress = true;
166      }
167   }
168
169   if (progress)
170      nir_fixup_deref_modes(shader);
171
172   return progress;
173}
174
175bool
176nir_remove_unused_varyings(nir_shader *producer, nir_shader *consumer)
177{
178   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
179   assert(consumer->info.stage != MESA_SHADER_VERTEX);
180
181   uint64_t read[4] = { 0 }, written[4] = { 0 };
182   uint64_t patches_read[4] = { 0 }, patches_written[4] = { 0 };
183
184   nir_foreach_shader_out_variable(var, producer) {
185      for (unsigned i = 0; i < get_num_components(var); i++) {
186         if (var->data.patch) {
187            if (is_non_generic_patch_var(var))
188               continue;
189
190            patches_written[var->data.location_frac + i] |=
191               get_variable_io_mask(var, producer->info.stage);
192         } else {
193            written[var->data.location_frac + i] |=
194               get_variable_io_mask(var, producer->info.stage);
195         }
196      }
197   }
198
199   nir_foreach_shader_in_variable(var, consumer) {
200      for (unsigned i = 0; i < get_num_components(var); i++) {
201         if (var->data.patch) {
202            if (is_non_generic_patch_var(var))
203               continue;
204
205            patches_read[var->data.location_frac + i] |=
206               get_variable_io_mask(var, consumer->info.stage);
207         } else {
208            read[var->data.location_frac + i] |=
209               get_variable_io_mask(var, consumer->info.stage);
210         }
211      }
212   }
213
214   /* Each TCS invocation can read data written by other TCS invocations,
215    * so even if the outputs are not used by the TES we must also make
216    * sure they are not read by the TCS before demoting them to globals.
217    */
218   if (producer->info.stage == MESA_SHADER_TESS_CTRL)
219      tcs_add_output_reads(producer, read, patches_read);
220
221   bool progress = false;
222   progress = nir_remove_unused_io_vars(producer, nir_var_shader_out, read,
223                                        patches_read);
224
225   progress = nir_remove_unused_io_vars(consumer, nir_var_shader_in, written,
226                                        patches_written) || progress;
227
228   return progress;
229}
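
/* A hedged usage sketch (the surrounding pass ordering is the driver's choice,
 * not mandated here): since unused varyings are only demoted to
 * nir_var_shader_temp, callers typically follow up with dead-variable
 * elimination, e.g.:
 *
 *    if (nir_remove_unused_varyings(producer, consumer)) {
 *       nir_remove_dead_variables(producer, nir_var_shader_temp, NULL);
 *       nir_remove_dead_variables(consumer, nir_var_shader_temp, NULL);
 *    }
 */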
230
231static uint8_t
232get_interp_type(nir_variable *var, const struct glsl_type *type,
233                bool default_to_smooth_interp)
234{
235   if (glsl_type_is_integer(type))
236      return INTERP_MODE_FLAT;
237   else if (var->data.interpolation != INTERP_MODE_NONE)
238      return var->data.interpolation;
239   else if (default_to_smooth_interp)
240      return INTERP_MODE_SMOOTH;
241   else
242      return INTERP_MODE_NONE;
243}
244
245#define INTERPOLATE_LOC_SAMPLE 0
246#define INTERPOLATE_LOC_CENTROID 1
247#define INTERPOLATE_LOC_CENTER 2
248
249static uint8_t
250get_interp_loc(nir_variable *var)
251{
252   if (var->data.sample)
253      return INTERPOLATE_LOC_SAMPLE;
254   else if (var->data.centroid)
255      return INTERPOLATE_LOC_CENTROID;
256   else
257      return INTERPOLATE_LOC_CENTER;
258}
259
260static bool
261is_packing_supported_for_type(const struct glsl_type *type)
262{
   /* We ignore complex types such as arrays, matrices, structs and bit sizes
    * other than 32 bit. All other vector types should have been split into
    * scalar variables by the lower_io_to_scalar pass. The only exception
    * should be OpenGL xfb varyings.
    * TODO: add support for more complex types?
    */
269   return glsl_type_is_scalar(type) && glsl_type_is_32bit(type);
270}
271
272struct assigned_comps
273{
274   uint8_t comps;
275   uint8_t interp_type;
276   uint8_t interp_loc;
277   bool is_32bit;
278   bool is_mediump;
279};
280
/* Packing arrays and dual-slot varyings is difficult, so to avoid complex
 * algorithms this function just keeps them at their existing locations for now.
 * TODO: allow better packing of complex types.
 */
285static void
286get_unmoveable_components_masks(nir_shader *shader,
287                                nir_variable_mode mode,
288                                struct assigned_comps *comps,
289                                gl_shader_stage stage,
290                                bool default_to_smooth_interp)
291{
292   nir_foreach_variable_with_modes_safe(var, shader, mode) {
293      assert(var->data.location >= 0);
294
295      /* Only remap things that aren't built-ins. */
296      if (var->data.location >= VARYING_SLOT_VAR0 &&
297          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
298
299         const struct glsl_type *type = var->type;
300         if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
301            assert(glsl_type_is_array(type));
302            type = glsl_get_array_element(type);
303         }
304
305         /* If we can pack this varying then don't mark the components as
306          * used.
307          */
308         if (is_packing_supported_for_type(type))
309            continue;
310
311         unsigned location = var->data.location - VARYING_SLOT_VAR0;
312
313         unsigned elements =
314            glsl_type_is_vector_or_scalar(glsl_without_array(type)) ?
315            glsl_get_vector_elements(glsl_without_array(type)) : 4;
316
317         bool dual_slot = glsl_type_is_dual_slot(glsl_without_array(type));
318         unsigned slots = glsl_count_attribute_slots(type, false);
319         unsigned dmul = glsl_type_is_64bit(glsl_without_array(type)) ? 2 : 1;
320         unsigned comps_slot2 = 0;
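         /* Worked example (illustrative): a dvec3 with location_frac == 0 has
          * elements == 3 and dmul == 2, i.e. 6 components spread over two
          * slots.  The first slot takes num_comps = 4 - location_frac = 4
          * components and the remaining comps_slot2 = 6 - 4 = 2 components
          * land in the second slot.
          */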
321         for (unsigned i = 0; i < slots; i++) {
322            if (dual_slot) {
323               if (i & 1) {
324                  comps[location + i].comps |= ((1 << comps_slot2) - 1);
325               } else {
326                  unsigned num_comps = 4 - var->data.location_frac;
327                  comps_slot2 = (elements * dmul) - num_comps;
328
329                  /* Assume ARB_enhanced_layouts packing rules for doubles */
330                  assert(var->data.location_frac == 0 ||
331                         var->data.location_frac == 2);
332                  assert(comps_slot2 <= 4);
333
334                  comps[location + i].comps |=
335                     ((1 << num_comps) - 1) << var->data.location_frac;
336               }
337            } else {
338               comps[location + i].comps |=
339                  ((1 << (elements * dmul)) - 1) << var->data.location_frac;
340            }
341
342            comps[location + i].interp_type =
343               get_interp_type(var, type, default_to_smooth_interp);
344            comps[location + i].interp_loc = get_interp_loc(var);
345            comps[location + i].is_32bit =
346               glsl_type_is_32bit(glsl_without_array(type));
347            comps[location + i].is_mediump =
348               var->data.precision == GLSL_PRECISION_MEDIUM ||
349               var->data.precision == GLSL_PRECISION_LOW;
350         }
351      }
352   }
353}
354
355struct varying_loc
356{
357   uint8_t component;
358   uint32_t location;
359};
360
361static void
362mark_all_used_slots(nir_variable *var, uint64_t *slots_used,
363                    uint64_t slots_used_mask, unsigned num_slots)
364{
365   unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
366
367   slots_used[var->data.patch ? 1 : 0] |= slots_used_mask &
368      BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
369}
370
371static void
372mark_used_slot(nir_variable *var, uint64_t *slots_used, unsigned offset)
373{
374   unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
375
376   slots_used[var->data.patch ? 1 : 0] |=
377      BITFIELD64_BIT(var->data.location - loc_offset + offset);
378}
379
380static void
381remap_slots_and_components(nir_shader *shader, nir_variable_mode mode,
382                           struct varying_loc (*remap)[4],
383                           uint64_t *slots_used, uint64_t *out_slots_read,
384                           uint32_t *p_slots_used, uint32_t *p_out_slots_read)
{
386   const gl_shader_stage stage = shader->info.stage;
387   uint64_t out_slots_read_tmp[2] = {0};
388   uint64_t slots_used_tmp[2] = {0};
389
390   /* We don't touch builtins so just copy the bitmask */
391   slots_used_tmp[0] = *slots_used & BITFIELD64_RANGE(0, VARYING_SLOT_VAR0);
392
393   nir_foreach_variable_with_modes(var, shader, mode) {
394      assert(var->data.location >= 0);
395
396      /* Only remap things that aren't built-ins */
397      if (var->data.location >= VARYING_SLOT_VAR0 &&
398          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
399
400         const struct glsl_type *type = var->type;
401         if (nir_is_arrayed_io(var, stage) || var->data.per_view) {
402            assert(glsl_type_is_array(type));
403            type = glsl_get_array_element(type);
404         }
405
406         unsigned num_slots = glsl_count_attribute_slots(type, false);
407         bool used_across_stages = false;
408         bool outputs_read = false;
409
410         unsigned location = var->data.location - VARYING_SLOT_VAR0;
411         struct varying_loc *new_loc = &remap[location][var->data.location_frac];
412
413         unsigned loc_offset = var->data.patch ? VARYING_SLOT_PATCH0 : 0;
414         uint64_t used = var->data.patch ? *p_slots_used : *slots_used;
415         uint64_t outs_used =
416            var->data.patch ? *p_out_slots_read : *out_slots_read;
417         uint64_t slots =
418            BITFIELD64_RANGE(var->data.location - loc_offset, num_slots);
419
420         if (slots & used)
421            used_across_stages = true;
422
423         if (slots & outs_used)
424            outputs_read = true;
425
426         if (new_loc->location) {
427            var->data.location = new_loc->location;
428            var->data.location_frac = new_loc->component;
429         }
430
431         if (var->data.always_active_io) {
432            /* We can't apply link time optimisations (specifically array
433             * splitting) to these so we need to copy the existing mask
434             * otherwise we will mess up the mask for things like partially
435             * marked arrays.
436             */
437            if (used_across_stages)
438               mark_all_used_slots(var, slots_used_tmp, used, num_slots);
439
440            if (outputs_read) {
441               mark_all_used_slots(var, out_slots_read_tmp, outs_used,
442                                   num_slots);
443            }
444         } else {
445            for (unsigned i = 0; i < num_slots; i++) {
446               if (used_across_stages)
447                  mark_used_slot(var, slots_used_tmp, i);
448
449               if (outputs_read)
450                  mark_used_slot(var, out_slots_read_tmp, i);
451            }
452         }
453      }
454   }
455
456   *slots_used = slots_used_tmp[0];
457   *out_slots_read = out_slots_read_tmp[0];
458   *p_slots_used = slots_used_tmp[1];
459   *p_out_slots_read = out_slots_read_tmp[1];
460}
461
462struct varying_component {
463   nir_variable *var;
464   uint8_t interp_type;
465   uint8_t interp_loc;
466   bool is_32bit;
467   bool is_patch;
468   bool is_mediump;
469   bool is_intra_stage_only;
470   bool initialised;
471};
472
473static int
474cmp_varying_component(const void *comp1_v, const void *comp2_v)
475{
476   struct varying_component *comp1 = (struct varying_component *) comp1_v;
477   struct varying_component *comp2 = (struct varying_component *) comp2_v;
478
   /* We want patches to be ordered at the end of the array */
480   if (comp1->is_patch != comp2->is_patch)
481      return comp1->is_patch ? 1 : -1;
482
   /* We want to try to group together TCS outputs that are only read by other
    * TCS invocations and not consumed by the following stage.
    */
486   if (comp1->is_intra_stage_only != comp2->is_intra_stage_only)
487      return comp1->is_intra_stage_only ? 1 : -1;
488
489   /* Group mediump varyings together. */
490   if (comp1->is_mediump != comp2->is_mediump)
491      return comp1->is_mediump ? 1 : -1;
492
493   /* We can only pack varyings with matching interpolation types so group
494    * them together.
495    */
496   if (comp1->interp_type != comp2->interp_type)
497      return comp1->interp_type - comp2->interp_type;
498
499   /* Interpolation loc must match also. */
500   if (comp1->interp_loc != comp2->interp_loc)
501      return comp1->interp_loc - comp2->interp_loc;
502
503   /* If everything else matches just use the original location to sort */
504   const struct nir_variable_data *const data1 = &comp1->var->data;
505   const struct nir_variable_data *const data2 = &comp2->var->data;
506   if (data1->location != data2->location)
507      return data1->location - data2->location;
508   return (int)data1->location_frac - (int)data2->location_frac;
509}
510
511static void
512gather_varying_component_info(nir_shader *producer, nir_shader *consumer,
513                              struct varying_component **varying_comp_info,
514                              unsigned *varying_comp_info_size,
515                              bool default_to_smooth_interp)
516{
517   unsigned store_varying_info_idx[MAX_VARYINGS_INCL_PATCH][4] = {{0}};
518   unsigned num_of_comps_to_pack = 0;
519
   /* Count the number of varyings that can be packed and create a mapping
    * from those varyings to the array we will pass to qsort.
    */
523   nir_foreach_shader_out_variable(var, producer) {
524
525      /* Only remap things that aren't builtins. */
526      if (var->data.location >= VARYING_SLOT_VAR0 &&
527          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYINGS_INCL_PATCH) {
528
529         /* We can't repack xfb varyings. */
530         if (var->data.always_active_io)
531            continue;
532
533         const struct glsl_type *type = var->type;
534         if (nir_is_arrayed_io(var, producer->info.stage) || var->data.per_view) {
535            assert(glsl_type_is_array(type));
536            type = glsl_get_array_element(type);
537         }
538
539         if (!is_packing_supported_for_type(type))
540            continue;
541
542         unsigned loc = var->data.location - VARYING_SLOT_VAR0;
543         store_varying_info_idx[loc][var->data.location_frac] =
544            ++num_of_comps_to_pack;
545      }
546   }
547
548   *varying_comp_info_size = num_of_comps_to_pack;
549   *varying_comp_info = rzalloc_array(NULL, struct varying_component,
550                                      num_of_comps_to_pack);
551
552   nir_function_impl *impl = nir_shader_get_entrypoint(consumer);
553
554   /* Walk over the shader and populate the varying component info array */
555   nir_foreach_block(block, impl) {
556      nir_foreach_instr(instr, block) {
557         if (instr->type != nir_instr_type_intrinsic)
558            continue;
559
560         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
561         if (intr->intrinsic != nir_intrinsic_load_deref &&
562             intr->intrinsic != nir_intrinsic_interp_deref_at_centroid &&
563             intr->intrinsic != nir_intrinsic_interp_deref_at_sample &&
564             intr->intrinsic != nir_intrinsic_interp_deref_at_offset &&
565             intr->intrinsic != nir_intrinsic_interp_deref_at_vertex)
566            continue;
567
568         nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
569         if (!nir_deref_mode_is(deref, nir_var_shader_in))
570            continue;
571
572         /* We only remap things that aren't builtins. */
573         nir_variable *in_var = nir_deref_instr_get_variable(deref);
574         if (in_var->data.location < VARYING_SLOT_VAR0)
575            continue;
576
577         unsigned location = in_var->data.location - VARYING_SLOT_VAR0;
578         if (location >= MAX_VARYINGS_INCL_PATCH)
579            continue;
580
581         unsigned var_info_idx =
582            store_varying_info_idx[location][in_var->data.location_frac];
583         if (!var_info_idx)
584            continue;
585
586         struct varying_component *vc_info =
587            &(*varying_comp_info)[var_info_idx-1];
588
589         if (!vc_info->initialised) {
590            const struct glsl_type *type = in_var->type;
591            if (nir_is_arrayed_io(in_var, consumer->info.stage) ||
592                in_var->data.per_view) {
593               assert(glsl_type_is_array(type));
594               type = glsl_get_array_element(type);
595            }
596
597            vc_info->var = in_var;
598            vc_info->interp_type =
599               get_interp_type(in_var, type, default_to_smooth_interp);
600            vc_info->interp_loc = get_interp_loc(in_var);
601            vc_info->is_32bit = glsl_type_is_32bit(type);
602            vc_info->is_patch = in_var->data.patch;
603            vc_info->is_mediump = !producer->options->linker_ignore_precision &&
604               (in_var->data.precision == GLSL_PRECISION_MEDIUM ||
605                in_var->data.precision == GLSL_PRECISION_LOW);
606            vc_info->is_intra_stage_only = false;
607            vc_info->initialised = true;
608         }
609      }
610   }
611
612   /* Walk over the shader and populate the varying component info array
613    * for varyings which are read by other TCS instances but are not consumed
614    * by the TES.
615    */
616   if (producer->info.stage == MESA_SHADER_TESS_CTRL) {
617      impl = nir_shader_get_entrypoint(producer);
618
619      nir_foreach_block(block, impl) {
620         nir_foreach_instr(instr, block) {
621            if (instr->type != nir_instr_type_intrinsic)
622               continue;
623
624            nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
625            if (intr->intrinsic != nir_intrinsic_load_deref)
626               continue;
627
628            nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
629            if (!nir_deref_mode_is(deref, nir_var_shader_out))
630               continue;
631
632            /* We only remap things that aren't builtins. */
633            nir_variable *out_var = nir_deref_instr_get_variable(deref);
634            if (out_var->data.location < VARYING_SLOT_VAR0)
635               continue;
636
637            unsigned location = out_var->data.location - VARYING_SLOT_VAR0;
638            if (location >= MAX_VARYINGS_INCL_PATCH)
639               continue;
640
641            unsigned var_info_idx =
642               store_varying_info_idx[location][out_var->data.location_frac];
643            if (!var_info_idx) {
644               /* Something went wrong, the shader interfaces didn't match, so
645                * abandon packing. This can happen for example when the
646                * inputs are scalars but the outputs are struct members.
647                */
648               *varying_comp_info_size = 0;
649               break;
650            }
651
652            struct varying_component *vc_info =
653               &(*varying_comp_info)[var_info_idx-1];
654
655            if (!vc_info->initialised) {
656               const struct glsl_type *type = out_var->type;
657               if (nir_is_arrayed_io(out_var, producer->info.stage)) {
658                  assert(glsl_type_is_array(type));
659                  type = glsl_get_array_element(type);
660               }
661
662               vc_info->var = out_var;
663               vc_info->interp_type =
664                  get_interp_type(out_var, type, default_to_smooth_interp);
665               vc_info->interp_loc = get_interp_loc(out_var);
666               vc_info->is_32bit = glsl_type_is_32bit(type);
667               vc_info->is_patch = out_var->data.patch;
668               vc_info->is_mediump = !producer->options->linker_ignore_precision &&
669                  (out_var->data.precision == GLSL_PRECISION_MEDIUM ||
670                   out_var->data.precision == GLSL_PRECISION_LOW);
671               vc_info->is_intra_stage_only = true;
672               vc_info->initialised = true;
673            }
674         }
675      }
676   }
677
678   for (unsigned i = 0; i < *varying_comp_info_size; i++ ) {
679      struct varying_component *vc_info = &(*varying_comp_info)[i];
680      if (!vc_info->initialised) {
681         /* Something went wrong, the shader interfaces didn't match, so
682          * abandon packing. This can happen for example when the outputs are
683          * scalars but the inputs are struct members.
684          */
685         *varying_comp_info_size = 0;
686         break;
687      }
688   }
689}
690
691static bool
692allow_pack_interp_type(nir_pack_varying_options options, int type)
693{
694   int sel;
695
696   switch (type) {
697   case INTERP_MODE_NONE:
698      sel = nir_pack_varying_interp_mode_none;
699      break;
700   case INTERP_MODE_SMOOTH:
701      sel = nir_pack_varying_interp_mode_smooth;
702      break;
703   case INTERP_MODE_FLAT:
704      sel = nir_pack_varying_interp_mode_flat;
705      break;
706   case INTERP_MODE_NOPERSPECTIVE:
707      sel = nir_pack_varying_interp_mode_noperspective;
708      break;
709   default:
710      return false;
711   }
712
713   return options & sel;
714}
715
716static bool
717allow_pack_interp_loc(nir_pack_varying_options options, int loc)
718{
719   int sel;
720
721   switch (loc) {
722   case INTERPOLATE_LOC_SAMPLE:
723      sel = nir_pack_varying_interp_loc_sample;
724      break;
725   case INTERPOLATE_LOC_CENTROID:
726      sel = nir_pack_varying_interp_loc_centroid;
727      break;
728   case INTERPOLATE_LOC_CENTER:
729      sel = nir_pack_varying_interp_loc_center;
730      break;
731   default:
732      return false;
733   }
734
735   return options & sel;
736}
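
/* Hedged example of how the two helpers above are used: a driver whose
 * hardware can interpolate flat and smooth components from the same vec4
 * might advertise
 *
 *    options.pack_varying_options = nir_pack_varying_interp_mode_flat |
 *                                   nir_pack_varying_interp_mode_smooth;
 *
 * in its nir_shader_compiler_options, which lets assign_remap_locations()
 * below mix those interpolation modes within a single location.
 */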
737
738static void
739assign_remap_locations(struct varying_loc (*remap)[4],
740                       struct assigned_comps *assigned_comps,
741                       struct varying_component *info,
742                       unsigned *cursor, unsigned *comp,
743                       unsigned max_location,
744                       nir_pack_varying_options options)
745{
746   unsigned tmp_cursor = *cursor;
747   unsigned tmp_comp = *comp;
748
749   for (; tmp_cursor < max_location; tmp_cursor++) {
750
751      if (assigned_comps[tmp_cursor].comps) {
752         /* We can only pack varyings with matching precision. */
753         if (assigned_comps[tmp_cursor].is_mediump != info->is_mediump) {
754            tmp_comp = 0;
755            continue;
756         }
757
         /* We can only pack varyings with matching interpolation types,
          * unless the driver allows packing across interpolation types
          * (see nir_pack_varying_options).
          */
761         if (assigned_comps[tmp_cursor].interp_type != info->interp_type &&
762             (!allow_pack_interp_type(options, assigned_comps[tmp_cursor].interp_type) ||
763              !allow_pack_interp_type(options, info->interp_type))) {
764            tmp_comp = 0;
765            continue;
766         }
767
         /* We can only pack varyings with matching interpolation locations,
          * unless the driver allows packing across interpolation locations.
          */
771         if (assigned_comps[tmp_cursor].interp_loc != info->interp_loc &&
772             (!allow_pack_interp_loc(options, assigned_comps[tmp_cursor].interp_loc) ||
773              !allow_pack_interp_loc(options, info->interp_loc))) {
774            tmp_comp = 0;
775            continue;
776         }
777
778         /* We can only pack varyings with matching types, and the current
779          * algorithm only supports packing 32-bit.
780          */
781         if (!assigned_comps[tmp_cursor].is_32bit) {
782            tmp_comp = 0;
783            continue;
784         }
785
786         while (tmp_comp < 4 &&
787                (assigned_comps[tmp_cursor].comps & (1 << tmp_comp))) {
788            tmp_comp++;
789         }
790      }
791
792      if (tmp_comp == 4) {
793         tmp_comp = 0;
794         continue;
795      }
796
797      unsigned location = info->var->data.location - VARYING_SLOT_VAR0;
798
799      /* Once we have assigned a location mark it as used */
800      assigned_comps[tmp_cursor].comps |= (1 << tmp_comp);
801      assigned_comps[tmp_cursor].interp_type = info->interp_type;
802      assigned_comps[tmp_cursor].interp_loc = info->interp_loc;
803      assigned_comps[tmp_cursor].is_32bit = info->is_32bit;
804      assigned_comps[tmp_cursor].is_mediump = info->is_mediump;
805
806      /* Assign remap location */
807      remap[location][info->var->data.location_frac].component = tmp_comp++;
808      remap[location][info->var->data.location_frac].location =
809         tmp_cursor + VARYING_SLOT_VAR0;
810
811      break;
812   }
813
814   *cursor = tmp_cursor;
815   *comp = tmp_comp;
816}
817
818/* If there are empty components in the slot compact the remaining components
819 * as close to component 0 as possible. This will make it easier to fill the
820 * empty components with components from a different slot in a following pass.
821 */
822static void
823compact_components(nir_shader *producer, nir_shader *consumer,
824                   struct assigned_comps *assigned_comps,
825                   bool default_to_smooth_interp)
826{
827   struct varying_loc remap[MAX_VARYINGS_INCL_PATCH][4] = {{{0}, {0}}};
828   struct varying_component *varying_comp_info;
829   unsigned varying_comp_info_size;
830
831   /* Gather varying component info */
832   gather_varying_component_info(producer, consumer, &varying_comp_info,
833                                 &varying_comp_info_size,
834                                 default_to_smooth_interp);
835
836   /* Sort varying components. */
837   qsort(varying_comp_info, varying_comp_info_size,
838         sizeof(struct varying_component), cmp_varying_component);
839
840   nir_pack_varying_options options = consumer->options->pack_varying_options;
841
842   unsigned cursor = 0;
843   unsigned comp = 0;
844
845   /* Set the remap array based on the sorted components */
846   for (unsigned i = 0; i < varying_comp_info_size; i++ ) {
847      struct varying_component *info = &varying_comp_info[i];
848
849      assert(info->is_patch || cursor < MAX_VARYING);
850      if (info->is_patch) {
851         /* The list should be sorted with all non-patch inputs first followed
852          * by patch inputs.  When we hit our first patch input, we need to
853          * reset the cursor to MAX_VARYING so we put them in the right slot.
854          */
855         if (cursor < MAX_VARYING) {
856            cursor = MAX_VARYING;
857            comp = 0;
858         }
859
860         assign_remap_locations(remap, assigned_comps, info,
861                                &cursor, &comp, MAX_VARYINGS_INCL_PATCH,
862                                options);
863      } else {
864         assign_remap_locations(remap, assigned_comps, info,
865                                &cursor, &comp, MAX_VARYING,
866                                options);
867
868         /* Check if we failed to assign a remap location. This can happen if
869          * for example there are a bunch of unmovable components with
870          * mismatching interpolation types causing us to skip over locations
871          * that would have been useful for packing later components.
872          * The solution is to iterate over the locations again (this should
873          * happen very rarely in practice).
874          */
875         if (cursor == MAX_VARYING) {
876            cursor = 0;
877            comp = 0;
878            assign_remap_locations(remap, assigned_comps, info,
879                                   &cursor, &comp, MAX_VARYING,
880                                   options);
881         }
882      }
883   }
884
885   ralloc_free(varying_comp_info);
886
887   uint64_t zero = 0;
888   uint32_t zero32 = 0;
889   remap_slots_and_components(consumer, nir_var_shader_in, remap,
890                              &consumer->info.inputs_read, &zero,
891                              &consumer->info.patch_inputs_read, &zero32);
892   remap_slots_and_components(producer, nir_var_shader_out, remap,
893                              &producer->info.outputs_written,
894                              &producer->info.outputs_read,
895                              &producer->info.patch_outputs_written,
896                              &producer->info.patch_outputs_read);
897}
898
899/* We assume that this has been called more-or-less directly after
900 * remove_unused_varyings.  At this point, all of the varyings that we
901 * aren't going to be using have been completely removed and the
902 * inputs_read and outputs_written fields in nir_shader_info reflect
903 * this.  Therefore, the total set of valid slots is the OR of the two
904 * sets of varyings;  this accounts for varyings which one side may need
905 * to read/write even if the other doesn't.  This can happen if, for
906 * instance, an array is used indirectly from one side causing it to be
907 * unsplittable but directly from the other.
908 */
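/* A minimal ordering sketch (illustrative; the exact passes a driver runs
 * around this will differ):
 *
 *    nir_lower_io_to_scalar_early(producer, nir_var_shader_out);
 *    nir_lower_io_to_scalar_early(consumer, nir_var_shader_in);
 *    nir_remove_unused_varyings(producer, consumer);
 *    nir_compact_varyings(producer, consumer, default_to_smooth_interp);
 */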
909void
910nir_compact_varyings(nir_shader *producer, nir_shader *consumer,
911                     bool default_to_smooth_interp)
912{
913   assert(producer->info.stage != MESA_SHADER_FRAGMENT);
914   assert(consumer->info.stage != MESA_SHADER_VERTEX);
915
916   struct assigned_comps assigned_comps[MAX_VARYINGS_INCL_PATCH] = {{0}};
917
918   get_unmoveable_components_masks(producer, nir_var_shader_out,
919                                   assigned_comps,
920                                   producer->info.stage,
921                                   default_to_smooth_interp);
922   get_unmoveable_components_masks(consumer, nir_var_shader_in,
923                                   assigned_comps,
924                                   consumer->info.stage,
925                                   default_to_smooth_interp);
926
927   compact_components(producer, consumer, assigned_comps,
928                      default_to_smooth_interp);
929}
930
931/*
932 * Mark XFB varyings as always_active_io in the consumer so the linking opts
933 * don't touch them.
934 */
935void
936nir_link_xfb_varyings(nir_shader *producer, nir_shader *consumer)
937{
938   nir_variable *input_vars[MAX_VARYING] = { 0 };
939
940   nir_foreach_shader_in_variable(var, consumer) {
941      if (var->data.location >= VARYING_SLOT_VAR0 &&
942          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
943
944         unsigned location = var->data.location - VARYING_SLOT_VAR0;
945         input_vars[location] = var;
946      }
947   }
948
949   nir_foreach_shader_out_variable(var, producer) {
950      if (var->data.location >= VARYING_SLOT_VAR0 &&
951          var->data.location - VARYING_SLOT_VAR0 < MAX_VARYING) {
952
953         if (!var->data.always_active_io)
954            continue;
955
956         unsigned location = var->data.location - VARYING_SLOT_VAR0;
957         if (input_vars[location]) {
958            input_vars[location]->data.always_active_io = true;
959         }
960      }
961   }
962}
963
964static bool
965does_varying_match(nir_variable *out_var, nir_variable *in_var)
966{
967   return in_var->data.location == out_var->data.location &&
968          in_var->data.location_frac == out_var->data.location_frac;
969}
970
971static nir_variable *
972get_matching_input_var(nir_shader *consumer, nir_variable *out_var)
973{
974   nir_foreach_shader_in_variable(var, consumer) {
975      if (does_varying_match(out_var, var))
976         return var;
977   }
978
979   return NULL;
980}
981
982static bool
983can_replace_varying(nir_variable *out_var)
984{
985   /* Skip types that require more complex handling.
986    * TODO: add support for these types.
987    */
988   if (glsl_type_is_array(out_var->type) ||
989       glsl_type_is_dual_slot(out_var->type) ||
990       glsl_type_is_matrix(out_var->type) ||
991       glsl_type_is_struct_or_ifc(out_var->type))
992      return false;
993
994   /* Limit this pass to scalars for now to keep things simple. Most varyings
995    * should have been lowered to scalars at this point anyway.
996    */
997   if (!glsl_type_is_scalar(out_var->type))
998      return false;
999
1000   if (out_var->data.location < VARYING_SLOT_VAR0 ||
1001       out_var->data.location - VARYING_SLOT_VAR0 >= MAX_VARYING)
1002      return false;
1003
1004   return true;
1005}
1006
1007static bool
1008replace_varying_input_by_constant_load(nir_shader *shader,
1009                                       nir_intrinsic_instr *store_intr)
1010{
1011   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
1012
1013   nir_builder b;
1014   nir_builder_init(&b, impl);
1015
1016   nir_variable *out_var =
1017      nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));
1018
1019   bool progress = false;
1020   nir_foreach_block(block, impl) {
1021      nir_foreach_instr(instr, block) {
1022         if (instr->type != nir_instr_type_intrinsic)
1023            continue;
1024
1025         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1026         if (intr->intrinsic != nir_intrinsic_load_deref)
1027            continue;
1028
1029         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
1030         if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
1031            continue;
1032
1033         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
1034
1035         if (!does_varying_match(out_var, in_var))
1036            continue;
1037
1038         b.cursor = nir_before_instr(instr);
1039
1040         nir_load_const_instr *out_const =
1041            nir_instr_as_load_const(store_intr->src[1].ssa->parent_instr);
1042
1043         /* Add new const to replace the input */
1044         nir_ssa_def *nconst = nir_build_imm(&b, store_intr->num_components,
1045                                             intr->dest.ssa.bit_size,
1046                                             out_const->value);
1047
1048         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nconst);
1049
1050         progress = true;
1051      }
1052   }
1053
1054   return progress;
1055}
1056
1057static bool
1058replace_duplicate_input(nir_shader *shader, nir_variable *input_var,
1059                         nir_intrinsic_instr *dup_store_intr)
1060{
1061   assert(input_var);
1062
1063   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
1064
1065   nir_builder b;
1066   nir_builder_init(&b, impl);
1067
1068   nir_variable *dup_out_var =
1069      nir_deref_instr_get_variable(nir_src_as_deref(dup_store_intr->src[0]));
1070
1071   bool progress = false;
1072   nir_foreach_block(block, impl) {
1073      nir_foreach_instr(instr, block) {
1074         if (instr->type != nir_instr_type_intrinsic)
1075            continue;
1076
1077         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1078         if (intr->intrinsic != nir_intrinsic_load_deref)
1079            continue;
1080
1081         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
1082         if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
1083            continue;
1084
1085         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
1086
1087         if (!does_varying_match(dup_out_var, in_var) ||
1088             in_var->data.interpolation != input_var->data.interpolation ||
1089             get_interp_loc(in_var) != get_interp_loc(input_var))
1090            continue;
1091
1092         b.cursor = nir_before_instr(instr);
1093
1094         nir_ssa_def *load = nir_load_var(&b, input_var);
1095         nir_ssa_def_rewrite_uses(&intr->dest.ssa, load);
1096
1097         progress = true;
1098      }
1099   }
1100
1101   return progress;
1102}
1103
1104static bool
1105is_direct_uniform_load(nir_ssa_def *def, nir_ssa_scalar *s)
1106{
   /* def is sure to be a scalar, as can_replace_varying() filters out the
    * vector case.
    */
1108   assert(def->num_components == 1);
1109
1110   /* Uniform load may hide behind some move instruction for converting
1111    * vector to scalar:
1112    *
1113    *     vec1 32 ssa_1 = deref_var &color (uniform vec3)
1114    *     vec3 32 ssa_2 = intrinsic load_deref (ssa_1) (0)
1115    *     vec1 32 ssa_3 = mov ssa_2.x
1116    *     vec1 32 ssa_4 = deref_var &color_out (shader_out float)
1117    *     intrinsic store_deref (ssa_4, ssa_3) (1, 0)
1118    */
1119   *s = nir_ssa_scalar_resolved(def, 0);
1120
1121   nir_ssa_def *ssa = s->def;
1122   if (ssa->parent_instr->type != nir_instr_type_intrinsic)
1123      return false;
1124
1125   nir_intrinsic_instr *intr = nir_instr_as_intrinsic(ssa->parent_instr);
1126   if (intr->intrinsic != nir_intrinsic_load_deref)
1127      return false;
1128
1129   nir_deref_instr *deref = nir_src_as_deref(intr->src[0]);
1130   /* TODO: support nir_var_mem_ubo. */
1131   if (!nir_deref_mode_is(deref, nir_var_uniform))
1132      return false;
1133
1134   /* Does not support indirect uniform load. */
1135   return !nir_deref_instr_has_indirect(deref);
1136}
1137
1138static nir_variable *
1139get_uniform_var_in_consumer(nir_shader *consumer,
1140                            nir_variable *var_in_producer)
1141{
1142   /* Find if uniform already exists in consumer. */
1143   nir_variable *new_var = NULL;
1144   nir_foreach_uniform_variable(v, consumer) {
1145      if (!strcmp(var_in_producer->name, v->name)) {
1146         new_var = v;
1147         break;
1148      }
1149   }
1150
   /* Create the variable if it doesn't exist yet. */
1152   if (!new_var) {
1153      new_var = nir_variable_clone(var_in_producer, consumer);
1154      nir_shader_add_variable(consumer, new_var);
1155   }
1156
1157   return new_var;
1158}
1159
1160static nir_deref_instr *
1161clone_deref_instr(nir_builder *b, nir_variable *var, nir_deref_instr *deref)
1162{
1163   if (deref->deref_type == nir_deref_type_var)
1164       return nir_build_deref_var(b, var);
1165
1166   nir_deref_instr *parent_deref = nir_deref_instr_parent(deref);
1167   nir_deref_instr *parent = clone_deref_instr(b, var, parent_deref);
1168
   /* Build the array and struct deref instructions.
    * The "deref" instr is guaranteed to be direct (see is_direct_uniform_load()).
    */
1172   switch (deref->deref_type) {
1173   case nir_deref_type_array: {
1174      nir_load_const_instr *index =
1175         nir_instr_as_load_const(deref->arr.index.ssa->parent_instr);
1176      return nir_build_deref_array_imm(b, parent, index->value->i64);
1177   }
1178   case nir_deref_type_ptr_as_array: {
1179      nir_load_const_instr *index =
1180         nir_instr_as_load_const(deref->arr.index.ssa->parent_instr);
1181      nir_ssa_def *ssa = nir_imm_intN_t(b, index->value->i64,
1182                                        parent->dest.ssa.bit_size);
1183      return nir_build_deref_ptr_as_array(b, parent, ssa);
1184   }
1185   case nir_deref_type_struct:
1186      return nir_build_deref_struct(b, parent, deref->strct.index);
1187   default:
1188      unreachable("invalid type");
1189      return NULL;
1190   }
1191}
1192
1193static bool
1194replace_varying_input_by_uniform_load(nir_shader *shader,
1195                                      nir_intrinsic_instr *store_intr,
1196                                      nir_ssa_scalar *scalar)
1197{
1198   nir_function_impl *impl = nir_shader_get_entrypoint(shader);
1199
1200   nir_builder b;
1201   nir_builder_init(&b, impl);
1202
1203   nir_variable *out_var =
1204      nir_deref_instr_get_variable(nir_src_as_deref(store_intr->src[0]));
1205
1206   nir_intrinsic_instr *load = nir_instr_as_intrinsic(scalar->def->parent_instr);
1207   nir_deref_instr *deref = nir_src_as_deref(load->src[0]);
1208   nir_variable *uni_var = nir_deref_instr_get_variable(deref);
1209   uni_var = get_uniform_var_in_consumer(shader, uni_var);
1210
1211   bool progress = false;
1212   nir_foreach_block(block, impl) {
1213      nir_foreach_instr(instr, block) {
1214         if (instr->type != nir_instr_type_intrinsic)
1215            continue;
1216
1217         nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1218         if (intr->intrinsic != nir_intrinsic_load_deref)
1219            continue;
1220
1221         nir_deref_instr *in_deref = nir_src_as_deref(intr->src[0]);
1222         if (!nir_deref_mode_is(in_deref, nir_var_shader_in))
1223            continue;
1224
1225         nir_variable *in_var = nir_deref_instr_get_variable(in_deref);
1226
1227         if (!does_varying_match(out_var, in_var))
1228            continue;
1229
1230         b.cursor = nir_before_instr(instr);
1231
         /* Clone the uniform's deref chain (from the variable deref down to
          * the deref feeding the load) into this shader.
          */
1233         nir_deref_instr *uni_deref = clone_deref_instr(&b, uni_var, deref);
1234         nir_ssa_def *uni_def = nir_load_deref(&b, uni_deref);
1235
         /* Add a vector-to-scalar move if the uniform is a vector. */
1237         if (uni_def->num_components > 1) {
1238            nir_alu_src src = {0};
1239            src.src = nir_src_for_ssa(uni_def);
1240            src.swizzle[0] = scalar->comp;
1241            uni_def = nir_mov_alu(&b, src, 1);
1242         }
1243
1244         /* Replace load input with load uniform. */
1245         nir_ssa_def_rewrite_uses(&intr->dest.ssa, uni_def);
1246
1247         progress = true;
1248      }
1249   }
1250
1251   return progress;
1252}
1253
1254/* The GLSL ES 3.20 spec says:
1255 *
1256 * "The precision of a vertex output does not need to match the precision of
1257 * the corresponding fragment input. The minimum precision at which vertex
1258 * outputs are interpolated is the minimum of the vertex output precision and
1259 * the fragment input precision, with the exception that for highp,
1260 * implementations do not have to support full IEEE 754 precision." (9.1 "Input
1261 * Output Matching by Name in Linked Programs")
1262 *
1263 * To implement this, when linking shaders we will take the minimum precision
1264 * qualifier (allowing drivers to interpolate at lower precision). For
1265 * input/output between non-fragment stages (e.g. VERTEX to GEOMETRY), the spec
1266 * requires we use the *last* specified precision if there is a conflict.
1267 *
1268 * Precisions are ordered as (NONE, HIGH, MEDIUM, LOW). If either precision is
1269 * NONE, we'll return the other precision, since there is no conflict.
1270 * Otherwise for fragment interpolation, we'll pick the smallest of (HIGH,
1271 * MEDIUM, LOW) by picking the maximum of the raw values - note the ordering is
1272 * "backwards". For non-fragment stages, we'll pick the latter precision to
1273 * comply with the spec. (Note that the order matters.)
1274 *
 * For streamout, "Variables declared with lowp or mediump precision are
 * promoted to highp before being written." (12.2 "Transform Feedback", p. 341
 * of the OpenGL ES 3.2 specification). So drivers should promote them for the
 * transform feedback memory store, but not for the output store.
 */
1280
1281static unsigned
1282nir_link_precision(unsigned producer, unsigned consumer, bool fs)
1283{
1284   if (producer == GLSL_PRECISION_NONE)
1285      return consumer;
1286   else if (consumer == GLSL_PRECISION_NONE)
1287      return producer;
1288   else
1289      return fs ? MAX2(producer, consumer) : consumer;
1290}
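
/* Worked example: with the (NONE, HIGH, MEDIUM, LOW) ordering described above,
 * a highp vertex output feeding a mediump fragment input gives
 * MAX2(GLSL_PRECISION_HIGH, GLSL_PRECISION_MEDIUM) == GLSL_PRECISION_MEDIUM,
 * i.e. the lower of the two precisions.  For a non-fragment consumer the
 * function simply returns the consumer's precision (when neither is NONE).
 */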
1291
1292void
1293nir_link_varying_precision(nir_shader *producer, nir_shader *consumer)
1294{
1295   bool frag = consumer->info.stage == MESA_SHADER_FRAGMENT;
1296
1297   nir_foreach_shader_out_variable(producer_var, producer) {
1298      /* Skip if the slot is not assigned */
1299      if (producer_var->data.location < 0)
1300         continue;
1301
1302      nir_variable *consumer_var = nir_find_variable_with_location(consumer,
1303            nir_var_shader_in, producer_var->data.location);
1304
1305      /* Skip if the variable will be eliminated */
1306      if (!consumer_var)
1307         continue;
1308
1309      /* Now we have a pair of variables. Let's pick the smaller precision. */
1310      unsigned precision_1 = producer_var->data.precision;
1311      unsigned precision_2 = consumer_var->data.precision;
1312      unsigned minimum = nir_link_precision(precision_1, precision_2, frag);
1313
1314      /* Propagate the new precision */
1315      producer_var->data.precision = consumer_var->data.precision = minimum;
1316   }
1317}
1318
1319bool
1320nir_link_opt_varyings(nir_shader *producer, nir_shader *consumer)
1321{
1322   /* TODO: Add support for more shader stage combinations */
1323   if (consumer->info.stage != MESA_SHADER_FRAGMENT ||
1324       (producer->info.stage != MESA_SHADER_VERTEX &&
1325        producer->info.stage != MESA_SHADER_TESS_EVAL))
1326      return false;
1327
1328   bool progress = false;
1329
1330   nir_function_impl *impl = nir_shader_get_entrypoint(producer);
1331
1332   struct hash_table *varying_values = _mesa_pointer_hash_table_create(NULL);
1333
1334   /* If we find a store in the last block of the producer we can be sure this
1335    * is the only possible value for this output.
1336    */
1337   nir_block *last_block = nir_impl_last_block(impl);
1338   nir_foreach_instr_reverse(instr, last_block) {
1339      if (instr->type != nir_instr_type_intrinsic)
1340         continue;
1341
1342      nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);
1343
1344      if (intr->intrinsic != nir_intrinsic_store_deref)
1345         continue;
1346
1347      nir_deref_instr *out_deref = nir_src_as_deref(intr->src[0]);
1348      if (!nir_deref_mode_is(out_deref, nir_var_shader_out))
1349         continue;
1350
1351      nir_variable *out_var = nir_deref_instr_get_variable(out_deref);
1352      if (!can_replace_varying(out_var))
1353         continue;
1354
1355      nir_ssa_scalar uni_scalar;
1356      nir_ssa_def *ssa = intr->src[1].ssa;
1357      if (ssa->parent_instr->type == nir_instr_type_load_const) {
1358         progress |= replace_varying_input_by_constant_load(consumer, intr);
1359      } else if (is_direct_uniform_load(ssa, &uni_scalar)) {
1360         progress |= replace_varying_input_by_uniform_load(consumer, intr,
1361                                                           &uni_scalar);
1362      } else {
1363         struct hash_entry *entry =
1364               _mesa_hash_table_search(varying_values, ssa);
1365         if (entry) {
1366            progress |= replace_duplicate_input(consumer,
1367                                                (nir_variable *) entry->data,
1368                                                intr);
1369         } else {
1370            nir_variable *in_var = get_matching_input_var(consumer, out_var);
1371            if (in_var) {
1372               _mesa_hash_table_insert(varying_values, ssa, in_var);
1373            }
1374         }
1375      }
1376   }
1377
1378   _mesa_hash_table_destroy(varying_values, NULL);
1379
1380   return progress;
1381}
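
/* Roughly the kind of producer code this pass looks for (listing illustrative):
 *
 *     vec1 32 ssa_1 = load_const (0x3f800000)
 *     vec1 32 ssa_2 = deref_var &out_color (shader_out float)
 *     intrinsic store_deref (ssa_2, ssa_1) (1, 0)
 *
 * Every load of the matching input in the consumer is then rewritten to the
 * constant 1.0, and the now-unused varying can later be removed by the other
 * linking passes above.
 */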
1382
1383/* TODO any better helper somewhere to sort a list? */
1384
1385static void
1386insert_sorted(struct exec_list *var_list, nir_variable *new_var)
1387{
1388   nir_foreach_variable_in_list(var, var_list) {
1389      if (var->data.location > new_var->data.location) {
1390         exec_node_insert_node_before(&var->node, &new_var->node);
1391         return;
1392      }
1393   }
1394   exec_list_push_tail(var_list, &new_var->node);
1395}
1396
1397static void
1398sort_varyings(nir_shader *shader, nir_variable_mode mode,
1399              struct exec_list *sorted_list)
1400{
1401   exec_list_make_empty(sorted_list);
1402   nir_foreach_variable_with_modes_safe(var, shader, mode) {
1403      exec_node_remove(&var->node);
1404      insert_sorted(sorted_list, var);
1405   }
1406}
1407
1408void
1409nir_assign_io_var_locations(nir_shader *shader, nir_variable_mode mode,
1410                            unsigned *size, gl_shader_stage stage)
1411{
1412   unsigned location = 0;
1413   unsigned assigned_locations[VARYING_SLOT_TESS_MAX];
1414   uint64_t processed_locs[2] = {0};
1415
1416   struct exec_list io_vars;
1417   sort_varyings(shader, mode, &io_vars);
1418
1419   int UNUSED last_loc = 0;
1420   bool last_partial = false;
1421   nir_foreach_variable_in_list(var, &io_vars) {
1422      const struct glsl_type *type = var->type;
1423      if (nir_is_arrayed_io(var, stage)) {
1424         assert(glsl_type_is_array(type));
1425         type = glsl_get_array_element(type);
1426      }
1427
1428      int base;
1429      if (var->data.mode == nir_var_shader_in && stage == MESA_SHADER_VERTEX)
1430         base = VERT_ATTRIB_GENERIC0;
1431      else if (var->data.mode == nir_var_shader_out &&
1432               stage == MESA_SHADER_FRAGMENT)
1433         base = FRAG_RESULT_DATA0;
1434      else
1435         base = VARYING_SLOT_VAR0;
1436
1437      unsigned var_size, driver_size;
1438      if (var->data.compact) {
1439         /* If we are inside a partial compact,
1440          * don't allow another compact to be in this slot
1441          * if it starts at component 0.
1442          */
1443         if (last_partial && var->data.location_frac == 0) {
1444            location++;
1445         }
1446
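         /* Worked example (illustrative): a compact float[5] variable starting
          * at location_frac 0 covers one full vec4 slot plus one component of
          * the next, so var_size == driver_size == 1 here and last_partial is
          * set, which makes the next non-compact variable (or a compact one
          * starting at component 0) begin on a fresh slot.
          */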
1447         /* compact variables must be arrays of scalars */
1448         assert(!var->data.per_view);
1449         assert(glsl_type_is_array(type));
1450         assert(glsl_type_is_scalar(glsl_get_array_element(type)));
1451         unsigned start = 4 * location + var->data.location_frac;
1452         unsigned end = start + glsl_get_length(type);
1453         var_size = driver_size = end / 4 - location;
1454         last_partial = end % 4 != 0;
1455      } else {
1456         /* Compact variables bypass the normal varying compacting pass,
1457          * which means they cannot be in the same vec4 slot as a normal
1458          * variable. If part of the current slot is taken up by a compact
1459          * variable, we need to go to the next one.
1460          */
1461         if (last_partial) {
1462            location++;
1463            last_partial = false;
1464         }
1465
         /* Per-view variables have an extra array dimension, which is ignored
          * when counting user-facing slots (var->data.location), but *not*
          * when counting driver slots (var->data.driver_location). That is,
          * each user slot maps to multiple driver slots.
          */
1471         driver_size = glsl_count_attribute_slots(type, false);
1472         if (var->data.per_view) {
1473            assert(glsl_type_is_array(type));
1474            var_size =
1475               glsl_count_attribute_slots(glsl_get_array_element(type), false);
1476         } else {
1477            var_size = driver_size;
1478         }
1479      }
1480
1481      /* Builtins don't allow component packing so we only need to worry about
1482       * user defined varyings sharing the same location.
1483       */
1484      bool processed = false;
1485      if (var->data.location >= base) {
1486         unsigned glsl_location = var->data.location - base;
1487
1488         for (unsigned i = 0; i < var_size; i++) {
1489            if (processed_locs[var->data.index] &
1490                ((uint64_t)1 << (glsl_location + i)))
1491               processed = true;
1492            else
1493               processed_locs[var->data.index] |=
1494                  ((uint64_t)1 << (glsl_location + i));
1495         }
1496      }
1497
      /* Because component packing allows varyings to share the same location,
       * we may already have processed this location.
       */
1501      if (processed) {
1502         /* TODO handle overlapping per-view variables */
1503         assert(!var->data.per_view);
1504         unsigned driver_location = assigned_locations[var->data.location];
1505         var->data.driver_location = driver_location;
1506
         /* An array may be packed such that it crosses multiple other arrays
          * or variables; we need to make sure we have allocated the elements
          * consecutively if the previously processed var was shorter than
          * the current array we are processing.
          *
          * NOTE: The code below assumes the var list is ordered in ascending
          * location order.
          */
1515         assert(last_loc <= var->data.location);
1516         last_loc = var->data.location;
1517         unsigned last_slot_location = driver_location + var_size;
1518         if (last_slot_location > location) {
1519            unsigned num_unallocated_slots = last_slot_location - location;
1520            unsigned first_unallocated_slot = var_size - num_unallocated_slots;
1521            for (unsigned i = first_unallocated_slot; i < var_size; i++) {
1522               assigned_locations[var->data.location + i] = location;
1523               location++;
1524            }
1525         }
1526         continue;
1527      }
1528
1529      for (unsigned i = 0; i < var_size; i++) {
1530         assigned_locations[var->data.location + i] = location + i;
1531      }
1532
1533      var->data.driver_location = location;
1534      location += driver_size;
1535   }
1536
1537   if (last_partial)
1538      location++;
1539
1540   exec_list_append(&shader->variables, &io_vars);
1541   *size = location;
1542}
1543
1544static uint64_t
1545get_linked_variable_location(unsigned location, bool patch)
1546{
1547   if (!patch)
1548      return location;
1549
   /* Reserve locations 0...3 for the special patch variables (tess factors
    * and bounding boxes); the generic patch variables come after them.
    */
1554   if (location >= VARYING_SLOT_PATCH0)
1555      return location - VARYING_SLOT_PATCH0 + 4;
1556   else if (location >= VARYING_SLOT_TESS_LEVEL_OUTER &&
1557            location <= VARYING_SLOT_BOUNDING_BOX1)
1558      return location - VARYING_SLOT_TESS_LEVEL_OUTER;
1559   else
1560      unreachable("Unsupported variable in get_linked_variable_location.");
1561}
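
/* Concretely (assuming the gl_varying_slot enum keeps these four values
 * adjacent, which the range check above relies on): TESS_LEVEL_OUTER,
 * TESS_LEVEL_INNER, BOUNDING_BOX0 and BOUNDING_BOX1 map to linked patch
 * locations 0..3, and VARYING_SLOT_PATCH0 + n maps to 4 + n.
 */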
1562
1563static uint64_t
1564get_linked_variable_io_mask(nir_variable *variable, gl_shader_stage stage)
1565{
1566   const struct glsl_type *type = variable->type;
1567
1568   if (nir_is_arrayed_io(variable, stage)) {
1569      assert(glsl_type_is_array(type));
1570      type = glsl_get_array_element(type);
1571   }
1572
1573   unsigned slots = glsl_count_attribute_slots(type, false);
1574   if (variable->data.compact) {
1575      unsigned component_count = variable->data.location_frac + glsl_get_length(type);
1576      slots = DIV_ROUND_UP(component_count, 4);
1577   }
1578
1579   uint64_t mask = u_bit_consecutive64(0, slots);
1580   return mask;
1581}
1582
1583nir_linked_io_var_info
1584nir_assign_linked_io_var_locations(nir_shader *producer, nir_shader *consumer)
1585{
1586   assert(producer);
1587   assert(consumer);
1588
1589   uint64_t producer_output_mask = 0;
1590   uint64_t producer_patch_output_mask = 0;
1591
1592   nir_foreach_shader_out_variable(variable, producer) {
1593      uint64_t mask = get_linked_variable_io_mask(variable, producer->info.stage);
1594      uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
1595
1596      if (variable->data.patch)
1597         producer_patch_output_mask |= mask << loc;
1598      else
1599         producer_output_mask |= mask << loc;
1600   }
1601
1602   uint64_t consumer_input_mask = 0;
1603   uint64_t consumer_patch_input_mask = 0;
1604
1605   nir_foreach_shader_in_variable(variable, consumer) {
1606      uint64_t mask = get_linked_variable_io_mask(variable, consumer->info.stage);
1607      uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
1608
1609      if (variable->data.patch)
1610         consumer_patch_input_mask |= mask << loc;
1611      else
1612         consumer_input_mask |= mask << loc;
1613   }
1614
1615   uint64_t io_mask = producer_output_mask | consumer_input_mask;
1616   uint64_t patch_io_mask = producer_patch_output_mask | consumer_patch_input_mask;
1617
1618   nir_foreach_shader_out_variable(variable, producer) {
1619      uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
1620
1621      if (variable->data.patch)
1622         variable->data.driver_location = util_bitcount64(patch_io_mask & u_bit_consecutive64(0, loc));
1623      else
1624         variable->data.driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, loc));
1625   }
1626
1627   nir_foreach_shader_in_variable(variable, consumer) {
1628      uint64_t loc = get_linked_variable_location(variable->data.location, variable->data.patch);
1629
1630      if (variable->data.patch)
1631         variable->data.driver_location = util_bitcount64(patch_io_mask & u_bit_consecutive64(0, loc));
1632      else
1633         variable->data.driver_location = util_bitcount64(io_mask & u_bit_consecutive64(0, loc));
1634   }
1635
1636   nir_linked_io_var_info result = {
1637      .num_linked_io_vars = util_bitcount64(io_mask),
1638      .num_linked_patch_io_vars = util_bitcount64(patch_io_mask),
1639   };
1640
1641   return result;
1642}
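
/* Hedged usage sketch: a driver linking a tessellation pair might do
 *
 *    nir_linked_io_var_info io =
 *       nir_assign_linked_io_var_locations(tcs_nir, tes_nir);
 *
 * and then use io.num_linked_io_vars and io.num_linked_patch_io_vars to size
 * the per-vertex and per-patch storage shared by the two stages; how that
 * storage is laid out is entirely driver-specific (tcs_nir / tes_nir are
 * placeholder names).
 */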
1643