1/*
2 * Copyright © 2015 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "nir.h"
25#include "nir_deref.h"
26#include "main/menums.h"
27
28static bool
29src_is_invocation_id(const nir_src *src)
30{
31   assert(src->is_ssa);
32   if (src->ssa->parent_instr->type != nir_instr_type_intrinsic)
33      return false;
34
35   return nir_instr_as_intrinsic(src->ssa->parent_instr)->intrinsic ==
36             nir_intrinsic_load_invocation_id;
37}
38
39static void
40get_deref_info(nir_shader *shader, nir_variable *var, nir_deref_instr *deref,
41               bool *cross_invocation, bool *indirect)
42{
43   *cross_invocation = false;
44   *indirect = false;
45
46   const bool is_arrayed = nir_is_arrayed_io(var, shader->info.stage);
47
48   nir_deref_path path;
49   nir_deref_path_init(&path, deref, NULL);
50   assert(path.path[0]->deref_type == nir_deref_type_var);
51   nir_deref_instr **p = &path.path[1];
52
53   /* Vertex index is the outermost array index. */
54   if (is_arrayed) {
55      assert((*p)->deref_type == nir_deref_type_array);
56      *cross_invocation = !src_is_invocation_id(&(*p)->arr.index);
57      p++;
58   }
59
60   /* We always lower indirect dereferences for "compact" array vars. */
61   if (!path.path[0]->var->data.compact) {
62      /* Non-compact array vars: find out if they are indirect. */
63      for (; *p; p++) {
64         if ((*p)->deref_type == nir_deref_type_array) {
65            *indirect |= !nir_src_is_const((*p)->arr.index);
66         } else if ((*p)->deref_type == nir_deref_type_struct) {
67            /* Struct indices are always constant. */
68         } else {
69            unreachable("Unsupported deref type");
70         }
71      }
72   }
73
74   nir_deref_path_finish(&path);
75}
76
77static void
78set_io_mask(nir_shader *shader, nir_variable *var, int offset, int len,
79            nir_deref_instr *deref, bool is_output_read)
80{
81   for (int i = 0; i < len; i++) {
82      assert(var->data.location != -1);
83
84      int idx = var->data.location + offset + i;
85      bool is_patch_generic = var->data.patch &&
86                              idx != VARYING_SLOT_TESS_LEVEL_INNER &&
87                              idx != VARYING_SLOT_TESS_LEVEL_OUTER &&
88                              idx != VARYING_SLOT_BOUNDING_BOX0 &&
89                              idx != VARYING_SLOT_BOUNDING_BOX1;
90      uint64_t bitfield;
91
92      if (is_patch_generic) {
93         assert(idx >= VARYING_SLOT_PATCH0 && idx < VARYING_SLOT_TESS_MAX);
94         bitfield = BITFIELD64_BIT(idx - VARYING_SLOT_PATCH0);
95      }
96      else {
97         assert(idx < VARYING_SLOT_MAX);
98         bitfield = BITFIELD64_BIT(idx);
99      }
100
101      bool cross_invocation;
102      bool indirect;
103      get_deref_info(shader, var, deref, &cross_invocation, &indirect);
104
105      if (var->data.mode == nir_var_shader_in) {
106         if (is_patch_generic) {
107            shader->info.patch_inputs_read |= bitfield;
108            if (indirect)
109               shader->info.patch_inputs_read_indirectly |= bitfield;
110         } else {
111            shader->info.inputs_read |= bitfield;
112            if (indirect)
113               shader->info.inputs_read_indirectly |= bitfield;
114         }
115
116         if (cross_invocation && shader->info.stage == MESA_SHADER_TESS_CTRL)
117            shader->info.tess.tcs_cross_invocation_inputs_read |= bitfield;
118
119         if (shader->info.stage == MESA_SHADER_FRAGMENT) {
120            shader->info.fs.uses_sample_qualifier |= var->data.sample;
121         }
122      } else {
123         assert(var->data.mode == nir_var_shader_out);
124         if (is_output_read) {
125            if (is_patch_generic) {
126               shader->info.patch_outputs_read |= bitfield;
127               if (indirect)
128                  shader->info.patch_outputs_accessed_indirectly |= bitfield;
129            } else {
130               shader->info.outputs_read |= bitfield;
131               if (indirect)
132                  shader->info.outputs_accessed_indirectly |= bitfield;
133            }
134
135            if (cross_invocation && shader->info.stage == MESA_SHADER_TESS_CTRL)
136               shader->info.tess.tcs_cross_invocation_outputs_read |= bitfield;
137         } else {
138            if (is_patch_generic) {
139               shader->info.patch_outputs_written |= bitfield;
140               if (indirect)
141                  shader->info.patch_outputs_accessed_indirectly |= bitfield;
142            } else if (!var->data.read_only) {
143               shader->info.outputs_written |= bitfield;
144               if (indirect)
145                  shader->info.outputs_accessed_indirectly |= bitfield;
146            }
147         }
148
149
150         if (var->data.fb_fetch_output) {
151            shader->info.outputs_read |= bitfield;
152            if (shader->info.stage == MESA_SHADER_FRAGMENT)
153               shader->info.fs.uses_fbfetch_output = true;
154         }
155
156         if (shader->info.stage == MESA_SHADER_FRAGMENT &&
157             !is_output_read && var->data.index == 1)
158            shader->info.fs.color_is_dual_source = true;
159      }
160   }
161}
162
163/**
164 * Mark an entire variable as used.  Caller must ensure that the variable
165 * represents a shader input or output.
166 */
167static void
168mark_whole_variable(nir_shader *shader, nir_variable *var,
169                    nir_deref_instr *deref, bool is_output_read)
170{
171   const struct glsl_type *type = var->type;
172
173   if (nir_is_arrayed_io(var, shader->info.stage)) {
174      assert(glsl_type_is_array(type));
175      type = glsl_get_array_element(type);
176   }
177
178   if (var->data.per_view) {
179      /* TODO: Per view and Per Vertex are not currently used together.  When
180       * they start to be used (e.g. when adding Primitive Replication for GS
181       * on Intel), verify that "peeling" the type twice is correct.  This
182       * assert ensures we remember it.
183       */
184      assert(!nir_is_arrayed_io(var, shader->info.stage));
185      assert(glsl_type_is_array(type));
186      type = glsl_get_array_element(type);
187   }
188
189   const unsigned slots =
190      var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4)
191                        : glsl_count_attribute_slots(type, false);
192
193   set_io_mask(shader, var, 0, slots, deref, is_output_read);
194}
195
196static unsigned
197get_io_offset(nir_deref_instr *deref, nir_variable *var, bool is_arrayed)
198{
199   if (var->data.compact) {
200      assert(deref->deref_type == nir_deref_type_array);
201      return nir_src_is_const(deref->arr.index) ?
202             (nir_src_as_uint(deref->arr.index) + var->data.location_frac) / 4u :
203             (unsigned)-1;
204   }
205
206   unsigned offset = 0;
207
208   for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) {
209      if (d->deref_type == nir_deref_type_array) {
210         if (is_arrayed && nir_deref_instr_parent(d)->deref_type == nir_deref_type_var)
211            break;
212
213         if (!nir_src_is_const(d->arr.index))
214            return -1;
215
216         offset += glsl_count_attribute_slots(d->type, false) *
217                   nir_src_as_uint(d->arr.index);
218      } else if (d->deref_type == nir_deref_type_struct) {
219         const struct glsl_type *parent_type = nir_deref_instr_parent(d)->type;
220         for (unsigned i = 0; i < d->strct.index; i++) {
221            const struct glsl_type *field_type = glsl_get_struct_field(parent_type, i);
222            offset += glsl_count_attribute_slots(field_type, false);
223         }
224      }
225   }
226
227   return offset;
228}
229
230/**
231 * Try to mark a portion of the given varying as used.  Caller must ensure
232 * that the variable represents a shader input or output.
233 *
234 * If the index can't be interpreted as a constant, or some other problem
235 * occurs, then nothing will be marked and false will be returned.
236 */
237static bool
238try_mask_partial_io(nir_shader *shader, nir_variable *var,
239                    nir_deref_instr *deref, bool is_output_read)
240{
241   const struct glsl_type *type = var->type;
242   bool is_arrayed = nir_is_arrayed_io(var, shader->info.stage);
243
244   if (is_arrayed) {
245      assert(glsl_type_is_array(type));
246      type = glsl_get_array_element(type);
247   }
248
249   /* Per view variables will be considered as a whole. */
250   if (var->data.per_view)
251      return false;
252
253   unsigned offset = get_io_offset(deref, var, is_arrayed);
254   if (offset == -1)
255      return false;
256
257   const unsigned slots =
258      var->data.compact ? DIV_ROUND_UP(glsl_get_length(type), 4)
259                        : glsl_count_attribute_slots(type, false);
260
261   if (offset >= slots) {
262      /* Constant index outside the bounds of the matrix/array.  This could
263       * arise as a result of constant folding of a legal GLSL program.
264       *
265       * Even though the spec says that indexing outside the bounds of a
266       * matrix/array results in undefined behaviour, we don't want to pass
267       * out-of-range values to set_io_mask() (since this could result in
268       * slots that don't exist being marked as used), so just let the caller
269       * mark the whole variable as used.
270       */
271      return false;
272   }
273
274   unsigned len = glsl_count_attribute_slots(deref->type, false);
275   set_io_mask(shader, var, offset, len, deref, is_output_read);
276   return true;
277}
278
279/** Returns true if the given intrinsic writes external memory
280 *
281 * Only returns true for writes to globally visible memory, not scratch and
282 * not shared.
283 */
bool
nir_intrinsic_writes_external_memory(const nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   /* Atomic counter operations (both index- and deref-based forms). */
   case nir_intrinsic_atomic_counter_inc:
   case nir_intrinsic_atomic_counter_inc_deref:
   case nir_intrinsic_atomic_counter_add:
   case nir_intrinsic_atomic_counter_add_deref:
   case nir_intrinsic_atomic_counter_pre_dec:
   case nir_intrinsic_atomic_counter_pre_dec_deref:
   case nir_intrinsic_atomic_counter_post_dec:
   case nir_intrinsic_atomic_counter_post_dec_deref:
   case nir_intrinsic_atomic_counter_min:
   case nir_intrinsic_atomic_counter_min_deref:
   case nir_intrinsic_atomic_counter_max:
   case nir_intrinsic_atomic_counter_max_deref:
   case nir_intrinsic_atomic_counter_and:
   case nir_intrinsic_atomic_counter_and_deref:
   case nir_intrinsic_atomic_counter_or:
   case nir_intrinsic_atomic_counter_or_deref:
   case nir_intrinsic_atomic_counter_xor:
   case nir_intrinsic_atomic_counter_xor_deref:
   case nir_intrinsic_atomic_counter_exchange:
   case nir_intrinsic_atomic_counter_exchange_deref:
   case nir_intrinsic_atomic_counter_comp_swap:
   case nir_intrinsic_atomic_counter_comp_swap_deref:
   /* Bindless image atomics and stores. */
   case nir_intrinsic_bindless_image_atomic_add:
   case nir_intrinsic_bindless_image_atomic_and:
   case nir_intrinsic_bindless_image_atomic_comp_swap:
   case nir_intrinsic_bindless_image_atomic_dec_wrap:
   case nir_intrinsic_bindless_image_atomic_exchange:
   case nir_intrinsic_bindless_image_atomic_fadd:
   case nir_intrinsic_bindless_image_atomic_imax:
   case nir_intrinsic_bindless_image_atomic_imin:
   case nir_intrinsic_bindless_image_atomic_inc_wrap:
   case nir_intrinsic_bindless_image_atomic_or:
   case nir_intrinsic_bindless_image_atomic_umax:
   case nir_intrinsic_bindless_image_atomic_umin:
   case nir_intrinsic_bindless_image_atomic_xor:
   case nir_intrinsic_bindless_image_store:
   case nir_intrinsic_bindless_image_store_raw_intel:
   /* Global memory atomics. */
   case nir_intrinsic_global_atomic_add:
   case nir_intrinsic_global_atomic_and:
   case nir_intrinsic_global_atomic_comp_swap:
   case nir_intrinsic_global_atomic_exchange:
   case nir_intrinsic_global_atomic_fadd:
   case nir_intrinsic_global_atomic_fcomp_swap:
   case nir_intrinsic_global_atomic_fmax:
   case nir_intrinsic_global_atomic_fmin:
   case nir_intrinsic_global_atomic_imax:
   case nir_intrinsic_global_atomic_imin:
   case nir_intrinsic_global_atomic_or:
   case nir_intrinsic_global_atomic_umax:
   case nir_intrinsic_global_atomic_umin:
   case nir_intrinsic_global_atomic_xor:
   /* Bound-image atomics and stores (index- and deref-based). */
   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_image_atomic_comp_swap:
   case nir_intrinsic_image_atomic_dec_wrap:
   case nir_intrinsic_image_atomic_exchange:
   case nir_intrinsic_image_atomic_fadd:
   case nir_intrinsic_image_atomic_imax:
   case nir_intrinsic_image_atomic_imin:
   case nir_intrinsic_image_atomic_inc_wrap:
   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_image_atomic_umax:
   case nir_intrinsic_image_atomic_umin:
   case nir_intrinsic_image_atomic_xor:
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_image_deref_atomic_dec_wrap:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_fadd:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_inc_wrap:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_store:
   case nir_intrinsic_image_deref_store_raw_intel:
   case nir_intrinsic_image_store:
   case nir_intrinsic_image_store_raw_intel:
   /* SSBO atomics (including the ir3 backend-specific variants). */
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_add_ir3:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_and_ir3:
   case nir_intrinsic_ssbo_atomic_comp_swap:
   case nir_intrinsic_ssbo_atomic_comp_swap_ir3:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_exchange_ir3:
   case nir_intrinsic_ssbo_atomic_fadd:
   case nir_intrinsic_ssbo_atomic_fcomp_swap:
   case nir_intrinsic_ssbo_atomic_fmax:
   case nir_intrinsic_ssbo_atomic_fmin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_imax_ir3:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_imin_ir3:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_or_ir3:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_umax_ir3:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_umin_ir3:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_xor_ir3:
   /* Direct global/SSBO stores. */
   case nir_intrinsic_store_global:
   case nir_intrinsic_store_global_ir3:
   case nir_intrinsic_store_ssbo:
   case nir_intrinsic_store_ssbo_ir3:
      return true;

   /* Deref-based stores/atomics only write external memory when the deref
    * may point at SSBO or global memory.
    */
   case nir_intrinsic_store_deref:
   case nir_intrinsic_deref_atomic_add:
   case nir_intrinsic_deref_atomic_imin:
   case nir_intrinsic_deref_atomic_umin:
   case nir_intrinsic_deref_atomic_imax:
   case nir_intrinsic_deref_atomic_umax:
   case nir_intrinsic_deref_atomic_and:
   case nir_intrinsic_deref_atomic_or:
   case nir_intrinsic_deref_atomic_xor:
   case nir_intrinsic_deref_atomic_exchange:
   case nir_intrinsic_deref_atomic_comp_swap:
   case nir_intrinsic_deref_atomic_fadd:
   case nir_intrinsic_deref_atomic_fmin:
   case nir_intrinsic_deref_atomic_fmax:
   case nir_intrinsic_deref_atomic_fcomp_swap:
      return nir_deref_mode_may_be(nir_src_as_deref(instr->src[0]),
                                   nir_var_mem_ssbo | nir_var_mem_global);

   default:
      return false;
   }
}
421
/* Accumulate shader_info bits contributed by a single intrinsic:
 * I/O read/write masks, system values, discard/barrier usage, etc.
 * dead_ctx is not used by this function.
 */
static void
gather_intrinsic_info(nir_intrinsic_instr *instr, nir_shader *shader,
                      void *dead_ctx)
{
   uint64_t slot_mask = 0;
   uint16_t slot_mask_16bit = 0;

   /* Only intrinsics that carry the IO_SEMANTICS index provide slot
    * information; build the slot masks from it up front.
    */
   if (nir_intrinsic_infos[instr->intrinsic].index_map[NIR_INTRINSIC_IO_SEMANTICS] > 0) {
      nir_io_semantics semantics = nir_intrinsic_io_semantics(instr);

      if (semantics.location >= VARYING_SLOT_PATCH0 &&
          semantics.location <= VARYING_SLOT_PATCH31) {
         /* Generic per-patch I/O. */
         assert((shader->info.stage == MESA_SHADER_TESS_EVAL &&
                 instr->intrinsic == nir_intrinsic_load_input) ||
                (shader->info.stage == MESA_SHADER_TESS_CTRL &&
                 (instr->intrinsic == nir_intrinsic_load_output ||
                  instr->intrinsic == nir_intrinsic_store_output)));

         /* Rebase so that the per-patch masks start at bit 0. */
         semantics.location -= VARYING_SLOT_PATCH0;
      }

      if (semantics.location >= VARYING_SLOT_VAR0_16BIT &&
          semantics.location <= VARYING_SLOT_VAR15_16BIT) {
         /* Convert num_slots from the units of half vectors to full vectors. */
         unsigned num_slots = (semantics.num_slots + semantics.high_16bits + 1) / 2;
         slot_mask_16bit =
            BITFIELD_RANGE(semantics.location - VARYING_SLOT_VAR0_16BIT, num_slots);
      } else {
         slot_mask = BITFIELD64_RANGE(semantics.location, semantics.num_slots);
         assert(util_bitcount64(slot_mask) == semantics.num_slots);
      }
   }

   switch (instr->intrinsic) {
   case nir_intrinsic_demote:
   case nir_intrinsic_demote_if:
      shader->info.fs.uses_demote = true;
      FALLTHROUGH; /* quads with helper lanes only might be discarded entirely */
   case nir_intrinsic_discard:
   case nir_intrinsic_discard_if:
      /* Freedreno uses the discard_if intrinsic to end GS invocations that
       * don't produce a vertex, so we only set uses_discard if executing on
       * a fragment shader. */
      if (shader->info.stage == MESA_SHADER_FRAGMENT)
         shader->info.fs.uses_discard = true;
      break;

   case nir_intrinsic_terminate:
   case nir_intrinsic_terminate_if:
      /* terminate is only valid in fragment shaders. */
      assert(shader->info.stage == MESA_SHADER_FRAGMENT)
;
      shader->info.fs.uses_discard = true;
      break;

   case nir_intrinsic_interp_deref_at_centroid:
   case nir_intrinsic_interp_deref_at_sample:
   case nir_intrinsic_interp_deref_at_offset:
   case nir_intrinsic_interp_deref_at_vertex:
   case nir_intrinsic_load_deref:
   case nir_intrinsic_store_deref:{
      /* Deref-based I/O: classify the accessed variable and mark the
       * touched slots (partially if a constant offset can be computed,
       * otherwise the whole variable).
       */
      nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
      if (nir_deref_mode_is_one_of(deref, nir_var_shader_in |
                                          nir_var_shader_out)) {
         nir_variable *var = nir_deref_instr_get_variable(deref);
         bool is_output_read = false;
         if (var->data.mode == nir_var_shader_out &&
             instr->intrinsic == nir_intrinsic_load_deref)
            is_output_read = true;

         if (!try_mask_partial_io(shader, var, deref, is_output_read))
            mark_whole_variable(shader, var, deref, is_output_read);

         /* We need to track which input_reads bits correspond to a
          * dvec3/dvec4 input attribute */
         if (shader->info.stage == MESA_SHADER_VERTEX &&
             var->data.mode == nir_var_shader_in &&
             glsl_type_is_dual_slot(glsl_without_array(var->type))) {
            for (unsigned i = 0; i < glsl_count_attribute_slots(var->type, false); i++) {
               int idx = var->data.location + i;
               shader->info.vs.double_inputs |= BITFIELD64_BIT(idx);
            }
         }
      }
      /* store_deref to SSBO/global memory also counts as a memory write. */
      if (nir_intrinsic_writes_external_memory(instr))
         shader->info.writes_memory = true;
      break;
   }

   case nir_intrinsic_load_input:
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_input_vertex:
   case nir_intrinsic_load_interpolated_input:
      /* In TES, plain load_input reads per-patch data. */
      if (shader->info.stage == MESA_SHADER_TESS_EVAL &&
          instr->intrinsic == nir_intrinsic_load_input) {
         shader->info.patch_inputs_read |= slot_mask;
         if (!nir_src_is_const(*nir_get_io_offset_src(instr)))
            shader->info.patch_inputs_read_indirectly |= slot_mask;
      } else {
         shader->info.inputs_read |= slot_mask;
         shader->info.inputs_read_16bit |= slot_mask_16bit;
         if (!nir_src_is_const(*nir_get_io_offset_src(instr))) {
            shader->info.inputs_read_indirectly |= slot_mask;
            shader->info.inputs_read_indirectly_16bit |= slot_mask_16bit;
         }
      }

      /* A TCS per-vertex read with a non-invocation-ID vertex index reads
       * another invocation's input.
       */
      if (shader->info.stage == MESA_SHADER_TESS_CTRL &&
          instr->intrinsic == nir_intrinsic_load_per_vertex_input &&
          !src_is_invocation_id(nir_get_io_vertex_index_src(instr)))
         shader->info.tess.tcs_cross_invocation_inputs_read |= slot_mask;
      break;

   case nir_intrinsic_load_output:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_load_per_primitive_output:
      /* In TCS, plain load_output reads per-patch data. */
      if (shader->info.stage == MESA_SHADER_TESS_CTRL &&
          instr->intrinsic == nir_intrinsic_load_output) {
         shader->info.patch_outputs_read |= slot_mask;
         if (!nir_src_is_const(*nir_get_io_offset_src(instr)))
            shader->info.patch_outputs_accessed_indirectly |= slot_mask;
      } else {
         shader->info.outputs_read |= slot_mask;
         shader->info.outputs_read_16bit |= slot_mask_16bit;
         if (!nir_src_is_const(*nir_get_io_offset_src(instr))) {
            shader->info.outputs_accessed_indirectly |= slot_mask;
            shader->info.outputs_accessed_indirectly_16bit |= slot_mask_16bit;
         }
      }

      if (shader->info.stage == MESA_SHADER_TESS_CTRL &&
          instr->intrinsic == nir_intrinsic_load_per_vertex_output &&
          !src_is_invocation_id(nir_get_io_vertex_index_src(instr)))
         shader->info.tess.tcs_cross_invocation_outputs_read |= slot_mask;

      if (shader->info.stage == MESA_SHADER_FRAGMENT &&
          nir_intrinsic_io_semantics(instr).fb_fetch_output)
         shader->info.fs.uses_fbfetch_output = true;
      break;

   case nir_intrinsic_store_output:
   case nir_intrinsic_store_per_vertex_output:
   case nir_intrinsic_store_per_primitive_output:
      /* In TCS, plain store_output writes per-patch data. */
      if (shader->info.stage == MESA_SHADER_TESS_CTRL &&
          instr->intrinsic == nir_intrinsic_store_output) {
         shader->info.patch_outputs_written |= slot_mask;
         if (!nir_src_is_const(*nir_get_io_offset_src(instr)))
            shader->info.patch_outputs_accessed_indirectly |= slot_mask;
      } else {
         shader->info.outputs_written |= slot_mask;
         shader->info.outputs_written_16bit |= slot_mask_16bit;
         if (!nir_src_is_const(*nir_get_io_offset_src(instr))) {
            shader->info.outputs_accessed_indirectly |= slot_mask;
            shader->info.outputs_accessed_indirectly_16bit |= slot_mask_16bit;
         }
      }

      if (shader->info.stage == MESA_SHADER_FRAGMENT &&
          nir_intrinsic_io_semantics(instr).dual_source_blend_index)
         shader->info.fs.color_is_dual_source = true;
      break;

   case nir_intrinsic_load_color0:
   case nir_intrinsic_load_color1:
      /* NOTE(review): relies on VARYING_SLOT_COL1 == VARYING_SLOT_COL0 << 1
       * so the bool shift selects the right slot — confirm against menums.h.
       */
      shader->info.inputs_read |=
         BITFIELD64_BIT(VARYING_SLOT_COL0 <<
                        (instr->intrinsic == nir_intrinsic_load_color1));
      FALLTHROUGH;
   /* These all map 1:1 onto a system value; record it in the bitset. */
   case nir_intrinsic_load_subgroup_size:
   case nir_intrinsic_load_subgroup_invocation:
   case nir_intrinsic_load_subgroup_eq_mask:
   case nir_intrinsic_load_subgroup_ge_mask:
   case nir_intrinsic_load_subgroup_gt_mask:
   case nir_intrinsic_load_subgroup_le_mask:
   case nir_intrinsic_load_subgroup_lt_mask:
   case nir_intrinsic_load_num_subgroups:
   case nir_intrinsic_load_subgroup_id:
   case nir_intrinsic_load_vertex_id:
   case nir_intrinsic_load_instance_id:
   case nir_intrinsic_load_vertex_id_zero_base:
   case nir_intrinsic_load_base_vertex:
   case nir_intrinsic_load_first_vertex:
   case nir_intrinsic_load_is_indexed_draw:
   case nir_intrinsic_load_base_instance:
   case nir_intrinsic_load_draw_id:
   case nir_intrinsic_load_invocation_id:
   case nir_intrinsic_load_frag_coord:
   case nir_intrinsic_load_frag_shading_rate:
   case nir_intrinsic_load_point_coord:
   case nir_intrinsic_load_line_coord:
   case nir_intrinsic_load_front_face:
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_pos:
   case nir_intrinsic_load_sample_mask_in:
   case nir_intrinsic_load_helper_invocation:
   case nir_intrinsic_load_tess_coord:
   case nir_intrinsic_load_patch_vertices_in:
   case nir_intrinsic_load_primitive_id:
   case nir_intrinsic_load_tess_level_outer:
   case nir_intrinsic_load_tess_level_inner:
   case nir_intrinsic_load_tess_level_outer_default:
   case nir_intrinsic_load_tess_level_inner_default:
   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_local_invocation_index:
   case nir_intrinsic_load_global_invocation_id:
   case nir_intrinsic_load_base_global_invocation_id:
   case nir_intrinsic_load_global_invocation_index:
   case nir_intrinsic_load_workgroup_id:
   case nir_intrinsic_load_num_workgroups:
   case nir_intrinsic_load_workgroup_size:
   case nir_intrinsic_load_work_dim:
   case nir_intrinsic_load_user_data_amd:
   case nir_intrinsic_load_view_index:
   case nir_intrinsic_load_barycentric_model:
   case nir_intrinsic_load_gs_header_ir3:
   case nir_intrinsic_load_tcs_header_ir3:
      BITSET_SET(shader->info.system_values_read,
                 nir_system_value_from_intrinsic(instr->intrinsic));
      break;

   /* Barycentrics record perspective vs. linear depending on interp mode;
    * FLAT barycentrics record nothing.
    */
   case nir_intrinsic_load_barycentric_pixel:
      if (nir_intrinsic_interp_mode(instr) == INTERP_MODE_SMOOTH ||
          nir_intrinsic_interp_mode(instr) == INTERP_MODE_NONE) {
         BITSET_SET(shader->info.system_values_read,
                    SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL);
      } else if (nir_intrinsic_interp_mode(instr) == INTERP_MODE_NOPERSPECTIVE) {
         BITSET_SET(shader->info.system_values_read,
                    SYSTEM_VALUE_BARYCENTRIC_LINEAR_PIXEL);
      }
      break;

   case nir_intrinsic_load_barycentric_centroid:
      if (nir_intrinsic_interp_mode(instr) == INTERP_MODE_SMOOTH ||
          nir_intrinsic_interp_mode(instr) == INTERP_MODE_NONE) {
         BITSET_SET(shader->info.system_values_read,
                    SYSTEM_VALUE_BARYCENTRIC_PERSP_CENTROID);
      } else if (nir_intrinsic_interp_mode(instr) == INTERP_MODE_NOPERSPECTIVE) {
         BITSET_SET(shader->info.system_values_read,
                    SYSTEM_VALUE_BARYCENTRIC_LINEAR_CENTROID);
      }
      break;

   case nir_intrinsic_load_barycentric_sample:
      if (nir_intrinsic_interp_mode(instr) == INTERP_MODE_SMOOTH ||
          nir_intrinsic_interp_mode(instr) == INTERP_MODE_NONE) {
         BITSET_SET(shader->info.system_values_read,
                    SYSTEM_VALUE_BARYCENTRIC_PERSP_SAMPLE);
      } else if (nir_intrinsic_interp_mode(instr) == INTERP_MODE_NOPERSPECTIVE) {
         BITSET_SET(shader->info.system_values_read,
                    SYSTEM_VALUE_BARYCENTRIC_LINEAR_SAMPLE);
      }
      /* Per-sample barycentrics imply sample shading. */
      if (shader->info.stage == MESA_SHADER_FRAGMENT)
         shader->info.fs.uses_sample_qualifier = true;
      break;

   /* Quad operations need the helper lanes of the quad to be live. */
   case nir_intrinsic_quad_broadcast:
   case nir_intrinsic_quad_swap_horizontal:
   case nir_intrinsic_quad_swap_vertical:
   case nir_intrinsic_quad_swap_diagonal:
   case nir_intrinsic_quad_swizzle_amd:
      if (shader->info.stage == MESA_SHADER_FRAGMENT)
         shader->info.fs.needs_quad_helper_invocations = true;
      break;

   /* Subgroup-wide operations need all helper invocations (FS) and are
    * flagged as wide subgroup intrinsics for compute.
    */
   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_all:
   case nir_intrinsic_vote_feq:
   case nir_intrinsic_vote_ieq:
   case nir_intrinsic_ballot:
   case nir_intrinsic_ballot_bit_count_exclusive:
   case nir_intrinsic_ballot_bit_count_inclusive:
   case nir_intrinsic_ballot_bitfield_extract:
   case nir_intrinsic_ballot_bit_count_reduce:
   case nir_intrinsic_ballot_find_lsb:
   case nir_intrinsic_ballot_find_msb:
   case nir_intrinsic_first_invocation:
   case nir_intrinsic_read_invocation:
   case nir_intrinsic_read_first_invocation:
   case nir_intrinsic_elect:
   case nir_intrinsic_reduce:
   case nir_intrinsic_inclusive_scan:
   case nir_intrinsic_exclusive_scan:
   case nir_intrinsic_shuffle:
   case nir_intrinsic_shuffle_xor:
   case nir_intrinsic_shuffle_up:
   case nir_intrinsic_shuffle_down:
   case nir_intrinsic_write_invocation_amd:
      if (shader->info.stage == MESA_SHADER_FRAGMENT)
         shader->info.fs.needs_all_helper_invocations = true;
      if (shader->info.stage == MESA_SHADER_COMPUTE)
         shader->info.cs.uses_wide_subgroup_intrinsics = true;
      break;

   case nir_intrinsic_end_primitive:
   case nir_intrinsic_end_primitive_with_counter:
      assert(shader->info.stage == MESA_SHADER_GEOMETRY);
      shader->info.gs.uses_end_primitive = 1;
      FALLTHROUGH;

   case nir_intrinsic_emit_vertex:
   case nir_intrinsic_emit_vertex_with_counter:
      /* Every emit/end marks its stream as active. */
      shader->info.gs.active_stream_mask |= 1 << nir_intrinsic_stream_id(instr);

      break;

   case nir_intrinsic_control_barrier:
      shader->info.uses_control_barrier = true;
      break;

   case nir_intrinsic_scoped_barrier:
      /* A scoped barrier may act as a control and/or memory barrier
       * depending on its scopes.
       */
      shader->info.uses_control_barrier |=
         nir_intrinsic_execution_scope(instr) != NIR_SCOPE_NONE;

      shader->info.uses_memory_barrier |=
         nir_intrinsic_memory_scope(instr) != NIR_SCOPE_NONE;
      break;

   case nir_intrinsic_memory_barrier:
   case nir_intrinsic_group_memory_barrier:
   case nir_intrinsic_memory_barrier_atomic_counter:
   case nir_intrinsic_memory_barrier_buffer:
   case nir_intrinsic_memory_barrier_image:
   case nir_intrinsic_memory_barrier_shared:
   case nir_intrinsic_memory_barrier_tcs_patch:
      shader->info.uses_memory_barrier = true;
      break;

   default:
      /* Catch all remaining external-memory writers (SSBO/global/image
       * stores and atomics).
       */
      if (nir_intrinsic_writes_external_memory(instr))
         shader->info.writes_memory = true;
      break;
   }
}
754
755static void
756gather_tex_info(nir_tex_instr *instr, nir_shader *shader)
757{
758   if (shader->info.stage == MESA_SHADER_FRAGMENT &&
759       nir_tex_instr_has_implicit_derivative(instr))
760      shader->info.fs.needs_quad_helper_invocations = true;
761
762   switch (instr->op) {
763   case nir_texop_tg4:
764      shader->info.uses_texture_gather = true;
765      break;
766   default:
767      break;
768   }
769}
770
771static void
772gather_alu_info(nir_alu_instr *instr, nir_shader *shader)
773{
774   switch (instr->op) {
775   case nir_op_fddx:
776   case nir_op_fddy:
777      shader->info.uses_fddx_fddy = true;
778      FALLTHROUGH;
779   case nir_op_fddx_fine:
780   case nir_op_fddy_fine:
781   case nir_op_fddx_coarse:
782   case nir_op_fddy_coarse:
783      if (shader->info.stage == MESA_SHADER_FRAGMENT)
784         shader->info.fs.needs_quad_helper_invocations = true;
785      break;
786   default:
787      break;
788   }
789
790   const nir_op_info *info = &nir_op_infos[instr->op];
791
792   for (unsigned i = 0; i < info->num_inputs; i++) {
793      if (nir_alu_type_get_base_type(info->input_types[i]) == nir_type_float)
794         shader->info.bit_sizes_float |= nir_src_bit_size(instr->src[i].src);
795      else
796         shader->info.bit_sizes_int |= nir_src_bit_size(instr->src[i].src);
797   }
798   if (nir_alu_type_get_base_type(info->output_type) == nir_type_float)
799      shader->info.bit_sizes_float |= nir_dest_bit_size(instr->dest.dest);
800   else
801      shader->info.bit_sizes_int |= nir_dest_bit_size(instr->dest.dest);
802}
803
804static void
805gather_info_block(nir_block *block, nir_shader *shader, void *dead_ctx)
806{
807   nir_foreach_instr(instr, block) {
808      switch (instr->type) {
809      case nir_instr_type_alu:
810         gather_alu_info(nir_instr_as_alu(instr), shader);
811         break;
812      case nir_instr_type_intrinsic:
813         gather_intrinsic_info(nir_instr_as_intrinsic(instr), shader, dead_ctx);
814         break;
815      case nir_instr_type_tex:
816         gather_tex_info(nir_instr_as_tex(instr), shader);
817         break;
818      case nir_instr_type_call:
819         assert(!"nir_shader_gather_info only works if functions are inlined");
820         break;
821      default:
822         break;
823      }
824   }
825}
826
/* Recompute the derived shader_info fields for SHADER by scanning its
 * uniforms, its in/out variables, and every instruction reachable from
 * ENTRYPOINT.  All gathered fields are reset first so the pass is safe to
 * re-run after optimizations (with the noted exception of
 * fs.uses_sample_shading, which is sticky — see below).
 */
void
nir_shader_gather_info(nir_shader *shader, nir_function_impl *entrypoint)
{
   /* Reset the resource-count fields before re-counting them below. */
   shader->info.num_textures = 0;
   shader->info.num_images = 0;
   shader->info.image_buffers = 0;
   shader->info.msaa_images = 0;
   shader->info.bit_sizes_float = 0;
   shader->info.bit_sizes_int = 0;

   /* Count sampler and image slots from the uniform variables, and record
    * which image slots are buffer or multisampled images.
    */
   nir_foreach_uniform_variable(var, shader) {
      /* Bindless textures and images don't use non-bindless slots.
       * Interface blocks imply inputs, outputs, UBO, or SSBO, which can only
       * mean bindless.
       */
      if (var->data.bindless || var->interface_type)
         continue;

      shader->info.num_textures += glsl_type_get_sampler_count(var->type);

      unsigned num_image_slots = glsl_type_get_image_count(var->type);
      if (num_image_slots) {
         const struct glsl_type *image_type = glsl_without_array(var->type);

         /* All slots of an array share the element's sampler dim, so the
          * whole contiguous slot range gets the same classification bit.
          */
         if (glsl_get_sampler_dim(image_type) == GLSL_SAMPLER_DIM_BUF) {
            shader->info.image_buffers |=
               BITFIELD_RANGE(shader->info.num_images, num_image_slots);
         }
         if (glsl_get_sampler_dim(image_type) == GLSL_SAMPLER_DIM_MS) {
            shader->info.msaa_images |=
               BITFIELD_RANGE(shader->info.num_images, num_image_slots);
         }
         shader->info.num_images += num_image_slots;
      }
   }

   /* Reset every I/O usage mask that the per-instruction gatherers below
    * accumulate into.
    */
   shader->info.inputs_read = 0;
   shader->info.outputs_written = 0;
   shader->info.outputs_read = 0;
   shader->info.inputs_read_16bit = 0;
   shader->info.outputs_written_16bit = 0;
   shader->info.outputs_read_16bit = 0;
   shader->info.inputs_read_indirectly_16bit = 0;
   shader->info.outputs_accessed_indirectly_16bit = 0;
   shader->info.patch_outputs_read = 0;
   shader->info.patch_inputs_read = 0;
   shader->info.patch_outputs_written = 0;
   BITSET_ZERO(shader->info.system_values_read);
   shader->info.inputs_read_indirectly = 0;
   shader->info.outputs_accessed_indirectly = 0;
   shader->info.patch_inputs_read_indirectly = 0;
   shader->info.patch_outputs_accessed_indirectly = 0;

   /* Stage-specific gathered fields also start from a clean slate. */
   if (shader->info.stage == MESA_SHADER_VERTEX) {
      shader->info.vs.double_inputs = 0;
   }
   if (shader->info.stage == MESA_SHADER_FRAGMENT) {
      shader->info.fs.uses_sample_qualifier = false;
      shader->info.fs.uses_discard = false;
      shader->info.fs.uses_demote = false;
      shader->info.fs.color_is_dual_source = false;
      shader->info.fs.uses_fbfetch_output = false;
      shader->info.fs.needs_quad_helper_invocations = false;
      shader->info.fs.needs_all_helper_invocations = false;
   }
   if (shader->info.stage == MESA_SHADER_TESS_CTRL) {
      shader->info.tess.tcs_cross_invocation_inputs_read = 0;
      shader->info.tess.tcs_cross_invocation_outputs_read = 0;
   }

   /* Transform feedback writes memory, so seed writes_memory from it; the
    * intrinsic gatherer may additionally set it for external-memory writes.
    */
   shader->info.writes_memory = shader->info.has_transform_feedback_varyings;

   /* Scan every instruction in the entrypoint; dead_ctx holds any temporary
    * allocations made by the gatherers and is freed as a whole afterwards.
    */
   void *dead_ctx = ralloc_context(NULL);
   nir_foreach_block(block, entrypoint) {
      gather_info_block(block, shader, dead_ctx);
   }
   ralloc_free(dead_ctx);

   if (shader->info.stage == MESA_SHADER_FRAGMENT &&
       (shader->info.fs.uses_sample_qualifier ||
        (BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||
         BITSET_TEST(shader->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS)))) {
      /* This shouldn't be cleared because if optimizations remove all
       * sample-qualified inputs and that pass is run again, the sample
       * shading must stay enabled.
       */
      shader->info.fs.uses_sample_shading = true;
   }

   /* Mesh shaders: per-primitive outputs are declared on (arrayed)
    * variables, so derive the slot mask from the variable list rather than
    * from instructions.
    */
   shader->info.per_primitive_outputs = 0;
   if (shader->info.stage == MESA_SHADER_MESH) {
      nir_foreach_shader_out_variable(var, shader) {
         if (var->data.per_primitive) {
            assert(nir_is_arrayed_io(var, shader->info.stage));
            const unsigned slots =
               glsl_count_attribute_slots(glsl_get_array_element(var->type), false);
            shader->info.per_primitive_outputs |= BITFIELD64_RANGE(var->data.location, slots);
         }
      }
   }

   /* Fragment shaders: likewise derive the per-primitive input slot mask
    * from the input variables (not arrayed here, so no array unwrap).
    */
   shader->info.per_primitive_inputs = 0;
   if (shader->info.stage == MESA_SHADER_FRAGMENT) {
      nir_foreach_shader_in_variable(var, shader) {
         if (var->data.per_primitive) {
            const unsigned slots =
               glsl_count_attribute_slots(var->type, false);
            shader->info.per_primitive_inputs |= BITFIELD64_RANGE(var->data.location, slots);
         }
      }
   }
}
939