1/*
2 * Copyright © 2018 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25#include "nir.h"
26
27/* This pass computes for each ssa definition if it is uniform.
28 * That is, the variable has the same value for all invocations
29 * of the group.
30 *
31 * This divergence analysis pass expects the shader to be in LCSSA-form.
32 *
33 * This algorithm implements "The Simple Divergence Analysis" from
34 * Diogo Sampaio, Rafael De Souza, Sylvain Collange, Fernando Magno Quintão Pereira.
35 * Divergence Analysis.  ACM Transactions on Programming Languages and Systems (TOPLAS),
36 * ACM, 2013, 35 (4), pp.13:1-13:36. <10.1145/2523815>. <hal-00909072v2>
37 */
38
/* Per-walk analysis state threaded through the control-flow traversal. */
struct divergence_state {
   /* Shader stage being analyzed; affects which loads are uniform. */
   const gl_shader_stage stage;
   /* Shader owning the control flow being visited. */
   nir_shader *shader;

   /** current control flow state */
   /* True if some loop-active invocations might take a different control-flow path.
    * A divergent break does not cause subsequent control-flow to be considered
    * divergent because those invocations are no longer active in the loop.
    * For a divergent if, both sides are considered divergent flow because
    * the other side is still loop-active. */
   bool divergent_loop_cf;
   /* True if a divergent continue happened since the loop header */
   bool divergent_loop_continue;
   /* True if a divergent break happened since the loop header */
   bool divergent_loop_break;

   /* True if we visit the block for the first time */
   bool first_visit;
};
58
/* Forward declaration: the CF walk recurses into nested cf_node lists. */
static bool
visit_cf_list(struct exec_list *list, struct divergence_state *state);
61
62static bool
63visit_alu(nir_alu_instr *instr)
64{
65   if (instr->dest.dest.ssa.divergent)
66      return false;
67
68   unsigned num_src = nir_op_infos[instr->op].num_inputs;
69
70   for (unsigned i = 0; i < num_src; i++) {
71      if (instr->src[i].src.ssa->divergent) {
72         instr->dest.dest.ssa.divergent = true;
73         return true;
74      }
75   }
76
77   return false;
78}
79
80static bool
81visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr)
82{
83   if (!nir_intrinsic_infos[instr->intrinsic].has_dest)
84      return false;
85
86   if (instr->dest.ssa.divergent)
87      return false;
88
89   nir_divergence_options options = shader->options->divergence_analysis_options;
90   gl_shader_stage stage = shader->info.stage;
91   bool is_divergent = false;
92   switch (instr->intrinsic) {
93   /* Intrinsics which are always uniform */
94   case nir_intrinsic_shader_clock:
95   case nir_intrinsic_ballot:
96   case nir_intrinsic_read_invocation:
97   case nir_intrinsic_read_first_invocation:
98   case nir_intrinsic_vote_any:
99   case nir_intrinsic_vote_all:
100   case nir_intrinsic_vote_feq:
101   case nir_intrinsic_vote_ieq:
102   case nir_intrinsic_load_push_constant:
103   case nir_intrinsic_load_work_dim:
104   case nir_intrinsic_load_num_workgroups:
105   case nir_intrinsic_load_workgroup_size:
106   case nir_intrinsic_load_subgroup_id:
107   case nir_intrinsic_load_num_subgroups:
108   case nir_intrinsic_load_ray_launch_size:
109   case nir_intrinsic_load_subgroup_size:
110   case nir_intrinsic_load_subgroup_eq_mask:
111   case nir_intrinsic_load_subgroup_ge_mask:
112   case nir_intrinsic_load_subgroup_gt_mask:
113   case nir_intrinsic_load_subgroup_le_mask:
114   case nir_intrinsic_load_subgroup_lt_mask:
115   case nir_intrinsic_first_invocation:
116   case nir_intrinsic_last_invocation:
117   case nir_intrinsic_load_base_instance:
118   case nir_intrinsic_load_base_vertex:
119   case nir_intrinsic_load_first_vertex:
120   case nir_intrinsic_load_draw_id:
121   case nir_intrinsic_load_is_indexed_draw:
122   case nir_intrinsic_load_viewport_scale:
123   case nir_intrinsic_load_user_clip_plane:
124   case nir_intrinsic_load_viewport_x_scale:
125   case nir_intrinsic_load_viewport_y_scale:
126   case nir_intrinsic_load_viewport_z_scale:
127   case nir_intrinsic_load_viewport_offset:
128   case nir_intrinsic_load_viewport_x_offset:
129   case nir_intrinsic_load_viewport_y_offset:
130   case nir_intrinsic_load_viewport_z_offset:
131   case nir_intrinsic_load_blend_const_color_a_float:
132   case nir_intrinsic_load_blend_const_color_b_float:
133   case nir_intrinsic_load_blend_const_color_g_float:
134   case nir_intrinsic_load_blend_const_color_r_float:
135   case nir_intrinsic_load_blend_const_color_rgba:
136   case nir_intrinsic_load_blend_const_color_aaaa8888_unorm:
137   case nir_intrinsic_load_blend_const_color_rgba8888_unorm:
138   case nir_intrinsic_load_line_width:
139   case nir_intrinsic_load_aa_line_width:
140   case nir_intrinsic_load_fb_layers_v3d:
141   case nir_intrinsic_load_tcs_num_patches_amd:
142   case nir_intrinsic_load_ring_tess_factors_amd:
143   case nir_intrinsic_load_ring_tess_offchip_amd:
144   case nir_intrinsic_load_ring_tess_factors_offset_amd:
145   case nir_intrinsic_load_ring_tess_offchip_offset_amd:
146   case nir_intrinsic_load_ring_esgs_amd:
147   case nir_intrinsic_load_ring_es2gs_offset_amd:
148   case nir_intrinsic_load_sample_positions_pan:
149   case nir_intrinsic_load_workgroup_num_input_vertices_amd:
150   case nir_intrinsic_load_workgroup_num_input_primitives_amd:
151   case nir_intrinsic_load_shader_query_enabled_amd:
152   case nir_intrinsic_load_cull_front_face_enabled_amd:
153   case nir_intrinsic_load_cull_back_face_enabled_amd:
154   case nir_intrinsic_load_cull_ccw_amd:
155   case nir_intrinsic_load_cull_small_primitives_enabled_amd:
156   case nir_intrinsic_load_cull_any_enabled_amd:
157   case nir_intrinsic_load_cull_small_prim_precision_amd:
158      is_divergent = false;
159      break;
160
161   /* Intrinsics with divergence depending on shader stage and hardware */
162   case nir_intrinsic_load_frag_shading_rate:
163      is_divergent = !(options & nir_divergence_single_frag_shading_rate_per_subgroup);
164      break;
165   case nir_intrinsic_load_input:
166      is_divergent = instr->src[0].ssa->divergent;
167      if (stage == MESA_SHADER_FRAGMENT)
168         is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
169      else if (stage == MESA_SHADER_TESS_EVAL)
170         is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
171      else if (stage != MESA_SHADER_MESH)
172         is_divergent = true;
173      break;
174   case nir_intrinsic_load_per_vertex_input:
175      is_divergent = instr->src[0].ssa->divergent ||
176                     instr->src[1].ssa->divergent;
177      if (stage == MESA_SHADER_TESS_CTRL)
178         is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
179      if (stage == MESA_SHADER_TESS_EVAL)
180         is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
181      else
182         is_divergent = true;
183      break;
184   case nir_intrinsic_load_input_vertex:
185      is_divergent = instr->src[1].ssa->divergent;
186      assert(stage == MESA_SHADER_FRAGMENT);
187      is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
188      break;
189   case nir_intrinsic_load_output:
190      is_divergent = instr->src[0].ssa->divergent;
191      switch (stage) {
192      case MESA_SHADER_TESS_CTRL:
193         is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
194         break;
195      case MESA_SHADER_FRAGMENT:
196         is_divergent = true;
197         break;
198      case MESA_SHADER_TASK:
199      case MESA_SHADER_MESH:
200         /* Divergent if src[0] is, so nothing else to do. */
201         break;
202      default:
203         unreachable("Invalid stage for load_output");
204      }
205      break;
206   case nir_intrinsic_load_per_vertex_output:
207      assert(stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_MESH);
208      is_divergent = instr->src[0].ssa->divergent ||
209                     instr->src[1].ssa->divergent ||
210                     (stage == MESA_SHADER_TESS_CTRL &&
211                      !(options & nir_divergence_single_patch_per_tcs_subgroup));
212      break;
213   case nir_intrinsic_load_per_primitive_output:
214      assert(stage == MESA_SHADER_MESH);
215      is_divergent = instr->src[0].ssa->divergent ||
216                     instr->src[1].ssa->divergent;
217      break;
218   case nir_intrinsic_load_layer_id:
219   case nir_intrinsic_load_front_face:
220      assert(stage == MESA_SHADER_FRAGMENT);
221      is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
222      break;
223   case nir_intrinsic_load_view_index:
224      assert(stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_KERNEL);
225      if (options & nir_divergence_view_index_uniform)
226         is_divergent = false;
227      else if (stage == MESA_SHADER_FRAGMENT)
228         is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
229      break;
230   case nir_intrinsic_load_fs_input_interp_deltas:
231      assert(stage == MESA_SHADER_FRAGMENT);
232      is_divergent = instr->src[0].ssa->divergent;
233      is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
234      break;
235   case nir_intrinsic_load_primitive_id:
236      if (stage == MESA_SHADER_FRAGMENT)
237         is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
238      else if (stage == MESA_SHADER_TESS_CTRL)
239         is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
240      else if (stage == MESA_SHADER_TESS_EVAL)
241         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
242      else if (stage == MESA_SHADER_GEOMETRY || stage == MESA_SHADER_VERTEX)
243         is_divergent = true;
244      else
245         unreachable("Invalid stage for load_primitive_id");
246      break;
247   case nir_intrinsic_load_tess_level_inner:
248   case nir_intrinsic_load_tess_level_outer:
249      if (stage == MESA_SHADER_TESS_CTRL)
250         is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
251      else if (stage == MESA_SHADER_TESS_EVAL)
252         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
253      else
254         unreachable("Invalid stage for load_primitive_tess_level_*");
255      break;
256   case nir_intrinsic_load_patch_vertices_in:
257      if (stage == MESA_SHADER_TESS_EVAL)
258         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
259      else
260         assert(stage == MESA_SHADER_TESS_CTRL);
261      break;
262
263   case nir_intrinsic_load_workgroup_id:
264      assert(gl_shader_stage_uses_workgroup(stage));
265      if (stage == MESA_SHADER_COMPUTE)
266         is_divergent |= (options & nir_divergence_multiple_workgroup_per_compute_subgroup);
267      break;
268
269   /* Clustered reductions are uniform if cluster_size == subgroup_size or
270    * the source is uniform and the operation is invariant.
271    * Inclusive scans are uniform if
272    * the source is uniform and the operation is invariant
273    */
274   case nir_intrinsic_reduce:
275      if (nir_intrinsic_cluster_size(instr) == 0)
276         return false;
277      FALLTHROUGH;
278   case nir_intrinsic_inclusive_scan: {
279      nir_op op = nir_intrinsic_reduction_op(instr);
280      is_divergent = instr->src[0].ssa->divergent;
281      if (op != nir_op_umin && op != nir_op_imin && op != nir_op_fmin &&
282          op != nir_op_umax && op != nir_op_imax && op != nir_op_fmax &&
283          op != nir_op_iand && op != nir_op_ior)
284         is_divergent = true;
285      break;
286   }
287
288   case nir_intrinsic_load_ubo:
289   case nir_intrinsic_load_ssbo:
290      is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
291                     instr->src[1].ssa->divergent;
292      break;
293
294   case nir_intrinsic_get_ssbo_size:
295   case nir_intrinsic_deref_buffer_array_length:
296      is_divergent = instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
297      break;
298
299   case nir_intrinsic_image_load:
300   case nir_intrinsic_image_deref_load:
301   case nir_intrinsic_bindless_image_load:
302   case nir_intrinsic_image_sparse_load:
303   case nir_intrinsic_image_deref_sparse_load:
304   case nir_intrinsic_bindless_image_sparse_load:
305      is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
306                     instr->src[1].ssa->divergent || instr->src[2].ssa->divergent || instr->src[3].ssa->divergent;
307      break;
308
309
310   /* Intrinsics with divergence depending on sources */
311   case nir_intrinsic_ballot_bitfield_extract:
312   case nir_intrinsic_ballot_find_lsb:
313   case nir_intrinsic_ballot_find_msb:
314   case nir_intrinsic_ballot_bit_count_reduce:
315   case nir_intrinsic_shuffle_xor:
316   case nir_intrinsic_shuffle_up:
317   case nir_intrinsic_shuffle_down:
318   case nir_intrinsic_quad_broadcast:
319   case nir_intrinsic_quad_swap_horizontal:
320   case nir_intrinsic_quad_swap_vertical:
321   case nir_intrinsic_quad_swap_diagonal:
322   case nir_intrinsic_byte_permute_amd:
323   case nir_intrinsic_load_deref:
324   case nir_intrinsic_load_shared:
325   case nir_intrinsic_load_global:
326   case nir_intrinsic_load_global_constant:
327   case nir_intrinsic_load_uniform:
328   case nir_intrinsic_load_constant:
329   case nir_intrinsic_load_sample_pos_from_id:
330   case nir_intrinsic_load_kernel_input:
331   case nir_intrinsic_load_buffer_amd:
332   case nir_intrinsic_image_samples:
333   case nir_intrinsic_image_deref_samples:
334   case nir_intrinsic_bindless_image_samples:
335   case nir_intrinsic_image_size:
336   case nir_intrinsic_image_deref_size:
337   case nir_intrinsic_bindless_image_size:
338   case nir_intrinsic_copy_deref:
339   case nir_intrinsic_vulkan_resource_index:
340   case nir_intrinsic_vulkan_resource_reindex:
341   case nir_intrinsic_load_vulkan_descriptor:
342   case nir_intrinsic_atomic_counter_read:
343   case nir_intrinsic_atomic_counter_read_deref:
344   case nir_intrinsic_quad_swizzle_amd:
345   case nir_intrinsic_masked_swizzle_amd:
346   case nir_intrinsic_is_sparse_texels_resident:
347   case nir_intrinsic_sparse_residency_code_and:
348   case nir_intrinsic_load_sbt_amd:
349   case nir_intrinsic_bvh64_intersect_ray_amd:
350   case nir_intrinsic_get_ubo_size:
351   case nir_intrinsic_load_ssbo_address: {
352      unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
353      for (unsigned i = 0; i < num_srcs; i++) {
354         if (instr->src[i].ssa->divergent) {
355            is_divergent = true;
356            break;
357         }
358      }
359      break;
360   }
361
362   case nir_intrinsic_shuffle:
363      is_divergent = instr->src[0].ssa->divergent &&
364                     instr->src[1].ssa->divergent;
365      break;
366
367   /* Intrinsics which are always divergent */
368   case nir_intrinsic_load_color0:
369   case nir_intrinsic_load_color1:
370   case nir_intrinsic_load_param:
371   case nir_intrinsic_load_sample_id:
372   case nir_intrinsic_load_sample_id_no_per_sample:
373   case nir_intrinsic_load_sample_mask_in:
374   case nir_intrinsic_load_interpolated_input:
375   case nir_intrinsic_load_barycentric_pixel:
376   case nir_intrinsic_load_barycentric_centroid:
377   case nir_intrinsic_load_barycentric_sample:
378   case nir_intrinsic_load_barycentric_model:
379   case nir_intrinsic_load_barycentric_at_sample:
380   case nir_intrinsic_load_barycentric_at_offset:
381   case nir_intrinsic_interp_deref_at_offset:
382   case nir_intrinsic_interp_deref_at_sample:
383   case nir_intrinsic_interp_deref_at_centroid:
384   case nir_intrinsic_interp_deref_at_vertex:
385   case nir_intrinsic_load_tess_coord:
386   case nir_intrinsic_load_point_coord:
387   case nir_intrinsic_load_line_coord:
388   case nir_intrinsic_load_frag_coord:
389   case nir_intrinsic_load_sample_pos:
390   case nir_intrinsic_load_vertex_id_zero_base:
391   case nir_intrinsic_load_vertex_id:
392   case nir_intrinsic_load_instance_id:
393   case nir_intrinsic_load_invocation_id:
394   case nir_intrinsic_load_local_invocation_id:
395   case nir_intrinsic_load_local_invocation_index:
396   case nir_intrinsic_load_global_invocation_id:
397   case nir_intrinsic_load_global_invocation_id_zero_base:
398   case nir_intrinsic_load_global_invocation_index:
399   case nir_intrinsic_load_subgroup_invocation:
400   case nir_intrinsic_load_helper_invocation:
401   case nir_intrinsic_is_helper_invocation:
402   case nir_intrinsic_load_scratch:
403   case nir_intrinsic_deref_atomic_add:
404   case nir_intrinsic_deref_atomic_imin:
405   case nir_intrinsic_deref_atomic_umin:
406   case nir_intrinsic_deref_atomic_imax:
407   case nir_intrinsic_deref_atomic_umax:
408   case nir_intrinsic_deref_atomic_and:
409   case nir_intrinsic_deref_atomic_or:
410   case nir_intrinsic_deref_atomic_xor:
411   case nir_intrinsic_deref_atomic_exchange:
412   case nir_intrinsic_deref_atomic_comp_swap:
413   case nir_intrinsic_deref_atomic_fadd:
414   case nir_intrinsic_deref_atomic_fmin:
415   case nir_intrinsic_deref_atomic_fmax:
416   case nir_intrinsic_deref_atomic_fcomp_swap:
417   case nir_intrinsic_ssbo_atomic_add:
418   case nir_intrinsic_ssbo_atomic_imin:
419   case nir_intrinsic_ssbo_atomic_umin:
420   case nir_intrinsic_ssbo_atomic_imax:
421   case nir_intrinsic_ssbo_atomic_umax:
422   case nir_intrinsic_ssbo_atomic_and:
423   case nir_intrinsic_ssbo_atomic_or:
424   case nir_intrinsic_ssbo_atomic_xor:
425   case nir_intrinsic_ssbo_atomic_exchange:
426   case nir_intrinsic_ssbo_atomic_comp_swap:
427   case nir_intrinsic_ssbo_atomic_fadd:
428   case nir_intrinsic_ssbo_atomic_fmax:
429   case nir_intrinsic_ssbo_atomic_fmin:
430   case nir_intrinsic_ssbo_atomic_fcomp_swap:
431   case nir_intrinsic_image_deref_atomic_add:
432   case nir_intrinsic_image_deref_atomic_imin:
433   case nir_intrinsic_image_deref_atomic_umin:
434   case nir_intrinsic_image_deref_atomic_imax:
435   case nir_intrinsic_image_deref_atomic_umax:
436   case nir_intrinsic_image_deref_atomic_and:
437   case nir_intrinsic_image_deref_atomic_or:
438   case nir_intrinsic_image_deref_atomic_xor:
439   case nir_intrinsic_image_deref_atomic_exchange:
440   case nir_intrinsic_image_deref_atomic_comp_swap:
441   case nir_intrinsic_image_deref_atomic_fadd:
442   case nir_intrinsic_image_deref_atomic_fmin:
443   case nir_intrinsic_image_deref_atomic_fmax:
444   case nir_intrinsic_image_atomic_add:
445   case nir_intrinsic_image_atomic_imin:
446   case nir_intrinsic_image_atomic_umin:
447   case nir_intrinsic_image_atomic_imax:
448   case nir_intrinsic_image_atomic_umax:
449   case nir_intrinsic_image_atomic_and:
450   case nir_intrinsic_image_atomic_or:
451   case nir_intrinsic_image_atomic_xor:
452   case nir_intrinsic_image_atomic_exchange:
453   case nir_intrinsic_image_atomic_comp_swap:
454   case nir_intrinsic_image_atomic_fadd:
455   case nir_intrinsic_image_atomic_fmin:
456   case nir_intrinsic_image_atomic_fmax:
457   case nir_intrinsic_bindless_image_atomic_add:
458   case nir_intrinsic_bindless_image_atomic_imin:
459   case nir_intrinsic_bindless_image_atomic_umin:
460   case nir_intrinsic_bindless_image_atomic_imax:
461   case nir_intrinsic_bindless_image_atomic_umax:
462   case nir_intrinsic_bindless_image_atomic_and:
463   case nir_intrinsic_bindless_image_atomic_or:
464   case nir_intrinsic_bindless_image_atomic_xor:
465   case nir_intrinsic_bindless_image_atomic_exchange:
466   case nir_intrinsic_bindless_image_atomic_comp_swap:
467   case nir_intrinsic_bindless_image_atomic_fadd:
468   case nir_intrinsic_bindless_image_atomic_fmin:
469   case nir_intrinsic_bindless_image_atomic_fmax:
470   case nir_intrinsic_shared_atomic_add:
471   case nir_intrinsic_shared_atomic_imin:
472   case nir_intrinsic_shared_atomic_umin:
473   case nir_intrinsic_shared_atomic_imax:
474   case nir_intrinsic_shared_atomic_umax:
475   case nir_intrinsic_shared_atomic_and:
476   case nir_intrinsic_shared_atomic_or:
477   case nir_intrinsic_shared_atomic_xor:
478   case nir_intrinsic_shared_atomic_exchange:
479   case nir_intrinsic_shared_atomic_comp_swap:
480   case nir_intrinsic_shared_atomic_fadd:
481   case nir_intrinsic_shared_atomic_fmin:
482   case nir_intrinsic_shared_atomic_fmax:
483   case nir_intrinsic_shared_atomic_fcomp_swap:
484   case nir_intrinsic_global_atomic_add:
485   case nir_intrinsic_global_atomic_imin:
486   case nir_intrinsic_global_atomic_umin:
487   case nir_intrinsic_global_atomic_imax:
488   case nir_intrinsic_global_atomic_umax:
489   case nir_intrinsic_global_atomic_and:
490   case nir_intrinsic_global_atomic_or:
491   case nir_intrinsic_global_atomic_xor:
492   case nir_intrinsic_global_atomic_exchange:
493   case nir_intrinsic_global_atomic_comp_swap:
494   case nir_intrinsic_global_atomic_fadd:
495   case nir_intrinsic_global_atomic_fmin:
496   case nir_intrinsic_global_atomic_fmax:
497   case nir_intrinsic_global_atomic_fcomp_swap:
498   case nir_intrinsic_atomic_counter_add:
499   case nir_intrinsic_atomic_counter_min:
500   case nir_intrinsic_atomic_counter_max:
501   case nir_intrinsic_atomic_counter_and:
502   case nir_intrinsic_atomic_counter_or:
503   case nir_intrinsic_atomic_counter_xor:
504   case nir_intrinsic_atomic_counter_inc:
505   case nir_intrinsic_atomic_counter_pre_dec:
506   case nir_intrinsic_atomic_counter_post_dec:
507   case nir_intrinsic_atomic_counter_exchange:
508   case nir_intrinsic_atomic_counter_comp_swap:
509   case nir_intrinsic_atomic_counter_add_deref:
510   case nir_intrinsic_atomic_counter_min_deref:
511   case nir_intrinsic_atomic_counter_max_deref:
512   case nir_intrinsic_atomic_counter_and_deref:
513   case nir_intrinsic_atomic_counter_or_deref:
514   case nir_intrinsic_atomic_counter_xor_deref:
515   case nir_intrinsic_atomic_counter_inc_deref:
516   case nir_intrinsic_atomic_counter_pre_dec_deref:
517   case nir_intrinsic_atomic_counter_post_dec_deref:
518   case nir_intrinsic_atomic_counter_exchange_deref:
519   case nir_intrinsic_atomic_counter_comp_swap_deref:
520   case nir_intrinsic_exclusive_scan:
521   case nir_intrinsic_ballot_bit_count_exclusive:
522   case nir_intrinsic_ballot_bit_count_inclusive:
523   case nir_intrinsic_write_invocation_amd:
524   case nir_intrinsic_mbcnt_amd:
525   case nir_intrinsic_lane_permute_16_amd:
526   case nir_intrinsic_elect:
527   case nir_intrinsic_load_tlb_color_v3d:
528   case nir_intrinsic_load_tess_rel_patch_id_amd:
529   case nir_intrinsic_load_gs_vertex_offset_amd:
530   case nir_intrinsic_has_input_vertex_amd:
531   case nir_intrinsic_has_input_primitive_amd:
532   case nir_intrinsic_load_packed_passthrough_primitive_amd:
533   case nir_intrinsic_load_initial_edgeflags_amd:
534   case nir_intrinsic_gds_atomic_add_amd:
535   case nir_intrinsic_load_rt_arg_scratch_offset_amd:
536   case nir_intrinsic_load_intersection_opaque_amd:
537      is_divergent = true;
538      break;
539
540   default:
541#ifdef NDEBUG
542      is_divergent = true;
543      break;
544#else
545      nir_print_instr(&instr->instr, stderr);
546      unreachable("\nNIR divergence analysis: Unhandled intrinsic.");
547#endif
548   }
549
550   instr->dest.ssa.divergent = is_divergent;
551   return is_divergent;
552}
553
554static bool
555visit_tex(nir_tex_instr *instr)
556{
557   if (instr->dest.ssa.divergent)
558      return false;
559
560   bool is_divergent = false;
561
562   for (unsigned i = 0; i < instr->num_srcs; i++) {
563      switch (instr->src[i].src_type) {
564      case nir_tex_src_sampler_deref:
565      case nir_tex_src_sampler_handle:
566      case nir_tex_src_sampler_offset:
567         is_divergent |= instr->src[i].src.ssa->divergent &&
568                         instr->sampler_non_uniform;
569         break;
570      case nir_tex_src_texture_deref:
571      case nir_tex_src_texture_handle:
572      case nir_tex_src_texture_offset:
573         is_divergent |= instr->src[i].src.ssa->divergent &&
574                         instr->texture_non_uniform;
575         break;
576      default:
577         is_divergent |= instr->src[i].src.ssa->divergent;
578         break;
579      }
580   }
581
582   instr->dest.ssa.divergent = is_divergent;
583   return is_divergent;
584}
585
/* Load-const results are compile-time constants and thus identical for
 * every invocation: never divergent, never a state change. */
static bool
visit_load_const(nir_load_const_instr *instr)
{
   return false;
}
591
/* Undefined values are treated as uniform: any convergent value is a
 * valid choice for an undef, so it never contributes divergence. */
static bool
visit_ssa_undef(nir_ssa_undef_instr *instr)
{
   return false;
}
597
598static bool
599nir_variable_mode_is_uniform(nir_variable_mode mode) {
600   switch (mode) {
601   case nir_var_uniform:
602   case nir_var_mem_ubo:
603   case nir_var_mem_ssbo:
604   case nir_var_mem_shared:
605   case nir_var_mem_global:
606      return true;
607   default:
608      return false;
609   }
610}
611
612static bool
613nir_variable_is_uniform(nir_shader *shader, nir_variable *var)
614{
615   if (nir_variable_mode_is_uniform(var->data.mode))
616      return true;
617
618   nir_divergence_options options = shader->options->divergence_analysis_options;
619   gl_shader_stage stage = shader->info.stage;
620
621   if (stage == MESA_SHADER_FRAGMENT &&
622       (options & nir_divergence_single_prim_per_subgroup) &&
623       var->data.mode == nir_var_shader_in &&
624       var->data.interpolation == INTERP_MODE_FLAT)
625      return true;
626
627   if (stage == MESA_SHADER_TESS_CTRL &&
628       (options & nir_divergence_single_patch_per_tcs_subgroup) &&
629       var->data.mode == nir_var_shader_out && var->data.patch)
630      return true;
631
632   if (stage == MESA_SHADER_TESS_EVAL &&
633       (options & nir_divergence_single_patch_per_tes_subgroup) &&
634       var->data.mode == nir_var_shader_in && var->data.patch)
635      return true;
636
637   return false;
638}
639
640static bool
641visit_deref(nir_shader *shader, nir_deref_instr *deref)
642{
643   if (deref->dest.ssa.divergent)
644      return false;
645
646   bool is_divergent = false;
647   switch (deref->deref_type) {
648   case nir_deref_type_var:
649      is_divergent = !nir_variable_is_uniform(shader, deref->var);
650      break;
651   case nir_deref_type_array:
652   case nir_deref_type_ptr_as_array:
653      is_divergent = deref->arr.index.ssa->divergent;
654      FALLTHROUGH;
655   case nir_deref_type_struct:
656   case nir_deref_type_array_wildcard:
657      is_divergent |= deref->parent.ssa->divergent;
658      break;
659   case nir_deref_type_cast:
660      is_divergent = !nir_variable_mode_is_uniform(deref->var->data.mode) ||
661                     deref->parent.ssa->divergent;
662      break;
663   }
664
665   deref->dest.ssa.divergent = is_divergent;
666   return is_divergent;
667}
668
669static bool
670visit_jump(nir_jump_instr *jump, struct divergence_state *state)
671{
672   switch (jump->type) {
673   case nir_jump_continue:
674      if (state->divergent_loop_continue)
675         return false;
676      if (state->divergent_loop_cf)
677         state->divergent_loop_continue = true;
678      return state->divergent_loop_continue;
679   case nir_jump_break:
680      if (state->divergent_loop_break)
681         return false;
682      if (state->divergent_loop_cf)
683         state->divergent_loop_break = true;
684      return state->divergent_loop_break;
685   case nir_jump_halt:
686      /* This totally kills invocations so it doesn't add divergence */
687      break;
688   case nir_jump_return:
689      unreachable("NIR divergence analysis: Unsupported return instruction.");
690      break;
691   case nir_jump_goto:
692   case nir_jump_goto_if:
693      unreachable("NIR divergence analysis: Unsupported goto_if instruction.");
694      break;
695   }
696   return false;
697}
698
/* nir_foreach_ssa_def callback: resets a def to the non-divergent starting
 * state before its block is analyzed for the first time. Always returns
 * true so iteration continues over all defs. */
static bool
set_ssa_def_not_divergent(nir_ssa_def *def, UNUSED void *_state)
{
   def->divergent = false;
   return true;
}
705
706static bool
707update_instr_divergence(nir_shader *shader, nir_instr *instr)
708{
709   switch (instr->type) {
710   case nir_instr_type_alu:
711      return visit_alu(nir_instr_as_alu(instr));
712   case nir_instr_type_intrinsic:
713      return visit_intrinsic(shader, nir_instr_as_intrinsic(instr));
714   case nir_instr_type_tex:
715      return visit_tex(nir_instr_as_tex(instr));
716   case nir_instr_type_load_const:
717      return visit_load_const(nir_instr_as_load_const(instr));
718   case nir_instr_type_ssa_undef:
719      return visit_ssa_undef(nir_instr_as_ssa_undef(instr));
720   case nir_instr_type_deref:
721      return visit_deref(shader, nir_instr_as_deref(instr));
722   case nir_instr_type_jump:
723   case nir_instr_type_phi:
724   case nir_instr_type_call:
725   case nir_instr_type_parallel_copy:
726   default:
727      unreachable("NIR divergence analysis: Unsupported instruction type.");
728   }
729}
730
731static bool
732visit_block(nir_block *block, struct divergence_state *state)
733{
734   bool has_changed = false;
735
736   nir_foreach_instr(instr, block) {
737      /* phis are handled when processing the branches */
738      if (instr->type == nir_instr_type_phi)
739         continue;
740
741      if (state->first_visit)
742         nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL);
743
744      if (instr->type == nir_instr_type_jump)
745         has_changed |= visit_jump(nir_instr_as_jump(instr), state);
746      else
747         has_changed |= update_instr_divergence(state->shader, instr);
748   }
749
750   return has_changed;
751}
752
753/* There are 3 types of phi instructions:
754 * (1) gamma: represent the joining point of different paths
755 *     created by an “if-then-else” branch.
756 *     The resulting value is divergent if the branch condition
757 *     or any of the source values is divergent. */
758static bool
759visit_if_merge_phi(nir_phi_instr *phi, bool if_cond_divergent)
760{
761   if (phi->dest.ssa.divergent)
762      return false;
763
764   unsigned defined_srcs = 0;
765   nir_foreach_phi_src(src, phi) {
766      /* if any source value is divergent, the resulting value is divergent */
767      if (src->src.ssa->divergent) {
768         phi->dest.ssa.divergent = true;
769         return true;
770      }
771      if (src->src.ssa->parent_instr->type != nir_instr_type_ssa_undef) {
772         defined_srcs++;
773      }
774   }
775
776   /* if the condition is divergent and two sources defined, the definition is divergent */
777   if (defined_srcs > 1 && if_cond_divergent) {
778      phi->dest.ssa.divergent = true;
779      return true;
780   }
781
782   return false;
783}
784
785/* There are 3 types of phi instructions:
786 * (2) mu: which only exist at loop headers,
787 *     merge initial and loop-carried values.
788 *     The resulting value is divergent if any source value
789 *     is divergent or a divergent loop continue condition
790 *     is associated with a different ssa-def. */
791static bool
792visit_loop_header_phi(nir_phi_instr *phi, nir_block *preheader, bool divergent_continue)
793{
794   if (phi->dest.ssa.divergent)
795      return false;
796
797   nir_ssa_def* same = NULL;
798   nir_foreach_phi_src(src, phi) {
799      /* if any source value is divergent, the resulting value is divergent */
800      if (src->src.ssa->divergent) {
801         phi->dest.ssa.divergent = true;
802         return true;
803      }
804      /* if this loop is uniform, we're done here */
805      if (!divergent_continue)
806         continue;
807      /* skip the loop preheader */
808      if (src->pred == preheader)
809         continue;
810      /* skip undef values */
811      if (nir_src_is_undef(src->src))
812         continue;
813
814      /* check if all loop-carried values are from the same ssa-def */
815      if (!same)
816         same = src->src.ssa;
817      else if (same != src->src.ssa) {
818         phi->dest.ssa.divergent = true;
819         return true;
820      }
821   }
822
823   return false;
824}
825
826/* There are 3 types of phi instructions:
827 * (3) eta: represent values that leave a loop.
828 *     The resulting value is divergent if the source value is divergent
829 *     or any loop exit condition is divergent for a value which is
830 *     not loop-invariant.
831 *     (note: there should be no phi for loop-invariant variables.) */
832static bool
833visit_loop_exit_phi(nir_phi_instr *phi, bool divergent_break)
834{
835   if (phi->dest.ssa.divergent)
836      return false;
837
838   if (divergent_break) {
839      phi->dest.ssa.divergent = true;
840      return true;
841   }
842
843   /* if any source value is divergent, the resulting value is divergent */
844   nir_foreach_phi_src(src, phi) {
845      if (src->src.ssa->divergent) {
846         phi->dest.ssa.divergent = true;
847         return true;
848      }
849   }
850
851   return false;
852}
853
854static bool
855visit_if(nir_if *if_stmt, struct divergence_state *state)
856{
857   bool progress = false;
858
859   struct divergence_state then_state = *state;
860   then_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
861   progress |= visit_cf_list(&if_stmt->then_list, &then_state);
862
863   struct divergence_state else_state = *state;
864   else_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
865   progress |= visit_cf_list(&if_stmt->else_list, &else_state);
866
867   /* handle phis after the IF */
868   nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&if_stmt->cf_node)) {
869      if (instr->type != nir_instr_type_phi)
870         break;
871
872      if (state->first_visit)
873         nir_instr_as_phi(instr)->dest.ssa.divergent = false;
874      progress |= visit_if_merge_phi(nir_instr_as_phi(instr),
875                                     if_stmt->condition.ssa->divergent);
876   }
877
878   /* join loop divergence information from both branch legs */
879   state->divergent_loop_continue |= then_state.divergent_loop_continue ||
880                                     else_state.divergent_loop_continue;
881   state->divergent_loop_break |= then_state.divergent_loop_break ||
882                                  else_state.divergent_loop_break;
883
884   /* A divergent continue makes succeeding loop CF divergent:
885    * not all loop-active invocations participate in the remaining loop-body
886    * which means that a following break might be taken by some invocations, only */
887   state->divergent_loop_cf |= state->divergent_loop_continue;
888
889   return progress;
890}
891
/* Analyze a loop: seed the header (mu) phis from the preheader values,
 * iterate the body to a fixed point, then update the exit (eta) phis.
 * Returns true if any divergence information changed. */
static bool
visit_loop(nir_loop *loop, struct divergence_state *state)
{
   bool progress = false;
   nir_block *loop_header = nir_loop_first_block(loop);
   nir_block *loop_preheader = nir_block_cf_tree_prev(loop_header);

   /* handle loop header phis first: we have no knowledge yet about
    * the loop's control flow or any loop-carried sources. */
   nir_foreach_instr(instr, loop_header) {
      if (instr->type != nir_instr_type_phi)
         break;

      nir_phi_instr *phi = nir_instr_as_phi(instr);
      /* Divergence is monotonic: a phi already divergent on a re-visit
       * cannot become uniform again, so skip it. */
      if (!state->first_visit && phi->dest.ssa.divergent)
         continue;

      /* Seed the phi from the value entering through the preheader;
       * loop-carried sources are handled by the fixed-point loop below. */
      nir_foreach_phi_src(src, phi) {
         if (src->pred == loop_preheader) {
            phi->dest.ssa.divergent = src->src.ssa->divergent;
            break;
         }
      }
      progress |= phi->dest.ssa.divergent;
   }

   /* setup loop state */
   struct divergence_state loop_state = *state;
   loop_state.divergent_loop_cf = false;
   loop_state.divergent_loop_continue = false;
   loop_state.divergent_loop_break = false;

   /* process loop body until no further changes are made */
   bool repeat;
   do {
      progress |= visit_cf_list(&loop->body, &loop_state);
      repeat = false;

      /* revisit loop header phis to see if something has changed */
      nir_foreach_instr(instr, loop_header) {
         if (instr->type != nir_instr_type_phi)
            break;

         repeat |= visit_loop_header_phi(nir_instr_as_phi(instr),
                                         loop_preheader,
                                         loop_state.divergent_loop_continue);
      }

      /* Reset per-iteration CF divergence before re-walking the body;
       * continue/break flags accumulate across iterations. */
      loop_state.divergent_loop_cf = false;
      loop_state.first_visit = false;
   } while (repeat);

   /* handle phis after the loop */
   nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&loop->cf_node)) {
      if (instr->type != nir_instr_type_phi)
         break;

      /* On the first visit start each eta phi from a clean (uniform) slate. */
      if (state->first_visit)
         nir_instr_as_phi(instr)->dest.ssa.divergent = false;
      progress |= visit_loop_exit_phi(nir_instr_as_phi(instr),
                                      loop_state.divergent_loop_break);
   }

   /* A loop is divergent if invocations can leave or re-enter it at
    * different times (divergent break or continue). */
   loop->divergent = (loop_state.divergent_loop_break || loop_state.divergent_loop_continue);

   return progress;
}
959
960static bool
961visit_cf_list(struct exec_list *list, struct divergence_state *state)
962{
963   bool has_changed = false;
964
965   foreach_list_typed(nir_cf_node, node, node, list) {
966      switch (node->type) {
967      case nir_cf_node_block:
968         has_changed |= visit_block(nir_cf_node_as_block(node), state);
969         break;
970      case nir_cf_node_if:
971         has_changed |= visit_if(nir_cf_node_as_if(node), state);
972         break;
973      case nir_cf_node_loop:
974         has_changed |= visit_loop(nir_cf_node_as_loop(node), state);
975         break;
976      case nir_cf_node_function:
977         unreachable("NIR divergence analysis: Unsupported cf_node type.");
978      }
979   }
980
981   return has_changed;
982}
983
984void
985nir_divergence_analysis(nir_shader *shader)
986{
987   struct divergence_state state = {
988      .stage = shader->info.stage,
989      .shader = shader,
990      .divergent_loop_cf = false,
991      .divergent_loop_continue = false,
992      .divergent_loop_break = false,
993      .first_visit = true,
994   };
995
996   visit_cf_list(&nir_shader_get_entrypoint(shader)->body, &state);
997}
998
999bool nir_update_instr_divergence(nir_shader *shader, nir_instr *instr)
1000{
1001   nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL);
1002
1003   if (instr->type == nir_instr_type_phi) {
1004      nir_cf_node *prev = nir_cf_node_prev(&instr->block->cf_node);
1005      /* can only update gamma/if phis */
1006      if (!prev || prev->type != nir_cf_node_if)
1007         return false;
1008
1009      nir_if *nif = nir_cf_node_as_if(prev);
1010
1011      visit_if_merge_phi(nir_instr_as_phi(instr), nir_src_is_divergent(nif->condition));
1012      return true;
1013   }
1014
1015   update_instr_divergence(shader, instr);
1016   return true;
1017}
1018
1019