17ec681f3Smrg/*
27ec681f3Smrg * Copyright © 2018 Valve Corporation
37ec681f3Smrg *
47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a
57ec681f3Smrg * copy of this software and associated documentation files (the "Software"),
67ec681f3Smrg * to deal in the Software without restriction, including without limitation
77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the
97ec681f3Smrg * Software is furnished to do so, subject to the following conditions:
107ec681f3Smrg *
117ec681f3Smrg * The above copyright notice and this permission notice (including the next
127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the
137ec681f3Smrg * Software.
147ec681f3Smrg *
157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
207ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
217ec681f3Smrg * IN THE SOFTWARE.
227ec681f3Smrg *
237ec681f3Smrg */
247ec681f3Smrg
257ec681f3Smrg#include "nir.h"
267ec681f3Smrg
277ec681f3Smrg/* This pass computes for each ssa definition if it is uniform.
287ec681f3Smrg * That is, the variable has the same value for all invocations
297ec681f3Smrg * of the group.
307ec681f3Smrg *
317ec681f3Smrg * This divergence analysis pass expects the shader to be in LCSSA-form.
327ec681f3Smrg *
337ec681f3Smrg * This algorithm implements "The Simple Divergence Analysis" from
347ec681f3Smrg * Diogo Sampaio, Rafael De Souza, Sylvain Collange, Fernando Magno Quintão Pereira.
357ec681f3Smrg * Divergence Analysis.  ACM Transactions on Programming Languages and Systems (TOPLAS),
367ec681f3Smrg * ACM, 2013, 35 (4), pp.13:1-13:36. <10.1145/2523815>. <hal-00909072v2>
377ec681f3Smrg */
387ec681f3Smrg
397ec681f3Smrgstruct divergence_state {
407ec681f3Smrg   const gl_shader_stage stage;
417ec681f3Smrg   nir_shader *shader;
427ec681f3Smrg
437ec681f3Smrg   /** current control flow state */
447ec681f3Smrg   /* True if some loop-active invocations might take a different control-flow path.
457ec681f3Smrg    * A divergent break does not cause subsequent control-flow to be considered
467ec681f3Smrg    * divergent because those invocations are no longer active in the loop.
477ec681f3Smrg    * For a divergent if, both sides are considered divergent flow because
487ec681f3Smrg    * the other side is still loop-active. */
497ec681f3Smrg   bool divergent_loop_cf;
507ec681f3Smrg   /* True if a divergent continue happened since the loop header */
517ec681f3Smrg   bool divergent_loop_continue;
527ec681f3Smrg   /* True if a divergent break happened since the loop header */
537ec681f3Smrg   bool divergent_loop_break;
547ec681f3Smrg
557ec681f3Smrg   /* True if we visit the block for the fist time */
567ec681f3Smrg   bool first_visit;
577ec681f3Smrg};
587ec681f3Smrg
597ec681f3Smrgstatic bool
607ec681f3Smrgvisit_cf_list(struct exec_list *list, struct divergence_state *state);
617ec681f3Smrg
627ec681f3Smrgstatic bool
637ec681f3Smrgvisit_alu(nir_alu_instr *instr)
647ec681f3Smrg{
657ec681f3Smrg   if (instr->dest.dest.ssa.divergent)
667ec681f3Smrg      return false;
677ec681f3Smrg
687ec681f3Smrg   unsigned num_src = nir_op_infos[instr->op].num_inputs;
697ec681f3Smrg
707ec681f3Smrg   for (unsigned i = 0; i < num_src; i++) {
717ec681f3Smrg      if (instr->src[i].src.ssa->divergent) {
727ec681f3Smrg         instr->dest.dest.ssa.divergent = true;
737ec681f3Smrg         return true;
747ec681f3Smrg      }
757ec681f3Smrg   }
767ec681f3Smrg
777ec681f3Smrg   return false;
787ec681f3Smrg}
797ec681f3Smrg
807ec681f3Smrgstatic bool
817ec681f3Smrgvisit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr)
827ec681f3Smrg{
837ec681f3Smrg   if (!nir_intrinsic_infos[instr->intrinsic].has_dest)
847ec681f3Smrg      return false;
857ec681f3Smrg
867ec681f3Smrg   if (instr->dest.ssa.divergent)
877ec681f3Smrg      return false;
887ec681f3Smrg
897ec681f3Smrg   nir_divergence_options options = shader->options->divergence_analysis_options;
907ec681f3Smrg   gl_shader_stage stage = shader->info.stage;
917ec681f3Smrg   bool is_divergent = false;
927ec681f3Smrg   switch (instr->intrinsic) {
937ec681f3Smrg   /* Intrinsics which are always uniform */
947ec681f3Smrg   case nir_intrinsic_shader_clock:
957ec681f3Smrg   case nir_intrinsic_ballot:
967ec681f3Smrg   case nir_intrinsic_read_invocation:
977ec681f3Smrg   case nir_intrinsic_read_first_invocation:
987ec681f3Smrg   case nir_intrinsic_vote_any:
997ec681f3Smrg   case nir_intrinsic_vote_all:
1007ec681f3Smrg   case nir_intrinsic_vote_feq:
1017ec681f3Smrg   case nir_intrinsic_vote_ieq:
1027ec681f3Smrg   case nir_intrinsic_load_push_constant:
1037ec681f3Smrg   case nir_intrinsic_load_work_dim:
1047ec681f3Smrg   case nir_intrinsic_load_num_workgroups:
1057ec681f3Smrg   case nir_intrinsic_load_workgroup_size:
1067ec681f3Smrg   case nir_intrinsic_load_subgroup_id:
1077ec681f3Smrg   case nir_intrinsic_load_num_subgroups:
1087ec681f3Smrg   case nir_intrinsic_load_ray_launch_size:
1097ec681f3Smrg   case nir_intrinsic_load_subgroup_size:
1107ec681f3Smrg   case nir_intrinsic_load_subgroup_eq_mask:
1117ec681f3Smrg   case nir_intrinsic_load_subgroup_ge_mask:
1127ec681f3Smrg   case nir_intrinsic_load_subgroup_gt_mask:
1137ec681f3Smrg   case nir_intrinsic_load_subgroup_le_mask:
1147ec681f3Smrg   case nir_intrinsic_load_subgroup_lt_mask:
1157ec681f3Smrg   case nir_intrinsic_first_invocation:
1167ec681f3Smrg   case nir_intrinsic_last_invocation:
1177ec681f3Smrg   case nir_intrinsic_load_base_instance:
1187ec681f3Smrg   case nir_intrinsic_load_base_vertex:
1197ec681f3Smrg   case nir_intrinsic_load_first_vertex:
1207ec681f3Smrg   case nir_intrinsic_load_draw_id:
1217ec681f3Smrg   case nir_intrinsic_load_is_indexed_draw:
1227ec681f3Smrg   case nir_intrinsic_load_viewport_scale:
1237ec681f3Smrg   case nir_intrinsic_load_user_clip_plane:
1247ec681f3Smrg   case nir_intrinsic_load_viewport_x_scale:
1257ec681f3Smrg   case nir_intrinsic_load_viewport_y_scale:
1267ec681f3Smrg   case nir_intrinsic_load_viewport_z_scale:
1277ec681f3Smrg   case nir_intrinsic_load_viewport_offset:
1287ec681f3Smrg   case nir_intrinsic_load_viewport_x_offset:
1297ec681f3Smrg   case nir_intrinsic_load_viewport_y_offset:
1307ec681f3Smrg   case nir_intrinsic_load_viewport_z_offset:
1317ec681f3Smrg   case nir_intrinsic_load_blend_const_color_a_float:
1327ec681f3Smrg   case nir_intrinsic_load_blend_const_color_b_float:
1337ec681f3Smrg   case nir_intrinsic_load_blend_const_color_g_float:
1347ec681f3Smrg   case nir_intrinsic_load_blend_const_color_r_float:
1357ec681f3Smrg   case nir_intrinsic_load_blend_const_color_rgba:
1367ec681f3Smrg   case nir_intrinsic_load_blend_const_color_aaaa8888_unorm:
1377ec681f3Smrg   case nir_intrinsic_load_blend_const_color_rgba8888_unorm:
1387ec681f3Smrg   case nir_intrinsic_load_line_width:
1397ec681f3Smrg   case nir_intrinsic_load_aa_line_width:
1407ec681f3Smrg   case nir_intrinsic_load_fb_layers_v3d:
1417ec681f3Smrg   case nir_intrinsic_load_tcs_num_patches_amd:
1427ec681f3Smrg   case nir_intrinsic_load_ring_tess_factors_amd:
1437ec681f3Smrg   case nir_intrinsic_load_ring_tess_offchip_amd:
1447ec681f3Smrg   case nir_intrinsic_load_ring_tess_factors_offset_amd:
1457ec681f3Smrg   case nir_intrinsic_load_ring_tess_offchip_offset_amd:
1467ec681f3Smrg   case nir_intrinsic_load_ring_esgs_amd:
1477ec681f3Smrg   case nir_intrinsic_load_ring_es2gs_offset_amd:
1487ec681f3Smrg   case nir_intrinsic_load_sample_positions_pan:
1497ec681f3Smrg   case nir_intrinsic_load_workgroup_num_input_vertices_amd:
1507ec681f3Smrg   case nir_intrinsic_load_workgroup_num_input_primitives_amd:
1517ec681f3Smrg   case nir_intrinsic_load_shader_query_enabled_amd:
1527ec681f3Smrg   case nir_intrinsic_load_cull_front_face_enabled_amd:
1537ec681f3Smrg   case nir_intrinsic_load_cull_back_face_enabled_amd:
1547ec681f3Smrg   case nir_intrinsic_load_cull_ccw_amd:
1557ec681f3Smrg   case nir_intrinsic_load_cull_small_primitives_enabled_amd:
1567ec681f3Smrg   case nir_intrinsic_load_cull_any_enabled_amd:
1577ec681f3Smrg   case nir_intrinsic_load_cull_small_prim_precision_amd:
1587ec681f3Smrg      is_divergent = false;
1597ec681f3Smrg      break;
1607ec681f3Smrg
1617ec681f3Smrg   /* Intrinsics with divergence depending on shader stage and hardware */
1627ec681f3Smrg   case nir_intrinsic_load_frag_shading_rate:
1637ec681f3Smrg      is_divergent = !(options & nir_divergence_single_frag_shading_rate_per_subgroup);
1647ec681f3Smrg      break;
1657ec681f3Smrg   case nir_intrinsic_load_input:
1667ec681f3Smrg      is_divergent = instr->src[0].ssa->divergent;
1677ec681f3Smrg      if (stage == MESA_SHADER_FRAGMENT)
1687ec681f3Smrg         is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
1697ec681f3Smrg      else if (stage == MESA_SHADER_TESS_EVAL)
1707ec681f3Smrg         is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
1717ec681f3Smrg      else if (stage != MESA_SHADER_MESH)
1727ec681f3Smrg         is_divergent = true;
1737ec681f3Smrg      break;
1747ec681f3Smrg   case nir_intrinsic_load_per_vertex_input:
1757ec681f3Smrg      is_divergent = instr->src[0].ssa->divergent ||
1767ec681f3Smrg                     instr->src[1].ssa->divergent;
1777ec681f3Smrg      if (stage == MESA_SHADER_TESS_CTRL)
1787ec681f3Smrg         is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
1797ec681f3Smrg      if (stage == MESA_SHADER_TESS_EVAL)
1807ec681f3Smrg         is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
1817ec681f3Smrg      else
1827ec681f3Smrg         is_divergent = true;
1837ec681f3Smrg      break;
1847ec681f3Smrg   case nir_intrinsic_load_input_vertex:
1857ec681f3Smrg      is_divergent = instr->src[1].ssa->divergent;
1867ec681f3Smrg      assert(stage == MESA_SHADER_FRAGMENT);
1877ec681f3Smrg      is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
1887ec681f3Smrg      break;
1897ec681f3Smrg   case nir_intrinsic_load_output:
1907ec681f3Smrg      is_divergent = instr->src[0].ssa->divergent;
1917ec681f3Smrg      switch (stage) {
1927ec681f3Smrg      case MESA_SHADER_TESS_CTRL:
1937ec681f3Smrg         is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
1947ec681f3Smrg         break;
1957ec681f3Smrg      case MESA_SHADER_FRAGMENT:
1967ec681f3Smrg         is_divergent = true;
1977ec681f3Smrg         break;
1987ec681f3Smrg      case MESA_SHADER_TASK:
1997ec681f3Smrg      case MESA_SHADER_MESH:
2007ec681f3Smrg         /* Divergent if src[0] is, so nothing else to do. */
2017ec681f3Smrg         break;
2027ec681f3Smrg      default:
2037ec681f3Smrg         unreachable("Invalid stage for load_output");
2047ec681f3Smrg      }
2057ec681f3Smrg      break;
2067ec681f3Smrg   case nir_intrinsic_load_per_vertex_output:
2077ec681f3Smrg      assert(stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_MESH);
2087ec681f3Smrg      is_divergent = instr->src[0].ssa->divergent ||
2097ec681f3Smrg                     instr->src[1].ssa->divergent ||
2107ec681f3Smrg                     (stage == MESA_SHADER_TESS_CTRL &&
2117ec681f3Smrg                      !(options & nir_divergence_single_patch_per_tcs_subgroup));
2127ec681f3Smrg      break;
2137ec681f3Smrg   case nir_intrinsic_load_per_primitive_output:
2147ec681f3Smrg      assert(stage == MESA_SHADER_MESH);
2157ec681f3Smrg      is_divergent = instr->src[0].ssa->divergent ||
2167ec681f3Smrg                     instr->src[1].ssa->divergent;
2177ec681f3Smrg      break;
2187ec681f3Smrg   case nir_intrinsic_load_layer_id:
2197ec681f3Smrg   case nir_intrinsic_load_front_face:
2207ec681f3Smrg      assert(stage == MESA_SHADER_FRAGMENT);
2217ec681f3Smrg      is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
2227ec681f3Smrg      break;
2237ec681f3Smrg   case nir_intrinsic_load_view_index:
2247ec681f3Smrg      assert(stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_KERNEL);
2257ec681f3Smrg      if (options & nir_divergence_view_index_uniform)
2267ec681f3Smrg         is_divergent = false;
2277ec681f3Smrg      else if (stage == MESA_SHADER_FRAGMENT)
2287ec681f3Smrg         is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
2297ec681f3Smrg      break;
2307ec681f3Smrg   case nir_intrinsic_load_fs_input_interp_deltas:
2317ec681f3Smrg      assert(stage == MESA_SHADER_FRAGMENT);
2327ec681f3Smrg      is_divergent = instr->src[0].ssa->divergent;
2337ec681f3Smrg      is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
2347ec681f3Smrg      break;
2357ec681f3Smrg   case nir_intrinsic_load_primitive_id:
2367ec681f3Smrg      if (stage == MESA_SHADER_FRAGMENT)
2377ec681f3Smrg         is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
2387ec681f3Smrg      else if (stage == MESA_SHADER_TESS_CTRL)
2397ec681f3Smrg         is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
2407ec681f3Smrg      else if (stage == MESA_SHADER_TESS_EVAL)
2417ec681f3Smrg         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
2427ec681f3Smrg      else if (stage == MESA_SHADER_GEOMETRY || stage == MESA_SHADER_VERTEX)
2437ec681f3Smrg         is_divergent = true;
2447ec681f3Smrg      else
2457ec681f3Smrg         unreachable("Invalid stage for load_primitive_id");
2467ec681f3Smrg      break;
2477ec681f3Smrg   case nir_intrinsic_load_tess_level_inner:
2487ec681f3Smrg   case nir_intrinsic_load_tess_level_outer:
2497ec681f3Smrg      if (stage == MESA_SHADER_TESS_CTRL)
2507ec681f3Smrg         is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
2517ec681f3Smrg      else if (stage == MESA_SHADER_TESS_EVAL)
2527ec681f3Smrg         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
2537ec681f3Smrg      else
2547ec681f3Smrg         unreachable("Invalid stage for load_primitive_tess_level_*");
2557ec681f3Smrg      break;
2567ec681f3Smrg   case nir_intrinsic_load_patch_vertices_in:
2577ec681f3Smrg      if (stage == MESA_SHADER_TESS_EVAL)
2587ec681f3Smrg         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
2597ec681f3Smrg      else
2607ec681f3Smrg         assert(stage == MESA_SHADER_TESS_CTRL);
2617ec681f3Smrg      break;
2627ec681f3Smrg
2637ec681f3Smrg   case nir_intrinsic_load_workgroup_id:
2647ec681f3Smrg      assert(gl_shader_stage_uses_workgroup(stage));
2657ec681f3Smrg      if (stage == MESA_SHADER_COMPUTE)
2667ec681f3Smrg         is_divergent |= (options & nir_divergence_multiple_workgroup_per_compute_subgroup);
2677ec681f3Smrg      break;
2687ec681f3Smrg
2697ec681f3Smrg   /* Clustered reductions are uniform if cluster_size == subgroup_size or
2707ec681f3Smrg    * the source is uniform and the operation is invariant.
2717ec681f3Smrg    * Inclusive scans are uniform if
2727ec681f3Smrg    * the source is uniform and the operation is invariant
2737ec681f3Smrg    */
2747ec681f3Smrg   case nir_intrinsic_reduce:
2757ec681f3Smrg      if (nir_intrinsic_cluster_size(instr) == 0)
2767ec681f3Smrg         return false;
2777ec681f3Smrg      FALLTHROUGH;
2787ec681f3Smrg   case nir_intrinsic_inclusive_scan: {
2797ec681f3Smrg      nir_op op = nir_intrinsic_reduction_op(instr);
2807ec681f3Smrg      is_divergent = instr->src[0].ssa->divergent;
2817ec681f3Smrg      if (op != nir_op_umin && op != nir_op_imin && op != nir_op_fmin &&
2827ec681f3Smrg          op != nir_op_umax && op != nir_op_imax && op != nir_op_fmax &&
2837ec681f3Smrg          op != nir_op_iand && op != nir_op_ior)
2847ec681f3Smrg         is_divergent = true;
2857ec681f3Smrg      break;
2867ec681f3Smrg   }
2877ec681f3Smrg
2887ec681f3Smrg   case nir_intrinsic_load_ubo:
2897ec681f3Smrg   case nir_intrinsic_load_ssbo:
2907ec681f3Smrg      is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
2917ec681f3Smrg                     instr->src[1].ssa->divergent;
2927ec681f3Smrg      break;
2937ec681f3Smrg
2947ec681f3Smrg   case nir_intrinsic_get_ssbo_size:
2957ec681f3Smrg   case nir_intrinsic_deref_buffer_array_length:
2967ec681f3Smrg      is_divergent = instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
2977ec681f3Smrg      break;
2987ec681f3Smrg
2997ec681f3Smrg   case nir_intrinsic_image_load:
3007ec681f3Smrg   case nir_intrinsic_image_deref_load:
3017ec681f3Smrg   case nir_intrinsic_bindless_image_load:
3027ec681f3Smrg   case nir_intrinsic_image_sparse_load:
3037ec681f3Smrg   case nir_intrinsic_image_deref_sparse_load:
3047ec681f3Smrg   case nir_intrinsic_bindless_image_sparse_load:
3057ec681f3Smrg      is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
3067ec681f3Smrg                     instr->src[1].ssa->divergent || instr->src[2].ssa->divergent || instr->src[3].ssa->divergent;
3077ec681f3Smrg      break;
3087ec681f3Smrg
3097ec681f3Smrg
3107ec681f3Smrg   /* Intrinsics with divergence depending on sources */
3117ec681f3Smrg   case nir_intrinsic_ballot_bitfield_extract:
3127ec681f3Smrg   case nir_intrinsic_ballot_find_lsb:
3137ec681f3Smrg   case nir_intrinsic_ballot_find_msb:
3147ec681f3Smrg   case nir_intrinsic_ballot_bit_count_reduce:
3157ec681f3Smrg   case nir_intrinsic_shuffle_xor:
3167ec681f3Smrg   case nir_intrinsic_shuffle_up:
3177ec681f3Smrg   case nir_intrinsic_shuffle_down:
3187ec681f3Smrg   case nir_intrinsic_quad_broadcast:
3197ec681f3Smrg   case nir_intrinsic_quad_swap_horizontal:
3207ec681f3Smrg   case nir_intrinsic_quad_swap_vertical:
3217ec681f3Smrg   case nir_intrinsic_quad_swap_diagonal:
3227ec681f3Smrg   case nir_intrinsic_byte_permute_amd:
3237ec681f3Smrg   case nir_intrinsic_load_deref:
3247ec681f3Smrg   case nir_intrinsic_load_shared:
3257ec681f3Smrg   case nir_intrinsic_load_global:
3267ec681f3Smrg   case nir_intrinsic_load_global_constant:
3277ec681f3Smrg   case nir_intrinsic_load_uniform:
3287ec681f3Smrg   case nir_intrinsic_load_constant:
3297ec681f3Smrg   case nir_intrinsic_load_sample_pos_from_id:
3307ec681f3Smrg   case nir_intrinsic_load_kernel_input:
3317ec681f3Smrg   case nir_intrinsic_load_buffer_amd:
3327ec681f3Smrg   case nir_intrinsic_image_samples:
3337ec681f3Smrg   case nir_intrinsic_image_deref_samples:
3347ec681f3Smrg   case nir_intrinsic_bindless_image_samples:
3357ec681f3Smrg   case nir_intrinsic_image_size:
3367ec681f3Smrg   case nir_intrinsic_image_deref_size:
3377ec681f3Smrg   case nir_intrinsic_bindless_image_size:
3387ec681f3Smrg   case nir_intrinsic_copy_deref:
3397ec681f3Smrg   case nir_intrinsic_vulkan_resource_index:
3407ec681f3Smrg   case nir_intrinsic_vulkan_resource_reindex:
3417ec681f3Smrg   case nir_intrinsic_load_vulkan_descriptor:
3427ec681f3Smrg   case nir_intrinsic_atomic_counter_read:
3437ec681f3Smrg   case nir_intrinsic_atomic_counter_read_deref:
3447ec681f3Smrg   case nir_intrinsic_quad_swizzle_amd:
3457ec681f3Smrg   case nir_intrinsic_masked_swizzle_amd:
3467ec681f3Smrg   case nir_intrinsic_is_sparse_texels_resident:
3477ec681f3Smrg   case nir_intrinsic_sparse_residency_code_and:
3487ec681f3Smrg   case nir_intrinsic_load_sbt_amd:
3497ec681f3Smrg   case nir_intrinsic_bvh64_intersect_ray_amd:
3507ec681f3Smrg   case nir_intrinsic_get_ubo_size:
3517ec681f3Smrg   case nir_intrinsic_load_ssbo_address: {
3527ec681f3Smrg      unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
3537ec681f3Smrg      for (unsigned i = 0; i < num_srcs; i++) {
3547ec681f3Smrg         if (instr->src[i].ssa->divergent) {
3557ec681f3Smrg            is_divergent = true;
3567ec681f3Smrg            break;
3577ec681f3Smrg         }
3587ec681f3Smrg      }
3597ec681f3Smrg      break;
3607ec681f3Smrg   }
3617ec681f3Smrg
3627ec681f3Smrg   case nir_intrinsic_shuffle:
3637ec681f3Smrg      is_divergent = instr->src[0].ssa->divergent &&
3647ec681f3Smrg                     instr->src[1].ssa->divergent;
3657ec681f3Smrg      break;
3667ec681f3Smrg
3677ec681f3Smrg   /* Intrinsics which are always divergent */
3687ec681f3Smrg   case nir_intrinsic_load_color0:
3697ec681f3Smrg   case nir_intrinsic_load_color1:
3707ec681f3Smrg   case nir_intrinsic_load_param:
3717ec681f3Smrg   case nir_intrinsic_load_sample_id:
3727ec681f3Smrg   case nir_intrinsic_load_sample_id_no_per_sample:
3737ec681f3Smrg   case nir_intrinsic_load_sample_mask_in:
3747ec681f3Smrg   case nir_intrinsic_load_interpolated_input:
3757ec681f3Smrg   case nir_intrinsic_load_barycentric_pixel:
3767ec681f3Smrg   case nir_intrinsic_load_barycentric_centroid:
3777ec681f3Smrg   case nir_intrinsic_load_barycentric_sample:
3787ec681f3Smrg   case nir_intrinsic_load_barycentric_model:
3797ec681f3Smrg   case nir_intrinsic_load_barycentric_at_sample:
3807ec681f3Smrg   case nir_intrinsic_load_barycentric_at_offset:
3817ec681f3Smrg   case nir_intrinsic_interp_deref_at_offset:
3827ec681f3Smrg   case nir_intrinsic_interp_deref_at_sample:
3837ec681f3Smrg   case nir_intrinsic_interp_deref_at_centroid:
3847ec681f3Smrg   case nir_intrinsic_interp_deref_at_vertex:
3857ec681f3Smrg   case nir_intrinsic_load_tess_coord:
3867ec681f3Smrg   case nir_intrinsic_load_point_coord:
3877ec681f3Smrg   case nir_intrinsic_load_line_coord:
3887ec681f3Smrg   case nir_intrinsic_load_frag_coord:
3897ec681f3Smrg   case nir_intrinsic_load_sample_pos:
3907ec681f3Smrg   case nir_intrinsic_load_vertex_id_zero_base:
3917ec681f3Smrg   case nir_intrinsic_load_vertex_id:
3927ec681f3Smrg   case nir_intrinsic_load_instance_id:
3937ec681f3Smrg   case nir_intrinsic_load_invocation_id:
3947ec681f3Smrg   case nir_intrinsic_load_local_invocation_id:
3957ec681f3Smrg   case nir_intrinsic_load_local_invocation_index:
3967ec681f3Smrg   case nir_intrinsic_load_global_invocation_id:
3977ec681f3Smrg   case nir_intrinsic_load_global_invocation_id_zero_base:
3987ec681f3Smrg   case nir_intrinsic_load_global_invocation_index:
3997ec681f3Smrg   case nir_intrinsic_load_subgroup_invocation:
4007ec681f3Smrg   case nir_intrinsic_load_helper_invocation:
4017ec681f3Smrg   case nir_intrinsic_is_helper_invocation:
4027ec681f3Smrg   case nir_intrinsic_load_scratch:
4037ec681f3Smrg   case nir_intrinsic_deref_atomic_add:
4047ec681f3Smrg   case nir_intrinsic_deref_atomic_imin:
4057ec681f3Smrg   case nir_intrinsic_deref_atomic_umin:
4067ec681f3Smrg   case nir_intrinsic_deref_atomic_imax:
4077ec681f3Smrg   case nir_intrinsic_deref_atomic_umax:
4087ec681f3Smrg   case nir_intrinsic_deref_atomic_and:
4097ec681f3Smrg   case nir_intrinsic_deref_atomic_or:
4107ec681f3Smrg   case nir_intrinsic_deref_atomic_xor:
4117ec681f3Smrg   case nir_intrinsic_deref_atomic_exchange:
4127ec681f3Smrg   case nir_intrinsic_deref_atomic_comp_swap:
4137ec681f3Smrg   case nir_intrinsic_deref_atomic_fadd:
4147ec681f3Smrg   case nir_intrinsic_deref_atomic_fmin:
4157ec681f3Smrg   case nir_intrinsic_deref_atomic_fmax:
4167ec681f3Smrg   case nir_intrinsic_deref_atomic_fcomp_swap:
4177ec681f3Smrg   case nir_intrinsic_ssbo_atomic_add:
4187ec681f3Smrg   case nir_intrinsic_ssbo_atomic_imin:
4197ec681f3Smrg   case nir_intrinsic_ssbo_atomic_umin:
4207ec681f3Smrg   case nir_intrinsic_ssbo_atomic_imax:
4217ec681f3Smrg   case nir_intrinsic_ssbo_atomic_umax:
4227ec681f3Smrg   case nir_intrinsic_ssbo_atomic_and:
4237ec681f3Smrg   case nir_intrinsic_ssbo_atomic_or:
4247ec681f3Smrg   case nir_intrinsic_ssbo_atomic_xor:
4257ec681f3Smrg   case nir_intrinsic_ssbo_atomic_exchange:
4267ec681f3Smrg   case nir_intrinsic_ssbo_atomic_comp_swap:
4277ec681f3Smrg   case nir_intrinsic_ssbo_atomic_fadd:
4287ec681f3Smrg   case nir_intrinsic_ssbo_atomic_fmax:
4297ec681f3Smrg   case nir_intrinsic_ssbo_atomic_fmin:
4307ec681f3Smrg   case nir_intrinsic_ssbo_atomic_fcomp_swap:
4317ec681f3Smrg   case nir_intrinsic_image_deref_atomic_add:
4327ec681f3Smrg   case nir_intrinsic_image_deref_atomic_imin:
4337ec681f3Smrg   case nir_intrinsic_image_deref_atomic_umin:
4347ec681f3Smrg   case nir_intrinsic_image_deref_atomic_imax:
4357ec681f3Smrg   case nir_intrinsic_image_deref_atomic_umax:
4367ec681f3Smrg   case nir_intrinsic_image_deref_atomic_and:
4377ec681f3Smrg   case nir_intrinsic_image_deref_atomic_or:
4387ec681f3Smrg   case nir_intrinsic_image_deref_atomic_xor:
4397ec681f3Smrg   case nir_intrinsic_image_deref_atomic_exchange:
4407ec681f3Smrg   case nir_intrinsic_image_deref_atomic_comp_swap:
4417ec681f3Smrg   case nir_intrinsic_image_deref_atomic_fadd:
4427ec681f3Smrg   case nir_intrinsic_image_deref_atomic_fmin:
4437ec681f3Smrg   case nir_intrinsic_image_deref_atomic_fmax:
4447ec681f3Smrg   case nir_intrinsic_image_atomic_add:
4457ec681f3Smrg   case nir_intrinsic_image_atomic_imin:
4467ec681f3Smrg   case nir_intrinsic_image_atomic_umin:
4477ec681f3Smrg   case nir_intrinsic_image_atomic_imax:
4487ec681f3Smrg   case nir_intrinsic_image_atomic_umax:
4497ec681f3Smrg   case nir_intrinsic_image_atomic_and:
4507ec681f3Smrg   case nir_intrinsic_image_atomic_or:
4517ec681f3Smrg   case nir_intrinsic_image_atomic_xor:
4527ec681f3Smrg   case nir_intrinsic_image_atomic_exchange:
4537ec681f3Smrg   case nir_intrinsic_image_atomic_comp_swap:
4547ec681f3Smrg   case nir_intrinsic_image_atomic_fadd:
4557ec681f3Smrg   case nir_intrinsic_image_atomic_fmin:
4567ec681f3Smrg   case nir_intrinsic_image_atomic_fmax:
4577ec681f3Smrg   case nir_intrinsic_bindless_image_atomic_add:
4587ec681f3Smrg   case nir_intrinsic_bindless_image_atomic_imin:
4597ec681f3Smrg   case nir_intrinsic_bindless_image_atomic_umin:
4607ec681f3Smrg   case nir_intrinsic_bindless_image_atomic_imax:
4617ec681f3Smrg   case nir_intrinsic_bindless_image_atomic_umax:
4627ec681f3Smrg   case nir_intrinsic_bindless_image_atomic_and:
4637ec681f3Smrg   case nir_intrinsic_bindless_image_atomic_or:
4647ec681f3Smrg   case nir_intrinsic_bindless_image_atomic_xor:
4657ec681f3Smrg   case nir_intrinsic_bindless_image_atomic_exchange:
4667ec681f3Smrg   case nir_intrinsic_bindless_image_atomic_comp_swap:
4677ec681f3Smrg   case nir_intrinsic_bindless_image_atomic_fadd:
4687ec681f3Smrg   case nir_intrinsic_bindless_image_atomic_fmin:
4697ec681f3Smrg   case nir_intrinsic_bindless_image_atomic_fmax:
4707ec681f3Smrg   case nir_intrinsic_shared_atomic_add:
4717ec681f3Smrg   case nir_intrinsic_shared_atomic_imin:
4727ec681f3Smrg   case nir_intrinsic_shared_atomic_umin:
4737ec681f3Smrg   case nir_intrinsic_shared_atomic_imax:
4747ec681f3Smrg   case nir_intrinsic_shared_atomic_umax:
4757ec681f3Smrg   case nir_intrinsic_shared_atomic_and:
4767ec681f3Smrg   case nir_intrinsic_shared_atomic_or:
4777ec681f3Smrg   case nir_intrinsic_shared_atomic_xor:
4787ec681f3Smrg   case nir_intrinsic_shared_atomic_exchange:
4797ec681f3Smrg   case nir_intrinsic_shared_atomic_comp_swap:
4807ec681f3Smrg   case nir_intrinsic_shared_atomic_fadd:
4817ec681f3Smrg   case nir_intrinsic_shared_atomic_fmin:
4827ec681f3Smrg   case nir_intrinsic_shared_atomic_fmax:
4837ec681f3Smrg   case nir_intrinsic_shared_atomic_fcomp_swap:
4847ec681f3Smrg   case nir_intrinsic_global_atomic_add:
4857ec681f3Smrg   case nir_intrinsic_global_atomic_imin:
4867ec681f3Smrg   case nir_intrinsic_global_atomic_umin:
4877ec681f3Smrg   case nir_intrinsic_global_atomic_imax:
4887ec681f3Smrg   case nir_intrinsic_global_atomic_umax:
4897ec681f3Smrg   case nir_intrinsic_global_atomic_and:
4907ec681f3Smrg   case nir_intrinsic_global_atomic_or:
4917ec681f3Smrg   case nir_intrinsic_global_atomic_xor:
4927ec681f3Smrg   case nir_intrinsic_global_atomic_exchange:
4937ec681f3Smrg   case nir_intrinsic_global_atomic_comp_swap:
4947ec681f3Smrg   case nir_intrinsic_global_atomic_fadd:
4957ec681f3Smrg   case nir_intrinsic_global_atomic_fmin:
4967ec681f3Smrg   case nir_intrinsic_global_atomic_fmax:
4977ec681f3Smrg   case nir_intrinsic_global_atomic_fcomp_swap:
4987ec681f3Smrg   case nir_intrinsic_atomic_counter_add:
4997ec681f3Smrg   case nir_intrinsic_atomic_counter_min:
5007ec681f3Smrg   case nir_intrinsic_atomic_counter_max:
5017ec681f3Smrg   case nir_intrinsic_atomic_counter_and:
5027ec681f3Smrg   case nir_intrinsic_atomic_counter_or:
5037ec681f3Smrg   case nir_intrinsic_atomic_counter_xor:
5047ec681f3Smrg   case nir_intrinsic_atomic_counter_inc:
5057ec681f3Smrg   case nir_intrinsic_atomic_counter_pre_dec:
5067ec681f3Smrg   case nir_intrinsic_atomic_counter_post_dec:
5077ec681f3Smrg   case nir_intrinsic_atomic_counter_exchange:
5087ec681f3Smrg   case nir_intrinsic_atomic_counter_comp_swap:
5097ec681f3Smrg   case nir_intrinsic_atomic_counter_add_deref:
5107ec681f3Smrg   case nir_intrinsic_atomic_counter_min_deref:
5117ec681f3Smrg   case nir_intrinsic_atomic_counter_max_deref:
5127ec681f3Smrg   case nir_intrinsic_atomic_counter_and_deref:
5137ec681f3Smrg   case nir_intrinsic_atomic_counter_or_deref:
5147ec681f3Smrg   case nir_intrinsic_atomic_counter_xor_deref:
5157ec681f3Smrg   case nir_intrinsic_atomic_counter_inc_deref:
5167ec681f3Smrg   case nir_intrinsic_atomic_counter_pre_dec_deref:
5177ec681f3Smrg   case nir_intrinsic_atomic_counter_post_dec_deref:
5187ec681f3Smrg   case nir_intrinsic_atomic_counter_exchange_deref:
5197ec681f3Smrg   case nir_intrinsic_atomic_counter_comp_swap_deref:
5207ec681f3Smrg   case nir_intrinsic_exclusive_scan:
5217ec681f3Smrg   case nir_intrinsic_ballot_bit_count_exclusive:
5227ec681f3Smrg   case nir_intrinsic_ballot_bit_count_inclusive:
5237ec681f3Smrg   case nir_intrinsic_write_invocation_amd:
5247ec681f3Smrg   case nir_intrinsic_mbcnt_amd:
5257ec681f3Smrg   case nir_intrinsic_lane_permute_16_amd:
5267ec681f3Smrg   case nir_intrinsic_elect:
5277ec681f3Smrg   case nir_intrinsic_load_tlb_color_v3d:
5287ec681f3Smrg   case nir_intrinsic_load_tess_rel_patch_id_amd:
5297ec681f3Smrg   case nir_intrinsic_load_gs_vertex_offset_amd:
5307ec681f3Smrg   case nir_intrinsic_has_input_vertex_amd:
5317ec681f3Smrg   case nir_intrinsic_has_input_primitive_amd:
5327ec681f3Smrg   case nir_intrinsic_load_packed_passthrough_primitive_amd:
5337ec681f3Smrg   case nir_intrinsic_load_initial_edgeflags_amd:
5347ec681f3Smrg   case nir_intrinsic_gds_atomic_add_amd:
5357ec681f3Smrg   case nir_intrinsic_load_rt_arg_scratch_offset_amd:
5367ec681f3Smrg   case nir_intrinsic_load_intersection_opaque_amd:
5377ec681f3Smrg      is_divergent = true;
5387ec681f3Smrg      break;
5397ec681f3Smrg
5407ec681f3Smrg   default:
5417ec681f3Smrg#ifdef NDEBUG
5427ec681f3Smrg      is_divergent = true;
5437ec681f3Smrg      break;
5447ec681f3Smrg#else
5457ec681f3Smrg      nir_print_instr(&instr->instr, stderr);
5467ec681f3Smrg      unreachable("\nNIR divergence analysis: Unhandled intrinsic.");
5477ec681f3Smrg#endif
5487ec681f3Smrg   }
5497ec681f3Smrg
5507ec681f3Smrg   instr->dest.ssa.divergent = is_divergent;
5517ec681f3Smrg   return is_divergent;
5527ec681f3Smrg}
5537ec681f3Smrg
5547ec681f3Smrgstatic bool
5557ec681f3Smrgvisit_tex(nir_tex_instr *instr)
5567ec681f3Smrg{
5577ec681f3Smrg   if (instr->dest.ssa.divergent)
5587ec681f3Smrg      return false;
5597ec681f3Smrg
5607ec681f3Smrg   bool is_divergent = false;
5617ec681f3Smrg
5627ec681f3Smrg   for (unsigned i = 0; i < instr->num_srcs; i++) {
5637ec681f3Smrg      switch (instr->src[i].src_type) {
5647ec681f3Smrg      case nir_tex_src_sampler_deref:
5657ec681f3Smrg      case nir_tex_src_sampler_handle:
5667ec681f3Smrg      case nir_tex_src_sampler_offset:
5677ec681f3Smrg         is_divergent |= instr->src[i].src.ssa->divergent &&
5687ec681f3Smrg                         instr->sampler_non_uniform;
5697ec681f3Smrg         break;
5707ec681f3Smrg      case nir_tex_src_texture_deref:
5717ec681f3Smrg      case nir_tex_src_texture_handle:
5727ec681f3Smrg      case nir_tex_src_texture_offset:
5737ec681f3Smrg         is_divergent |= instr->src[i].src.ssa->divergent &&
5747ec681f3Smrg                         instr->texture_non_uniform;
5757ec681f3Smrg         break;
5767ec681f3Smrg      default:
5777ec681f3Smrg         is_divergent |= instr->src[i].src.ssa->divergent;
5787ec681f3Smrg         break;
5797ec681f3Smrg      }
5807ec681f3Smrg   }
5817ec681f3Smrg
5827ec681f3Smrg   instr->dest.ssa.divergent = is_divergent;
5837ec681f3Smrg   return is_divergent;
5847ec681f3Smrg}
5857ec681f3Smrg
5867ec681f3Smrgstatic bool
5877ec681f3Smrgvisit_load_const(nir_load_const_instr *instr)
5887ec681f3Smrg{
5897ec681f3Smrg   return false;
5907ec681f3Smrg}
5917ec681f3Smrg
5927ec681f3Smrgstatic bool
5937ec681f3Smrgvisit_ssa_undef(nir_ssa_undef_instr *instr)
5947ec681f3Smrg{
5957ec681f3Smrg   return false;
5967ec681f3Smrg}
5977ec681f3Smrg
5987ec681f3Smrgstatic bool
5997ec681f3Smrgnir_variable_mode_is_uniform(nir_variable_mode mode) {
6007ec681f3Smrg   switch (mode) {
6017ec681f3Smrg   case nir_var_uniform:
6027ec681f3Smrg   case nir_var_mem_ubo:
6037ec681f3Smrg   case nir_var_mem_ssbo:
6047ec681f3Smrg   case nir_var_mem_shared:
6057ec681f3Smrg   case nir_var_mem_global:
6067ec681f3Smrg      return true;
6077ec681f3Smrg   default:
6087ec681f3Smrg      return false;
6097ec681f3Smrg   }
6107ec681f3Smrg}
6117ec681f3Smrg
6127ec681f3Smrgstatic bool
6137ec681f3Smrgnir_variable_is_uniform(nir_shader *shader, nir_variable *var)
6147ec681f3Smrg{
6157ec681f3Smrg   if (nir_variable_mode_is_uniform(var->data.mode))
6167ec681f3Smrg      return true;
6177ec681f3Smrg
6187ec681f3Smrg   nir_divergence_options options = shader->options->divergence_analysis_options;
6197ec681f3Smrg   gl_shader_stage stage = shader->info.stage;
6207ec681f3Smrg
6217ec681f3Smrg   if (stage == MESA_SHADER_FRAGMENT &&
6227ec681f3Smrg       (options & nir_divergence_single_prim_per_subgroup) &&
6237ec681f3Smrg       var->data.mode == nir_var_shader_in &&
6247ec681f3Smrg       var->data.interpolation == INTERP_MODE_FLAT)
6257ec681f3Smrg      return true;
6267ec681f3Smrg
6277ec681f3Smrg   if (stage == MESA_SHADER_TESS_CTRL &&
6287ec681f3Smrg       (options & nir_divergence_single_patch_per_tcs_subgroup) &&
6297ec681f3Smrg       var->data.mode == nir_var_shader_out && var->data.patch)
6307ec681f3Smrg      return true;
6317ec681f3Smrg
6327ec681f3Smrg   if (stage == MESA_SHADER_TESS_EVAL &&
6337ec681f3Smrg       (options & nir_divergence_single_patch_per_tes_subgroup) &&
6347ec681f3Smrg       var->data.mode == nir_var_shader_in && var->data.patch)
6357ec681f3Smrg      return true;
6367ec681f3Smrg
6377ec681f3Smrg   return false;
6387ec681f3Smrg}
6397ec681f3Smrg
6407ec681f3Smrgstatic bool
6417ec681f3Smrgvisit_deref(nir_shader *shader, nir_deref_instr *deref)
6427ec681f3Smrg{
6437ec681f3Smrg   if (deref->dest.ssa.divergent)
6447ec681f3Smrg      return false;
6457ec681f3Smrg
6467ec681f3Smrg   bool is_divergent = false;
6477ec681f3Smrg   switch (deref->deref_type) {
6487ec681f3Smrg   case nir_deref_type_var:
6497ec681f3Smrg      is_divergent = !nir_variable_is_uniform(shader, deref->var);
6507ec681f3Smrg      break;
6517ec681f3Smrg   case nir_deref_type_array:
6527ec681f3Smrg   case nir_deref_type_ptr_as_array:
6537ec681f3Smrg      is_divergent = deref->arr.index.ssa->divergent;
6547ec681f3Smrg      FALLTHROUGH;
6557ec681f3Smrg   case nir_deref_type_struct:
6567ec681f3Smrg   case nir_deref_type_array_wildcard:
6577ec681f3Smrg      is_divergent |= deref->parent.ssa->divergent;
6587ec681f3Smrg      break;
6597ec681f3Smrg   case nir_deref_type_cast:
6607ec681f3Smrg      is_divergent = !nir_variable_mode_is_uniform(deref->var->data.mode) ||
6617ec681f3Smrg                     deref->parent.ssa->divergent;
6627ec681f3Smrg      break;
6637ec681f3Smrg   }
6647ec681f3Smrg
6657ec681f3Smrg   deref->dest.ssa.divergent = is_divergent;
6667ec681f3Smrg   return is_divergent;
6677ec681f3Smrg}
6687ec681f3Smrg
6697ec681f3Smrgstatic bool
6707ec681f3Smrgvisit_jump(nir_jump_instr *jump, struct divergence_state *state)
6717ec681f3Smrg{
6727ec681f3Smrg   switch (jump->type) {
6737ec681f3Smrg   case nir_jump_continue:
6747ec681f3Smrg      if (state->divergent_loop_continue)
6757ec681f3Smrg         return false;
6767ec681f3Smrg      if (state->divergent_loop_cf)
6777ec681f3Smrg         state->divergent_loop_continue = true;
6787ec681f3Smrg      return state->divergent_loop_continue;
6797ec681f3Smrg   case nir_jump_break:
6807ec681f3Smrg      if (state->divergent_loop_break)
6817ec681f3Smrg         return false;
6827ec681f3Smrg      if (state->divergent_loop_cf)
6837ec681f3Smrg         state->divergent_loop_break = true;
6847ec681f3Smrg      return state->divergent_loop_break;
6857ec681f3Smrg   case nir_jump_halt:
6867ec681f3Smrg      /* This totally kills invocations so it doesn't add divergence */
6877ec681f3Smrg      break;
6887ec681f3Smrg   case nir_jump_return:
6897ec681f3Smrg      unreachable("NIR divergence analysis: Unsupported return instruction.");
6907ec681f3Smrg      break;
6917ec681f3Smrg   case nir_jump_goto:
6927ec681f3Smrg   case nir_jump_goto_if:
6937ec681f3Smrg      unreachable("NIR divergence analysis: Unsupported goto_if instruction.");
6947ec681f3Smrg      break;
6957ec681f3Smrg   }
6967ec681f3Smrg   return false;
6977ec681f3Smrg}
6987ec681f3Smrg
6997ec681f3Smrgstatic bool
7007ec681f3Smrgset_ssa_def_not_divergent(nir_ssa_def *def, UNUSED void *_state)
7017ec681f3Smrg{
7027ec681f3Smrg   def->divergent = false;
7037ec681f3Smrg   return true;
7047ec681f3Smrg}
7057ec681f3Smrg
7067ec681f3Smrgstatic bool
7077ec681f3Smrgupdate_instr_divergence(nir_shader *shader, nir_instr *instr)
7087ec681f3Smrg{
7097ec681f3Smrg   switch (instr->type) {
7107ec681f3Smrg   case nir_instr_type_alu:
7117ec681f3Smrg      return visit_alu(nir_instr_as_alu(instr));
7127ec681f3Smrg   case nir_instr_type_intrinsic:
7137ec681f3Smrg      return visit_intrinsic(shader, nir_instr_as_intrinsic(instr));
7147ec681f3Smrg   case nir_instr_type_tex:
7157ec681f3Smrg      return visit_tex(nir_instr_as_tex(instr));
7167ec681f3Smrg   case nir_instr_type_load_const:
7177ec681f3Smrg      return visit_load_const(nir_instr_as_load_const(instr));
7187ec681f3Smrg   case nir_instr_type_ssa_undef:
7197ec681f3Smrg      return visit_ssa_undef(nir_instr_as_ssa_undef(instr));
7207ec681f3Smrg   case nir_instr_type_deref:
7217ec681f3Smrg      return visit_deref(shader, nir_instr_as_deref(instr));
7227ec681f3Smrg   case nir_instr_type_jump:
7237ec681f3Smrg   case nir_instr_type_phi:
7247ec681f3Smrg   case nir_instr_type_call:
7257ec681f3Smrg   case nir_instr_type_parallel_copy:
7267ec681f3Smrg   default:
7277ec681f3Smrg      unreachable("NIR divergence analysis: Unsupported instruction type.");
7287ec681f3Smrg   }
7297ec681f3Smrg}
7307ec681f3Smrg
7317ec681f3Smrgstatic bool
7327ec681f3Smrgvisit_block(nir_block *block, struct divergence_state *state)
7337ec681f3Smrg{
7347ec681f3Smrg   bool has_changed = false;
7357ec681f3Smrg
7367ec681f3Smrg   nir_foreach_instr(instr, block) {
7377ec681f3Smrg      /* phis are handled when processing the branches */
7387ec681f3Smrg      if (instr->type == nir_instr_type_phi)
7397ec681f3Smrg         continue;
7407ec681f3Smrg
7417ec681f3Smrg      if (state->first_visit)
7427ec681f3Smrg         nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL);
7437ec681f3Smrg
7447ec681f3Smrg      if (instr->type == nir_instr_type_jump)
7457ec681f3Smrg         has_changed |= visit_jump(nir_instr_as_jump(instr), state);
7467ec681f3Smrg      else
7477ec681f3Smrg         has_changed |= update_instr_divergence(state->shader, instr);
7487ec681f3Smrg   }
7497ec681f3Smrg
7507ec681f3Smrg   return has_changed;
7517ec681f3Smrg}
7527ec681f3Smrg
7537ec681f3Smrg/* There are 3 types of phi instructions:
7547ec681f3Smrg * (1) gamma: represent the joining point of different paths
7557ec681f3Smrg *     created by an “if-then-else” branch.
7567ec681f3Smrg *     The resulting value is divergent if the branch condition
7577ec681f3Smrg *     or any of the source values is divergent. */
7587ec681f3Smrgstatic bool
7597ec681f3Smrgvisit_if_merge_phi(nir_phi_instr *phi, bool if_cond_divergent)
7607ec681f3Smrg{
7617ec681f3Smrg   if (phi->dest.ssa.divergent)
7627ec681f3Smrg      return false;
7637ec681f3Smrg
7647ec681f3Smrg   unsigned defined_srcs = 0;
7657ec681f3Smrg   nir_foreach_phi_src(src, phi) {
7667ec681f3Smrg      /* if any source value is divergent, the resulting value is divergent */
7677ec681f3Smrg      if (src->src.ssa->divergent) {
7687ec681f3Smrg         phi->dest.ssa.divergent = true;
7697ec681f3Smrg         return true;
7707ec681f3Smrg      }
7717ec681f3Smrg      if (src->src.ssa->parent_instr->type != nir_instr_type_ssa_undef) {
7727ec681f3Smrg         defined_srcs++;
7737ec681f3Smrg      }
7747ec681f3Smrg   }
7757ec681f3Smrg
7767ec681f3Smrg   /* if the condition is divergent and two sources defined, the definition is divergent */
7777ec681f3Smrg   if (defined_srcs > 1 && if_cond_divergent) {
7787ec681f3Smrg      phi->dest.ssa.divergent = true;
7797ec681f3Smrg      return true;
7807ec681f3Smrg   }
7817ec681f3Smrg
7827ec681f3Smrg   return false;
7837ec681f3Smrg}
7847ec681f3Smrg
7857ec681f3Smrg/* There are 3 types of phi instructions:
7867ec681f3Smrg * (2) mu: which only exist at loop headers,
7877ec681f3Smrg *     merge initial and loop-carried values.
7887ec681f3Smrg *     The resulting value is divergent if any source value
7897ec681f3Smrg *     is divergent or a divergent loop continue condition
7907ec681f3Smrg *     is associated with a different ssa-def. */
7917ec681f3Smrgstatic bool
7927ec681f3Smrgvisit_loop_header_phi(nir_phi_instr *phi, nir_block *preheader, bool divergent_continue)
7937ec681f3Smrg{
7947ec681f3Smrg   if (phi->dest.ssa.divergent)
7957ec681f3Smrg      return false;
7967ec681f3Smrg
7977ec681f3Smrg   nir_ssa_def* same = NULL;
7987ec681f3Smrg   nir_foreach_phi_src(src, phi) {
7997ec681f3Smrg      /* if any source value is divergent, the resulting value is divergent */
8007ec681f3Smrg      if (src->src.ssa->divergent) {
8017ec681f3Smrg         phi->dest.ssa.divergent = true;
8027ec681f3Smrg         return true;
8037ec681f3Smrg      }
8047ec681f3Smrg      /* if this loop is uniform, we're done here */
8057ec681f3Smrg      if (!divergent_continue)
8067ec681f3Smrg         continue;
8077ec681f3Smrg      /* skip the loop preheader */
8087ec681f3Smrg      if (src->pred == preheader)
8097ec681f3Smrg         continue;
8107ec681f3Smrg      /* skip undef values */
8117ec681f3Smrg      if (nir_src_is_undef(src->src))
8127ec681f3Smrg         continue;
8137ec681f3Smrg
8147ec681f3Smrg      /* check if all loop-carried values are from the same ssa-def */
8157ec681f3Smrg      if (!same)
8167ec681f3Smrg         same = src->src.ssa;
8177ec681f3Smrg      else if (same != src->src.ssa) {
8187ec681f3Smrg         phi->dest.ssa.divergent = true;
8197ec681f3Smrg         return true;
8207ec681f3Smrg      }
8217ec681f3Smrg   }
8227ec681f3Smrg
8237ec681f3Smrg   return false;
8247ec681f3Smrg}
8257ec681f3Smrg
8267ec681f3Smrg/* There are 3 types of phi instructions:
8277ec681f3Smrg * (3) eta: represent values that leave a loop.
8287ec681f3Smrg *     The resulting value is divergent if the source value is divergent
8297ec681f3Smrg *     or any loop exit condition is divergent for a value which is
8307ec681f3Smrg *     not loop-invariant.
8317ec681f3Smrg *     (note: there should be no phi for loop-invariant variables.) */
8327ec681f3Smrgstatic bool
8337ec681f3Smrgvisit_loop_exit_phi(nir_phi_instr *phi, bool divergent_break)
8347ec681f3Smrg{
8357ec681f3Smrg   if (phi->dest.ssa.divergent)
8367ec681f3Smrg      return false;
8377ec681f3Smrg
8387ec681f3Smrg   if (divergent_break) {
8397ec681f3Smrg      phi->dest.ssa.divergent = true;
8407ec681f3Smrg      return true;
8417ec681f3Smrg   }
8427ec681f3Smrg
8437ec681f3Smrg   /* if any source value is divergent, the resulting value is divergent */
8447ec681f3Smrg   nir_foreach_phi_src(src, phi) {
8457ec681f3Smrg      if (src->src.ssa->divergent) {
8467ec681f3Smrg         phi->dest.ssa.divergent = true;
8477ec681f3Smrg         return true;
8487ec681f3Smrg      }
8497ec681f3Smrg   }
8507ec681f3Smrg
8517ec681f3Smrg   return false;
8527ec681f3Smrg}
8537ec681f3Smrg
8547ec681f3Smrgstatic bool
8557ec681f3Smrgvisit_if(nir_if *if_stmt, struct divergence_state *state)
8567ec681f3Smrg{
8577ec681f3Smrg   bool progress = false;
8587ec681f3Smrg
8597ec681f3Smrg   struct divergence_state then_state = *state;
8607ec681f3Smrg   then_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
8617ec681f3Smrg   progress |= visit_cf_list(&if_stmt->then_list, &then_state);
8627ec681f3Smrg
8637ec681f3Smrg   struct divergence_state else_state = *state;
8647ec681f3Smrg   else_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
8657ec681f3Smrg   progress |= visit_cf_list(&if_stmt->else_list, &else_state);
8667ec681f3Smrg
8677ec681f3Smrg   /* handle phis after the IF */
8687ec681f3Smrg   nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&if_stmt->cf_node)) {
8697ec681f3Smrg      if (instr->type != nir_instr_type_phi)
8707ec681f3Smrg         break;
8717ec681f3Smrg
8727ec681f3Smrg      if (state->first_visit)
8737ec681f3Smrg         nir_instr_as_phi(instr)->dest.ssa.divergent = false;
8747ec681f3Smrg      progress |= visit_if_merge_phi(nir_instr_as_phi(instr),
8757ec681f3Smrg                                     if_stmt->condition.ssa->divergent);
8767ec681f3Smrg   }
8777ec681f3Smrg
8787ec681f3Smrg   /* join loop divergence information from both branch legs */
8797ec681f3Smrg   state->divergent_loop_continue |= then_state.divergent_loop_continue ||
8807ec681f3Smrg                                     else_state.divergent_loop_continue;
8817ec681f3Smrg   state->divergent_loop_break |= then_state.divergent_loop_break ||
8827ec681f3Smrg                                  else_state.divergent_loop_break;
8837ec681f3Smrg
8847ec681f3Smrg   /* A divergent continue makes succeeding loop CF divergent:
8857ec681f3Smrg    * not all loop-active invocations participate in the remaining loop-body
8867ec681f3Smrg    * which means that a following break might be taken by some invocations, only */
8877ec681f3Smrg   state->divergent_loop_cf |= state->divergent_loop_continue;
8887ec681f3Smrg
8897ec681f3Smrg   return progress;
8907ec681f3Smrg}
8917ec681f3Smrg
8927ec681f3Smrgstatic bool
8937ec681f3Smrgvisit_loop(nir_loop *loop, struct divergence_state *state)
8947ec681f3Smrg{
8957ec681f3Smrg   bool progress = false;
8967ec681f3Smrg   nir_block *loop_header = nir_loop_first_block(loop);
8977ec681f3Smrg   nir_block *loop_preheader = nir_block_cf_tree_prev(loop_header);
8987ec681f3Smrg
8997ec681f3Smrg   /* handle loop header phis first: we have no knowledge yet about
9007ec681f3Smrg    * the loop's control flow or any loop-carried sources. */
9017ec681f3Smrg   nir_foreach_instr(instr, loop_header) {
9027ec681f3Smrg      if (instr->type != nir_instr_type_phi)
9037ec681f3Smrg         break;
9047ec681f3Smrg
9057ec681f3Smrg      nir_phi_instr *phi = nir_instr_as_phi(instr);
9067ec681f3Smrg      if (!state->first_visit && phi->dest.ssa.divergent)
9077ec681f3Smrg         continue;
9087ec681f3Smrg
9097ec681f3Smrg      nir_foreach_phi_src(src, phi) {
9107ec681f3Smrg         if (src->pred == loop_preheader) {
9117ec681f3Smrg            phi->dest.ssa.divergent = src->src.ssa->divergent;
9127ec681f3Smrg            break;
9137ec681f3Smrg         }
9147ec681f3Smrg      }
9157ec681f3Smrg      progress |= phi->dest.ssa.divergent;
9167ec681f3Smrg   }
9177ec681f3Smrg
9187ec681f3Smrg   /* setup loop state */
9197ec681f3Smrg   struct divergence_state loop_state = *state;
9207ec681f3Smrg   loop_state.divergent_loop_cf = false;
9217ec681f3Smrg   loop_state.divergent_loop_continue = false;
9227ec681f3Smrg   loop_state.divergent_loop_break = false;
9237ec681f3Smrg
9247ec681f3Smrg   /* process loop body until no further changes are made */
9257ec681f3Smrg   bool repeat;
9267ec681f3Smrg   do {
9277ec681f3Smrg      progress |= visit_cf_list(&loop->body, &loop_state);
9287ec681f3Smrg      repeat = false;
9297ec681f3Smrg
9307ec681f3Smrg      /* revisit loop header phis to see if something has changed */
9317ec681f3Smrg      nir_foreach_instr(instr, loop_header) {
9327ec681f3Smrg         if (instr->type != nir_instr_type_phi)
9337ec681f3Smrg            break;
9347ec681f3Smrg
9357ec681f3Smrg         repeat |= visit_loop_header_phi(nir_instr_as_phi(instr),
9367ec681f3Smrg                                         loop_preheader,
9377ec681f3Smrg                                         loop_state.divergent_loop_continue);
9387ec681f3Smrg      }
9397ec681f3Smrg
9407ec681f3Smrg      loop_state.divergent_loop_cf = false;
9417ec681f3Smrg      loop_state.first_visit = false;
9427ec681f3Smrg   } while (repeat);
9437ec681f3Smrg
9447ec681f3Smrg   /* handle phis after the loop */
9457ec681f3Smrg   nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&loop->cf_node)) {
9467ec681f3Smrg      if (instr->type != nir_instr_type_phi)
9477ec681f3Smrg         break;
9487ec681f3Smrg
9497ec681f3Smrg      if (state->first_visit)
9507ec681f3Smrg         nir_instr_as_phi(instr)->dest.ssa.divergent = false;
9517ec681f3Smrg      progress |= visit_loop_exit_phi(nir_instr_as_phi(instr),
9527ec681f3Smrg                                      loop_state.divergent_loop_break);
9537ec681f3Smrg   }
9547ec681f3Smrg
9557ec681f3Smrg   loop->divergent = (loop_state.divergent_loop_break || loop_state.divergent_loop_continue);
9567ec681f3Smrg
9577ec681f3Smrg   return progress;
9587ec681f3Smrg}
9597ec681f3Smrg
9607ec681f3Smrgstatic bool
9617ec681f3Smrgvisit_cf_list(struct exec_list *list, struct divergence_state *state)
9627ec681f3Smrg{
9637ec681f3Smrg   bool has_changed = false;
9647ec681f3Smrg
9657ec681f3Smrg   foreach_list_typed(nir_cf_node, node, node, list) {
9667ec681f3Smrg      switch (node->type) {
9677ec681f3Smrg      case nir_cf_node_block:
9687ec681f3Smrg         has_changed |= visit_block(nir_cf_node_as_block(node), state);
9697ec681f3Smrg         break;
9707ec681f3Smrg      case nir_cf_node_if:
9717ec681f3Smrg         has_changed |= visit_if(nir_cf_node_as_if(node), state);
9727ec681f3Smrg         break;
9737ec681f3Smrg      case nir_cf_node_loop:
9747ec681f3Smrg         has_changed |= visit_loop(nir_cf_node_as_loop(node), state);
9757ec681f3Smrg         break;
9767ec681f3Smrg      case nir_cf_node_function:
9777ec681f3Smrg         unreachable("NIR divergence analysis: Unsupported cf_node type.");
9787ec681f3Smrg      }
9797ec681f3Smrg   }
9807ec681f3Smrg
9817ec681f3Smrg   return has_changed;
9827ec681f3Smrg}
9837ec681f3Smrg
9847ec681f3Smrgvoid
9857ec681f3Smrgnir_divergence_analysis(nir_shader *shader)
9867ec681f3Smrg{
9877ec681f3Smrg   struct divergence_state state = {
9887ec681f3Smrg      .stage = shader->info.stage,
9897ec681f3Smrg      .shader = shader,
9907ec681f3Smrg      .divergent_loop_cf = false,
9917ec681f3Smrg      .divergent_loop_continue = false,
9927ec681f3Smrg      .divergent_loop_break = false,
9937ec681f3Smrg      .first_visit = true,
9947ec681f3Smrg   };
9957ec681f3Smrg
9967ec681f3Smrg   visit_cf_list(&nir_shader_get_entrypoint(shader)->body, &state);
9977ec681f3Smrg}
9987ec681f3Smrg
9997ec681f3Smrgbool nir_update_instr_divergence(nir_shader *shader, nir_instr *instr)
10007ec681f3Smrg{
10017ec681f3Smrg   nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL);
10027ec681f3Smrg
10037ec681f3Smrg   if (instr->type == nir_instr_type_phi) {
10047ec681f3Smrg      nir_cf_node *prev = nir_cf_node_prev(&instr->block->cf_node);
10057ec681f3Smrg      /* can only update gamma/if phis */
10067ec681f3Smrg      if (!prev || prev->type != nir_cf_node_if)
10077ec681f3Smrg         return false;
10087ec681f3Smrg
10097ec681f3Smrg      nir_if *nif = nir_cf_node_as_if(prev);
10107ec681f3Smrg
10117ec681f3Smrg      visit_if_merge_phi(nir_instr_as_phi(instr), nir_src_is_divergent(nif->condition));
10127ec681f3Smrg      return true;
10137ec681f3Smrg   }
10147ec681f3Smrg
10157ec681f3Smrg   update_instr_divergence(shader, instr);
10167ec681f3Smrg   return true;
10177ec681f3Smrg}
10187ec681f3Smrg
1019