17ec681f3Smrg/* 27ec681f3Smrg * Copyright © 2018 Valve Corporation 37ec681f3Smrg * 47ec681f3Smrg * Permission is hereby granted, free of charge, to any person obtaining a 57ec681f3Smrg * copy of this software and associated documentation files (the "Software"), 67ec681f3Smrg * to deal in the Software without restriction, including without limitation 77ec681f3Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 87ec681f3Smrg * and/or sell copies of the Software, and to permit persons to whom the 97ec681f3Smrg * Software is furnished to do so, subject to the following conditions: 107ec681f3Smrg * 117ec681f3Smrg * The above copyright notice and this permission notice (including the next 127ec681f3Smrg * paragraph) shall be included in all copies or substantial portions of the 137ec681f3Smrg * Software. 147ec681f3Smrg * 157ec681f3Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 167ec681f3Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 177ec681f3Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 187ec681f3Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 197ec681f3Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 207ec681f3Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 217ec681f3Smrg * IN THE SOFTWARE. 227ec681f3Smrg * 237ec681f3Smrg */ 247ec681f3Smrg 257ec681f3Smrg#include "nir.h" 267ec681f3Smrg 277ec681f3Smrg/* This pass computes for each ssa definition if it is uniform. 287ec681f3Smrg * That is, the variable has the same value for all invocations 297ec681f3Smrg * of the group. 307ec681f3Smrg * 317ec681f3Smrg * This divergence analysis pass expects the shader to be in LCSSA-form. 327ec681f3Smrg * 337ec681f3Smrg * This algorithm implements "The Simple Divergence Analysis" from 347ec681f3Smrg * Diogo Sampaio, Rafael De Souza, Sylvain Collange, Fernando Magno Quintão Pereira. 357ec681f3Smrg * Divergence Analysis. ACM Transactions on Programming Languages and Systems (TOPLAS), 367ec681f3Smrg * ACM, 2013, 35 (4), pp.13:1-13:36. <10.1145/2523815>. <hal-00909072v2> 377ec681f3Smrg */ 387ec681f3Smrg 397ec681f3Smrgstruct divergence_state { 407ec681f3Smrg const gl_shader_stage stage; 417ec681f3Smrg nir_shader *shader; 427ec681f3Smrg 437ec681f3Smrg /** current control flow state */ 447ec681f3Smrg /* True if some loop-active invocations might take a different control-flow path. 457ec681f3Smrg * A divergent break does not cause subsequent control-flow to be considered 467ec681f3Smrg * divergent because those invocations are no longer active in the loop. 477ec681f3Smrg * For a divergent if, both sides are considered divergent flow because 487ec681f3Smrg * the other side is still loop-active. */ 497ec681f3Smrg bool divergent_loop_cf; 507ec681f3Smrg /* True if a divergent continue happened since the loop header */ 517ec681f3Smrg bool divergent_loop_continue; 527ec681f3Smrg /* True if a divergent break happened since the loop header */ 537ec681f3Smrg bool divergent_loop_break; 547ec681f3Smrg 557ec681f3Smrg /* True if we visit the block for the fist time */ 567ec681f3Smrg bool first_visit; 577ec681f3Smrg}; 587ec681f3Smrg 597ec681f3Smrgstatic bool 607ec681f3Smrgvisit_cf_list(struct exec_list *list, struct divergence_state *state); 617ec681f3Smrg 627ec681f3Smrgstatic bool 637ec681f3Smrgvisit_alu(nir_alu_instr *instr) 647ec681f3Smrg{ 657ec681f3Smrg if (instr->dest.dest.ssa.divergent) 667ec681f3Smrg return false; 677ec681f3Smrg 687ec681f3Smrg unsigned num_src = nir_op_infos[instr->op].num_inputs; 697ec681f3Smrg 707ec681f3Smrg for (unsigned i = 0; i < num_src; i++) { 717ec681f3Smrg if (instr->src[i].src.ssa->divergent) { 727ec681f3Smrg instr->dest.dest.ssa.divergent = true; 737ec681f3Smrg return true; 747ec681f3Smrg } 757ec681f3Smrg } 767ec681f3Smrg 777ec681f3Smrg return false; 787ec681f3Smrg} 797ec681f3Smrg 807ec681f3Smrgstatic bool 817ec681f3Smrgvisit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr) 827ec681f3Smrg{ 837ec681f3Smrg if (!nir_intrinsic_infos[instr->intrinsic].has_dest) 847ec681f3Smrg return false; 857ec681f3Smrg 867ec681f3Smrg if (instr->dest.ssa.divergent) 877ec681f3Smrg return false; 887ec681f3Smrg 897ec681f3Smrg nir_divergence_options options = shader->options->divergence_analysis_options; 907ec681f3Smrg gl_shader_stage stage = shader->info.stage; 917ec681f3Smrg bool is_divergent = false; 927ec681f3Smrg switch (instr->intrinsic) { 937ec681f3Smrg /* Intrinsics which are always uniform */ 947ec681f3Smrg case nir_intrinsic_shader_clock: 957ec681f3Smrg case nir_intrinsic_ballot: 967ec681f3Smrg case nir_intrinsic_read_invocation: 977ec681f3Smrg case nir_intrinsic_read_first_invocation: 987ec681f3Smrg case nir_intrinsic_vote_any: 997ec681f3Smrg case nir_intrinsic_vote_all: 1007ec681f3Smrg case nir_intrinsic_vote_feq: 1017ec681f3Smrg case nir_intrinsic_vote_ieq: 1027ec681f3Smrg case nir_intrinsic_load_push_constant: 1037ec681f3Smrg case nir_intrinsic_load_work_dim: 1047ec681f3Smrg case nir_intrinsic_load_num_workgroups: 1057ec681f3Smrg case nir_intrinsic_load_workgroup_size: 1067ec681f3Smrg case nir_intrinsic_load_subgroup_id: 1077ec681f3Smrg case nir_intrinsic_load_num_subgroups: 1087ec681f3Smrg case nir_intrinsic_load_ray_launch_size: 1097ec681f3Smrg case nir_intrinsic_load_subgroup_size: 1107ec681f3Smrg case nir_intrinsic_load_subgroup_eq_mask: 1117ec681f3Smrg case nir_intrinsic_load_subgroup_ge_mask: 1127ec681f3Smrg case nir_intrinsic_load_subgroup_gt_mask: 1137ec681f3Smrg case nir_intrinsic_load_subgroup_le_mask: 1147ec681f3Smrg case nir_intrinsic_load_subgroup_lt_mask: 1157ec681f3Smrg case nir_intrinsic_first_invocation: 1167ec681f3Smrg case nir_intrinsic_last_invocation: 1177ec681f3Smrg case nir_intrinsic_load_base_instance: 1187ec681f3Smrg case nir_intrinsic_load_base_vertex: 1197ec681f3Smrg case nir_intrinsic_load_first_vertex: 1207ec681f3Smrg case nir_intrinsic_load_draw_id: 1217ec681f3Smrg case nir_intrinsic_load_is_indexed_draw: 1227ec681f3Smrg case nir_intrinsic_load_viewport_scale: 1237ec681f3Smrg case nir_intrinsic_load_user_clip_plane: 1247ec681f3Smrg case nir_intrinsic_load_viewport_x_scale: 1257ec681f3Smrg case nir_intrinsic_load_viewport_y_scale: 1267ec681f3Smrg case nir_intrinsic_load_viewport_z_scale: 1277ec681f3Smrg case nir_intrinsic_load_viewport_offset: 1287ec681f3Smrg case nir_intrinsic_load_viewport_x_offset: 1297ec681f3Smrg case nir_intrinsic_load_viewport_y_offset: 1307ec681f3Smrg case nir_intrinsic_load_viewport_z_offset: 1317ec681f3Smrg case nir_intrinsic_load_blend_const_color_a_float: 1327ec681f3Smrg case nir_intrinsic_load_blend_const_color_b_float: 1337ec681f3Smrg case nir_intrinsic_load_blend_const_color_g_float: 1347ec681f3Smrg case nir_intrinsic_load_blend_const_color_r_float: 1357ec681f3Smrg case nir_intrinsic_load_blend_const_color_rgba: 1367ec681f3Smrg case nir_intrinsic_load_blend_const_color_aaaa8888_unorm: 1377ec681f3Smrg case nir_intrinsic_load_blend_const_color_rgba8888_unorm: 1387ec681f3Smrg case nir_intrinsic_load_line_width: 1397ec681f3Smrg case nir_intrinsic_load_aa_line_width: 1407ec681f3Smrg case nir_intrinsic_load_fb_layers_v3d: 1417ec681f3Smrg case nir_intrinsic_load_tcs_num_patches_amd: 1427ec681f3Smrg case nir_intrinsic_load_ring_tess_factors_amd: 1437ec681f3Smrg case nir_intrinsic_load_ring_tess_offchip_amd: 1447ec681f3Smrg case nir_intrinsic_load_ring_tess_factors_offset_amd: 1457ec681f3Smrg case nir_intrinsic_load_ring_tess_offchip_offset_amd: 1467ec681f3Smrg case nir_intrinsic_load_ring_esgs_amd: 1477ec681f3Smrg case nir_intrinsic_load_ring_es2gs_offset_amd: 1487ec681f3Smrg case nir_intrinsic_load_sample_positions_pan: 1497ec681f3Smrg case nir_intrinsic_load_workgroup_num_input_vertices_amd: 1507ec681f3Smrg case nir_intrinsic_load_workgroup_num_input_primitives_amd: 1517ec681f3Smrg case nir_intrinsic_load_shader_query_enabled_amd: 1527ec681f3Smrg case nir_intrinsic_load_cull_front_face_enabled_amd: 1537ec681f3Smrg case nir_intrinsic_load_cull_back_face_enabled_amd: 1547ec681f3Smrg case nir_intrinsic_load_cull_ccw_amd: 1557ec681f3Smrg case nir_intrinsic_load_cull_small_primitives_enabled_amd: 1567ec681f3Smrg case nir_intrinsic_load_cull_any_enabled_amd: 1577ec681f3Smrg case nir_intrinsic_load_cull_small_prim_precision_amd: 1587ec681f3Smrg is_divergent = false; 1597ec681f3Smrg break; 1607ec681f3Smrg 1617ec681f3Smrg /* Intrinsics with divergence depending on shader stage and hardware */ 1627ec681f3Smrg case nir_intrinsic_load_frag_shading_rate: 1637ec681f3Smrg is_divergent = !(options & nir_divergence_single_frag_shading_rate_per_subgroup); 1647ec681f3Smrg break; 1657ec681f3Smrg case nir_intrinsic_load_input: 1667ec681f3Smrg is_divergent = instr->src[0].ssa->divergent; 1677ec681f3Smrg if (stage == MESA_SHADER_FRAGMENT) 1687ec681f3Smrg is_divergent |= !(options & nir_divergence_single_prim_per_subgroup); 1697ec681f3Smrg else if (stage == MESA_SHADER_TESS_EVAL) 1707ec681f3Smrg is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup); 1717ec681f3Smrg else if (stage != MESA_SHADER_MESH) 1727ec681f3Smrg is_divergent = true; 1737ec681f3Smrg break; 1747ec681f3Smrg case nir_intrinsic_load_per_vertex_input: 1757ec681f3Smrg is_divergent = instr->src[0].ssa->divergent || 1767ec681f3Smrg instr->src[1].ssa->divergent; 1777ec681f3Smrg if (stage == MESA_SHADER_TESS_CTRL) 1787ec681f3Smrg is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup); 1797ec681f3Smrg if (stage == MESA_SHADER_TESS_EVAL) 1807ec681f3Smrg is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup); 1817ec681f3Smrg else 1827ec681f3Smrg is_divergent = true; 1837ec681f3Smrg break; 1847ec681f3Smrg case nir_intrinsic_load_input_vertex: 1857ec681f3Smrg is_divergent = instr->src[1].ssa->divergent; 1867ec681f3Smrg assert(stage == MESA_SHADER_FRAGMENT); 1877ec681f3Smrg is_divergent |= !(options & nir_divergence_single_prim_per_subgroup); 1887ec681f3Smrg break; 1897ec681f3Smrg case nir_intrinsic_load_output: 1907ec681f3Smrg is_divergent = instr->src[0].ssa->divergent; 1917ec681f3Smrg switch (stage) { 1927ec681f3Smrg case MESA_SHADER_TESS_CTRL: 1937ec681f3Smrg is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup); 1947ec681f3Smrg break; 1957ec681f3Smrg case MESA_SHADER_FRAGMENT: 1967ec681f3Smrg is_divergent = true; 1977ec681f3Smrg break; 1987ec681f3Smrg case MESA_SHADER_TASK: 1997ec681f3Smrg case MESA_SHADER_MESH: 2007ec681f3Smrg /* Divergent if src[0] is, so nothing else to do. */ 2017ec681f3Smrg break; 2027ec681f3Smrg default: 2037ec681f3Smrg unreachable("Invalid stage for load_output"); 2047ec681f3Smrg } 2057ec681f3Smrg break; 2067ec681f3Smrg case nir_intrinsic_load_per_vertex_output: 2077ec681f3Smrg assert(stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_MESH); 2087ec681f3Smrg is_divergent = instr->src[0].ssa->divergent || 2097ec681f3Smrg instr->src[1].ssa->divergent || 2107ec681f3Smrg (stage == MESA_SHADER_TESS_CTRL && 2117ec681f3Smrg !(options & nir_divergence_single_patch_per_tcs_subgroup)); 2127ec681f3Smrg break; 2137ec681f3Smrg case nir_intrinsic_load_per_primitive_output: 2147ec681f3Smrg assert(stage == MESA_SHADER_MESH); 2157ec681f3Smrg is_divergent = instr->src[0].ssa->divergent || 2167ec681f3Smrg instr->src[1].ssa->divergent; 2177ec681f3Smrg break; 2187ec681f3Smrg case nir_intrinsic_load_layer_id: 2197ec681f3Smrg case nir_intrinsic_load_front_face: 2207ec681f3Smrg assert(stage == MESA_SHADER_FRAGMENT); 2217ec681f3Smrg is_divergent = !(options & nir_divergence_single_prim_per_subgroup); 2227ec681f3Smrg break; 2237ec681f3Smrg case nir_intrinsic_load_view_index: 2247ec681f3Smrg assert(stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_KERNEL); 2257ec681f3Smrg if (options & nir_divergence_view_index_uniform) 2267ec681f3Smrg is_divergent = false; 2277ec681f3Smrg else if (stage == MESA_SHADER_FRAGMENT) 2287ec681f3Smrg is_divergent = !(options & nir_divergence_single_prim_per_subgroup); 2297ec681f3Smrg break; 2307ec681f3Smrg case nir_intrinsic_load_fs_input_interp_deltas: 2317ec681f3Smrg assert(stage == MESA_SHADER_FRAGMENT); 2327ec681f3Smrg is_divergent = instr->src[0].ssa->divergent; 2337ec681f3Smrg is_divergent |= !(options & nir_divergence_single_prim_per_subgroup); 2347ec681f3Smrg break; 2357ec681f3Smrg case nir_intrinsic_load_primitive_id: 2367ec681f3Smrg if (stage == MESA_SHADER_FRAGMENT) 2377ec681f3Smrg is_divergent = !(options & nir_divergence_single_prim_per_subgroup); 2387ec681f3Smrg else if (stage == MESA_SHADER_TESS_CTRL) 2397ec681f3Smrg is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup); 2407ec681f3Smrg else if (stage == MESA_SHADER_TESS_EVAL) 2417ec681f3Smrg is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup); 2427ec681f3Smrg else if (stage == MESA_SHADER_GEOMETRY || stage == MESA_SHADER_VERTEX) 2437ec681f3Smrg is_divergent = true; 2447ec681f3Smrg else 2457ec681f3Smrg unreachable("Invalid stage for load_primitive_id"); 2467ec681f3Smrg break; 2477ec681f3Smrg case nir_intrinsic_load_tess_level_inner: 2487ec681f3Smrg case nir_intrinsic_load_tess_level_outer: 2497ec681f3Smrg if (stage == MESA_SHADER_TESS_CTRL) 2507ec681f3Smrg is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup); 2517ec681f3Smrg else if (stage == MESA_SHADER_TESS_EVAL) 2527ec681f3Smrg is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup); 2537ec681f3Smrg else 2547ec681f3Smrg unreachable("Invalid stage for load_primitive_tess_level_*"); 2557ec681f3Smrg break; 2567ec681f3Smrg case nir_intrinsic_load_patch_vertices_in: 2577ec681f3Smrg if (stage == MESA_SHADER_TESS_EVAL) 2587ec681f3Smrg is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup); 2597ec681f3Smrg else 2607ec681f3Smrg assert(stage == MESA_SHADER_TESS_CTRL); 2617ec681f3Smrg break; 2627ec681f3Smrg 2637ec681f3Smrg case nir_intrinsic_load_workgroup_id: 2647ec681f3Smrg assert(gl_shader_stage_uses_workgroup(stage)); 2657ec681f3Smrg if (stage == MESA_SHADER_COMPUTE) 2667ec681f3Smrg is_divergent |= (options & nir_divergence_multiple_workgroup_per_compute_subgroup); 2677ec681f3Smrg break; 2687ec681f3Smrg 2697ec681f3Smrg /* Clustered reductions are uniform if cluster_size == subgroup_size or 2707ec681f3Smrg * the source is uniform and the operation is invariant. 2717ec681f3Smrg * Inclusive scans are uniform if 2727ec681f3Smrg * the source is uniform and the operation is invariant 2737ec681f3Smrg */ 2747ec681f3Smrg case nir_intrinsic_reduce: 2757ec681f3Smrg if (nir_intrinsic_cluster_size(instr) == 0) 2767ec681f3Smrg return false; 2777ec681f3Smrg FALLTHROUGH; 2787ec681f3Smrg case nir_intrinsic_inclusive_scan: { 2797ec681f3Smrg nir_op op = nir_intrinsic_reduction_op(instr); 2807ec681f3Smrg is_divergent = instr->src[0].ssa->divergent; 2817ec681f3Smrg if (op != nir_op_umin && op != nir_op_imin && op != nir_op_fmin && 2827ec681f3Smrg op != nir_op_umax && op != nir_op_imax && op != nir_op_fmax && 2837ec681f3Smrg op != nir_op_iand && op != nir_op_ior) 2847ec681f3Smrg is_divergent = true; 2857ec681f3Smrg break; 2867ec681f3Smrg } 2877ec681f3Smrg 2887ec681f3Smrg case nir_intrinsic_load_ubo: 2897ec681f3Smrg case nir_intrinsic_load_ssbo: 2907ec681f3Smrg is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) || 2917ec681f3Smrg instr->src[1].ssa->divergent; 2927ec681f3Smrg break; 2937ec681f3Smrg 2947ec681f3Smrg case nir_intrinsic_get_ssbo_size: 2957ec681f3Smrg case nir_intrinsic_deref_buffer_array_length: 2967ec681f3Smrg is_divergent = instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM); 2977ec681f3Smrg break; 2987ec681f3Smrg 2997ec681f3Smrg case nir_intrinsic_image_load: 3007ec681f3Smrg case nir_intrinsic_image_deref_load: 3017ec681f3Smrg case nir_intrinsic_bindless_image_load: 3027ec681f3Smrg case nir_intrinsic_image_sparse_load: 3037ec681f3Smrg case nir_intrinsic_image_deref_sparse_load: 3047ec681f3Smrg case nir_intrinsic_bindless_image_sparse_load: 3057ec681f3Smrg is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) || 3067ec681f3Smrg instr->src[1].ssa->divergent || instr->src[2].ssa->divergent || instr->src[3].ssa->divergent; 3077ec681f3Smrg break; 3087ec681f3Smrg 3097ec681f3Smrg 3107ec681f3Smrg /* Intrinsics with divergence depending on sources */ 3117ec681f3Smrg case nir_intrinsic_ballot_bitfield_extract: 3127ec681f3Smrg case nir_intrinsic_ballot_find_lsb: 3137ec681f3Smrg case nir_intrinsic_ballot_find_msb: 3147ec681f3Smrg case nir_intrinsic_ballot_bit_count_reduce: 3157ec681f3Smrg case nir_intrinsic_shuffle_xor: 3167ec681f3Smrg case nir_intrinsic_shuffle_up: 3177ec681f3Smrg case nir_intrinsic_shuffle_down: 3187ec681f3Smrg case nir_intrinsic_quad_broadcast: 3197ec681f3Smrg case nir_intrinsic_quad_swap_horizontal: 3207ec681f3Smrg case nir_intrinsic_quad_swap_vertical: 3217ec681f3Smrg case nir_intrinsic_quad_swap_diagonal: 3227ec681f3Smrg case nir_intrinsic_byte_permute_amd: 3237ec681f3Smrg case nir_intrinsic_load_deref: 3247ec681f3Smrg case nir_intrinsic_load_shared: 3257ec681f3Smrg case nir_intrinsic_load_global: 3267ec681f3Smrg case nir_intrinsic_load_global_constant: 3277ec681f3Smrg case nir_intrinsic_load_uniform: 3287ec681f3Smrg case nir_intrinsic_load_constant: 3297ec681f3Smrg case nir_intrinsic_load_sample_pos_from_id: 3307ec681f3Smrg case nir_intrinsic_load_kernel_input: 3317ec681f3Smrg case nir_intrinsic_load_buffer_amd: 3327ec681f3Smrg case nir_intrinsic_image_samples: 3337ec681f3Smrg case nir_intrinsic_image_deref_samples: 3347ec681f3Smrg case nir_intrinsic_bindless_image_samples: 3357ec681f3Smrg case nir_intrinsic_image_size: 3367ec681f3Smrg case nir_intrinsic_image_deref_size: 3377ec681f3Smrg case nir_intrinsic_bindless_image_size: 3387ec681f3Smrg case nir_intrinsic_copy_deref: 3397ec681f3Smrg case nir_intrinsic_vulkan_resource_index: 3407ec681f3Smrg case nir_intrinsic_vulkan_resource_reindex: 3417ec681f3Smrg case nir_intrinsic_load_vulkan_descriptor: 3427ec681f3Smrg case nir_intrinsic_atomic_counter_read: 3437ec681f3Smrg case nir_intrinsic_atomic_counter_read_deref: 3447ec681f3Smrg case nir_intrinsic_quad_swizzle_amd: 3457ec681f3Smrg case nir_intrinsic_masked_swizzle_amd: 3467ec681f3Smrg case nir_intrinsic_is_sparse_texels_resident: 3477ec681f3Smrg case nir_intrinsic_sparse_residency_code_and: 3487ec681f3Smrg case nir_intrinsic_load_sbt_amd: 3497ec681f3Smrg case nir_intrinsic_bvh64_intersect_ray_amd: 3507ec681f3Smrg case nir_intrinsic_get_ubo_size: 3517ec681f3Smrg case nir_intrinsic_load_ssbo_address: { 3527ec681f3Smrg unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs; 3537ec681f3Smrg for (unsigned i = 0; i < num_srcs; i++) { 3547ec681f3Smrg if (instr->src[i].ssa->divergent) { 3557ec681f3Smrg is_divergent = true; 3567ec681f3Smrg break; 3577ec681f3Smrg } 3587ec681f3Smrg } 3597ec681f3Smrg break; 3607ec681f3Smrg } 3617ec681f3Smrg 3627ec681f3Smrg case nir_intrinsic_shuffle: 3637ec681f3Smrg is_divergent = instr->src[0].ssa->divergent && 3647ec681f3Smrg instr->src[1].ssa->divergent; 3657ec681f3Smrg break; 3667ec681f3Smrg 3677ec681f3Smrg /* Intrinsics which are always divergent */ 3687ec681f3Smrg case nir_intrinsic_load_color0: 3697ec681f3Smrg case nir_intrinsic_load_color1: 3707ec681f3Smrg case nir_intrinsic_load_param: 3717ec681f3Smrg case nir_intrinsic_load_sample_id: 3727ec681f3Smrg case nir_intrinsic_load_sample_id_no_per_sample: 3737ec681f3Smrg case nir_intrinsic_load_sample_mask_in: 3747ec681f3Smrg case nir_intrinsic_load_interpolated_input: 3757ec681f3Smrg case nir_intrinsic_load_barycentric_pixel: 3767ec681f3Smrg case nir_intrinsic_load_barycentric_centroid: 3777ec681f3Smrg case nir_intrinsic_load_barycentric_sample: 3787ec681f3Smrg case nir_intrinsic_load_barycentric_model: 3797ec681f3Smrg case nir_intrinsic_load_barycentric_at_sample: 3807ec681f3Smrg case nir_intrinsic_load_barycentric_at_offset: 3817ec681f3Smrg case nir_intrinsic_interp_deref_at_offset: 3827ec681f3Smrg case nir_intrinsic_interp_deref_at_sample: 3837ec681f3Smrg case nir_intrinsic_interp_deref_at_centroid: 3847ec681f3Smrg case nir_intrinsic_interp_deref_at_vertex: 3857ec681f3Smrg case nir_intrinsic_load_tess_coord: 3867ec681f3Smrg case nir_intrinsic_load_point_coord: 3877ec681f3Smrg case nir_intrinsic_load_line_coord: 3887ec681f3Smrg case nir_intrinsic_load_frag_coord: 3897ec681f3Smrg case nir_intrinsic_load_sample_pos: 3907ec681f3Smrg case nir_intrinsic_load_vertex_id_zero_base: 3917ec681f3Smrg case nir_intrinsic_load_vertex_id: 3927ec681f3Smrg case nir_intrinsic_load_instance_id: 3937ec681f3Smrg case nir_intrinsic_load_invocation_id: 3947ec681f3Smrg case nir_intrinsic_load_local_invocation_id: 3957ec681f3Smrg case nir_intrinsic_load_local_invocation_index: 3967ec681f3Smrg case nir_intrinsic_load_global_invocation_id: 3977ec681f3Smrg case nir_intrinsic_load_global_invocation_id_zero_base: 3987ec681f3Smrg case nir_intrinsic_load_global_invocation_index: 3997ec681f3Smrg case nir_intrinsic_load_subgroup_invocation: 4007ec681f3Smrg case nir_intrinsic_load_helper_invocation: 4017ec681f3Smrg case nir_intrinsic_is_helper_invocation: 4027ec681f3Smrg case nir_intrinsic_load_scratch: 4037ec681f3Smrg case nir_intrinsic_deref_atomic_add: 4047ec681f3Smrg case nir_intrinsic_deref_atomic_imin: 4057ec681f3Smrg case nir_intrinsic_deref_atomic_umin: 4067ec681f3Smrg case nir_intrinsic_deref_atomic_imax: 4077ec681f3Smrg case nir_intrinsic_deref_atomic_umax: 4087ec681f3Smrg case nir_intrinsic_deref_atomic_and: 4097ec681f3Smrg case nir_intrinsic_deref_atomic_or: 4107ec681f3Smrg case nir_intrinsic_deref_atomic_xor: 4117ec681f3Smrg case nir_intrinsic_deref_atomic_exchange: 4127ec681f3Smrg case nir_intrinsic_deref_atomic_comp_swap: 4137ec681f3Smrg case nir_intrinsic_deref_atomic_fadd: 4147ec681f3Smrg case nir_intrinsic_deref_atomic_fmin: 4157ec681f3Smrg case nir_intrinsic_deref_atomic_fmax: 4167ec681f3Smrg case nir_intrinsic_deref_atomic_fcomp_swap: 4177ec681f3Smrg case nir_intrinsic_ssbo_atomic_add: 4187ec681f3Smrg case nir_intrinsic_ssbo_atomic_imin: 4197ec681f3Smrg case nir_intrinsic_ssbo_atomic_umin: 4207ec681f3Smrg case nir_intrinsic_ssbo_atomic_imax: 4217ec681f3Smrg case nir_intrinsic_ssbo_atomic_umax: 4227ec681f3Smrg case nir_intrinsic_ssbo_atomic_and: 4237ec681f3Smrg case nir_intrinsic_ssbo_atomic_or: 4247ec681f3Smrg case nir_intrinsic_ssbo_atomic_xor: 4257ec681f3Smrg case nir_intrinsic_ssbo_atomic_exchange: 4267ec681f3Smrg case nir_intrinsic_ssbo_atomic_comp_swap: 4277ec681f3Smrg case nir_intrinsic_ssbo_atomic_fadd: 4287ec681f3Smrg case nir_intrinsic_ssbo_atomic_fmax: 4297ec681f3Smrg case nir_intrinsic_ssbo_atomic_fmin: 4307ec681f3Smrg case nir_intrinsic_ssbo_atomic_fcomp_swap: 4317ec681f3Smrg case nir_intrinsic_image_deref_atomic_add: 4327ec681f3Smrg case nir_intrinsic_image_deref_atomic_imin: 4337ec681f3Smrg case nir_intrinsic_image_deref_atomic_umin: 4347ec681f3Smrg case nir_intrinsic_image_deref_atomic_imax: 4357ec681f3Smrg case nir_intrinsic_image_deref_atomic_umax: 4367ec681f3Smrg case nir_intrinsic_image_deref_atomic_and: 4377ec681f3Smrg case nir_intrinsic_image_deref_atomic_or: 4387ec681f3Smrg case nir_intrinsic_image_deref_atomic_xor: 4397ec681f3Smrg case nir_intrinsic_image_deref_atomic_exchange: 4407ec681f3Smrg case nir_intrinsic_image_deref_atomic_comp_swap: 4417ec681f3Smrg case nir_intrinsic_image_deref_atomic_fadd: 4427ec681f3Smrg case nir_intrinsic_image_deref_atomic_fmin: 4437ec681f3Smrg case nir_intrinsic_image_deref_atomic_fmax: 4447ec681f3Smrg case nir_intrinsic_image_atomic_add: 4457ec681f3Smrg case nir_intrinsic_image_atomic_imin: 4467ec681f3Smrg case nir_intrinsic_image_atomic_umin: 4477ec681f3Smrg case nir_intrinsic_image_atomic_imax: 4487ec681f3Smrg case nir_intrinsic_image_atomic_umax: 4497ec681f3Smrg case nir_intrinsic_image_atomic_and: 4507ec681f3Smrg case nir_intrinsic_image_atomic_or: 4517ec681f3Smrg case nir_intrinsic_image_atomic_xor: 4527ec681f3Smrg case nir_intrinsic_image_atomic_exchange: 4537ec681f3Smrg case nir_intrinsic_image_atomic_comp_swap: 4547ec681f3Smrg case nir_intrinsic_image_atomic_fadd: 4557ec681f3Smrg case nir_intrinsic_image_atomic_fmin: 4567ec681f3Smrg case nir_intrinsic_image_atomic_fmax: 4577ec681f3Smrg case nir_intrinsic_bindless_image_atomic_add: 4587ec681f3Smrg case nir_intrinsic_bindless_image_atomic_imin: 4597ec681f3Smrg case nir_intrinsic_bindless_image_atomic_umin: 4607ec681f3Smrg case nir_intrinsic_bindless_image_atomic_imax: 4617ec681f3Smrg case nir_intrinsic_bindless_image_atomic_umax: 4627ec681f3Smrg case nir_intrinsic_bindless_image_atomic_and: 4637ec681f3Smrg case nir_intrinsic_bindless_image_atomic_or: 4647ec681f3Smrg case nir_intrinsic_bindless_image_atomic_xor: 4657ec681f3Smrg case nir_intrinsic_bindless_image_atomic_exchange: 4667ec681f3Smrg case nir_intrinsic_bindless_image_atomic_comp_swap: 4677ec681f3Smrg case nir_intrinsic_bindless_image_atomic_fadd: 4687ec681f3Smrg case nir_intrinsic_bindless_image_atomic_fmin: 4697ec681f3Smrg case nir_intrinsic_bindless_image_atomic_fmax: 4707ec681f3Smrg case nir_intrinsic_shared_atomic_add: 4717ec681f3Smrg case nir_intrinsic_shared_atomic_imin: 4727ec681f3Smrg case nir_intrinsic_shared_atomic_umin: 4737ec681f3Smrg case nir_intrinsic_shared_atomic_imax: 4747ec681f3Smrg case nir_intrinsic_shared_atomic_umax: 4757ec681f3Smrg case nir_intrinsic_shared_atomic_and: 4767ec681f3Smrg case nir_intrinsic_shared_atomic_or: 4777ec681f3Smrg case nir_intrinsic_shared_atomic_xor: 4787ec681f3Smrg case nir_intrinsic_shared_atomic_exchange: 4797ec681f3Smrg case nir_intrinsic_shared_atomic_comp_swap: 4807ec681f3Smrg case nir_intrinsic_shared_atomic_fadd: 4817ec681f3Smrg case nir_intrinsic_shared_atomic_fmin: 4827ec681f3Smrg case nir_intrinsic_shared_atomic_fmax: 4837ec681f3Smrg case nir_intrinsic_shared_atomic_fcomp_swap: 4847ec681f3Smrg case nir_intrinsic_global_atomic_add: 4857ec681f3Smrg case nir_intrinsic_global_atomic_imin: 4867ec681f3Smrg case nir_intrinsic_global_atomic_umin: 4877ec681f3Smrg case nir_intrinsic_global_atomic_imax: 4887ec681f3Smrg case nir_intrinsic_global_atomic_umax: 4897ec681f3Smrg case nir_intrinsic_global_atomic_and: 4907ec681f3Smrg case nir_intrinsic_global_atomic_or: 4917ec681f3Smrg case nir_intrinsic_global_atomic_xor: 4927ec681f3Smrg case nir_intrinsic_global_atomic_exchange: 4937ec681f3Smrg case nir_intrinsic_global_atomic_comp_swap: 4947ec681f3Smrg case nir_intrinsic_global_atomic_fadd: 4957ec681f3Smrg case nir_intrinsic_global_atomic_fmin: 4967ec681f3Smrg case nir_intrinsic_global_atomic_fmax: 4977ec681f3Smrg case nir_intrinsic_global_atomic_fcomp_swap: 4987ec681f3Smrg case nir_intrinsic_atomic_counter_add: 4997ec681f3Smrg case nir_intrinsic_atomic_counter_min: 5007ec681f3Smrg case nir_intrinsic_atomic_counter_max: 5017ec681f3Smrg case nir_intrinsic_atomic_counter_and: 5027ec681f3Smrg case nir_intrinsic_atomic_counter_or: 5037ec681f3Smrg case nir_intrinsic_atomic_counter_xor: 5047ec681f3Smrg case nir_intrinsic_atomic_counter_inc: 5057ec681f3Smrg case nir_intrinsic_atomic_counter_pre_dec: 5067ec681f3Smrg case nir_intrinsic_atomic_counter_post_dec: 5077ec681f3Smrg case nir_intrinsic_atomic_counter_exchange: 5087ec681f3Smrg case nir_intrinsic_atomic_counter_comp_swap: 5097ec681f3Smrg case nir_intrinsic_atomic_counter_add_deref: 5107ec681f3Smrg case nir_intrinsic_atomic_counter_min_deref: 5117ec681f3Smrg case nir_intrinsic_atomic_counter_max_deref: 5127ec681f3Smrg case nir_intrinsic_atomic_counter_and_deref: 5137ec681f3Smrg case nir_intrinsic_atomic_counter_or_deref: 5147ec681f3Smrg case nir_intrinsic_atomic_counter_xor_deref: 5157ec681f3Smrg case nir_intrinsic_atomic_counter_inc_deref: 5167ec681f3Smrg case nir_intrinsic_atomic_counter_pre_dec_deref: 5177ec681f3Smrg case nir_intrinsic_atomic_counter_post_dec_deref: 5187ec681f3Smrg case nir_intrinsic_atomic_counter_exchange_deref: 5197ec681f3Smrg case nir_intrinsic_atomic_counter_comp_swap_deref: 5207ec681f3Smrg case nir_intrinsic_exclusive_scan: 5217ec681f3Smrg case nir_intrinsic_ballot_bit_count_exclusive: 5227ec681f3Smrg case nir_intrinsic_ballot_bit_count_inclusive: 5237ec681f3Smrg case nir_intrinsic_write_invocation_amd: 5247ec681f3Smrg case nir_intrinsic_mbcnt_amd: 5257ec681f3Smrg case nir_intrinsic_lane_permute_16_amd: 5267ec681f3Smrg case nir_intrinsic_elect: 5277ec681f3Smrg case nir_intrinsic_load_tlb_color_v3d: 5287ec681f3Smrg case nir_intrinsic_load_tess_rel_patch_id_amd: 5297ec681f3Smrg case nir_intrinsic_load_gs_vertex_offset_amd: 5307ec681f3Smrg case nir_intrinsic_has_input_vertex_amd: 5317ec681f3Smrg case nir_intrinsic_has_input_primitive_amd: 5327ec681f3Smrg case nir_intrinsic_load_packed_passthrough_primitive_amd: 5337ec681f3Smrg case nir_intrinsic_load_initial_edgeflags_amd: 5347ec681f3Smrg case nir_intrinsic_gds_atomic_add_amd: 5357ec681f3Smrg case nir_intrinsic_load_rt_arg_scratch_offset_amd: 5367ec681f3Smrg case nir_intrinsic_load_intersection_opaque_amd: 5377ec681f3Smrg is_divergent = true; 5387ec681f3Smrg break; 5397ec681f3Smrg 5407ec681f3Smrg default: 5417ec681f3Smrg#ifdef NDEBUG 5427ec681f3Smrg is_divergent = true; 5437ec681f3Smrg break; 5447ec681f3Smrg#else 5457ec681f3Smrg nir_print_instr(&instr->instr, stderr); 5467ec681f3Smrg unreachable("\nNIR divergence analysis: Unhandled intrinsic."); 5477ec681f3Smrg#endif 5487ec681f3Smrg } 5497ec681f3Smrg 5507ec681f3Smrg instr->dest.ssa.divergent = is_divergent; 5517ec681f3Smrg return is_divergent; 5527ec681f3Smrg} 5537ec681f3Smrg 5547ec681f3Smrgstatic bool 5557ec681f3Smrgvisit_tex(nir_tex_instr *instr) 5567ec681f3Smrg{ 5577ec681f3Smrg if (instr->dest.ssa.divergent) 5587ec681f3Smrg return false; 5597ec681f3Smrg 5607ec681f3Smrg bool is_divergent = false; 5617ec681f3Smrg 5627ec681f3Smrg for (unsigned i = 0; i < instr->num_srcs; i++) { 5637ec681f3Smrg switch (instr->src[i].src_type) { 5647ec681f3Smrg case nir_tex_src_sampler_deref: 5657ec681f3Smrg case nir_tex_src_sampler_handle: 5667ec681f3Smrg case nir_tex_src_sampler_offset: 5677ec681f3Smrg is_divergent |= instr->src[i].src.ssa->divergent && 5687ec681f3Smrg instr->sampler_non_uniform; 5697ec681f3Smrg break; 5707ec681f3Smrg case nir_tex_src_texture_deref: 5717ec681f3Smrg case nir_tex_src_texture_handle: 5727ec681f3Smrg case nir_tex_src_texture_offset: 5737ec681f3Smrg is_divergent |= instr->src[i].src.ssa->divergent && 5747ec681f3Smrg instr->texture_non_uniform; 5757ec681f3Smrg break; 5767ec681f3Smrg default: 5777ec681f3Smrg is_divergent |= instr->src[i].src.ssa->divergent; 5787ec681f3Smrg break; 5797ec681f3Smrg } 5807ec681f3Smrg } 5817ec681f3Smrg 5827ec681f3Smrg instr->dest.ssa.divergent = is_divergent; 5837ec681f3Smrg return is_divergent; 5847ec681f3Smrg} 5857ec681f3Smrg 5867ec681f3Smrgstatic bool 5877ec681f3Smrgvisit_load_const(nir_load_const_instr *instr) 5887ec681f3Smrg{ 5897ec681f3Smrg return false; 5907ec681f3Smrg} 5917ec681f3Smrg 5927ec681f3Smrgstatic bool 5937ec681f3Smrgvisit_ssa_undef(nir_ssa_undef_instr *instr) 5947ec681f3Smrg{ 5957ec681f3Smrg return false; 5967ec681f3Smrg} 5977ec681f3Smrg 5987ec681f3Smrgstatic bool 5997ec681f3Smrgnir_variable_mode_is_uniform(nir_variable_mode mode) { 6007ec681f3Smrg switch (mode) { 6017ec681f3Smrg case nir_var_uniform: 6027ec681f3Smrg case nir_var_mem_ubo: 6037ec681f3Smrg case nir_var_mem_ssbo: 6047ec681f3Smrg case nir_var_mem_shared: 6057ec681f3Smrg case nir_var_mem_global: 6067ec681f3Smrg return true; 6077ec681f3Smrg default: 6087ec681f3Smrg return false; 6097ec681f3Smrg } 6107ec681f3Smrg} 6117ec681f3Smrg 6127ec681f3Smrgstatic bool 6137ec681f3Smrgnir_variable_is_uniform(nir_shader *shader, nir_variable *var) 6147ec681f3Smrg{ 6157ec681f3Smrg if (nir_variable_mode_is_uniform(var->data.mode)) 6167ec681f3Smrg return true; 6177ec681f3Smrg 6187ec681f3Smrg nir_divergence_options options = shader->options->divergence_analysis_options; 6197ec681f3Smrg gl_shader_stage stage = shader->info.stage; 6207ec681f3Smrg 6217ec681f3Smrg if (stage == MESA_SHADER_FRAGMENT && 6227ec681f3Smrg (options & nir_divergence_single_prim_per_subgroup) && 6237ec681f3Smrg var->data.mode == nir_var_shader_in && 6247ec681f3Smrg var->data.interpolation == INTERP_MODE_FLAT) 6257ec681f3Smrg return true; 6267ec681f3Smrg 6277ec681f3Smrg if (stage == MESA_SHADER_TESS_CTRL && 6287ec681f3Smrg (options & nir_divergence_single_patch_per_tcs_subgroup) && 6297ec681f3Smrg var->data.mode == nir_var_shader_out && var->data.patch) 6307ec681f3Smrg return true; 6317ec681f3Smrg 6327ec681f3Smrg if (stage == MESA_SHADER_TESS_EVAL && 6337ec681f3Smrg (options & nir_divergence_single_patch_per_tes_subgroup) && 6347ec681f3Smrg var->data.mode == nir_var_shader_in && var->data.patch) 6357ec681f3Smrg return true; 6367ec681f3Smrg 6377ec681f3Smrg return false; 6387ec681f3Smrg} 6397ec681f3Smrg 6407ec681f3Smrgstatic bool 6417ec681f3Smrgvisit_deref(nir_shader *shader, nir_deref_instr *deref) 6427ec681f3Smrg{ 6437ec681f3Smrg if (deref->dest.ssa.divergent) 6447ec681f3Smrg return false; 6457ec681f3Smrg 6467ec681f3Smrg bool is_divergent = false; 6477ec681f3Smrg switch (deref->deref_type) { 6487ec681f3Smrg case nir_deref_type_var: 6497ec681f3Smrg is_divergent = !nir_variable_is_uniform(shader, deref->var); 6507ec681f3Smrg break; 6517ec681f3Smrg case nir_deref_type_array: 6527ec681f3Smrg case nir_deref_type_ptr_as_array: 6537ec681f3Smrg is_divergent = deref->arr.index.ssa->divergent; 6547ec681f3Smrg FALLTHROUGH; 6557ec681f3Smrg case nir_deref_type_struct: 6567ec681f3Smrg case nir_deref_type_array_wildcard: 6577ec681f3Smrg is_divergent |= deref->parent.ssa->divergent; 6587ec681f3Smrg break; 6597ec681f3Smrg case nir_deref_type_cast: 6607ec681f3Smrg is_divergent = !nir_variable_mode_is_uniform(deref->var->data.mode) || 6617ec681f3Smrg deref->parent.ssa->divergent; 6627ec681f3Smrg break; 6637ec681f3Smrg } 6647ec681f3Smrg 6657ec681f3Smrg deref->dest.ssa.divergent = is_divergent; 6667ec681f3Smrg return is_divergent; 6677ec681f3Smrg} 6687ec681f3Smrg 6697ec681f3Smrgstatic bool 6707ec681f3Smrgvisit_jump(nir_jump_instr *jump, struct divergence_state *state) 6717ec681f3Smrg{ 6727ec681f3Smrg switch (jump->type) { 6737ec681f3Smrg case nir_jump_continue: 6747ec681f3Smrg if (state->divergent_loop_continue) 6757ec681f3Smrg return false; 6767ec681f3Smrg if (state->divergent_loop_cf) 6777ec681f3Smrg state->divergent_loop_continue = true; 6787ec681f3Smrg return state->divergent_loop_continue; 6797ec681f3Smrg case nir_jump_break: 6807ec681f3Smrg if (state->divergent_loop_break) 6817ec681f3Smrg return false; 6827ec681f3Smrg if (state->divergent_loop_cf) 6837ec681f3Smrg state->divergent_loop_break = true; 6847ec681f3Smrg return state->divergent_loop_break; 6857ec681f3Smrg case nir_jump_halt: 6867ec681f3Smrg /* This totally kills invocations so it doesn't add divergence */ 6877ec681f3Smrg break; 6887ec681f3Smrg case nir_jump_return: 6897ec681f3Smrg unreachable("NIR divergence analysis: Unsupported return instruction."); 6907ec681f3Smrg break; 6917ec681f3Smrg case nir_jump_goto: 6927ec681f3Smrg case nir_jump_goto_if: 6937ec681f3Smrg unreachable("NIR divergence analysis: Unsupported goto_if instruction."); 6947ec681f3Smrg break; 6957ec681f3Smrg } 6967ec681f3Smrg return false; 6977ec681f3Smrg} 6987ec681f3Smrg 6997ec681f3Smrgstatic bool 7007ec681f3Smrgset_ssa_def_not_divergent(nir_ssa_def *def, UNUSED void *_state) 7017ec681f3Smrg{ 7027ec681f3Smrg def->divergent = false; 7037ec681f3Smrg return true; 7047ec681f3Smrg} 7057ec681f3Smrg 7067ec681f3Smrgstatic bool 7077ec681f3Smrgupdate_instr_divergence(nir_shader *shader, nir_instr *instr) 7087ec681f3Smrg{ 7097ec681f3Smrg switch (instr->type) { 7107ec681f3Smrg case nir_instr_type_alu: 7117ec681f3Smrg return visit_alu(nir_instr_as_alu(instr)); 7127ec681f3Smrg case nir_instr_type_intrinsic: 7137ec681f3Smrg return visit_intrinsic(shader, nir_instr_as_intrinsic(instr)); 7147ec681f3Smrg case nir_instr_type_tex: 7157ec681f3Smrg return visit_tex(nir_instr_as_tex(instr)); 7167ec681f3Smrg case nir_instr_type_load_const: 7177ec681f3Smrg return visit_load_const(nir_instr_as_load_const(instr)); 7187ec681f3Smrg case nir_instr_type_ssa_undef: 7197ec681f3Smrg return visit_ssa_undef(nir_instr_as_ssa_undef(instr)); 7207ec681f3Smrg case nir_instr_type_deref: 7217ec681f3Smrg return visit_deref(shader, nir_instr_as_deref(instr)); 7227ec681f3Smrg case nir_instr_type_jump: 7237ec681f3Smrg case nir_instr_type_phi: 7247ec681f3Smrg case nir_instr_type_call: 7257ec681f3Smrg case nir_instr_type_parallel_copy: 7267ec681f3Smrg default: 7277ec681f3Smrg unreachable("NIR divergence analysis: Unsupported instruction type."); 7287ec681f3Smrg } 7297ec681f3Smrg} 7307ec681f3Smrg 7317ec681f3Smrgstatic bool 7327ec681f3Smrgvisit_block(nir_block *block, struct divergence_state *state) 7337ec681f3Smrg{ 7347ec681f3Smrg bool has_changed = false; 7357ec681f3Smrg 7367ec681f3Smrg nir_foreach_instr(instr, block) { 7377ec681f3Smrg /* phis are handled when processing the branches */ 7387ec681f3Smrg if (instr->type == nir_instr_type_phi) 7397ec681f3Smrg continue; 7407ec681f3Smrg 7417ec681f3Smrg if (state->first_visit) 7427ec681f3Smrg nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL); 7437ec681f3Smrg 7447ec681f3Smrg if (instr->type == nir_instr_type_jump) 7457ec681f3Smrg has_changed |= visit_jump(nir_instr_as_jump(instr), state); 7467ec681f3Smrg else 7477ec681f3Smrg has_changed |= update_instr_divergence(state->shader, instr); 7487ec681f3Smrg } 7497ec681f3Smrg 7507ec681f3Smrg return has_changed; 7517ec681f3Smrg} 7527ec681f3Smrg 7537ec681f3Smrg/* There are 3 types of phi instructions: 7547ec681f3Smrg * (1) gamma: represent the joining point of different paths 7557ec681f3Smrg * created by an “if-then-else” branch. 7567ec681f3Smrg * The resulting value is divergent if the branch condition 7577ec681f3Smrg * or any of the source values is divergent. */ 7587ec681f3Smrgstatic bool 7597ec681f3Smrgvisit_if_merge_phi(nir_phi_instr *phi, bool if_cond_divergent) 7607ec681f3Smrg{ 7617ec681f3Smrg if (phi->dest.ssa.divergent) 7627ec681f3Smrg return false; 7637ec681f3Smrg 7647ec681f3Smrg unsigned defined_srcs = 0; 7657ec681f3Smrg nir_foreach_phi_src(src, phi) { 7667ec681f3Smrg /* if any source value is divergent, the resulting value is divergent */ 7677ec681f3Smrg if (src->src.ssa->divergent) { 7687ec681f3Smrg phi->dest.ssa.divergent = true; 7697ec681f3Smrg return true; 7707ec681f3Smrg } 7717ec681f3Smrg if (src->src.ssa->parent_instr->type != nir_instr_type_ssa_undef) { 7727ec681f3Smrg defined_srcs++; 7737ec681f3Smrg } 7747ec681f3Smrg } 7757ec681f3Smrg 7767ec681f3Smrg /* if the condition is divergent and two sources defined, the definition is divergent */ 7777ec681f3Smrg if (defined_srcs > 1 && if_cond_divergent) { 7787ec681f3Smrg phi->dest.ssa.divergent = true; 7797ec681f3Smrg return true; 7807ec681f3Smrg } 7817ec681f3Smrg 7827ec681f3Smrg return false; 7837ec681f3Smrg} 7847ec681f3Smrg 7857ec681f3Smrg/* There are 3 types of phi instructions: 7867ec681f3Smrg * (2) mu: which only exist at loop headers, 7877ec681f3Smrg * merge initial and loop-carried values. 7887ec681f3Smrg * The resulting value is divergent if any source value 7897ec681f3Smrg * is divergent or a divergent loop continue condition 7907ec681f3Smrg * is associated with a different ssa-def. */ 7917ec681f3Smrgstatic bool 7927ec681f3Smrgvisit_loop_header_phi(nir_phi_instr *phi, nir_block *preheader, bool divergent_continue) 7937ec681f3Smrg{ 7947ec681f3Smrg if (phi->dest.ssa.divergent) 7957ec681f3Smrg return false; 7967ec681f3Smrg 7977ec681f3Smrg nir_ssa_def* same = NULL; 7987ec681f3Smrg nir_foreach_phi_src(src, phi) { 7997ec681f3Smrg /* if any source value is divergent, the resulting value is divergent */ 8007ec681f3Smrg if (src->src.ssa->divergent) { 8017ec681f3Smrg phi->dest.ssa.divergent = true; 8027ec681f3Smrg return true; 8037ec681f3Smrg } 8047ec681f3Smrg /* if this loop is uniform, we're done here */ 8057ec681f3Smrg if (!divergent_continue) 8067ec681f3Smrg continue; 8077ec681f3Smrg /* skip the loop preheader */ 8087ec681f3Smrg if (src->pred == preheader) 8097ec681f3Smrg continue; 8107ec681f3Smrg /* skip undef values */ 8117ec681f3Smrg if (nir_src_is_undef(src->src)) 8127ec681f3Smrg continue; 8137ec681f3Smrg 8147ec681f3Smrg /* check if all loop-carried values are from the same ssa-def */ 8157ec681f3Smrg if (!same) 8167ec681f3Smrg same = src->src.ssa; 8177ec681f3Smrg else if (same != src->src.ssa) { 8187ec681f3Smrg phi->dest.ssa.divergent = true; 8197ec681f3Smrg return true; 8207ec681f3Smrg } 8217ec681f3Smrg } 8227ec681f3Smrg 8237ec681f3Smrg return false; 8247ec681f3Smrg} 8257ec681f3Smrg 8267ec681f3Smrg/* There are 3 types of phi instructions: 8277ec681f3Smrg * (3) eta: represent values that leave a loop. 8287ec681f3Smrg * The resulting value is divergent if the source value is divergent 8297ec681f3Smrg * or any loop exit condition is divergent for a value which is 8307ec681f3Smrg * not loop-invariant. 8317ec681f3Smrg * (note: there should be no phi for loop-invariant variables.) */ 8327ec681f3Smrgstatic bool 8337ec681f3Smrgvisit_loop_exit_phi(nir_phi_instr *phi, bool divergent_break) 8347ec681f3Smrg{ 8357ec681f3Smrg if (phi->dest.ssa.divergent) 8367ec681f3Smrg return false; 8377ec681f3Smrg 8387ec681f3Smrg if (divergent_break) { 8397ec681f3Smrg phi->dest.ssa.divergent = true; 8407ec681f3Smrg return true; 8417ec681f3Smrg } 8427ec681f3Smrg 8437ec681f3Smrg /* if any source value is divergent, the resulting value is divergent */ 8447ec681f3Smrg nir_foreach_phi_src(src, phi) { 8457ec681f3Smrg if (src->src.ssa->divergent) { 8467ec681f3Smrg phi->dest.ssa.divergent = true; 8477ec681f3Smrg return true; 8487ec681f3Smrg } 8497ec681f3Smrg } 8507ec681f3Smrg 8517ec681f3Smrg return false; 8527ec681f3Smrg} 8537ec681f3Smrg 8547ec681f3Smrgstatic bool 8557ec681f3Smrgvisit_if(nir_if *if_stmt, struct divergence_state *state) 8567ec681f3Smrg{ 8577ec681f3Smrg bool progress = false; 8587ec681f3Smrg 8597ec681f3Smrg struct divergence_state then_state = *state; 8607ec681f3Smrg then_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent; 8617ec681f3Smrg progress |= visit_cf_list(&if_stmt->then_list, &then_state); 8627ec681f3Smrg 8637ec681f3Smrg struct divergence_state else_state = *state; 8647ec681f3Smrg else_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent; 8657ec681f3Smrg progress |= visit_cf_list(&if_stmt->else_list, &else_state); 8667ec681f3Smrg 8677ec681f3Smrg /* handle phis after the IF */ 8687ec681f3Smrg nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&if_stmt->cf_node)) { 8697ec681f3Smrg if (instr->type != nir_instr_type_phi) 8707ec681f3Smrg break; 8717ec681f3Smrg 8727ec681f3Smrg if (state->first_visit) 8737ec681f3Smrg nir_instr_as_phi(instr)->dest.ssa.divergent = false; 8747ec681f3Smrg progress |= visit_if_merge_phi(nir_instr_as_phi(instr), 8757ec681f3Smrg if_stmt->condition.ssa->divergent); 8767ec681f3Smrg } 8777ec681f3Smrg 8787ec681f3Smrg /* join loop divergence information from both branch legs */ 8797ec681f3Smrg state->divergent_loop_continue |= then_state.divergent_loop_continue || 8807ec681f3Smrg else_state.divergent_loop_continue; 8817ec681f3Smrg state->divergent_loop_break |= then_state.divergent_loop_break || 8827ec681f3Smrg else_state.divergent_loop_break; 8837ec681f3Smrg 8847ec681f3Smrg /* A divergent continue makes succeeding loop CF divergent: 8857ec681f3Smrg * not all loop-active invocations participate in the remaining loop-body 8867ec681f3Smrg * which means that a following break might be taken by some invocations, only */ 8877ec681f3Smrg state->divergent_loop_cf |= state->divergent_loop_continue; 8887ec681f3Smrg 8897ec681f3Smrg return progress; 8907ec681f3Smrg} 8917ec681f3Smrg 8927ec681f3Smrgstatic bool 8937ec681f3Smrgvisit_loop(nir_loop *loop, struct divergence_state *state) 8947ec681f3Smrg{ 8957ec681f3Smrg bool progress = false; 8967ec681f3Smrg nir_block *loop_header = nir_loop_first_block(loop); 8977ec681f3Smrg nir_block *loop_preheader = nir_block_cf_tree_prev(loop_header); 8987ec681f3Smrg 8997ec681f3Smrg /* handle loop header phis first: we have no knowledge yet about 9007ec681f3Smrg * the loop's control flow or any loop-carried sources. */ 9017ec681f3Smrg nir_foreach_instr(instr, loop_header) { 9027ec681f3Smrg if (instr->type != nir_instr_type_phi) 9037ec681f3Smrg break; 9047ec681f3Smrg 9057ec681f3Smrg nir_phi_instr *phi = nir_instr_as_phi(instr); 9067ec681f3Smrg if (!state->first_visit && phi->dest.ssa.divergent) 9077ec681f3Smrg continue; 9087ec681f3Smrg 9097ec681f3Smrg nir_foreach_phi_src(src, phi) { 9107ec681f3Smrg if (src->pred == loop_preheader) { 9117ec681f3Smrg phi->dest.ssa.divergent = src->src.ssa->divergent; 9127ec681f3Smrg break; 9137ec681f3Smrg } 9147ec681f3Smrg } 9157ec681f3Smrg progress |= phi->dest.ssa.divergent; 9167ec681f3Smrg } 9177ec681f3Smrg 9187ec681f3Smrg /* setup loop state */ 9197ec681f3Smrg struct divergence_state loop_state = *state; 9207ec681f3Smrg loop_state.divergent_loop_cf = false; 9217ec681f3Smrg loop_state.divergent_loop_continue = false; 9227ec681f3Smrg loop_state.divergent_loop_break = false; 9237ec681f3Smrg 9247ec681f3Smrg /* process loop body until no further changes are made */ 9257ec681f3Smrg bool repeat; 9267ec681f3Smrg do { 9277ec681f3Smrg progress |= visit_cf_list(&loop->body, &loop_state); 9287ec681f3Smrg repeat = false; 9297ec681f3Smrg 9307ec681f3Smrg /* revisit loop header phis to see if something has changed */ 9317ec681f3Smrg nir_foreach_instr(instr, loop_header) { 9327ec681f3Smrg if (instr->type != nir_instr_type_phi) 9337ec681f3Smrg break; 9347ec681f3Smrg 9357ec681f3Smrg repeat |= visit_loop_header_phi(nir_instr_as_phi(instr), 9367ec681f3Smrg loop_preheader, 9377ec681f3Smrg loop_state.divergent_loop_continue); 9387ec681f3Smrg } 9397ec681f3Smrg 9407ec681f3Smrg loop_state.divergent_loop_cf = false; 9417ec681f3Smrg loop_state.first_visit = false; 9427ec681f3Smrg } while (repeat); 9437ec681f3Smrg 9447ec681f3Smrg /* handle phis after the loop */ 9457ec681f3Smrg nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&loop->cf_node)) { 9467ec681f3Smrg if (instr->type != nir_instr_type_phi) 9477ec681f3Smrg break; 9487ec681f3Smrg 9497ec681f3Smrg if (state->first_visit) 9507ec681f3Smrg nir_instr_as_phi(instr)->dest.ssa.divergent = false; 9517ec681f3Smrg progress |= visit_loop_exit_phi(nir_instr_as_phi(instr), 9527ec681f3Smrg loop_state.divergent_loop_break); 9537ec681f3Smrg } 9547ec681f3Smrg 9557ec681f3Smrg loop->divergent = (loop_state.divergent_loop_break || loop_state.divergent_loop_continue); 9567ec681f3Smrg 9577ec681f3Smrg return progress; 9587ec681f3Smrg} 9597ec681f3Smrg 9607ec681f3Smrgstatic bool 9617ec681f3Smrgvisit_cf_list(struct exec_list *list, struct divergence_state *state) 9627ec681f3Smrg{ 9637ec681f3Smrg bool has_changed = false; 9647ec681f3Smrg 9657ec681f3Smrg foreach_list_typed(nir_cf_node, node, node, list) { 9667ec681f3Smrg switch (node->type) { 9677ec681f3Smrg case nir_cf_node_block: 9687ec681f3Smrg has_changed |= visit_block(nir_cf_node_as_block(node), state); 9697ec681f3Smrg break; 9707ec681f3Smrg case nir_cf_node_if: 9717ec681f3Smrg has_changed |= visit_if(nir_cf_node_as_if(node), state); 9727ec681f3Smrg break; 9737ec681f3Smrg case nir_cf_node_loop: 9747ec681f3Smrg has_changed |= visit_loop(nir_cf_node_as_loop(node), state); 9757ec681f3Smrg break; 9767ec681f3Smrg case nir_cf_node_function: 9777ec681f3Smrg unreachable("NIR divergence analysis: Unsupported cf_node type."); 9787ec681f3Smrg } 9797ec681f3Smrg } 9807ec681f3Smrg 9817ec681f3Smrg return has_changed; 9827ec681f3Smrg} 9837ec681f3Smrg 9847ec681f3Smrgvoid 9857ec681f3Smrgnir_divergence_analysis(nir_shader *shader) 9867ec681f3Smrg{ 9877ec681f3Smrg struct divergence_state state = { 9887ec681f3Smrg .stage = shader->info.stage, 9897ec681f3Smrg .shader = shader, 9907ec681f3Smrg .divergent_loop_cf = false, 9917ec681f3Smrg .divergent_loop_continue = false, 9927ec681f3Smrg .divergent_loop_break = false, 9937ec681f3Smrg .first_visit = true, 9947ec681f3Smrg }; 9957ec681f3Smrg 9967ec681f3Smrg visit_cf_list(&nir_shader_get_entrypoint(shader)->body, &state); 9977ec681f3Smrg} 9987ec681f3Smrg 9997ec681f3Smrgbool nir_update_instr_divergence(nir_shader *shader, nir_instr *instr) 10007ec681f3Smrg{ 10017ec681f3Smrg nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL); 10027ec681f3Smrg 10037ec681f3Smrg if (instr->type == nir_instr_type_phi) { 10047ec681f3Smrg nir_cf_node *prev = nir_cf_node_prev(&instr->block->cf_node); 10057ec681f3Smrg /* can only update gamma/if phis */ 10067ec681f3Smrg if (!prev || prev->type != nir_cf_node_if) 10077ec681f3Smrg return false; 10087ec681f3Smrg 10097ec681f3Smrg nir_if *nif = nir_cf_node_as_if(prev); 10107ec681f3Smrg 10117ec681f3Smrg visit_if_merge_phi(nir_instr_as_phi(instr), nir_src_is_divergent(nif->condition)); 10127ec681f3Smrg return true; 10137ec681f3Smrg } 10147ec681f3Smrg 10157ec681f3Smrg update_instr_divergence(shader, instr); 10167ec681f3Smrg return true; 10177ec681f3Smrg} 10187ec681f3Smrg 1019