/*
 * Copyright © 2018 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include "nir.h"

/* This pass computes for each ssa definition if it is uniform.
 * That is, the variable has the same value for all invocations
 * of the group.
 *
 * This divergence analysis pass expects the shader to be in LCSSA-form.
 *
 * This algorithm implements "The Simple Divergence Analysis" from
 * Diogo Sampaio, Rafael De Souza, Sylvain Collange, Fernando Magno Quintão Pereira.
 * Divergence Analysis. ACM Transactions on Programming Languages and Systems (TOPLAS),
 * ACM, 2013, 35 (4), pp.13:1-13:36. <10.1145/2523815>. <hal-00909072v2>
 */
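
/* For example (illustrative only, not output of this pass):
 *
 *    ssa_1 = load_push_constant        -> uniform: one value for the group
 *    ssa_2 = load_subgroup_invocation  -> divergent: differs per invocation
 *    ssa_3 = iadd ssa_1, ssa_2         -> divergent: inherited from ssa_2
 *
 * The result is recorded per definition in nir_ssa_def::divergent.
 */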

struct divergence_state {
   const gl_shader_stage stage;
   nir_shader *shader;

   /** current control flow state */
   /* True if some loop-active invocations might take a different control-flow path.
    * A divergent break does not cause subsequent control-flow to be considered
    * divergent because those invocations are no longer active in the loop.
    * For a divergent if, both sides are considered divergent flow because
    * the other side is still loop-active. */
   bool divergent_loop_cf;
   /* True if a divergent continue happened since the loop header */
   bool divergent_loop_continue;
   /* True if a divergent break happened since the loop header */
   bool divergent_loop_break;

   /* True if we visit the block for the first time */
   bool first_visit;
};

static bool
visit_cf_list(struct exec_list *list, struct divergence_state *state);

static bool
visit_alu(nir_alu_instr *instr)
{
   if (instr->dest.dest.ssa.divergent)
      return false;

   unsigned num_src = nir_op_infos[instr->op].num_inputs;

   for (unsigned i = 0; i < num_src; i++) {
      if (instr->src[i].src.ssa->divergent) {
         instr->dest.dest.ssa.divergent = true;
         return true;
      }
   }

   return false;
}
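
/* Note on convergence: ssa-defs start out non-divergent on the first visit
 * and the flag only ever flips from false to true, so re-running the
 * visitors (as visit_loop() does for loop bodies) reaches a fixed point.
 * E.g. for "ssa_3 = iadd ssa_1, ssa_2", visit_alu() marks ssa_3 divergent
 * as soon as either source was marked divergent by an earlier iteration.
 */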

static bool
visit_intrinsic(nir_shader *shader, nir_intrinsic_instr *instr)
{
   if (!nir_intrinsic_infos[instr->intrinsic].has_dest)
      return false;

   if (instr->dest.ssa.divergent)
      return false;

   nir_divergence_options options = shader->options->divergence_analysis_options;
   gl_shader_stage stage = shader->info.stage;
   bool is_divergent = false;
   switch (instr->intrinsic) {
   /* Intrinsics which are always uniform */
   case nir_intrinsic_shader_clock:
   case nir_intrinsic_ballot:
   case nir_intrinsic_read_invocation:
   case nir_intrinsic_read_first_invocation:
   case nir_intrinsic_vote_any:
   case nir_intrinsic_vote_all:
   case nir_intrinsic_vote_feq:
   case nir_intrinsic_vote_ieq:
   case nir_intrinsic_load_push_constant:
   case nir_intrinsic_load_work_dim:
   case nir_intrinsic_load_num_workgroups:
   case nir_intrinsic_load_workgroup_size:
   case nir_intrinsic_load_subgroup_id:
   case nir_intrinsic_load_num_subgroups:
   case nir_intrinsic_load_ray_launch_size:
   case nir_intrinsic_load_subgroup_size:
   case nir_intrinsic_load_subgroup_eq_mask:
   case nir_intrinsic_load_subgroup_ge_mask:
   case nir_intrinsic_load_subgroup_gt_mask:
   case nir_intrinsic_load_subgroup_le_mask:
   case nir_intrinsic_load_subgroup_lt_mask:
   case nir_intrinsic_first_invocation:
   case nir_intrinsic_last_invocation:
   case nir_intrinsic_load_base_instance:
   case nir_intrinsic_load_base_vertex:
   case nir_intrinsic_load_first_vertex:
   case nir_intrinsic_load_draw_id:
   case nir_intrinsic_load_is_indexed_draw:
   case nir_intrinsic_load_viewport_scale:
   case nir_intrinsic_load_user_clip_plane:
   case nir_intrinsic_load_viewport_x_scale:
   case nir_intrinsic_load_viewport_y_scale:
   case nir_intrinsic_load_viewport_z_scale:
   case nir_intrinsic_load_viewport_offset:
   case nir_intrinsic_load_viewport_x_offset:
   case nir_intrinsic_load_viewport_y_offset:
   case nir_intrinsic_load_viewport_z_offset:
   case nir_intrinsic_load_blend_const_color_a_float:
   case nir_intrinsic_load_blend_const_color_b_float:
   case nir_intrinsic_load_blend_const_color_g_float:
   case nir_intrinsic_load_blend_const_color_r_float:
   case nir_intrinsic_load_blend_const_color_rgba:
   case nir_intrinsic_load_blend_const_color_aaaa8888_unorm:
   case nir_intrinsic_load_blend_const_color_rgba8888_unorm:
   case nir_intrinsic_load_line_width:
   case nir_intrinsic_load_aa_line_width:
   case nir_intrinsic_load_fb_layers_v3d:
   case nir_intrinsic_load_tcs_num_patches_amd:
   case nir_intrinsic_load_ring_tess_factors_amd:
   case nir_intrinsic_load_ring_tess_offchip_amd:
   case nir_intrinsic_load_ring_tess_factors_offset_amd:
   case nir_intrinsic_load_ring_tess_offchip_offset_amd:
   case nir_intrinsic_load_ring_esgs_amd:
   case nir_intrinsic_load_ring_es2gs_offset_amd:
   case nir_intrinsic_load_sample_positions_pan:
   case nir_intrinsic_load_workgroup_num_input_vertices_amd:
   case nir_intrinsic_load_workgroup_num_input_primitives_amd:
   case nir_intrinsic_load_shader_query_enabled_amd:
   case nir_intrinsic_load_cull_front_face_enabled_amd:
   case nir_intrinsic_load_cull_back_face_enabled_amd:
   case nir_intrinsic_load_cull_ccw_amd:
   case nir_intrinsic_load_cull_small_primitives_enabled_amd:
   case nir_intrinsic_load_cull_any_enabled_amd:
   case nir_intrinsic_load_cull_small_prim_precision_amd:
      is_divergent = false;
      break;

   /* Intrinsics with divergence depending on shader stage and hardware */
   case nir_intrinsic_load_frag_shading_rate:
      is_divergent = !(options & nir_divergence_single_frag_shading_rate_per_subgroup);
      break;
   case nir_intrinsic_load_input:
      is_divergent = instr->src[0].ssa->divergent;
      if (stage == MESA_SHADER_FRAGMENT)
         is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
      else if (stage != MESA_SHADER_MESH)
         is_divergent = true;
      break;
   case nir_intrinsic_load_per_vertex_input:
      is_divergent = instr->src[0].ssa->divergent ||
                     instr->src[1].ssa->divergent;
      if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent |= !(options & nir_divergence_single_patch_per_tes_subgroup);
      else
         is_divergent = true;
      break;
   case nir_intrinsic_load_input_vertex:
      is_divergent = instr->src[1].ssa->divergent;
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_output:
      is_divergent = instr->src[0].ssa->divergent;
      switch (stage) {
      case MESA_SHADER_TESS_CTRL:
         is_divergent |= !(options & nir_divergence_single_patch_per_tcs_subgroup);
         break;
      case MESA_SHADER_FRAGMENT:
         is_divergent = true;
         break;
      case MESA_SHADER_TASK:
      case MESA_SHADER_MESH:
         /* Divergent if src[0] is, so nothing else to do. */
         break;
      default:
         unreachable("Invalid stage for load_output");
      }
      break;
   case nir_intrinsic_load_per_vertex_output:
      assert(stage == MESA_SHADER_TESS_CTRL || stage == MESA_SHADER_MESH);
      is_divergent = instr->src[0].ssa->divergent ||
                     instr->src[1].ssa->divergent ||
                     (stage == MESA_SHADER_TESS_CTRL &&
                      !(options & nir_divergence_single_patch_per_tcs_subgroup));
      break;
   case nir_intrinsic_load_per_primitive_output:
      assert(stage == MESA_SHADER_MESH);
      is_divergent = instr->src[0].ssa->divergent ||
                     instr->src[1].ssa->divergent;
      break;
   case nir_intrinsic_load_layer_id:
   case nir_intrinsic_load_front_face:
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_view_index:
      assert(stage != MESA_SHADER_COMPUTE && stage != MESA_SHADER_KERNEL);
      if (options & nir_divergence_view_index_uniform)
         is_divergent = false;
      else if (stage == MESA_SHADER_FRAGMENT)
         is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_fs_input_interp_deltas:
      assert(stage == MESA_SHADER_FRAGMENT);
      is_divergent = instr->src[0].ssa->divergent;
      is_divergent |= !(options & nir_divergence_single_prim_per_subgroup);
      break;
   case nir_intrinsic_load_primitive_id:
      if (stage == MESA_SHADER_FRAGMENT)
         is_divergent = !(options & nir_divergence_single_prim_per_subgroup);
      else if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
      else if (stage == MESA_SHADER_GEOMETRY || stage == MESA_SHADER_VERTEX)
         is_divergent = true;
      else
         unreachable("Invalid stage for load_primitive_id");
      break;
   case nir_intrinsic_load_tess_level_inner:
   case nir_intrinsic_load_tess_level_outer:
      if (stage == MESA_SHADER_TESS_CTRL)
         is_divergent = !(options & nir_divergence_single_patch_per_tcs_subgroup);
      else if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
      else
         unreachable("Invalid stage for load_tess_level_*");
      break;
   case nir_intrinsic_load_patch_vertices_in:
      if (stage == MESA_SHADER_TESS_EVAL)
         is_divergent = !(options & nir_divergence_single_patch_per_tes_subgroup);
      else
         assert(stage == MESA_SHADER_TESS_CTRL);
      break;

   case nir_intrinsic_load_workgroup_id:
      assert(gl_shader_stage_uses_workgroup(stage));
      if (stage == MESA_SHADER_COMPUTE)
         is_divergent |= (options & nir_divergence_multiple_workgroup_per_compute_subgroup);
      break;

   /* Clustered reductions are uniform if the cluster spans the whole
    * subgroup (cluster_size == 0), or if the source is uniform and the
    * operation is invariant.
    * Inclusive scans are uniform if the source is uniform and the
    * operation is invariant.
    */
   case nir_intrinsic_reduce:
      if (nir_intrinsic_cluster_size(instr) == 0)
         return false;
      FALLTHROUGH;
   case nir_intrinsic_inclusive_scan: {
      nir_op op = nir_intrinsic_reduction_op(instr);
      is_divergent = instr->src[0].ssa->divergent;
      if (op != nir_op_umin && op != nir_op_imin && op != nir_op_fmin &&
          op != nir_op_umax && op != nir_op_imax && op != nir_op_fmax &&
          op != nir_op_iand && op != nir_op_ior)
         is_divergent = true;
      break;
   }
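
   /* Worked example for the rule above (illustrative): with a uniform x,
    * an inclusive iadd scan gives lane i the value (i + 1) * x, which
    * diverges, while an inclusive umax scan gives every lane x. Hence only
    * min/max/and/or preserve uniformity.
    */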

   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ssbo:
      is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
                     instr->src[1].ssa->divergent;
      break;

   case nir_intrinsic_get_ssbo_size:
   case nir_intrinsic_deref_buffer_array_length:
      is_divergent = instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM);
      break;

   case nir_intrinsic_image_load:
   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_bindless_image_load:
   case nir_intrinsic_image_sparse_load:
   case nir_intrinsic_image_deref_sparse_load:
   case nir_intrinsic_bindless_image_sparse_load:
      is_divergent = (instr->src[0].ssa->divergent && (nir_intrinsic_access(instr) & ACCESS_NON_UNIFORM)) ||
                     instr->src[1].ssa->divergent || instr->src[2].ssa->divergent || instr->src[3].ssa->divergent;
      break;

   /* Intrinsics with divergence depending on sources */
   case nir_intrinsic_ballot_bitfield_extract:
   case nir_intrinsic_ballot_find_lsb:
   case nir_intrinsic_ballot_find_msb:
   case nir_intrinsic_ballot_bit_count_reduce:
   case nir_intrinsic_shuffle_xor:
   case nir_intrinsic_shuffle_up:
   case nir_intrinsic_shuffle_down:
   case nir_intrinsic_quad_broadcast:
   case nir_intrinsic_quad_swap_horizontal:
   case nir_intrinsic_quad_swap_vertical:
   case nir_intrinsic_quad_swap_diagonal:
   case nir_intrinsic_byte_permute_amd:
   case nir_intrinsic_load_deref:
   case nir_intrinsic_load_shared:
   case nir_intrinsic_load_global:
   case nir_intrinsic_load_global_constant:
   case nir_intrinsic_load_uniform:
   case nir_intrinsic_load_constant:
   case nir_intrinsic_load_sample_pos_from_id:
   case nir_intrinsic_load_kernel_input:
   case nir_intrinsic_load_buffer_amd:
   case nir_intrinsic_image_samples:
   case nir_intrinsic_image_deref_samples:
   case nir_intrinsic_bindless_image_samples:
   case nir_intrinsic_image_size:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_bindless_image_size:
   case nir_intrinsic_copy_deref:
   case nir_intrinsic_vulkan_resource_index:
   case nir_intrinsic_vulkan_resource_reindex:
   case nir_intrinsic_load_vulkan_descriptor:
   case nir_intrinsic_atomic_counter_read:
   case nir_intrinsic_atomic_counter_read_deref:
   case nir_intrinsic_quad_swizzle_amd:
   case nir_intrinsic_masked_swizzle_amd:
   case nir_intrinsic_is_sparse_texels_resident:
   case nir_intrinsic_sparse_residency_code_and:
   case nir_intrinsic_load_sbt_amd:
   case nir_intrinsic_bvh64_intersect_ray_amd:
   case nir_intrinsic_get_ubo_size:
   case nir_intrinsic_load_ssbo_address: {
      unsigned num_srcs = nir_intrinsic_infos[instr->intrinsic].num_srcs;
      for (unsigned i = 0; i < num_srcs; i++) {
         if (instr->src[i].ssa->divergent) {
            is_divergent = true;
            break;
         }
      }
      break;
   }
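
   /* A shuffle with a uniform index reads the same lane everywhere (like a
    * broadcast), and shuffling uniform data returns uniform data, so the
    * result only diverges when both the value and the index do.
    */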
   case nir_intrinsic_shuffle:
      is_divergent = instr->src[0].ssa->divergent &&
                     instr->src[1].ssa->divergent;
      break;

   /* Intrinsics which are always divergent */
   case nir_intrinsic_load_color0:
   case nir_intrinsic_load_color1:
   case nir_intrinsic_load_param:
   case nir_intrinsic_load_sample_id:
   case nir_intrinsic_load_sample_id_no_per_sample:
   case nir_intrinsic_load_sample_mask_in:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_load_barycentric_pixel:
   case nir_intrinsic_load_barycentric_centroid:
   case nir_intrinsic_load_barycentric_sample:
   case nir_intrinsic_load_barycentric_model:
   case nir_intrinsic_load_barycentric_at_sample:
   case nir_intrinsic_load_barycentric_at_offset:
   case nir_intrinsic_interp_deref_at_offset:
   case nir_intrinsic_interp_deref_at_sample:
   case nir_intrinsic_interp_deref_at_centroid:
   case nir_intrinsic_interp_deref_at_vertex:
   case nir_intrinsic_load_tess_coord:
   case nir_intrinsic_load_point_coord:
   case nir_intrinsic_load_line_coord:
   case nir_intrinsic_load_frag_coord:
   case nir_intrinsic_load_sample_pos:
   case nir_intrinsic_load_vertex_id_zero_base:
   case nir_intrinsic_load_vertex_id:
   case nir_intrinsic_load_instance_id:
   case nir_intrinsic_load_invocation_id:
   case nir_intrinsic_load_local_invocation_id:
   case nir_intrinsic_load_local_invocation_index:
   case nir_intrinsic_load_global_invocation_id:
   case nir_intrinsic_load_global_invocation_id_zero_base:
   case nir_intrinsic_load_global_invocation_index:
   case nir_intrinsic_load_subgroup_invocation:
   case nir_intrinsic_load_helper_invocation:
   case nir_intrinsic_is_helper_invocation:
   case nir_intrinsic_load_scratch:
   case nir_intrinsic_deref_atomic_add:
   case nir_intrinsic_deref_atomic_imin:
   case nir_intrinsic_deref_atomic_umin:
   case nir_intrinsic_deref_atomic_imax:
   case nir_intrinsic_deref_atomic_umax:
   case nir_intrinsic_deref_atomic_and:
   case nir_intrinsic_deref_atomic_or:
   case nir_intrinsic_deref_atomic_xor:
   case nir_intrinsic_deref_atomic_exchange:
   case nir_intrinsic_deref_atomic_comp_swap:
   case nir_intrinsic_deref_atomic_fadd:
   case nir_intrinsic_deref_atomic_fmin:
   case nir_intrinsic_deref_atomic_fmax:
   case nir_intrinsic_deref_atomic_fcomp_swap:
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_comp_swap:
   case nir_intrinsic_ssbo_atomic_fadd:
   case nir_intrinsic_ssbo_atomic_fmax:
   case nir_intrinsic_ssbo_atomic_fmin:
   case nir_intrinsic_ssbo_atomic_fcomp_swap:
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_image_deref_atomic_fadd:
   case nir_intrinsic_image_deref_atomic_fmin:
   case nir_intrinsic_image_deref_atomic_fmax:
   case nir_intrinsic_image_atomic_add:
   case nir_intrinsic_image_atomic_imin:
   case nir_intrinsic_image_atomic_umin:
   case nir_intrinsic_image_atomic_imax:
   case nir_intrinsic_image_atomic_umax:
   case nir_intrinsic_image_atomic_and:
   case nir_intrinsic_image_atomic_or:
   case nir_intrinsic_image_atomic_xor:
   case nir_intrinsic_image_atomic_exchange:
   case nir_intrinsic_image_atomic_comp_swap:
   case nir_intrinsic_image_atomic_fadd:
   case nir_intrinsic_image_atomic_fmin:
   case nir_intrinsic_image_atomic_fmax:
   case nir_intrinsic_bindless_image_atomic_add:
   case nir_intrinsic_bindless_image_atomic_imin:
   case nir_intrinsic_bindless_image_atomic_umin:
   case nir_intrinsic_bindless_image_atomic_imax:
   case nir_intrinsic_bindless_image_atomic_umax:
   case nir_intrinsic_bindless_image_atomic_and:
   case nir_intrinsic_bindless_image_atomic_or:
   case nir_intrinsic_bindless_image_atomic_xor:
   case nir_intrinsic_bindless_image_atomic_exchange:
   case nir_intrinsic_bindless_image_atomic_comp_swap:
   case nir_intrinsic_bindless_image_atomic_fadd:
   case nir_intrinsic_bindless_image_atomic_fmin:
   case nir_intrinsic_bindless_image_atomic_fmax:
   case nir_intrinsic_shared_atomic_add:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_shared_atomic_umin:
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_umax:
   case nir_intrinsic_shared_atomic_and:
   case nir_intrinsic_shared_atomic_or:
   case nir_intrinsic_shared_atomic_xor:
   case nir_intrinsic_shared_atomic_exchange:
   case nir_intrinsic_shared_atomic_comp_swap:
   case nir_intrinsic_shared_atomic_fadd:
   case nir_intrinsic_shared_atomic_fmin:
   case nir_intrinsic_shared_atomic_fmax:
   case nir_intrinsic_shared_atomic_fcomp_swap:
   case nir_intrinsic_global_atomic_add:
   case nir_intrinsic_global_atomic_imin:
   case nir_intrinsic_global_atomic_umin:
   case nir_intrinsic_global_atomic_imax:
   case nir_intrinsic_global_atomic_umax:
   case nir_intrinsic_global_atomic_and:
   case nir_intrinsic_global_atomic_or:
   case nir_intrinsic_global_atomic_xor:
   case nir_intrinsic_global_atomic_exchange:
   case nir_intrinsic_global_atomic_comp_swap:
   case nir_intrinsic_global_atomic_fadd:
   case nir_intrinsic_global_atomic_fmin:
   case nir_intrinsic_global_atomic_fmax:
   case nir_intrinsic_global_atomic_fcomp_swap:
   case nir_intrinsic_atomic_counter_add:
   case nir_intrinsic_atomic_counter_min:
   case nir_intrinsic_atomic_counter_max:
   case nir_intrinsic_atomic_counter_and:
   case nir_intrinsic_atomic_counter_or:
   case nir_intrinsic_atomic_counter_xor:
   case nir_intrinsic_atomic_counter_inc:
   case nir_intrinsic_atomic_counter_pre_dec:
   case nir_intrinsic_atomic_counter_post_dec:
   case nir_intrinsic_atomic_counter_exchange:
   case nir_intrinsic_atomic_counter_comp_swap:
   case nir_intrinsic_atomic_counter_add_deref:
   case nir_intrinsic_atomic_counter_min_deref:
   case nir_intrinsic_atomic_counter_max_deref:
   case nir_intrinsic_atomic_counter_and_deref:
   case nir_intrinsic_atomic_counter_or_deref:
   case nir_intrinsic_atomic_counter_xor_deref:
   case nir_intrinsic_atomic_counter_inc_deref:
   case nir_intrinsic_atomic_counter_pre_dec_deref:
   case nir_intrinsic_atomic_counter_post_dec_deref:
   case nir_intrinsic_atomic_counter_exchange_deref:
   case nir_intrinsic_atomic_counter_comp_swap_deref:
   case nir_intrinsic_exclusive_scan:
   case nir_intrinsic_ballot_bit_count_exclusive:
   case nir_intrinsic_ballot_bit_count_inclusive:
   case nir_intrinsic_write_invocation_amd:
   case nir_intrinsic_mbcnt_amd:
   case nir_intrinsic_lane_permute_16_amd:
   case nir_intrinsic_elect:
   case nir_intrinsic_load_tlb_color_v3d:
   case nir_intrinsic_load_tess_rel_patch_id_amd:
   case nir_intrinsic_load_gs_vertex_offset_amd:
   case nir_intrinsic_has_input_vertex_amd:
   case nir_intrinsic_has_input_primitive_amd:
   case nir_intrinsic_load_packed_passthrough_primitive_amd:
   case nir_intrinsic_load_initial_edgeflags_amd:
   case nir_intrinsic_gds_atomic_add_amd:
   case nir_intrinsic_load_rt_arg_scratch_offset_amd:
   case nir_intrinsic_load_intersection_opaque_amd:
      is_divergent = true;
      break;

   default:
#ifdef NDEBUG
      is_divergent = true;
      break;
#else
      nir_print_instr(&instr->instr, stderr);
      unreachable("\nNIR divergence analysis: Unhandled intrinsic.");
#endif
   }

   instr->dest.ssa.divergent = is_divergent;
   return is_divergent;
}
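
/* For texture instructions (visit_tex() below), a divergent sampler or
 * texture index only makes the result divergent when the instruction is
 * marked non-uniform (e.g. via NonUniformEXT); otherwise the index is
 * required to be dynamically uniform and only the remaining sources matter.
 */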

static bool
visit_tex(nir_tex_instr *instr)
{
   if (instr->dest.ssa.divergent)
      return false;

   bool is_divergent = false;

   for (unsigned i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_sampler_deref:
      case nir_tex_src_sampler_handle:
      case nir_tex_src_sampler_offset:
         is_divergent |= instr->src[i].src.ssa->divergent &&
                         instr->sampler_non_uniform;
         break;
      case nir_tex_src_texture_deref:
      case nir_tex_src_texture_handle:
      case nir_tex_src_texture_offset:
         is_divergent |= instr->src[i].src.ssa->divergent &&
                         instr->texture_non_uniform;
         break;
      default:
         is_divergent |= instr->src[i].src.ssa->divergent;
         break;
      }
   }

   instr->dest.ssa.divergent = is_divergent;
   return is_divergent;
}

static bool
visit_load_const(nir_load_const_instr *instr)
{
   return false;
}

static bool
visit_ssa_undef(nir_ssa_undef_instr *instr)
{
   return false;
}

static bool
nir_variable_mode_is_uniform(nir_variable_mode mode)
{
   switch (mode) {
   case nir_var_uniform:
   case nir_var_mem_ubo:
   case nir_var_mem_ssbo:
   case nir_var_mem_shared:
   case nir_var_mem_global:
      return true;
   default:
      return false;
   }
}

static bool
nir_variable_is_uniform(nir_shader *shader, nir_variable *var)
{
   if (nir_variable_mode_is_uniform(var->data.mode))
      return true;

   nir_divergence_options options = shader->options->divergence_analysis_options;
   gl_shader_stage stage = shader->info.stage;

   if (stage == MESA_SHADER_FRAGMENT &&
       (options & nir_divergence_single_prim_per_subgroup) &&
       var->data.mode == nir_var_shader_in &&
       var->data.interpolation == INTERP_MODE_FLAT)
      return true;

   if (stage == MESA_SHADER_TESS_CTRL &&
       (options & nir_divergence_single_patch_per_tcs_subgroup) &&
       var->data.mode == nir_var_shader_out && var->data.patch)
      return true;

   if (stage == MESA_SHADER_TESS_EVAL &&
       (options & nir_divergence_single_patch_per_tes_subgroup) &&
       var->data.mode == nir_var_shader_in && var->data.patch)
      return true;

   return false;
}

static bool
visit_deref(nir_shader *shader, nir_deref_instr *deref)
{
   if (deref->dest.ssa.divergent)
      return false;

   bool is_divergent = false;
   switch (deref->deref_type) {
   case nir_deref_type_var:
      is_divergent = !nir_variable_is_uniform(shader, deref->var);
      break;
   case nir_deref_type_array:
   case nir_deref_type_ptr_as_array:
      is_divergent = deref->arr.index.ssa->divergent;
      FALLTHROUGH;
   case nir_deref_type_struct:
   case nir_deref_type_array_wildcard:
      is_divergent |= deref->parent.ssa->divergent;
      break;
   case nir_deref_type_cast:
      /* deref->var is only valid for nir_deref_type_var; use the modes
       * recorded on the cast itself. */
      is_divergent = !nir_variable_mode_is_uniform(deref->modes) ||
                     deref->parent.ssa->divergent;
      break;
   }

   deref->dest.ssa.divergent = is_divergent;
   return is_divergent;
}

static bool
visit_jump(nir_jump_instr *jump, struct divergence_state *state)
{
   switch (jump->type) {
   case nir_jump_continue:
      if (state->divergent_loop_continue)
         return false;
      if (state->divergent_loop_cf)
         state->divergent_loop_continue = true;
      return state->divergent_loop_continue;
   case nir_jump_break:
      if (state->divergent_loop_break)
         return false;
      if (state->divergent_loop_cf)
         state->divergent_loop_break = true;
      return state->divergent_loop_break;
   case nir_jump_halt:
      /* This totally kills invocations, so it doesn't add divergence. */
      break;
   case nir_jump_return:
      unreachable("NIR divergence analysis: Unsupported return instruction.");
      break;
   case nir_jump_goto:
   case nir_jump_goto_if:
      unreachable("NIR divergence analysis: Unsupported goto/goto_if instruction.");
      break;
   }
   return false;
}

static bool
set_ssa_def_not_divergent(nir_ssa_def *def, UNUSED void *_state)
{
   def->divergent = false;
   return true;
}

static bool
update_instr_divergence(nir_shader *shader, nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_alu:
      return visit_alu(nir_instr_as_alu(instr));
   case nir_instr_type_intrinsic:
      return visit_intrinsic(shader, nir_instr_as_intrinsic(instr));
   case nir_instr_type_tex:
      return visit_tex(nir_instr_as_tex(instr));
   case nir_instr_type_load_const:
      return visit_load_const(nir_instr_as_load_const(instr));
   case nir_instr_type_ssa_undef:
      return visit_ssa_undef(nir_instr_as_ssa_undef(instr));
   case nir_instr_type_deref:
      return visit_deref(shader, nir_instr_as_deref(instr));
   case nir_instr_type_jump:
   case nir_instr_type_phi:
   case nir_instr_type_call:
   case nir_instr_type_parallel_copy:
   default:
      unreachable("NIR divergence analysis: Unsupported instruction type.");
   }
}

static bool
visit_block(nir_block *block, struct divergence_state *state)
{
   bool has_changed = false;

   nir_foreach_instr(instr, block) {
      /* phis are handled when processing the branches */
      if (instr->type == nir_instr_type_phi)
         continue;

      if (state->first_visit)
         nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL);

      if (instr->type == nir_instr_type_jump)
         has_changed |= visit_jump(nir_instr_as_jump(instr), state);
      else
         has_changed |= update_instr_divergence(state->shader, instr);
   }

   return has_changed;
}

/* There are 3 types of phi instructions:
 * (1) gamma: represent the joining point of different paths
 *     created by an “if-then-else” branch.
 *     The resulting value is divergent if the branch condition
 *     or any of the source values is divergent. */
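/* Illustration: with a divergent cond in
 *
 *    if (cond) { ssa_1 = ...; } else { ssa_2 = ...; }
 *    ssa_3 = phi(ssa_1, ssa_2)
 *
 * ssa_3 is divergent even when ssa_1 and ssa_2 are both uniform, because
 * invocations that took different paths merge different values.
 */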
static bool
visit_if_merge_phi(nir_phi_instr *phi, bool if_cond_divergent)
{
   if (phi->dest.ssa.divergent)
      return false;

   unsigned defined_srcs = 0;
   nir_foreach_phi_src(src, phi) {
      /* if any source value is divergent, the resulting value is divergent */
      if (src->src.ssa->divergent) {
         phi->dest.ssa.divergent = true;
         return true;
      }
      if (src->src.ssa->parent_instr->type != nir_instr_type_ssa_undef) {
         defined_srcs++;
      }
   }

   /* if the condition is divergent and two or more sources are defined,
    * the definition is divergent */
   if (defined_srcs > 1 && if_cond_divergent) {
      phi->dest.ssa.divergent = true;
      return true;
   }

   return false;
}

/* There are 3 types of phi instructions:
 * (2) mu: which only exist at loop headers,
 *     merge initial and loop-carried values.
 *     The resulting value is divergent if any source value
 *     is divergent or a divergent loop continue condition
 *     is associated with a different ssa-def. */
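/* Illustration: if a divergent continue skips the re-assignment of a value,
 * the header phi merges two different loop-carried ssa-defs, and invocations
 * that did or did not take the continue observe different defs in the same
 * iteration. The phi is therefore divergent even if every source is uniform.
 */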
static bool
visit_loop_header_phi(nir_phi_instr *phi, nir_block *preheader, bool divergent_continue)
{
   if (phi->dest.ssa.divergent)
      return false;

   nir_ssa_def *same = NULL;
   nir_foreach_phi_src(src, phi) {
      /* if any source value is divergent, the resulting value is divergent */
      if (src->src.ssa->divergent) {
         phi->dest.ssa.divergent = true;
         return true;
      }
      /* if this loop is uniform, we're done here */
      if (!divergent_continue)
         continue;
      /* skip the loop preheader */
      if (src->pred == preheader)
         continue;
      /* skip undef values */
      if (nir_src_is_undef(src->src))
         continue;

      /* check if all loop-carried values are from the same ssa-def */
      if (!same)
         same = src->src.ssa;
      else if (same != src->src.ssa) {
         phi->dest.ssa.divergent = true;
         return true;
      }
   }

   return false;
}

/* There are 3 types of phi instructions:
 * (3) eta: represent values that leave a loop.
 *     The resulting value is divergent if the source value is divergent
 *     or any loop exit condition is divergent for a value which is
 *     not loop-invariant.
 *     (note: there should be no phi for loop-invariant variables.) */
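/* Illustration: in LCSSA-form, every value used after a loop goes through
 * such a phi. With a divergent break, invocations leave the loop in
 * different iterations, so even a value that is uniform within each
 * iteration (e.g. the iteration counter) diverges after the loop.
 */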
static bool
visit_loop_exit_phi(nir_phi_instr *phi, bool divergent_break)
{
   if (phi->dest.ssa.divergent)
      return false;

   if (divergent_break) {
      phi->dest.ssa.divergent = true;
      return true;
   }

   /* if any source value is divergent, the resulting value is divergent */
   nir_foreach_phi_src(src, phi) {
      if (src->src.ssa->divergent) {
         phi->dest.ssa.divergent = true;
         return true;
      }
   }

   return false;
}

static bool
visit_if(nir_if *if_stmt, struct divergence_state *state)
{
   bool progress = false;

   struct divergence_state then_state = *state;
   then_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
   progress |= visit_cf_list(&if_stmt->then_list, &then_state);

   struct divergence_state else_state = *state;
   else_state.divergent_loop_cf |= if_stmt->condition.ssa->divergent;
   progress |= visit_cf_list(&if_stmt->else_list, &else_state);

   /* handle phis after the IF */
   nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&if_stmt->cf_node)) {
      if (instr->type != nir_instr_type_phi)
         break;

      if (state->first_visit)
         nir_instr_as_phi(instr)->dest.ssa.divergent = false;
      progress |= visit_if_merge_phi(nir_instr_as_phi(instr),
                                     if_stmt->condition.ssa->divergent);
   }

   /* join loop divergence information from both branch legs */
   state->divergent_loop_continue |= then_state.divergent_loop_continue ||
                                     else_state.divergent_loop_continue;
   state->divergent_loop_break |= then_state.divergent_loop_break ||
                                  else_state.divergent_loop_break;

   /* A divergent continue makes succeeding loop CF divergent:
    * not all loop-active invocations participate in the remaining loop body,
    * which means that a following break might be taken by only some invocations. */
   state->divergent_loop_cf |= state->divergent_loop_continue;

   return progress;
}

static bool
visit_loop(nir_loop *loop, struct divergence_state *state)
{
   bool progress = false;
   nir_block *loop_header = nir_loop_first_block(loop);
   nir_block *loop_preheader = nir_block_cf_tree_prev(loop_header);

   /* handle loop header phis first: we have no knowledge yet about
    * the loop's control flow or any loop-carried sources. */
   nir_foreach_instr(instr, loop_header) {
      if (instr->type != nir_instr_type_phi)
         break;

      nir_phi_instr *phi = nir_instr_as_phi(instr);
      if (!state->first_visit && phi->dest.ssa.divergent)
         continue;

      nir_foreach_phi_src(src, phi) {
         if (src->pred == loop_preheader) {
            phi->dest.ssa.divergent = src->src.ssa->divergent;
            break;
         }
      }
      progress |= phi->dest.ssa.divergent;
   }

   /* setup loop state */
   struct divergence_state loop_state = *state;
   loop_state.divergent_loop_cf = false;
   loop_state.divergent_loop_continue = false;
   loop_state.divergent_loop_break = false;

   /* process loop body until no further changes are made */
   bool repeat;
   do {
      progress |= visit_cf_list(&loop->body, &loop_state);
      repeat = false;

      /* revisit loop header phis to see if something has changed */
      nir_foreach_instr(instr, loop_header) {
         if (instr->type != nir_instr_type_phi)
            break;

         repeat |= visit_loop_header_phi(nir_instr_as_phi(instr),
                                         loop_preheader,
                                         loop_state.divergent_loop_continue);
      }

      loop_state.divergent_loop_cf = false;
      loop_state.first_visit = false;
   } while (repeat);

   /* handle phis after the loop */
   nir_foreach_instr(instr, nir_cf_node_cf_tree_next(&loop->cf_node)) {
      if (instr->type != nir_instr_type_phi)
         break;

      if (state->first_visit)
         nir_instr_as_phi(instr)->dest.ssa.divergent = false;
      progress |= visit_loop_exit_phi(nir_instr_as_phi(instr),
                                      loop_state.divergent_loop_break);
   }

   loop->divergent = (loop_state.divergent_loop_break || loop_state.divergent_loop_continue);

   return progress;
}

static bool
visit_cf_list(struct exec_list *list, struct divergence_state *state)
{
   bool has_changed = false;

   foreach_list_typed(nir_cf_node, node, node, list) {
      switch (node->type) {
      case nir_cf_node_block:
         has_changed |= visit_block(nir_cf_node_as_block(node), state);
         break;
      case nir_cf_node_if:
         has_changed |= visit_if(nir_cf_node_as_if(node), state);
         break;
      case nir_cf_node_loop:
         has_changed |= visit_loop(nir_cf_node_as_loop(node), state);
         break;
      case nir_cf_node_function:
         unreachable("NIR divergence analysis: Unsupported cf_node type.");
      }
   }

   return has_changed;
}

void
nir_divergence_analysis(nir_shader *shader)
{
   struct divergence_state state = {
      .stage = shader->info.stage,
      .shader = shader,
      .divergent_loop_cf = false,
      .divergent_loop_continue = false,
      .divergent_loop_break = false,
      .first_visit = true,
   };

   visit_cf_list(&nir_shader_get_entrypoint(shader)->body, &state);
}

bool
nir_update_instr_divergence(nir_shader *shader, nir_instr *instr)
{
   nir_foreach_ssa_def(instr, set_ssa_def_not_divergent, NULL);

   if (instr->type == nir_instr_type_phi) {
      nir_cf_node *prev = nir_cf_node_prev(&instr->block->cf_node);
      /* can only update gamma/if phis */
      if (!prev || prev->type != nir_cf_node_if)
         return false;

      nir_if *nif = nir_cf_node_as_if(prev);

      visit_if_merge_phi(nir_instr_as_phi(instr), nir_src_is_divergent(nif->condition));
      return true;
   }

   update_instr_divergence(shader, instr);
   return true;
}
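
/* Typical usage (a sketch; the argument choices for the LCSSA pass are
 * illustrative, the analysis itself only requires LCSSA-form):
 *
 *    nir_convert_to_lcssa(shader, true, false);
 *    nir_divergence_analysis(shader);
 *
 * Afterwards, consumers can test nir_ssa_def::divergent (e.g. through
 * nir_src_is_divergent()) to choose between scalar and per-lane code paths.
 */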