101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2015 Thomas Helland 37ec681f3Smrg * Copyright © 2019 Valve Corporation 401e04c3fSmrg * 501e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 601e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 701e04c3fSmrg * to deal in the Software without restriction, including without limitation 801e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 901e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 1001e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1101e04c3fSmrg * 1201e04c3fSmrg * The above copyright notice and this permission notice (including the next 1301e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1401e04c3fSmrg * Software. 1501e04c3fSmrg * 1601e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1701e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1801e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1901e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 2001e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2101e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2201e04c3fSmrg * IN THE SOFTWARE. 2301e04c3fSmrg */ 2401e04c3fSmrg 2501e04c3fSmrg/* 2601e04c3fSmrg * This pass converts the ssa-graph into "Loop Closed SSA form". This is 2701e04c3fSmrg * done by placing phi nodes at the exits of the loop for all values 2801e04c3fSmrg * that are used outside the loop. The result is it transforms: 2901e04c3fSmrg * 3001e04c3fSmrg * loop { -> loop { 3101e04c3fSmrg * ssa2 = .... -> ssa2 = ... 3201e04c3fSmrg * if (cond) -> if (cond) 3301e04c3fSmrg * break; -> break; 3401e04c3fSmrg * ssa3 = ssa2 * ssa4 -> ssa3 = ssa2 * ssa4 3501e04c3fSmrg * } -> } 3601e04c3fSmrg * ssa6 = ssa2 + 4 -> ssa5 = phi(ssa2) 3701e04c3fSmrg * ssa6 = ssa5 + 4 3801e04c3fSmrg */ 3901e04c3fSmrg 4001e04c3fSmrg#include "nir.h" 4101e04c3fSmrg 4201e04c3fSmrgtypedef struct { 4301e04c3fSmrg /* The nir_shader we are transforming */ 4401e04c3fSmrg nir_shader *shader; 4501e04c3fSmrg 4601e04c3fSmrg /* The loop we store information for */ 4701e04c3fSmrg nir_loop *loop; 487ec681f3Smrg nir_block *block_after_loop; 497ec681f3Smrg nir_block **exit_blocks; 5001e04c3fSmrg 517ec681f3Smrg /* Whether to skip loop invariant variables */ 527ec681f3Smrg bool skip_invariants; 537ec681f3Smrg bool skip_bool_invariants; 547ec681f3Smrg 557ec681f3Smrg bool progress; 5601e04c3fSmrg} lcssa_state; 5701e04c3fSmrg 5801e04c3fSmrgstatic bool 597ec681f3Smrgis_if_use_inside_loop(nir_src *use, nir_loop *loop) 6001e04c3fSmrg{ 6101e04c3fSmrg nir_block *block_before_loop = 6201e04c3fSmrg nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node)); 6301e04c3fSmrg nir_block *block_after_loop = 6401e04c3fSmrg nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)); 6501e04c3fSmrg 6601e04c3fSmrg nir_block *prev_block = 6701e04c3fSmrg nir_cf_node_as_block(nir_cf_node_prev(&use->parent_if->cf_node)); 6801e04c3fSmrg if (prev_block->index <= block_before_loop->index || 6901e04c3fSmrg prev_block->index >= block_after_loop->index) { 7001e04c3fSmrg return false; 7101e04c3fSmrg } 7201e04c3fSmrg 7301e04c3fSmrg return true; 7401e04c3fSmrg} 7501e04c3fSmrg 7601e04c3fSmrgstatic bool 777ec681f3Smrgis_use_inside_loop(nir_src *use, nir_loop *loop) 7801e04c3fSmrg{ 7901e04c3fSmrg nir_block *block_before_loop = 8001e04c3fSmrg nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node)); 8101e04c3fSmrg nir_block *block_after_loop = 8201e04c3fSmrg nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)); 8301e04c3fSmrg 8401e04c3fSmrg if (use->parent_instr->block->index <= block_before_loop->index || 8501e04c3fSmrg use->parent_instr->block->index >= block_after_loop->index) { 8601e04c3fSmrg return false; 8701e04c3fSmrg } 8801e04c3fSmrg 8901e04c3fSmrg return true; 9001e04c3fSmrg} 9101e04c3fSmrg 927ec681f3Smrgstatic bool 937ec681f3Smrgis_defined_before_loop(nir_ssa_def *def, nir_loop *loop) 947ec681f3Smrg{ 957ec681f3Smrg nir_instr *instr = def->parent_instr; 967ec681f3Smrg nir_block *block_before_loop = 977ec681f3Smrg nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node)); 987ec681f3Smrg 997ec681f3Smrg return instr->block->index <= block_before_loop->index; 1007ec681f3Smrg} 1017ec681f3Smrg 1027ec681f3Smrgtypedef enum instr_invariance { 1037ec681f3Smrg undefined = 0, 1047ec681f3Smrg invariant, 1057ec681f3Smrg not_invariant, 1067ec681f3Smrg} instr_invariance; 1077ec681f3Smrg 1087ec681f3Smrgstatic instr_invariance 1097ec681f3Smrginstr_is_invariant(nir_instr *instr, nir_loop *loop); 1107ec681f3Smrg 1117ec681f3Smrgstatic bool 1127ec681f3Smrgdef_is_invariant(nir_ssa_def *def, nir_loop *loop) 1137ec681f3Smrg{ 1147ec681f3Smrg if (is_defined_before_loop(def, loop)) 1157ec681f3Smrg return invariant; 1167ec681f3Smrg 1177ec681f3Smrg if (def->parent_instr->pass_flags == undefined) 1187ec681f3Smrg def->parent_instr->pass_flags = instr_is_invariant(def->parent_instr, loop); 1197ec681f3Smrg 1207ec681f3Smrg return def->parent_instr->pass_flags == invariant; 1217ec681f3Smrg} 1227ec681f3Smrg 1237ec681f3Smrgstatic bool 1247ec681f3Smrgsrc_is_invariant(nir_src *src, void *state) 1257ec681f3Smrg{ 1267ec681f3Smrg assert(src->is_ssa); 1277ec681f3Smrg return def_is_invariant(src->ssa, (nir_loop *)state); 1287ec681f3Smrg} 1297ec681f3Smrg 1307ec681f3Smrgstatic instr_invariance 1317ec681f3Smrgphi_is_invariant(nir_phi_instr *instr, nir_loop *loop) 1327ec681f3Smrg{ 1337ec681f3Smrg /* Base case: it's a phi at the loop header 1347ec681f3Smrg * Loop-header phis are updated in each loop iteration with 1357ec681f3Smrg * the loop-carried value, and thus control-flow dependent 1367ec681f3Smrg * on the loop itself. 1377ec681f3Smrg */ 1387ec681f3Smrg if (instr->instr.block == nir_loop_first_block(loop)) 1397ec681f3Smrg return not_invariant; 1407ec681f3Smrg 1417ec681f3Smrg nir_foreach_phi_src(src, instr) { 1427ec681f3Smrg if (!src_is_invariant(&src->src, loop)) 1437ec681f3Smrg return not_invariant; 1447ec681f3Smrg } 1457ec681f3Smrg 1467ec681f3Smrg /* All loop header- and LCSSA-phis should be handled by this point. */ 1477ec681f3Smrg nir_cf_node *prev = nir_cf_node_prev(&instr->instr.block->cf_node); 1487ec681f3Smrg assert(prev && prev->type == nir_cf_node_if); 1497ec681f3Smrg 1507ec681f3Smrg /* Invariance of phis after if-nodes also depends on the invariance 1517ec681f3Smrg * of the branch condition. 1527ec681f3Smrg */ 1537ec681f3Smrg nir_if *if_node = nir_cf_node_as_if(prev); 1547ec681f3Smrg if (!def_is_invariant(if_node->condition.ssa, loop)) 1557ec681f3Smrg return not_invariant; 1567ec681f3Smrg 1577ec681f3Smrg return invariant; 1587ec681f3Smrg} 1597ec681f3Smrg 1607ec681f3Smrg 1617ec681f3Smrg/* An instruction is said to be loop-invariant if it 1627ec681f3Smrg * - has no sideeffects and 1637ec681f3Smrg * - solely depends on variables defined outside of the loop or 1647ec681f3Smrg * by other invariant instructions 1657ec681f3Smrg */ 1667ec681f3Smrgstatic instr_invariance 1677ec681f3Smrginstr_is_invariant(nir_instr *instr, nir_loop *loop) 1687ec681f3Smrg{ 1697ec681f3Smrg assert(instr->pass_flags == undefined); 1707ec681f3Smrg 1717ec681f3Smrg switch (instr->type) { 1727ec681f3Smrg case nir_instr_type_load_const: 1737ec681f3Smrg case nir_instr_type_ssa_undef: 1747ec681f3Smrg return invariant; 1757ec681f3Smrg case nir_instr_type_call: 1767ec681f3Smrg return not_invariant; 1777ec681f3Smrg case nir_instr_type_phi: 1787ec681f3Smrg return phi_is_invariant(nir_instr_as_phi(instr), loop); 1797ec681f3Smrg case nir_instr_type_intrinsic: { 1807ec681f3Smrg nir_intrinsic_instr *intrinsic = nir_instr_as_intrinsic(instr); 1817ec681f3Smrg if (!(nir_intrinsic_infos[intrinsic->intrinsic].flags & NIR_INTRINSIC_CAN_REORDER)) 1827ec681f3Smrg return not_invariant; 1837ec681f3Smrg } 1847ec681f3Smrg FALLTHROUGH; 1857ec681f3Smrg default: 1867ec681f3Smrg return nir_foreach_src(instr, src_is_invariant, loop) ? invariant : not_invariant; 1877ec681f3Smrg } 1887ec681f3Smrg 1897ec681f3Smrg return invariant; 1907ec681f3Smrg} 1917ec681f3Smrg 19201e04c3fSmrgstatic bool 19301e04c3fSmrgconvert_loop_exit_for_ssa(nir_ssa_def *def, void *void_state) 19401e04c3fSmrg{ 19501e04c3fSmrg lcssa_state *state = void_state; 19601e04c3fSmrg bool all_uses_inside_loop = true; 19701e04c3fSmrg 1987ec681f3Smrg /* Don't create LCSSA-Phis for loop-invariant variables */ 1997ec681f3Smrg if (state->skip_invariants && 2007ec681f3Smrg (def->bit_size != 1 || state->skip_bool_invariants)) { 2017ec681f3Smrg assert(def->parent_instr->pass_flags != undefined); 2027ec681f3Smrg if (def->parent_instr->pass_flags == invariant) 2037ec681f3Smrg return true; 2047ec681f3Smrg } 20501e04c3fSmrg 20601e04c3fSmrg nir_foreach_use(use, def) { 20701e04c3fSmrg if (use->parent_instr->type == nir_instr_type_phi && 2087ec681f3Smrg use->parent_instr->block == state->block_after_loop) { 20901e04c3fSmrg continue; 21001e04c3fSmrg } 21101e04c3fSmrg 21201e04c3fSmrg if (!is_use_inside_loop(use, state->loop)) { 21301e04c3fSmrg all_uses_inside_loop = false; 21401e04c3fSmrg } 21501e04c3fSmrg } 21601e04c3fSmrg 21701e04c3fSmrg nir_foreach_if_use(use, def) { 21801e04c3fSmrg if (!is_if_use_inside_loop(use, state->loop)) { 21901e04c3fSmrg all_uses_inside_loop = false; 22001e04c3fSmrg } 22101e04c3fSmrg } 22201e04c3fSmrg 22301e04c3fSmrg /* There where no sources that had defs outside the loop */ 22401e04c3fSmrg if (all_uses_inside_loop) 22501e04c3fSmrg return true; 22601e04c3fSmrg 22701e04c3fSmrg /* Initialize a phi-instruction */ 22801e04c3fSmrg nir_phi_instr *phi = nir_phi_instr_create(state->shader); 22901e04c3fSmrg nir_ssa_dest_init(&phi->instr, &phi->dest, 23001e04c3fSmrg def->num_components, def->bit_size, "LCSSA-phi"); 23101e04c3fSmrg 23201e04c3fSmrg /* Create a phi node with as many sources pointing to the same ssa_def as 23301e04c3fSmrg * the block has predecessors. 23401e04c3fSmrg */ 2357ec681f3Smrg uint32_t num_exits = state->block_after_loop->predecessors->entries; 2367ec681f3Smrg for (uint32_t i = 0; i < num_exits; i++) { 2377ec681f3Smrg nir_phi_instr_add_src(phi, state->exit_blocks[i], nir_src_for_ssa(def)); 23801e04c3fSmrg } 23901e04c3fSmrg 2407ec681f3Smrg nir_instr_insert_before_block(state->block_after_loop, &phi->instr); 2417e102996Smaya nir_ssa_def *dest = &phi->dest.ssa; 2427e102996Smaya 2437e102996Smaya /* deref instructions need a cast after the phi */ 2447e102996Smaya if (def->parent_instr->type == nir_instr_type_deref) { 2457e102996Smaya nir_deref_instr *cast = 2467e102996Smaya nir_deref_instr_create(state->shader, nir_deref_type_cast); 2477e102996Smaya 2487e102996Smaya nir_deref_instr *instr = nir_instr_as_deref(def->parent_instr); 2497ec681f3Smrg cast->modes = instr->modes; 2507e102996Smaya cast->type = instr->type; 2517e102996Smaya cast->parent = nir_src_for_ssa(&phi->dest.ssa); 2527ec681f3Smrg cast->cast.ptr_stride = nir_deref_instr_array_stride(instr); 2537e102996Smaya 2547e102996Smaya nir_ssa_dest_init(&cast->instr, &cast->dest, 2557e102996Smaya phi->dest.ssa.num_components, 2567e102996Smaya phi->dest.ssa.bit_size, NULL); 2577ec681f3Smrg nir_instr_insert(nir_after_phis(state->block_after_loop), &cast->instr); 2587e102996Smaya dest = &cast->dest.ssa; 2597e102996Smaya } 26001e04c3fSmrg 26101e04c3fSmrg /* Run through all uses and rewrite those outside the loop to point to 26201e04c3fSmrg * the phi instead of pointing to the ssa-def. 26301e04c3fSmrg */ 26401e04c3fSmrg nir_foreach_use_safe(use, def) { 26501e04c3fSmrg if (use->parent_instr->type == nir_instr_type_phi && 2667ec681f3Smrg state->block_after_loop == use->parent_instr->block) { 26701e04c3fSmrg continue; 26801e04c3fSmrg } 26901e04c3fSmrg 27001e04c3fSmrg if (!is_use_inside_loop(use, state->loop)) { 2717e102996Smaya nir_instr_rewrite_src(use->parent_instr, use, nir_src_for_ssa(dest)); 27201e04c3fSmrg } 27301e04c3fSmrg } 27401e04c3fSmrg 27501e04c3fSmrg nir_foreach_if_use_safe(use, def) { 27601e04c3fSmrg if (!is_if_use_inside_loop(use, state->loop)) { 2777e102996Smaya nir_if_rewrite_condition(use->parent_if, nir_src_for_ssa(dest)); 27801e04c3fSmrg } 27901e04c3fSmrg } 28001e04c3fSmrg 2817ec681f3Smrg state->progress = true; 28201e04c3fSmrg return true; 28301e04c3fSmrg} 28401e04c3fSmrg 2857ec681f3Smrgstatic void 2867ec681f3Smrgsetup_loop_state(lcssa_state *state, nir_loop *loop) 2877ec681f3Smrg{ 2887ec681f3Smrg state->loop = loop; 2897ec681f3Smrg state->block_after_loop = 2907ec681f3Smrg nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)); 2917ec681f3Smrg 2927ec681f3Smrg ralloc_free(state->exit_blocks); 2937ec681f3Smrg state->exit_blocks = nir_block_get_predecessors_sorted(state->block_after_loop, state); 2947ec681f3Smrg} 2957ec681f3Smrg 29601e04c3fSmrgstatic void 29701e04c3fSmrgconvert_to_lcssa(nir_cf_node *cf_node, lcssa_state *state) 29801e04c3fSmrg{ 29901e04c3fSmrg switch (cf_node->type) { 30001e04c3fSmrg case nir_cf_node_block: 30101e04c3fSmrg return; 30201e04c3fSmrg case nir_cf_node_if: { 30301e04c3fSmrg nir_if *if_stmt = nir_cf_node_as_if(cf_node); 30401e04c3fSmrg foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->then_list) 30501e04c3fSmrg convert_to_lcssa(nested_node, state); 30601e04c3fSmrg foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->else_list) 30701e04c3fSmrg convert_to_lcssa(nested_node, state); 30801e04c3fSmrg return; 30901e04c3fSmrg } 31001e04c3fSmrg case nir_cf_node_loop: { 3117ec681f3Smrg if (state->skip_invariants) { 3127ec681f3Smrg nir_foreach_block_in_cf_node(block, cf_node) { 3137ec681f3Smrg nir_foreach_instr(instr, block) 3147ec681f3Smrg instr->pass_flags = undefined; 3157ec681f3Smrg } 3167ec681f3Smrg } 31701e04c3fSmrg 3187ec681f3Smrg /* first, convert inner loops */ 3197ec681f3Smrg nir_loop *loop = nir_cf_node_as_loop(cf_node); 3207ec681f3Smrg foreach_list_typed(nir_cf_node, nested_node, node, &loop->body) 32101e04c3fSmrg convert_to_lcssa(nested_node, state); 32201e04c3fSmrg 3237ec681f3Smrg setup_loop_state(state, loop); 3247ec681f3Smrg 3257ec681f3Smrg /* mark loop-invariant instructions */ 3267ec681f3Smrg if (state->skip_invariants) { 3277ec681f3Smrg /* Without a loop all instructions are invariant. 3287ec681f3Smrg * For outer loops, multiple breaks can still create phis. 3297ec681f3Smrg * The variance then depends on all (nested) break conditions. 3307ec681f3Smrg * We don't consider this, but assume all not_invariant. 3317ec681f3Smrg */ 3327ec681f3Smrg if (nir_loop_first_block(loop)->predecessors->entries == 1) 3337ec681f3Smrg goto end; 3347ec681f3Smrg 3357ec681f3Smrg nir_foreach_block_in_cf_node(block, cf_node) { 3367ec681f3Smrg nir_foreach_instr(instr, block) { 3377ec681f3Smrg if (instr->pass_flags == undefined) 3387ec681f3Smrg instr->pass_flags = instr_is_invariant(instr, nir_cf_node_as_loop(cf_node)); 3397ec681f3Smrg } 3407ec681f3Smrg } 3417ec681f3Smrg } 3427ec681f3Smrg 3437ec681f3Smrg nir_foreach_block_in_cf_node(block, cf_node) { 3447ec681f3Smrg nir_foreach_instr(instr, block) { 3457ec681f3Smrg nir_foreach_ssa_def(instr, convert_loop_exit_for_ssa, state); 3467ec681f3Smrg 3477ec681f3Smrg /* for outer loops, invariant instructions can be variant */ 3487ec681f3Smrg if (state->skip_invariants && instr->pass_flags == invariant) 3497ec681f3Smrg instr->pass_flags = undefined; 3507ec681f3Smrg } 3517ec681f3Smrg } 3527ec681f3Smrg 3537ec681f3Smrgend: 3547ec681f3Smrg /* For outer loops, the LCSSA-phi should be considered not invariant */ 3557ec681f3Smrg if (state->skip_invariants) { 3567ec681f3Smrg nir_foreach_instr(instr, state->block_after_loop) { 3577ec681f3Smrg if (instr->type == nir_instr_type_phi) 3587ec681f3Smrg instr->pass_flags = not_invariant; 3597ec681f3Smrg else 3607ec681f3Smrg break; 3617ec681f3Smrg } 3627ec681f3Smrg } 36301e04c3fSmrg return; 36401e04c3fSmrg } 36501e04c3fSmrg default: 36601e04c3fSmrg unreachable("unknown cf node type"); 36701e04c3fSmrg } 36801e04c3fSmrg} 36901e04c3fSmrg 37001e04c3fSmrgvoid 3717ec681f3Smrgnir_convert_loop_to_lcssa(nir_loop *loop) 3727ec681f3Smrg{ 37301e04c3fSmrg nir_function_impl *impl = nir_cf_node_get_function(&loop->cf_node); 37401e04c3fSmrg 37501e04c3fSmrg nir_metadata_require(impl, nir_metadata_block_index); 37601e04c3fSmrg 37701e04c3fSmrg lcssa_state *state = rzalloc(NULL, lcssa_state); 3787ec681f3Smrg setup_loop_state(state, loop); 37901e04c3fSmrg state->shader = impl->function->shader; 3807ec681f3Smrg state->skip_invariants = false; 3817ec681f3Smrg state->skip_bool_invariants = false; 3827ec681f3Smrg 3837ec681f3Smrg nir_foreach_block_in_cf_node (block, &loop->cf_node) { 3847ec681f3Smrg nir_foreach_instr(instr, block) 3857ec681f3Smrg nir_foreach_ssa_def(instr, convert_loop_exit_for_ssa, state); 3867ec681f3Smrg } 3877ec681f3Smrg 3887ec681f3Smrg ralloc_free(state); 3897ec681f3Smrg} 3907ec681f3Smrg 3917ec681f3Smrgbool 3927ec681f3Smrgnir_convert_to_lcssa(nir_shader *shader, bool skip_invariants, bool skip_bool_invariants) 3937ec681f3Smrg{ 3947ec681f3Smrg bool progress = false; 3957ec681f3Smrg lcssa_state *state = rzalloc(NULL, lcssa_state); 3967ec681f3Smrg state->shader = shader; 3977ec681f3Smrg state->skip_invariants = skip_invariants; 3987ec681f3Smrg state->skip_bool_invariants = skip_bool_invariants; 3997ec681f3Smrg 4007ec681f3Smrg nir_foreach_function(function, shader) { 4017ec681f3Smrg if (function->impl == NULL) 4027ec681f3Smrg continue; 4037ec681f3Smrg 4047ec681f3Smrg state->progress = false; 4057ec681f3Smrg nir_metadata_require(function->impl, nir_metadata_block_index); 40601e04c3fSmrg 4077ec681f3Smrg foreach_list_typed(nir_cf_node, node, node, &function->impl->body) 4087ec681f3Smrg convert_to_lcssa(node, state); 4097ec681f3Smrg 4107ec681f3Smrg if (state->progress) { 4117ec681f3Smrg progress = true; 4127ec681f3Smrg nir_metadata_preserve(function->impl, nir_metadata_block_index | 4137ec681f3Smrg nir_metadata_dominance); 4147ec681f3Smrg } else { 4157ec681f3Smrg nir_metadata_preserve(function->impl, nir_metadata_all); 4167ec681f3Smrg } 4177ec681f3Smrg } 41801e04c3fSmrg 41901e04c3fSmrg ralloc_free(state); 4207ec681f3Smrg return progress; 42101e04c3fSmrg} 4227ec681f3Smrg 423