1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2015 Thomas Helland 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21b8e80941Smrg * IN THE SOFTWARE. 22b8e80941Smrg */ 23b8e80941Smrg 24b8e80941Smrg/* 25b8e80941Smrg * This pass converts the ssa-graph into "Loop Closed SSA form". This is 26b8e80941Smrg * done by placing phi nodes at the exits of the loop for all values 27b8e80941Smrg * that are used outside the loop. The result is it transforms: 28b8e80941Smrg * 29b8e80941Smrg * loop { -> loop { 30b8e80941Smrg * ssa2 = .... -> ssa2 = ... 31b8e80941Smrg * if (cond) -> if (cond) 32b8e80941Smrg * break; -> break; 33b8e80941Smrg * ssa3 = ssa2 * ssa4 -> ssa3 = ssa2 * ssa4 34b8e80941Smrg * } -> } 35b8e80941Smrg * ssa6 = ssa2 + 4 -> ssa5 = phi(ssa2) 36b8e80941Smrg * ssa6 = ssa5 + 4 37b8e80941Smrg */ 38b8e80941Smrg 39b8e80941Smrg#include "nir.h" 40b8e80941Smrg 41b8e80941Smrgtypedef struct { 42b8e80941Smrg /* The nir_shader we are transforming */ 43b8e80941Smrg nir_shader *shader; 44b8e80941Smrg 45b8e80941Smrg /* The loop we store information for */ 46b8e80941Smrg nir_loop *loop; 47b8e80941Smrg 48b8e80941Smrg} lcssa_state; 49b8e80941Smrg 50b8e80941Smrgstatic bool 51b8e80941Smrgis_if_use_inside_loop(nir_src *use, nir_loop* loop) 52b8e80941Smrg{ 53b8e80941Smrg nir_block *block_before_loop = 54b8e80941Smrg nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node)); 55b8e80941Smrg nir_block *block_after_loop = 56b8e80941Smrg nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)); 57b8e80941Smrg 58b8e80941Smrg nir_block *prev_block = 59b8e80941Smrg nir_cf_node_as_block(nir_cf_node_prev(&use->parent_if->cf_node)); 60b8e80941Smrg if (prev_block->index <= block_before_loop->index || 61b8e80941Smrg prev_block->index >= block_after_loop->index) { 62b8e80941Smrg return false; 63b8e80941Smrg } 64b8e80941Smrg 65b8e80941Smrg return true; 66b8e80941Smrg} 67b8e80941Smrg 68b8e80941Smrgstatic bool 69b8e80941Smrgis_use_inside_loop(nir_src *use, nir_loop* loop) 70b8e80941Smrg{ 71b8e80941Smrg nir_block *block_before_loop = 72b8e80941Smrg nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node)); 73b8e80941Smrg nir_block *block_after_loop = 74b8e80941Smrg nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)); 75b8e80941Smrg 76b8e80941Smrg if (use->parent_instr->block->index <= block_before_loop->index || 77b8e80941Smrg use->parent_instr->block->index >= block_after_loop->index) { 78b8e80941Smrg return false; 79b8e80941Smrg } 80b8e80941Smrg 81b8e80941Smrg return true; 82b8e80941Smrg} 83b8e80941Smrg 84b8e80941Smrgstatic bool 85b8e80941Smrgconvert_loop_exit_for_ssa(nir_ssa_def *def, void *void_state) 86b8e80941Smrg{ 87b8e80941Smrg lcssa_state *state = void_state; 88b8e80941Smrg bool all_uses_inside_loop = true; 89b8e80941Smrg 90b8e80941Smrg nir_block *block_after_loop = 91b8e80941Smrg nir_cf_node_as_block(nir_cf_node_next(&state->loop->cf_node)); 92b8e80941Smrg 93b8e80941Smrg nir_foreach_use(use, def) { 94b8e80941Smrg if (use->parent_instr->type == nir_instr_type_phi && 95b8e80941Smrg use->parent_instr->block == block_after_loop) { 96b8e80941Smrg continue; 97b8e80941Smrg } 98b8e80941Smrg 99b8e80941Smrg if (!is_use_inside_loop(use, state->loop)) { 100b8e80941Smrg all_uses_inside_loop = false; 101b8e80941Smrg } 102b8e80941Smrg } 103b8e80941Smrg 104b8e80941Smrg nir_foreach_if_use(use, def) { 105b8e80941Smrg if (!is_if_use_inside_loop(use, state->loop)) { 106b8e80941Smrg all_uses_inside_loop = false; 107b8e80941Smrg } 108b8e80941Smrg } 109b8e80941Smrg 110b8e80941Smrg /* There where no sources that had defs outside the loop */ 111b8e80941Smrg if (all_uses_inside_loop) 112b8e80941Smrg return true; 113b8e80941Smrg 114b8e80941Smrg /* Initialize a phi-instruction */ 115b8e80941Smrg nir_phi_instr *phi = nir_phi_instr_create(state->shader); 116b8e80941Smrg nir_ssa_dest_init(&phi->instr, &phi->dest, 117b8e80941Smrg def->num_components, def->bit_size, "LCSSA-phi"); 118b8e80941Smrg 119b8e80941Smrg /* Create a phi node with as many sources pointing to the same ssa_def as 120b8e80941Smrg * the block has predecessors. 121b8e80941Smrg */ 122b8e80941Smrg set_foreach(block_after_loop->predecessors, entry) { 123b8e80941Smrg nir_phi_src *phi_src = ralloc(phi, nir_phi_src); 124b8e80941Smrg phi_src->src = nir_src_for_ssa(def); 125b8e80941Smrg phi_src->pred = (nir_block *) entry->key; 126b8e80941Smrg 127b8e80941Smrg exec_list_push_tail(&phi->srcs, &phi_src->node); 128b8e80941Smrg } 129b8e80941Smrg 130b8e80941Smrg nir_instr_insert_before_block(block_after_loop, &phi->instr); 131b8e80941Smrg nir_ssa_def *dest = &phi->dest.ssa; 132b8e80941Smrg 133b8e80941Smrg /* deref instructions need a cast after the phi */ 134b8e80941Smrg if (def->parent_instr->type == nir_instr_type_deref) { 135b8e80941Smrg nir_deref_instr *cast = 136b8e80941Smrg nir_deref_instr_create(state->shader, nir_deref_type_cast); 137b8e80941Smrg 138b8e80941Smrg nir_deref_instr *instr = nir_instr_as_deref(def->parent_instr); 139b8e80941Smrg cast->mode = instr->mode; 140b8e80941Smrg cast->type = instr->type; 141b8e80941Smrg cast->parent = nir_src_for_ssa(&phi->dest.ssa); 142b8e80941Smrg cast->cast.ptr_stride = nir_deref_instr_ptr_as_array_stride(instr); 143b8e80941Smrg 144b8e80941Smrg nir_ssa_dest_init(&cast->instr, &cast->dest, 145b8e80941Smrg phi->dest.ssa.num_components, 146b8e80941Smrg phi->dest.ssa.bit_size, NULL); 147b8e80941Smrg nir_instr_insert(nir_after_phis(block_after_loop), &cast->instr); 148b8e80941Smrg dest = &cast->dest.ssa; 149b8e80941Smrg } 150b8e80941Smrg 151b8e80941Smrg /* Run through all uses and rewrite those outside the loop to point to 152b8e80941Smrg * the phi instead of pointing to the ssa-def. 153b8e80941Smrg */ 154b8e80941Smrg nir_foreach_use_safe(use, def) { 155b8e80941Smrg if (use->parent_instr->type == nir_instr_type_phi && 156b8e80941Smrg block_after_loop == use->parent_instr->block) { 157b8e80941Smrg continue; 158b8e80941Smrg } 159b8e80941Smrg 160b8e80941Smrg if (!is_use_inside_loop(use, state->loop)) { 161b8e80941Smrg nir_instr_rewrite_src(use->parent_instr, use, nir_src_for_ssa(dest)); 162b8e80941Smrg } 163b8e80941Smrg } 164b8e80941Smrg 165b8e80941Smrg nir_foreach_if_use_safe(use, def) { 166b8e80941Smrg if (!is_if_use_inside_loop(use, state->loop)) { 167b8e80941Smrg nir_if_rewrite_condition(use->parent_if, nir_src_for_ssa(dest)); 168b8e80941Smrg } 169b8e80941Smrg } 170b8e80941Smrg 171b8e80941Smrg return true; 172b8e80941Smrg} 173b8e80941Smrg 174b8e80941Smrgstatic void 175b8e80941Smrgconvert_to_lcssa(nir_cf_node *cf_node, lcssa_state *state) 176b8e80941Smrg{ 177b8e80941Smrg switch (cf_node->type) { 178b8e80941Smrg case nir_cf_node_block: 179b8e80941Smrg nir_foreach_instr(instr, nir_cf_node_as_block(cf_node)) 180b8e80941Smrg nir_foreach_ssa_def(instr, convert_loop_exit_for_ssa, state); 181b8e80941Smrg return; 182b8e80941Smrg case nir_cf_node_if: { 183b8e80941Smrg nir_if *if_stmt = nir_cf_node_as_if(cf_node); 184b8e80941Smrg foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->then_list) 185b8e80941Smrg convert_to_lcssa(nested_node, state); 186b8e80941Smrg foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->else_list) 187b8e80941Smrg convert_to_lcssa(nested_node, state); 188b8e80941Smrg return; 189b8e80941Smrg } 190b8e80941Smrg case nir_cf_node_loop: { 191b8e80941Smrg nir_loop *parent_loop = state->loop; 192b8e80941Smrg state->loop = nir_cf_node_as_loop(cf_node); 193b8e80941Smrg 194b8e80941Smrg foreach_list_typed(nir_cf_node, nested_node, node, &state->loop->body) 195b8e80941Smrg convert_to_lcssa(nested_node, state); 196b8e80941Smrg 197b8e80941Smrg state->loop = parent_loop; 198b8e80941Smrg return; 199b8e80941Smrg } 200b8e80941Smrg default: 201b8e80941Smrg unreachable("unknown cf node type"); 202b8e80941Smrg } 203b8e80941Smrg} 204b8e80941Smrg 205b8e80941Smrgvoid 206b8e80941Smrgnir_convert_loop_to_lcssa(nir_loop *loop) { 207b8e80941Smrg nir_function_impl *impl = nir_cf_node_get_function(&loop->cf_node); 208b8e80941Smrg 209b8e80941Smrg nir_metadata_require(impl, nir_metadata_block_index); 210b8e80941Smrg 211b8e80941Smrg lcssa_state *state = rzalloc(NULL, lcssa_state); 212b8e80941Smrg state->loop = loop; 213b8e80941Smrg state->shader = impl->function->shader; 214b8e80941Smrg 215b8e80941Smrg foreach_list_typed(nir_cf_node, node, node, &state->loop->body) 216b8e80941Smrg convert_to_lcssa(node, state); 217b8e80941Smrg 218b8e80941Smrg ralloc_free(state); 219b8e80941Smrg} 220