1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2014 Intel Corporation 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21b8e80941Smrg * IN THE SOFTWARE. 22b8e80941Smrg * 23b8e80941Smrg * Authors: 24b8e80941Smrg * Jason Ekstrand (jason@jlekstrand.net) 25b8e80941Smrg */ 26b8e80941Smrg 27b8e80941Smrg#include "nir.h" 28b8e80941Smrg#include "nir_worklist.h" 29b8e80941Smrg#include "nir_vla.h" 30b8e80941Smrg 31b8e80941Smrg/* 32b8e80941Smrg * Basic liveness analysis. This works only in SSA form. 33b8e80941Smrg * 34b8e80941Smrg * This liveness pass treats phi nodes as being melded to the space between 35b8e80941Smrg * blocks so that the destinations of a phi are in the livein of the block 36b8e80941Smrg * in which it resides and the sources are in the liveout of the 37b8e80941Smrg * corresponding block. By formulating the liveness information in this 38b8e80941Smrg * way, we ensure that the definition of any variable dominates its entire 39b8e80941Smrg * live range. This is true because the only way that the definition of an 40b8e80941Smrg * SSA value may not dominate a use is if the use is in a phi node and the 41b8e80941Smrg * uses in phi no are in the live-out of the corresponding predecessor 42b8e80941Smrg * block but not in the live-in of the block containing the phi node. 43b8e80941Smrg */ 44b8e80941Smrg 45b8e80941Smrgstruct live_ssa_defs_state { 46b8e80941Smrg unsigned num_ssa_defs; 47b8e80941Smrg unsigned bitset_words; 48b8e80941Smrg 49b8e80941Smrg nir_block_worklist worklist; 50b8e80941Smrg}; 51b8e80941Smrg 52b8e80941Smrgstatic bool 53b8e80941Smrgindex_ssa_def(nir_ssa_def *def, void *void_state) 54b8e80941Smrg{ 55b8e80941Smrg struct live_ssa_defs_state *state = void_state; 56b8e80941Smrg 57b8e80941Smrg if (def->parent_instr->type == nir_instr_type_ssa_undef) 58b8e80941Smrg def->live_index = 0; 59b8e80941Smrg else 60b8e80941Smrg def->live_index = state->num_ssa_defs++; 61b8e80941Smrg 62b8e80941Smrg return true; 63b8e80941Smrg} 64b8e80941Smrg 65b8e80941Smrg/* Initialize the liveness data to zero and add the given block to the 66b8e80941Smrg * worklist. 67b8e80941Smrg */ 68b8e80941Smrgstatic bool 69b8e80941Smrginit_liveness_block(nir_block *block, 70b8e80941Smrg struct live_ssa_defs_state *state) 71b8e80941Smrg{ 72b8e80941Smrg block->live_in = reralloc(block, block->live_in, BITSET_WORD, 73b8e80941Smrg state->bitset_words); 74b8e80941Smrg memset(block->live_in, 0, state->bitset_words * sizeof(BITSET_WORD)); 75b8e80941Smrg 76b8e80941Smrg block->live_out = reralloc(block, block->live_out, BITSET_WORD, 77b8e80941Smrg state->bitset_words); 78b8e80941Smrg memset(block->live_out, 0, state->bitset_words * sizeof(BITSET_WORD)); 79b8e80941Smrg 80b8e80941Smrg nir_block_worklist_push_head(&state->worklist, block); 81b8e80941Smrg 82b8e80941Smrg return true; 83b8e80941Smrg} 84b8e80941Smrg 85b8e80941Smrgstatic bool 86b8e80941Smrgset_src_live(nir_src *src, void *void_live) 87b8e80941Smrg{ 88b8e80941Smrg BITSET_WORD *live = void_live; 89b8e80941Smrg 90b8e80941Smrg if (!src->is_ssa) 91b8e80941Smrg return true; 92b8e80941Smrg 93b8e80941Smrg if (src->ssa->live_index == 0) 94b8e80941Smrg return true; /* undefined variables are never live */ 95b8e80941Smrg 96b8e80941Smrg BITSET_SET(live, src->ssa->live_index); 97b8e80941Smrg 98b8e80941Smrg return true; 99b8e80941Smrg} 100b8e80941Smrg 101b8e80941Smrgstatic bool 102b8e80941Smrgset_ssa_def_dead(nir_ssa_def *def, void *void_live) 103b8e80941Smrg{ 104b8e80941Smrg BITSET_WORD *live = void_live; 105b8e80941Smrg 106b8e80941Smrg BITSET_CLEAR(live, def->live_index); 107b8e80941Smrg 108b8e80941Smrg return true; 109b8e80941Smrg} 110b8e80941Smrg 111b8e80941Smrg/** Propagates the live in of succ across the edge to the live out of pred 112b8e80941Smrg * 113b8e80941Smrg * Phi nodes exist "between" blocks and all the phi nodes at the start of a 114b8e80941Smrg * block act "in parallel". When we propagate from the live_in of one 115b8e80941Smrg * block to the live out of the other, we have to kill any writes from phis 116b8e80941Smrg * and make live any sources. 117b8e80941Smrg * 118b8e80941Smrg * Returns true if updating live out of pred added anything 119b8e80941Smrg */ 120b8e80941Smrgstatic bool 121b8e80941Smrgpropagate_across_edge(nir_block *pred, nir_block *succ, 122b8e80941Smrg struct live_ssa_defs_state *state) 123b8e80941Smrg{ 124b8e80941Smrg NIR_VLA(BITSET_WORD, live, state->bitset_words); 125b8e80941Smrg memcpy(live, succ->live_in, state->bitset_words * sizeof *live); 126b8e80941Smrg 127b8e80941Smrg nir_foreach_instr(instr, succ) { 128b8e80941Smrg if (instr->type != nir_instr_type_phi) 129b8e80941Smrg break; 130b8e80941Smrg nir_phi_instr *phi = nir_instr_as_phi(instr); 131b8e80941Smrg 132b8e80941Smrg assert(phi->dest.is_ssa); 133b8e80941Smrg set_ssa_def_dead(&phi->dest.ssa, live); 134b8e80941Smrg } 135b8e80941Smrg 136b8e80941Smrg nir_foreach_instr(instr, succ) { 137b8e80941Smrg if (instr->type != nir_instr_type_phi) 138b8e80941Smrg break; 139b8e80941Smrg nir_phi_instr *phi = nir_instr_as_phi(instr); 140b8e80941Smrg 141b8e80941Smrg nir_foreach_phi_src(src, phi) { 142b8e80941Smrg if (src->pred == pred) { 143b8e80941Smrg set_src_live(&src->src, live); 144b8e80941Smrg break; 145b8e80941Smrg } 146b8e80941Smrg } 147b8e80941Smrg } 148b8e80941Smrg 149b8e80941Smrg BITSET_WORD progress = 0; 150b8e80941Smrg for (unsigned i = 0; i < state->bitset_words; ++i) { 151b8e80941Smrg progress |= live[i] & ~pred->live_out[i]; 152b8e80941Smrg pred->live_out[i] |= live[i]; 153b8e80941Smrg } 154b8e80941Smrg return progress != 0; 155b8e80941Smrg} 156b8e80941Smrg 157b8e80941Smrgvoid 158b8e80941Smrgnir_live_ssa_defs_impl(nir_function_impl *impl) 159b8e80941Smrg{ 160b8e80941Smrg struct live_ssa_defs_state state; 161b8e80941Smrg 162b8e80941Smrg /* We start at 1 because we reserve the index value of 0 for ssa_undef 163b8e80941Smrg * instructions. Those are never live, so their liveness information 164b8e80941Smrg * can be compacted into a single bit. 165b8e80941Smrg */ 166b8e80941Smrg state.num_ssa_defs = 1; 167b8e80941Smrg nir_foreach_block(block, impl) { 168b8e80941Smrg nir_foreach_instr(instr, block) 169b8e80941Smrg nir_foreach_ssa_def(instr, index_ssa_def, &state); 170b8e80941Smrg } 171b8e80941Smrg 172b8e80941Smrg nir_block_worklist_init(&state.worklist, impl->num_blocks, NULL); 173b8e80941Smrg 174b8e80941Smrg /* We now know how many unique ssa definitions we have and we can go 175b8e80941Smrg * ahead and allocate live_in and live_out sets and add all of the 176b8e80941Smrg * blocks to the worklist. 177b8e80941Smrg */ 178b8e80941Smrg state.bitset_words = BITSET_WORDS(state.num_ssa_defs); 179b8e80941Smrg nir_foreach_block(block, impl) { 180b8e80941Smrg init_liveness_block(block, &state); 181b8e80941Smrg } 182b8e80941Smrg 183b8e80941Smrg 184b8e80941Smrg /* We're now ready to work through the worklist and update the liveness 185b8e80941Smrg * sets of each of the blocks. By the time we get to this point, every 186b8e80941Smrg * block in the function implementation has been pushed onto the 187b8e80941Smrg * worklist in reverse order. As long as we keep the worklist 188b8e80941Smrg * up-to-date as we go, everything will get covered. 189b8e80941Smrg */ 190b8e80941Smrg while (!nir_block_worklist_is_empty(&state.worklist)) { 191b8e80941Smrg /* We pop them off in the reverse order we pushed them on. This way 192b8e80941Smrg * the first walk of the instructions is backwards so we only walk 193b8e80941Smrg * once in the case of no control flow. 194b8e80941Smrg */ 195b8e80941Smrg nir_block *block = nir_block_worklist_pop_head(&state.worklist); 196b8e80941Smrg 197b8e80941Smrg memcpy(block->live_in, block->live_out, 198b8e80941Smrg state.bitset_words * sizeof(BITSET_WORD)); 199b8e80941Smrg 200b8e80941Smrg nir_if *following_if = nir_block_get_following_if(block); 201b8e80941Smrg if (following_if) 202b8e80941Smrg set_src_live(&following_if->condition, block->live_in); 203b8e80941Smrg 204b8e80941Smrg nir_foreach_instr_reverse(instr, block) { 205b8e80941Smrg /* Phi nodes are handled seperately so we want to skip them. Since 206b8e80941Smrg * we are going backwards and they are at the beginning, we can just 207b8e80941Smrg * break as soon as we see one. 208b8e80941Smrg */ 209b8e80941Smrg if (instr->type == nir_instr_type_phi) 210b8e80941Smrg break; 211b8e80941Smrg 212b8e80941Smrg nir_foreach_ssa_def(instr, set_ssa_def_dead, block->live_in); 213b8e80941Smrg nir_foreach_src(instr, set_src_live, block->live_in); 214b8e80941Smrg } 215b8e80941Smrg 216b8e80941Smrg /* Walk over all of the predecessors of the current block updating 217b8e80941Smrg * their live in with the live out of this one. If anything has 218b8e80941Smrg * changed, add the predecessor to the work list so that we ensure 219b8e80941Smrg * that the new information is used. 220b8e80941Smrg */ 221b8e80941Smrg set_foreach(block->predecessors, entry) { 222b8e80941Smrg nir_block *pred = (nir_block *)entry->key; 223b8e80941Smrg if (propagate_across_edge(pred, block, &state)) 224b8e80941Smrg nir_block_worklist_push_tail(&state.worklist, pred); 225b8e80941Smrg } 226b8e80941Smrg } 227b8e80941Smrg 228b8e80941Smrg nir_block_worklist_fini(&state.worklist); 229b8e80941Smrg} 230b8e80941Smrg 231b8e80941Smrgstatic bool 232b8e80941Smrgsrc_does_not_use_def(nir_src *src, void *def) 233b8e80941Smrg{ 234b8e80941Smrg return !src->is_ssa || src->ssa != (nir_ssa_def *)def; 235b8e80941Smrg} 236b8e80941Smrg 237b8e80941Smrgstatic bool 238b8e80941Smrgsearch_for_use_after_instr(nir_instr *start, nir_ssa_def *def) 239b8e80941Smrg{ 240b8e80941Smrg /* Only look for a use strictly after the given instruction */ 241b8e80941Smrg struct exec_node *node = start->node.next; 242b8e80941Smrg while (!exec_node_is_tail_sentinel(node)) { 243b8e80941Smrg nir_instr *instr = exec_node_data(nir_instr, node, node); 244b8e80941Smrg if (!nir_foreach_src(instr, src_does_not_use_def, def)) 245b8e80941Smrg return true; 246b8e80941Smrg node = node->next; 247b8e80941Smrg } 248b8e80941Smrg return false; 249b8e80941Smrg} 250b8e80941Smrg 251b8e80941Smrg/* Returns true if def is live at instr assuming that def comes before 252b8e80941Smrg * instr in a pre DFS search of the dominance tree. 253b8e80941Smrg */ 254b8e80941Smrgstatic bool 255b8e80941Smrgnir_ssa_def_is_live_at(nir_ssa_def *def, nir_instr *instr) 256b8e80941Smrg{ 257b8e80941Smrg if (BITSET_TEST(instr->block->live_out, def->live_index)) { 258b8e80941Smrg /* Since def dominates instr, if def is in the liveout of the block, 259b8e80941Smrg * it's live at instr 260b8e80941Smrg */ 261b8e80941Smrg return true; 262b8e80941Smrg } else { 263b8e80941Smrg if (BITSET_TEST(instr->block->live_in, def->live_index) || 264b8e80941Smrg def->parent_instr->block == instr->block) { 265b8e80941Smrg /* In this case it is either live coming into instr's block or it 266b8e80941Smrg * is defined in the same block. In this case, we simply need to 267b8e80941Smrg * see if it is used after instr. 268b8e80941Smrg */ 269b8e80941Smrg return search_for_use_after_instr(instr, def); 270b8e80941Smrg } else { 271b8e80941Smrg return false; 272b8e80941Smrg } 273b8e80941Smrg } 274b8e80941Smrg} 275b8e80941Smrg 276b8e80941Smrgbool 277b8e80941Smrgnir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b) 278b8e80941Smrg{ 279b8e80941Smrg if (a->parent_instr == b->parent_instr) { 280b8e80941Smrg /* Two variables defined at the same time interfere assuming at 281b8e80941Smrg * least one isn't dead. 282b8e80941Smrg */ 283b8e80941Smrg return true; 284b8e80941Smrg } else if (a->live_index == 0 || b->live_index == 0) { 285b8e80941Smrg /* If either variable is an ssa_undef, then there's no interference */ 286b8e80941Smrg return false; 287b8e80941Smrg } else if (a->live_index < b->live_index) { 288b8e80941Smrg return nir_ssa_def_is_live_at(a, b->parent_instr); 289b8e80941Smrg } else { 290b8e80941Smrg return nir_ssa_def_is_live_at(b, a->parent_instr); 291b8e80941Smrg } 292b8e80941Smrg} 293