101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2014 Intel Corporation 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2101e04c3fSmrg * IN THE SOFTWARE. 2201e04c3fSmrg * 2301e04c3fSmrg * Authors: 2401e04c3fSmrg * Jason Ekstrand (jason@jlekstrand.net) 2501e04c3fSmrg */ 2601e04c3fSmrg 2701e04c3fSmrg#include "nir.h" 2801e04c3fSmrg#include "nir_worklist.h" 2901e04c3fSmrg#include "nir_vla.h" 3001e04c3fSmrg 3101e04c3fSmrg/* 3201e04c3fSmrg * Basic liveness analysis. This works only in SSA form. 3301e04c3fSmrg * 3401e04c3fSmrg * This liveness pass treats phi nodes as being melded to the space between 3501e04c3fSmrg * blocks so that the destinations of a phi are in the livein of the block 3601e04c3fSmrg * in which it resides and the sources are in the liveout of the 3701e04c3fSmrg * corresponding block. By formulating the liveness information in this 3801e04c3fSmrg * way, we ensure that the definition of any variable dominates its entire 3901e04c3fSmrg * live range. This is true because the only way that the definition of an 4001e04c3fSmrg * SSA value may not dominate a use is if the use is in a phi node and the 4101e04c3fSmrg * uses in phi no are in the live-out of the corresponding predecessor 4201e04c3fSmrg * block but not in the live-in of the block containing the phi node. 4301e04c3fSmrg */ 4401e04c3fSmrg 4501e04c3fSmrgstruct live_ssa_defs_state { 4601e04c3fSmrg unsigned bitset_words; 4701e04c3fSmrg 487ec681f3Smrg /* Used in propagate_across_edge() */ 497ec681f3Smrg BITSET_WORD *tmp_live; 507ec681f3Smrg 5101e04c3fSmrg nir_block_worklist worklist; 5201e04c3fSmrg}; 5301e04c3fSmrg 5401e04c3fSmrg/* Initialize the liveness data to zero and add the given block to the 5501e04c3fSmrg * worklist. 5601e04c3fSmrg */ 577ec681f3Smrgstatic void 5801e04c3fSmrginit_liveness_block(nir_block *block, 5901e04c3fSmrg struct live_ssa_defs_state *state) 6001e04c3fSmrg{ 6101e04c3fSmrg block->live_in = reralloc(block, block->live_in, BITSET_WORD, 6201e04c3fSmrg state->bitset_words); 6301e04c3fSmrg memset(block->live_in, 0, state->bitset_words * sizeof(BITSET_WORD)); 6401e04c3fSmrg 6501e04c3fSmrg block->live_out = reralloc(block, block->live_out, BITSET_WORD, 6601e04c3fSmrg state->bitset_words); 6701e04c3fSmrg memset(block->live_out, 0, state->bitset_words * sizeof(BITSET_WORD)); 6801e04c3fSmrg 6901e04c3fSmrg nir_block_worklist_push_head(&state->worklist, block); 7001e04c3fSmrg} 7101e04c3fSmrg 7201e04c3fSmrgstatic bool 7301e04c3fSmrgset_src_live(nir_src *src, void *void_live) 7401e04c3fSmrg{ 7501e04c3fSmrg BITSET_WORD *live = void_live; 7601e04c3fSmrg 7701e04c3fSmrg if (!src->is_ssa) 7801e04c3fSmrg return true; 7901e04c3fSmrg 807ec681f3Smrg if (nir_src_is_undef(*src)) 8101e04c3fSmrg return true; /* undefined variables are never live */ 8201e04c3fSmrg 837ec681f3Smrg BITSET_SET(live, src->ssa->index); 8401e04c3fSmrg 8501e04c3fSmrg return true; 8601e04c3fSmrg} 8701e04c3fSmrg 8801e04c3fSmrgstatic bool 8901e04c3fSmrgset_ssa_def_dead(nir_ssa_def *def, void *void_live) 9001e04c3fSmrg{ 9101e04c3fSmrg BITSET_WORD *live = void_live; 9201e04c3fSmrg 937ec681f3Smrg BITSET_CLEAR(live, def->index); 9401e04c3fSmrg 9501e04c3fSmrg return true; 9601e04c3fSmrg} 9701e04c3fSmrg 9801e04c3fSmrg/** Propagates the live in of succ across the edge to the live out of pred 9901e04c3fSmrg * 10001e04c3fSmrg * Phi nodes exist "between" blocks and all the phi nodes at the start of a 10101e04c3fSmrg * block act "in parallel". When we propagate from the live_in of one 10201e04c3fSmrg * block to the live out of the other, we have to kill any writes from phis 10301e04c3fSmrg * and make live any sources. 10401e04c3fSmrg * 10501e04c3fSmrg * Returns true if updating live out of pred added anything 10601e04c3fSmrg */ 10701e04c3fSmrgstatic bool 10801e04c3fSmrgpropagate_across_edge(nir_block *pred, nir_block *succ, 10901e04c3fSmrg struct live_ssa_defs_state *state) 11001e04c3fSmrg{ 1117ec681f3Smrg BITSET_WORD *live = state->tmp_live; 11201e04c3fSmrg memcpy(live, succ->live_in, state->bitset_words * sizeof *live); 11301e04c3fSmrg 11401e04c3fSmrg nir_foreach_instr(instr, succ) { 11501e04c3fSmrg if (instr->type != nir_instr_type_phi) 11601e04c3fSmrg break; 11701e04c3fSmrg nir_phi_instr *phi = nir_instr_as_phi(instr); 11801e04c3fSmrg 11901e04c3fSmrg assert(phi->dest.is_ssa); 12001e04c3fSmrg set_ssa_def_dead(&phi->dest.ssa, live); 12101e04c3fSmrg } 12201e04c3fSmrg 12301e04c3fSmrg nir_foreach_instr(instr, succ) { 12401e04c3fSmrg if (instr->type != nir_instr_type_phi) 12501e04c3fSmrg break; 12601e04c3fSmrg nir_phi_instr *phi = nir_instr_as_phi(instr); 12701e04c3fSmrg 12801e04c3fSmrg nir_foreach_phi_src(src, phi) { 12901e04c3fSmrg if (src->pred == pred) { 13001e04c3fSmrg set_src_live(&src->src, live); 13101e04c3fSmrg break; 13201e04c3fSmrg } 13301e04c3fSmrg } 13401e04c3fSmrg } 13501e04c3fSmrg 13601e04c3fSmrg BITSET_WORD progress = 0; 13701e04c3fSmrg for (unsigned i = 0; i < state->bitset_words; ++i) { 13801e04c3fSmrg progress |= live[i] & ~pred->live_out[i]; 13901e04c3fSmrg pred->live_out[i] |= live[i]; 14001e04c3fSmrg } 14101e04c3fSmrg return progress != 0; 14201e04c3fSmrg} 14301e04c3fSmrg 14401e04c3fSmrgvoid 14501e04c3fSmrgnir_live_ssa_defs_impl(nir_function_impl *impl) 14601e04c3fSmrg{ 1477ec681f3Smrg struct live_ssa_defs_state state = { 1487ec681f3Smrg .bitset_words = BITSET_WORDS(impl->ssa_alloc), 1497ec681f3Smrg }; 1507ec681f3Smrg state.tmp_live = rzalloc_array(impl, BITSET_WORD, state.bitset_words), 15101e04c3fSmrg 1527ec681f3Smrg /* Number the instructions so we can do cheap interference tests using the 1537ec681f3Smrg * instruction index. 15401e04c3fSmrg */ 1557ec681f3Smrg nir_metadata_require(impl, nir_metadata_instr_index); 15601e04c3fSmrg 15701e04c3fSmrg nir_block_worklist_init(&state.worklist, impl->num_blocks, NULL); 15801e04c3fSmrg 1597ec681f3Smrg /* Allocate live_in and live_out sets and add all of the blocks to the 1607ec681f3Smrg * worklist. 16101e04c3fSmrg */ 16201e04c3fSmrg nir_foreach_block(block, impl) { 16301e04c3fSmrg init_liveness_block(block, &state); 16401e04c3fSmrg } 16501e04c3fSmrg 16601e04c3fSmrg 16701e04c3fSmrg /* We're now ready to work through the worklist and update the liveness 16801e04c3fSmrg * sets of each of the blocks. By the time we get to this point, every 16901e04c3fSmrg * block in the function implementation has been pushed onto the 17001e04c3fSmrg * worklist in reverse order. As long as we keep the worklist 17101e04c3fSmrg * up-to-date as we go, everything will get covered. 17201e04c3fSmrg */ 17301e04c3fSmrg while (!nir_block_worklist_is_empty(&state.worklist)) { 17401e04c3fSmrg /* We pop them off in the reverse order we pushed them on. This way 17501e04c3fSmrg * the first walk of the instructions is backwards so we only walk 17601e04c3fSmrg * once in the case of no control flow. 17701e04c3fSmrg */ 17801e04c3fSmrg nir_block *block = nir_block_worklist_pop_head(&state.worklist); 17901e04c3fSmrg 18001e04c3fSmrg memcpy(block->live_in, block->live_out, 18101e04c3fSmrg state.bitset_words * sizeof(BITSET_WORD)); 18201e04c3fSmrg 18301e04c3fSmrg nir_if *following_if = nir_block_get_following_if(block); 18401e04c3fSmrg if (following_if) 18501e04c3fSmrg set_src_live(&following_if->condition, block->live_in); 18601e04c3fSmrg 18701e04c3fSmrg nir_foreach_instr_reverse(instr, block) { 18801e04c3fSmrg /* Phi nodes are handled seperately so we want to skip them. Since 18901e04c3fSmrg * we are going backwards and they are at the beginning, we can just 19001e04c3fSmrg * break as soon as we see one. 19101e04c3fSmrg */ 19201e04c3fSmrg if (instr->type == nir_instr_type_phi) 19301e04c3fSmrg break; 19401e04c3fSmrg 19501e04c3fSmrg nir_foreach_ssa_def(instr, set_ssa_def_dead, block->live_in); 19601e04c3fSmrg nir_foreach_src(instr, set_src_live, block->live_in); 19701e04c3fSmrg } 19801e04c3fSmrg 19901e04c3fSmrg /* Walk over all of the predecessors of the current block updating 20001e04c3fSmrg * their live in with the live out of this one. If anything has 20101e04c3fSmrg * changed, add the predecessor to the work list so that we ensure 20201e04c3fSmrg * that the new information is used. 20301e04c3fSmrg */ 20401e04c3fSmrg set_foreach(block->predecessors, entry) { 20501e04c3fSmrg nir_block *pred = (nir_block *)entry->key; 20601e04c3fSmrg if (propagate_across_edge(pred, block, &state)) 20701e04c3fSmrg nir_block_worklist_push_tail(&state.worklist, pred); 20801e04c3fSmrg } 20901e04c3fSmrg } 21001e04c3fSmrg 2117ec681f3Smrg ralloc_free(state.tmp_live); 21201e04c3fSmrg nir_block_worklist_fini(&state.worklist); 21301e04c3fSmrg} 21401e04c3fSmrg 2157ec681f3Smrg/** Return the live set at a cursor 2167ec681f3Smrg * 2177ec681f3Smrg * Note: The bitset returned may be the live_in or live_out from the block in 2187ec681f3Smrg * which the instruction lives. Do not ralloc_free() it directly; 2197ec681f3Smrg * instead, provide a mem_ctx and free that. 2207ec681f3Smrg */ 2217ec681f3Smrgconst BITSET_WORD * 2227ec681f3Smrgnir_get_live_ssa_defs(nir_cursor cursor, void *mem_ctx) 2237ec681f3Smrg{ 2247ec681f3Smrg nir_block *block = nir_cursor_current_block(cursor); 2257ec681f3Smrg nir_function_impl *impl = nir_cf_node_get_function(&block->cf_node); 2267ec681f3Smrg assert(impl->valid_metadata & nir_metadata_live_ssa_defs); 2277ec681f3Smrg 2287ec681f3Smrg switch (cursor.option) { 2297ec681f3Smrg case nir_cursor_before_block: 2307ec681f3Smrg return cursor.block->live_in; 2317ec681f3Smrg 2327ec681f3Smrg case nir_cursor_after_block: 2337ec681f3Smrg return cursor.block->live_out; 2347ec681f3Smrg 2357ec681f3Smrg case nir_cursor_before_instr: 2367ec681f3Smrg if (cursor.instr == nir_block_first_instr(cursor.instr->block)) 2377ec681f3Smrg return cursor.instr->block->live_in; 2387ec681f3Smrg break; 2397ec681f3Smrg 2407ec681f3Smrg case nir_cursor_after_instr: 2417ec681f3Smrg if (cursor.instr == nir_block_last_instr(cursor.instr->block)) 2427ec681f3Smrg return cursor.instr->block->live_out; 2437ec681f3Smrg break; 2447ec681f3Smrg } 2457ec681f3Smrg 2467ec681f3Smrg /* If we got here, we're an instruction cursor mid-block */ 2477ec681f3Smrg const unsigned bitset_words = BITSET_WORDS(impl->ssa_alloc); 2487ec681f3Smrg BITSET_WORD *live = ralloc_array(mem_ctx, BITSET_WORD, bitset_words); 2497ec681f3Smrg memcpy(live, block->live_out, bitset_words * sizeof(BITSET_WORD)); 2507ec681f3Smrg 2517ec681f3Smrg nir_foreach_instr_reverse(instr, block) { 2527ec681f3Smrg if (cursor.option == nir_cursor_after_instr && instr == cursor.instr) 2537ec681f3Smrg break; 2547ec681f3Smrg 2557ec681f3Smrg /* If someone asked for liveness in the middle of a bunch of phis, 2567ec681f3Smrg * that's an error. Since we are going backwards and they are at the 2577ec681f3Smrg * beginning, we can just blow up as soon as we see one. 2587ec681f3Smrg */ 2597ec681f3Smrg assert(instr->type != nir_instr_type_phi); 2607ec681f3Smrg if (instr->type == nir_instr_type_phi) 2617ec681f3Smrg break; 2627ec681f3Smrg 2637ec681f3Smrg nir_foreach_ssa_def(instr, set_ssa_def_dead, live); 2647ec681f3Smrg nir_foreach_src(instr, set_src_live, live); 2657ec681f3Smrg 2667ec681f3Smrg if (cursor.option == nir_cursor_before_instr && instr == cursor.instr) 2677ec681f3Smrg break; 2687ec681f3Smrg } 2697ec681f3Smrg 2707ec681f3Smrg return live; 2717ec681f3Smrg} 2727ec681f3Smrg 27301e04c3fSmrgstatic bool 27401e04c3fSmrgsrc_does_not_use_def(nir_src *src, void *def) 27501e04c3fSmrg{ 27601e04c3fSmrg return !src->is_ssa || src->ssa != (nir_ssa_def *)def; 27701e04c3fSmrg} 27801e04c3fSmrg 27901e04c3fSmrgstatic bool 28001e04c3fSmrgsearch_for_use_after_instr(nir_instr *start, nir_ssa_def *def) 28101e04c3fSmrg{ 28201e04c3fSmrg /* Only look for a use strictly after the given instruction */ 28301e04c3fSmrg struct exec_node *node = start->node.next; 28401e04c3fSmrg while (!exec_node_is_tail_sentinel(node)) { 28501e04c3fSmrg nir_instr *instr = exec_node_data(nir_instr, node, node); 28601e04c3fSmrg if (!nir_foreach_src(instr, src_does_not_use_def, def)) 28701e04c3fSmrg return true; 28801e04c3fSmrg node = node->next; 28901e04c3fSmrg } 2907ec681f3Smrg 2917ec681f3Smrg /* If uses are considered to be in the block immediately preceding the if 2927ec681f3Smrg * so we need to also check the following if condition, if any. 2937ec681f3Smrg */ 2947ec681f3Smrg nir_if *following_if = nir_block_get_following_if(start->block); 2957ec681f3Smrg if (following_if && following_if->condition.is_ssa && 2967ec681f3Smrg following_if->condition.ssa == def) 2977ec681f3Smrg return true; 2987ec681f3Smrg 29901e04c3fSmrg return false; 30001e04c3fSmrg} 30101e04c3fSmrg 30201e04c3fSmrg/* Returns true if def is live at instr assuming that def comes before 30301e04c3fSmrg * instr in a pre DFS search of the dominance tree. 30401e04c3fSmrg */ 30501e04c3fSmrgstatic bool 30601e04c3fSmrgnir_ssa_def_is_live_at(nir_ssa_def *def, nir_instr *instr) 30701e04c3fSmrg{ 3087ec681f3Smrg if (BITSET_TEST(instr->block->live_out, def->index)) { 30901e04c3fSmrg /* Since def dominates instr, if def is in the liveout of the block, 31001e04c3fSmrg * it's live at instr 31101e04c3fSmrg */ 31201e04c3fSmrg return true; 31301e04c3fSmrg } else { 3147ec681f3Smrg if (BITSET_TEST(instr->block->live_in, def->index) || 31501e04c3fSmrg def->parent_instr->block == instr->block) { 31601e04c3fSmrg /* In this case it is either live coming into instr's block or it 31701e04c3fSmrg * is defined in the same block. In this case, we simply need to 31801e04c3fSmrg * see if it is used after instr. 31901e04c3fSmrg */ 32001e04c3fSmrg return search_for_use_after_instr(instr, def); 32101e04c3fSmrg } else { 32201e04c3fSmrg return false; 32301e04c3fSmrg } 32401e04c3fSmrg } 32501e04c3fSmrg} 32601e04c3fSmrg 32701e04c3fSmrgbool 32801e04c3fSmrgnir_ssa_defs_interfere(nir_ssa_def *a, nir_ssa_def *b) 32901e04c3fSmrg{ 33001e04c3fSmrg if (a->parent_instr == b->parent_instr) { 33101e04c3fSmrg /* Two variables defined at the same time interfere assuming at 33201e04c3fSmrg * least one isn't dead. 33301e04c3fSmrg */ 33401e04c3fSmrg return true; 3357ec681f3Smrg } else if (a->parent_instr->type == nir_instr_type_ssa_undef || 3367ec681f3Smrg b->parent_instr->type == nir_instr_type_ssa_undef) { 33701e04c3fSmrg /* If either variable is an ssa_undef, then there's no interference */ 33801e04c3fSmrg return false; 3397ec681f3Smrg } else if (a->parent_instr->index < b->parent_instr->index) { 34001e04c3fSmrg return nir_ssa_def_is_live_at(a, b->parent_instr); 34101e04c3fSmrg } else { 34201e04c3fSmrg return nir_ssa_def_is_live_at(b, a->parent_instr); 34301e04c3fSmrg } 34401e04c3fSmrg} 3457ec681f3Smrg 3467ec681f3Smrg/* Takes an SSA def's defs and uses and expands the live interval to cover 3477ec681f3Smrg * that range. Control flow effects are handled separately. 3487ec681f3Smrg */ 3497ec681f3Smrgstatic bool def_cb(nir_ssa_def *def, void *state) 3507ec681f3Smrg{ 3517ec681f3Smrg nir_instr_liveness *liveness = state; 3527ec681f3Smrg nir_instr *instr = def->parent_instr; 3537ec681f3Smrg int index = def->index; 3547ec681f3Smrg 3557ec681f3Smrg liveness->defs[index].start = MIN2(liveness->defs[index].start, instr->index); 3567ec681f3Smrg 3577ec681f3Smrg nir_foreach_use(src, def) { 3587ec681f3Smrg liveness->defs[index].end = MAX2(liveness->defs[index].end, 3597ec681f3Smrg src->parent_instr->index); 3607ec681f3Smrg } 3617ec681f3Smrg 3627ec681f3Smrg return true; 3637ec681f3Smrg} 3647ec681f3Smrg 3657ec681f3Smrgnir_instr_liveness * 3667ec681f3Smrgnir_live_ssa_defs_per_instr(nir_function_impl *impl) 3677ec681f3Smrg{ 3687ec681f3Smrg /* We'll use block-level live_ssa_defs to expand our per-instr ranges for 3697ec681f3Smrg * control flow. 3707ec681f3Smrg */ 3717ec681f3Smrg nir_metadata_require(impl, 3727ec681f3Smrg nir_metadata_block_index | 3737ec681f3Smrg nir_metadata_instr_index | 3747ec681f3Smrg nir_metadata_live_ssa_defs); 3757ec681f3Smrg 3767ec681f3Smrg /* Make our struct. */ 3777ec681f3Smrg nir_instr_liveness *liveness = ralloc(NULL, nir_instr_liveness); 3787ec681f3Smrg liveness->defs = rzalloc_array(liveness, nir_liveness_bounds, 3797ec681f3Smrg impl->ssa_alloc); 3807ec681f3Smrg 3817ec681f3Smrg /* Set our starts so we can use MIN2() as we accumulate bounds. */ 3827ec681f3Smrg for (int i = 0; i < impl->ssa_alloc; i++) 3837ec681f3Smrg liveness->defs->start = ~0; 3847ec681f3Smrg 3857ec681f3Smrg nir_foreach_block(block, impl) { 3867ec681f3Smrg unsigned index; 3877ec681f3Smrg BITSET_FOREACH_SET(index, block->live_in, impl->ssa_alloc) { 3887ec681f3Smrg liveness->defs[index].start = MIN2(liveness->defs[index].start, 3897ec681f3Smrg block->start_ip); 3907ec681f3Smrg } 3917ec681f3Smrg 3927ec681f3Smrg nir_foreach_instr(instr, block) { 3937ec681f3Smrg nir_foreach_ssa_def(instr, def_cb, liveness); 3947ec681f3Smrg }; 3957ec681f3Smrg 3967ec681f3Smrg /* track an if src's use. We need to make sure that our value is live 3977ec681f3Smrg * across the if reference, where we don't have an instr->index 3987ec681f3Smrg * representing the use. Mark it as live through the end of the block. 3997ec681f3Smrg */ 4007ec681f3Smrg nir_if *nif = nir_block_get_following_if(block); 4017ec681f3Smrg if (nif) { 4027ec681f3Smrg if (nif->condition.is_ssa) { 4037ec681f3Smrg liveness->defs[nif->condition.ssa->index].end = MAX2( 4047ec681f3Smrg liveness->defs[nif->condition.ssa->index].end, block->end_ip); 4057ec681f3Smrg } 4067ec681f3Smrg } 4077ec681f3Smrg 4087ec681f3Smrg BITSET_FOREACH_SET(index, block->live_out, impl->ssa_alloc) { 4097ec681f3Smrg liveness->defs[index].end = MAX2(liveness->defs[index].end, 4107ec681f3Smrg block->end_ip); 4117ec681f3Smrg } 4127ec681f3Smrg } 4137ec681f3Smrg 4147ec681f3Smrg return liveness; 4157ec681f3Smrg} 416