101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2014 Intel Corporation 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 2101e04c3fSmrg * IN THE SOFTWARE. 2201e04c3fSmrg * 2301e04c3fSmrg * Authors: 2401e04c3fSmrg * Connor Abbott (cwabbott0@gmail.com) 2501e04c3fSmrg * 2601e04c3fSmrg */ 2701e04c3fSmrg 2801e04c3fSmrg#include "nir.h" 2901e04c3fSmrg 3001e04c3fSmrg/* 3101e04c3fSmrg * Implements the algorithms for computing the dominance tree and the 3201e04c3fSmrg * dominance frontier from "A Simple, Fast Dominance Algorithm" by Cooper, 3301e04c3fSmrg * Harvey, and Kennedy. 3401e04c3fSmrg */ 3501e04c3fSmrg 3601e04c3fSmrgstatic bool 3701e04c3fSmrginit_block(nir_block *block, nir_function_impl *impl) 3801e04c3fSmrg{ 3901e04c3fSmrg if (block == nir_start_block(impl)) 4001e04c3fSmrg block->imm_dom = block; 4101e04c3fSmrg else 4201e04c3fSmrg block->imm_dom = NULL; 4301e04c3fSmrg block->num_dom_children = 0; 4401e04c3fSmrg 457ec681f3Smrg /* See nir_block_dominates */ 467ec681f3Smrg block->dom_pre_index = UINT32_MAX; 477ec681f3Smrg block->dom_post_index = 0; 487ec681f3Smrg 497ec681f3Smrg _mesa_set_clear(block->dom_frontier, NULL); 5001e04c3fSmrg 5101e04c3fSmrg return true; 5201e04c3fSmrg} 5301e04c3fSmrg 5401e04c3fSmrgstatic nir_block * 5501e04c3fSmrgintersect(nir_block *b1, nir_block *b2) 5601e04c3fSmrg{ 5701e04c3fSmrg while (b1 != b2) { 5801e04c3fSmrg /* 5901e04c3fSmrg * Note, the comparisons here are the opposite of what the paper says 6001e04c3fSmrg * because we index blocks from beginning -> end (i.e. reverse 6101e04c3fSmrg * post-order) instead of post-order like they assume. 6201e04c3fSmrg */ 6301e04c3fSmrg while (b1->index > b2->index) 6401e04c3fSmrg b1 = b1->imm_dom; 6501e04c3fSmrg while (b2->index > b1->index) 6601e04c3fSmrg b2 = b2->imm_dom; 6701e04c3fSmrg } 6801e04c3fSmrg 6901e04c3fSmrg return b1; 7001e04c3fSmrg} 7101e04c3fSmrg 7201e04c3fSmrgstatic bool 7301e04c3fSmrgcalc_dominance(nir_block *block) 7401e04c3fSmrg{ 7501e04c3fSmrg nir_block *new_idom = NULL; 7601e04c3fSmrg set_foreach(block->predecessors, entry) { 7701e04c3fSmrg nir_block *pred = (nir_block *) entry->key; 7801e04c3fSmrg 7901e04c3fSmrg if (pred->imm_dom) { 8001e04c3fSmrg if (new_idom) 8101e04c3fSmrg new_idom = intersect(pred, new_idom); 8201e04c3fSmrg else 8301e04c3fSmrg new_idom = pred; 8401e04c3fSmrg } 8501e04c3fSmrg } 8601e04c3fSmrg 8701e04c3fSmrg if (block->imm_dom != new_idom) { 8801e04c3fSmrg block->imm_dom = new_idom; 8901e04c3fSmrg return true; 9001e04c3fSmrg } 9101e04c3fSmrg 9201e04c3fSmrg return false; 9301e04c3fSmrg} 9401e04c3fSmrg 9501e04c3fSmrgstatic bool 9601e04c3fSmrgcalc_dom_frontier(nir_block *block) 9701e04c3fSmrg{ 9801e04c3fSmrg if (block->predecessors->entries > 1) { 9901e04c3fSmrg set_foreach(block->predecessors, entry) { 10001e04c3fSmrg nir_block *runner = (nir_block *) entry->key; 10101e04c3fSmrg 10201e04c3fSmrg /* Skip unreachable predecessors */ 10301e04c3fSmrg if (runner->imm_dom == NULL) 10401e04c3fSmrg continue; 10501e04c3fSmrg 10601e04c3fSmrg while (runner != block->imm_dom) { 10701e04c3fSmrg _mesa_set_add(runner->dom_frontier, block); 10801e04c3fSmrg runner = runner->imm_dom; 10901e04c3fSmrg } 11001e04c3fSmrg } 11101e04c3fSmrg } 11201e04c3fSmrg 11301e04c3fSmrg return true; 11401e04c3fSmrg} 11501e04c3fSmrg 11601e04c3fSmrg/* 11701e04c3fSmrg * Compute each node's children in the dominance tree from the immediate 11801e04c3fSmrg * dominator information. We do this in three stages: 11901e04c3fSmrg * 12001e04c3fSmrg * 1. Calculate the number of children each node has 12101e04c3fSmrg * 2. Allocate arrays, setting the number of children to 0 again 12201e04c3fSmrg * 3. For each node, add itself to its parent's list of children, using 12301e04c3fSmrg * num_dom_children as an index - at the end of this step, num_dom_children 12401e04c3fSmrg * for each node will be the same as it was at the end of step #1. 12501e04c3fSmrg */ 12601e04c3fSmrg 12701e04c3fSmrgstatic void 12801e04c3fSmrgcalc_dom_children(nir_function_impl* impl) 12901e04c3fSmrg{ 13001e04c3fSmrg void *mem_ctx = ralloc_parent(impl); 13101e04c3fSmrg 1327ec681f3Smrg nir_foreach_block_unstructured(block, impl) { 13301e04c3fSmrg if (block->imm_dom) 13401e04c3fSmrg block->imm_dom->num_dom_children++; 13501e04c3fSmrg } 13601e04c3fSmrg 1377ec681f3Smrg nir_foreach_block_unstructured(block, impl) { 13801e04c3fSmrg block->dom_children = ralloc_array(mem_ctx, nir_block *, 13901e04c3fSmrg block->num_dom_children); 14001e04c3fSmrg block->num_dom_children = 0; 14101e04c3fSmrg } 14201e04c3fSmrg 1437ec681f3Smrg nir_foreach_block_unstructured(block, impl) { 14401e04c3fSmrg if (block->imm_dom) { 14501e04c3fSmrg block->imm_dom->dom_children[block->imm_dom->num_dom_children++] 14601e04c3fSmrg = block; 14701e04c3fSmrg } 14801e04c3fSmrg } 14901e04c3fSmrg} 15001e04c3fSmrg 15101e04c3fSmrgstatic void 1527ec681f3Smrgcalc_dfs_indicies(nir_block *block, uint32_t *index) 15301e04c3fSmrg{ 1547ec681f3Smrg /* UINT32_MAX has special meaning. See nir_block_dominates. */ 1557ec681f3Smrg assert(*index < UINT32_MAX - 2); 1567ec681f3Smrg 15701e04c3fSmrg block->dom_pre_index = (*index)++; 15801e04c3fSmrg 15901e04c3fSmrg for (unsigned i = 0; i < block->num_dom_children; i++) 16001e04c3fSmrg calc_dfs_indicies(block->dom_children[i], index); 16101e04c3fSmrg 16201e04c3fSmrg block->dom_post_index = (*index)++; 16301e04c3fSmrg} 16401e04c3fSmrg 16501e04c3fSmrgvoid 16601e04c3fSmrgnir_calc_dominance_impl(nir_function_impl *impl) 16701e04c3fSmrg{ 16801e04c3fSmrg if (impl->valid_metadata & nir_metadata_dominance) 16901e04c3fSmrg return; 17001e04c3fSmrg 17101e04c3fSmrg nir_metadata_require(impl, nir_metadata_block_index); 17201e04c3fSmrg 17301e04c3fSmrg 1747ec681f3Smrg nir_foreach_block_unstructured(block, impl) { 17501e04c3fSmrg init_block(block, impl); 17601e04c3fSmrg } 17701e04c3fSmrg 17801e04c3fSmrg bool progress = true; 17901e04c3fSmrg while (progress) { 18001e04c3fSmrg progress = false; 1817ec681f3Smrg nir_foreach_block_unstructured(block, impl) { 18201e04c3fSmrg if (block != nir_start_block(impl)) 18301e04c3fSmrg progress |= calc_dominance(block); 18401e04c3fSmrg } 18501e04c3fSmrg } 18601e04c3fSmrg 1877ec681f3Smrg nir_foreach_block_unstructured(block, impl) { 18801e04c3fSmrg calc_dom_frontier(block); 18901e04c3fSmrg } 19001e04c3fSmrg 19101e04c3fSmrg nir_block *start_block = nir_start_block(impl); 19201e04c3fSmrg start_block->imm_dom = NULL; 19301e04c3fSmrg 19401e04c3fSmrg calc_dom_children(impl); 19501e04c3fSmrg 1967ec681f3Smrg uint32_t dfs_index = 1; 19701e04c3fSmrg calc_dfs_indicies(start_block, &dfs_index); 19801e04c3fSmrg} 19901e04c3fSmrg 20001e04c3fSmrgvoid 20101e04c3fSmrgnir_calc_dominance(nir_shader *shader) 20201e04c3fSmrg{ 20301e04c3fSmrg nir_foreach_function(function, shader) { 20401e04c3fSmrg if (function->impl) 20501e04c3fSmrg nir_calc_dominance_impl(function->impl); 20601e04c3fSmrg } 20701e04c3fSmrg} 20801e04c3fSmrg 2097ec681f3Smrgstatic nir_block * 2107ec681f3Smrgblock_return_if_reachable(nir_block *b) 2117ec681f3Smrg{ 2127ec681f3Smrg return (b && nir_block_is_reachable(b)) ? b : NULL; 2137ec681f3Smrg} 2147ec681f3Smrg 21501e04c3fSmrg/** 2167ec681f3Smrg * Computes the least common ancestor of two blocks. If one of the blocks 2177ec681f3Smrg * is null or unreachable, the other block is returned or NULL if it's 2187ec681f3Smrg * unreachable. 21901e04c3fSmrg */ 22001e04c3fSmrgnir_block * 22101e04c3fSmrgnir_dominance_lca(nir_block *b1, nir_block *b2) 22201e04c3fSmrg{ 2237ec681f3Smrg if (b1 == NULL || !nir_block_is_reachable(b1)) 2247ec681f3Smrg return block_return_if_reachable(b2); 22501e04c3fSmrg 2267ec681f3Smrg if (b2 == NULL || !nir_block_is_reachable(b2)) 2277ec681f3Smrg return block_return_if_reachable(b1); 22801e04c3fSmrg 22901e04c3fSmrg assert(nir_cf_node_get_function(&b1->cf_node) == 23001e04c3fSmrg nir_cf_node_get_function(&b2->cf_node)); 23101e04c3fSmrg 23201e04c3fSmrg assert(nir_cf_node_get_function(&b1->cf_node)->valid_metadata & 23301e04c3fSmrg nir_metadata_dominance); 23401e04c3fSmrg 23501e04c3fSmrg return intersect(b1, b2); 23601e04c3fSmrg} 23701e04c3fSmrg 23801e04c3fSmrg/** 2397ec681f3Smrg * Returns true if parent dominates child according to the following 2407ec681f3Smrg * definition: 2417ec681f3Smrg * 2427ec681f3Smrg * "The block A dominates the block B if every path from the start block 2437ec681f3Smrg * to block B passes through A." 2447ec681f3Smrg * 2457ec681f3Smrg * This means, in particular, that any unreachable block is dominated by every 2467ec681f3Smrg * other block and an unreachable block does not dominate anything except 2477ec681f3Smrg * another unreachable block. 24801e04c3fSmrg */ 24901e04c3fSmrgbool 25001e04c3fSmrgnir_block_dominates(nir_block *parent, nir_block *child) 25101e04c3fSmrg{ 25201e04c3fSmrg assert(nir_cf_node_get_function(&parent->cf_node) == 25301e04c3fSmrg nir_cf_node_get_function(&child->cf_node)); 25401e04c3fSmrg 25501e04c3fSmrg assert(nir_cf_node_get_function(&parent->cf_node)->valid_metadata & 25601e04c3fSmrg nir_metadata_dominance); 25701e04c3fSmrg 2587ec681f3Smrg /* If a block is unreachable, then nir_block::dom_pre_index == UINT32_MAX 2597ec681f3Smrg * and nir_block::dom_post_index == 0. This allows us to trivially handle 2607ec681f3Smrg * unreachable blocks here with zero extra work. 2617ec681f3Smrg */ 26201e04c3fSmrg return child->dom_pre_index >= parent->dom_pre_index && 26301e04c3fSmrg child->dom_post_index <= parent->dom_post_index; 26401e04c3fSmrg} 26501e04c3fSmrg 2667e102996Smayabool 2677e102996Smayanir_block_is_unreachable(nir_block *block) 2687e102996Smaya{ 2697e102996Smaya assert(nir_cf_node_get_function(&block->cf_node)->valid_metadata & 2707e102996Smaya nir_metadata_dominance); 2717e102996Smaya assert(nir_cf_node_get_function(&block->cf_node)->valid_metadata & 2727e102996Smaya nir_metadata_block_index); 2737e102996Smaya 2747e102996Smaya /* Unreachable blocks have no dominator. The only reachable block with no 2757e102996Smaya * dominator is the start block which has index 0. 2767e102996Smaya */ 2777e102996Smaya return block->index > 0 && block->imm_dom == NULL; 2787e102996Smaya} 2797e102996Smaya 28001e04c3fSmrgvoid 28101e04c3fSmrgnir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp) 28201e04c3fSmrg{ 28301e04c3fSmrg fprintf(fp, "digraph doms_%s {\n", impl->function->name); 28401e04c3fSmrg 2857ec681f3Smrg nir_foreach_block_unstructured(block, impl) { 28601e04c3fSmrg if (block->imm_dom) 28701e04c3fSmrg fprintf(fp, "\t%u -> %u\n", block->imm_dom->index, block->index); 28801e04c3fSmrg } 28901e04c3fSmrg 29001e04c3fSmrg fprintf(fp, "}\n\n"); 29101e04c3fSmrg} 29201e04c3fSmrg 29301e04c3fSmrgvoid 29401e04c3fSmrgnir_dump_dom_tree(nir_shader *shader, FILE *fp) 29501e04c3fSmrg{ 29601e04c3fSmrg nir_foreach_function(function, shader) { 29701e04c3fSmrg if (function->impl) 29801e04c3fSmrg nir_dump_dom_tree_impl(function->impl, fp); 29901e04c3fSmrg } 30001e04c3fSmrg} 30101e04c3fSmrg 30201e04c3fSmrgvoid 30301e04c3fSmrgnir_dump_dom_frontier_impl(nir_function_impl *impl, FILE *fp) 30401e04c3fSmrg{ 3057ec681f3Smrg nir_foreach_block_unstructured(block, impl) { 30601e04c3fSmrg fprintf(fp, "DF(%u) = {", block->index); 30701e04c3fSmrg set_foreach(block->dom_frontier, entry) { 30801e04c3fSmrg nir_block *df = (nir_block *) entry->key; 30901e04c3fSmrg fprintf(fp, "%u, ", df->index); 31001e04c3fSmrg } 31101e04c3fSmrg fprintf(fp, "}\n"); 31201e04c3fSmrg } 31301e04c3fSmrg} 31401e04c3fSmrg 31501e04c3fSmrgvoid 31601e04c3fSmrgnir_dump_dom_frontier(nir_shader *shader, FILE *fp) 31701e04c3fSmrg{ 31801e04c3fSmrg nir_foreach_function(function, shader) { 31901e04c3fSmrg if (function->impl) 32001e04c3fSmrg nir_dump_dom_frontier_impl(function->impl, fp); 32101e04c3fSmrg } 32201e04c3fSmrg} 32301e04c3fSmrg 32401e04c3fSmrgvoid 32501e04c3fSmrgnir_dump_cfg_impl(nir_function_impl *impl, FILE *fp) 32601e04c3fSmrg{ 32701e04c3fSmrg fprintf(fp, "digraph cfg_%s {\n", impl->function->name); 32801e04c3fSmrg 3297ec681f3Smrg nir_foreach_block_unstructured(block, impl) { 33001e04c3fSmrg if (block->successors[0]) 33101e04c3fSmrg fprintf(fp, "\t%u -> %u\n", block->index, block->successors[0]->index); 33201e04c3fSmrg if (block->successors[1]) 33301e04c3fSmrg fprintf(fp, "\t%u -> %u\n", block->index, block->successors[1]->index); 33401e04c3fSmrg } 33501e04c3fSmrg 33601e04c3fSmrg fprintf(fp, "}\n\n"); 33701e04c3fSmrg} 33801e04c3fSmrg 33901e04c3fSmrgvoid 34001e04c3fSmrgnir_dump_cfg(nir_shader *shader, FILE *fp) 34101e04c3fSmrg{ 34201e04c3fSmrg nir_foreach_function(function, shader) { 34301e04c3fSmrg if (function->impl) 34401e04c3fSmrg nir_dump_cfg_impl(function->impl, fp); 34501e04c3fSmrg } 34601e04c3fSmrg} 347