1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2014 Intel Corporation 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21b8e80941Smrg * IN THE SOFTWARE. 22b8e80941Smrg * 23b8e80941Smrg * Authors: 24b8e80941Smrg * Connor Abbott (cwabbott0@gmail.com) 25b8e80941Smrg * 26b8e80941Smrg */ 27b8e80941Smrg 28b8e80941Smrg#include "nir.h" 29b8e80941Smrg 30b8e80941Smrg/* 31b8e80941Smrg * Implements the algorithms for computing the dominance tree and the 32b8e80941Smrg * dominance frontier from "A Simple, Fast Dominance Algorithm" by Cooper, 33b8e80941Smrg * Harvey, and Kennedy. 34b8e80941Smrg */ 35b8e80941Smrg 36b8e80941Smrgstatic bool 37b8e80941Smrginit_block(nir_block *block, nir_function_impl *impl) 38b8e80941Smrg{ 39b8e80941Smrg if (block == nir_start_block(impl)) 40b8e80941Smrg block->imm_dom = block; 41b8e80941Smrg else 42b8e80941Smrg block->imm_dom = NULL; 43b8e80941Smrg block->num_dom_children = 0; 44b8e80941Smrg 45b8e80941Smrg set_foreach(block->dom_frontier, entry) { 46b8e80941Smrg _mesa_set_remove(block->dom_frontier, entry); 47b8e80941Smrg } 48b8e80941Smrg 49b8e80941Smrg return true; 50b8e80941Smrg} 51b8e80941Smrg 52b8e80941Smrgstatic nir_block * 53b8e80941Smrgintersect(nir_block *b1, nir_block *b2) 54b8e80941Smrg{ 55b8e80941Smrg while (b1 != b2) { 56b8e80941Smrg /* 57b8e80941Smrg * Note, the comparisons here are the opposite of what the paper says 58b8e80941Smrg * because we index blocks from beginning -> end (i.e. reverse 59b8e80941Smrg * post-order) instead of post-order like they assume. 60b8e80941Smrg */ 61b8e80941Smrg while (b1->index > b2->index) 62b8e80941Smrg b1 = b1->imm_dom; 63b8e80941Smrg while (b2->index > b1->index) 64b8e80941Smrg b2 = b2->imm_dom; 65b8e80941Smrg } 66b8e80941Smrg 67b8e80941Smrg return b1; 68b8e80941Smrg} 69b8e80941Smrg 70b8e80941Smrgstatic bool 71b8e80941Smrgcalc_dominance(nir_block *block) 72b8e80941Smrg{ 73b8e80941Smrg nir_block *new_idom = NULL; 74b8e80941Smrg set_foreach(block->predecessors, entry) { 75b8e80941Smrg nir_block *pred = (nir_block *) entry->key; 76b8e80941Smrg 77b8e80941Smrg if (pred->imm_dom) { 78b8e80941Smrg if (new_idom) 79b8e80941Smrg new_idom = intersect(pred, new_idom); 80b8e80941Smrg else 81b8e80941Smrg new_idom = pred; 82b8e80941Smrg } 83b8e80941Smrg } 84b8e80941Smrg 85b8e80941Smrg if (block->imm_dom != new_idom) { 86b8e80941Smrg block->imm_dom = new_idom; 87b8e80941Smrg return true; 88b8e80941Smrg } 89b8e80941Smrg 90b8e80941Smrg return false; 91b8e80941Smrg} 92b8e80941Smrg 93b8e80941Smrgstatic bool 94b8e80941Smrgcalc_dom_frontier(nir_block *block) 95b8e80941Smrg{ 96b8e80941Smrg if (block->predecessors->entries > 1) { 97b8e80941Smrg set_foreach(block->predecessors, entry) { 98b8e80941Smrg nir_block *runner = (nir_block *) entry->key; 99b8e80941Smrg 100b8e80941Smrg /* Skip unreachable predecessors */ 101b8e80941Smrg if (runner->imm_dom == NULL) 102b8e80941Smrg continue; 103b8e80941Smrg 104b8e80941Smrg while (runner != block->imm_dom) { 105b8e80941Smrg _mesa_set_add(runner->dom_frontier, block); 106b8e80941Smrg runner = runner->imm_dom; 107b8e80941Smrg } 108b8e80941Smrg } 109b8e80941Smrg } 110b8e80941Smrg 111b8e80941Smrg return true; 112b8e80941Smrg} 113b8e80941Smrg 114b8e80941Smrg/* 115b8e80941Smrg * Compute each node's children in the dominance tree from the immediate 116b8e80941Smrg * dominator information. We do this in three stages: 117b8e80941Smrg * 118b8e80941Smrg * 1. Calculate the number of children each node has 119b8e80941Smrg * 2. Allocate arrays, setting the number of children to 0 again 120b8e80941Smrg * 3. For each node, add itself to its parent's list of children, using 121b8e80941Smrg * num_dom_children as an index - at the end of this step, num_dom_children 122b8e80941Smrg * for each node will be the same as it was at the end of step #1. 123b8e80941Smrg */ 124b8e80941Smrg 125b8e80941Smrgstatic void 126b8e80941Smrgcalc_dom_children(nir_function_impl* impl) 127b8e80941Smrg{ 128b8e80941Smrg void *mem_ctx = ralloc_parent(impl); 129b8e80941Smrg 130b8e80941Smrg nir_foreach_block(block, impl) { 131b8e80941Smrg if (block->imm_dom) 132b8e80941Smrg block->imm_dom->num_dom_children++; 133b8e80941Smrg } 134b8e80941Smrg 135b8e80941Smrg nir_foreach_block(block, impl) { 136b8e80941Smrg block->dom_children = ralloc_array(mem_ctx, nir_block *, 137b8e80941Smrg block->num_dom_children); 138b8e80941Smrg block->num_dom_children = 0; 139b8e80941Smrg } 140b8e80941Smrg 141b8e80941Smrg nir_foreach_block(block, impl) { 142b8e80941Smrg if (block->imm_dom) { 143b8e80941Smrg block->imm_dom->dom_children[block->imm_dom->num_dom_children++] 144b8e80941Smrg = block; 145b8e80941Smrg } 146b8e80941Smrg } 147b8e80941Smrg} 148b8e80941Smrg 149b8e80941Smrgstatic void 150b8e80941Smrgcalc_dfs_indicies(nir_block *block, unsigned *index) 151b8e80941Smrg{ 152b8e80941Smrg block->dom_pre_index = (*index)++; 153b8e80941Smrg 154b8e80941Smrg for (unsigned i = 0; i < block->num_dom_children; i++) 155b8e80941Smrg calc_dfs_indicies(block->dom_children[i], index); 156b8e80941Smrg 157b8e80941Smrg block->dom_post_index = (*index)++; 158b8e80941Smrg} 159b8e80941Smrg 160b8e80941Smrgvoid 161b8e80941Smrgnir_calc_dominance_impl(nir_function_impl *impl) 162b8e80941Smrg{ 163b8e80941Smrg if (impl->valid_metadata & nir_metadata_dominance) 164b8e80941Smrg return; 165b8e80941Smrg 166b8e80941Smrg nir_metadata_require(impl, nir_metadata_block_index); 167b8e80941Smrg 168b8e80941Smrg 169b8e80941Smrg nir_foreach_block(block, impl) { 170b8e80941Smrg init_block(block, impl); 171b8e80941Smrg } 172b8e80941Smrg 173b8e80941Smrg bool progress = true; 174b8e80941Smrg while (progress) { 175b8e80941Smrg progress = false; 176b8e80941Smrg nir_foreach_block(block, impl) { 177b8e80941Smrg if (block != nir_start_block(impl)) 178b8e80941Smrg progress |= calc_dominance(block); 179b8e80941Smrg } 180b8e80941Smrg } 181b8e80941Smrg 182b8e80941Smrg nir_foreach_block(block, impl) { 183b8e80941Smrg calc_dom_frontier(block); 184b8e80941Smrg } 185b8e80941Smrg 186b8e80941Smrg nir_block *start_block = nir_start_block(impl); 187b8e80941Smrg start_block->imm_dom = NULL; 188b8e80941Smrg 189b8e80941Smrg calc_dom_children(impl); 190b8e80941Smrg 191b8e80941Smrg unsigned dfs_index = 0; 192b8e80941Smrg calc_dfs_indicies(start_block, &dfs_index); 193b8e80941Smrg} 194b8e80941Smrg 195b8e80941Smrgvoid 196b8e80941Smrgnir_calc_dominance(nir_shader *shader) 197b8e80941Smrg{ 198b8e80941Smrg nir_foreach_function(function, shader) { 199b8e80941Smrg if (function->impl) 200b8e80941Smrg nir_calc_dominance_impl(function->impl); 201b8e80941Smrg } 202b8e80941Smrg} 203b8e80941Smrg 204b8e80941Smrg/** 205b8e80941Smrg * Computes the least common anscestor of two blocks. If one of the blocks 206b8e80941Smrg * is null, the other block is returned. 207b8e80941Smrg */ 208b8e80941Smrgnir_block * 209b8e80941Smrgnir_dominance_lca(nir_block *b1, nir_block *b2) 210b8e80941Smrg{ 211b8e80941Smrg if (b1 == NULL) 212b8e80941Smrg return b2; 213b8e80941Smrg 214b8e80941Smrg if (b2 == NULL) 215b8e80941Smrg return b1; 216b8e80941Smrg 217b8e80941Smrg assert(nir_cf_node_get_function(&b1->cf_node) == 218b8e80941Smrg nir_cf_node_get_function(&b2->cf_node)); 219b8e80941Smrg 220b8e80941Smrg assert(nir_cf_node_get_function(&b1->cf_node)->valid_metadata & 221b8e80941Smrg nir_metadata_dominance); 222b8e80941Smrg 223b8e80941Smrg return intersect(b1, b2); 224b8e80941Smrg} 225b8e80941Smrg 226b8e80941Smrg/** 227b8e80941Smrg * Returns true if parent dominates child 228b8e80941Smrg */ 229b8e80941Smrgbool 230b8e80941Smrgnir_block_dominates(nir_block *parent, nir_block *child) 231b8e80941Smrg{ 232b8e80941Smrg assert(nir_cf_node_get_function(&parent->cf_node) == 233b8e80941Smrg nir_cf_node_get_function(&child->cf_node)); 234b8e80941Smrg 235b8e80941Smrg assert(nir_cf_node_get_function(&parent->cf_node)->valid_metadata & 236b8e80941Smrg nir_metadata_dominance); 237b8e80941Smrg 238b8e80941Smrg return child->dom_pre_index >= parent->dom_pre_index && 239b8e80941Smrg child->dom_post_index <= parent->dom_post_index; 240b8e80941Smrg} 241b8e80941Smrg 242b8e80941Smrgbool 243b8e80941Smrgnir_block_is_unreachable(nir_block *block) 244b8e80941Smrg{ 245b8e80941Smrg assert(nir_cf_node_get_function(&block->cf_node)->valid_metadata & 246b8e80941Smrg nir_metadata_dominance); 247b8e80941Smrg assert(nir_cf_node_get_function(&block->cf_node)->valid_metadata & 248b8e80941Smrg nir_metadata_block_index); 249b8e80941Smrg 250b8e80941Smrg /* Unreachable blocks have no dominator. The only reachable block with no 251b8e80941Smrg * dominator is the start block which has index 0. 252b8e80941Smrg */ 253b8e80941Smrg return block->index > 0 && block->imm_dom == NULL; 254b8e80941Smrg} 255b8e80941Smrg 256b8e80941Smrgvoid 257b8e80941Smrgnir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp) 258b8e80941Smrg{ 259b8e80941Smrg fprintf(fp, "digraph doms_%s {\n", impl->function->name); 260b8e80941Smrg 261b8e80941Smrg nir_foreach_block(block, impl) { 262b8e80941Smrg if (block->imm_dom) 263b8e80941Smrg fprintf(fp, "\t%u -> %u\n", block->imm_dom->index, block->index); 264b8e80941Smrg } 265b8e80941Smrg 266b8e80941Smrg fprintf(fp, "}\n\n"); 267b8e80941Smrg} 268b8e80941Smrg 269b8e80941Smrgvoid 270b8e80941Smrgnir_dump_dom_tree(nir_shader *shader, FILE *fp) 271b8e80941Smrg{ 272b8e80941Smrg nir_foreach_function(function, shader) { 273b8e80941Smrg if (function->impl) 274b8e80941Smrg nir_dump_dom_tree_impl(function->impl, fp); 275b8e80941Smrg } 276b8e80941Smrg} 277b8e80941Smrg 278b8e80941Smrgvoid 279b8e80941Smrgnir_dump_dom_frontier_impl(nir_function_impl *impl, FILE *fp) 280b8e80941Smrg{ 281b8e80941Smrg nir_foreach_block(block, impl) { 282b8e80941Smrg fprintf(fp, "DF(%u) = {", block->index); 283b8e80941Smrg set_foreach(block->dom_frontier, entry) { 284b8e80941Smrg nir_block *df = (nir_block *) entry->key; 285b8e80941Smrg fprintf(fp, "%u, ", df->index); 286b8e80941Smrg } 287b8e80941Smrg fprintf(fp, "}\n"); 288b8e80941Smrg } 289b8e80941Smrg} 290b8e80941Smrg 291b8e80941Smrgvoid 292b8e80941Smrgnir_dump_dom_frontier(nir_shader *shader, FILE *fp) 293b8e80941Smrg{ 294b8e80941Smrg nir_foreach_function(function, shader) { 295b8e80941Smrg if (function->impl) 296b8e80941Smrg nir_dump_dom_frontier_impl(function->impl, fp); 297b8e80941Smrg } 298b8e80941Smrg} 299b8e80941Smrg 300b8e80941Smrgvoid 301b8e80941Smrgnir_dump_cfg_impl(nir_function_impl *impl, FILE *fp) 302b8e80941Smrg{ 303b8e80941Smrg fprintf(fp, "digraph cfg_%s {\n", impl->function->name); 304b8e80941Smrg 305b8e80941Smrg nir_foreach_block(block, impl) { 306b8e80941Smrg if (block->successors[0]) 307b8e80941Smrg fprintf(fp, "\t%u -> %u\n", block->index, block->successors[0]->index); 308b8e80941Smrg if (block->successors[1]) 309b8e80941Smrg fprintf(fp, "\t%u -> %u\n", block->index, block->successors[1]->index); 310b8e80941Smrg } 311b8e80941Smrg 312b8e80941Smrg fprintf(fp, "}\n\n"); 313b8e80941Smrg} 314b8e80941Smrg 315b8e80941Smrgvoid 316b8e80941Smrgnir_dump_cfg(nir_shader *shader, FILE *fp) 317b8e80941Smrg{ 318b8e80941Smrg nir_foreach_function(function, shader) { 319b8e80941Smrg if (function->impl) 320b8e80941Smrg nir_dump_cfg_impl(function->impl, fp); 321b8e80941Smrg } 322b8e80941Smrg} 323