101e04c3fSmrg/*
201e04c3fSmrg * Copyright © 2014 Intel Corporation
301e04c3fSmrg *
401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
501e04c3fSmrg * copy of this software and associated documentation files (the "Software"),
601e04c3fSmrg * to deal in the Software without restriction, including without limitation
701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the
901e04c3fSmrg * Software is furnished to do so, subject to the following conditions:
1001e04c3fSmrg *
1101e04c3fSmrg * The above copyright notice and this permission notice (including the next
1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the
1301e04c3fSmrg * Software.
1401e04c3fSmrg *
1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
2101e04c3fSmrg * IN THE SOFTWARE.
2201e04c3fSmrg *
2301e04c3fSmrg * Authors:
2401e04c3fSmrg *    Connor Abbott (cwabbott0@gmail.com)
2501e04c3fSmrg *
2601e04c3fSmrg */
2701e04c3fSmrg
2801e04c3fSmrg#include "nir.h"
2901e04c3fSmrg
3001e04c3fSmrg/*
3101e04c3fSmrg * Implements the algorithms for computing the dominance tree and the
3201e04c3fSmrg * dominance frontier from "A Simple, Fast Dominance Algorithm" by Cooper,
3301e04c3fSmrg * Harvey, and Kennedy.
3401e04c3fSmrg */
3501e04c3fSmrg
3601e04c3fSmrgstatic bool
3701e04c3fSmrginit_block(nir_block *block, nir_function_impl *impl)
3801e04c3fSmrg{
3901e04c3fSmrg   if (block == nir_start_block(impl))
4001e04c3fSmrg      block->imm_dom = block;
4101e04c3fSmrg   else
4201e04c3fSmrg      block->imm_dom = NULL;
4301e04c3fSmrg   block->num_dom_children = 0;
4401e04c3fSmrg
457ec681f3Smrg   /* See nir_block_dominates */
467ec681f3Smrg   block->dom_pre_index = UINT32_MAX;
477ec681f3Smrg   block->dom_post_index = 0;
487ec681f3Smrg
497ec681f3Smrg   _mesa_set_clear(block->dom_frontier, NULL);
5001e04c3fSmrg
5101e04c3fSmrg   return true;
5201e04c3fSmrg}
5301e04c3fSmrg
5401e04c3fSmrgstatic nir_block *
5501e04c3fSmrgintersect(nir_block *b1, nir_block *b2)
5601e04c3fSmrg{
5701e04c3fSmrg   while (b1 != b2) {
5801e04c3fSmrg      /*
5901e04c3fSmrg       * Note, the comparisons here are the opposite of what the paper says
6001e04c3fSmrg       * because we index blocks from beginning -> end (i.e. reverse
6101e04c3fSmrg       * post-order) instead of post-order like they assume.
6201e04c3fSmrg       */
6301e04c3fSmrg      while (b1->index > b2->index)
6401e04c3fSmrg         b1 = b1->imm_dom;
6501e04c3fSmrg      while (b2->index > b1->index)
6601e04c3fSmrg         b2 = b2->imm_dom;
6701e04c3fSmrg   }
6801e04c3fSmrg
6901e04c3fSmrg   return b1;
7001e04c3fSmrg}
7101e04c3fSmrg
7201e04c3fSmrgstatic bool
7301e04c3fSmrgcalc_dominance(nir_block *block)
7401e04c3fSmrg{
7501e04c3fSmrg   nir_block *new_idom = NULL;
7601e04c3fSmrg   set_foreach(block->predecessors, entry) {
7701e04c3fSmrg      nir_block *pred = (nir_block *) entry->key;
7801e04c3fSmrg
7901e04c3fSmrg      if (pred->imm_dom) {
8001e04c3fSmrg         if (new_idom)
8101e04c3fSmrg            new_idom = intersect(pred, new_idom);
8201e04c3fSmrg         else
8301e04c3fSmrg            new_idom = pred;
8401e04c3fSmrg      }
8501e04c3fSmrg   }
8601e04c3fSmrg
8701e04c3fSmrg   if (block->imm_dom != new_idom) {
8801e04c3fSmrg      block->imm_dom = new_idom;
8901e04c3fSmrg      return true;
9001e04c3fSmrg   }
9101e04c3fSmrg
9201e04c3fSmrg   return false;
9301e04c3fSmrg}
9401e04c3fSmrg
9501e04c3fSmrgstatic bool
9601e04c3fSmrgcalc_dom_frontier(nir_block *block)
9701e04c3fSmrg{
9801e04c3fSmrg   if (block->predecessors->entries > 1) {
9901e04c3fSmrg      set_foreach(block->predecessors, entry) {
10001e04c3fSmrg         nir_block *runner = (nir_block *) entry->key;
10101e04c3fSmrg
10201e04c3fSmrg         /* Skip unreachable predecessors */
10301e04c3fSmrg         if (runner->imm_dom == NULL)
10401e04c3fSmrg            continue;
10501e04c3fSmrg
10601e04c3fSmrg         while (runner != block->imm_dom) {
10701e04c3fSmrg            _mesa_set_add(runner->dom_frontier, block);
10801e04c3fSmrg            runner = runner->imm_dom;
10901e04c3fSmrg         }
11001e04c3fSmrg      }
11101e04c3fSmrg   }
11201e04c3fSmrg
11301e04c3fSmrg   return true;
11401e04c3fSmrg}
11501e04c3fSmrg
11601e04c3fSmrg/*
11701e04c3fSmrg * Compute each node's children in the dominance tree from the immediate
11801e04c3fSmrg * dominator information. We do this in three stages:
11901e04c3fSmrg *
12001e04c3fSmrg * 1. Calculate the number of children each node has
12101e04c3fSmrg * 2. Allocate arrays, setting the number of children to 0 again
12201e04c3fSmrg * 3. For each node, add itself to its parent's list of children, using
12301e04c3fSmrg *    num_dom_children as an index - at the end of this step, num_dom_children
12401e04c3fSmrg *    for each node will be the same as it was at the end of step #1.
12501e04c3fSmrg */
12601e04c3fSmrg
12701e04c3fSmrgstatic void
12801e04c3fSmrgcalc_dom_children(nir_function_impl* impl)
12901e04c3fSmrg{
13001e04c3fSmrg   void *mem_ctx = ralloc_parent(impl);
13101e04c3fSmrg
1327ec681f3Smrg   nir_foreach_block_unstructured(block, impl) {
13301e04c3fSmrg      if (block->imm_dom)
13401e04c3fSmrg         block->imm_dom->num_dom_children++;
13501e04c3fSmrg   }
13601e04c3fSmrg
1377ec681f3Smrg   nir_foreach_block_unstructured(block, impl) {
13801e04c3fSmrg      block->dom_children = ralloc_array(mem_ctx, nir_block *,
13901e04c3fSmrg                                         block->num_dom_children);
14001e04c3fSmrg      block->num_dom_children = 0;
14101e04c3fSmrg   }
14201e04c3fSmrg
1437ec681f3Smrg   nir_foreach_block_unstructured(block, impl) {
14401e04c3fSmrg      if (block->imm_dom) {
14501e04c3fSmrg         block->imm_dom->dom_children[block->imm_dom->num_dom_children++]
14601e04c3fSmrg            = block;
14701e04c3fSmrg      }
14801e04c3fSmrg   }
14901e04c3fSmrg}
15001e04c3fSmrg
15101e04c3fSmrgstatic void
1527ec681f3Smrgcalc_dfs_indicies(nir_block *block, uint32_t *index)
15301e04c3fSmrg{
1547ec681f3Smrg   /* UINT32_MAX has special meaning. See nir_block_dominates. */
1557ec681f3Smrg   assert(*index < UINT32_MAX - 2);
1567ec681f3Smrg
15701e04c3fSmrg   block->dom_pre_index = (*index)++;
15801e04c3fSmrg
15901e04c3fSmrg   for (unsigned i = 0; i < block->num_dom_children; i++)
16001e04c3fSmrg      calc_dfs_indicies(block->dom_children[i], index);
16101e04c3fSmrg
16201e04c3fSmrg   block->dom_post_index = (*index)++;
16301e04c3fSmrg}
16401e04c3fSmrg
16501e04c3fSmrgvoid
16601e04c3fSmrgnir_calc_dominance_impl(nir_function_impl *impl)
16701e04c3fSmrg{
16801e04c3fSmrg   if (impl->valid_metadata & nir_metadata_dominance)
16901e04c3fSmrg      return;
17001e04c3fSmrg
17101e04c3fSmrg   nir_metadata_require(impl, nir_metadata_block_index);
17201e04c3fSmrg
17301e04c3fSmrg
1747ec681f3Smrg   nir_foreach_block_unstructured(block, impl) {
17501e04c3fSmrg      init_block(block, impl);
17601e04c3fSmrg   }
17701e04c3fSmrg
17801e04c3fSmrg   bool progress = true;
17901e04c3fSmrg   while (progress) {
18001e04c3fSmrg      progress = false;
1817ec681f3Smrg      nir_foreach_block_unstructured(block, impl) {
18201e04c3fSmrg         if (block != nir_start_block(impl))
18301e04c3fSmrg            progress |= calc_dominance(block);
18401e04c3fSmrg      }
18501e04c3fSmrg   }
18601e04c3fSmrg
1877ec681f3Smrg   nir_foreach_block_unstructured(block, impl) {
18801e04c3fSmrg      calc_dom_frontier(block);
18901e04c3fSmrg   }
19001e04c3fSmrg
19101e04c3fSmrg   nir_block *start_block = nir_start_block(impl);
19201e04c3fSmrg   start_block->imm_dom = NULL;
19301e04c3fSmrg
19401e04c3fSmrg   calc_dom_children(impl);
19501e04c3fSmrg
1967ec681f3Smrg   uint32_t dfs_index = 1;
19701e04c3fSmrg   calc_dfs_indicies(start_block, &dfs_index);
19801e04c3fSmrg}
19901e04c3fSmrg
20001e04c3fSmrgvoid
20101e04c3fSmrgnir_calc_dominance(nir_shader *shader)
20201e04c3fSmrg{
20301e04c3fSmrg   nir_foreach_function(function, shader) {
20401e04c3fSmrg      if (function->impl)
20501e04c3fSmrg         nir_calc_dominance_impl(function->impl);
20601e04c3fSmrg   }
20701e04c3fSmrg}
20801e04c3fSmrg
2097ec681f3Smrgstatic nir_block *
2107ec681f3Smrgblock_return_if_reachable(nir_block *b)
2117ec681f3Smrg{
2127ec681f3Smrg   return (b && nir_block_is_reachable(b)) ? b : NULL;
2137ec681f3Smrg}
2147ec681f3Smrg
21501e04c3fSmrg/**
2167ec681f3Smrg * Computes the least common ancestor of two blocks.  If one of the blocks
2177ec681f3Smrg * is null or unreachable, the other block is returned or NULL if it's
2187ec681f3Smrg * unreachable.
21901e04c3fSmrg */
22001e04c3fSmrgnir_block *
22101e04c3fSmrgnir_dominance_lca(nir_block *b1, nir_block *b2)
22201e04c3fSmrg{
2237ec681f3Smrg   if (b1 == NULL || !nir_block_is_reachable(b1))
2247ec681f3Smrg      return block_return_if_reachable(b2);
22501e04c3fSmrg
2267ec681f3Smrg   if (b2 == NULL || !nir_block_is_reachable(b2))
2277ec681f3Smrg      return block_return_if_reachable(b1);
22801e04c3fSmrg
22901e04c3fSmrg   assert(nir_cf_node_get_function(&b1->cf_node) ==
23001e04c3fSmrg          nir_cf_node_get_function(&b2->cf_node));
23101e04c3fSmrg
23201e04c3fSmrg   assert(nir_cf_node_get_function(&b1->cf_node)->valid_metadata &
23301e04c3fSmrg          nir_metadata_dominance);
23401e04c3fSmrg
23501e04c3fSmrg   return intersect(b1, b2);
23601e04c3fSmrg}
23701e04c3fSmrg
23801e04c3fSmrg/**
2397ec681f3Smrg * Returns true if parent dominates child according to the following
2407ec681f3Smrg * definition:
2417ec681f3Smrg *
2427ec681f3Smrg *    "The block A dominates the block B if every path from the start block
2437ec681f3Smrg *    to block B passes through A."
2447ec681f3Smrg *
2457ec681f3Smrg * This means, in particular, that any unreachable block is dominated by every
2467ec681f3Smrg * other block and an unreachable block does not dominate anything except
2477ec681f3Smrg * another unreachable block.
24801e04c3fSmrg */
24901e04c3fSmrgbool
25001e04c3fSmrgnir_block_dominates(nir_block *parent, nir_block *child)
25101e04c3fSmrg{
25201e04c3fSmrg   assert(nir_cf_node_get_function(&parent->cf_node) ==
25301e04c3fSmrg          nir_cf_node_get_function(&child->cf_node));
25401e04c3fSmrg
25501e04c3fSmrg   assert(nir_cf_node_get_function(&parent->cf_node)->valid_metadata &
25601e04c3fSmrg          nir_metadata_dominance);
25701e04c3fSmrg
2587ec681f3Smrg   /* If a block is unreachable, then nir_block::dom_pre_index == UINT32_MAX
2597ec681f3Smrg    * and nir_block::dom_post_index == 0.  This allows us to trivially handle
2607ec681f3Smrg    * unreachable blocks here with zero extra work.
2617ec681f3Smrg    */
26201e04c3fSmrg   return child->dom_pre_index >= parent->dom_pre_index &&
26301e04c3fSmrg          child->dom_post_index <= parent->dom_post_index;
26401e04c3fSmrg}
26501e04c3fSmrg
2667e102996Smayabool
2677e102996Smayanir_block_is_unreachable(nir_block *block)
2687e102996Smaya{
2697e102996Smaya   assert(nir_cf_node_get_function(&block->cf_node)->valid_metadata &
2707e102996Smaya          nir_metadata_dominance);
2717e102996Smaya   assert(nir_cf_node_get_function(&block->cf_node)->valid_metadata &
2727e102996Smaya          nir_metadata_block_index);
2737e102996Smaya
2747e102996Smaya   /* Unreachable blocks have no dominator.  The only reachable block with no
2757e102996Smaya    * dominator is the start block which has index 0.
2767e102996Smaya    */
2777e102996Smaya   return block->index > 0 && block->imm_dom == NULL;
2787e102996Smaya}
2797e102996Smaya
28001e04c3fSmrgvoid
28101e04c3fSmrgnir_dump_dom_tree_impl(nir_function_impl *impl, FILE *fp)
28201e04c3fSmrg{
28301e04c3fSmrg   fprintf(fp, "digraph doms_%s {\n", impl->function->name);
28401e04c3fSmrg
2857ec681f3Smrg   nir_foreach_block_unstructured(block, impl) {
28601e04c3fSmrg      if (block->imm_dom)
28701e04c3fSmrg         fprintf(fp, "\t%u -> %u\n", block->imm_dom->index, block->index);
28801e04c3fSmrg   }
28901e04c3fSmrg
29001e04c3fSmrg   fprintf(fp, "}\n\n");
29101e04c3fSmrg}
29201e04c3fSmrg
29301e04c3fSmrgvoid
29401e04c3fSmrgnir_dump_dom_tree(nir_shader *shader, FILE *fp)
29501e04c3fSmrg{
29601e04c3fSmrg   nir_foreach_function(function, shader) {
29701e04c3fSmrg      if (function->impl)
29801e04c3fSmrg         nir_dump_dom_tree_impl(function->impl, fp);
29901e04c3fSmrg   }
30001e04c3fSmrg}
30101e04c3fSmrg
30201e04c3fSmrgvoid
30301e04c3fSmrgnir_dump_dom_frontier_impl(nir_function_impl *impl, FILE *fp)
30401e04c3fSmrg{
3057ec681f3Smrg   nir_foreach_block_unstructured(block, impl) {
30601e04c3fSmrg      fprintf(fp, "DF(%u) = {", block->index);
30701e04c3fSmrg      set_foreach(block->dom_frontier, entry) {
30801e04c3fSmrg         nir_block *df = (nir_block *) entry->key;
30901e04c3fSmrg         fprintf(fp, "%u, ", df->index);
31001e04c3fSmrg      }
31101e04c3fSmrg      fprintf(fp, "}\n");
31201e04c3fSmrg   }
31301e04c3fSmrg}
31401e04c3fSmrg
31501e04c3fSmrgvoid
31601e04c3fSmrgnir_dump_dom_frontier(nir_shader *shader, FILE *fp)
31701e04c3fSmrg{
31801e04c3fSmrg   nir_foreach_function(function, shader) {
31901e04c3fSmrg      if (function->impl)
32001e04c3fSmrg         nir_dump_dom_frontier_impl(function->impl, fp);
32101e04c3fSmrg   }
32201e04c3fSmrg}
32301e04c3fSmrg
32401e04c3fSmrgvoid
32501e04c3fSmrgnir_dump_cfg_impl(nir_function_impl *impl, FILE *fp)
32601e04c3fSmrg{
32701e04c3fSmrg   fprintf(fp, "digraph cfg_%s {\n", impl->function->name);
32801e04c3fSmrg
3297ec681f3Smrg   nir_foreach_block_unstructured(block, impl) {
33001e04c3fSmrg      if (block->successors[0])
33101e04c3fSmrg         fprintf(fp, "\t%u -> %u\n", block->index, block->successors[0]->index);
33201e04c3fSmrg      if (block->successors[1])
33301e04c3fSmrg         fprintf(fp, "\t%u -> %u\n", block->index, block->successors[1]->index);
33401e04c3fSmrg   }
33501e04c3fSmrg
33601e04c3fSmrg   fprintf(fp, "}\n\n");
33701e04c3fSmrg}
33801e04c3fSmrg
33901e04c3fSmrgvoid
34001e04c3fSmrgnir_dump_cfg(nir_shader *shader, FILE *fp)
34101e04c3fSmrg{
34201e04c3fSmrg   nir_foreach_function(function, shader) {
34301e04c3fSmrg      if (function->impl)
34401e04c3fSmrg         nir_dump_cfg_impl(function->impl, fp);
34501e04c3fSmrg   }
34601e04c3fSmrg}
347