nir_phi_builder.c revision 01e04c3f
1/*
2 * Copyright © 2016 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "nir_phi_builder.h"
25#include "nir/nir_vla.h"
26
/* State shared by every value tracked through one phi-building session on a
 * single function implementation.
 */
struct nir_phi_builder {
   /* Shader and function implementation the builder operates on. */
   nir_shader *shader;
   nir_function_impl *impl;

   /* Copied from the impl for easy access */
   unsigned num_blocks;

   /* Array of all blocks indexed by block->index. */
   nir_block **blocks;

   /* Hold on to the values so we can easily iterate over them. */
   struct exec_list values;

   /* Worklist for phi adding.
    *
    * iter_count is bumped once per added value.  work[] holds, per block
    * index, the iter_count at which the block was last queued, which lets
    * the same arrays be reused across values without clearing them.  W is
    * the queue of blocks itself.
    */
   unsigned iter_count;
   unsigned *work;
   nir_block **W;
};
45
/* Sentinel stored in a value's per-block defs array to mark a block that may
 * need a phi node for which no phi instruction has been created yet.  It is
 * never dereferenced; it is only compared against.
 */
#define NEEDS_PHI ((nir_ssa_def *)(intptr_t)-1)
47
48struct nir_phi_builder_value {
49   struct exec_node node;
50
51   struct nir_phi_builder *builder;
52
53   /* Needed so we can create phis and undefs */
54   unsigned num_components;
55   unsigned bit_size;
56
57   /* The list of phi nodes associated with this value.  Phi nodes are not
58    * added directly.  Instead, they are created, the instr->block pointer
59    * set, and then added to this list.  Later, in phi_builder_finish, we
60    * set up their sources and add them to the top of their respective
61    * blocks.
62    */
63   struct exec_list phis;
64
65   /* Array of SSA defs, indexed by block.  For each block, this array has has
66    * one of three types of values:
67    *
68    *  - NULL. Indicates that there is no known definition in this block.  If
69    *    you need to find one, look at the block's immediate dominator.
70    *
71    *  - NEEDS_PHI. Indicates that the block may need a phi node but none has
72    *    been created yet.  If a def is requested for a block, a phi will need
73    *    to be created.
74    *
75    *  - A regular SSA def.  This will be either the result of a phi node or
76    *    one of the defs provided by nir_phi_builder_value_set_blocK_def().
77    */
78   nir_ssa_def *defs[0];
79};
80
81struct nir_phi_builder *
82nir_phi_builder_create(nir_function_impl *impl)
83{
84   struct nir_phi_builder *pb = rzalloc(NULL, struct nir_phi_builder);
85
86   pb->shader = impl->function->shader;
87   pb->impl = impl;
88
89   assert(impl->valid_metadata & (nir_metadata_block_index |
90                                  nir_metadata_dominance));
91
92   pb->num_blocks = impl->num_blocks;
93   pb->blocks = ralloc_array(pb, nir_block *, pb->num_blocks);
94   nir_foreach_block(block, impl) {
95      pb->blocks[block->index] = block;
96   }
97
98   exec_list_make_empty(&pb->values);
99
100   pb->iter_count = 0;
101   pb->work = rzalloc_array(pb, unsigned, pb->num_blocks);
102   pb->W = ralloc_array(pb, nir_block *, pb->num_blocks);
103
104   return pb;
105}
106
107struct nir_phi_builder_value *
108nir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components,
109                          unsigned bit_size, const BITSET_WORD *defs)
110{
111   struct nir_phi_builder_value *val;
112   unsigned i, w_start = 0, w_end = 0;
113
114   val = rzalloc_size(pb, sizeof(*val) + sizeof(val->defs[0]) * pb->num_blocks);
115   val->builder = pb;
116   val->num_components = num_components;
117   val->bit_size = bit_size;
118   exec_list_make_empty(&val->phis);
119   exec_list_push_tail(&pb->values, &val->node);
120
121   pb->iter_count++;
122
123   BITSET_WORD tmp;
124   BITSET_FOREACH_SET(i, tmp, defs, pb->num_blocks) {
125      if (pb->work[i] < pb->iter_count)
126         pb->W[w_end++] = pb->blocks[i];
127      pb->work[i] = pb->iter_count;
128   }
129
130   while (w_start != w_end) {
131      nir_block *cur = pb->W[w_start++];
132      set_foreach(cur->dom_frontier, dom_entry) {
133         nir_block *next = (nir_block *) dom_entry->key;
134
135         /* If there's more than one return statement, then the end block
136          * can be a join point for some definitions. However, there are
137          * no instructions in the end block, so nothing would use those
138          * phi nodes. Of course, we couldn't place those phi nodes
139          * anyways due to the restriction of having no instructions in the
140          * end block...
141          */
142         if (next == pb->impl->end_block)
143            continue;
144
145         if (val->defs[next->index] == NULL) {
146            /* Instead of creating a phi node immediately, we simply set the
147             * value to the magic value NEEDS_PHI.  Later, we create phi nodes
148             * on demand in nir_phi_builder_value_get_block_def().
149             */
150            val->defs[next->index] = NEEDS_PHI;
151
152            if (pb->work[next->index] < pb->iter_count) {
153               pb->work[next->index] = pb->iter_count;
154               pb->W[w_end++] = next;
155            }
156         }
157      }
158   }
159
160   return val;
161}
162
163void
164nir_phi_builder_value_set_block_def(struct nir_phi_builder_value *val,
165                                    nir_block *block, nir_ssa_def *def)
166{
167   val->defs[block->index] = def;
168}
169
170nir_ssa_def *
171nir_phi_builder_value_get_block_def(struct nir_phi_builder_value *val,
172                                    nir_block *block)
173{
174   /* Crawl up the dominance tree and find the closest dominator for which we
175    * have a valid ssa_def, if any.
176    */
177   nir_block *dom = block;
178   while (dom && val->defs[dom->index] == NULL)
179      dom = dom->imm_dom;
180
181   nir_ssa_def *def;
182   if (dom == NULL) {
183      /* No dominator means either that we crawled to the top without ever
184       * finding a definition or that this block is unreachable.  In either
185       * case, the value is undefined so we need an SSA undef.
186       */
187      nir_ssa_undef_instr *undef =
188         nir_ssa_undef_instr_create(val->builder->shader,
189                                    val->num_components,
190                                    val->bit_size);
191      nir_instr_insert(nir_before_cf_list(&val->builder->impl->body),
192                       &undef->instr);
193      def = &undef->def;
194   } else if (val->defs[dom->index] == NEEDS_PHI) {
195      /* The magic value NEEDS_PHI indicates that the block needs a phi node
196       * but none has been created.  We need to create one now so we can
197       * return it to the caller.
198       *
199       * Because a phi node may use SSA defs that it does not dominate (this
200       * happens in loops), we do not yet have enough information to fully
201       * fill out the phi node.  Instead, the phi nodes we create here will be
202       * empty (have no sources) and won't actually be placed in the block's
203       * instruction list yet.  Later, in nir_phi_builder_finish(), we walk
204       * over all of the phi instructions, fill out the sources lists, and
205       * place them at the top of their respective block's instruction list.
206       *
207       * Creating phi nodes on-demand allows us to avoid creating dead phi
208       * nodes that will just get deleted later. While this probably isn't a
209       * big win for a full into-SSA pass, other users may use the phi builder
210       * to make small SSA form repairs where most of the phi nodes will never
211       * be used.
212       */
213      nir_phi_instr *phi = nir_phi_instr_create(val->builder->shader);
214      nir_ssa_dest_init(&phi->instr, &phi->dest, val->num_components,
215                        val->bit_size, NULL);
216      phi->instr.block = dom;
217      exec_list_push_tail(&val->phis, &phi->instr.node);
218      def = val->defs[dom->index] = &phi->dest.ssa;
219   } else {
220      /* In this case, we have an actual SSA def.  It's either the result of a
221       * phi node created by the case above or one passed to us through
222       * nir_phi_builder_value_set_block_def().
223       */
224      def = val->defs[dom->index];
225   }
226
227   /* Walk the chain and stash the def in all of the applicable blocks.  We do
228    * this for two reasons:
229    *
230    *  1) To speed up lookup next time even if the next time is called from a
231    *     block that is not dominated by this one.
232    *  2) To avoid unneeded recreation of phi nodes and undefs.
233    */
234   for (dom = block; dom && val->defs[dom->index] == NULL; dom = dom->imm_dom)
235      val->defs[dom->index] = def;
236
237   return def;
238}
239
240static int
241compare_blocks(const void *_a, const void *_b)
242{
243   const nir_block * const * a = _a;
244   const nir_block * const * b = _b;
245
246   return (*a)->index - (*b)->index;
247}
248
249void
250nir_phi_builder_finish(struct nir_phi_builder *pb)
251{
252   const unsigned num_blocks = pb->num_blocks;
253   NIR_VLA(nir_block *, preds, num_blocks);
254
255   foreach_list_typed(struct nir_phi_builder_value, val, node, &pb->values) {
256      /* We treat the linked list of phi nodes like a worklist.  The list is
257       * pre-populated by calls to nir_phi_builder_value_get_block_def() that
258       * create phi nodes.  As we fill in the sources of phi nodes, more may
259       * be created and are added to the end of the list.
260       *
261       * Because we are adding and removing phi nodes from the list as we go,
262       * we can't iterate over it normally.  Instead, we just iterate until
263       * the list is empty.
264       */
265      while (!exec_list_is_empty(&val->phis)) {
266         struct exec_node *head = exec_list_get_head(&val->phis);
267         nir_phi_instr *phi = exec_node_data(nir_phi_instr, head, instr.node);
268         assert(phi->instr.type == nir_instr_type_phi);
269
270         exec_node_remove(&phi->instr.node);
271
272         /* Construct an array of predecessors.  We sort it to ensure
273          * determinism in the phi insertion algorithm.
274          *
275          * XXX: Calling qsort this many times seems expensive.
276          */
277         int num_preds = 0;
278         set_foreach(phi->instr.block->predecessors, entry)
279            preds[num_preds++] = (nir_block *)entry->key;
280         qsort(preds, num_preds, sizeof(*preds), compare_blocks);
281
282         for (unsigned i = 0; i < num_preds; i++) {
283            nir_phi_src *src = ralloc(phi, nir_phi_src);
284            src->pred = preds[i];
285            src->src = nir_src_for_ssa(
286               nir_phi_builder_value_get_block_def(val, preds[i]));
287            exec_list_push_tail(&phi->srcs, &src->node);
288         }
289
290         nir_instr_insert(nir_before_block(phi->instr.block), &phi->instr);
291      }
292   }
293
294   ralloc_free(pb);
295}
296