1b8e80941Smrg/*
2b8e80941Smrg * Copyright © 2016 Intel Corporation
3b8e80941Smrg *
4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
6b8e80941Smrg * to deal in the Software without restriction, including without limitation
7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
10b8e80941Smrg *
11b8e80941Smrg * The above copyright notice and this permission notice (including the next
12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
13b8e80941Smrg * Software.
14b8e80941Smrg *
15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21b8e80941Smrg * IN THE SOFTWARE.
22b8e80941Smrg */
23b8e80941Smrg
24b8e80941Smrg#include "nir_phi_builder.h"
25b8e80941Smrg#include "nir/nir_vla.h"
26b8e80941Smrg
27b8e80941Smrgstruct nir_phi_builder {
28b8e80941Smrg   nir_shader *shader;
29b8e80941Smrg   nir_function_impl *impl;
30b8e80941Smrg
31b8e80941Smrg   /* Copied from the impl for easy access */
32b8e80941Smrg   unsigned num_blocks;
33b8e80941Smrg
34b8e80941Smrg   /* Array of all blocks indexed by block->index. */
35b8e80941Smrg   nir_block **blocks;
36b8e80941Smrg
37b8e80941Smrg   /* Hold on to the values so we can easily iterate over them. */
38b8e80941Smrg   struct exec_list values;
39b8e80941Smrg
40b8e80941Smrg   /* Worklist for phi adding */
41b8e80941Smrg   unsigned iter_count;
42b8e80941Smrg   unsigned *work;
43b8e80941Smrg   nir_block **W;
44b8e80941Smrg};
45b8e80941Smrg
46b8e80941Smrg#define NEEDS_PHI ((nir_ssa_def *)(intptr_t)-1)
47b8e80941Smrg
48b8e80941Smrgstruct nir_phi_builder_value {
49b8e80941Smrg   struct exec_node node;
50b8e80941Smrg
51b8e80941Smrg   struct nir_phi_builder *builder;
52b8e80941Smrg
53b8e80941Smrg   /* Needed so we can create phis and undefs */
54b8e80941Smrg   unsigned num_components;
55b8e80941Smrg   unsigned bit_size;
56b8e80941Smrg
57b8e80941Smrg   /* The list of phi nodes associated with this value.  Phi nodes are not
58b8e80941Smrg    * added directly.  Instead, they are created, the instr->block pointer
59b8e80941Smrg    * set, and then added to this list.  Later, in phi_builder_finish, we
60b8e80941Smrg    * set up their sources and add them to the top of their respective
61b8e80941Smrg    * blocks.
62b8e80941Smrg    */
63b8e80941Smrg   struct exec_list phis;
64b8e80941Smrg
65b8e80941Smrg   /* Array of SSA defs, indexed by block.  For each block, this array has has
66b8e80941Smrg    * one of three types of values:
67b8e80941Smrg    *
68b8e80941Smrg    *  - NULL. Indicates that there is no known definition in this block.  If
69b8e80941Smrg    *    you need to find one, look at the block's immediate dominator.
70b8e80941Smrg    *
71b8e80941Smrg    *  - NEEDS_PHI. Indicates that the block may need a phi node but none has
72b8e80941Smrg    *    been created yet.  If a def is requested for a block, a phi will need
73b8e80941Smrg    *    to be created.
74b8e80941Smrg    *
75b8e80941Smrg    *  - A regular SSA def.  This will be either the result of a phi node or
76b8e80941Smrg    *    one of the defs provided by nir_phi_builder_value_set_blocK_def().
77b8e80941Smrg    */
78b8e80941Smrg   struct hash_table ht;
79b8e80941Smrg};
80b8e80941Smrg
81b8e80941Smrg/**
82b8e80941Smrg * Convert a block index into a value that can be used as a key for a hash table
83b8e80941Smrg *
84b8e80941Smrg * The hash table functions want a pointer that is not \c NULL.
85b8e80941Smrg * _mesa_hash_pointer drops the two least significant bits, but that's where
86b8e80941Smrg * most of our data likely is.  Shift by 2 and add 1 to make everything happy.
87b8e80941Smrg */
88b8e80941Smrg#define INDEX_TO_KEY(x) ((void *)(uintptr_t) ((x << 2) + 1))
89b8e80941Smrg
90b8e80941Smrgstruct nir_phi_builder *
91b8e80941Smrgnir_phi_builder_create(nir_function_impl *impl)
92b8e80941Smrg{
93b8e80941Smrg   struct nir_phi_builder *pb = rzalloc(NULL, struct nir_phi_builder);
94b8e80941Smrg
95b8e80941Smrg   pb->shader = impl->function->shader;
96b8e80941Smrg   pb->impl = impl;
97b8e80941Smrg
98b8e80941Smrg   assert(impl->valid_metadata & (nir_metadata_block_index |
99b8e80941Smrg                                  nir_metadata_dominance));
100b8e80941Smrg
101b8e80941Smrg   pb->num_blocks = impl->num_blocks;
102b8e80941Smrg   pb->blocks = ralloc_array(pb, nir_block *, pb->num_blocks);
103b8e80941Smrg   nir_foreach_block(block, impl) {
104b8e80941Smrg      pb->blocks[block->index] = block;
105b8e80941Smrg   }
106b8e80941Smrg
107b8e80941Smrg   exec_list_make_empty(&pb->values);
108b8e80941Smrg
109b8e80941Smrg   pb->iter_count = 0;
110b8e80941Smrg   pb->work = rzalloc_array(pb, unsigned, pb->num_blocks);
111b8e80941Smrg   pb->W = ralloc_array(pb, nir_block *, pb->num_blocks);
112b8e80941Smrg
113b8e80941Smrg   return pb;
114b8e80941Smrg}
115b8e80941Smrg
116b8e80941Smrgstruct nir_phi_builder_value *
117b8e80941Smrgnir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components,
118b8e80941Smrg                          unsigned bit_size, const BITSET_WORD *defs)
119b8e80941Smrg{
120b8e80941Smrg   struct nir_phi_builder_value *val;
121b8e80941Smrg   unsigned i, w_start = 0, w_end = 0;
122b8e80941Smrg
123b8e80941Smrg   val = rzalloc_size(pb, sizeof(*val));
124b8e80941Smrg   val->builder = pb;
125b8e80941Smrg   val->num_components = num_components;
126b8e80941Smrg   val->bit_size = bit_size;
127b8e80941Smrg   exec_list_make_empty(&val->phis);
128b8e80941Smrg   exec_list_push_tail(&pb->values, &val->node);
129b8e80941Smrg
130b8e80941Smrg   _mesa_hash_table_init(&val->ht, pb, _mesa_hash_pointer,
131b8e80941Smrg                         _mesa_key_pointer_equal);
132b8e80941Smrg
133b8e80941Smrg   pb->iter_count++;
134b8e80941Smrg
135b8e80941Smrg   BITSET_WORD tmp;
136b8e80941Smrg   BITSET_FOREACH_SET(i, tmp, defs, pb->num_blocks) {
137b8e80941Smrg      if (pb->work[i] < pb->iter_count)
138b8e80941Smrg         pb->W[w_end++] = pb->blocks[i];
139b8e80941Smrg      pb->work[i] = pb->iter_count;
140b8e80941Smrg   }
141b8e80941Smrg
142b8e80941Smrg   while (w_start != w_end) {
143b8e80941Smrg      nir_block *cur = pb->W[w_start++];
144b8e80941Smrg      set_foreach(cur->dom_frontier, dom_entry) {
145b8e80941Smrg         nir_block *next = (nir_block *) dom_entry->key;
146b8e80941Smrg
147b8e80941Smrg         /* If there's more than one return statement, then the end block
148b8e80941Smrg          * can be a join point for some definitions. However, there are
149b8e80941Smrg          * no instructions in the end block, so nothing would use those
150b8e80941Smrg          * phi nodes. Of course, we couldn't place those phi nodes
151b8e80941Smrg          * anyways due to the restriction of having no instructions in the
152b8e80941Smrg          * end block...
153b8e80941Smrg          */
154b8e80941Smrg         if (next == pb->impl->end_block)
155b8e80941Smrg            continue;
156b8e80941Smrg
157b8e80941Smrg         if (_mesa_hash_table_search(&val->ht, INDEX_TO_KEY(next->index)) == NULL) {
158b8e80941Smrg            /* Instead of creating a phi node immediately, we simply set the
159b8e80941Smrg             * value to the magic value NEEDS_PHI.  Later, we create phi nodes
160b8e80941Smrg             * on demand in nir_phi_builder_value_get_block_def().
161b8e80941Smrg             */
162b8e80941Smrg            nir_phi_builder_value_set_block_def(val, next, NEEDS_PHI);
163b8e80941Smrg
164b8e80941Smrg            if (pb->work[next->index] < pb->iter_count) {
165b8e80941Smrg               pb->work[next->index] = pb->iter_count;
166b8e80941Smrg               pb->W[w_end++] = next;
167b8e80941Smrg            }
168b8e80941Smrg         }
169b8e80941Smrg      }
170b8e80941Smrg   }
171b8e80941Smrg
172b8e80941Smrg   return val;
173b8e80941Smrg}
174b8e80941Smrg
175b8e80941Smrgvoid
176b8e80941Smrgnir_phi_builder_value_set_block_def(struct nir_phi_builder_value *val,
177b8e80941Smrg                                    nir_block *block, nir_ssa_def *def)
178b8e80941Smrg{
179b8e80941Smrg   _mesa_hash_table_insert(&val->ht, INDEX_TO_KEY(block->index), def);
180b8e80941Smrg}
181b8e80941Smrg
182b8e80941Smrgnir_ssa_def *
183b8e80941Smrgnir_phi_builder_value_get_block_def(struct nir_phi_builder_value *val,
184b8e80941Smrg                                    nir_block *block)
185b8e80941Smrg{
186b8e80941Smrg   /* Crawl up the dominance tree and find the closest dominator for which we
187b8e80941Smrg    * have a valid ssa_def, if any.
188b8e80941Smrg    */
189b8e80941Smrg   nir_block *dom = block;
190b8e80941Smrg   struct hash_entry *he = NULL;
191b8e80941Smrg
192b8e80941Smrg   while (dom != NULL) {
193b8e80941Smrg      he = _mesa_hash_table_search(&val->ht, INDEX_TO_KEY(dom->index));
194b8e80941Smrg      if (he != NULL)
195b8e80941Smrg         break;
196b8e80941Smrg
197b8e80941Smrg      dom = dom->imm_dom;
198b8e80941Smrg   }
199b8e80941Smrg
200b8e80941Smrg   /* Exactly one of (he != NULL) and (dom == NULL) must be true. */
201b8e80941Smrg   assert((he != NULL) != (dom == NULL));
202b8e80941Smrg
203b8e80941Smrg   nir_ssa_def *def;
204b8e80941Smrg   if (dom == NULL) {
205b8e80941Smrg      /* No dominator means either that we crawled to the top without ever
206b8e80941Smrg       * finding a definition or that this block is unreachable.  In either
207b8e80941Smrg       * case, the value is undefined so we need an SSA undef.
208b8e80941Smrg       */
209b8e80941Smrg      nir_ssa_undef_instr *undef =
210b8e80941Smrg         nir_ssa_undef_instr_create(val->builder->shader,
211b8e80941Smrg                                    val->num_components,
212b8e80941Smrg                                    val->bit_size);
213b8e80941Smrg      nir_instr_insert(nir_before_cf_list(&val->builder->impl->body),
214b8e80941Smrg                       &undef->instr);
215b8e80941Smrg      def = &undef->def;
216b8e80941Smrg   } else if (he->data == NEEDS_PHI) {
217b8e80941Smrg      /* The magic value NEEDS_PHI indicates that the block needs a phi node
218b8e80941Smrg       * but none has been created.  We need to create one now so we can
219b8e80941Smrg       * return it to the caller.
220b8e80941Smrg       *
221b8e80941Smrg       * Because a phi node may use SSA defs that it does not dominate (this
222b8e80941Smrg       * happens in loops), we do not yet have enough information to fully
223b8e80941Smrg       * fill out the phi node.  Instead, the phi nodes we create here will be
224b8e80941Smrg       * empty (have no sources) and won't actually be placed in the block's
225b8e80941Smrg       * instruction list yet.  Later, in nir_phi_builder_finish(), we walk
226b8e80941Smrg       * over all of the phi instructions, fill out the sources lists, and
227b8e80941Smrg       * place them at the top of their respective block's instruction list.
228b8e80941Smrg       *
229b8e80941Smrg       * Creating phi nodes on-demand allows us to avoid creating dead phi
230b8e80941Smrg       * nodes that will just get deleted later. While this probably isn't a
231b8e80941Smrg       * big win for a full into-SSA pass, other users may use the phi builder
232b8e80941Smrg       * to make small SSA form repairs where most of the phi nodes will never
233b8e80941Smrg       * be used.
234b8e80941Smrg       */
235b8e80941Smrg      nir_phi_instr *phi = nir_phi_instr_create(val->builder->shader);
236b8e80941Smrg      nir_ssa_dest_init(&phi->instr, &phi->dest, val->num_components,
237b8e80941Smrg                        val->bit_size, NULL);
238b8e80941Smrg      phi->instr.block = dom;
239b8e80941Smrg      exec_list_push_tail(&val->phis, &phi->instr.node);
240b8e80941Smrg      def = &phi->dest.ssa;
241b8e80941Smrg      he->data = def;
242b8e80941Smrg   } else {
243b8e80941Smrg      /* In this case, we have an actual SSA def.  It's either the result of a
244b8e80941Smrg       * phi node created by the case above or one passed to us through
245b8e80941Smrg       * nir_phi_builder_value_set_block_def().
246b8e80941Smrg       */
247b8e80941Smrg      def = (struct nir_ssa_def *) he->data;
248b8e80941Smrg   }
249b8e80941Smrg
250b8e80941Smrg   /* Walk the chain and stash the def in all of the applicable blocks.  We do
251b8e80941Smrg    * this for two reasons:
252b8e80941Smrg    *
253b8e80941Smrg    *  1) To speed up lookup next time even if the next time is called from a
254b8e80941Smrg    *     block that is not dominated by this one.
255b8e80941Smrg    *  2) To avoid unneeded recreation of phi nodes and undefs.
256b8e80941Smrg    */
257b8e80941Smrg   for (dom = block; dom != NULL; dom = dom->imm_dom) {
258b8e80941Smrg      if (_mesa_hash_table_search(&val->ht, INDEX_TO_KEY(dom->index)) != NULL)
259b8e80941Smrg         break;
260b8e80941Smrg
261b8e80941Smrg      nir_phi_builder_value_set_block_def(val, dom, def);
262b8e80941Smrg   }
263b8e80941Smrg
264b8e80941Smrg   return def;
265b8e80941Smrg}
266b8e80941Smrg
267b8e80941Smrgstatic int
268b8e80941Smrgcompare_blocks(const void *_a, const void *_b)
269b8e80941Smrg{
270b8e80941Smrg   const nir_block * const * a = _a;
271b8e80941Smrg   const nir_block * const * b = _b;
272b8e80941Smrg
273b8e80941Smrg   return (*a)->index - (*b)->index;
274b8e80941Smrg}
275b8e80941Smrg
276b8e80941Smrgvoid
277b8e80941Smrgnir_phi_builder_finish(struct nir_phi_builder *pb)
278b8e80941Smrg{
279b8e80941Smrg   const unsigned num_blocks = pb->num_blocks;
280b8e80941Smrg   NIR_VLA(nir_block *, preds, num_blocks);
281b8e80941Smrg
282b8e80941Smrg   foreach_list_typed(struct nir_phi_builder_value, val, node, &pb->values) {
283b8e80941Smrg      /* We treat the linked list of phi nodes like a worklist.  The list is
284b8e80941Smrg       * pre-populated by calls to nir_phi_builder_value_get_block_def() that
285b8e80941Smrg       * create phi nodes.  As we fill in the sources of phi nodes, more may
286b8e80941Smrg       * be created and are added to the end of the list.
287b8e80941Smrg       *
288b8e80941Smrg       * Because we are adding and removing phi nodes from the list as we go,
289b8e80941Smrg       * we can't iterate over it normally.  Instead, we just iterate until
290b8e80941Smrg       * the list is empty.
291b8e80941Smrg       */
292b8e80941Smrg      while (!exec_list_is_empty(&val->phis)) {
293b8e80941Smrg         struct exec_node *head = exec_list_get_head(&val->phis);
294b8e80941Smrg         nir_phi_instr *phi = exec_node_data(nir_phi_instr, head, instr.node);
295b8e80941Smrg         assert(phi->instr.type == nir_instr_type_phi);
296b8e80941Smrg
297b8e80941Smrg         exec_node_remove(&phi->instr.node);
298b8e80941Smrg
299b8e80941Smrg         /* Construct an array of predecessors.  We sort it to ensure
300b8e80941Smrg          * determinism in the phi insertion algorithm.
301b8e80941Smrg          *
302b8e80941Smrg          * XXX: Calling qsort this many times seems expensive.
303b8e80941Smrg          */
304b8e80941Smrg         int num_preds = 0;
305b8e80941Smrg         set_foreach(phi->instr.block->predecessors, entry)
306b8e80941Smrg            preds[num_preds++] = (nir_block *)entry->key;
307b8e80941Smrg         qsort(preds, num_preds, sizeof(*preds), compare_blocks);
308b8e80941Smrg
309b8e80941Smrg         for (unsigned i = 0; i < num_preds; i++) {
310b8e80941Smrg            nir_phi_src *src = ralloc(phi, nir_phi_src);
311b8e80941Smrg            src->pred = preds[i];
312b8e80941Smrg            src->src = nir_src_for_ssa(
313b8e80941Smrg               nir_phi_builder_value_get_block_def(val, preds[i]));
314b8e80941Smrg            exec_list_push_tail(&phi->srcs, &src->node);
315b8e80941Smrg         }
316b8e80941Smrg
317b8e80941Smrg         nir_instr_insert(nir_before_block(phi->instr.block), &phi->instr);
318b8e80941Smrg      }
319b8e80941Smrg   }
320b8e80941Smrg
321b8e80941Smrg   ralloc_free(pb);
322b8e80941Smrg}
323