1b8e80941Smrg/*
2b8e80941Smrg * Copyright © 2015 Thomas Helland
3b8e80941Smrg *
4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
6b8e80941Smrg * to deal in the Software without restriction, including without limitation
7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
10b8e80941Smrg *
11b8e80941Smrg * The above copyright notice and this permission notice (including the next
12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
13b8e80941Smrg * Software.
14b8e80941Smrg *
15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21b8e80941Smrg * IN THE SOFTWARE.
22b8e80941Smrg */
23b8e80941Smrg
24b8e80941Smrg/*
25b8e80941Smrg * This pass converts the ssa-graph into "Loop Closed SSA form". This is
26b8e80941Smrg * done by placing phi nodes at the exits of the loop for all values
27b8e80941Smrg * that are used outside the loop. The result is it transforms:
28b8e80941Smrg *
29b8e80941Smrg * loop {                    ->      loop {
30b8e80941Smrg *    ssa2 = ....            ->          ssa2 = ...
31b8e80941Smrg *    if (cond)              ->          if (cond)
32b8e80941Smrg *       break;              ->             break;
33b8e80941Smrg *    ssa3 = ssa2 * ssa4     ->          ssa3 = ssa2 * ssa4
34b8e80941Smrg * }                         ->       }
35b8e80941Smrg * ssa6 = ssa2 + 4           ->       ssa5 = phi(ssa2)
36b8e80941Smrg *                                    ssa6 = ssa5 + 4
37b8e80941Smrg */
38b8e80941Smrg
39b8e80941Smrg#include "nir.h"
40b8e80941Smrg
41b8e80941Smrgtypedef struct {
42b8e80941Smrg   /* The nir_shader we are transforming */
43b8e80941Smrg   nir_shader *shader;
44b8e80941Smrg
45b8e80941Smrg   /* The loop we store information for */
46b8e80941Smrg   nir_loop *loop;
47b8e80941Smrg
48b8e80941Smrg} lcssa_state;
49b8e80941Smrg
50b8e80941Smrgstatic bool
51b8e80941Smrgis_if_use_inside_loop(nir_src *use, nir_loop* loop)
52b8e80941Smrg{
53b8e80941Smrg   nir_block *block_before_loop =
54b8e80941Smrg      nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));
55b8e80941Smrg   nir_block *block_after_loop =
56b8e80941Smrg      nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node));
57b8e80941Smrg
58b8e80941Smrg   nir_block *prev_block =
59b8e80941Smrg      nir_cf_node_as_block(nir_cf_node_prev(&use->parent_if->cf_node));
60b8e80941Smrg   if (prev_block->index <= block_before_loop->index ||
61b8e80941Smrg       prev_block->index >= block_after_loop->index) {
62b8e80941Smrg      return false;
63b8e80941Smrg   }
64b8e80941Smrg
65b8e80941Smrg   return true;
66b8e80941Smrg}
67b8e80941Smrg
68b8e80941Smrgstatic bool
69b8e80941Smrgis_use_inside_loop(nir_src *use, nir_loop* loop)
70b8e80941Smrg{
71b8e80941Smrg   nir_block *block_before_loop =
72b8e80941Smrg      nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));
73b8e80941Smrg   nir_block *block_after_loop =
74b8e80941Smrg      nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node));
75b8e80941Smrg
76b8e80941Smrg   if (use->parent_instr->block->index <= block_before_loop->index ||
77b8e80941Smrg       use->parent_instr->block->index >= block_after_loop->index) {
78b8e80941Smrg      return false;
79b8e80941Smrg   }
80b8e80941Smrg
81b8e80941Smrg   return true;
82b8e80941Smrg}
83b8e80941Smrg
84b8e80941Smrgstatic bool
85b8e80941Smrgconvert_loop_exit_for_ssa(nir_ssa_def *def, void *void_state)
86b8e80941Smrg{
87b8e80941Smrg   lcssa_state *state = void_state;
88b8e80941Smrg   bool all_uses_inside_loop = true;
89b8e80941Smrg
90b8e80941Smrg   nir_block *block_after_loop =
91b8e80941Smrg      nir_cf_node_as_block(nir_cf_node_next(&state->loop->cf_node));
92b8e80941Smrg
93b8e80941Smrg   nir_foreach_use(use, def) {
94b8e80941Smrg      if (use->parent_instr->type == nir_instr_type_phi &&
95b8e80941Smrg          use->parent_instr->block == block_after_loop) {
96b8e80941Smrg         continue;
97b8e80941Smrg      }
98b8e80941Smrg
99b8e80941Smrg      if (!is_use_inside_loop(use, state->loop)) {
100b8e80941Smrg         all_uses_inside_loop = false;
101b8e80941Smrg      }
102b8e80941Smrg   }
103b8e80941Smrg
104b8e80941Smrg   nir_foreach_if_use(use, def) {
105b8e80941Smrg      if (!is_if_use_inside_loop(use, state->loop)) {
106b8e80941Smrg         all_uses_inside_loop = false;
107b8e80941Smrg      }
108b8e80941Smrg   }
109b8e80941Smrg
110b8e80941Smrg   /* There where no sources that had defs outside the loop */
111b8e80941Smrg   if (all_uses_inside_loop)
112b8e80941Smrg      return true;
113b8e80941Smrg
114b8e80941Smrg   /* Initialize a phi-instruction */
115b8e80941Smrg   nir_phi_instr *phi = nir_phi_instr_create(state->shader);
116b8e80941Smrg   nir_ssa_dest_init(&phi->instr, &phi->dest,
117b8e80941Smrg                     def->num_components, def->bit_size, "LCSSA-phi");
118b8e80941Smrg
119b8e80941Smrg   /* Create a phi node with as many sources pointing to the same ssa_def as
120b8e80941Smrg    * the block has predecessors.
121b8e80941Smrg    */
122b8e80941Smrg   set_foreach(block_after_loop->predecessors, entry) {
123b8e80941Smrg      nir_phi_src *phi_src = ralloc(phi, nir_phi_src);
124b8e80941Smrg      phi_src->src = nir_src_for_ssa(def);
125b8e80941Smrg      phi_src->pred = (nir_block *) entry->key;
126b8e80941Smrg
127b8e80941Smrg      exec_list_push_tail(&phi->srcs, &phi_src->node);
128b8e80941Smrg   }
129b8e80941Smrg
130b8e80941Smrg   nir_instr_insert_before_block(block_after_loop, &phi->instr);
131b8e80941Smrg   nir_ssa_def *dest = &phi->dest.ssa;
132b8e80941Smrg
133b8e80941Smrg   /* deref instructions need a cast after the phi */
134b8e80941Smrg   if (def->parent_instr->type == nir_instr_type_deref) {
135b8e80941Smrg      nir_deref_instr *cast =
136b8e80941Smrg         nir_deref_instr_create(state->shader, nir_deref_type_cast);
137b8e80941Smrg
138b8e80941Smrg      nir_deref_instr *instr = nir_instr_as_deref(def->parent_instr);
139b8e80941Smrg      cast->mode = instr->mode;
140b8e80941Smrg      cast->type = instr->type;
141b8e80941Smrg      cast->parent = nir_src_for_ssa(&phi->dest.ssa);
142b8e80941Smrg      cast->cast.ptr_stride = nir_deref_instr_ptr_as_array_stride(instr);
143b8e80941Smrg
144b8e80941Smrg      nir_ssa_dest_init(&cast->instr, &cast->dest,
145b8e80941Smrg                        phi->dest.ssa.num_components,
146b8e80941Smrg                        phi->dest.ssa.bit_size, NULL);
147b8e80941Smrg      nir_instr_insert(nir_after_phis(block_after_loop), &cast->instr);
148b8e80941Smrg      dest = &cast->dest.ssa;
149b8e80941Smrg   }
150b8e80941Smrg
151b8e80941Smrg   /* Run through all uses and rewrite those outside the loop to point to
152b8e80941Smrg    * the phi instead of pointing to the ssa-def.
153b8e80941Smrg    */
154b8e80941Smrg   nir_foreach_use_safe(use, def) {
155b8e80941Smrg      if (use->parent_instr->type == nir_instr_type_phi &&
156b8e80941Smrg          block_after_loop == use->parent_instr->block) {
157b8e80941Smrg         continue;
158b8e80941Smrg      }
159b8e80941Smrg
160b8e80941Smrg      if (!is_use_inside_loop(use, state->loop)) {
161b8e80941Smrg         nir_instr_rewrite_src(use->parent_instr, use, nir_src_for_ssa(dest));
162b8e80941Smrg      }
163b8e80941Smrg   }
164b8e80941Smrg
165b8e80941Smrg   nir_foreach_if_use_safe(use, def) {
166b8e80941Smrg      if (!is_if_use_inside_loop(use, state->loop)) {
167b8e80941Smrg         nir_if_rewrite_condition(use->parent_if, nir_src_for_ssa(dest));
168b8e80941Smrg      }
169b8e80941Smrg   }
170b8e80941Smrg
171b8e80941Smrg   return true;
172b8e80941Smrg}
173b8e80941Smrg
174b8e80941Smrgstatic void
175b8e80941Smrgconvert_to_lcssa(nir_cf_node *cf_node, lcssa_state *state)
176b8e80941Smrg{
177b8e80941Smrg   switch (cf_node->type) {
178b8e80941Smrg   case nir_cf_node_block:
179b8e80941Smrg      nir_foreach_instr(instr, nir_cf_node_as_block(cf_node))
180b8e80941Smrg         nir_foreach_ssa_def(instr, convert_loop_exit_for_ssa, state);
181b8e80941Smrg      return;
182b8e80941Smrg   case nir_cf_node_if: {
183b8e80941Smrg      nir_if *if_stmt = nir_cf_node_as_if(cf_node);
184b8e80941Smrg      foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->then_list)
185b8e80941Smrg         convert_to_lcssa(nested_node, state);
186b8e80941Smrg      foreach_list_typed(nir_cf_node, nested_node, node, &if_stmt->else_list)
187b8e80941Smrg         convert_to_lcssa(nested_node, state);
188b8e80941Smrg      return;
189b8e80941Smrg   }
190b8e80941Smrg   case nir_cf_node_loop: {
191b8e80941Smrg      nir_loop *parent_loop = state->loop;
192b8e80941Smrg      state->loop = nir_cf_node_as_loop(cf_node);
193b8e80941Smrg
194b8e80941Smrg      foreach_list_typed(nir_cf_node, nested_node, node, &state->loop->body)
195b8e80941Smrg         convert_to_lcssa(nested_node, state);
196b8e80941Smrg
197b8e80941Smrg      state->loop = parent_loop;
198b8e80941Smrg      return;
199b8e80941Smrg   }
200b8e80941Smrg   default:
201b8e80941Smrg      unreachable("unknown cf node type");
202b8e80941Smrg   }
203b8e80941Smrg}
204b8e80941Smrg
205b8e80941Smrgvoid
206b8e80941Smrgnir_convert_loop_to_lcssa(nir_loop *loop) {
207b8e80941Smrg   nir_function_impl *impl = nir_cf_node_get_function(&loop->cf_node);
208b8e80941Smrg
209b8e80941Smrg   nir_metadata_require(impl, nir_metadata_block_index);
210b8e80941Smrg
211b8e80941Smrg   lcssa_state *state = rzalloc(NULL, lcssa_state);
212b8e80941Smrg   state->loop = loop;
213b8e80941Smrg   state->shader = impl->function->shader;
214b8e80941Smrg
215b8e80941Smrg   foreach_list_typed(nir_cf_node, node, node, &state->loop->body)
216b8e80941Smrg      convert_to_lcssa(node, state);
217b8e80941Smrg
218b8e80941Smrg   ralloc_free(state);
219b8e80941Smrg}
220