1b8e80941Smrg/*
2b8e80941Smrg * Copyright © 2012 Intel Corporation
3b8e80941Smrg *
4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
6b8e80941Smrg * to deal in the Software without restriction, including without limitation
7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
10b8e80941Smrg *
11b8e80941Smrg * The above copyright notice and this permission notice (including the next
12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
13b8e80941Smrg * Software.
14b8e80941Smrg *
15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21b8e80941Smrg * IN THE SOFTWARE.
22b8e80941Smrg *
23b8e80941Smrg * Authors:
24b8e80941Smrg *    Eric Anholt <eric@anholt.net>
25b8e80941Smrg *
26b8e80941Smrg */
27b8e80941Smrg
28b8e80941Smrg#include "brw_cfg.h"
29b8e80941Smrg
30b8e80941Smrg/** @file brw_cfg.cpp
31b8e80941Smrg *
32b8e80941Smrg * Walks the shader instructions generated and creates a set of basic
33b8e80941Smrg * blocks with successor/predecessor edges connecting them.
34b8e80941Smrg */
35b8e80941Smrg
36b8e80941Smrgstatic bblock_t *
37b8e80941Smrgpop_stack(exec_list *list)
38b8e80941Smrg{
39b8e80941Smrg   bblock_link *link = (bblock_link *)list->get_tail();
40b8e80941Smrg   bblock_t *block = link->block;
41b8e80941Smrg   link->link.remove();
42b8e80941Smrg
43b8e80941Smrg   return block;
44b8e80941Smrg}
45b8e80941Smrg
46b8e80941Smrgstatic exec_node *
47b8e80941Smrglink(void *mem_ctx, bblock_t *block)
48b8e80941Smrg{
49b8e80941Smrg   bblock_link *l = new(mem_ctx) bblock_link(block);
50b8e80941Smrg   return &l->link;
51b8e80941Smrg}
52b8e80941Smrg
53b8e80941Smrgbblock_t::bblock_t(cfg_t *cfg) :
54b8e80941Smrg   cfg(cfg), idom(NULL), start_ip(0), end_ip(0), num(0), cycle_count(0)
55b8e80941Smrg{
56b8e80941Smrg   instructions.make_empty();
57b8e80941Smrg   parents.make_empty();
58b8e80941Smrg   children.make_empty();
59b8e80941Smrg}
60b8e80941Smrg
61b8e80941Smrgvoid
62b8e80941Smrgbblock_t::add_successor(void *mem_ctx, bblock_t *successor)
63b8e80941Smrg{
64b8e80941Smrg   successor->parents.push_tail(::link(mem_ctx, this));
65b8e80941Smrg   children.push_tail(::link(mem_ctx, successor));
66b8e80941Smrg}
67b8e80941Smrg
68b8e80941Smrgbool
69b8e80941Smrgbblock_t::is_predecessor_of(const bblock_t *block) const
70b8e80941Smrg{
71b8e80941Smrg   foreach_list_typed_safe (bblock_link, parent, link, &block->parents) {
72b8e80941Smrg      if (parent->block == this) {
73b8e80941Smrg         return true;
74b8e80941Smrg      }
75b8e80941Smrg   }
76b8e80941Smrg
77b8e80941Smrg   return false;
78b8e80941Smrg}
79b8e80941Smrg
80b8e80941Smrgbool
81b8e80941Smrgbblock_t::is_successor_of(const bblock_t *block) const
82b8e80941Smrg{
83b8e80941Smrg   foreach_list_typed_safe (bblock_link, child, link, &block->children) {
84b8e80941Smrg      if (child->block == this) {
85b8e80941Smrg         return true;
86b8e80941Smrg      }
87b8e80941Smrg   }
88b8e80941Smrg
89b8e80941Smrg   return false;
90b8e80941Smrg}
91b8e80941Smrg
92b8e80941Smrgstatic bool
93b8e80941Smrgends_block(const backend_instruction *inst)
94b8e80941Smrg{
95b8e80941Smrg   enum opcode op = inst->opcode;
96b8e80941Smrg
97b8e80941Smrg   return op == BRW_OPCODE_IF ||
98b8e80941Smrg          op == BRW_OPCODE_ELSE ||
99b8e80941Smrg          op == BRW_OPCODE_CONTINUE ||
100b8e80941Smrg          op == BRW_OPCODE_BREAK ||
101b8e80941Smrg          op == BRW_OPCODE_DO ||
102b8e80941Smrg          op == BRW_OPCODE_WHILE;
103b8e80941Smrg}
104b8e80941Smrg
105b8e80941Smrgstatic bool
106b8e80941Smrgstarts_block(const backend_instruction *inst)
107b8e80941Smrg{
108b8e80941Smrg   enum opcode op = inst->opcode;
109b8e80941Smrg
110b8e80941Smrg   return op == BRW_OPCODE_DO ||
111b8e80941Smrg          op == BRW_OPCODE_ENDIF;
112b8e80941Smrg}
113b8e80941Smrg
114b8e80941Smrgbool
115b8e80941Smrgbblock_t::can_combine_with(const bblock_t *that) const
116b8e80941Smrg{
117b8e80941Smrg   if ((const bblock_t *)this->link.next != that)
118b8e80941Smrg      return false;
119b8e80941Smrg
120b8e80941Smrg   if (ends_block(this->end()) ||
121b8e80941Smrg       starts_block(that->start()))
122b8e80941Smrg      return false;
123b8e80941Smrg
124b8e80941Smrg   return true;
125b8e80941Smrg}
126b8e80941Smrg
127b8e80941Smrgvoid
128b8e80941Smrgbblock_t::combine_with(bblock_t *that)
129b8e80941Smrg{
130b8e80941Smrg   assert(this->can_combine_with(that));
131b8e80941Smrg   foreach_list_typed (bblock_link, link, link, &that->parents) {
132b8e80941Smrg      assert(link->block == this);
133b8e80941Smrg   }
134b8e80941Smrg
135b8e80941Smrg   this->end_ip = that->end_ip;
136b8e80941Smrg   this->instructions.append_list(&that->instructions);
137b8e80941Smrg
138b8e80941Smrg   this->cfg->remove_block(that);
139b8e80941Smrg}
140b8e80941Smrg
141b8e80941Smrgvoid
142b8e80941Smrgbblock_t::dump(backend_shader *s) const
143b8e80941Smrg{
144b8e80941Smrg   int ip = this->start_ip;
145b8e80941Smrg   foreach_inst_in_block(backend_instruction, inst, this) {
146b8e80941Smrg      fprintf(stderr, "%5d: ", ip);
147b8e80941Smrg      s->dump_instruction(inst);
148b8e80941Smrg      ip++;
149b8e80941Smrg   }
150b8e80941Smrg}
151b8e80941Smrg
152b8e80941Smrgcfg_t::cfg_t(exec_list *instructions)
153b8e80941Smrg{
154b8e80941Smrg   mem_ctx = ralloc_context(NULL);
155b8e80941Smrg   block_list.make_empty();
156b8e80941Smrg   blocks = NULL;
157b8e80941Smrg   num_blocks = 0;
158b8e80941Smrg   idom_dirty = true;
159b8e80941Smrg   cycle_count = 0;
160b8e80941Smrg
161b8e80941Smrg   bblock_t *cur = NULL;
162b8e80941Smrg   int ip = 0;
163b8e80941Smrg
164b8e80941Smrg   bblock_t *entry = new_block();
165b8e80941Smrg   bblock_t *cur_if = NULL;    /**< BB ending with IF. */
166b8e80941Smrg   bblock_t *cur_else = NULL;  /**< BB ending with ELSE. */
167b8e80941Smrg   bblock_t *cur_endif = NULL; /**< BB starting with ENDIF. */
168b8e80941Smrg   bblock_t *cur_do = NULL;    /**< BB starting with DO. */
169b8e80941Smrg   bblock_t *cur_while = NULL; /**< BB immediately following WHILE. */
170b8e80941Smrg   exec_list if_stack, else_stack, do_stack, while_stack;
171b8e80941Smrg   bblock_t *next;
172b8e80941Smrg
173b8e80941Smrg   set_next_block(&cur, entry, ip);
174b8e80941Smrg
175b8e80941Smrg   foreach_in_list_safe(backend_instruction, inst, instructions) {
176b8e80941Smrg      /* set_next_block wants the post-incremented ip */
177b8e80941Smrg      ip++;
178b8e80941Smrg
179b8e80941Smrg      inst->exec_node::remove();
180b8e80941Smrg
181b8e80941Smrg      switch (inst->opcode) {
182b8e80941Smrg      case BRW_OPCODE_IF:
183b8e80941Smrg         cur->instructions.push_tail(inst);
184b8e80941Smrg
185b8e80941Smrg	 /* Push our information onto a stack so we can recover from
186b8e80941Smrg	  * nested ifs.
187b8e80941Smrg	  */
188b8e80941Smrg	 if_stack.push_tail(link(mem_ctx, cur_if));
189b8e80941Smrg	 else_stack.push_tail(link(mem_ctx, cur_else));
190b8e80941Smrg
191b8e80941Smrg	 cur_if = cur;
192b8e80941Smrg	 cur_else = NULL;
193b8e80941Smrg         cur_endif = NULL;
194b8e80941Smrg
195b8e80941Smrg	 /* Set up our immediately following block, full of "then"
196b8e80941Smrg	  * instructions.
197b8e80941Smrg	  */
198b8e80941Smrg	 next = new_block();
199b8e80941Smrg	 cur_if->add_successor(mem_ctx, next);
200b8e80941Smrg
201b8e80941Smrg	 set_next_block(&cur, next, ip);
202b8e80941Smrg	 break;
203b8e80941Smrg
204b8e80941Smrg      case BRW_OPCODE_ELSE:
205b8e80941Smrg         cur->instructions.push_tail(inst);
206b8e80941Smrg
207b8e80941Smrg         cur_else = cur;
208b8e80941Smrg
209b8e80941Smrg	 next = new_block();
210b8e80941Smrg         assert(cur_if != NULL);
211b8e80941Smrg	 cur_if->add_successor(mem_ctx, next);
212b8e80941Smrg
213b8e80941Smrg	 set_next_block(&cur, next, ip);
214b8e80941Smrg	 break;
215b8e80941Smrg
216b8e80941Smrg      case BRW_OPCODE_ENDIF: {
217b8e80941Smrg         if (cur->instructions.is_empty()) {
218b8e80941Smrg            /* New block was just created; use it. */
219b8e80941Smrg            cur_endif = cur;
220b8e80941Smrg         } else {
221b8e80941Smrg            cur_endif = new_block();
222b8e80941Smrg
223b8e80941Smrg            cur->add_successor(mem_ctx, cur_endif);
224b8e80941Smrg
225b8e80941Smrg            set_next_block(&cur, cur_endif, ip - 1);
226b8e80941Smrg         }
227b8e80941Smrg
228b8e80941Smrg         cur->instructions.push_tail(inst);
229b8e80941Smrg
230b8e80941Smrg         if (cur_else) {
231b8e80941Smrg            cur_else->add_successor(mem_ctx, cur_endif);
232b8e80941Smrg         } else {
233b8e80941Smrg            assert(cur_if != NULL);
234b8e80941Smrg            cur_if->add_successor(mem_ctx, cur_endif);
235b8e80941Smrg         }
236b8e80941Smrg
237b8e80941Smrg         assert(cur_if->end()->opcode == BRW_OPCODE_IF);
238b8e80941Smrg         assert(!cur_else || cur_else->end()->opcode == BRW_OPCODE_ELSE);
239b8e80941Smrg
240b8e80941Smrg	 /* Pop the stack so we're in the previous if/else/endif */
241b8e80941Smrg	 cur_if = pop_stack(&if_stack);
242b8e80941Smrg	 cur_else = pop_stack(&else_stack);
243b8e80941Smrg	 break;
244b8e80941Smrg      }
245b8e80941Smrg      case BRW_OPCODE_DO:
246b8e80941Smrg	 /* Push our information onto a stack so we can recover from
247b8e80941Smrg	  * nested loops.
248b8e80941Smrg	  */
249b8e80941Smrg	 do_stack.push_tail(link(mem_ctx, cur_do));
250b8e80941Smrg	 while_stack.push_tail(link(mem_ctx, cur_while));
251b8e80941Smrg
252b8e80941Smrg	 /* Set up the block just after the while.  Don't know when exactly
253b8e80941Smrg	  * it will start, yet.
254b8e80941Smrg	  */
255b8e80941Smrg	 cur_while = new_block();
256b8e80941Smrg
257b8e80941Smrg         if (cur->instructions.is_empty()) {
258b8e80941Smrg            /* New block was just created; use it. */
259b8e80941Smrg            cur_do = cur;
260b8e80941Smrg         } else {
261b8e80941Smrg            cur_do = new_block();
262b8e80941Smrg
263b8e80941Smrg            cur->add_successor(mem_ctx, cur_do);
264b8e80941Smrg
265b8e80941Smrg            set_next_block(&cur, cur_do, ip - 1);
266b8e80941Smrg         }
267b8e80941Smrg
268b8e80941Smrg         cur->instructions.push_tail(inst);
269b8e80941Smrg
270b8e80941Smrg         /* Represent divergent execution of the loop as a pair of alternative
271b8e80941Smrg          * edges coming out of the DO instruction: For any physical iteration
272b8e80941Smrg          * of the loop a given logical thread can either start off enabled
273b8e80941Smrg          * (which is represented as the "next" successor), or disabled (if it
274b8e80941Smrg          * has reached a non-uniform exit of the loop during a previous
275b8e80941Smrg          * iteration, which is represented as the "cur_while" successor).
276b8e80941Smrg          *
277b8e80941Smrg          * The disabled edge will be taken by the logical thread anytime we
278b8e80941Smrg          * arrive at the DO instruction through a back-edge coming from a
279b8e80941Smrg          * conditional exit of the loop where divergent control flow started.
280b8e80941Smrg          *
281b8e80941Smrg          * This guarantees that there is a control-flow path from any
282b8e80941Smrg          * divergence point of the loop into the convergence point
283b8e80941Smrg          * (immediately past the WHILE instruction) such that it overlaps the
284b8e80941Smrg          * whole IP region of divergent control flow (potentially the whole
285b8e80941Smrg          * loop) *and* doesn't imply the execution of any instructions part
286b8e80941Smrg          * of the loop (since the corresponding execution mask bit will be
287b8e80941Smrg          * disabled for a diverging thread).
288b8e80941Smrg          *
289b8e80941Smrg          * This way we make sure that any variables that are live throughout
290b8e80941Smrg          * the region of divergence for an inactive logical thread are also
291b8e80941Smrg          * considered to interfere with any other variables assigned by
292b8e80941Smrg          * active logical threads within the same physical region of the
293b8e80941Smrg          * program, since otherwise we would risk cross-channel data
294b8e80941Smrg          * corruption.
295b8e80941Smrg          */
296b8e80941Smrg         next = new_block();
297b8e80941Smrg         cur->add_successor(mem_ctx, next);
298b8e80941Smrg         cur->add_successor(mem_ctx, cur_while);
299b8e80941Smrg         set_next_block(&cur, next, ip);
300b8e80941Smrg	 break;
301b8e80941Smrg
302b8e80941Smrg      case BRW_OPCODE_CONTINUE:
303b8e80941Smrg         cur->instructions.push_tail(inst);
304b8e80941Smrg
305b8e80941Smrg         /* A conditional CONTINUE may start a region of divergent control
306b8e80941Smrg          * flow until the start of the next loop iteration (*not* until the
307b8e80941Smrg          * end of the loop which is why the successor is not the top-level
308b8e80941Smrg          * divergence point at cur_do).  The live interval of any variable
309b8e80941Smrg          * extending through a CONTINUE edge is guaranteed to overlap the
310b8e80941Smrg          * whole region of divergent execution, because any variable live-out
311b8e80941Smrg          * at the CONTINUE instruction will also be live-in at the top of the
312b8e80941Smrg          * loop, and therefore also live-out at the bottom-most point of the
313b8e80941Smrg          * loop which is reachable from the top (since a control flow path
314b8e80941Smrg          * exists from a definition of the variable through this CONTINUE
315b8e80941Smrg          * instruction, the top of the loop, the (reachable) bottom of the
316b8e80941Smrg          * loop, the top of the loop again, into a use of the variable).
317b8e80941Smrg          */
318b8e80941Smrg         assert(cur_do != NULL);
319b8e80941Smrg         cur->add_successor(mem_ctx, cur_do->next());
320b8e80941Smrg
321b8e80941Smrg	 next = new_block();
322b8e80941Smrg	 if (inst->predicate)
323b8e80941Smrg	    cur->add_successor(mem_ctx, next);
324b8e80941Smrg
325b8e80941Smrg	 set_next_block(&cur, next, ip);
326b8e80941Smrg	 break;
327b8e80941Smrg
328b8e80941Smrg      case BRW_OPCODE_BREAK:
329b8e80941Smrg         cur->instructions.push_tail(inst);
330b8e80941Smrg
331b8e80941Smrg         /* A conditional BREAK instruction may start a region of divergent
332b8e80941Smrg          * control flow until the end of the loop if the condition is
333b8e80941Smrg          * non-uniform, in which case the loop will execute additional
334b8e80941Smrg          * iterations with the present channel disabled.  We model this as a
335b8e80941Smrg          * control flow path from the divergence point to the convergence
336b8e80941Smrg          * point that overlaps the whole IP range of the loop and skips over
337b8e80941Smrg          * the execution of any other instructions part of the loop.
338b8e80941Smrg          *
339b8e80941Smrg          * See the DO case for additional explanation.
340b8e80941Smrg          */
341b8e80941Smrg         assert(cur_do != NULL);
342b8e80941Smrg         cur->add_successor(mem_ctx, cur_do);
343b8e80941Smrg
344b8e80941Smrg	 next = new_block();
345b8e80941Smrg	 if (inst->predicate)
346b8e80941Smrg	    cur->add_successor(mem_ctx, next);
347b8e80941Smrg
348b8e80941Smrg	 set_next_block(&cur, next, ip);
349b8e80941Smrg	 break;
350b8e80941Smrg
351b8e80941Smrg      case BRW_OPCODE_WHILE:
352b8e80941Smrg         cur->instructions.push_tail(inst);
353b8e80941Smrg
354b8e80941Smrg         assert(cur_do != NULL && cur_while != NULL);
355b8e80941Smrg
356b8e80941Smrg         /* A conditional WHILE instruction may start a region of divergent
357b8e80941Smrg          * control flow until the end of the loop, just like the BREAK
358b8e80941Smrg          * instruction.  See the BREAK case for more details.  OTOH an
359b8e80941Smrg          * unconditional WHILE instruction is non-divergent (just like an
360b8e80941Smrg          * unconditional CONTINUE), and will necessarily lead to the
361b8e80941Smrg          * execution of an additional iteration of the loop for all enabled
362b8e80941Smrg          * channels, so we may skip over the divergence point at the top of
363b8e80941Smrg          * the loop to keep the CFG as unambiguous as possible.
364b8e80941Smrg          */
365b8e80941Smrg         cur->add_successor(mem_ctx, inst->predicate ? cur_do :
366b8e80941Smrg                                     cur_do->next());
367b8e80941Smrg
368b8e80941Smrg	 set_next_block(&cur, cur_while, ip);
369b8e80941Smrg
370b8e80941Smrg	 /* Pop the stack so we're in the previous loop */
371b8e80941Smrg	 cur_do = pop_stack(&do_stack);
372b8e80941Smrg	 cur_while = pop_stack(&while_stack);
373b8e80941Smrg	 break;
374b8e80941Smrg
375b8e80941Smrg      default:
376b8e80941Smrg         cur->instructions.push_tail(inst);
377b8e80941Smrg	 break;
378b8e80941Smrg      }
379b8e80941Smrg   }
380b8e80941Smrg
381b8e80941Smrg   cur->end_ip = ip - 1;
382b8e80941Smrg
383b8e80941Smrg   make_block_array();
384b8e80941Smrg}
385b8e80941Smrg
386b8e80941Smrgcfg_t::~cfg_t()
387b8e80941Smrg{
388b8e80941Smrg   ralloc_free(mem_ctx);
389b8e80941Smrg}
390b8e80941Smrg
391b8e80941Smrgvoid
392b8e80941Smrgcfg_t::remove_block(bblock_t *block)
393b8e80941Smrg{
394b8e80941Smrg   foreach_list_typed_safe (bblock_link, predecessor, link, &block->parents) {
395b8e80941Smrg      /* Remove block from all of its predecessors' successor lists. */
396b8e80941Smrg      foreach_list_typed_safe (bblock_link, successor, link,
397b8e80941Smrg                               &predecessor->block->children) {
398b8e80941Smrg         if (block == successor->block) {
399b8e80941Smrg            successor->link.remove();
400b8e80941Smrg            ralloc_free(successor);
401b8e80941Smrg         }
402b8e80941Smrg      }
403b8e80941Smrg
404b8e80941Smrg      /* Add removed-block's successors to its predecessors' successor lists. */
405b8e80941Smrg      foreach_list_typed (bblock_link, successor, link, &block->children) {
406b8e80941Smrg         if (!successor->block->is_successor_of(predecessor->block)) {
407b8e80941Smrg            predecessor->block->children.push_tail(link(mem_ctx,
408b8e80941Smrg                                                        successor->block));
409b8e80941Smrg         }
410b8e80941Smrg      }
411b8e80941Smrg   }
412b8e80941Smrg
413b8e80941Smrg   foreach_list_typed_safe (bblock_link, successor, link, &block->children) {
414b8e80941Smrg      /* Remove block from all of its childrens' parents lists. */
415b8e80941Smrg      foreach_list_typed_safe (bblock_link, predecessor, link,
416b8e80941Smrg                               &successor->block->parents) {
417b8e80941Smrg         if (block == predecessor->block) {
418b8e80941Smrg            predecessor->link.remove();
419b8e80941Smrg            ralloc_free(predecessor);
420b8e80941Smrg         }
421b8e80941Smrg      }
422b8e80941Smrg
423b8e80941Smrg      /* Add removed-block's predecessors to its successors' predecessor lists. */
424b8e80941Smrg      foreach_list_typed (bblock_link, predecessor, link, &block->parents) {
425b8e80941Smrg         if (!predecessor->block->is_predecessor_of(successor->block)) {
426b8e80941Smrg            successor->block->parents.push_tail(link(mem_ctx,
427b8e80941Smrg                                                     predecessor->block));
428b8e80941Smrg         }
429b8e80941Smrg      }
430b8e80941Smrg   }
431b8e80941Smrg
432b8e80941Smrg   block->link.remove();
433b8e80941Smrg
434b8e80941Smrg   for (int b = block->num; b < this->num_blocks - 1; b++) {
435b8e80941Smrg      this->blocks[b] = this->blocks[b + 1];
436b8e80941Smrg      this->blocks[b]->num = b;
437b8e80941Smrg   }
438b8e80941Smrg
439b8e80941Smrg   this->blocks[this->num_blocks - 1]->num = this->num_blocks - 2;
440b8e80941Smrg   this->num_blocks--;
441b8e80941Smrg   idom_dirty = true;
442b8e80941Smrg}
443b8e80941Smrg
444b8e80941Smrgbblock_t *
445b8e80941Smrgcfg_t::new_block()
446b8e80941Smrg{
447b8e80941Smrg   bblock_t *block = new(mem_ctx) bblock_t(this);
448b8e80941Smrg
449b8e80941Smrg   return block;
450b8e80941Smrg}
451b8e80941Smrg
452b8e80941Smrgvoid
453b8e80941Smrgcfg_t::set_next_block(bblock_t **cur, bblock_t *block, int ip)
454b8e80941Smrg{
455b8e80941Smrg   if (*cur) {
456b8e80941Smrg      (*cur)->end_ip = ip - 1;
457b8e80941Smrg   }
458b8e80941Smrg
459b8e80941Smrg   block->start_ip = ip;
460b8e80941Smrg   block->num = num_blocks++;
461b8e80941Smrg   block_list.push_tail(&block->link);
462b8e80941Smrg   *cur = block;
463b8e80941Smrg}
464b8e80941Smrg
465b8e80941Smrgvoid
466b8e80941Smrgcfg_t::make_block_array()
467b8e80941Smrg{
468b8e80941Smrg   blocks = ralloc_array(mem_ctx, bblock_t *, num_blocks);
469b8e80941Smrg
470b8e80941Smrg   int i = 0;
471b8e80941Smrg   foreach_block (block, this) {
472b8e80941Smrg      blocks[i++] = block;
473b8e80941Smrg   }
474b8e80941Smrg   assert(i == num_blocks);
475b8e80941Smrg}
476b8e80941Smrg
477b8e80941Smrgvoid
478b8e80941Smrgcfg_t::dump(backend_shader *s)
479b8e80941Smrg{
480b8e80941Smrg   if (idom_dirty)
481b8e80941Smrg      calculate_idom();
482b8e80941Smrg
483b8e80941Smrg   foreach_block (block, this) {
484b8e80941Smrg      if (block->idom)
485b8e80941Smrg         fprintf(stderr, "START B%d IDOM(B%d)", block->num, block->idom->num);
486b8e80941Smrg      else
487b8e80941Smrg         fprintf(stderr, "START B%d IDOM(none)", block->num);
488b8e80941Smrg
489b8e80941Smrg      foreach_list_typed(bblock_link, link, link, &block->parents) {
490b8e80941Smrg         fprintf(stderr, " <-B%d",
491b8e80941Smrg                 link->block->num);
492b8e80941Smrg      }
493b8e80941Smrg      fprintf(stderr, "\n");
494b8e80941Smrg      if (s != NULL)
495b8e80941Smrg         block->dump(s);
496b8e80941Smrg      fprintf(stderr, "END B%d", block->num);
497b8e80941Smrg      foreach_list_typed(bblock_link, link, link, &block->children) {
498b8e80941Smrg         fprintf(stderr, " ->B%d",
499b8e80941Smrg                 link->block->num);
500b8e80941Smrg      }
501b8e80941Smrg      fprintf(stderr, "\n");
502b8e80941Smrg   }
503b8e80941Smrg}
504b8e80941Smrg
505b8e80941Smrg/* Calculates the immediate dominator of each block, according to "A Simple,
506b8e80941Smrg * Fast Dominance Algorithm" by Keith D. Cooper, Timothy J. Harvey, and Ken
507b8e80941Smrg * Kennedy.
508b8e80941Smrg *
509b8e80941Smrg * The authors claim that for control flow graphs of sizes normally encountered
510b8e80941Smrg * (less than 1000 nodes) that this algorithm is significantly faster than
511b8e80941Smrg * others like Lengauer-Tarjan.
512b8e80941Smrg */
513b8e80941Smrgvoid
514b8e80941Smrgcfg_t::calculate_idom()
515b8e80941Smrg{
516b8e80941Smrg   foreach_block(block, this) {
517b8e80941Smrg      block->idom = NULL;
518b8e80941Smrg   }
519b8e80941Smrg   blocks[0]->idom = blocks[0];
520b8e80941Smrg
521b8e80941Smrg   bool changed;
522b8e80941Smrg   do {
523b8e80941Smrg      changed = false;
524b8e80941Smrg
525b8e80941Smrg      foreach_block(block, this) {
526b8e80941Smrg         if (block->num == 0)
527b8e80941Smrg            continue;
528b8e80941Smrg
529b8e80941Smrg         bblock_t *new_idom = NULL;
530b8e80941Smrg         foreach_list_typed(bblock_link, parent, link, &block->parents) {
531b8e80941Smrg            if (parent->block->idom) {
532b8e80941Smrg               if (new_idom == NULL) {
533b8e80941Smrg                  new_idom = parent->block;
534b8e80941Smrg               } else if (parent->block->idom != NULL) {
535b8e80941Smrg                  new_idom = intersect(parent->block, new_idom);
536b8e80941Smrg               }
537b8e80941Smrg            }
538b8e80941Smrg         }
539b8e80941Smrg
540b8e80941Smrg         if (block->idom != new_idom) {
541b8e80941Smrg            block->idom = new_idom;
542b8e80941Smrg            changed = true;
543b8e80941Smrg         }
544b8e80941Smrg      }
545b8e80941Smrg   } while (changed);
546b8e80941Smrg
547b8e80941Smrg   idom_dirty = false;
548b8e80941Smrg}
549b8e80941Smrg
550b8e80941Smrgbblock_t *
551b8e80941Smrgcfg_t::intersect(bblock_t *b1, bblock_t *b2)
552b8e80941Smrg{
553b8e80941Smrg   /* Note, the comparisons here are the opposite of what the paper says
554b8e80941Smrg    * because we index blocks from beginning -> end (i.e. reverse post-order)
555b8e80941Smrg    * instead of post-order like they assume.
556b8e80941Smrg    */
557b8e80941Smrg   while (b1->num != b2->num) {
558b8e80941Smrg      while (b1->num > b2->num)
559b8e80941Smrg         b1 = b1->idom;
560b8e80941Smrg      while (b2->num > b1->num)
561b8e80941Smrg         b2 = b2->idom;
562b8e80941Smrg   }
563b8e80941Smrg   assert(b1);
564b8e80941Smrg   return b1;
565b8e80941Smrg}
566b8e80941Smrg
567b8e80941Smrgvoid
568b8e80941Smrgcfg_t::dump_cfg()
569b8e80941Smrg{
570b8e80941Smrg   printf("digraph CFG {\n");
571b8e80941Smrg   for (int b = 0; b < num_blocks; b++) {
572b8e80941Smrg      bblock_t *block = this->blocks[b];
573b8e80941Smrg
574b8e80941Smrg      foreach_list_typed_safe (bblock_link, child, link, &block->children) {
575b8e80941Smrg         printf("\t%d -> %d\n", b, child->block->num);
576b8e80941Smrg      }
577b8e80941Smrg   }
578b8e80941Smrg   printf("}\n");
579b8e80941Smrg}
580b8e80941Smrg
581b8e80941Smrgvoid
582b8e80941Smrgcfg_t::dump_domtree()
583b8e80941Smrg{
584b8e80941Smrg   printf("digraph DominanceTree {\n");
585b8e80941Smrg   foreach_block(block, this) {
586b8e80941Smrg      if (block->idom) {
587b8e80941Smrg         printf("\t%d -> %d\n", block->idom->num, block->num);
588b8e80941Smrg      }
589b8e80941Smrg   }
590b8e80941Smrg   printf("}\n");
591b8e80941Smrg}
592