1b8e80941Smrg/*
2b8e80941Smrg * Copyright © 2010 Intel Corporation
3b8e80941Smrg *
4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a
5b8e80941Smrg * copy of this software and associated documentation files (the "Software"),
6b8e80941Smrg * to deal in the Software without restriction, including without limitation
7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the
9b8e80941Smrg * Software is furnished to do so, subject to the following conditions:
10b8e80941Smrg *
11b8e80941Smrg * The above copyright notice and this permission notice (including the next
12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the
13b8e80941Smrg * Software.
14b8e80941Smrg *
15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21b8e80941Smrg * DEALINGS IN THE SOFTWARE.
22b8e80941Smrg */
23b8e80941Smrg
24b8e80941Smrg#include "compiler/glsl_types.h"
25b8e80941Smrg#include "loop_analysis.h"
26b8e80941Smrg#include "ir_hierarchical_visitor.h"
27b8e80941Smrg
28b8e80941Smrg#include "main/mtypes.h"
29b8e80941Smrg
30b8e80941Smrgnamespace {
31b8e80941Smrg
32b8e80941Smrgclass loop_unroll_visitor : public ir_hierarchical_visitor {
33b8e80941Smrgpublic:
34b8e80941Smrg   loop_unroll_visitor(loop_state *state,
35b8e80941Smrg                       const struct gl_shader_compiler_options *options)
36b8e80941Smrg   {
37b8e80941Smrg      this->state = state;
38b8e80941Smrg      this->progress = false;
39b8e80941Smrg      this->options = options;
40b8e80941Smrg   }
41b8e80941Smrg
42b8e80941Smrg   virtual ir_visitor_status visit_leave(ir_loop *ir);
43b8e80941Smrg   void simple_unroll(ir_loop *ir, int iterations);
44b8e80941Smrg   void complex_unroll(ir_loop *ir, int iterations,
45b8e80941Smrg                       bool continue_from_then_branch,
46b8e80941Smrg                       bool limiting_term_first,
47b8e80941Smrg                       bool lt_continue_from_then_branch);
48b8e80941Smrg   void splice_post_if_instructions(ir_if *ir_if, exec_list *splice_dest);
49b8e80941Smrg
50b8e80941Smrg   loop_state *state;
51b8e80941Smrg
52b8e80941Smrg   bool progress;
53b8e80941Smrg   const struct gl_shader_compiler_options *options;
54b8e80941Smrg};
55b8e80941Smrg
56b8e80941Smrg} /* anonymous namespace */
57b8e80941Smrg
58b8e80941Smrgclass loop_unroll_count : public ir_hierarchical_visitor {
59b8e80941Smrgpublic:
60b8e80941Smrg   int nodes;
61b8e80941Smrg   bool unsupported_variable_indexing;
62b8e80941Smrg   bool array_indexed_by_induction_var_with_exact_iterations;
63b8e80941Smrg   /* If there are nested loops, the node count will be inaccurate. */
64b8e80941Smrg   bool nested_loop;
65b8e80941Smrg
66b8e80941Smrg   loop_unroll_count(exec_list *list, loop_variable_state *ls,
67b8e80941Smrg                     const struct gl_shader_compiler_options *options)
68b8e80941Smrg      : ls(ls), options(options)
69b8e80941Smrg   {
70b8e80941Smrg      nodes = 0;
71b8e80941Smrg      nested_loop = false;
72b8e80941Smrg      unsupported_variable_indexing = false;
73b8e80941Smrg      array_indexed_by_induction_var_with_exact_iterations = false;
74b8e80941Smrg
75b8e80941Smrg      run(list);
76b8e80941Smrg   }
77b8e80941Smrg
78b8e80941Smrg   virtual ir_visitor_status visit_enter(ir_assignment *)
79b8e80941Smrg   {
80b8e80941Smrg      nodes++;
81b8e80941Smrg      return visit_continue;
82b8e80941Smrg   }
83b8e80941Smrg
84b8e80941Smrg   virtual ir_visitor_status visit_enter(ir_expression *)
85b8e80941Smrg   {
86b8e80941Smrg      nodes++;
87b8e80941Smrg      return visit_continue;
88b8e80941Smrg   }
89b8e80941Smrg
90b8e80941Smrg   virtual ir_visitor_status visit_enter(ir_loop *)
91b8e80941Smrg   {
92b8e80941Smrg      nested_loop = true;
93b8e80941Smrg      return visit_continue;
94b8e80941Smrg   }
95b8e80941Smrg
96b8e80941Smrg   virtual ir_visitor_status visit_enter(ir_dereference_array *ir)
97b8e80941Smrg   {
98b8e80941Smrg      /* Force unroll in case of dynamic indexing with sampler arrays
99b8e80941Smrg       * when EmitNoIndirectSampler is set.
100b8e80941Smrg       */
101b8e80941Smrg      if (options->EmitNoIndirectSampler) {
102b8e80941Smrg         if ((ir->array->type->is_array() &&
103b8e80941Smrg              ir->array->type->contains_sampler()) &&
104b8e80941Smrg             !ir->array_index->constant_expression_value(ralloc_parent(ir))) {
105b8e80941Smrg            unsupported_variable_indexing = true;
106b8e80941Smrg            return visit_continue;
107b8e80941Smrg         }
108b8e80941Smrg      }
109b8e80941Smrg
110b8e80941Smrg      /* Check for arrays variably-indexed by a loop induction variable.
111b8e80941Smrg       * Unrolling the loop may convert that access into constant-indexing.
112b8e80941Smrg       *
113b8e80941Smrg       * Many drivers don't support particular kinds of variable indexing,
114b8e80941Smrg       * and have to resort to using lower_variable_index_to_cond_assign to
115b8e80941Smrg       * handle it.  This results in huge amounts of horrible code, so we'd
116b8e80941Smrg       * like to avoid that if possible.  Here, we just note that it will
117b8e80941Smrg       * happen.
118b8e80941Smrg       */
119b8e80941Smrg      if ((ir->array->type->is_array() || ir->array->type->is_matrix()) &&
120b8e80941Smrg          !ir->array_index->as_constant()) {
121b8e80941Smrg         ir_variable *array = ir->array->variable_referenced();
122b8e80941Smrg         loop_variable *lv = ls->get(ir->array_index->variable_referenced());
123b8e80941Smrg         if (array && lv && lv->is_induction_var()) {
124b8e80941Smrg            /* If an array is indexed by a loop induction variable, and the
125b8e80941Smrg             * array size is exactly the number of loop iterations, this is
126b8e80941Smrg             * probably a simple for-loop trying to access each element in
127b8e80941Smrg             * turn; the application may expect it to be unrolled.
128b8e80941Smrg             */
129b8e80941Smrg            if (int(array->type->length) == ls->limiting_terminator->iterations)
130b8e80941Smrg               array_indexed_by_induction_var_with_exact_iterations = true;
131b8e80941Smrg
132b8e80941Smrg            switch (array->data.mode) {
133b8e80941Smrg            case ir_var_auto:
134b8e80941Smrg            case ir_var_temporary:
135b8e80941Smrg            case ir_var_const_in:
136b8e80941Smrg            case ir_var_function_in:
137b8e80941Smrg            case ir_var_function_out:
138b8e80941Smrg            case ir_var_function_inout:
139b8e80941Smrg               if (options->EmitNoIndirectTemp)
140b8e80941Smrg                  unsupported_variable_indexing = true;
141b8e80941Smrg               break;
142b8e80941Smrg            case ir_var_uniform:
143b8e80941Smrg            case ir_var_shader_storage:
144b8e80941Smrg               if (options->EmitNoIndirectUniform)
145b8e80941Smrg                  unsupported_variable_indexing = true;
146b8e80941Smrg               break;
147b8e80941Smrg            case ir_var_shader_in:
148b8e80941Smrg               if (options->EmitNoIndirectInput)
149b8e80941Smrg                  unsupported_variable_indexing = true;
150b8e80941Smrg               break;
151b8e80941Smrg            case ir_var_shader_out:
152b8e80941Smrg               if (options->EmitNoIndirectOutput)
153b8e80941Smrg                  unsupported_variable_indexing = true;
154b8e80941Smrg               break;
155b8e80941Smrg            }
156b8e80941Smrg         }
157b8e80941Smrg      }
158b8e80941Smrg      return visit_continue;
159b8e80941Smrg   }
160b8e80941Smrg
161b8e80941Smrgprivate:
162b8e80941Smrg   loop_variable_state *ls;
163b8e80941Smrg   const struct gl_shader_compiler_options *options;
164b8e80941Smrg};
165b8e80941Smrg
166b8e80941Smrg
167b8e80941Smrg/**
168b8e80941Smrg * Unroll a loop which does not contain any jumps.  For example, if the input
169b8e80941Smrg * is:
170b8e80941Smrg *
171b8e80941Smrg *     (loop (...) ...instrs...)
172b8e80941Smrg *
173b8e80941Smrg * And the iteration count is 3, the output will be:
174b8e80941Smrg *
175b8e80941Smrg *     ...instrs... ...instrs... ...instrs...
176b8e80941Smrg */
177b8e80941Smrgvoid
178b8e80941Smrgloop_unroll_visitor::simple_unroll(ir_loop *ir, int iterations)
179b8e80941Smrg{
180b8e80941Smrg   void *const mem_ctx = ralloc_parent(ir);
181b8e80941Smrg   loop_variable_state *const ls = this->state->get(ir);
182b8e80941Smrg
183b8e80941Smrg   /* If there are no terminators, then the loop iteration count must be 1.
184b8e80941Smrg    * This is the 'do { } while (false);' case.
185b8e80941Smrg    */
186b8e80941Smrg   assert(!ls->terminators.is_empty() || iterations == 1);
187b8e80941Smrg
188b8e80941Smrg   ir_instruction *first_ir =
189b8e80941Smrg      (ir_instruction *) ir->body_instructions.get_head();
190b8e80941Smrg
191b8e80941Smrg   if (!first_ir) {
192b8e80941Smrg      /* The loop is empty remove it and return */
193b8e80941Smrg      ir->remove();
194b8e80941Smrg      return;
195b8e80941Smrg   }
196b8e80941Smrg
197b8e80941Smrg   ir_if *limit_if = NULL;
198b8e80941Smrg   bool exit_branch_has_instructions = false;
199b8e80941Smrg   if (ls->limiting_terminator) {
200b8e80941Smrg      limit_if = ls->limiting_terminator->ir;
201b8e80941Smrg      ir_instruction *ir_if_last = (ir_instruction *)
202b8e80941Smrg         limit_if->then_instructions.get_tail();
203b8e80941Smrg
204b8e80941Smrg      if (is_break(ir_if_last)) {
205b8e80941Smrg         if (ir_if_last != limit_if->then_instructions.get_head())
206b8e80941Smrg            exit_branch_has_instructions = true;
207b8e80941Smrg
208b8e80941Smrg         splice_post_if_instructions(limit_if, &limit_if->else_instructions);
209b8e80941Smrg         ir_if_last->remove();
210b8e80941Smrg      } else {
211b8e80941Smrg         ir_if_last = (ir_instruction *)
212b8e80941Smrg            limit_if->else_instructions.get_tail();
213b8e80941Smrg         assert(is_break(ir_if_last));
214b8e80941Smrg
215b8e80941Smrg         if (ir_if_last != limit_if->else_instructions.get_head())
216b8e80941Smrg            exit_branch_has_instructions = true;
217b8e80941Smrg
218b8e80941Smrg         splice_post_if_instructions(limit_if, &limit_if->then_instructions);
219b8e80941Smrg         ir_if_last->remove();
220b8e80941Smrg      }
221b8e80941Smrg   }
222b8e80941Smrg
223b8e80941Smrg   /* Because 'iterations' is the number of times we pass over the *entire*
224b8e80941Smrg    * loop body before hitting the first break, we need to bump the number of
225b8e80941Smrg    * iterations if the limiting terminator is not the first instruction in
226b8e80941Smrg    * the loop, or it the exit branch contains instructions. This ensures we
227b8e80941Smrg    * execute any instructions before the terminator or in its exit branch.
228b8e80941Smrg    */
229b8e80941Smrg   if (!ls->terminators.is_empty() &&
230b8e80941Smrg       (limit_if != first_ir->as_if() || exit_branch_has_instructions))
231b8e80941Smrg      iterations++;
232b8e80941Smrg
233b8e80941Smrg   for (int i = 0; i < iterations; i++) {
234b8e80941Smrg      exec_list copy_list;
235b8e80941Smrg
236b8e80941Smrg      copy_list.make_empty();
237b8e80941Smrg      clone_ir_list(mem_ctx, &copy_list, &ir->body_instructions);
238b8e80941Smrg
239b8e80941Smrg      ir->insert_before(&copy_list);
240b8e80941Smrg   }
241b8e80941Smrg
242b8e80941Smrg   /* The loop has been replaced by the unrolled copies.  Remove the original
243b8e80941Smrg    * loop from the IR sequence.
244b8e80941Smrg    */
245b8e80941Smrg   ir->remove();
246b8e80941Smrg
247b8e80941Smrg   this->progress = true;
248b8e80941Smrg}
249b8e80941Smrg
250b8e80941Smrg
251b8e80941Smrg/**
252b8e80941Smrg * Unroll a loop whose last statement is an ir_if.  If \c
253b8e80941Smrg * continue_from_then_branch is true, the loop is repeated only when the
254b8e80941Smrg * "then" branch of the if is taken; otherwise it is repeated only when the
255b8e80941Smrg * "else" branch of the if is taken.
256b8e80941Smrg *
257b8e80941Smrg * For example, if the input is:
258b8e80941Smrg *
259b8e80941Smrg *     (loop (...)
260b8e80941Smrg *      ...body...
261b8e80941Smrg *      (if (cond)
262b8e80941Smrg *          (...then_instrs...)
263b8e80941Smrg *        (...else_instrs...)))
264b8e80941Smrg *
265b8e80941Smrg * And the iteration count is 3, and \c continue_from_then_branch is true,
266b8e80941Smrg * then the output will be:
267b8e80941Smrg *
268b8e80941Smrg *     ...body...
269b8e80941Smrg *     (if (cond)
270b8e80941Smrg *         (...then_instrs...
271b8e80941Smrg *          ...body...
272b8e80941Smrg *          (if (cond)
273b8e80941Smrg *              (...then_instrs...
274b8e80941Smrg *               ...body...
275b8e80941Smrg *               (if (cond)
276b8e80941Smrg *                   (...then_instrs...)
277b8e80941Smrg *                 (...else_instrs...)))
278b8e80941Smrg *            (...else_instrs...)))
279b8e80941Smrg *       (...else_instrs))
280b8e80941Smrg */
281b8e80941Smrgvoid
282b8e80941Smrgloop_unroll_visitor::complex_unroll(ir_loop *ir, int iterations,
283b8e80941Smrg                                    bool second_term_then_continue,
284b8e80941Smrg                                    bool extra_iteration_required,
285b8e80941Smrg                                    bool first_term_then_continue)
286b8e80941Smrg{
287b8e80941Smrg   void *const mem_ctx = ralloc_parent(ir);
288b8e80941Smrg   ir_instruction *ir_to_replace = ir;
289b8e80941Smrg
290b8e80941Smrg   /* Because 'iterations' is the number of times we pass over the *entire*
291b8e80941Smrg    * loop body before hitting the first break, we need to bump the number of
292b8e80941Smrg    * iterations if the limiting terminator is not the first instruction in
293b8e80941Smrg    * the loop, or it the exit branch contains instructions. This ensures we
294b8e80941Smrg    * execute any instructions before the terminator or in its exit branch.
295b8e80941Smrg    */
296b8e80941Smrg   if (extra_iteration_required)
297b8e80941Smrg      iterations++;
298b8e80941Smrg
299b8e80941Smrg   for (int i = 0; i < iterations; i++) {
300b8e80941Smrg      exec_list copy_list;
301b8e80941Smrg
302b8e80941Smrg      copy_list.make_empty();
303b8e80941Smrg      clone_ir_list(mem_ctx, &copy_list, &ir->body_instructions);
304b8e80941Smrg
305b8e80941Smrg      ir_if *ir_if = ((ir_instruction *) copy_list.get_tail())->as_if();
306b8e80941Smrg      assert(ir_if != NULL);
307b8e80941Smrg
308b8e80941Smrg      exec_list *const first_list = first_term_then_continue
309b8e80941Smrg         ? &ir_if->then_instructions : &ir_if->else_instructions;
310b8e80941Smrg      ir_if = ((ir_instruction *) first_list->get_tail())->as_if();
311b8e80941Smrg
312b8e80941Smrg      ir_to_replace->insert_before(&copy_list);
313b8e80941Smrg      ir_to_replace->remove();
314b8e80941Smrg
315b8e80941Smrg      /* placeholder that will be removed in the next iteration */
316b8e80941Smrg      ir_to_replace =
317b8e80941Smrg         new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_continue);
318b8e80941Smrg
319b8e80941Smrg      exec_list *const second_term_continue_list = second_term_then_continue
320b8e80941Smrg         ? &ir_if->then_instructions : &ir_if->else_instructions;
321b8e80941Smrg
322b8e80941Smrg      second_term_continue_list->push_tail(ir_to_replace);
323b8e80941Smrg   }
324b8e80941Smrg
325b8e80941Smrg   ir_to_replace->remove();
326b8e80941Smrg
327b8e80941Smrg   this->progress = true;
328b8e80941Smrg}
329b8e80941Smrg
330b8e80941Smrg
331b8e80941Smrg/**
332b8e80941Smrg * Move all of the instructions which follow \c ir_if to the end of
333b8e80941Smrg * \c splice_dest.
334b8e80941Smrg *
335b8e80941Smrg * For example, in the code snippet:
336b8e80941Smrg *
337b8e80941Smrg *     (if (cond)
338b8e80941Smrg *         (...then_instructions...
339b8e80941Smrg *          break)
340b8e80941Smrg *       (...else_instructions...))
341b8e80941Smrg *     ...post_if_instructions...
342b8e80941Smrg *
343b8e80941Smrg * If \c ir_if points to the "if" instruction, and \c splice_dest points to
344b8e80941Smrg * (...else_instructions...), the code snippet is transformed into:
345b8e80941Smrg *
346b8e80941Smrg *     (if (cond)
347b8e80941Smrg *         (...then_instructions...
348b8e80941Smrg *          break)
349b8e80941Smrg *       (...else_instructions...
350b8e80941Smrg *        ...post_if_instructions...))
351b8e80941Smrg */
352b8e80941Smrgvoid
353b8e80941Smrgloop_unroll_visitor::splice_post_if_instructions(ir_if *ir_if,
354b8e80941Smrg                                                 exec_list *splice_dest)
355b8e80941Smrg{
356b8e80941Smrg   while (!ir_if->get_next()->is_tail_sentinel()) {
357b8e80941Smrg      ir_instruction *move_ir = (ir_instruction *) ir_if->get_next();
358b8e80941Smrg
359b8e80941Smrg      move_ir->remove();
360b8e80941Smrg      splice_dest->push_tail(move_ir);
361b8e80941Smrg   }
362b8e80941Smrg}
363b8e80941Smrg
364b8e80941Smrgstatic bool
365b8e80941Smrgexit_branch_has_instructions(ir_if *term_if, bool lt_then_continue)
366b8e80941Smrg{
367b8e80941Smrg   if (lt_then_continue) {
368b8e80941Smrg      if (term_if->else_instructions.get_head() ==
369b8e80941Smrg          term_if->else_instructions.get_tail())
370b8e80941Smrg         return false;
371b8e80941Smrg   } else {
372b8e80941Smrg      if (term_if->then_instructions.get_head() ==
373b8e80941Smrg          term_if->then_instructions.get_tail())
374b8e80941Smrg         return false;
375b8e80941Smrg   }
376b8e80941Smrg
377b8e80941Smrg   return true;
378b8e80941Smrg}
379b8e80941Smrg
380b8e80941Smrgir_visitor_status
381b8e80941Smrgloop_unroll_visitor::visit_leave(ir_loop *ir)
382b8e80941Smrg{
383b8e80941Smrg   loop_variable_state *const ls = this->state->get(ir);
384b8e80941Smrg
385b8e80941Smrg   /* If we've entered a loop that hasn't been analyzed, something really,
386b8e80941Smrg    * really bad has happened.
387b8e80941Smrg    */
388b8e80941Smrg   if (ls == NULL) {
389b8e80941Smrg      assert(ls != NULL);
390b8e80941Smrg      return visit_continue;
391b8e80941Smrg   }
392b8e80941Smrg
393b8e80941Smrg   if (ls->limiting_terminator != NULL) {
394b8e80941Smrg      /* If the limiting terminator has an iteration count of zero, then we've
395b8e80941Smrg       * proven that the loop cannot run, so delete it.
396b8e80941Smrg       */
397b8e80941Smrg      int iterations = ls->limiting_terminator->iterations;
398b8e80941Smrg      if (iterations == 0) {
399b8e80941Smrg         ir->remove();
400b8e80941Smrg         this->progress = true;
401b8e80941Smrg         return visit_continue;
402b8e80941Smrg      }
403b8e80941Smrg   }
404b8e80941Smrg
405b8e80941Smrg   /* Remove the conditional break statements associated with all terminators
406b8e80941Smrg    * that are associated with a fixed iteration count, except for the one
407b8e80941Smrg    * associated with the limiting terminator--that one needs to stay, since
408b8e80941Smrg    * it terminates the loop.  Exception: if the loop still has a normative
409b8e80941Smrg    * bound, then that terminates the loop, so we don't even need the limiting
410b8e80941Smrg    * terminator.
411b8e80941Smrg    */
412b8e80941Smrg   foreach_in_list_safe(loop_terminator, t, &ls->terminators) {
413b8e80941Smrg      if (t->iterations < 0)
414b8e80941Smrg         continue;
415b8e80941Smrg
416b8e80941Smrg      exec_list *branch_instructions;
417b8e80941Smrg      if (t != ls->limiting_terminator) {
418b8e80941Smrg         ir_instruction *ir_if_last = (ir_instruction *)
419b8e80941Smrg            t->ir->then_instructions.get_tail();
420b8e80941Smrg         if (is_break(ir_if_last)) {
421b8e80941Smrg            branch_instructions = &t->ir->else_instructions;
422b8e80941Smrg         } else {
423b8e80941Smrg            branch_instructions = &t->ir->then_instructions;
424b8e80941Smrg            assert(is_break((ir_instruction *)
425b8e80941Smrg                            t->ir->else_instructions.get_tail()));
426b8e80941Smrg         }
427b8e80941Smrg
428b8e80941Smrg         exec_list copy_list;
429b8e80941Smrg         copy_list.make_empty();
430b8e80941Smrg         clone_ir_list(ir, &copy_list, branch_instructions);
431b8e80941Smrg
432b8e80941Smrg         t->ir->insert_before(&copy_list);
433b8e80941Smrg         t->ir->remove();
434b8e80941Smrg
435b8e80941Smrg         assert(ls->num_loop_jumps > 0);
436b8e80941Smrg         ls->num_loop_jumps--;
437b8e80941Smrg
438b8e80941Smrg         /* Also remove it from the terminator list */
439b8e80941Smrg         t->remove();
440b8e80941Smrg
441b8e80941Smrg         this->progress = true;
442b8e80941Smrg      }
443b8e80941Smrg   }
444b8e80941Smrg
445b8e80941Smrg   if (ls->limiting_terminator == NULL) {
446b8e80941Smrg      ir_instruction *last_ir =
447b8e80941Smrg         (ir_instruction *) ir->body_instructions.get_tail();
448b8e80941Smrg
449b8e80941Smrg      /* If a loop has no induction variable and the last instruction is
450b8e80941Smrg       * a break, unroll the loop with a count of 1.  This is the classic
451b8e80941Smrg       *
452b8e80941Smrg       *    do {
453b8e80941Smrg       *        // ...
454b8e80941Smrg       *    } while (false)
455b8e80941Smrg       *
456b8e80941Smrg       * that is used to wrap multi-line macros.
457b8e80941Smrg       *
458b8e80941Smrg       * If num_loop_jumps is not zero, last_ir cannot be NULL... there has to
459b8e80941Smrg       * be at least num_loop_jumps instructions in the loop.
460b8e80941Smrg       */
461b8e80941Smrg      if (ls->num_loop_jumps == 1 && is_break(last_ir)) {
462b8e80941Smrg         last_ir->remove();
463b8e80941Smrg
464b8e80941Smrg         simple_unroll(ir, 1);
465b8e80941Smrg      }
466b8e80941Smrg
467b8e80941Smrg      /* Don't try to unroll loops where the number of iterations is not known
468b8e80941Smrg       * at compile-time.
469b8e80941Smrg       */
470b8e80941Smrg      return visit_continue;
471b8e80941Smrg   }
472b8e80941Smrg
473b8e80941Smrg   int iterations = ls->limiting_terminator->iterations;
474b8e80941Smrg
475b8e80941Smrg   const int max_iterations = options->MaxUnrollIterations;
476b8e80941Smrg
477b8e80941Smrg   /* Don't try to unroll loops that have zillions of iterations either.
478b8e80941Smrg    */
479b8e80941Smrg   if (iterations > max_iterations)
480b8e80941Smrg      return visit_continue;
481b8e80941Smrg
482b8e80941Smrg   /* Don't try to unroll nested loops and loops with a huge body.
483b8e80941Smrg    */
484b8e80941Smrg   loop_unroll_count count(&ir->body_instructions, ls, options);
485b8e80941Smrg
486b8e80941Smrg   bool loop_too_large =
487b8e80941Smrg      count.nested_loop || count.nodes * iterations > max_iterations * 5;
488b8e80941Smrg
489b8e80941Smrg   if (loop_too_large && !count.unsupported_variable_indexing &&
490b8e80941Smrg       !count.array_indexed_by_induction_var_with_exact_iterations)
491b8e80941Smrg      return visit_continue;
492b8e80941Smrg
493b8e80941Smrg   /* Note: the limiting terminator contributes 1 to ls->num_loop_jumps.
494b8e80941Smrg    * We'll be removing the limiting terminator before we unroll.
495b8e80941Smrg    */
496b8e80941Smrg   assert(ls->num_loop_jumps > 0);
497b8e80941Smrg   unsigned predicted_num_loop_jumps = ls->num_loop_jumps - 1;
498b8e80941Smrg
499b8e80941Smrg   if (predicted_num_loop_jumps > 1)
500b8e80941Smrg      return visit_continue;
501b8e80941Smrg
502b8e80941Smrg   if (predicted_num_loop_jumps == 0) {
503b8e80941Smrg      simple_unroll(ir, iterations);
504b8e80941Smrg      return visit_continue;
505b8e80941Smrg   }
506b8e80941Smrg
507b8e80941Smrg   ir_instruction *last_ir = (ir_instruction *) ir->body_instructions.get_tail();
508b8e80941Smrg   assert(last_ir != NULL);
509b8e80941Smrg
510b8e80941Smrg   if (is_break(last_ir)) {
511b8e80941Smrg      /* If the only loop-jump is a break at the end of the loop, the loop
512b8e80941Smrg       * will execute exactly once.  Remove the break and use the simple
513b8e80941Smrg       * unroller with an iteration count of 1.
514b8e80941Smrg       */
515b8e80941Smrg      last_ir->remove();
516b8e80941Smrg
517b8e80941Smrg      simple_unroll(ir, 1);
518b8e80941Smrg      return visit_continue;
519b8e80941Smrg   }
520b8e80941Smrg
521b8e80941Smrg   /* Complex unrolling can only handle two terminators. One with an unknown
522b8e80941Smrg    * iteration count and one with a known iteration count. We have already
523b8e80941Smrg    * made sure we have a known iteration count above and removed any
524b8e80941Smrg    * unreachable terminators with a known count. Here we make sure there
525b8e80941Smrg    * isn't any additional unknown terminators, or any other jumps nested
526b8e80941Smrg    * inside futher ifs.
527b8e80941Smrg    */
528b8e80941Smrg   if (ls->num_loop_jumps != 2 || ls->terminators.length() != 2)
529b8e80941Smrg      return visit_continue;
530b8e80941Smrg
531b8e80941Smrg   ir_instruction *first_ir =
532b8e80941Smrg      (ir_instruction *) ir->body_instructions.get_head();
533b8e80941Smrg
534b8e80941Smrg   unsigned term_count = 0;
535b8e80941Smrg   bool first_term_then_continue = false;
536b8e80941Smrg   foreach_in_list(loop_terminator, t, &ls->terminators) {
537b8e80941Smrg      ir_if *ir_if = t->ir->as_if();
538b8e80941Smrg      assert(ir_if != NULL);
539b8e80941Smrg
540b8e80941Smrg      ir_instruction *ir_if_last =
541b8e80941Smrg         (ir_instruction *) ir_if->then_instructions.get_tail();
542b8e80941Smrg
543b8e80941Smrg      if (is_break(ir_if_last)) {
544b8e80941Smrg         splice_post_if_instructions(ir_if, &ir_if->else_instructions);
545b8e80941Smrg         ir_if_last->remove();
546b8e80941Smrg         if (term_count == 1) {
547b8e80941Smrg            bool ebi =
548b8e80941Smrg               exit_branch_has_instructions(ls->limiting_terminator->ir,
549b8e80941Smrg                                            first_term_then_continue);
550b8e80941Smrg            complex_unroll(ir, iterations, false,
551b8e80941Smrg                           first_ir->as_if() != ls->limiting_terminator->ir ||
552b8e80941Smrg                           ebi,
553b8e80941Smrg                           first_term_then_continue);
554b8e80941Smrg            return visit_continue;
555b8e80941Smrg         }
556b8e80941Smrg      } else {
557b8e80941Smrg         ir_if_last =
558b8e80941Smrg            (ir_instruction *) ir_if->else_instructions.get_tail();
559b8e80941Smrg
560b8e80941Smrg         assert(is_break(ir_if_last));
561b8e80941Smrg         if (is_break(ir_if_last)) {
562b8e80941Smrg            splice_post_if_instructions(ir_if, &ir_if->then_instructions);
563b8e80941Smrg            ir_if_last->remove();
564b8e80941Smrg            if (term_count == 1) {
565b8e80941Smrg               bool ebi =
566b8e80941Smrg                  exit_branch_has_instructions(ls->limiting_terminator->ir,
567b8e80941Smrg                                               first_term_then_continue);
568b8e80941Smrg               complex_unroll(ir, iterations, true,
569b8e80941Smrg                              first_ir->as_if() != ls->limiting_terminator->ir ||
570b8e80941Smrg                              ebi,
571b8e80941Smrg                              first_term_then_continue);
572b8e80941Smrg               return visit_continue;
573b8e80941Smrg            } else {
574b8e80941Smrg               first_term_then_continue = true;
575b8e80941Smrg            }
576b8e80941Smrg         }
577b8e80941Smrg      }
578b8e80941Smrg
579b8e80941Smrg      term_count++;
580b8e80941Smrg   }
581b8e80941Smrg
582b8e80941Smrg   /* Did not find the break statement.  It must be in a complex if-nesting,
583b8e80941Smrg    * so don't try to unroll.
584b8e80941Smrg    */
585b8e80941Smrg   return visit_continue;
586b8e80941Smrg}
587b8e80941Smrg
588b8e80941Smrg
589b8e80941Smrgbool
590b8e80941Smrgunroll_loops(exec_list *instructions, loop_state *ls,
591b8e80941Smrg             const struct gl_shader_compiler_options *options)
592b8e80941Smrg{
593b8e80941Smrg   loop_unroll_visitor v(ls, options);
594b8e80941Smrg
595b8e80941Smrg   v.run(instructions);
596b8e80941Smrg
597b8e80941Smrg   return v.progress;
598b8e80941Smrg}
599