101e04c3fSmrg/*
201e04c3fSmrg * Copyright © 2010 Intel Corporation
301e04c3fSmrg *
401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a
501e04c3fSmrg * copy of this software and associated documentation files (the "Software"),
601e04c3fSmrg * to deal in the Software without restriction, including without limitation
701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the
901e04c3fSmrg * Software is furnished to do so, subject to the following conditions:
1001e04c3fSmrg *
1101e04c3fSmrg * The above copyright notice and this permission notice (including the next
1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the
1301e04c3fSmrg * Software.
1401e04c3fSmrg *
1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
2101e04c3fSmrg * DEALINGS IN THE SOFTWARE.
2201e04c3fSmrg */
2301e04c3fSmrg
2401e04c3fSmrg#include "compiler/glsl_types.h"
2501e04c3fSmrg#include "loop_analysis.h"
2601e04c3fSmrg#include "ir_hierarchical_visitor.h"
2701e04c3fSmrg
2801e04c3fSmrg#include "main/mtypes.h"
2901e04c3fSmrg
3001e04c3fSmrgnamespace {
3101e04c3fSmrg
3201e04c3fSmrgclass loop_unroll_visitor : public ir_hierarchical_visitor {
3301e04c3fSmrgpublic:
3401e04c3fSmrg   loop_unroll_visitor(loop_state *state,
3501e04c3fSmrg                       const struct gl_shader_compiler_options *options)
3601e04c3fSmrg   {
3701e04c3fSmrg      this->state = state;
3801e04c3fSmrg      this->progress = false;
3901e04c3fSmrg      this->options = options;
4001e04c3fSmrg   }
4101e04c3fSmrg
4201e04c3fSmrg   virtual ir_visitor_status visit_leave(ir_loop *ir);
4301e04c3fSmrg   void simple_unroll(ir_loop *ir, int iterations);
4401e04c3fSmrg   void complex_unroll(ir_loop *ir, int iterations,
4501e04c3fSmrg                       bool continue_from_then_branch,
4601e04c3fSmrg                       bool limiting_term_first,
4701e04c3fSmrg                       bool lt_continue_from_then_branch);
4801e04c3fSmrg   void splice_post_if_instructions(ir_if *ir_if, exec_list *splice_dest);
4901e04c3fSmrg
5001e04c3fSmrg   loop_state *state;
5101e04c3fSmrg
5201e04c3fSmrg   bool progress;
5301e04c3fSmrg   const struct gl_shader_compiler_options *options;
5401e04c3fSmrg};
5501e04c3fSmrg
5601e04c3fSmrg} /* anonymous namespace */
5701e04c3fSmrg
5801e04c3fSmrgclass loop_unroll_count : public ir_hierarchical_visitor {
5901e04c3fSmrgpublic:
6001e04c3fSmrg   int nodes;
6101e04c3fSmrg   bool unsupported_variable_indexing;
6201e04c3fSmrg   bool array_indexed_by_induction_var_with_exact_iterations;
6301e04c3fSmrg   /* If there are nested loops, the node count will be inaccurate. */
6401e04c3fSmrg   bool nested_loop;
6501e04c3fSmrg
6601e04c3fSmrg   loop_unroll_count(exec_list *list, loop_variable_state *ls,
6701e04c3fSmrg                     const struct gl_shader_compiler_options *options)
6801e04c3fSmrg      : ls(ls), options(options)
6901e04c3fSmrg   {
7001e04c3fSmrg      nodes = 0;
7101e04c3fSmrg      nested_loop = false;
7201e04c3fSmrg      unsupported_variable_indexing = false;
7301e04c3fSmrg      array_indexed_by_induction_var_with_exact_iterations = false;
7401e04c3fSmrg
7501e04c3fSmrg      run(list);
7601e04c3fSmrg   }
7701e04c3fSmrg
7801e04c3fSmrg   virtual ir_visitor_status visit_enter(ir_assignment *)
7901e04c3fSmrg   {
8001e04c3fSmrg      nodes++;
8101e04c3fSmrg      return visit_continue;
8201e04c3fSmrg   }
8301e04c3fSmrg
8401e04c3fSmrg   virtual ir_visitor_status visit_enter(ir_expression *)
8501e04c3fSmrg   {
8601e04c3fSmrg      nodes++;
8701e04c3fSmrg      return visit_continue;
8801e04c3fSmrg   }
8901e04c3fSmrg
9001e04c3fSmrg   virtual ir_visitor_status visit_enter(ir_loop *)
9101e04c3fSmrg   {
9201e04c3fSmrg      nested_loop = true;
9301e04c3fSmrg      return visit_continue;
9401e04c3fSmrg   }
9501e04c3fSmrg
9601e04c3fSmrg   virtual ir_visitor_status visit_enter(ir_dereference_array *ir)
9701e04c3fSmrg   {
9801e04c3fSmrg      /* Force unroll in case of dynamic indexing with sampler arrays
9901e04c3fSmrg       * when EmitNoIndirectSampler is set.
10001e04c3fSmrg       */
10101e04c3fSmrg      if (options->EmitNoIndirectSampler) {
10201e04c3fSmrg         if ((ir->array->type->is_array() &&
10301e04c3fSmrg              ir->array->type->contains_sampler()) &&
10401e04c3fSmrg             !ir->array_index->constant_expression_value(ralloc_parent(ir))) {
10501e04c3fSmrg            unsupported_variable_indexing = true;
10601e04c3fSmrg            return visit_continue;
10701e04c3fSmrg         }
10801e04c3fSmrg      }
10901e04c3fSmrg
11001e04c3fSmrg      /* Check for arrays variably-indexed by a loop induction variable.
11101e04c3fSmrg       * Unrolling the loop may convert that access into constant-indexing.
11201e04c3fSmrg       *
11301e04c3fSmrg       * Many drivers don't support particular kinds of variable indexing,
11401e04c3fSmrg       * and have to resort to using lower_variable_index_to_cond_assign to
11501e04c3fSmrg       * handle it.  This results in huge amounts of horrible code, so we'd
11601e04c3fSmrg       * like to avoid that if possible.  Here, we just note that it will
11701e04c3fSmrg       * happen.
11801e04c3fSmrg       */
11901e04c3fSmrg      if ((ir->array->type->is_array() || ir->array->type->is_matrix()) &&
12001e04c3fSmrg          !ir->array_index->as_constant()) {
12101e04c3fSmrg         ir_variable *array = ir->array->variable_referenced();
12201e04c3fSmrg         loop_variable *lv = ls->get(ir->array_index->variable_referenced());
12301e04c3fSmrg         if (array && lv && lv->is_induction_var()) {
12401e04c3fSmrg            /* If an array is indexed by a loop induction variable, and the
12501e04c3fSmrg             * array size is exactly the number of loop iterations, this is
12601e04c3fSmrg             * probably a simple for-loop trying to access each element in
12701e04c3fSmrg             * turn; the application may expect it to be unrolled.
12801e04c3fSmrg             */
12901e04c3fSmrg            if (int(array->type->length) == ls->limiting_terminator->iterations)
13001e04c3fSmrg               array_indexed_by_induction_var_with_exact_iterations = true;
13101e04c3fSmrg
13201e04c3fSmrg            switch (array->data.mode) {
13301e04c3fSmrg            case ir_var_auto:
13401e04c3fSmrg            case ir_var_temporary:
13501e04c3fSmrg            case ir_var_const_in:
13601e04c3fSmrg            case ir_var_function_in:
13701e04c3fSmrg            case ir_var_function_out:
13801e04c3fSmrg            case ir_var_function_inout:
13901e04c3fSmrg               if (options->EmitNoIndirectTemp)
14001e04c3fSmrg                  unsupported_variable_indexing = true;
14101e04c3fSmrg               break;
14201e04c3fSmrg            case ir_var_uniform:
14301e04c3fSmrg            case ir_var_shader_storage:
14401e04c3fSmrg               if (options->EmitNoIndirectUniform)
14501e04c3fSmrg                  unsupported_variable_indexing = true;
14601e04c3fSmrg               break;
14701e04c3fSmrg            case ir_var_shader_in:
14801e04c3fSmrg               if (options->EmitNoIndirectInput)
14901e04c3fSmrg                  unsupported_variable_indexing = true;
15001e04c3fSmrg               break;
15101e04c3fSmrg            case ir_var_shader_out:
15201e04c3fSmrg               if (options->EmitNoIndirectOutput)
15301e04c3fSmrg                  unsupported_variable_indexing = true;
15401e04c3fSmrg               break;
15501e04c3fSmrg            }
15601e04c3fSmrg         }
15701e04c3fSmrg      }
15801e04c3fSmrg      return visit_continue;
15901e04c3fSmrg   }
16001e04c3fSmrg
16101e04c3fSmrgprivate:
16201e04c3fSmrg   loop_variable_state *ls;
16301e04c3fSmrg   const struct gl_shader_compiler_options *options;
16401e04c3fSmrg};
16501e04c3fSmrg
16601e04c3fSmrg
16701e04c3fSmrg/**
16801e04c3fSmrg * Unroll a loop which does not contain any jumps.  For example, if the input
16901e04c3fSmrg * is:
17001e04c3fSmrg *
17101e04c3fSmrg *     (loop (...) ...instrs...)
17201e04c3fSmrg *
17301e04c3fSmrg * And the iteration count is 3, the output will be:
17401e04c3fSmrg *
17501e04c3fSmrg *     ...instrs... ...instrs... ...instrs...
17601e04c3fSmrg */
17701e04c3fSmrgvoid
17801e04c3fSmrgloop_unroll_visitor::simple_unroll(ir_loop *ir, int iterations)
17901e04c3fSmrg{
18001e04c3fSmrg   void *const mem_ctx = ralloc_parent(ir);
18101e04c3fSmrg   loop_variable_state *const ls = this->state->get(ir);
18201e04c3fSmrg
1837e102996Smaya   /* If there are no terminators, then the loop iteration count must be 1.
1847e102996Smaya    * This is the 'do { } while (false);' case.
1857e102996Smaya    */
1867e102996Smaya   assert(!ls->terminators.is_empty() || iterations == 1);
1877e102996Smaya
18801e04c3fSmrg   ir_instruction *first_ir =
18901e04c3fSmrg      (ir_instruction *) ir->body_instructions.get_head();
19001e04c3fSmrg
19101e04c3fSmrg   if (!first_ir) {
19201e04c3fSmrg      /* The loop is empty remove it and return */
19301e04c3fSmrg      ir->remove();
19401e04c3fSmrg      return;
19501e04c3fSmrg   }
19601e04c3fSmrg
19701e04c3fSmrg   ir_if *limit_if = NULL;
19801e04c3fSmrg   bool exit_branch_has_instructions = false;
19901e04c3fSmrg   if (ls->limiting_terminator) {
20001e04c3fSmrg      limit_if = ls->limiting_terminator->ir;
20101e04c3fSmrg      ir_instruction *ir_if_last = (ir_instruction *)
20201e04c3fSmrg         limit_if->then_instructions.get_tail();
20301e04c3fSmrg
20401e04c3fSmrg      if (is_break(ir_if_last)) {
20501e04c3fSmrg         if (ir_if_last != limit_if->then_instructions.get_head())
20601e04c3fSmrg            exit_branch_has_instructions = true;
20701e04c3fSmrg
20801e04c3fSmrg         splice_post_if_instructions(limit_if, &limit_if->else_instructions);
20901e04c3fSmrg         ir_if_last->remove();
21001e04c3fSmrg      } else {
21101e04c3fSmrg         ir_if_last = (ir_instruction *)
21201e04c3fSmrg            limit_if->else_instructions.get_tail();
21301e04c3fSmrg         assert(is_break(ir_if_last));
21401e04c3fSmrg
21501e04c3fSmrg         if (ir_if_last != limit_if->else_instructions.get_head())
21601e04c3fSmrg            exit_branch_has_instructions = true;
21701e04c3fSmrg
21801e04c3fSmrg         splice_post_if_instructions(limit_if, &limit_if->then_instructions);
21901e04c3fSmrg         ir_if_last->remove();
22001e04c3fSmrg      }
22101e04c3fSmrg   }
22201e04c3fSmrg
22301e04c3fSmrg   /* Because 'iterations' is the number of times we pass over the *entire*
22401e04c3fSmrg    * loop body before hitting the first break, we need to bump the number of
22501e04c3fSmrg    * iterations if the limiting terminator is not the first instruction in
22601e04c3fSmrg    * the loop, or it the exit branch contains instructions. This ensures we
22701e04c3fSmrg    * execute any instructions before the terminator or in its exit branch.
22801e04c3fSmrg    */
2297e102996Smaya   if (!ls->terminators.is_empty() &&
2307e102996Smaya       (limit_if != first_ir->as_if() || exit_branch_has_instructions))
23101e04c3fSmrg      iterations++;
23201e04c3fSmrg
23301e04c3fSmrg   for (int i = 0; i < iterations; i++) {
23401e04c3fSmrg      exec_list copy_list;
23501e04c3fSmrg
23601e04c3fSmrg      copy_list.make_empty();
23701e04c3fSmrg      clone_ir_list(mem_ctx, &copy_list, &ir->body_instructions);
23801e04c3fSmrg
23901e04c3fSmrg      ir->insert_before(&copy_list);
24001e04c3fSmrg   }
24101e04c3fSmrg
24201e04c3fSmrg   /* The loop has been replaced by the unrolled copies.  Remove the original
24301e04c3fSmrg    * loop from the IR sequence.
24401e04c3fSmrg    */
24501e04c3fSmrg   ir->remove();
24601e04c3fSmrg
24701e04c3fSmrg   this->progress = true;
24801e04c3fSmrg}
24901e04c3fSmrg
25001e04c3fSmrg
25101e04c3fSmrg/**
25201e04c3fSmrg * Unroll a loop whose last statement is an ir_if.  If \c
25301e04c3fSmrg * continue_from_then_branch is true, the loop is repeated only when the
25401e04c3fSmrg * "then" branch of the if is taken; otherwise it is repeated only when the
25501e04c3fSmrg * "else" branch of the if is taken.
25601e04c3fSmrg *
25701e04c3fSmrg * For example, if the input is:
25801e04c3fSmrg *
25901e04c3fSmrg *     (loop (...)
26001e04c3fSmrg *      ...body...
26101e04c3fSmrg *      (if (cond)
26201e04c3fSmrg *          (...then_instrs...)
26301e04c3fSmrg *        (...else_instrs...)))
26401e04c3fSmrg *
26501e04c3fSmrg * And the iteration count is 3, and \c continue_from_then_branch is true,
26601e04c3fSmrg * then the output will be:
26701e04c3fSmrg *
26801e04c3fSmrg *     ...body...
26901e04c3fSmrg *     (if (cond)
27001e04c3fSmrg *         (...then_instrs...
27101e04c3fSmrg *          ...body...
27201e04c3fSmrg *          (if (cond)
27301e04c3fSmrg *              (...then_instrs...
27401e04c3fSmrg *               ...body...
27501e04c3fSmrg *               (if (cond)
27601e04c3fSmrg *                   (...then_instrs...)
27701e04c3fSmrg *                 (...else_instrs...)))
27801e04c3fSmrg *            (...else_instrs...)))
27901e04c3fSmrg *       (...else_instrs))
28001e04c3fSmrg */
28101e04c3fSmrgvoid
28201e04c3fSmrgloop_unroll_visitor::complex_unroll(ir_loop *ir, int iterations,
28301e04c3fSmrg                                    bool second_term_then_continue,
28401e04c3fSmrg                                    bool extra_iteration_required,
28501e04c3fSmrg                                    bool first_term_then_continue)
28601e04c3fSmrg{
28701e04c3fSmrg   void *const mem_ctx = ralloc_parent(ir);
28801e04c3fSmrg   ir_instruction *ir_to_replace = ir;
28901e04c3fSmrg
29001e04c3fSmrg   /* Because 'iterations' is the number of times we pass over the *entire*
29101e04c3fSmrg    * loop body before hitting the first break, we need to bump the number of
29201e04c3fSmrg    * iterations if the limiting terminator is not the first instruction in
29301e04c3fSmrg    * the loop, or it the exit branch contains instructions. This ensures we
29401e04c3fSmrg    * execute any instructions before the terminator or in its exit branch.
29501e04c3fSmrg    */
29601e04c3fSmrg   if (extra_iteration_required)
29701e04c3fSmrg      iterations++;
29801e04c3fSmrg
29901e04c3fSmrg   for (int i = 0; i < iterations; i++) {
30001e04c3fSmrg      exec_list copy_list;
30101e04c3fSmrg
30201e04c3fSmrg      copy_list.make_empty();
30301e04c3fSmrg      clone_ir_list(mem_ctx, &copy_list, &ir->body_instructions);
30401e04c3fSmrg
30501e04c3fSmrg      ir_if *ir_if = ((ir_instruction *) copy_list.get_tail())->as_if();
30601e04c3fSmrg      assert(ir_if != NULL);
30701e04c3fSmrg
30801e04c3fSmrg      exec_list *const first_list = first_term_then_continue
30901e04c3fSmrg         ? &ir_if->then_instructions : &ir_if->else_instructions;
31001e04c3fSmrg      ir_if = ((ir_instruction *) first_list->get_tail())->as_if();
31101e04c3fSmrg
31201e04c3fSmrg      ir_to_replace->insert_before(&copy_list);
31301e04c3fSmrg      ir_to_replace->remove();
31401e04c3fSmrg
31501e04c3fSmrg      /* placeholder that will be removed in the next iteration */
31601e04c3fSmrg      ir_to_replace =
31701e04c3fSmrg         new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_continue);
31801e04c3fSmrg
31901e04c3fSmrg      exec_list *const second_term_continue_list = second_term_then_continue
32001e04c3fSmrg         ? &ir_if->then_instructions : &ir_if->else_instructions;
32101e04c3fSmrg
32201e04c3fSmrg      second_term_continue_list->push_tail(ir_to_replace);
32301e04c3fSmrg   }
32401e04c3fSmrg
32501e04c3fSmrg   ir_to_replace->remove();
32601e04c3fSmrg
32701e04c3fSmrg   this->progress = true;
32801e04c3fSmrg}
32901e04c3fSmrg
33001e04c3fSmrg
33101e04c3fSmrg/**
33201e04c3fSmrg * Move all of the instructions which follow \c ir_if to the end of
33301e04c3fSmrg * \c splice_dest.
33401e04c3fSmrg *
33501e04c3fSmrg * For example, in the code snippet:
33601e04c3fSmrg *
33701e04c3fSmrg *     (if (cond)
33801e04c3fSmrg *         (...then_instructions...
33901e04c3fSmrg *          break)
34001e04c3fSmrg *       (...else_instructions...))
34101e04c3fSmrg *     ...post_if_instructions...
34201e04c3fSmrg *
34301e04c3fSmrg * If \c ir_if points to the "if" instruction, and \c splice_dest points to
34401e04c3fSmrg * (...else_instructions...), the code snippet is transformed into:
34501e04c3fSmrg *
34601e04c3fSmrg *     (if (cond)
34701e04c3fSmrg *         (...then_instructions...
34801e04c3fSmrg *          break)
34901e04c3fSmrg *       (...else_instructions...
35001e04c3fSmrg *        ...post_if_instructions...))
35101e04c3fSmrg */
35201e04c3fSmrgvoid
35301e04c3fSmrgloop_unroll_visitor::splice_post_if_instructions(ir_if *ir_if,
35401e04c3fSmrg                                                 exec_list *splice_dest)
35501e04c3fSmrg{
35601e04c3fSmrg   while (!ir_if->get_next()->is_tail_sentinel()) {
35701e04c3fSmrg      ir_instruction *move_ir = (ir_instruction *) ir_if->get_next();
35801e04c3fSmrg
35901e04c3fSmrg      move_ir->remove();
36001e04c3fSmrg      splice_dest->push_tail(move_ir);
36101e04c3fSmrg   }
36201e04c3fSmrg}
36301e04c3fSmrg
36401e04c3fSmrgstatic bool
36501e04c3fSmrgexit_branch_has_instructions(ir_if *term_if, bool lt_then_continue)
36601e04c3fSmrg{
36701e04c3fSmrg   if (lt_then_continue) {
36801e04c3fSmrg      if (term_if->else_instructions.get_head() ==
36901e04c3fSmrg          term_if->else_instructions.get_tail())
37001e04c3fSmrg         return false;
37101e04c3fSmrg   } else {
37201e04c3fSmrg      if (term_if->then_instructions.get_head() ==
37301e04c3fSmrg          term_if->then_instructions.get_tail())
37401e04c3fSmrg         return false;
37501e04c3fSmrg   }
37601e04c3fSmrg
37701e04c3fSmrg   return true;
37801e04c3fSmrg}
37901e04c3fSmrg
38001e04c3fSmrgir_visitor_status
38101e04c3fSmrgloop_unroll_visitor::visit_leave(ir_loop *ir)
38201e04c3fSmrg{
38301e04c3fSmrg   loop_variable_state *const ls = this->state->get(ir);
38401e04c3fSmrg
38501e04c3fSmrg   /* If we've entered a loop that hasn't been analyzed, something really,
38601e04c3fSmrg    * really bad has happened.
38701e04c3fSmrg    */
38801e04c3fSmrg   if (ls == NULL) {
38901e04c3fSmrg      assert(ls != NULL);
39001e04c3fSmrg      return visit_continue;
39101e04c3fSmrg   }
39201e04c3fSmrg
3937ec681f3Smrg   /* Limiting terminator may have iteration count of zero,
3947ec681f3Smrg    * this is a valid case because the loop may break during
3957ec681f3Smrg    * the first iteration.
3967ec681f3Smrg    */
39701e04c3fSmrg
39801e04c3fSmrg   /* Remove the conditional break statements associated with all terminators
39901e04c3fSmrg    * that are associated with a fixed iteration count, except for the one
40001e04c3fSmrg    * associated with the limiting terminator--that one needs to stay, since
40101e04c3fSmrg    * it terminates the loop.  Exception: if the loop still has a normative
40201e04c3fSmrg    * bound, then that terminates the loop, so we don't even need the limiting
40301e04c3fSmrg    * terminator.
40401e04c3fSmrg    */
40501e04c3fSmrg   foreach_in_list_safe(loop_terminator, t, &ls->terminators) {
40601e04c3fSmrg      if (t->iterations < 0)
40701e04c3fSmrg         continue;
40801e04c3fSmrg
40901e04c3fSmrg      exec_list *branch_instructions;
41001e04c3fSmrg      if (t != ls->limiting_terminator) {
41101e04c3fSmrg         ir_instruction *ir_if_last = (ir_instruction *)
41201e04c3fSmrg            t->ir->then_instructions.get_tail();
41301e04c3fSmrg         if (is_break(ir_if_last)) {
41401e04c3fSmrg            branch_instructions = &t->ir->else_instructions;
41501e04c3fSmrg         } else {
41601e04c3fSmrg            branch_instructions = &t->ir->then_instructions;
41701e04c3fSmrg            assert(is_break((ir_instruction *)
41801e04c3fSmrg                            t->ir->else_instructions.get_tail()));
41901e04c3fSmrg         }
42001e04c3fSmrg
42101e04c3fSmrg         exec_list copy_list;
42201e04c3fSmrg         copy_list.make_empty();
42301e04c3fSmrg         clone_ir_list(ir, &copy_list, branch_instructions);
42401e04c3fSmrg
42501e04c3fSmrg         t->ir->insert_before(&copy_list);
42601e04c3fSmrg         t->ir->remove();
42701e04c3fSmrg
42801e04c3fSmrg         assert(ls->num_loop_jumps > 0);
42901e04c3fSmrg         ls->num_loop_jumps--;
43001e04c3fSmrg
43101e04c3fSmrg         /* Also remove it from the terminator list */
43201e04c3fSmrg         t->remove();
43301e04c3fSmrg
43401e04c3fSmrg         this->progress = true;
43501e04c3fSmrg      }
43601e04c3fSmrg   }
43701e04c3fSmrg
43801e04c3fSmrg   if (ls->limiting_terminator == NULL) {
43901e04c3fSmrg      ir_instruction *last_ir =
44001e04c3fSmrg         (ir_instruction *) ir->body_instructions.get_tail();
44101e04c3fSmrg
44201e04c3fSmrg      /* If a loop has no induction variable and the last instruction is
44301e04c3fSmrg       * a break, unroll the loop with a count of 1.  This is the classic
44401e04c3fSmrg       *
44501e04c3fSmrg       *    do {
44601e04c3fSmrg       *        // ...
44701e04c3fSmrg       *    } while (false)
44801e04c3fSmrg       *
44901e04c3fSmrg       * that is used to wrap multi-line macros.
45001e04c3fSmrg       *
45101e04c3fSmrg       * If num_loop_jumps is not zero, last_ir cannot be NULL... there has to
45201e04c3fSmrg       * be at least num_loop_jumps instructions in the loop.
45301e04c3fSmrg       */
45401e04c3fSmrg      if (ls->num_loop_jumps == 1 && is_break(last_ir)) {
45501e04c3fSmrg         last_ir->remove();
45601e04c3fSmrg
45701e04c3fSmrg         simple_unroll(ir, 1);
45801e04c3fSmrg      }
45901e04c3fSmrg
46001e04c3fSmrg      /* Don't try to unroll loops where the number of iterations is not known
46101e04c3fSmrg       * at compile-time.
46201e04c3fSmrg       */
46301e04c3fSmrg      return visit_continue;
46401e04c3fSmrg   }
46501e04c3fSmrg
46601e04c3fSmrg   int iterations = ls->limiting_terminator->iterations;
46701e04c3fSmrg
46801e04c3fSmrg   const int max_iterations = options->MaxUnrollIterations;
46901e04c3fSmrg
47001e04c3fSmrg   /* Don't try to unroll loops that have zillions of iterations either.
47101e04c3fSmrg    */
47201e04c3fSmrg   if (iterations > max_iterations)
47301e04c3fSmrg      return visit_continue;
47401e04c3fSmrg
47501e04c3fSmrg   /* Don't try to unroll nested loops and loops with a huge body.
47601e04c3fSmrg    */
47701e04c3fSmrg   loop_unroll_count count(&ir->body_instructions, ls, options);
47801e04c3fSmrg
47901e04c3fSmrg   bool loop_too_large =
48001e04c3fSmrg      count.nested_loop || count.nodes * iterations > max_iterations * 5;
48101e04c3fSmrg
48201e04c3fSmrg   if (loop_too_large && !count.unsupported_variable_indexing &&
48301e04c3fSmrg       !count.array_indexed_by_induction_var_with_exact_iterations)
48401e04c3fSmrg      return visit_continue;
48501e04c3fSmrg
48601e04c3fSmrg   /* Note: the limiting terminator contributes 1 to ls->num_loop_jumps.
48701e04c3fSmrg    * We'll be removing the limiting terminator before we unroll.
48801e04c3fSmrg    */
48901e04c3fSmrg   assert(ls->num_loop_jumps > 0);
49001e04c3fSmrg   unsigned predicted_num_loop_jumps = ls->num_loop_jumps - 1;
49101e04c3fSmrg
49201e04c3fSmrg   if (predicted_num_loop_jumps > 1)
49301e04c3fSmrg      return visit_continue;
49401e04c3fSmrg
49501e04c3fSmrg   if (predicted_num_loop_jumps == 0) {
49601e04c3fSmrg      simple_unroll(ir, iterations);
49701e04c3fSmrg      return visit_continue;
49801e04c3fSmrg   }
49901e04c3fSmrg
50001e04c3fSmrg   ir_instruction *last_ir = (ir_instruction *) ir->body_instructions.get_tail();
50101e04c3fSmrg   assert(last_ir != NULL);
50201e04c3fSmrg
50301e04c3fSmrg   if (is_break(last_ir)) {
50401e04c3fSmrg      /* If the only loop-jump is a break at the end of the loop, the loop
50501e04c3fSmrg       * will execute exactly once.  Remove the break and use the simple
50601e04c3fSmrg       * unroller with an iteration count of 1.
50701e04c3fSmrg       */
50801e04c3fSmrg      last_ir->remove();
50901e04c3fSmrg
51001e04c3fSmrg      simple_unroll(ir, 1);
51101e04c3fSmrg      return visit_continue;
51201e04c3fSmrg   }
51301e04c3fSmrg
51401e04c3fSmrg   /* Complex unrolling can only handle two terminators. One with an unknown
51501e04c3fSmrg    * iteration count and one with a known iteration count. We have already
51601e04c3fSmrg    * made sure we have a known iteration count above and removed any
51701e04c3fSmrg    * unreachable terminators with a known count. Here we make sure there
51801e04c3fSmrg    * isn't any additional unknown terminators, or any other jumps nested
51901e04c3fSmrg    * inside futher ifs.
52001e04c3fSmrg    */
52101e04c3fSmrg   if (ls->num_loop_jumps != 2 || ls->terminators.length() != 2)
52201e04c3fSmrg      return visit_continue;
52301e04c3fSmrg
52401e04c3fSmrg   ir_instruction *first_ir =
52501e04c3fSmrg      (ir_instruction *) ir->body_instructions.get_head();
52601e04c3fSmrg
52701e04c3fSmrg   unsigned term_count = 0;
52801e04c3fSmrg   bool first_term_then_continue = false;
52901e04c3fSmrg   foreach_in_list(loop_terminator, t, &ls->terminators) {
53001e04c3fSmrg      ir_if *ir_if = t->ir->as_if();
53101e04c3fSmrg      assert(ir_if != NULL);
53201e04c3fSmrg
53301e04c3fSmrg      ir_instruction *ir_if_last =
53401e04c3fSmrg         (ir_instruction *) ir_if->then_instructions.get_tail();
53501e04c3fSmrg
53601e04c3fSmrg      if (is_break(ir_if_last)) {
53701e04c3fSmrg         splice_post_if_instructions(ir_if, &ir_if->else_instructions);
53801e04c3fSmrg         ir_if_last->remove();
53901e04c3fSmrg         if (term_count == 1) {
54001e04c3fSmrg            bool ebi =
54101e04c3fSmrg               exit_branch_has_instructions(ls->limiting_terminator->ir,
54201e04c3fSmrg                                            first_term_then_continue);
54301e04c3fSmrg            complex_unroll(ir, iterations, false,
54401e04c3fSmrg                           first_ir->as_if() != ls->limiting_terminator->ir ||
54501e04c3fSmrg                           ebi,
54601e04c3fSmrg                           first_term_then_continue);
54701e04c3fSmrg            return visit_continue;
54801e04c3fSmrg         }
54901e04c3fSmrg      } else {
55001e04c3fSmrg         ir_if_last =
55101e04c3fSmrg            (ir_instruction *) ir_if->else_instructions.get_tail();
55201e04c3fSmrg
55301e04c3fSmrg         assert(is_break(ir_if_last));
55401e04c3fSmrg         if (is_break(ir_if_last)) {
55501e04c3fSmrg            splice_post_if_instructions(ir_if, &ir_if->then_instructions);
55601e04c3fSmrg            ir_if_last->remove();
55701e04c3fSmrg            if (term_count == 1) {
55801e04c3fSmrg               bool ebi =
55901e04c3fSmrg                  exit_branch_has_instructions(ls->limiting_terminator->ir,
56001e04c3fSmrg                                               first_term_then_continue);
56101e04c3fSmrg               complex_unroll(ir, iterations, true,
56201e04c3fSmrg                              first_ir->as_if() != ls->limiting_terminator->ir ||
56301e04c3fSmrg                              ebi,
56401e04c3fSmrg                              first_term_then_continue);
56501e04c3fSmrg               return visit_continue;
56601e04c3fSmrg            } else {
56701e04c3fSmrg               first_term_then_continue = true;
56801e04c3fSmrg            }
56901e04c3fSmrg         }
57001e04c3fSmrg      }
57101e04c3fSmrg
57201e04c3fSmrg      term_count++;
57301e04c3fSmrg   }
57401e04c3fSmrg
57501e04c3fSmrg   /* Did not find the break statement.  It must be in a complex if-nesting,
57601e04c3fSmrg    * so don't try to unroll.
57701e04c3fSmrg    */
57801e04c3fSmrg   return visit_continue;
57901e04c3fSmrg}
58001e04c3fSmrg
58101e04c3fSmrg
58201e04c3fSmrgbool
58301e04c3fSmrgunroll_loops(exec_list *instructions, loop_state *ls,
58401e04c3fSmrg             const struct gl_shader_compiler_options *options)
58501e04c3fSmrg{
58601e04c3fSmrg   loop_unroll_visitor v(ls, options);
58701e04c3fSmrg
58801e04c3fSmrg   v.run(instructions);
58901e04c3fSmrg
59001e04c3fSmrg   return v.progress;
59101e04c3fSmrg}
592