101e04c3fSmrg/* 201e04c3fSmrg * Copyright © 2010 Intel Corporation 301e04c3fSmrg * 401e04c3fSmrg * Permission is hereby granted, free of charge, to any person obtaining a 501e04c3fSmrg * copy of this software and associated documentation files (the "Software"), 601e04c3fSmrg * to deal in the Software without restriction, including without limitation 701e04c3fSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 801e04c3fSmrg * and/or sell copies of the Software, and to permit persons to whom the 901e04c3fSmrg * Software is furnished to do so, subject to the following conditions: 1001e04c3fSmrg * 1101e04c3fSmrg * The above copyright notice and this permission notice (including the next 1201e04c3fSmrg * paragraph) shall be included in all copies or substantial portions of the 1301e04c3fSmrg * Software. 1401e04c3fSmrg * 1501e04c3fSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 1601e04c3fSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 1701e04c3fSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 1801e04c3fSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 1901e04c3fSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 2001e04c3fSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 2101e04c3fSmrg * DEALINGS IN THE SOFTWARE. 2201e04c3fSmrg */ 2301e04c3fSmrg 2401e04c3fSmrg#include "compiler/glsl_types.h" 2501e04c3fSmrg#include "loop_analysis.h" 2601e04c3fSmrg#include "ir_hierarchical_visitor.h" 2701e04c3fSmrg 2801e04c3fSmrg#include "main/mtypes.h" 2901e04c3fSmrg 3001e04c3fSmrgnamespace { 3101e04c3fSmrg 3201e04c3fSmrgclass loop_unroll_visitor : public ir_hierarchical_visitor { 3301e04c3fSmrgpublic: 3401e04c3fSmrg loop_unroll_visitor(loop_state *state, 3501e04c3fSmrg const struct gl_shader_compiler_options *options) 3601e04c3fSmrg { 3701e04c3fSmrg this->state = state; 3801e04c3fSmrg this->progress = false; 3901e04c3fSmrg this->options = options; 4001e04c3fSmrg } 4101e04c3fSmrg 4201e04c3fSmrg virtual ir_visitor_status visit_leave(ir_loop *ir); 4301e04c3fSmrg void simple_unroll(ir_loop *ir, int iterations); 4401e04c3fSmrg void complex_unroll(ir_loop *ir, int iterations, 4501e04c3fSmrg bool continue_from_then_branch, 4601e04c3fSmrg bool limiting_term_first, 4701e04c3fSmrg bool lt_continue_from_then_branch); 4801e04c3fSmrg void splice_post_if_instructions(ir_if *ir_if, exec_list *splice_dest); 4901e04c3fSmrg 5001e04c3fSmrg loop_state *state; 5101e04c3fSmrg 5201e04c3fSmrg bool progress; 5301e04c3fSmrg const struct gl_shader_compiler_options *options; 5401e04c3fSmrg}; 5501e04c3fSmrg 5601e04c3fSmrg} /* anonymous namespace */ 5701e04c3fSmrg 5801e04c3fSmrgclass loop_unroll_count : public ir_hierarchical_visitor { 5901e04c3fSmrgpublic: 6001e04c3fSmrg int nodes; 6101e04c3fSmrg bool unsupported_variable_indexing; 6201e04c3fSmrg bool array_indexed_by_induction_var_with_exact_iterations; 6301e04c3fSmrg /* If there are nested loops, the node count will be inaccurate. */ 6401e04c3fSmrg bool nested_loop; 6501e04c3fSmrg 6601e04c3fSmrg loop_unroll_count(exec_list *list, loop_variable_state *ls, 6701e04c3fSmrg const struct gl_shader_compiler_options *options) 6801e04c3fSmrg : ls(ls), options(options) 6901e04c3fSmrg { 7001e04c3fSmrg nodes = 0; 7101e04c3fSmrg nested_loop = false; 7201e04c3fSmrg unsupported_variable_indexing = false; 7301e04c3fSmrg array_indexed_by_induction_var_with_exact_iterations = false; 7401e04c3fSmrg 7501e04c3fSmrg run(list); 7601e04c3fSmrg } 7701e04c3fSmrg 7801e04c3fSmrg virtual ir_visitor_status visit_enter(ir_assignment *) 7901e04c3fSmrg { 8001e04c3fSmrg nodes++; 8101e04c3fSmrg return visit_continue; 8201e04c3fSmrg } 8301e04c3fSmrg 8401e04c3fSmrg virtual ir_visitor_status visit_enter(ir_expression *) 8501e04c3fSmrg { 8601e04c3fSmrg nodes++; 8701e04c3fSmrg return visit_continue; 8801e04c3fSmrg } 8901e04c3fSmrg 9001e04c3fSmrg virtual ir_visitor_status visit_enter(ir_loop *) 9101e04c3fSmrg { 9201e04c3fSmrg nested_loop = true; 9301e04c3fSmrg return visit_continue; 9401e04c3fSmrg } 9501e04c3fSmrg 9601e04c3fSmrg virtual ir_visitor_status visit_enter(ir_dereference_array *ir) 9701e04c3fSmrg { 9801e04c3fSmrg /* Force unroll in case of dynamic indexing with sampler arrays 9901e04c3fSmrg * when EmitNoIndirectSampler is set. 10001e04c3fSmrg */ 10101e04c3fSmrg if (options->EmitNoIndirectSampler) { 10201e04c3fSmrg if ((ir->array->type->is_array() && 10301e04c3fSmrg ir->array->type->contains_sampler()) && 10401e04c3fSmrg !ir->array_index->constant_expression_value(ralloc_parent(ir))) { 10501e04c3fSmrg unsupported_variable_indexing = true; 10601e04c3fSmrg return visit_continue; 10701e04c3fSmrg } 10801e04c3fSmrg } 10901e04c3fSmrg 11001e04c3fSmrg /* Check for arrays variably-indexed by a loop induction variable. 11101e04c3fSmrg * Unrolling the loop may convert that access into constant-indexing. 11201e04c3fSmrg * 11301e04c3fSmrg * Many drivers don't support particular kinds of variable indexing, 11401e04c3fSmrg * and have to resort to using lower_variable_index_to_cond_assign to 11501e04c3fSmrg * handle it. This results in huge amounts of horrible code, so we'd 11601e04c3fSmrg * like to avoid that if possible. Here, we just note that it will 11701e04c3fSmrg * happen. 11801e04c3fSmrg */ 11901e04c3fSmrg if ((ir->array->type->is_array() || ir->array->type->is_matrix()) && 12001e04c3fSmrg !ir->array_index->as_constant()) { 12101e04c3fSmrg ir_variable *array = ir->array->variable_referenced(); 12201e04c3fSmrg loop_variable *lv = ls->get(ir->array_index->variable_referenced()); 12301e04c3fSmrg if (array && lv && lv->is_induction_var()) { 12401e04c3fSmrg /* If an array is indexed by a loop induction variable, and the 12501e04c3fSmrg * array size is exactly the number of loop iterations, this is 12601e04c3fSmrg * probably a simple for-loop trying to access each element in 12701e04c3fSmrg * turn; the application may expect it to be unrolled. 12801e04c3fSmrg */ 12901e04c3fSmrg if (int(array->type->length) == ls->limiting_terminator->iterations) 13001e04c3fSmrg array_indexed_by_induction_var_with_exact_iterations = true; 13101e04c3fSmrg 13201e04c3fSmrg switch (array->data.mode) { 13301e04c3fSmrg case ir_var_auto: 13401e04c3fSmrg case ir_var_temporary: 13501e04c3fSmrg case ir_var_const_in: 13601e04c3fSmrg case ir_var_function_in: 13701e04c3fSmrg case ir_var_function_out: 13801e04c3fSmrg case ir_var_function_inout: 13901e04c3fSmrg if (options->EmitNoIndirectTemp) 14001e04c3fSmrg unsupported_variable_indexing = true; 14101e04c3fSmrg break; 14201e04c3fSmrg case ir_var_uniform: 14301e04c3fSmrg case ir_var_shader_storage: 14401e04c3fSmrg if (options->EmitNoIndirectUniform) 14501e04c3fSmrg unsupported_variable_indexing = true; 14601e04c3fSmrg break; 14701e04c3fSmrg case ir_var_shader_in: 14801e04c3fSmrg if (options->EmitNoIndirectInput) 14901e04c3fSmrg unsupported_variable_indexing = true; 15001e04c3fSmrg break; 15101e04c3fSmrg case ir_var_shader_out: 15201e04c3fSmrg if (options->EmitNoIndirectOutput) 15301e04c3fSmrg unsupported_variable_indexing = true; 15401e04c3fSmrg break; 15501e04c3fSmrg } 15601e04c3fSmrg } 15701e04c3fSmrg } 15801e04c3fSmrg return visit_continue; 15901e04c3fSmrg } 16001e04c3fSmrg 16101e04c3fSmrgprivate: 16201e04c3fSmrg loop_variable_state *ls; 16301e04c3fSmrg const struct gl_shader_compiler_options *options; 16401e04c3fSmrg}; 16501e04c3fSmrg 16601e04c3fSmrg 16701e04c3fSmrg/** 16801e04c3fSmrg * Unroll a loop which does not contain any jumps. For example, if the input 16901e04c3fSmrg * is: 17001e04c3fSmrg * 17101e04c3fSmrg * (loop (...) ...instrs...) 17201e04c3fSmrg * 17301e04c3fSmrg * And the iteration count is 3, the output will be: 17401e04c3fSmrg * 17501e04c3fSmrg * ...instrs... ...instrs... ...instrs... 17601e04c3fSmrg */ 17701e04c3fSmrgvoid 17801e04c3fSmrgloop_unroll_visitor::simple_unroll(ir_loop *ir, int iterations) 17901e04c3fSmrg{ 18001e04c3fSmrg void *const mem_ctx = ralloc_parent(ir); 18101e04c3fSmrg loop_variable_state *const ls = this->state->get(ir); 18201e04c3fSmrg 1837e102996Smaya /* If there are no terminators, then the loop iteration count must be 1. 1847e102996Smaya * This is the 'do { } while (false);' case. 1857e102996Smaya */ 1867e102996Smaya assert(!ls->terminators.is_empty() || iterations == 1); 1877e102996Smaya 18801e04c3fSmrg ir_instruction *first_ir = 18901e04c3fSmrg (ir_instruction *) ir->body_instructions.get_head(); 19001e04c3fSmrg 19101e04c3fSmrg if (!first_ir) { 19201e04c3fSmrg /* The loop is empty remove it and return */ 19301e04c3fSmrg ir->remove(); 19401e04c3fSmrg return; 19501e04c3fSmrg } 19601e04c3fSmrg 19701e04c3fSmrg ir_if *limit_if = NULL; 19801e04c3fSmrg bool exit_branch_has_instructions = false; 19901e04c3fSmrg if (ls->limiting_terminator) { 20001e04c3fSmrg limit_if = ls->limiting_terminator->ir; 20101e04c3fSmrg ir_instruction *ir_if_last = (ir_instruction *) 20201e04c3fSmrg limit_if->then_instructions.get_tail(); 20301e04c3fSmrg 20401e04c3fSmrg if (is_break(ir_if_last)) { 20501e04c3fSmrg if (ir_if_last != limit_if->then_instructions.get_head()) 20601e04c3fSmrg exit_branch_has_instructions = true; 20701e04c3fSmrg 20801e04c3fSmrg splice_post_if_instructions(limit_if, &limit_if->else_instructions); 20901e04c3fSmrg ir_if_last->remove(); 21001e04c3fSmrg } else { 21101e04c3fSmrg ir_if_last = (ir_instruction *) 21201e04c3fSmrg limit_if->else_instructions.get_tail(); 21301e04c3fSmrg assert(is_break(ir_if_last)); 21401e04c3fSmrg 21501e04c3fSmrg if (ir_if_last != limit_if->else_instructions.get_head()) 21601e04c3fSmrg exit_branch_has_instructions = true; 21701e04c3fSmrg 21801e04c3fSmrg splice_post_if_instructions(limit_if, &limit_if->then_instructions); 21901e04c3fSmrg ir_if_last->remove(); 22001e04c3fSmrg } 22101e04c3fSmrg } 22201e04c3fSmrg 22301e04c3fSmrg /* Because 'iterations' is the number of times we pass over the *entire* 22401e04c3fSmrg * loop body before hitting the first break, we need to bump the number of 22501e04c3fSmrg * iterations if the limiting terminator is not the first instruction in 22601e04c3fSmrg * the loop, or it the exit branch contains instructions. This ensures we 22701e04c3fSmrg * execute any instructions before the terminator or in its exit branch. 22801e04c3fSmrg */ 2297e102996Smaya if (!ls->terminators.is_empty() && 2307e102996Smaya (limit_if != first_ir->as_if() || exit_branch_has_instructions)) 23101e04c3fSmrg iterations++; 23201e04c3fSmrg 23301e04c3fSmrg for (int i = 0; i < iterations; i++) { 23401e04c3fSmrg exec_list copy_list; 23501e04c3fSmrg 23601e04c3fSmrg copy_list.make_empty(); 23701e04c3fSmrg clone_ir_list(mem_ctx, ©_list, &ir->body_instructions); 23801e04c3fSmrg 23901e04c3fSmrg ir->insert_before(©_list); 24001e04c3fSmrg } 24101e04c3fSmrg 24201e04c3fSmrg /* The loop has been replaced by the unrolled copies. Remove the original 24301e04c3fSmrg * loop from the IR sequence. 24401e04c3fSmrg */ 24501e04c3fSmrg ir->remove(); 24601e04c3fSmrg 24701e04c3fSmrg this->progress = true; 24801e04c3fSmrg} 24901e04c3fSmrg 25001e04c3fSmrg 25101e04c3fSmrg/** 25201e04c3fSmrg * Unroll a loop whose last statement is an ir_if. If \c 25301e04c3fSmrg * continue_from_then_branch is true, the loop is repeated only when the 25401e04c3fSmrg * "then" branch of the if is taken; otherwise it is repeated only when the 25501e04c3fSmrg * "else" branch of the if is taken. 25601e04c3fSmrg * 25701e04c3fSmrg * For example, if the input is: 25801e04c3fSmrg * 25901e04c3fSmrg * (loop (...) 26001e04c3fSmrg * ...body... 26101e04c3fSmrg * (if (cond) 26201e04c3fSmrg * (...then_instrs...) 26301e04c3fSmrg * (...else_instrs...))) 26401e04c3fSmrg * 26501e04c3fSmrg * And the iteration count is 3, and \c continue_from_then_branch is true, 26601e04c3fSmrg * then the output will be: 26701e04c3fSmrg * 26801e04c3fSmrg * ...body... 26901e04c3fSmrg * (if (cond) 27001e04c3fSmrg * (...then_instrs... 27101e04c3fSmrg * ...body... 27201e04c3fSmrg * (if (cond) 27301e04c3fSmrg * (...then_instrs... 27401e04c3fSmrg * ...body... 27501e04c3fSmrg * (if (cond) 27601e04c3fSmrg * (...then_instrs...) 27701e04c3fSmrg * (...else_instrs...))) 27801e04c3fSmrg * (...else_instrs...))) 27901e04c3fSmrg * (...else_instrs)) 28001e04c3fSmrg */ 28101e04c3fSmrgvoid 28201e04c3fSmrgloop_unroll_visitor::complex_unroll(ir_loop *ir, int iterations, 28301e04c3fSmrg bool second_term_then_continue, 28401e04c3fSmrg bool extra_iteration_required, 28501e04c3fSmrg bool first_term_then_continue) 28601e04c3fSmrg{ 28701e04c3fSmrg void *const mem_ctx = ralloc_parent(ir); 28801e04c3fSmrg ir_instruction *ir_to_replace = ir; 28901e04c3fSmrg 29001e04c3fSmrg /* Because 'iterations' is the number of times we pass over the *entire* 29101e04c3fSmrg * loop body before hitting the first break, we need to bump the number of 29201e04c3fSmrg * iterations if the limiting terminator is not the first instruction in 29301e04c3fSmrg * the loop, or it the exit branch contains instructions. This ensures we 29401e04c3fSmrg * execute any instructions before the terminator or in its exit branch. 29501e04c3fSmrg */ 29601e04c3fSmrg if (extra_iteration_required) 29701e04c3fSmrg iterations++; 29801e04c3fSmrg 29901e04c3fSmrg for (int i = 0; i < iterations; i++) { 30001e04c3fSmrg exec_list copy_list; 30101e04c3fSmrg 30201e04c3fSmrg copy_list.make_empty(); 30301e04c3fSmrg clone_ir_list(mem_ctx, ©_list, &ir->body_instructions); 30401e04c3fSmrg 30501e04c3fSmrg ir_if *ir_if = ((ir_instruction *) copy_list.get_tail())->as_if(); 30601e04c3fSmrg assert(ir_if != NULL); 30701e04c3fSmrg 30801e04c3fSmrg exec_list *const first_list = first_term_then_continue 30901e04c3fSmrg ? &ir_if->then_instructions : &ir_if->else_instructions; 31001e04c3fSmrg ir_if = ((ir_instruction *) first_list->get_tail())->as_if(); 31101e04c3fSmrg 31201e04c3fSmrg ir_to_replace->insert_before(©_list); 31301e04c3fSmrg ir_to_replace->remove(); 31401e04c3fSmrg 31501e04c3fSmrg /* placeholder that will be removed in the next iteration */ 31601e04c3fSmrg ir_to_replace = 31701e04c3fSmrg new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_continue); 31801e04c3fSmrg 31901e04c3fSmrg exec_list *const second_term_continue_list = second_term_then_continue 32001e04c3fSmrg ? &ir_if->then_instructions : &ir_if->else_instructions; 32101e04c3fSmrg 32201e04c3fSmrg second_term_continue_list->push_tail(ir_to_replace); 32301e04c3fSmrg } 32401e04c3fSmrg 32501e04c3fSmrg ir_to_replace->remove(); 32601e04c3fSmrg 32701e04c3fSmrg this->progress = true; 32801e04c3fSmrg} 32901e04c3fSmrg 33001e04c3fSmrg 33101e04c3fSmrg/** 33201e04c3fSmrg * Move all of the instructions which follow \c ir_if to the end of 33301e04c3fSmrg * \c splice_dest. 33401e04c3fSmrg * 33501e04c3fSmrg * For example, in the code snippet: 33601e04c3fSmrg * 33701e04c3fSmrg * (if (cond) 33801e04c3fSmrg * (...then_instructions... 33901e04c3fSmrg * break) 34001e04c3fSmrg * (...else_instructions...)) 34101e04c3fSmrg * ...post_if_instructions... 34201e04c3fSmrg * 34301e04c3fSmrg * If \c ir_if points to the "if" instruction, and \c splice_dest points to 34401e04c3fSmrg * (...else_instructions...), the code snippet is transformed into: 34501e04c3fSmrg * 34601e04c3fSmrg * (if (cond) 34701e04c3fSmrg * (...then_instructions... 34801e04c3fSmrg * break) 34901e04c3fSmrg * (...else_instructions... 35001e04c3fSmrg * ...post_if_instructions...)) 35101e04c3fSmrg */ 35201e04c3fSmrgvoid 35301e04c3fSmrgloop_unroll_visitor::splice_post_if_instructions(ir_if *ir_if, 35401e04c3fSmrg exec_list *splice_dest) 35501e04c3fSmrg{ 35601e04c3fSmrg while (!ir_if->get_next()->is_tail_sentinel()) { 35701e04c3fSmrg ir_instruction *move_ir = (ir_instruction *) ir_if->get_next(); 35801e04c3fSmrg 35901e04c3fSmrg move_ir->remove(); 36001e04c3fSmrg splice_dest->push_tail(move_ir); 36101e04c3fSmrg } 36201e04c3fSmrg} 36301e04c3fSmrg 36401e04c3fSmrgstatic bool 36501e04c3fSmrgexit_branch_has_instructions(ir_if *term_if, bool lt_then_continue) 36601e04c3fSmrg{ 36701e04c3fSmrg if (lt_then_continue) { 36801e04c3fSmrg if (term_if->else_instructions.get_head() == 36901e04c3fSmrg term_if->else_instructions.get_tail()) 37001e04c3fSmrg return false; 37101e04c3fSmrg } else { 37201e04c3fSmrg if (term_if->then_instructions.get_head() == 37301e04c3fSmrg term_if->then_instructions.get_tail()) 37401e04c3fSmrg return false; 37501e04c3fSmrg } 37601e04c3fSmrg 37701e04c3fSmrg return true; 37801e04c3fSmrg} 37901e04c3fSmrg 38001e04c3fSmrgir_visitor_status 38101e04c3fSmrgloop_unroll_visitor::visit_leave(ir_loop *ir) 38201e04c3fSmrg{ 38301e04c3fSmrg loop_variable_state *const ls = this->state->get(ir); 38401e04c3fSmrg 38501e04c3fSmrg /* If we've entered a loop that hasn't been analyzed, something really, 38601e04c3fSmrg * really bad has happened. 38701e04c3fSmrg */ 38801e04c3fSmrg if (ls == NULL) { 38901e04c3fSmrg assert(ls != NULL); 39001e04c3fSmrg return visit_continue; 39101e04c3fSmrg } 39201e04c3fSmrg 3937ec681f3Smrg /* Limiting terminator may have iteration count of zero, 3947ec681f3Smrg * this is a valid case because the loop may break during 3957ec681f3Smrg * the first iteration. 3967ec681f3Smrg */ 39701e04c3fSmrg 39801e04c3fSmrg /* Remove the conditional break statements associated with all terminators 39901e04c3fSmrg * that are associated with a fixed iteration count, except for the one 40001e04c3fSmrg * associated with the limiting terminator--that one needs to stay, since 40101e04c3fSmrg * it terminates the loop. Exception: if the loop still has a normative 40201e04c3fSmrg * bound, then that terminates the loop, so we don't even need the limiting 40301e04c3fSmrg * terminator. 40401e04c3fSmrg */ 40501e04c3fSmrg foreach_in_list_safe(loop_terminator, t, &ls->terminators) { 40601e04c3fSmrg if (t->iterations < 0) 40701e04c3fSmrg continue; 40801e04c3fSmrg 40901e04c3fSmrg exec_list *branch_instructions; 41001e04c3fSmrg if (t != ls->limiting_terminator) { 41101e04c3fSmrg ir_instruction *ir_if_last = (ir_instruction *) 41201e04c3fSmrg t->ir->then_instructions.get_tail(); 41301e04c3fSmrg if (is_break(ir_if_last)) { 41401e04c3fSmrg branch_instructions = &t->ir->else_instructions; 41501e04c3fSmrg } else { 41601e04c3fSmrg branch_instructions = &t->ir->then_instructions; 41701e04c3fSmrg assert(is_break((ir_instruction *) 41801e04c3fSmrg t->ir->else_instructions.get_tail())); 41901e04c3fSmrg } 42001e04c3fSmrg 42101e04c3fSmrg exec_list copy_list; 42201e04c3fSmrg copy_list.make_empty(); 42301e04c3fSmrg clone_ir_list(ir, ©_list, branch_instructions); 42401e04c3fSmrg 42501e04c3fSmrg t->ir->insert_before(©_list); 42601e04c3fSmrg t->ir->remove(); 42701e04c3fSmrg 42801e04c3fSmrg assert(ls->num_loop_jumps > 0); 42901e04c3fSmrg ls->num_loop_jumps--; 43001e04c3fSmrg 43101e04c3fSmrg /* Also remove it from the terminator list */ 43201e04c3fSmrg t->remove(); 43301e04c3fSmrg 43401e04c3fSmrg this->progress = true; 43501e04c3fSmrg } 43601e04c3fSmrg } 43701e04c3fSmrg 43801e04c3fSmrg if (ls->limiting_terminator == NULL) { 43901e04c3fSmrg ir_instruction *last_ir = 44001e04c3fSmrg (ir_instruction *) ir->body_instructions.get_tail(); 44101e04c3fSmrg 44201e04c3fSmrg /* If a loop has no induction variable and the last instruction is 44301e04c3fSmrg * a break, unroll the loop with a count of 1. This is the classic 44401e04c3fSmrg * 44501e04c3fSmrg * do { 44601e04c3fSmrg * // ... 44701e04c3fSmrg * } while (false) 44801e04c3fSmrg * 44901e04c3fSmrg * that is used to wrap multi-line macros. 45001e04c3fSmrg * 45101e04c3fSmrg * If num_loop_jumps is not zero, last_ir cannot be NULL... there has to 45201e04c3fSmrg * be at least num_loop_jumps instructions in the loop. 45301e04c3fSmrg */ 45401e04c3fSmrg if (ls->num_loop_jumps == 1 && is_break(last_ir)) { 45501e04c3fSmrg last_ir->remove(); 45601e04c3fSmrg 45701e04c3fSmrg simple_unroll(ir, 1); 45801e04c3fSmrg } 45901e04c3fSmrg 46001e04c3fSmrg /* Don't try to unroll loops where the number of iterations is not known 46101e04c3fSmrg * at compile-time. 46201e04c3fSmrg */ 46301e04c3fSmrg return visit_continue; 46401e04c3fSmrg } 46501e04c3fSmrg 46601e04c3fSmrg int iterations = ls->limiting_terminator->iterations; 46701e04c3fSmrg 46801e04c3fSmrg const int max_iterations = options->MaxUnrollIterations; 46901e04c3fSmrg 47001e04c3fSmrg /* Don't try to unroll loops that have zillions of iterations either. 47101e04c3fSmrg */ 47201e04c3fSmrg if (iterations > max_iterations) 47301e04c3fSmrg return visit_continue; 47401e04c3fSmrg 47501e04c3fSmrg /* Don't try to unroll nested loops and loops with a huge body. 47601e04c3fSmrg */ 47701e04c3fSmrg loop_unroll_count count(&ir->body_instructions, ls, options); 47801e04c3fSmrg 47901e04c3fSmrg bool loop_too_large = 48001e04c3fSmrg count.nested_loop || count.nodes * iterations > max_iterations * 5; 48101e04c3fSmrg 48201e04c3fSmrg if (loop_too_large && !count.unsupported_variable_indexing && 48301e04c3fSmrg !count.array_indexed_by_induction_var_with_exact_iterations) 48401e04c3fSmrg return visit_continue; 48501e04c3fSmrg 48601e04c3fSmrg /* Note: the limiting terminator contributes 1 to ls->num_loop_jumps. 48701e04c3fSmrg * We'll be removing the limiting terminator before we unroll. 48801e04c3fSmrg */ 48901e04c3fSmrg assert(ls->num_loop_jumps > 0); 49001e04c3fSmrg unsigned predicted_num_loop_jumps = ls->num_loop_jumps - 1; 49101e04c3fSmrg 49201e04c3fSmrg if (predicted_num_loop_jumps > 1) 49301e04c3fSmrg return visit_continue; 49401e04c3fSmrg 49501e04c3fSmrg if (predicted_num_loop_jumps == 0) { 49601e04c3fSmrg simple_unroll(ir, iterations); 49701e04c3fSmrg return visit_continue; 49801e04c3fSmrg } 49901e04c3fSmrg 50001e04c3fSmrg ir_instruction *last_ir = (ir_instruction *) ir->body_instructions.get_tail(); 50101e04c3fSmrg assert(last_ir != NULL); 50201e04c3fSmrg 50301e04c3fSmrg if (is_break(last_ir)) { 50401e04c3fSmrg /* If the only loop-jump is a break at the end of the loop, the loop 50501e04c3fSmrg * will execute exactly once. Remove the break and use the simple 50601e04c3fSmrg * unroller with an iteration count of 1. 50701e04c3fSmrg */ 50801e04c3fSmrg last_ir->remove(); 50901e04c3fSmrg 51001e04c3fSmrg simple_unroll(ir, 1); 51101e04c3fSmrg return visit_continue; 51201e04c3fSmrg } 51301e04c3fSmrg 51401e04c3fSmrg /* Complex unrolling can only handle two terminators. One with an unknown 51501e04c3fSmrg * iteration count and one with a known iteration count. We have already 51601e04c3fSmrg * made sure we have a known iteration count above and removed any 51701e04c3fSmrg * unreachable terminators with a known count. Here we make sure there 51801e04c3fSmrg * isn't any additional unknown terminators, or any other jumps nested 51901e04c3fSmrg * inside futher ifs. 52001e04c3fSmrg */ 52101e04c3fSmrg if (ls->num_loop_jumps != 2 || ls->terminators.length() != 2) 52201e04c3fSmrg return visit_continue; 52301e04c3fSmrg 52401e04c3fSmrg ir_instruction *first_ir = 52501e04c3fSmrg (ir_instruction *) ir->body_instructions.get_head(); 52601e04c3fSmrg 52701e04c3fSmrg unsigned term_count = 0; 52801e04c3fSmrg bool first_term_then_continue = false; 52901e04c3fSmrg foreach_in_list(loop_terminator, t, &ls->terminators) { 53001e04c3fSmrg ir_if *ir_if = t->ir->as_if(); 53101e04c3fSmrg assert(ir_if != NULL); 53201e04c3fSmrg 53301e04c3fSmrg ir_instruction *ir_if_last = 53401e04c3fSmrg (ir_instruction *) ir_if->then_instructions.get_tail(); 53501e04c3fSmrg 53601e04c3fSmrg if (is_break(ir_if_last)) { 53701e04c3fSmrg splice_post_if_instructions(ir_if, &ir_if->else_instructions); 53801e04c3fSmrg ir_if_last->remove(); 53901e04c3fSmrg if (term_count == 1) { 54001e04c3fSmrg bool ebi = 54101e04c3fSmrg exit_branch_has_instructions(ls->limiting_terminator->ir, 54201e04c3fSmrg first_term_then_continue); 54301e04c3fSmrg complex_unroll(ir, iterations, false, 54401e04c3fSmrg first_ir->as_if() != ls->limiting_terminator->ir || 54501e04c3fSmrg ebi, 54601e04c3fSmrg first_term_then_continue); 54701e04c3fSmrg return visit_continue; 54801e04c3fSmrg } 54901e04c3fSmrg } else { 55001e04c3fSmrg ir_if_last = 55101e04c3fSmrg (ir_instruction *) ir_if->else_instructions.get_tail(); 55201e04c3fSmrg 55301e04c3fSmrg assert(is_break(ir_if_last)); 55401e04c3fSmrg if (is_break(ir_if_last)) { 55501e04c3fSmrg splice_post_if_instructions(ir_if, &ir_if->then_instructions); 55601e04c3fSmrg ir_if_last->remove(); 55701e04c3fSmrg if (term_count == 1) { 55801e04c3fSmrg bool ebi = 55901e04c3fSmrg exit_branch_has_instructions(ls->limiting_terminator->ir, 56001e04c3fSmrg first_term_then_continue); 56101e04c3fSmrg complex_unroll(ir, iterations, true, 56201e04c3fSmrg first_ir->as_if() != ls->limiting_terminator->ir || 56301e04c3fSmrg ebi, 56401e04c3fSmrg first_term_then_continue); 56501e04c3fSmrg return visit_continue; 56601e04c3fSmrg } else { 56701e04c3fSmrg first_term_then_continue = true; 56801e04c3fSmrg } 56901e04c3fSmrg } 57001e04c3fSmrg } 57101e04c3fSmrg 57201e04c3fSmrg term_count++; 57301e04c3fSmrg } 57401e04c3fSmrg 57501e04c3fSmrg /* Did not find the break statement. It must be in a complex if-nesting, 57601e04c3fSmrg * so don't try to unroll. 57701e04c3fSmrg */ 57801e04c3fSmrg return visit_continue; 57901e04c3fSmrg} 58001e04c3fSmrg 58101e04c3fSmrg 58201e04c3fSmrgbool 58301e04c3fSmrgunroll_loops(exec_list *instructions, loop_state *ls, 58401e04c3fSmrg const struct gl_shader_compiler_options *options) 58501e04c3fSmrg{ 58601e04c3fSmrg loop_unroll_visitor v(ls, options); 58701e04c3fSmrg 58801e04c3fSmrg v.run(instructions); 58901e04c3fSmrg 59001e04c3fSmrg return v.progress; 59101e04c3fSmrg} 592