1b8e80941Smrg/* 2b8e80941Smrg * Copyright © 2010 Intel Corporation 3b8e80941Smrg * 4b8e80941Smrg * Permission is hereby granted, free of charge, to any person obtaining a 5b8e80941Smrg * copy of this software and associated documentation files (the "Software"), 6b8e80941Smrg * to deal in the Software without restriction, including without limitation 7b8e80941Smrg * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8b8e80941Smrg * and/or sell copies of the Software, and to permit persons to whom the 9b8e80941Smrg * Software is furnished to do so, subject to the following conditions: 10b8e80941Smrg * 11b8e80941Smrg * The above copyright notice and this permission notice (including the next 12b8e80941Smrg * paragraph) shall be included in all copies or substantial portions of the 13b8e80941Smrg * Software. 14b8e80941Smrg * 15b8e80941Smrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16b8e80941Smrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17b8e80941Smrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18b8e80941Smrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19b8e80941Smrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20b8e80941Smrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21b8e80941Smrg * DEALINGS IN THE SOFTWARE. 22b8e80941Smrg */ 23b8e80941Smrg 24b8e80941Smrg#include "compiler/glsl_types.h" 25b8e80941Smrg#include "loop_analysis.h" 26b8e80941Smrg#include "ir_hierarchical_visitor.h" 27b8e80941Smrg 28b8e80941Smrg#include "main/mtypes.h" 29b8e80941Smrg 30b8e80941Smrgnamespace { 31b8e80941Smrg 32b8e80941Smrgclass loop_unroll_visitor : public ir_hierarchical_visitor { 33b8e80941Smrgpublic: 34b8e80941Smrg loop_unroll_visitor(loop_state *state, 35b8e80941Smrg const struct gl_shader_compiler_options *options) 36b8e80941Smrg { 37b8e80941Smrg this->state = state; 38b8e80941Smrg this->progress = false; 39b8e80941Smrg this->options = options; 40b8e80941Smrg } 41b8e80941Smrg 42b8e80941Smrg virtual ir_visitor_status visit_leave(ir_loop *ir); 43b8e80941Smrg void simple_unroll(ir_loop *ir, int iterations); 44b8e80941Smrg void complex_unroll(ir_loop *ir, int iterations, 45b8e80941Smrg bool continue_from_then_branch, 46b8e80941Smrg bool limiting_term_first, 47b8e80941Smrg bool lt_continue_from_then_branch); 48b8e80941Smrg void splice_post_if_instructions(ir_if *ir_if, exec_list *splice_dest); 49b8e80941Smrg 50b8e80941Smrg loop_state *state; 51b8e80941Smrg 52b8e80941Smrg bool progress; 53b8e80941Smrg const struct gl_shader_compiler_options *options; 54b8e80941Smrg}; 55b8e80941Smrg 56b8e80941Smrg} /* anonymous namespace */ 57b8e80941Smrg 58b8e80941Smrgclass loop_unroll_count : public ir_hierarchical_visitor { 59b8e80941Smrgpublic: 60b8e80941Smrg int nodes; 61b8e80941Smrg bool unsupported_variable_indexing; 62b8e80941Smrg bool array_indexed_by_induction_var_with_exact_iterations; 63b8e80941Smrg /* If there are nested loops, the node count will be inaccurate. */ 64b8e80941Smrg bool nested_loop; 65b8e80941Smrg 66b8e80941Smrg loop_unroll_count(exec_list *list, loop_variable_state *ls, 67b8e80941Smrg const struct gl_shader_compiler_options *options) 68b8e80941Smrg : ls(ls), options(options) 69b8e80941Smrg { 70b8e80941Smrg nodes = 0; 71b8e80941Smrg nested_loop = false; 72b8e80941Smrg unsupported_variable_indexing = false; 73b8e80941Smrg array_indexed_by_induction_var_with_exact_iterations = false; 74b8e80941Smrg 75b8e80941Smrg run(list); 76b8e80941Smrg } 77b8e80941Smrg 78b8e80941Smrg virtual ir_visitor_status visit_enter(ir_assignment *) 79b8e80941Smrg { 80b8e80941Smrg nodes++; 81b8e80941Smrg return visit_continue; 82b8e80941Smrg } 83b8e80941Smrg 84b8e80941Smrg virtual ir_visitor_status visit_enter(ir_expression *) 85b8e80941Smrg { 86b8e80941Smrg nodes++; 87b8e80941Smrg return visit_continue; 88b8e80941Smrg } 89b8e80941Smrg 90b8e80941Smrg virtual ir_visitor_status visit_enter(ir_loop *) 91b8e80941Smrg { 92b8e80941Smrg nested_loop = true; 93b8e80941Smrg return visit_continue; 94b8e80941Smrg } 95b8e80941Smrg 96b8e80941Smrg virtual ir_visitor_status visit_enter(ir_dereference_array *ir) 97b8e80941Smrg { 98b8e80941Smrg /* Force unroll in case of dynamic indexing with sampler arrays 99b8e80941Smrg * when EmitNoIndirectSampler is set. 100b8e80941Smrg */ 101b8e80941Smrg if (options->EmitNoIndirectSampler) { 102b8e80941Smrg if ((ir->array->type->is_array() && 103b8e80941Smrg ir->array->type->contains_sampler()) && 104b8e80941Smrg !ir->array_index->constant_expression_value(ralloc_parent(ir))) { 105b8e80941Smrg unsupported_variable_indexing = true; 106b8e80941Smrg return visit_continue; 107b8e80941Smrg } 108b8e80941Smrg } 109b8e80941Smrg 110b8e80941Smrg /* Check for arrays variably-indexed by a loop induction variable. 111b8e80941Smrg * Unrolling the loop may convert that access into constant-indexing. 112b8e80941Smrg * 113b8e80941Smrg * Many drivers don't support particular kinds of variable indexing, 114b8e80941Smrg * and have to resort to using lower_variable_index_to_cond_assign to 115b8e80941Smrg * handle it. This results in huge amounts of horrible code, so we'd 116b8e80941Smrg * like to avoid that if possible. Here, we just note that it will 117b8e80941Smrg * happen. 118b8e80941Smrg */ 119b8e80941Smrg if ((ir->array->type->is_array() || ir->array->type->is_matrix()) && 120b8e80941Smrg !ir->array_index->as_constant()) { 121b8e80941Smrg ir_variable *array = ir->array->variable_referenced(); 122b8e80941Smrg loop_variable *lv = ls->get(ir->array_index->variable_referenced()); 123b8e80941Smrg if (array && lv && lv->is_induction_var()) { 124b8e80941Smrg /* If an array is indexed by a loop induction variable, and the 125b8e80941Smrg * array size is exactly the number of loop iterations, this is 126b8e80941Smrg * probably a simple for-loop trying to access each element in 127b8e80941Smrg * turn; the application may expect it to be unrolled. 128b8e80941Smrg */ 129b8e80941Smrg if (int(array->type->length) == ls->limiting_terminator->iterations) 130b8e80941Smrg array_indexed_by_induction_var_with_exact_iterations = true; 131b8e80941Smrg 132b8e80941Smrg switch (array->data.mode) { 133b8e80941Smrg case ir_var_auto: 134b8e80941Smrg case ir_var_temporary: 135b8e80941Smrg case ir_var_const_in: 136b8e80941Smrg case ir_var_function_in: 137b8e80941Smrg case ir_var_function_out: 138b8e80941Smrg case ir_var_function_inout: 139b8e80941Smrg if (options->EmitNoIndirectTemp) 140b8e80941Smrg unsupported_variable_indexing = true; 141b8e80941Smrg break; 142b8e80941Smrg case ir_var_uniform: 143b8e80941Smrg case ir_var_shader_storage: 144b8e80941Smrg if (options->EmitNoIndirectUniform) 145b8e80941Smrg unsupported_variable_indexing = true; 146b8e80941Smrg break; 147b8e80941Smrg case ir_var_shader_in: 148b8e80941Smrg if (options->EmitNoIndirectInput) 149b8e80941Smrg unsupported_variable_indexing = true; 150b8e80941Smrg break; 151b8e80941Smrg case ir_var_shader_out: 152b8e80941Smrg if (options->EmitNoIndirectOutput) 153b8e80941Smrg unsupported_variable_indexing = true; 154b8e80941Smrg break; 155b8e80941Smrg } 156b8e80941Smrg } 157b8e80941Smrg } 158b8e80941Smrg return visit_continue; 159b8e80941Smrg } 160b8e80941Smrg 161b8e80941Smrgprivate: 162b8e80941Smrg loop_variable_state *ls; 163b8e80941Smrg const struct gl_shader_compiler_options *options; 164b8e80941Smrg}; 165b8e80941Smrg 166b8e80941Smrg 167b8e80941Smrg/** 168b8e80941Smrg * Unroll a loop which does not contain any jumps. For example, if the input 169b8e80941Smrg * is: 170b8e80941Smrg * 171b8e80941Smrg * (loop (...) ...instrs...) 172b8e80941Smrg * 173b8e80941Smrg * And the iteration count is 3, the output will be: 174b8e80941Smrg * 175b8e80941Smrg * ...instrs... ...instrs... ...instrs... 176b8e80941Smrg */ 177b8e80941Smrgvoid 178b8e80941Smrgloop_unroll_visitor::simple_unroll(ir_loop *ir, int iterations) 179b8e80941Smrg{ 180b8e80941Smrg void *const mem_ctx = ralloc_parent(ir); 181b8e80941Smrg loop_variable_state *const ls = this->state->get(ir); 182b8e80941Smrg 183b8e80941Smrg /* If there are no terminators, then the loop iteration count must be 1. 184b8e80941Smrg * This is the 'do { } while (false);' case. 185b8e80941Smrg */ 186b8e80941Smrg assert(!ls->terminators.is_empty() || iterations == 1); 187b8e80941Smrg 188b8e80941Smrg ir_instruction *first_ir = 189b8e80941Smrg (ir_instruction *) ir->body_instructions.get_head(); 190b8e80941Smrg 191b8e80941Smrg if (!first_ir) { 192b8e80941Smrg /* The loop is empty remove it and return */ 193b8e80941Smrg ir->remove(); 194b8e80941Smrg return; 195b8e80941Smrg } 196b8e80941Smrg 197b8e80941Smrg ir_if *limit_if = NULL; 198b8e80941Smrg bool exit_branch_has_instructions = false; 199b8e80941Smrg if (ls->limiting_terminator) { 200b8e80941Smrg limit_if = ls->limiting_terminator->ir; 201b8e80941Smrg ir_instruction *ir_if_last = (ir_instruction *) 202b8e80941Smrg limit_if->then_instructions.get_tail(); 203b8e80941Smrg 204b8e80941Smrg if (is_break(ir_if_last)) { 205b8e80941Smrg if (ir_if_last != limit_if->then_instructions.get_head()) 206b8e80941Smrg exit_branch_has_instructions = true; 207b8e80941Smrg 208b8e80941Smrg splice_post_if_instructions(limit_if, &limit_if->else_instructions); 209b8e80941Smrg ir_if_last->remove(); 210b8e80941Smrg } else { 211b8e80941Smrg ir_if_last = (ir_instruction *) 212b8e80941Smrg limit_if->else_instructions.get_tail(); 213b8e80941Smrg assert(is_break(ir_if_last)); 214b8e80941Smrg 215b8e80941Smrg if (ir_if_last != limit_if->else_instructions.get_head()) 216b8e80941Smrg exit_branch_has_instructions = true; 217b8e80941Smrg 218b8e80941Smrg splice_post_if_instructions(limit_if, &limit_if->then_instructions); 219b8e80941Smrg ir_if_last->remove(); 220b8e80941Smrg } 221b8e80941Smrg } 222b8e80941Smrg 223b8e80941Smrg /* Because 'iterations' is the number of times we pass over the *entire* 224b8e80941Smrg * loop body before hitting the first break, we need to bump the number of 225b8e80941Smrg * iterations if the limiting terminator is not the first instruction in 226b8e80941Smrg * the loop, or it the exit branch contains instructions. This ensures we 227b8e80941Smrg * execute any instructions before the terminator or in its exit branch. 228b8e80941Smrg */ 229b8e80941Smrg if (!ls->terminators.is_empty() && 230b8e80941Smrg (limit_if != first_ir->as_if() || exit_branch_has_instructions)) 231b8e80941Smrg iterations++; 232b8e80941Smrg 233b8e80941Smrg for (int i = 0; i < iterations; i++) { 234b8e80941Smrg exec_list copy_list; 235b8e80941Smrg 236b8e80941Smrg copy_list.make_empty(); 237b8e80941Smrg clone_ir_list(mem_ctx, ©_list, &ir->body_instructions); 238b8e80941Smrg 239b8e80941Smrg ir->insert_before(©_list); 240b8e80941Smrg } 241b8e80941Smrg 242b8e80941Smrg /* The loop has been replaced by the unrolled copies. Remove the original 243b8e80941Smrg * loop from the IR sequence. 244b8e80941Smrg */ 245b8e80941Smrg ir->remove(); 246b8e80941Smrg 247b8e80941Smrg this->progress = true; 248b8e80941Smrg} 249b8e80941Smrg 250b8e80941Smrg 251b8e80941Smrg/** 252b8e80941Smrg * Unroll a loop whose last statement is an ir_if. If \c 253b8e80941Smrg * continue_from_then_branch is true, the loop is repeated only when the 254b8e80941Smrg * "then" branch of the if is taken; otherwise it is repeated only when the 255b8e80941Smrg * "else" branch of the if is taken. 256b8e80941Smrg * 257b8e80941Smrg * For example, if the input is: 258b8e80941Smrg * 259b8e80941Smrg * (loop (...) 260b8e80941Smrg * ...body... 261b8e80941Smrg * (if (cond) 262b8e80941Smrg * (...then_instrs...) 263b8e80941Smrg * (...else_instrs...))) 264b8e80941Smrg * 265b8e80941Smrg * And the iteration count is 3, and \c continue_from_then_branch is true, 266b8e80941Smrg * then the output will be: 267b8e80941Smrg * 268b8e80941Smrg * ...body... 269b8e80941Smrg * (if (cond) 270b8e80941Smrg * (...then_instrs... 271b8e80941Smrg * ...body... 272b8e80941Smrg * (if (cond) 273b8e80941Smrg * (...then_instrs... 274b8e80941Smrg * ...body... 275b8e80941Smrg * (if (cond) 276b8e80941Smrg * (...then_instrs...) 277b8e80941Smrg * (...else_instrs...))) 278b8e80941Smrg * (...else_instrs...))) 279b8e80941Smrg * (...else_instrs)) 280b8e80941Smrg */ 281b8e80941Smrgvoid 282b8e80941Smrgloop_unroll_visitor::complex_unroll(ir_loop *ir, int iterations, 283b8e80941Smrg bool second_term_then_continue, 284b8e80941Smrg bool extra_iteration_required, 285b8e80941Smrg bool first_term_then_continue) 286b8e80941Smrg{ 287b8e80941Smrg void *const mem_ctx = ralloc_parent(ir); 288b8e80941Smrg ir_instruction *ir_to_replace = ir; 289b8e80941Smrg 290b8e80941Smrg /* Because 'iterations' is the number of times we pass over the *entire* 291b8e80941Smrg * loop body before hitting the first break, we need to bump the number of 292b8e80941Smrg * iterations if the limiting terminator is not the first instruction in 293b8e80941Smrg * the loop, or it the exit branch contains instructions. This ensures we 294b8e80941Smrg * execute any instructions before the terminator or in its exit branch. 295b8e80941Smrg */ 296b8e80941Smrg if (extra_iteration_required) 297b8e80941Smrg iterations++; 298b8e80941Smrg 299b8e80941Smrg for (int i = 0; i < iterations; i++) { 300b8e80941Smrg exec_list copy_list; 301b8e80941Smrg 302b8e80941Smrg copy_list.make_empty(); 303b8e80941Smrg clone_ir_list(mem_ctx, ©_list, &ir->body_instructions); 304b8e80941Smrg 305b8e80941Smrg ir_if *ir_if = ((ir_instruction *) copy_list.get_tail())->as_if(); 306b8e80941Smrg assert(ir_if != NULL); 307b8e80941Smrg 308b8e80941Smrg exec_list *const first_list = first_term_then_continue 309b8e80941Smrg ? &ir_if->then_instructions : &ir_if->else_instructions; 310b8e80941Smrg ir_if = ((ir_instruction *) first_list->get_tail())->as_if(); 311b8e80941Smrg 312b8e80941Smrg ir_to_replace->insert_before(©_list); 313b8e80941Smrg ir_to_replace->remove(); 314b8e80941Smrg 315b8e80941Smrg /* placeholder that will be removed in the next iteration */ 316b8e80941Smrg ir_to_replace = 317b8e80941Smrg new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_continue); 318b8e80941Smrg 319b8e80941Smrg exec_list *const second_term_continue_list = second_term_then_continue 320b8e80941Smrg ? &ir_if->then_instructions : &ir_if->else_instructions; 321b8e80941Smrg 322b8e80941Smrg second_term_continue_list->push_tail(ir_to_replace); 323b8e80941Smrg } 324b8e80941Smrg 325b8e80941Smrg ir_to_replace->remove(); 326b8e80941Smrg 327b8e80941Smrg this->progress = true; 328b8e80941Smrg} 329b8e80941Smrg 330b8e80941Smrg 331b8e80941Smrg/** 332b8e80941Smrg * Move all of the instructions which follow \c ir_if to the end of 333b8e80941Smrg * \c splice_dest. 334b8e80941Smrg * 335b8e80941Smrg * For example, in the code snippet: 336b8e80941Smrg * 337b8e80941Smrg * (if (cond) 338b8e80941Smrg * (...then_instructions... 339b8e80941Smrg * break) 340b8e80941Smrg * (...else_instructions...)) 341b8e80941Smrg * ...post_if_instructions... 342b8e80941Smrg * 343b8e80941Smrg * If \c ir_if points to the "if" instruction, and \c splice_dest points to 344b8e80941Smrg * (...else_instructions...), the code snippet is transformed into: 345b8e80941Smrg * 346b8e80941Smrg * (if (cond) 347b8e80941Smrg * (...then_instructions... 348b8e80941Smrg * break) 349b8e80941Smrg * (...else_instructions... 350b8e80941Smrg * ...post_if_instructions...)) 351b8e80941Smrg */ 352b8e80941Smrgvoid 353b8e80941Smrgloop_unroll_visitor::splice_post_if_instructions(ir_if *ir_if, 354b8e80941Smrg exec_list *splice_dest) 355b8e80941Smrg{ 356b8e80941Smrg while (!ir_if->get_next()->is_tail_sentinel()) { 357b8e80941Smrg ir_instruction *move_ir = (ir_instruction *) ir_if->get_next(); 358b8e80941Smrg 359b8e80941Smrg move_ir->remove(); 360b8e80941Smrg splice_dest->push_tail(move_ir); 361b8e80941Smrg } 362b8e80941Smrg} 363b8e80941Smrg 364b8e80941Smrgstatic bool 365b8e80941Smrgexit_branch_has_instructions(ir_if *term_if, bool lt_then_continue) 366b8e80941Smrg{ 367b8e80941Smrg if (lt_then_continue) { 368b8e80941Smrg if (term_if->else_instructions.get_head() == 369b8e80941Smrg term_if->else_instructions.get_tail()) 370b8e80941Smrg return false; 371b8e80941Smrg } else { 372b8e80941Smrg if (term_if->then_instructions.get_head() == 373b8e80941Smrg term_if->then_instructions.get_tail()) 374b8e80941Smrg return false; 375b8e80941Smrg } 376b8e80941Smrg 377b8e80941Smrg return true; 378b8e80941Smrg} 379b8e80941Smrg 380b8e80941Smrgir_visitor_status 381b8e80941Smrgloop_unroll_visitor::visit_leave(ir_loop *ir) 382b8e80941Smrg{ 383b8e80941Smrg loop_variable_state *const ls = this->state->get(ir); 384b8e80941Smrg 385b8e80941Smrg /* If we've entered a loop that hasn't been analyzed, something really, 386b8e80941Smrg * really bad has happened. 387b8e80941Smrg */ 388b8e80941Smrg if (ls == NULL) { 389b8e80941Smrg assert(ls != NULL); 390b8e80941Smrg return visit_continue; 391b8e80941Smrg } 392b8e80941Smrg 393b8e80941Smrg if (ls->limiting_terminator != NULL) { 394b8e80941Smrg /* If the limiting terminator has an iteration count of zero, then we've 395b8e80941Smrg * proven that the loop cannot run, so delete it. 396b8e80941Smrg */ 397b8e80941Smrg int iterations = ls->limiting_terminator->iterations; 398b8e80941Smrg if (iterations == 0) { 399b8e80941Smrg ir->remove(); 400b8e80941Smrg this->progress = true; 401b8e80941Smrg return visit_continue; 402b8e80941Smrg } 403b8e80941Smrg } 404b8e80941Smrg 405b8e80941Smrg /* Remove the conditional break statements associated with all terminators 406b8e80941Smrg * that are associated with a fixed iteration count, except for the one 407b8e80941Smrg * associated with the limiting terminator--that one needs to stay, since 408b8e80941Smrg * it terminates the loop. Exception: if the loop still has a normative 409b8e80941Smrg * bound, then that terminates the loop, so we don't even need the limiting 410b8e80941Smrg * terminator. 411b8e80941Smrg */ 412b8e80941Smrg foreach_in_list_safe(loop_terminator, t, &ls->terminators) { 413b8e80941Smrg if (t->iterations < 0) 414b8e80941Smrg continue; 415b8e80941Smrg 416b8e80941Smrg exec_list *branch_instructions; 417b8e80941Smrg if (t != ls->limiting_terminator) { 418b8e80941Smrg ir_instruction *ir_if_last = (ir_instruction *) 419b8e80941Smrg t->ir->then_instructions.get_tail(); 420b8e80941Smrg if (is_break(ir_if_last)) { 421b8e80941Smrg branch_instructions = &t->ir->else_instructions; 422b8e80941Smrg } else { 423b8e80941Smrg branch_instructions = &t->ir->then_instructions; 424b8e80941Smrg assert(is_break((ir_instruction *) 425b8e80941Smrg t->ir->else_instructions.get_tail())); 426b8e80941Smrg } 427b8e80941Smrg 428b8e80941Smrg exec_list copy_list; 429b8e80941Smrg copy_list.make_empty(); 430b8e80941Smrg clone_ir_list(ir, ©_list, branch_instructions); 431b8e80941Smrg 432b8e80941Smrg t->ir->insert_before(©_list); 433b8e80941Smrg t->ir->remove(); 434b8e80941Smrg 435b8e80941Smrg assert(ls->num_loop_jumps > 0); 436b8e80941Smrg ls->num_loop_jumps--; 437b8e80941Smrg 438b8e80941Smrg /* Also remove it from the terminator list */ 439b8e80941Smrg t->remove(); 440b8e80941Smrg 441b8e80941Smrg this->progress = true; 442b8e80941Smrg } 443b8e80941Smrg } 444b8e80941Smrg 445b8e80941Smrg if (ls->limiting_terminator == NULL) { 446b8e80941Smrg ir_instruction *last_ir = 447b8e80941Smrg (ir_instruction *) ir->body_instructions.get_tail(); 448b8e80941Smrg 449b8e80941Smrg /* If a loop has no induction variable and the last instruction is 450b8e80941Smrg * a break, unroll the loop with a count of 1. This is the classic 451b8e80941Smrg * 452b8e80941Smrg * do { 453b8e80941Smrg * // ... 454b8e80941Smrg * } while (false) 455b8e80941Smrg * 456b8e80941Smrg * that is used to wrap multi-line macros. 457b8e80941Smrg * 458b8e80941Smrg * If num_loop_jumps is not zero, last_ir cannot be NULL... there has to 459b8e80941Smrg * be at least num_loop_jumps instructions in the loop. 460b8e80941Smrg */ 461b8e80941Smrg if (ls->num_loop_jumps == 1 && is_break(last_ir)) { 462b8e80941Smrg last_ir->remove(); 463b8e80941Smrg 464b8e80941Smrg simple_unroll(ir, 1); 465b8e80941Smrg } 466b8e80941Smrg 467b8e80941Smrg /* Don't try to unroll loops where the number of iterations is not known 468b8e80941Smrg * at compile-time. 469b8e80941Smrg */ 470b8e80941Smrg return visit_continue; 471b8e80941Smrg } 472b8e80941Smrg 473b8e80941Smrg int iterations = ls->limiting_terminator->iterations; 474b8e80941Smrg 475b8e80941Smrg const int max_iterations = options->MaxUnrollIterations; 476b8e80941Smrg 477b8e80941Smrg /* Don't try to unroll loops that have zillions of iterations either. 478b8e80941Smrg */ 479b8e80941Smrg if (iterations > max_iterations) 480b8e80941Smrg return visit_continue; 481b8e80941Smrg 482b8e80941Smrg /* Don't try to unroll nested loops and loops with a huge body. 483b8e80941Smrg */ 484b8e80941Smrg loop_unroll_count count(&ir->body_instructions, ls, options); 485b8e80941Smrg 486b8e80941Smrg bool loop_too_large = 487b8e80941Smrg count.nested_loop || count.nodes * iterations > max_iterations * 5; 488b8e80941Smrg 489b8e80941Smrg if (loop_too_large && !count.unsupported_variable_indexing && 490b8e80941Smrg !count.array_indexed_by_induction_var_with_exact_iterations) 491b8e80941Smrg return visit_continue; 492b8e80941Smrg 493b8e80941Smrg /* Note: the limiting terminator contributes 1 to ls->num_loop_jumps. 494b8e80941Smrg * We'll be removing the limiting terminator before we unroll. 495b8e80941Smrg */ 496b8e80941Smrg assert(ls->num_loop_jumps > 0); 497b8e80941Smrg unsigned predicted_num_loop_jumps = ls->num_loop_jumps - 1; 498b8e80941Smrg 499b8e80941Smrg if (predicted_num_loop_jumps > 1) 500b8e80941Smrg return visit_continue; 501b8e80941Smrg 502b8e80941Smrg if (predicted_num_loop_jumps == 0) { 503b8e80941Smrg simple_unroll(ir, iterations); 504b8e80941Smrg return visit_continue; 505b8e80941Smrg } 506b8e80941Smrg 507b8e80941Smrg ir_instruction *last_ir = (ir_instruction *) ir->body_instructions.get_tail(); 508b8e80941Smrg assert(last_ir != NULL); 509b8e80941Smrg 510b8e80941Smrg if (is_break(last_ir)) { 511b8e80941Smrg /* If the only loop-jump is a break at the end of the loop, the loop 512b8e80941Smrg * will execute exactly once. Remove the break and use the simple 513b8e80941Smrg * unroller with an iteration count of 1. 514b8e80941Smrg */ 515b8e80941Smrg last_ir->remove(); 516b8e80941Smrg 517b8e80941Smrg simple_unroll(ir, 1); 518b8e80941Smrg return visit_continue; 519b8e80941Smrg } 520b8e80941Smrg 521b8e80941Smrg /* Complex unrolling can only handle two terminators. One with an unknown 522b8e80941Smrg * iteration count and one with a known iteration count. We have already 523b8e80941Smrg * made sure we have a known iteration count above and removed any 524b8e80941Smrg * unreachable terminators with a known count. Here we make sure there 525b8e80941Smrg * isn't any additional unknown terminators, or any other jumps nested 526b8e80941Smrg * inside futher ifs. 527b8e80941Smrg */ 528b8e80941Smrg if (ls->num_loop_jumps != 2 || ls->terminators.length() != 2) 529b8e80941Smrg return visit_continue; 530b8e80941Smrg 531b8e80941Smrg ir_instruction *first_ir = 532b8e80941Smrg (ir_instruction *) ir->body_instructions.get_head(); 533b8e80941Smrg 534b8e80941Smrg unsigned term_count = 0; 535b8e80941Smrg bool first_term_then_continue = false; 536b8e80941Smrg foreach_in_list(loop_terminator, t, &ls->terminators) { 537b8e80941Smrg ir_if *ir_if = t->ir->as_if(); 538b8e80941Smrg assert(ir_if != NULL); 539b8e80941Smrg 540b8e80941Smrg ir_instruction *ir_if_last = 541b8e80941Smrg (ir_instruction *) ir_if->then_instructions.get_tail(); 542b8e80941Smrg 543b8e80941Smrg if (is_break(ir_if_last)) { 544b8e80941Smrg splice_post_if_instructions(ir_if, &ir_if->else_instructions); 545b8e80941Smrg ir_if_last->remove(); 546b8e80941Smrg if (term_count == 1) { 547b8e80941Smrg bool ebi = 548b8e80941Smrg exit_branch_has_instructions(ls->limiting_terminator->ir, 549b8e80941Smrg first_term_then_continue); 550b8e80941Smrg complex_unroll(ir, iterations, false, 551b8e80941Smrg first_ir->as_if() != ls->limiting_terminator->ir || 552b8e80941Smrg ebi, 553b8e80941Smrg first_term_then_continue); 554b8e80941Smrg return visit_continue; 555b8e80941Smrg } 556b8e80941Smrg } else { 557b8e80941Smrg ir_if_last = 558b8e80941Smrg (ir_instruction *) ir_if->else_instructions.get_tail(); 559b8e80941Smrg 560b8e80941Smrg assert(is_break(ir_if_last)); 561b8e80941Smrg if (is_break(ir_if_last)) { 562b8e80941Smrg splice_post_if_instructions(ir_if, &ir_if->then_instructions); 563b8e80941Smrg ir_if_last->remove(); 564b8e80941Smrg if (term_count == 1) { 565b8e80941Smrg bool ebi = 566b8e80941Smrg exit_branch_has_instructions(ls->limiting_terminator->ir, 567b8e80941Smrg first_term_then_continue); 568b8e80941Smrg complex_unroll(ir, iterations, true, 569b8e80941Smrg first_ir->as_if() != ls->limiting_terminator->ir || 570b8e80941Smrg ebi, 571b8e80941Smrg first_term_then_continue); 572b8e80941Smrg return visit_continue; 573b8e80941Smrg } else { 574b8e80941Smrg first_term_then_continue = true; 575b8e80941Smrg } 576b8e80941Smrg } 577b8e80941Smrg } 578b8e80941Smrg 579b8e80941Smrg term_count++; 580b8e80941Smrg } 581b8e80941Smrg 582b8e80941Smrg /* Did not find the break statement. It must be in a complex if-nesting, 583b8e80941Smrg * so don't try to unroll. 584b8e80941Smrg */ 585b8e80941Smrg return visit_continue; 586b8e80941Smrg} 587b8e80941Smrg 588b8e80941Smrg 589b8e80941Smrgbool 590b8e80941Smrgunroll_loops(exec_list *instructions, loop_state *ls, 591b8e80941Smrg const struct gl_shader_compiler_options *options) 592b8e80941Smrg{ 593b8e80941Smrg loop_unroll_visitor v(ls, options); 594b8e80941Smrg 595b8e80941Smrg v.run(instructions); 596b8e80941Smrg 597b8e80941Smrg return v.progress; 598b8e80941Smrg} 599