1/* 2 * Copyright © 2010 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24#include "compiler/glsl_types.h" 25#include "loop_analysis.h" 26#include "ir_hierarchical_visitor.h" 27 28#include "main/mtypes.h" 29 30namespace { 31 32class loop_unroll_visitor : public ir_hierarchical_visitor { 33public: 34 loop_unroll_visitor(loop_state *state, 35 const struct gl_shader_compiler_options *options) 36 { 37 this->state = state; 38 this->progress = false; 39 this->options = options; 40 } 41 42 virtual ir_visitor_status visit_leave(ir_loop *ir); 43 void simple_unroll(ir_loop *ir, int iterations); 44 void complex_unroll(ir_loop *ir, int iterations, 45 bool continue_from_then_branch, 46 bool limiting_term_first, 47 bool lt_continue_from_then_branch); 48 void splice_post_if_instructions(ir_if *ir_if, exec_list *splice_dest); 49 50 loop_state *state; 51 52 bool progress; 53 const struct gl_shader_compiler_options *options; 54}; 55 56} /* anonymous namespace */ 57 58class loop_unroll_count : public ir_hierarchical_visitor { 59public: 60 int nodes; 61 bool unsupported_variable_indexing; 62 bool array_indexed_by_induction_var_with_exact_iterations; 63 /* If there are nested loops, the node count will be inaccurate. */ 64 bool nested_loop; 65 66 loop_unroll_count(exec_list *list, loop_variable_state *ls, 67 const struct gl_shader_compiler_options *options) 68 : ls(ls), options(options) 69 { 70 nodes = 0; 71 nested_loop = false; 72 unsupported_variable_indexing = false; 73 array_indexed_by_induction_var_with_exact_iterations = false; 74 75 run(list); 76 } 77 78 virtual ir_visitor_status visit_enter(ir_assignment *) 79 { 80 nodes++; 81 return visit_continue; 82 } 83 84 virtual ir_visitor_status visit_enter(ir_expression *) 85 { 86 nodes++; 87 return visit_continue; 88 } 89 90 virtual ir_visitor_status visit_enter(ir_loop *) 91 { 92 nested_loop = true; 93 return visit_continue; 94 } 95 96 virtual ir_visitor_status visit_enter(ir_dereference_array *ir) 97 { 98 /* Force unroll in case of dynamic indexing with sampler arrays 99 * when EmitNoIndirectSampler is set. 100 */ 101 if (options->EmitNoIndirectSampler) { 102 if ((ir->array->type->is_array() && 103 ir->array->type->contains_sampler()) && 104 !ir->array_index->constant_expression_value(ralloc_parent(ir))) { 105 unsupported_variable_indexing = true; 106 return visit_continue; 107 } 108 } 109 110 /* Check for arrays variably-indexed by a loop induction variable. 111 * Unrolling the loop may convert that access into constant-indexing. 112 * 113 * Many drivers don't support particular kinds of variable indexing, 114 * and have to resort to using lower_variable_index_to_cond_assign to 115 * handle it. This results in huge amounts of horrible code, so we'd 116 * like to avoid that if possible. Here, we just note that it will 117 * happen. 118 */ 119 if ((ir->array->type->is_array() || ir->array->type->is_matrix()) && 120 !ir->array_index->as_constant()) { 121 ir_variable *array = ir->array->variable_referenced(); 122 loop_variable *lv = ls->get(ir->array_index->variable_referenced()); 123 if (array && lv && lv->is_induction_var()) { 124 /* If an array is indexed by a loop induction variable, and the 125 * array size is exactly the number of loop iterations, this is 126 * probably a simple for-loop trying to access each element in 127 * turn; the application may expect it to be unrolled. 128 */ 129 if (int(array->type->length) == ls->limiting_terminator->iterations) 130 array_indexed_by_induction_var_with_exact_iterations = true; 131 132 switch (array->data.mode) { 133 case ir_var_auto: 134 case ir_var_temporary: 135 case ir_var_const_in: 136 case ir_var_function_in: 137 case ir_var_function_out: 138 case ir_var_function_inout: 139 if (options->EmitNoIndirectTemp) 140 unsupported_variable_indexing = true; 141 break; 142 case ir_var_uniform: 143 case ir_var_shader_storage: 144 if (options->EmitNoIndirectUniform) 145 unsupported_variable_indexing = true; 146 break; 147 case ir_var_shader_in: 148 if (options->EmitNoIndirectInput) 149 unsupported_variable_indexing = true; 150 break; 151 case ir_var_shader_out: 152 if (options->EmitNoIndirectOutput) 153 unsupported_variable_indexing = true; 154 break; 155 } 156 } 157 } 158 return visit_continue; 159 } 160 161private: 162 loop_variable_state *ls; 163 const struct gl_shader_compiler_options *options; 164}; 165 166 167/** 168 * Unroll a loop which does not contain any jumps. For example, if the input 169 * is: 170 * 171 * (loop (...) ...instrs...) 172 * 173 * And the iteration count is 3, the output will be: 174 * 175 * ...instrs... ...instrs... ...instrs... 176 */ 177void 178loop_unroll_visitor::simple_unroll(ir_loop *ir, int iterations) 179{ 180 void *const mem_ctx = ralloc_parent(ir); 181 loop_variable_state *const ls = this->state->get(ir); 182 183 /* If there are no terminators, then the loop iteration count must be 1. 184 * This is the 'do { } while (false);' case. 185 */ 186 assert(!ls->terminators.is_empty() || iterations == 1); 187 188 ir_instruction *first_ir = 189 (ir_instruction *) ir->body_instructions.get_head(); 190 191 if (!first_ir) { 192 /* The loop is empty remove it and return */ 193 ir->remove(); 194 return; 195 } 196 197 ir_if *limit_if = NULL; 198 bool exit_branch_has_instructions = false; 199 if (ls->limiting_terminator) { 200 limit_if = ls->limiting_terminator->ir; 201 ir_instruction *ir_if_last = (ir_instruction *) 202 limit_if->then_instructions.get_tail(); 203 204 if (is_break(ir_if_last)) { 205 if (ir_if_last != limit_if->then_instructions.get_head()) 206 exit_branch_has_instructions = true; 207 208 splice_post_if_instructions(limit_if, &limit_if->else_instructions); 209 ir_if_last->remove(); 210 } else { 211 ir_if_last = (ir_instruction *) 212 limit_if->else_instructions.get_tail(); 213 assert(is_break(ir_if_last)); 214 215 if (ir_if_last != limit_if->else_instructions.get_head()) 216 exit_branch_has_instructions = true; 217 218 splice_post_if_instructions(limit_if, &limit_if->then_instructions); 219 ir_if_last->remove(); 220 } 221 } 222 223 /* Because 'iterations' is the number of times we pass over the *entire* 224 * loop body before hitting the first break, we need to bump the number of 225 * iterations if the limiting terminator is not the first instruction in 226 * the loop, or it the exit branch contains instructions. This ensures we 227 * execute any instructions before the terminator or in its exit branch. 228 */ 229 if (!ls->terminators.is_empty() && 230 (limit_if != first_ir->as_if() || exit_branch_has_instructions)) 231 iterations++; 232 233 for (int i = 0; i < iterations; i++) { 234 exec_list copy_list; 235 236 copy_list.make_empty(); 237 clone_ir_list(mem_ctx, ©_list, &ir->body_instructions); 238 239 ir->insert_before(©_list); 240 } 241 242 /* The loop has been replaced by the unrolled copies. Remove the original 243 * loop from the IR sequence. 244 */ 245 ir->remove(); 246 247 this->progress = true; 248} 249 250 251/** 252 * Unroll a loop whose last statement is an ir_if. If \c 253 * continue_from_then_branch is true, the loop is repeated only when the 254 * "then" branch of the if is taken; otherwise it is repeated only when the 255 * "else" branch of the if is taken. 256 * 257 * For example, if the input is: 258 * 259 * (loop (...) 260 * ...body... 261 * (if (cond) 262 * (...then_instrs...) 263 * (...else_instrs...))) 264 * 265 * And the iteration count is 3, and \c continue_from_then_branch is true, 266 * then the output will be: 267 * 268 * ...body... 269 * (if (cond) 270 * (...then_instrs... 271 * ...body... 272 * (if (cond) 273 * (...then_instrs... 274 * ...body... 275 * (if (cond) 276 * (...then_instrs...) 277 * (...else_instrs...))) 278 * (...else_instrs...))) 279 * (...else_instrs)) 280 */ 281void 282loop_unroll_visitor::complex_unroll(ir_loop *ir, int iterations, 283 bool second_term_then_continue, 284 bool extra_iteration_required, 285 bool first_term_then_continue) 286{ 287 void *const mem_ctx = ralloc_parent(ir); 288 ir_instruction *ir_to_replace = ir; 289 290 /* Because 'iterations' is the number of times we pass over the *entire* 291 * loop body before hitting the first break, we need to bump the number of 292 * iterations if the limiting terminator is not the first instruction in 293 * the loop, or it the exit branch contains instructions. This ensures we 294 * execute any instructions before the terminator or in its exit branch. 295 */ 296 if (extra_iteration_required) 297 iterations++; 298 299 for (int i = 0; i < iterations; i++) { 300 exec_list copy_list; 301 302 copy_list.make_empty(); 303 clone_ir_list(mem_ctx, ©_list, &ir->body_instructions); 304 305 ir_if *ir_if = ((ir_instruction *) copy_list.get_tail())->as_if(); 306 assert(ir_if != NULL); 307 308 exec_list *const first_list = first_term_then_continue 309 ? &ir_if->then_instructions : &ir_if->else_instructions; 310 ir_if = ((ir_instruction *) first_list->get_tail())->as_if(); 311 312 ir_to_replace->insert_before(©_list); 313 ir_to_replace->remove(); 314 315 /* placeholder that will be removed in the next iteration */ 316 ir_to_replace = 317 new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_continue); 318 319 exec_list *const second_term_continue_list = second_term_then_continue 320 ? &ir_if->then_instructions : &ir_if->else_instructions; 321 322 second_term_continue_list->push_tail(ir_to_replace); 323 } 324 325 ir_to_replace->remove(); 326 327 this->progress = true; 328} 329 330 331/** 332 * Move all of the instructions which follow \c ir_if to the end of 333 * \c splice_dest. 334 * 335 * For example, in the code snippet: 336 * 337 * (if (cond) 338 * (...then_instructions... 339 * break) 340 * (...else_instructions...)) 341 * ...post_if_instructions... 342 * 343 * If \c ir_if points to the "if" instruction, and \c splice_dest points to 344 * (...else_instructions...), the code snippet is transformed into: 345 * 346 * (if (cond) 347 * (...then_instructions... 348 * break) 349 * (...else_instructions... 350 * ...post_if_instructions...)) 351 */ 352void 353loop_unroll_visitor::splice_post_if_instructions(ir_if *ir_if, 354 exec_list *splice_dest) 355{ 356 while (!ir_if->get_next()->is_tail_sentinel()) { 357 ir_instruction *move_ir = (ir_instruction *) ir_if->get_next(); 358 359 move_ir->remove(); 360 splice_dest->push_tail(move_ir); 361 } 362} 363 364static bool 365exit_branch_has_instructions(ir_if *term_if, bool lt_then_continue) 366{ 367 if (lt_then_continue) { 368 if (term_if->else_instructions.get_head() == 369 term_if->else_instructions.get_tail()) 370 return false; 371 } else { 372 if (term_if->then_instructions.get_head() == 373 term_if->then_instructions.get_tail()) 374 return false; 375 } 376 377 return true; 378} 379 380ir_visitor_status 381loop_unroll_visitor::visit_leave(ir_loop *ir) 382{ 383 loop_variable_state *const ls = this->state->get(ir); 384 385 /* If we've entered a loop that hasn't been analyzed, something really, 386 * really bad has happened. 387 */ 388 if (ls == NULL) { 389 assert(ls != NULL); 390 return visit_continue; 391 } 392 393 /* Limiting terminator may have iteration count of zero, 394 * this is a valid case because the loop may break during 395 * the first iteration. 396 */ 397 398 /* Remove the conditional break statements associated with all terminators 399 * that are associated with a fixed iteration count, except for the one 400 * associated with the limiting terminator--that one needs to stay, since 401 * it terminates the loop. Exception: if the loop still has a normative 402 * bound, then that terminates the loop, so we don't even need the limiting 403 * terminator. 404 */ 405 foreach_in_list_safe(loop_terminator, t, &ls->terminators) { 406 if (t->iterations < 0) 407 continue; 408 409 exec_list *branch_instructions; 410 if (t != ls->limiting_terminator) { 411 ir_instruction *ir_if_last = (ir_instruction *) 412 t->ir->then_instructions.get_tail(); 413 if (is_break(ir_if_last)) { 414 branch_instructions = &t->ir->else_instructions; 415 } else { 416 branch_instructions = &t->ir->then_instructions; 417 assert(is_break((ir_instruction *) 418 t->ir->else_instructions.get_tail())); 419 } 420 421 exec_list copy_list; 422 copy_list.make_empty(); 423 clone_ir_list(ir, ©_list, branch_instructions); 424 425 t->ir->insert_before(©_list); 426 t->ir->remove(); 427 428 assert(ls->num_loop_jumps > 0); 429 ls->num_loop_jumps--; 430 431 /* Also remove it from the terminator list */ 432 t->remove(); 433 434 this->progress = true; 435 } 436 } 437 438 if (ls->limiting_terminator == NULL) { 439 ir_instruction *last_ir = 440 (ir_instruction *) ir->body_instructions.get_tail(); 441 442 /* If a loop has no induction variable and the last instruction is 443 * a break, unroll the loop with a count of 1. This is the classic 444 * 445 * do { 446 * // ... 447 * } while (false) 448 * 449 * that is used to wrap multi-line macros. 450 * 451 * If num_loop_jumps is not zero, last_ir cannot be NULL... there has to 452 * be at least num_loop_jumps instructions in the loop. 453 */ 454 if (ls->num_loop_jumps == 1 && is_break(last_ir)) { 455 last_ir->remove(); 456 457 simple_unroll(ir, 1); 458 } 459 460 /* Don't try to unroll loops where the number of iterations is not known 461 * at compile-time. 462 */ 463 return visit_continue; 464 } 465 466 int iterations = ls->limiting_terminator->iterations; 467 468 const int max_iterations = options->MaxUnrollIterations; 469 470 /* Don't try to unroll loops that have zillions of iterations either. 471 */ 472 if (iterations > max_iterations) 473 return visit_continue; 474 475 /* Don't try to unroll nested loops and loops with a huge body. 476 */ 477 loop_unroll_count count(&ir->body_instructions, ls, options); 478 479 bool loop_too_large = 480 count.nested_loop || count.nodes * iterations > max_iterations * 5; 481 482 if (loop_too_large && !count.unsupported_variable_indexing && 483 !count.array_indexed_by_induction_var_with_exact_iterations) 484 return visit_continue; 485 486 /* Note: the limiting terminator contributes 1 to ls->num_loop_jumps. 487 * We'll be removing the limiting terminator before we unroll. 488 */ 489 assert(ls->num_loop_jumps > 0); 490 unsigned predicted_num_loop_jumps = ls->num_loop_jumps - 1; 491 492 if (predicted_num_loop_jumps > 1) 493 return visit_continue; 494 495 if (predicted_num_loop_jumps == 0) { 496 simple_unroll(ir, iterations); 497 return visit_continue; 498 } 499 500 ir_instruction *last_ir = (ir_instruction *) ir->body_instructions.get_tail(); 501 assert(last_ir != NULL); 502 503 if (is_break(last_ir)) { 504 /* If the only loop-jump is a break at the end of the loop, the loop 505 * will execute exactly once. Remove the break and use the simple 506 * unroller with an iteration count of 1. 507 */ 508 last_ir->remove(); 509 510 simple_unroll(ir, 1); 511 return visit_continue; 512 } 513 514 /* Complex unrolling can only handle two terminators. One with an unknown 515 * iteration count and one with a known iteration count. We have already 516 * made sure we have a known iteration count above and removed any 517 * unreachable terminators with a known count. Here we make sure there 518 * isn't any additional unknown terminators, or any other jumps nested 519 * inside futher ifs. 520 */ 521 if (ls->num_loop_jumps != 2 || ls->terminators.length() != 2) 522 return visit_continue; 523 524 ir_instruction *first_ir = 525 (ir_instruction *) ir->body_instructions.get_head(); 526 527 unsigned term_count = 0; 528 bool first_term_then_continue = false; 529 foreach_in_list(loop_terminator, t, &ls->terminators) { 530 ir_if *ir_if = t->ir->as_if(); 531 assert(ir_if != NULL); 532 533 ir_instruction *ir_if_last = 534 (ir_instruction *) ir_if->then_instructions.get_tail(); 535 536 if (is_break(ir_if_last)) { 537 splice_post_if_instructions(ir_if, &ir_if->else_instructions); 538 ir_if_last->remove(); 539 if (term_count == 1) { 540 bool ebi = 541 exit_branch_has_instructions(ls->limiting_terminator->ir, 542 first_term_then_continue); 543 complex_unroll(ir, iterations, false, 544 first_ir->as_if() != ls->limiting_terminator->ir || 545 ebi, 546 first_term_then_continue); 547 return visit_continue; 548 } 549 } else { 550 ir_if_last = 551 (ir_instruction *) ir_if->else_instructions.get_tail(); 552 553 assert(is_break(ir_if_last)); 554 if (is_break(ir_if_last)) { 555 splice_post_if_instructions(ir_if, &ir_if->then_instructions); 556 ir_if_last->remove(); 557 if (term_count == 1) { 558 bool ebi = 559 exit_branch_has_instructions(ls->limiting_terminator->ir, 560 first_term_then_continue); 561 complex_unroll(ir, iterations, true, 562 first_ir->as_if() != ls->limiting_terminator->ir || 563 ebi, 564 first_term_then_continue); 565 return visit_continue; 566 } else { 567 first_term_then_continue = true; 568 } 569 } 570 } 571 572 term_count++; 573 } 574 575 /* Did not find the break statement. It must be in a complex if-nesting, 576 * so don't try to unroll. 577 */ 578 return visit_continue; 579} 580 581 582bool 583unroll_loops(exec_list *instructions, loop_state *ls, 584 const struct gl_shader_compiler_options *options) 585{ 586 loop_unroll_visitor v(ls, options); 587 588 v.run(instructions); 589 590 return v.progress; 591} 592