1/* 2 * Copyright © 2016 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24#include "nir.h" 25#include "nir_builder.h" 26#include "nir_control_flow.h" 27#include "nir_loop_analyze.h" 28 29 30/* This limit is chosen fairly arbitrarily. GLSL IR max iteration is 32 31 * instructions. (Multiply counting nodes and magic number 5.) But there is 32 * no 1:1 mapping between GLSL IR and NIR so 25 was picked because it seemed 33 * to give about the same results. Around 5 instructions per node. But some 34 * loops that would unroll with GLSL IR fail to unroll if we set this to 25 so 35 * we set it to 26. 36 */ 37#define LOOP_UNROLL_LIMIT 26 38 39/* Prepare this loop for unrolling by first converting to lcssa and then 40 * converting the phis from the top level of the loop body to regs. 41 * Partially converting out of SSA allows us to unroll the loop without having 42 * to keep track of and update phis along the way which gets tricky and 43 * doesn't add much value over converting to regs. 44 * 45 * The loop may have a continue instruction at the end of the loop which does 46 * nothing. Once we're out of SSA, we can safely delete it so we don't have 47 * to deal with it later. 48 */ 49static void 50loop_prepare_for_unroll(nir_loop *loop) 51{ 52 nir_rematerialize_derefs_in_use_blocks_impl( 53 nir_cf_node_get_function(&loop->cf_node)); 54 55 nir_convert_loop_to_lcssa(loop); 56 57 /* Lower phis at the top level of the loop body */ 58 foreach_list_typed_safe(nir_cf_node, node, node, &loop->body) { 59 if (nir_cf_node_block == node->type) { 60 nir_lower_phis_to_regs_block(nir_cf_node_as_block(node)); 61 } 62 } 63 64 /* Lower phis after the loop */ 65 nir_block *block_after_loop = 66 nir_cf_node_as_block(nir_cf_node_next(&loop->cf_node)); 67 68 nir_lower_phis_to_regs_block(block_after_loop); 69 70 /* Remove continue if its the last instruction in the loop */ 71 nir_instr *last_instr = nir_block_last_instr(nir_loop_last_block(loop)); 72 if (last_instr && last_instr->type == nir_instr_type_jump) { 73 nir_instr_remove(last_instr); 74 } 75} 76 77static void 78get_first_blocks_in_terminator(nir_loop_terminator *term, 79 nir_block **first_break_block, 80 nir_block **first_continue_block) 81{ 82 if (term->continue_from_then) { 83 *first_continue_block = nir_if_first_then_block(term->nif); 84 *first_break_block = nir_if_first_else_block(term->nif); 85 } else { 86 *first_continue_block = nir_if_first_else_block(term->nif); 87 *first_break_block = nir_if_first_then_block(term->nif); 88 } 89} 90 91/** 92 * Unroll a loop where we know exactly how many iterations there are and there 93 * is only a single exit point. Note here we can unroll loops with multiple 94 * theoretical exits that only have a single terminating exit that we always 95 * know is the "real" exit. 96 * 97 * loop { 98 * ...instrs... 99 * } 100 * 101 * And the iteration count is 3, the output will be: 102 * 103 * ...instrs... ...instrs... ...instrs... 104 */ 105static void 106simple_unroll(nir_loop *loop) 107{ 108 nir_loop_terminator *limiting_term = loop->info->limiting_terminator; 109 assert(nir_is_trivial_loop_if(limiting_term->nif, 110 limiting_term->break_block)); 111 112 loop_prepare_for_unroll(loop); 113 114 /* Skip over loop terminator and get the loop body. */ 115 list_for_each_entry(nir_loop_terminator, terminator, 116 &loop->info->loop_terminator_list, 117 loop_terminator_link) { 118 119 /* Remove all but the limiting terminator as we know the other exit 120 * conditions can never be met. Note we need to extract any instructions 121 * in the continue from branch and insert then into the loop body before 122 * removing it. 123 */ 124 if (terminator->nif != limiting_term->nif) { 125 nir_block *first_break_block; 126 nir_block *first_continue_block; 127 get_first_blocks_in_terminator(terminator, &first_break_block, 128 &first_continue_block); 129 130 assert(nir_is_trivial_loop_if(terminator->nif, 131 terminator->break_block)); 132 133 nir_cf_list continue_from_lst; 134 nir_cf_extract(&continue_from_lst, 135 nir_before_block(first_continue_block), 136 nir_after_block(terminator->continue_from_block)); 137 nir_cf_reinsert(&continue_from_lst, 138 nir_after_cf_node(&terminator->nif->cf_node)); 139 140 nir_cf_node_remove(&terminator->nif->cf_node); 141 } 142 } 143 144 nir_block *first_break_block; 145 nir_block *first_continue_block; 146 get_first_blocks_in_terminator(limiting_term, &first_break_block, 147 &first_continue_block); 148 149 /* Pluck out the loop header */ 150 nir_block *header_blk = nir_loop_first_block(loop); 151 nir_cf_list lp_header; 152 nir_cf_extract(&lp_header, nir_before_block(header_blk), 153 nir_before_cf_node(&limiting_term->nif->cf_node)); 154 155 /* Add the continue from block of the limiting terminator to the loop body 156 */ 157 nir_cf_list continue_from_lst; 158 nir_cf_extract(&continue_from_lst, nir_before_block(first_continue_block), 159 nir_after_block(limiting_term->continue_from_block)); 160 nir_cf_reinsert(&continue_from_lst, 161 nir_after_cf_node(&limiting_term->nif->cf_node)); 162 163 /* Pluck out the loop body */ 164 nir_cf_list loop_body; 165 nir_cf_extract(&loop_body, nir_after_cf_node(&limiting_term->nif->cf_node), 166 nir_after_block(nir_loop_last_block(loop))); 167 168 struct hash_table *remap_table = _mesa_pointer_hash_table_create(NULL); 169 170 /* Clone the loop header and insert before the loop */ 171 nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent, 172 nir_before_cf_node(&loop->cf_node), 173 remap_table); 174 175 for (unsigned i = 0; i < loop->info->max_trip_count; i++) { 176 /* Clone loop body and insert before the loop */ 177 nir_cf_list_clone_and_reinsert(&loop_body, loop->cf_node.parent, 178 nir_before_cf_node(&loop->cf_node), 179 remap_table); 180 181 /* Clone loop header and insert after loop body */ 182 nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent, 183 nir_before_cf_node(&loop->cf_node), 184 remap_table); 185 } 186 187 /* Remove the break from the loop terminator and add instructions from 188 * the break block after the unrolled loop. 189 */ 190 nir_instr *break_instr = nir_block_last_instr(limiting_term->break_block); 191 nir_instr_remove(break_instr); 192 nir_cf_list break_list; 193 nir_cf_extract(&break_list, nir_before_block(first_break_block), 194 nir_after_block(limiting_term->break_block)); 195 196 /* Clone so things get properly remapped */ 197 nir_cf_list_clone_and_reinsert(&break_list, loop->cf_node.parent, 198 nir_before_cf_node(&loop->cf_node), 199 remap_table); 200 201 /* Remove the loop */ 202 nir_cf_node_remove(&loop->cf_node); 203 204 /* Delete the original loop body, break block & header */ 205 nir_cf_delete(&lp_header); 206 nir_cf_delete(&loop_body); 207 nir_cf_delete(&break_list); 208 209 _mesa_hash_table_destroy(remap_table, NULL); 210} 211 212static void 213move_cf_list_into_loop_term(nir_cf_list *lst, nir_loop_terminator *term) 214{ 215 /* Move the rest of the loop inside the continue-from-block */ 216 nir_cf_reinsert(lst, nir_after_block(term->continue_from_block)); 217 218 /* Remove the break */ 219 nir_instr_remove(nir_block_last_instr(term->break_block)); 220} 221 222static nir_cursor 223get_complex_unroll_insert_location(nir_cf_node *node, bool continue_from_then) 224{ 225 if (node->type == nir_cf_node_loop) { 226 return nir_before_cf_node(node); 227 } else { 228 nir_if *if_stmt = nir_cf_node_as_if(node); 229 if (continue_from_then) { 230 return nir_after_block(nir_if_last_then_block(if_stmt)); 231 } else { 232 return nir_after_block(nir_if_last_else_block(if_stmt)); 233 } 234 } 235} 236 237static nir_cf_node * 238complex_unroll_loop_body(nir_loop *loop, nir_loop_terminator *unlimit_term, 239 nir_cf_list *lp_header, nir_cf_list *lp_body, 240 struct hash_table *remap_table, 241 unsigned num_times_to_clone) 242{ 243 /* In the terminator that we have no trip count for move everything after 244 * the terminator into the continue from branch. 245 */ 246 nir_cf_list loop_end; 247 nir_cf_extract(&loop_end, nir_after_cf_node(&unlimit_term->nif->cf_node), 248 nir_after_block(nir_loop_last_block(loop))); 249 move_cf_list_into_loop_term(&loop_end, unlimit_term); 250 251 /* Pluck out the loop body. */ 252 nir_cf_extract(lp_body, nir_before_block(nir_loop_first_block(loop)), 253 nir_after_block(nir_loop_last_block(loop))); 254 255 /* Set unroll_loc to the loop as we will insert the unrolled loop before it 256 */ 257 nir_cf_node *unroll_loc = &loop->cf_node; 258 259 /* Temp list to store the cloned loop as we unroll */ 260 nir_cf_list unrolled_lp_body; 261 262 for (unsigned i = 0; i < num_times_to_clone; i++) { 263 264 nir_cursor cursor = 265 get_complex_unroll_insert_location(unroll_loc, 266 unlimit_term->continue_from_then); 267 268 /* Clone loop header and insert in if branch */ 269 nir_cf_list_clone_and_reinsert(lp_header, loop->cf_node.parent, 270 cursor, remap_table); 271 272 cursor = 273 get_complex_unroll_insert_location(unroll_loc, 274 unlimit_term->continue_from_then); 275 276 /* Clone loop body */ 277 nir_cf_list_clone(&unrolled_lp_body, lp_body, loop->cf_node.parent, 278 remap_table); 279 280 unroll_loc = exec_node_data(nir_cf_node, 281 exec_list_get_tail(&unrolled_lp_body.list), 282 node); 283 assert(unroll_loc->type == nir_cf_node_block && 284 exec_list_is_empty(&nir_cf_node_as_block(unroll_loc)->instr_list)); 285 286 /* Get the unrolled if node */ 287 unroll_loc = nir_cf_node_prev(unroll_loc); 288 289 /* Insert unrolled loop body */ 290 nir_cf_reinsert(&unrolled_lp_body, cursor); 291 } 292 293 return unroll_loc; 294} 295 296/** 297 * Unroll a loop with two exists when the trip count of one of the exits is 298 * unknown. If continue_from_then is true, the loop is repeated only when the 299 * "then" branch of the if is taken; otherwise it is repeated only 300 * when the "else" branch of the if is taken. 301 * 302 * For example, if the input is: 303 * 304 * loop { 305 * ...phis/condition... 306 * if condition { 307 * ...then instructions... 308 * } else { 309 * ...continue instructions... 310 * break 311 * } 312 * ...body... 313 * } 314 * 315 * And the iteration count is 3, and unlimit_term->continue_from_then is true, 316 * then the output will be: 317 * 318 * ...condition... 319 * if condition { 320 * ...then instructions... 321 * ...body... 322 * if condition { 323 * ...then instructions... 324 * ...body... 325 * if condition { 326 * ...then instructions... 327 * ...body... 328 * } else { 329 * ...continue instructions... 330 * } 331 * } else { 332 * ...continue instructions... 333 * } 334 * } else { 335 * ...continue instructions... 336 * } 337 */ 338static void 339complex_unroll(nir_loop *loop, nir_loop_terminator *unlimit_term, 340 bool limiting_term_second) 341{ 342 assert(nir_is_trivial_loop_if(unlimit_term->nif, 343 unlimit_term->break_block)); 344 345 nir_loop_terminator *limiting_term = loop->info->limiting_terminator; 346 assert(nir_is_trivial_loop_if(limiting_term->nif, 347 limiting_term->break_block)); 348 349 loop_prepare_for_unroll(loop); 350 351 nir_block *header_blk = nir_loop_first_block(loop); 352 353 nir_cf_list lp_header; 354 nir_cf_list limit_break_list; 355 unsigned num_times_to_clone; 356 if (limiting_term_second) { 357 /* Pluck out the loop header */ 358 nir_cf_extract(&lp_header, nir_before_block(header_blk), 359 nir_before_cf_node(&unlimit_term->nif->cf_node)); 360 361 /* We need some special handling when its the second terminator causing 362 * us to exit the loop for example: 363 * 364 * for (int i = 0; i < uniform_lp_count; i++) { 365 * colour = vec4(0.0, 1.0, 0.0, 1.0); 366 * 367 * if (i == 1) { 368 * break; 369 * } 370 * ... any further code is unreachable after i == 1 ... 371 * } 372 */ 373 nir_cf_list after_lt; 374 nir_if *limit_if = limiting_term->nif; 375 nir_cf_extract(&after_lt, nir_after_cf_node(&limit_if->cf_node), 376 nir_after_block(nir_loop_last_block(loop))); 377 move_cf_list_into_loop_term(&after_lt, limiting_term); 378 379 /* Because the trip count is the number of times we pass over the entire 380 * loop before hitting a break when the second terminator is the 381 * limiting terminator we can actually execute code inside the loop when 382 * trip count == 0 e.g. the code above the break. So we need to bump 383 * the trip_count in order for the code below to clone anything. When 384 * trip count == 1 we execute the code above the break twice and the 385 * code below it once so we need clone things twice and so on. 386 */ 387 num_times_to_clone = loop->info->max_trip_count + 1; 388 } else { 389 /* Pluck out the loop header */ 390 nir_cf_extract(&lp_header, nir_before_block(header_blk), 391 nir_before_cf_node(&limiting_term->nif->cf_node)); 392 393 nir_block *first_break_block; 394 nir_block *first_continue_block; 395 get_first_blocks_in_terminator(limiting_term, &first_break_block, 396 &first_continue_block); 397 398 /* Remove the break then extract instructions from the break block so we 399 * can insert them in the innermost else of the unrolled loop. 400 */ 401 nir_instr *break_instr = nir_block_last_instr(limiting_term->break_block); 402 nir_instr_remove(break_instr); 403 nir_cf_extract(&limit_break_list, nir_before_block(first_break_block), 404 nir_after_block(limiting_term->break_block)); 405 406 nir_cf_list continue_list; 407 nir_cf_extract(&continue_list, nir_before_block(first_continue_block), 408 nir_after_block(limiting_term->continue_from_block)); 409 410 nir_cf_reinsert(&continue_list, 411 nir_after_cf_node(&limiting_term->nif->cf_node)); 412 413 nir_cf_node_remove(&limiting_term->nif->cf_node); 414 415 num_times_to_clone = loop->info->max_trip_count; 416 } 417 418 struct hash_table *remap_table = _mesa_pointer_hash_table_create(NULL); 419 420 nir_cf_list lp_body; 421 nir_cf_node *unroll_loc = 422 complex_unroll_loop_body(loop, unlimit_term, &lp_header, &lp_body, 423 remap_table, num_times_to_clone); 424 425 if (!limiting_term_second) { 426 assert(unroll_loc->type == nir_cf_node_if); 427 428 nir_cursor cursor = 429 get_complex_unroll_insert_location(unroll_loc, 430 unlimit_term->continue_from_then); 431 432 /* Clone loop header and insert in if branch */ 433 nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent, 434 cursor, remap_table); 435 436 cursor = 437 get_complex_unroll_insert_location(unroll_loc, 438 unlimit_term->continue_from_then); 439 440 /* Clone so things get properly remapped, and insert break block from 441 * the limiting terminator. 442 */ 443 nir_cf_list_clone_and_reinsert(&limit_break_list, loop->cf_node.parent, 444 cursor, remap_table); 445 446 nir_cf_delete(&limit_break_list); 447 } 448 449 /* The loop has been unrolled so remove it. */ 450 nir_cf_node_remove(&loop->cf_node); 451 452 /* Delete the original loop header and body */ 453 nir_cf_delete(&lp_header); 454 nir_cf_delete(&lp_body); 455 456 _mesa_hash_table_destroy(remap_table, NULL); 457} 458 459/** 460 * Unroll loops where we only have a single terminator but the exact trip 461 * count is unknown. For example: 462 * 463 * for (int i = 0; i < imin(x, 4); i++) 464 * ... 465 */ 466static void 467complex_unroll_single_terminator(nir_loop *loop) 468{ 469 assert(list_length(&loop->info->loop_terminator_list) == 1); 470 assert(loop->info->limiting_terminator); 471 assert(nir_is_trivial_loop_if(loop->info->limiting_terminator->nif, 472 loop->info->limiting_terminator->break_block)); 473 474 nir_loop_terminator *terminator = loop->info->limiting_terminator; 475 476 loop_prepare_for_unroll(loop); 477 478 /* Pluck out the loop header */ 479 nir_cf_list lp_header; 480 nir_cf_extract(&lp_header, nir_before_block(nir_loop_first_block(loop)), 481 nir_before_cf_node(&terminator->nif->cf_node)); 482 483 struct hash_table *remap_table = 484 _mesa_hash_table_create(NULL, _mesa_hash_pointer, 485 _mesa_key_pointer_equal); 486 487 /* We need to clone the loop one extra time in order to clone the lcssa 488 * vars for the last iteration (they are inside the following ifs break 489 * branch). We leave other passes to clean up this redundant if. 490 */ 491 unsigned num_times_to_clone = loop->info->max_trip_count + 1; 492 493 nir_cf_list lp_body; 494 MAYBE_UNUSED nir_cf_node *unroll_loc = 495 complex_unroll_loop_body(loop, terminator, &lp_header, &lp_body, 496 remap_table, num_times_to_clone); 497 498 /* Delete the original loop header and body */ 499 nir_cf_delete(&lp_header); 500 nir_cf_delete(&lp_body); 501 502 /* The original loop has been replaced so remove it. */ 503 nir_cf_node_remove(&loop->cf_node); 504 505 _mesa_hash_table_destroy(remap_table, NULL); 506} 507 508/* Unrolls the classic wrapper loops e.g 509 * 510 * do { 511 * // ... 512 * } while (false) 513 */ 514static bool 515wrapper_unroll(nir_loop *loop) 516{ 517 if (!list_empty(&loop->info->loop_terminator_list)) { 518 519 /* Unrolling a loop with a large number of exits can result in a 520 * large inrease in register pressure. For now we just skip 521 * unrolling if we have more than 3 exits (not including the break 522 * at the end of the loop). 523 * 524 * TODO: Most loops that fit this pattern are simply switch 525 * statements that are converted to a loop to take advantage of 526 * exiting jump instruction handling. In this case we could make 527 * use of a binary seach pattern like we do in 528 * nir_lower_indirect_derefs(), this should allow us to unroll the 529 * loops in an optimal way and should also avoid some of the 530 * register pressure that comes from simply nesting the 531 * terminators one after the other. 532 */ 533 if (list_length(&loop->info->loop_terminator_list) > 3) 534 return false; 535 536 loop_prepare_for_unroll(loop); 537 538 nir_cursor loop_end = nir_after_block(nir_loop_last_block(loop)); 539 list_for_each_entry(nir_loop_terminator, terminator, 540 &loop->info->loop_terminator_list, 541 loop_terminator_link) { 542 543 /* Remove break from the terminator */ 544 nir_instr *break_instr = 545 nir_block_last_instr(terminator->break_block); 546 nir_instr_remove(break_instr); 547 548 /* Pluck out the loop body. */ 549 nir_cf_list loop_body; 550 nir_cf_extract(&loop_body, 551 nir_after_cf_node(&terminator->nif->cf_node), 552 loop_end); 553 554 /* Reinsert loop body into continue from block */ 555 nir_cf_reinsert(&loop_body, 556 nir_after_block(terminator->continue_from_block)); 557 558 loop_end = terminator->continue_from_then ? 559 nir_after_block(nir_if_last_then_block(terminator->nif)) : 560 nir_after_block(nir_if_last_else_block(terminator->nif)); 561 } 562 } else { 563 loop_prepare_for_unroll(loop); 564 } 565 566 /* Pluck out the loop body. */ 567 nir_cf_list loop_body; 568 nir_cf_extract(&loop_body, nir_before_block(nir_loop_first_block(loop)), 569 nir_after_block(nir_loop_last_block(loop))); 570 571 /* Reinsert loop body after the loop */ 572 nir_cf_reinsert(&loop_body, nir_after_cf_node(&loop->cf_node)); 573 574 /* The loop has been unrolled so remove it. */ 575 nir_cf_node_remove(&loop->cf_node); 576 577 return true; 578} 579 580static bool 581is_access_out_of_bounds(nir_loop_terminator *term, nir_deref_instr *deref, 582 unsigned trip_count) 583{ 584 for (nir_deref_instr *d = deref; d; d = nir_deref_instr_parent(d)) { 585 if (d->deref_type != nir_deref_type_array) 586 continue; 587 588 nir_alu_instr *alu = nir_instr_as_alu(term->conditional_instr); 589 nir_src src = term->induction_rhs ? alu->src[1].src : alu->src[0].src; 590 if (!nir_srcs_equal(d->arr.index, src)) 591 continue; 592 593 nir_deref_instr *parent = nir_deref_instr_parent(d); 594 assert(glsl_type_is_array(parent->type) || 595 glsl_type_is_matrix(parent->type)); 596 597 /* We have already unrolled the loop and the new one will be imbedded in 598 * the innermost continue branch. So unless the array is greater than 599 * the trip count any iteration over the loop will be an out of bounds 600 * access of the array. 601 */ 602 return glsl_get_length(parent->type) <= trip_count; 603 } 604 605 return false; 606} 607 608/* If we know an array access is going to be out of bounds remove or replace 609 * the access with an undef. This can later result in the entire loop being 610 * removed by nir_opt_dead_cf(). 611 */ 612static void 613remove_out_of_bounds_induction_use(nir_shader *shader, nir_loop *loop, 614 nir_loop_terminator *term, 615 nir_cf_list *lp_header, 616 nir_cf_list *lp_body, 617 unsigned trip_count) 618{ 619 if (!loop->info->guessed_trip_count) 620 return; 621 622 /* Temporarily recreate the original loop so we can alter it */ 623 nir_cf_reinsert(lp_header, nir_after_block(nir_loop_last_block(loop))); 624 nir_cf_reinsert(lp_body, nir_after_block(nir_loop_last_block(loop))); 625 626 nir_builder b; 627 nir_builder_init(&b, nir_cf_node_get_function(&loop->cf_node)); 628 629 nir_foreach_block_in_cf_node(block, &loop->cf_node) { 630 nir_foreach_instr_safe(instr, block) { 631 if (instr->type != nir_instr_type_intrinsic) 632 continue; 633 634 nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr); 635 636 /* Check for arrays variably-indexed by a loop induction variable. 637 * If this access is out of bounds remove the instruction or replace 638 * its use with an undefined instruction. 639 * If the loop is no longer useful we leave it for the appropriate 640 * pass to clean it up for us. 641 */ 642 if (intrin->intrinsic == nir_intrinsic_load_deref || 643 intrin->intrinsic == nir_intrinsic_store_deref || 644 intrin->intrinsic == nir_intrinsic_copy_deref) { 645 646 if (is_access_out_of_bounds(term, nir_src_as_deref(intrin->src[0]), 647 trip_count)) { 648 if (intrin->intrinsic == nir_intrinsic_load_deref) { 649 nir_ssa_def *undef = 650 nir_ssa_undef(&b, intrin->dest.ssa.num_components, 651 intrin->dest.ssa.bit_size); 652 nir_ssa_def_rewrite_uses(&intrin->dest.ssa, 653 nir_src_for_ssa(undef)); 654 } else { 655 nir_instr_remove(instr); 656 continue; 657 } 658 } 659 660 if (intrin->intrinsic == nir_intrinsic_copy_deref && 661 is_access_out_of_bounds(term, nir_src_as_deref(intrin->src[1]), 662 trip_count)) { 663 nir_instr_remove(instr); 664 } 665 } 666 } 667 } 668 669 /* Now that we are done extract the loop header and body again */ 670 nir_cf_extract(lp_header, nir_before_block(nir_loop_first_block(loop)), 671 nir_before_cf_node(&term->nif->cf_node)); 672 nir_cf_extract(lp_body, nir_before_block(nir_loop_first_block(loop)), 673 nir_after_block(nir_loop_last_block(loop))); 674} 675 676/* Partially unrolls loops that don't have a known trip count. 677 */ 678static void 679partial_unroll(nir_shader *shader, nir_loop *loop, unsigned trip_count) 680{ 681 assert(list_length(&loop->info->loop_terminator_list) == 1); 682 683 nir_loop_terminator *terminator = 684 list_first_entry(&loop->info->loop_terminator_list, 685 nir_loop_terminator, loop_terminator_link); 686 687 assert(nir_is_trivial_loop_if(terminator->nif, terminator->break_block)); 688 689 loop_prepare_for_unroll(loop); 690 691 /* Pluck out the loop header */ 692 nir_cf_list lp_header; 693 nir_cf_extract(&lp_header, nir_before_block(nir_loop_first_block(loop)), 694 nir_before_cf_node(&terminator->nif->cf_node)); 695 696 struct hash_table *remap_table = 697 _mesa_hash_table_create(NULL, _mesa_hash_pointer, 698 _mesa_key_pointer_equal); 699 700 nir_cf_list lp_body; 701 nir_cf_node *unroll_loc = 702 complex_unroll_loop_body(loop, terminator, &lp_header, &lp_body, 703 remap_table, trip_count); 704 705 /* Attempt to remove out of bounds array access */ 706 remove_out_of_bounds_induction_use(shader, loop, terminator, &lp_header, 707 &lp_body, trip_count); 708 709 nir_cursor cursor = 710 get_complex_unroll_insert_location(unroll_loc, 711 terminator->continue_from_then); 712 713 /* Reinsert the loop in the innermost nested continue branch of the unrolled 714 * loop. 715 */ 716 nir_loop *new_loop = nir_loop_create(shader); 717 nir_cf_node_insert(cursor, &new_loop->cf_node); 718 new_loop->partially_unrolled = true; 719 720 /* Clone loop header and insert into new loop */ 721 nir_cf_list_clone_and_reinsert(&lp_header, loop->cf_node.parent, 722 nir_after_cf_list(&new_loop->body), 723 remap_table); 724 725 /* Clone loop body and insert into new loop */ 726 nir_cf_list_clone_and_reinsert(&lp_body, loop->cf_node.parent, 727 nir_after_cf_list(&new_loop->body), 728 remap_table); 729 730 /* Insert break back into terminator */ 731 nir_jump_instr *brk = nir_jump_instr_create(shader, nir_jump_break); 732 nir_if *nif = nir_block_get_following_if(nir_loop_first_block(new_loop)); 733 if (terminator->continue_from_then) { 734 nir_instr_insert_after_block(nir_if_last_else_block(nif), &brk->instr); 735 } else { 736 nir_instr_insert_after_block(nir_if_last_then_block(nif), &brk->instr); 737 } 738 739 /* Delete the original loop header and body */ 740 nir_cf_delete(&lp_header); 741 nir_cf_delete(&lp_body); 742 743 /* The original loop has been replaced so remove it. */ 744 nir_cf_node_remove(&loop->cf_node); 745 746 _mesa_hash_table_destroy(remap_table, NULL); 747} 748 749/* 750 * Returns true if we should unroll the loop, otherwise false. 751 */ 752static bool 753check_unrolling_restrictions(nir_shader *shader, nir_loop *loop) 754{ 755 if (loop->control == nir_loop_control_unroll) 756 return true; 757 758 if (loop->control == nir_loop_control_dont_unroll) 759 return false; 760 761 nir_loop_info *li = loop->info; 762 unsigned max_iter = shader->options->max_unroll_iterations; 763 unsigned trip_count = 764 li->max_trip_count ? li->max_trip_count : li->guessed_trip_count; 765 766 if (trip_count > max_iter) 767 return false; 768 769 if (li->force_unroll && !li->guessed_trip_count) 770 return true; 771 772 bool loop_not_too_large = 773 li->instr_cost * trip_count <= max_iter * LOOP_UNROLL_LIMIT; 774 775 return loop_not_too_large; 776} 777 778static bool 779process_loops(nir_shader *sh, nir_cf_node *cf_node, bool *has_nested_loop_out) 780{ 781 bool progress = false; 782 bool has_nested_loop = false; 783 nir_loop *loop; 784 785 switch (cf_node->type) { 786 case nir_cf_node_block: 787 return progress; 788 case nir_cf_node_if: { 789 nir_if *if_stmt = nir_cf_node_as_if(cf_node); 790 foreach_list_typed_safe(nir_cf_node, nested_node, node, &if_stmt->then_list) 791 progress |= process_loops(sh, nested_node, has_nested_loop_out); 792 foreach_list_typed_safe(nir_cf_node, nested_node, node, &if_stmt->else_list) 793 progress |= process_loops(sh, nested_node, has_nested_loop_out); 794 return progress; 795 } 796 case nir_cf_node_loop: { 797 loop = nir_cf_node_as_loop(cf_node); 798 foreach_list_typed_safe(nir_cf_node, nested_node, node, &loop->body) 799 progress |= process_loops(sh, nested_node, &has_nested_loop); 800 801 break; 802 } 803 default: 804 unreachable("unknown cf node type"); 805 } 806 807 /* Don't attempt to unroll a second inner loop in this pass, wait until the 808 * next pass as we have altered the cf. 809 */ 810 if (!progress && loop->control != nir_loop_control_dont_unroll) { 811 812 /* Check for the classic 813 * 814 * do { 815 * // ... 816 * } while (false) 817 * 818 * that is used to wrap multi-line macros. GLSL IR also wraps switch 819 * statements in a loop like this. 820 */ 821 if (loop->info->limiting_terminator == NULL && 822 !loop->info->complex_loop) { 823 824 nir_block *last_loop_blk = nir_loop_last_block(loop); 825 if (nir_block_ends_in_break(last_loop_blk)) { 826 progress = wrapper_unroll(loop); 827 goto exit; 828 } 829 830 /* If we were able to guess the loop iteration based on array access 831 * then do a partial unroll. 832 */ 833 unsigned num_lt = list_length(&loop->info->loop_terminator_list); 834 if (!has_nested_loop && num_lt == 1 && !loop->partially_unrolled && 835 loop->info->guessed_trip_count && 836 check_unrolling_restrictions(sh, loop)) { 837 partial_unroll(sh, loop, loop->info->guessed_trip_count); 838 progress = true; 839 } 840 } 841 842 if (has_nested_loop || !loop->info->limiting_terminator) 843 goto exit; 844 845 if (!check_unrolling_restrictions(sh, loop)) 846 goto exit; 847 848 if (loop->info->exact_trip_count_known) { 849 simple_unroll(loop); 850 progress = true; 851 } else { 852 /* Attempt to unroll loops with two terminators. */ 853 unsigned num_lt = list_length(&loop->info->loop_terminator_list); 854 if (num_lt == 2 && 855 !loop->info->limiting_terminator->exact_trip_count_unknown) { 856 bool limiting_term_second = true; 857 nir_loop_terminator *terminator = 858 list_first_entry(&loop->info->loop_terminator_list, 859 nir_loop_terminator, loop_terminator_link); 860 861 862 if (terminator->nif == loop->info->limiting_terminator->nif) { 863 limiting_term_second = false; 864 terminator = 865 list_last_entry(&loop->info->loop_terminator_list, 866 nir_loop_terminator, loop_terminator_link); 867 } 868 869 /* If the first terminator has a trip count of zero and is the 870 * limiting terminator just do a simple unroll as the second 871 * terminator can never be reached. 872 */ 873 if (loop->info->max_trip_count == 0 && !limiting_term_second) { 874 simple_unroll(loop); 875 } else { 876 complex_unroll(loop, terminator, limiting_term_second); 877 } 878 progress = true; 879 } 880 881 if (num_lt == 1) { 882 assert(loop->info->limiting_terminator->exact_trip_count_unknown); 883 complex_unroll_single_terminator(loop); 884 progress = true; 885 } 886 } 887 } 888 889exit: 890 *has_nested_loop_out = true; 891 return progress; 892} 893 894static bool 895nir_opt_loop_unroll_impl(nir_function_impl *impl, 896 nir_variable_mode indirect_mask) 897{ 898 bool progress = false; 899 nir_metadata_require(impl, nir_metadata_loop_analysis, indirect_mask); 900 nir_metadata_require(impl, nir_metadata_block_index); 901 902 foreach_list_typed_safe(nir_cf_node, node, node, &impl->body) { 903 bool has_nested_loop = false; 904 progress |= process_loops(impl->function->shader, node, 905 &has_nested_loop); 906 } 907 908 if (progress) 909 nir_lower_regs_to_ssa_impl(impl); 910 911 return progress; 912} 913 914/** 915 * indirect_mask specifies which type of indirectly accessed variables 916 * should force loop unrolling. 917 */ 918bool 919nir_opt_loop_unroll(nir_shader *shader, nir_variable_mode indirect_mask) 920{ 921 bool progress = false; 922 923 nir_foreach_function(function, shader) { 924 if (function->impl) { 925 progress |= nir_opt_loop_unroll_impl(function->impl, indirect_mask); 926 } 927 } 928 return progress; 929} 930