1 /* Loop optimizer initialization routines and RTL loop optimization passes. 2 Copyright (C) 2002-2024 Free Software Foundation, Inc. 3 4 This file is part of GCC. 5 6 GCC is free software; you can redistribute it and/or modify it under 7 the terms of the GNU General Public License as published by the Free 8 Software Foundation; either version 3, or (at your option) any later 9 version. 10 11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY 12 WARRANTY; without even the implied warranty of MERCHANTABILITY or 13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License 14 for more details. 15 16 You should have received a copy of the GNU General Public License 17 along with GCC; see the file COPYING3. If not see 18 <http://www.gnu.org/licenses/>. */ 19 20 #include "config.h" 21 #include "system.h" 22 #include "coretypes.h" 23 #include "backend.h" 24 #include "target.h" 25 #include "rtl.h" 26 #include "tree.h" 27 #include "cfghooks.h" 28 #include "df.h" 29 #include "regs.h" 30 #include "cfgcleanup.h" 31 #include "cfgloop.h" 32 #include "tree-pass.h" 33 #include "tree-ssa-loop-niter.h" 34 #include "loop-unroll.h" 35 #include "tree-scalar-evolution.h" 36 #include "tree-cfgcleanup.h" 37 38 39 /* Apply FLAGS to the loop state. */ 41 42 static void 43 apply_loop_flags (unsigned flags) 44 { 45 if (flags & LOOPS_MAY_HAVE_MULTIPLE_LATCHES) 46 { 47 /* If the loops may have multiple latches, we cannot canonicalize 48 them further (and most of the loop manipulation functions will 49 not work). However, we avoid modifying cfg, which some 50 passes may want. */ 51 gcc_assert ((flags & ~(LOOPS_MAY_HAVE_MULTIPLE_LATCHES 52 | LOOPS_HAVE_RECORDED_EXITS 53 | LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS)) == 0); 54 loops_state_set (LOOPS_MAY_HAVE_MULTIPLE_LATCHES); 55 } 56 else 57 disambiguate_loops_with_multiple_latches (); 58 59 /* Create pre-headers. */ 60 if (flags & LOOPS_HAVE_PREHEADERS) 61 { 62 int cp_flags = CP_SIMPLE_PREHEADERS; 63 64 if (flags & LOOPS_HAVE_FALLTHRU_PREHEADERS) 65 cp_flags |= CP_FALLTHRU_PREHEADERS; 66 67 create_preheaders (cp_flags); 68 } 69 70 /* Force all latches to have only single successor. */ 71 if (flags & LOOPS_HAVE_SIMPLE_LATCHES) 72 force_single_succ_latches (); 73 74 /* Mark irreducible loops. */ 75 if (flags & LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS) 76 mark_irreducible_loops (); 77 78 if (flags & LOOPS_HAVE_RECORDED_EXITS) 79 record_loop_exits (); 80 } 81 82 /* Initialize loop structures. This is used by the tree and RTL loop 83 optimizers. FLAGS specify what properties to compute and/or ensure for 84 loops. */ 85 86 void 87 loop_optimizer_init (unsigned flags) 88 { 89 timevar_push (TV_LOOP_INIT); 90 91 if (!current_loops) 92 { 93 gcc_assert (!(cfun->curr_properties & PROP_loops)); 94 95 /* Find the loops. */ 96 current_loops = flow_loops_find (NULL); 97 } 98 else 99 { 100 bool recorded_exits = loops_state_satisfies_p (LOOPS_HAVE_RECORDED_EXITS); 101 bool needs_fixup = loops_state_satisfies_p (LOOPS_NEED_FIXUP); 102 103 gcc_assert (cfun->curr_properties & PROP_loops); 104 105 /* Ensure that the dominators are computed, like flow_loops_find does. */ 106 calculate_dominance_info (CDI_DOMINATORS); 107 108 if (!needs_fixup) 109 checking_verify_loop_structure (); 110 111 /* Clear all flags. */ 112 if (recorded_exits) 113 release_recorded_exits (cfun); 114 loops_state_clear (~0U); 115 116 if (needs_fixup) 117 { 118 /* Apply LOOPS_MAY_HAVE_MULTIPLE_LATCHES early as fix_loop_structure 119 re-applies flags. */ 120 loops_state_set (flags & LOOPS_MAY_HAVE_MULTIPLE_LATCHES); 121 fix_loop_structure (NULL); 122 } 123 } 124 125 /* Apply flags to loops. */ 126 apply_loop_flags (flags); 127 128 /* Dump loops. */ 129 flow_loops_dump (dump_file, NULL, 1); 130 131 checking_verify_loop_structure (); 132 133 timevar_pop (TV_LOOP_INIT); 134 } 135 136 /* Finalize loop structures. */ 137 138 void 139 loop_optimizer_finalize (struct function *fn, bool clean_loop_closed_phi) 140 { 141 basic_block bb; 142 143 timevar_push (TV_LOOP_FINI); 144 145 if (clean_loop_closed_phi && loops_state_satisfies_p (fn, LOOP_CLOSED_SSA)) 146 { 147 clean_up_loop_closed_phi (fn); 148 loops_state_clear (fn, LOOP_CLOSED_SSA); 149 } 150 151 if (loops_state_satisfies_p (fn, LOOPS_HAVE_RECORDED_EXITS)) 152 release_recorded_exits (fn); 153 154 free_numbers_of_iterations_estimates (fn); 155 156 /* If we should preserve loop structure, do not free it but clear 157 flags that advanced properties are there as we are not preserving 158 that in full. */ 159 if (fn->curr_properties & PROP_loops) 160 { 161 loops_state_clear (fn, LOOP_CLOSED_SSA 162 | LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS 163 | LOOPS_HAVE_PREHEADERS 164 | LOOPS_HAVE_SIMPLE_LATCHES 165 | LOOPS_HAVE_FALLTHRU_PREHEADERS); 166 loops_state_set (fn, LOOPS_MAY_HAVE_MULTIPLE_LATCHES); 167 goto loop_fini_done; 168 } 169 170 for (auto loop : loops_list (fn, 0)) 171 free_simple_loop_desc (loop); 172 173 /* Clean up. */ 174 flow_loops_free (loops_for_fn (fn)); 175 ggc_free (loops_for_fn (fn)); 176 set_loops_for_fn (fn, NULL); 177 178 FOR_ALL_BB_FN (bb, fn) 179 { 180 bb->loop_father = NULL; 181 } 182 183 loop_fini_done: 184 timevar_pop (TV_LOOP_FINI); 185 } 186 187 /* The structure of loops might have changed. Some loops might get removed 188 (and their headers and latches were set to NULL), loop exists might get 189 removed (thus the loop nesting may be wrong), and some blocks and edges 190 were changed (so the information about bb --> loop mapping does not have 191 to be correct). But still for the remaining loops the header dominates 192 the latch, and loops did not get new subloops (new loops might possibly 193 get created, but we are not interested in them). Fix up the mess. 194 195 If CHANGED_BBS is not NULL, basic blocks whose loop depth has changed are 196 marked in it. 197 198 Returns the number of new discovered plus the number of removed loops. */ 199 200 unsigned 201 fix_loop_structure (bitmap changed_bbs) 202 { 203 basic_block bb; 204 int record_exits = 0; 205 unsigned old_nloops, i; 206 207 timevar_push (TV_LOOP_INIT); 208 209 if (dump_file && (dump_flags & TDF_DETAILS)) 210 fprintf (dump_file, "fix_loop_structure: fixing up loops for function\n"); 211 212 /* We need exact and fast dominance info to be available. */ 213 gcc_assert (dom_info_state (CDI_DOMINATORS) == DOM_OK); 214 215 if (loops_state_satisfies_p (LOOPS_HAVE_RECORDED_EXITS)) 216 { 217 release_recorded_exits (cfun); 218 record_exits = LOOPS_HAVE_RECORDED_EXITS; 219 } 220 221 /* Remember the depth of the blocks in the loop hierarchy, so that we can 222 recognize blocks whose loop nesting relationship has changed. */ 223 if (changed_bbs) 224 FOR_EACH_BB_FN (bb, cfun) 225 bb->aux = (void *) (size_t) loop_depth (bb->loop_father); 226 227 /* Remove the dead loops from structures. We start from the innermost 228 loops, so that when we remove the loops, we know that the loops inside 229 are preserved, and do not waste time relinking loops that will be 230 removed later. */ 231 for (auto loop : loops_list (cfun, LI_FROM_INNERMOST)) 232 { 233 /* Detect the case that the loop is no longer present even though 234 it wasn't marked for removal. 235 ??? If we do that we can get away with not marking loops for 236 removal at all. And possibly avoid some spurious removals. */ 237 if (loop->header 238 && bb_loop_header_p (loop->header)) 239 continue; 240 241 if (dump_file && (dump_flags & TDF_DETAILS)) 242 fprintf (dump_file, "fix_loop_structure: removing loop %d\n", 243 loop->num); 244 245 while (loop->inner) 246 { 247 class loop *ploop = loop->inner; 248 flow_loop_tree_node_remove (ploop); 249 flow_loop_tree_node_add (loop_outer (loop), ploop); 250 } 251 252 /* Remove the loop. */ 253 if (loop->header) 254 loop->former_header = loop->header; 255 else 256 gcc_assert (loop->former_header != NULL); 257 loop->header = NULL; 258 flow_loop_tree_node_remove (loop); 259 } 260 261 /* Remember the number of loops so we can return how many new loops 262 flow_loops_find discovered. */ 263 old_nloops = number_of_loops (cfun); 264 265 /* Re-compute loop structure in-place. */ 266 flow_loops_find (current_loops); 267 268 /* Mark the blocks whose loop has changed. */ 269 if (changed_bbs) 270 { 271 FOR_EACH_BB_FN (bb, cfun) 272 { 273 if ((void *) (size_t) loop_depth (bb->loop_father) != bb->aux) 274 bitmap_set_bit (changed_bbs, bb->index); 275 276 bb->aux = NULL; 277 } 278 } 279 280 /* Finally free deleted loops. */ 281 unsigned n_deleted = 0; 282 class loop *loop; 283 FOR_EACH_VEC_ELT (*get_loops (cfun), i, loop) 284 if (loop && loop->header == NULL) 285 { 286 if (dump_file 287 && ((unsigned) loop->former_header->index 288 < basic_block_info_for_fn (cfun)->length ())) 289 { 290 basic_block former_header 291 = BASIC_BLOCK_FOR_FN (cfun, loop->former_header->index); 292 /* If the old header still exists we want to check if the 293 original loop is re-discovered or the old header is now 294 part of a newly discovered loop. 295 In both cases we should have avoided removing the loop. */ 296 if (former_header == loop->former_header) 297 { 298 if (former_header->loop_father->header == former_header) 299 fprintf (dump_file, "fix_loop_structure: rediscovered " 300 "removed loop %d as loop %d with old header %d\n", 301 loop->num, former_header->loop_father->num, 302 former_header->index); 303 else if ((unsigned) former_header->loop_father->num 304 >= old_nloops) 305 fprintf (dump_file, "fix_loop_structure: header %d of " 306 "removed loop %d is part of the newly " 307 "discovered loop %d with header %d\n", 308 former_header->index, loop->num, 309 former_header->loop_father->num, 310 former_header->loop_father->header->index); 311 } 312 } 313 (*get_loops (cfun))[i] = NULL; 314 flow_loop_free (loop); 315 n_deleted++; 316 } 317 318 /* If we deleted loops then the cached scalar evolutions refering to 319 those loops become invalid. */ 320 if (n_deleted > 0 && scev_initialized_p ()) 321 scev_reset_htab (); 322 323 loops_state_clear (LOOPS_NEED_FIXUP); 324 325 /* Apply flags to loops. */ 326 apply_loop_flags (current_loops->state | record_exits); 327 328 checking_verify_loop_structure (); 329 330 timevar_pop (TV_LOOP_INIT); 331 332 return number_of_loops (cfun) - old_nloops + n_deleted; 333 } 334 335 /* The RTL loop superpass. The actual passes are subpasses. See passes.cc for 337 more on that. */ 338 339 namespace { 340 341 const pass_data pass_data_loop2 = 342 { 343 RTL_PASS, /* type */ 344 "loop2", /* name */ 345 OPTGROUP_LOOP, /* optinfo_flags */ 346 TV_LOOP, /* tv_id */ 347 0, /* properties_required */ 348 0, /* properties_provided */ 349 0, /* properties_destroyed */ 350 0, /* todo_flags_start */ 351 0, /* todo_flags_finish */ 352 }; 353 354 class pass_loop2 : public rtl_opt_pass 355 { 356 public: 357 pass_loop2 (gcc::context *ctxt) 358 : rtl_opt_pass (pass_data_loop2, ctxt) 359 {} 360 361 /* opt_pass methods: */ 362 bool gate (function *) final override; 363 364 }; // class pass_loop2 365 366 bool 367 pass_loop2::gate (function *fun) 368 { 369 if (optimize > 0 370 && (flag_move_loop_invariants 371 || flag_unswitch_loops 372 || flag_unroll_loops 373 || (flag_branch_on_count_reg && targetm.have_doloop_end ()) 374 || cfun->has_unroll)) 375 return true; 376 else 377 { 378 /* No longer preserve loops, remove them now. */ 379 fun->curr_properties &= ~PROP_loops; 380 if (current_loops) 381 loop_optimizer_finalize (); 382 return false; 383 } 384 } 385 386 } // anon namespace 387 388 rtl_opt_pass * 389 make_pass_loop2 (gcc::context *ctxt) 390 { 391 return new pass_loop2 (ctxt); 392 } 393 394 395 /* Initialization of the RTL loop passes. */ 397 static unsigned int 398 rtl_loop_init (void) 399 { 400 gcc_assert (current_ir_type () == IR_RTL_CFGLAYOUT); 401 402 if (dump_file) 403 { 404 dump_reg_info (dump_file); 405 dump_flow_info (dump_file, dump_flags); 406 } 407 408 loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS); 409 return 0; 410 } 411 412 namespace { 413 414 const pass_data pass_data_rtl_loop_init = 415 { 416 RTL_PASS, /* type */ 417 "loop2_init", /* name */ 418 OPTGROUP_LOOP, /* optinfo_flags */ 419 TV_LOOP, /* tv_id */ 420 0, /* properties_required */ 421 0, /* properties_provided */ 422 0, /* properties_destroyed */ 423 0, /* todo_flags_start */ 424 0, /* todo_flags_finish */ 425 }; 426 427 class pass_rtl_loop_init : public rtl_opt_pass 428 { 429 public: 430 pass_rtl_loop_init (gcc::context *ctxt) 431 : rtl_opt_pass (pass_data_rtl_loop_init, ctxt) 432 {} 433 434 /* opt_pass methods: */ 435 unsigned int execute (function *) final override { return rtl_loop_init (); } 436 437 }; // class pass_rtl_loop_init 438 439 } // anon namespace 440 441 rtl_opt_pass * 442 make_pass_rtl_loop_init (gcc::context *ctxt) 443 { 444 return new pass_rtl_loop_init (ctxt); 445 } 446 447 448 /* Finalization of the RTL loop passes. */ 450 451 namespace { 452 453 const pass_data pass_data_rtl_loop_done = 454 { 455 RTL_PASS, /* type */ 456 "loop2_done", /* name */ 457 OPTGROUP_LOOP, /* optinfo_flags */ 458 TV_LOOP, /* tv_id */ 459 0, /* properties_required */ 460 0, /* properties_provided */ 461 PROP_loops, /* properties_destroyed */ 462 0, /* todo_flags_start */ 463 0, /* todo_flags_finish */ 464 }; 465 466 class pass_rtl_loop_done : public rtl_opt_pass 467 { 468 public: 469 pass_rtl_loop_done (gcc::context *ctxt) 470 : rtl_opt_pass (pass_data_rtl_loop_done, ctxt) 471 {} 472 473 /* opt_pass methods: */ 474 unsigned int execute (function *) final override; 475 476 }; // class pass_rtl_loop_done 477 478 unsigned int 479 pass_rtl_loop_done::execute (function *fun) 480 { 481 /* No longer preserve loops, remove them now. */ 482 fun->curr_properties &= ~PROP_loops; 483 loop_optimizer_finalize (); 484 free_dominance_info (CDI_DOMINATORS); 485 486 cleanup_cfg (0); 487 if (dump_file) 488 { 489 dump_reg_info (dump_file); 490 dump_flow_info (dump_file, dump_flags); 491 } 492 493 return 0; 494 } 495 496 } // anon namespace 497 498 rtl_opt_pass * 499 make_pass_rtl_loop_done (gcc::context *ctxt) 500 { 501 return new pass_rtl_loop_done (ctxt); 502 } 503 504 505 /* Loop invariant code motion. */ 507 508 namespace { 509 510 const pass_data pass_data_rtl_move_loop_invariants = 511 { 512 RTL_PASS, /* type */ 513 "loop2_invariant", /* name */ 514 OPTGROUP_LOOP, /* optinfo_flags */ 515 TV_LOOP_MOVE_INVARIANTS, /* tv_id */ 516 0, /* properties_required */ 517 0, /* properties_provided */ 518 0, /* properties_destroyed */ 519 0, /* todo_flags_start */ 520 ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */ 521 }; 522 523 class pass_rtl_move_loop_invariants : public rtl_opt_pass 524 { 525 public: 526 pass_rtl_move_loop_invariants (gcc::context *ctxt) 527 : rtl_opt_pass (pass_data_rtl_move_loop_invariants, ctxt) 528 {} 529 530 /* opt_pass methods: */ 531 bool gate (function *) final override { return flag_move_loop_invariants; } 532 unsigned int execute (function *fun) final override 533 { 534 if (number_of_loops (fun) > 1) 535 move_loop_invariants (); 536 return 0; 537 } 538 539 }; // class pass_rtl_move_loop_invariants 540 541 } // anon namespace 542 543 rtl_opt_pass * 544 make_pass_rtl_move_loop_invariants (gcc::context *ctxt) 545 { 546 return new pass_rtl_move_loop_invariants (ctxt); 547 } 548 549 550 namespace { 552 553 const pass_data pass_data_rtl_unroll_loops = 554 { 555 RTL_PASS, /* type */ 556 "loop2_unroll", /* name */ 557 OPTGROUP_LOOP, /* optinfo_flags */ 558 TV_LOOP_UNROLL, /* tv_id */ 559 0, /* properties_required */ 560 0, /* properties_provided */ 561 0, /* properties_destroyed */ 562 0, /* todo_flags_start */ 563 0, /* todo_flags_finish */ 564 }; 565 566 class pass_rtl_unroll_loops : public rtl_opt_pass 567 { 568 public: 569 pass_rtl_unroll_loops (gcc::context *ctxt) 570 : rtl_opt_pass (pass_data_rtl_unroll_loops, ctxt) 571 {} 572 573 /* opt_pass methods: */ 574 bool gate (function *) final override 575 { 576 return (flag_unroll_loops || flag_unroll_all_loops 577 || cfun->has_unroll); 578 } 579 580 unsigned int execute (function *) final override; 581 582 }; // class pass_rtl_unroll_loops 583 584 unsigned int 585 pass_rtl_unroll_loops::execute (function *fun) 586 { 587 if (number_of_loops (fun) > 1) 588 { 589 int flags = 0; 590 if (dump_file) 591 df_dump (dump_file); 592 593 if (flag_unroll_loops) 594 flags |= UAP_UNROLL; 595 if (flag_unroll_all_loops) 596 flags |= UAP_UNROLL_ALL; 597 598 unroll_loops (flags); 599 } 600 return 0; 601 } 602 603 } // anon namespace 604 605 rtl_opt_pass * 606 make_pass_rtl_unroll_loops (gcc::context *ctxt) 607 { 608 return new pass_rtl_unroll_loops (ctxt); 609 } 610 611 612 namespace { 614 615 const pass_data pass_data_rtl_doloop = 616 { 617 RTL_PASS, /* type */ 618 "loop2_doloop", /* name */ 619 OPTGROUP_LOOP, /* optinfo_flags */ 620 TV_LOOP_DOLOOP, /* tv_id */ 621 0, /* properties_required */ 622 0, /* properties_provided */ 623 0, /* properties_destroyed */ 624 0, /* todo_flags_start */ 625 0, /* todo_flags_finish */ 626 }; 627 628 class pass_rtl_doloop : public rtl_opt_pass 629 { 630 public: 631 pass_rtl_doloop (gcc::context *ctxt) 632 : rtl_opt_pass (pass_data_rtl_doloop, ctxt) 633 {} 634 635 /* opt_pass methods: */ 636 bool gate (function *) final override; 637 unsigned int execute (function *) final override; 638 639 }; // class pass_rtl_doloop 640 641 bool 642 pass_rtl_doloop::gate (function *) 643 { 644 return (flag_branch_on_count_reg && targetm.have_doloop_end ()); 645 } 646 647 unsigned int 648 pass_rtl_doloop::execute (function *fun) 649 { 650 if (number_of_loops (fun) > 1) 651 doloop_optimize_loops (); 652 return 0; 653 } 654 655 } // anon namespace 656 657 rtl_opt_pass * 658 make_pass_rtl_doloop (gcc::context *ctxt) 659 { 660 return new pass_rtl_doloop (ctxt); 661 } 662