Home | History | Annotate | Line # | Download | only in gcc
      1 /* Loop optimizer initialization routines and RTL loop optimization passes.
      2    Copyright (C) 2002-2024 Free Software Foundation, Inc.
      3 
      4 This file is part of GCC.
      5 
      6 GCC is free software; you can redistribute it and/or modify it under
      7 the terms of the GNU General Public License as published by the Free
      8 Software Foundation; either version 3, or (at your option) any later
      9 version.
     10 
     11 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
     12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
     13 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     14 for more details.
     15 
     16 You should have received a copy of the GNU General Public License
     17 along with GCC; see the file COPYING3.  If not see
     18 <http://www.gnu.org/licenses/>.  */
     19 
     20 #include "config.h"
     21 #include "system.h"
     22 #include "coretypes.h"
     23 #include "backend.h"
     24 #include "target.h"
     25 #include "rtl.h"
     26 #include "tree.h"
     27 #include "cfghooks.h"
     28 #include "df.h"
     29 #include "regs.h"
     30 #include "cfgcleanup.h"
     31 #include "cfgloop.h"
     32 #include "tree-pass.h"
     33 #include "tree-ssa-loop-niter.h"
     34 #include "loop-unroll.h"
     35 #include "tree-scalar-evolution.h"
     36 #include "tree-cfgcleanup.h"
     37 
     38 
     39 /* Apply FLAGS to the loop state.  */
     41 
     42 static void
     43 apply_loop_flags (unsigned flags)
     44 {
     45   if (flags & LOOPS_MAY_HAVE_MULTIPLE_LATCHES)
     46     {
     47       /* If the loops may have multiple latches, we cannot canonicalize
     48 	 them further (and most of the loop manipulation functions will
     49 	 not work).  However, we avoid modifying cfg, which some
     50 	 passes may want.  */
     51       gcc_assert ((flags & ~(LOOPS_MAY_HAVE_MULTIPLE_LATCHES
     52 			     | LOOPS_HAVE_RECORDED_EXITS
     53 			     | LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS)) == 0);
     54       loops_state_set (LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
     55     }
     56   else
     57     disambiguate_loops_with_multiple_latches ();
     58 
     59   /* Create pre-headers.  */
     60   if (flags & LOOPS_HAVE_PREHEADERS)
     61     {
     62       int cp_flags = CP_SIMPLE_PREHEADERS;
     63 
     64       if (flags & LOOPS_HAVE_FALLTHRU_PREHEADERS)
     65         cp_flags |= CP_FALLTHRU_PREHEADERS;
     66 
     67       create_preheaders (cp_flags);
     68     }
     69 
     70   /* Force all latches to have only single successor.  */
     71   if (flags & LOOPS_HAVE_SIMPLE_LATCHES)
     72     force_single_succ_latches ();
     73 
     74   /* Mark irreducible loops.  */
     75   if (flags & LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS)
     76     mark_irreducible_loops ();
     77 
     78   if (flags & LOOPS_HAVE_RECORDED_EXITS)
     79     record_loop_exits ();
     80 }
     81 
     82 /* Initialize loop structures.  This is used by the tree and RTL loop
     83    optimizers.  FLAGS specify what properties to compute and/or ensure for
     84    loops.  */
     85 
     86 void
     87 loop_optimizer_init (unsigned flags)
     88 {
     89   timevar_push (TV_LOOP_INIT);
     90 
     91   if (!current_loops)
     92     {
     93       gcc_assert (!(cfun->curr_properties & PROP_loops));
     94 
     95       /* Find the loops.  */
     96       current_loops = flow_loops_find (NULL);
     97     }
     98   else
     99     {
    100       bool recorded_exits = loops_state_satisfies_p (LOOPS_HAVE_RECORDED_EXITS);
    101       bool needs_fixup = loops_state_satisfies_p (LOOPS_NEED_FIXUP);
    102 
    103       gcc_assert (cfun->curr_properties & PROP_loops);
    104 
    105       /* Ensure that the dominators are computed, like flow_loops_find does.  */
    106       calculate_dominance_info (CDI_DOMINATORS);
    107 
    108       if (!needs_fixup)
    109 	checking_verify_loop_structure ();
    110 
    111       /* Clear all flags.  */
    112       if (recorded_exits)
    113 	release_recorded_exits (cfun);
    114       loops_state_clear (~0U);
    115 
    116       if (needs_fixup)
    117 	{
    118 	  /* Apply LOOPS_MAY_HAVE_MULTIPLE_LATCHES early as fix_loop_structure
    119 	     re-applies flags.  */
    120 	  loops_state_set (flags & LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
    121 	  fix_loop_structure (NULL);
    122 	}
    123     }
    124 
    125   /* Apply flags to loops.  */
    126   apply_loop_flags (flags);
    127 
    128   /* Dump loops.  */
    129   flow_loops_dump (dump_file, NULL, 1);
    130 
    131   checking_verify_loop_structure ();
    132 
    133   timevar_pop (TV_LOOP_INIT);
    134 }
    135 
    136 /* Finalize loop structures.  */
    137 
    138 void
    139 loop_optimizer_finalize (struct function *fn, bool clean_loop_closed_phi)
    140 {
    141   basic_block bb;
    142 
    143   timevar_push (TV_LOOP_FINI);
    144 
    145   if (clean_loop_closed_phi && loops_state_satisfies_p (fn, LOOP_CLOSED_SSA))
    146     {
    147       clean_up_loop_closed_phi (fn);
    148       loops_state_clear (fn, LOOP_CLOSED_SSA);
    149     }
    150 
    151   if (loops_state_satisfies_p (fn, LOOPS_HAVE_RECORDED_EXITS))
    152     release_recorded_exits (fn);
    153 
    154   free_numbers_of_iterations_estimates (fn);
    155 
    156   /* If we should preserve loop structure, do not free it but clear
    157      flags that advanced properties are there as we are not preserving
    158      that in full.  */
    159   if (fn->curr_properties & PROP_loops)
    160     {
    161       loops_state_clear (fn, LOOP_CLOSED_SSA
    162 			 | LOOPS_HAVE_MARKED_IRREDUCIBLE_REGIONS
    163 			 | LOOPS_HAVE_PREHEADERS
    164 			 | LOOPS_HAVE_SIMPLE_LATCHES
    165 			 | LOOPS_HAVE_FALLTHRU_PREHEADERS);
    166       loops_state_set (fn, LOOPS_MAY_HAVE_MULTIPLE_LATCHES);
    167       goto loop_fini_done;
    168     }
    169 
    170   for (auto loop : loops_list (fn, 0))
    171     free_simple_loop_desc (loop);
    172 
    173   /* Clean up.  */
    174   flow_loops_free (loops_for_fn (fn));
    175   ggc_free (loops_for_fn (fn));
    176   set_loops_for_fn (fn, NULL);
    177 
    178   FOR_ALL_BB_FN (bb, fn)
    179     {
    180       bb->loop_father = NULL;
    181     }
    182 
    183 loop_fini_done:
    184   timevar_pop (TV_LOOP_FINI);
    185 }
    186 
    187 /* The structure of loops might have changed.  Some loops might get removed
    188    (and their headers and latches were set to NULL), loop exists might get
    189    removed (thus the loop nesting may be wrong), and some blocks and edges
    190    were changed (so the information about bb --> loop mapping does not have
    191    to be correct).  But still for the remaining loops the header dominates
    192    the latch, and loops did not get new subloops (new loops might possibly
    193    get created, but we are not interested in them).  Fix up the mess.
    194 
    195    If CHANGED_BBS is not NULL, basic blocks whose loop depth has changed are
    196    marked in it.
    197 
    198    Returns the number of new discovered plus the number of removed loops.  */
    199 
    200 unsigned
    201 fix_loop_structure (bitmap changed_bbs)
    202 {
    203   basic_block bb;
    204   int record_exits = 0;
    205   unsigned old_nloops, i;
    206 
    207   timevar_push (TV_LOOP_INIT);
    208 
    209   if (dump_file && (dump_flags & TDF_DETAILS))
    210     fprintf (dump_file, "fix_loop_structure: fixing up loops for function\n");
    211 
    212   /* We need exact and fast dominance info to be available.  */
    213   gcc_assert (dom_info_state (CDI_DOMINATORS) == DOM_OK);
    214 
    215   if (loops_state_satisfies_p (LOOPS_HAVE_RECORDED_EXITS))
    216     {
    217       release_recorded_exits (cfun);
    218       record_exits = LOOPS_HAVE_RECORDED_EXITS;
    219     }
    220 
    221   /* Remember the depth of the blocks in the loop hierarchy, so that we can
    222      recognize blocks whose loop nesting relationship has changed.  */
    223   if (changed_bbs)
    224     FOR_EACH_BB_FN (bb, cfun)
    225       bb->aux = (void *) (size_t) loop_depth (bb->loop_father);
    226 
    227   /* Remove the dead loops from structures.  We start from the innermost
    228      loops, so that when we remove the loops, we know that the loops inside
    229      are preserved, and do not waste time relinking loops that will be
    230      removed later.  */
    231   for (auto loop : loops_list (cfun, LI_FROM_INNERMOST))
    232     {
    233       /* Detect the case that the loop is no longer present even though
    234          it wasn't marked for removal.
    235 	 ???  If we do that we can get away with not marking loops for
    236 	 removal at all.  And possibly avoid some spurious removals.  */
    237       if (loop->header
    238 	  && bb_loop_header_p (loop->header))
    239 	continue;
    240 
    241       if (dump_file && (dump_flags & TDF_DETAILS))
    242 	fprintf (dump_file, "fix_loop_structure: removing loop %d\n",
    243 		 loop->num);
    244 
    245       while (loop->inner)
    246 	{
    247 	  class loop *ploop = loop->inner;
    248 	  flow_loop_tree_node_remove (ploop);
    249 	  flow_loop_tree_node_add (loop_outer (loop), ploop);
    250 	}
    251 
    252       /* Remove the loop.  */
    253       if (loop->header)
    254 	loop->former_header = loop->header;
    255       else
    256 	gcc_assert (loop->former_header != NULL);
    257       loop->header = NULL;
    258       flow_loop_tree_node_remove (loop);
    259     }
    260 
    261   /* Remember the number of loops so we can return how many new loops
    262      flow_loops_find discovered.  */
    263   old_nloops = number_of_loops (cfun);
    264 
    265   /* Re-compute loop structure in-place.  */
    266   flow_loops_find (current_loops);
    267 
    268   /* Mark the blocks whose loop has changed.  */
    269   if (changed_bbs)
    270     {
    271       FOR_EACH_BB_FN (bb, cfun)
    272 	{
    273 	  if ((void *) (size_t) loop_depth (bb->loop_father) != bb->aux)
    274 	    bitmap_set_bit (changed_bbs, bb->index);
    275 
    276     	  bb->aux = NULL;
    277 	}
    278     }
    279 
    280   /* Finally free deleted loops.  */
    281   unsigned n_deleted = 0;
    282   class loop *loop;
    283   FOR_EACH_VEC_ELT (*get_loops (cfun), i, loop)
    284     if (loop && loop->header == NULL)
    285       {
    286 	if (dump_file
    287 	    && ((unsigned) loop->former_header->index
    288 		< basic_block_info_for_fn (cfun)->length ()))
    289 	  {
    290 	    basic_block former_header
    291 	      = BASIC_BLOCK_FOR_FN (cfun, loop->former_header->index);
    292 	    /* If the old header still exists we want to check if the
    293 	       original loop is re-discovered or the old header is now
    294 	       part of a newly discovered loop.
    295 	       In both cases we should have avoided removing the loop.  */
    296 	    if (former_header == loop->former_header)
    297 	      {
    298 		if (former_header->loop_father->header == former_header)
    299 		  fprintf (dump_file, "fix_loop_structure: rediscovered "
    300 			   "removed loop %d as loop %d with old header %d\n",
    301 			   loop->num, former_header->loop_father->num,
    302 			   former_header->index);
    303 		else if ((unsigned) former_header->loop_father->num
    304 			 >= old_nloops)
    305 		  fprintf (dump_file, "fix_loop_structure: header %d of "
    306 			   "removed loop %d is part of the newly "
    307 			   "discovered loop %d with header %d\n",
    308 			   former_header->index, loop->num,
    309 			   former_header->loop_father->num,
    310 			   former_header->loop_father->header->index);
    311 	      }
    312 	  }
    313 	(*get_loops (cfun))[i] = NULL;
    314 	flow_loop_free (loop);
    315 	n_deleted++;
    316       }
    317 
    318   /* If we deleted loops then the cached scalar evolutions refering to
    319      those loops become invalid.  */
    320   if (n_deleted > 0 && scev_initialized_p ())
    321     scev_reset_htab ();
    322 
    323   loops_state_clear (LOOPS_NEED_FIXUP);
    324 
    325   /* Apply flags to loops.  */
    326   apply_loop_flags (current_loops->state | record_exits);
    327 
    328   checking_verify_loop_structure ();
    329 
    330   timevar_pop (TV_LOOP_INIT);
    331 
    332   return number_of_loops (cfun) - old_nloops + n_deleted;
    333 }
    334 
    335 /* The RTL loop superpass.  The actual passes are subpasses.  See passes.cc for
    337    more on that.  */
    338 
    339 namespace {
    340 
    341 const pass_data pass_data_loop2 =
    342 {
    343   RTL_PASS, /* type */
    344   "loop2", /* name */
    345   OPTGROUP_LOOP, /* optinfo_flags */
    346   TV_LOOP, /* tv_id */
    347   0, /* properties_required */
    348   0, /* properties_provided */
    349   0, /* properties_destroyed */
    350   0, /* todo_flags_start */
    351   0, /* todo_flags_finish */
    352 };
    353 
    354 class pass_loop2 : public rtl_opt_pass
    355 {
    356 public:
    357   pass_loop2 (gcc::context *ctxt)
    358     : rtl_opt_pass (pass_data_loop2, ctxt)
    359   {}
    360 
    361   /* opt_pass methods: */
    362   bool gate (function *) final override;
    363 
    364 }; // class pass_loop2
    365 
    366 bool
    367 pass_loop2::gate (function *fun)
    368 {
    369   if (optimize > 0
    370       && (flag_move_loop_invariants
    371 	  || flag_unswitch_loops
    372 	  || flag_unroll_loops
    373 	  || (flag_branch_on_count_reg && targetm.have_doloop_end ())
    374 	  || cfun->has_unroll))
    375     return true;
    376   else
    377     {
    378       /* No longer preserve loops, remove them now.  */
    379       fun->curr_properties &= ~PROP_loops;
    380       if (current_loops)
    381 	loop_optimizer_finalize ();
    382       return false;
    383     }
    384 }
    385 
    386 } // anon namespace
    387 
    388 rtl_opt_pass *
    389 make_pass_loop2 (gcc::context *ctxt)
    390 {
    391   return new pass_loop2 (ctxt);
    392 }
    393 
    394 
    395 /* Initialization of the RTL loop passes.  */
    397 static unsigned int
    398 rtl_loop_init (void)
    399 {
    400   gcc_assert (current_ir_type () == IR_RTL_CFGLAYOUT);
    401 
    402   if (dump_file)
    403     {
    404       dump_reg_info (dump_file);
    405       dump_flow_info (dump_file, dump_flags);
    406     }
    407 
    408   loop_optimizer_init (LOOPS_NORMAL | LOOPS_HAVE_RECORDED_EXITS);
    409   return 0;
    410 }
    411 
    412 namespace {
    413 
    414 const pass_data pass_data_rtl_loop_init =
    415 {
    416   RTL_PASS, /* type */
    417   "loop2_init", /* name */
    418   OPTGROUP_LOOP, /* optinfo_flags */
    419   TV_LOOP, /* tv_id */
    420   0, /* properties_required */
    421   0, /* properties_provided */
    422   0, /* properties_destroyed */
    423   0, /* todo_flags_start */
    424   0, /* todo_flags_finish */
    425 };
    426 
    427 class pass_rtl_loop_init : public rtl_opt_pass
    428 {
    429 public:
    430   pass_rtl_loop_init (gcc::context *ctxt)
    431     : rtl_opt_pass (pass_data_rtl_loop_init, ctxt)
    432   {}
    433 
    434   /* opt_pass methods: */
    435   unsigned int execute (function *) final override { return rtl_loop_init (); }
    436 
    437 }; // class pass_rtl_loop_init
    438 
    439 } // anon namespace
    440 
    441 rtl_opt_pass *
    442 make_pass_rtl_loop_init (gcc::context *ctxt)
    443 {
    444   return new pass_rtl_loop_init (ctxt);
    445 }
    446 
    447 
    448 /* Finalization of the RTL loop passes.  */
    450 
    451 namespace {
    452 
    453 const pass_data pass_data_rtl_loop_done =
    454 {
    455   RTL_PASS, /* type */
    456   "loop2_done", /* name */
    457   OPTGROUP_LOOP, /* optinfo_flags */
    458   TV_LOOP, /* tv_id */
    459   0, /* properties_required */
    460   0, /* properties_provided */
    461   PROP_loops, /* properties_destroyed */
    462   0, /* todo_flags_start */
    463   0, /* todo_flags_finish */
    464 };
    465 
    466 class pass_rtl_loop_done : public rtl_opt_pass
    467 {
    468 public:
    469   pass_rtl_loop_done (gcc::context *ctxt)
    470     : rtl_opt_pass (pass_data_rtl_loop_done, ctxt)
    471   {}
    472 
    473   /* opt_pass methods: */
    474   unsigned int execute (function *) final override;
    475 
    476 }; // class pass_rtl_loop_done
    477 
    478 unsigned int
    479 pass_rtl_loop_done::execute (function *fun)
    480 {
    481   /* No longer preserve loops, remove them now.  */
    482   fun->curr_properties &= ~PROP_loops;
    483   loop_optimizer_finalize ();
    484   free_dominance_info (CDI_DOMINATORS);
    485 
    486   cleanup_cfg (0);
    487   if (dump_file)
    488     {
    489       dump_reg_info (dump_file);
    490       dump_flow_info (dump_file, dump_flags);
    491     }
    492 
    493   return 0;
    494 }
    495 
    496 } // anon namespace
    497 
    498 rtl_opt_pass *
    499 make_pass_rtl_loop_done (gcc::context *ctxt)
    500 {
    501   return new pass_rtl_loop_done (ctxt);
    502 }
    503 
    504 
    505 /* Loop invariant code motion.  */
    507 
    508 namespace {
    509 
    510 const pass_data pass_data_rtl_move_loop_invariants =
    511 {
    512   RTL_PASS, /* type */
    513   "loop2_invariant", /* name */
    514   OPTGROUP_LOOP, /* optinfo_flags */
    515   TV_LOOP_MOVE_INVARIANTS, /* tv_id */
    516   0, /* properties_required */
    517   0, /* properties_provided */
    518   0, /* properties_destroyed */
    519   0, /* todo_flags_start */
    520   ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
    521 };
    522 
    523 class pass_rtl_move_loop_invariants : public rtl_opt_pass
    524 {
    525 public:
    526   pass_rtl_move_loop_invariants (gcc::context *ctxt)
    527     : rtl_opt_pass (pass_data_rtl_move_loop_invariants, ctxt)
    528   {}
    529 
    530   /* opt_pass methods: */
    531   bool gate (function *) final override { return flag_move_loop_invariants; }
    532   unsigned int execute (function *fun) final override
    533     {
    534       if (number_of_loops (fun) > 1)
    535 	move_loop_invariants ();
    536       return 0;
    537     }
    538 
    539 }; // class pass_rtl_move_loop_invariants
    540 
    541 } // anon namespace
    542 
    543 rtl_opt_pass *
    544 make_pass_rtl_move_loop_invariants (gcc::context *ctxt)
    545 {
    546   return new pass_rtl_move_loop_invariants (ctxt);
    547 }
    548 
    549 
    550 namespace {
    552 
    553 const pass_data pass_data_rtl_unroll_loops =
    554 {
    555   RTL_PASS, /* type */
    556   "loop2_unroll", /* name */
    557   OPTGROUP_LOOP, /* optinfo_flags */
    558   TV_LOOP_UNROLL, /* tv_id */
    559   0, /* properties_required */
    560   0, /* properties_provided */
    561   0, /* properties_destroyed */
    562   0, /* todo_flags_start */
    563   0, /* todo_flags_finish */
    564 };
    565 
    566 class pass_rtl_unroll_loops : public rtl_opt_pass
    567 {
    568 public:
    569   pass_rtl_unroll_loops (gcc::context *ctxt)
    570     : rtl_opt_pass (pass_data_rtl_unroll_loops, ctxt)
    571   {}
    572 
    573   /* opt_pass methods: */
    574   bool gate (function *) final override
    575     {
    576       return (flag_unroll_loops || flag_unroll_all_loops
    577 	      || cfun->has_unroll);
    578     }
    579 
    580   unsigned int execute (function *) final override;
    581 
    582 }; // class pass_rtl_unroll_loops
    583 
    584 unsigned int
    585 pass_rtl_unroll_loops::execute (function *fun)
    586 {
    587   if (number_of_loops (fun) > 1)
    588     {
    589       int flags = 0;
    590       if (dump_file)
    591 	df_dump (dump_file);
    592 
    593       if (flag_unroll_loops)
    594 	flags |= UAP_UNROLL;
    595       if (flag_unroll_all_loops)
    596 	flags |= UAP_UNROLL_ALL;
    597 
    598       unroll_loops (flags);
    599     }
    600   return 0;
    601 }
    602 
    603 } // anon namespace
    604 
    605 rtl_opt_pass *
    606 make_pass_rtl_unroll_loops (gcc::context *ctxt)
    607 {
    608   return new pass_rtl_unroll_loops (ctxt);
    609 }
    610 
    611 
    612 namespace {
    614 
    615 const pass_data pass_data_rtl_doloop =
    616 {
    617   RTL_PASS, /* type */
    618   "loop2_doloop", /* name */
    619   OPTGROUP_LOOP, /* optinfo_flags */
    620   TV_LOOP_DOLOOP, /* tv_id */
    621   0, /* properties_required */
    622   0, /* properties_provided */
    623   0, /* properties_destroyed */
    624   0, /* todo_flags_start */
    625   0, /* todo_flags_finish */
    626 };
    627 
    628 class pass_rtl_doloop : public rtl_opt_pass
    629 {
    630 public:
    631   pass_rtl_doloop (gcc::context *ctxt)
    632     : rtl_opt_pass (pass_data_rtl_doloop, ctxt)
    633   {}
    634 
    635   /* opt_pass methods: */
    636   bool gate (function *) final override;
    637   unsigned int execute (function *) final override;
    638 
    639 }; // class pass_rtl_doloop
    640 
    641 bool
    642 pass_rtl_doloop::gate (function *)
    643 {
    644   return (flag_branch_on_count_reg && targetm.have_doloop_end ());
    645 }
    646 
    647 unsigned int
    648 pass_rtl_doloop::execute (function *fun)
    649 {
    650   if (number_of_loops (fun) > 1)
    651     doloop_optimize_loops ();
    652   return 0;
    653 }
    654 
    655 } // anon namespace
    656 
    657 rtl_opt_pass *
    658 make_pass_rtl_doloop (gcc::context *ctxt)
    659 {
    660   return new pass_rtl_doloop (ctxt);
    661 }
    662