Home | History | Annotate | Line # | Download | only in gcc
      1  1.1  mrg /* Shrink-wrapping related optimizations.
      2  1.1  mrg    Copyright (C) 1987-2022 Free Software Foundation, Inc.
      3  1.1  mrg 
      4  1.1  mrg This file is part of GCC.
      5  1.1  mrg 
      6  1.1  mrg GCC is free software; you can redistribute it and/or modify it under
      7  1.1  mrg the terms of the GNU General Public License as published by the Free
      8  1.1  mrg Software Foundation; either version 3, or (at your option) any later
      9  1.1  mrg version.
     10  1.1  mrg 
     11  1.1  mrg GCC is distributed in the hope that it will be useful, but WITHOUT ANY
     12  1.1  mrg WARRANTY; without even the implied warranty of MERCHANTABILITY or
     13  1.1  mrg FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     14  1.1  mrg for more details.
     15  1.1  mrg 
     16  1.1  mrg You should have received a copy of the GNU General Public License
     17  1.1  mrg along with GCC; see the file COPYING3.  If not see
     18  1.1  mrg <http://www.gnu.org/licenses/>.  */
     19  1.1  mrg 
     20  1.1  mrg /* This file handles shrink-wrapping related optimizations.  */
     21  1.1  mrg 
     22  1.1  mrg #include "config.h"
     23  1.1  mrg #include "system.h"
     24  1.1  mrg #include "coretypes.h"
     25  1.1  mrg #include "backend.h"
     26  1.1  mrg #include "target.h"
     27  1.1  mrg #include "rtl.h"
     28  1.1  mrg #include "tree.h"
     29  1.1  mrg #include "cfghooks.h"
     30  1.1  mrg #include "df.h"
     31  1.1  mrg #include "memmodel.h"
     32  1.1  mrg #include "tm_p.h"
     33  1.1  mrg #include "regs.h"
     34  1.1  mrg #include "insn-config.h"
     35  1.1  mrg #include "emit-rtl.h"
     36  1.1  mrg #include "output.h"
     37  1.1  mrg #include "tree-pass.h"
     38  1.1  mrg #include "cfgrtl.h"
     39  1.1  mrg #include "cfgbuild.h"
     40  1.1  mrg #include "bb-reorder.h"
     41  1.1  mrg #include "shrink-wrap.h"
     42  1.1  mrg #include "regcprop.h"
     43  1.1  mrg #include "rtl-iter.h"
     44  1.1  mrg #include "valtrack.h"
     45  1.1  mrg #include "function-abi.h"
     46  1.1  mrg #include "print-rtl.h"
     47  1.1  mrg 
/* Return true if INSN requires the stack frame to be set up.
   PROLOGUE_USED contains the hard registers used in the function
   prologue.  SET_UP_BY_PROLOGUE is the set of registers we expect the
   prologue to set up for the function.  */
bool
requires_stack_frame_p (rtx_insn *insn, HARD_REG_SET prologue_used,
			HARD_REG_SET set_up_by_prologue)
{
  df_ref def, use;
  HARD_REG_SET hardregs;
  unsigned regno;

  /* A real (non-fake) call needs the frame, unless it is a sibling
     call, which runs off the caller's frame.  */
  if (CALL_P (insn) && !FAKE_CALL_P (insn))
    return !SIBLING_CALL_P (insn);

  /* We need a frame to get the unique CFA expected by the unwinder.  */
  if (cfun->can_throw_non_call_exceptions && can_throw_internal (insn))
    return true;

  /* Collect every hard register INSN defines.  */
  CLEAR_HARD_REG_SET (hardregs);
  FOR_EACH_INSN_DEF (def, insn)
    {
      rtx dreg = DF_REF_REG (def);

      if (!REG_P (dreg))
	continue;

      add_to_hard_reg_set (&hardregs, GET_MODE (dreg), REGNO (dreg));
    }
  /* Defining a register the prologue itself reads means INSN cannot
     run before the prologue.  */
  if (hard_reg_set_intersect_p (hardregs, prologue_used))
    return true;
  /* Mask out registers the current ABI says calls may clobber; what
     remains are call-saved registers.  Writing a call-saved register
     that is live anywhere requires the prologue's save of it.  */
  hardregs &= ~crtl->abi->full_reg_clobbers ();
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (TEST_HARD_REG_BIT (hardregs, regno)
	&& df_regs_ever_live_p (regno))
      return true;

  /* Now add INSN's uses on top of the (masked) defs and check whether
     anything touched is a register the prologue sets up.  */
  FOR_EACH_INSN_USE (use, insn)
    {
      rtx reg = DF_REF_REG (use);

      if (!REG_P (reg))
	continue;

      add_to_hard_reg_set (&hardregs, GET_MODE (reg),
			   REGNO (reg));
    }
  if (hard_reg_set_intersect_p (hardregs, set_up_by_prologue))
    return true;

  return false;
}
    100  1.1  mrg 
    101  1.1  mrg /* See whether there has a single live edge from BB, which dest uses
    102  1.1  mrg    [REGNO, END_REGNO).  Return the live edge if its dest bb has
    103  1.1  mrg    one or two predecessors.  Otherwise return NULL.  */
    104  1.1  mrg 
    105  1.1  mrg static edge
    106  1.1  mrg live_edge_for_reg (basic_block bb, int regno, int end_regno)
    107  1.1  mrg {
    108  1.1  mrg   edge e, live_edge;
    109  1.1  mrg   edge_iterator ei;
    110  1.1  mrg   bitmap live;
    111  1.1  mrg   int i;
    112  1.1  mrg 
    113  1.1  mrg   live_edge = NULL;
    114  1.1  mrg   FOR_EACH_EDGE (e, ei, bb->succs)
    115  1.1  mrg     {
    116  1.1  mrg       live = df_get_live_in (e->dest);
    117  1.1  mrg       for (i = regno; i < end_regno; i++)
    118  1.1  mrg 	if (REGNO_REG_SET_P (live, i))
    119  1.1  mrg 	  {
    120  1.1  mrg 	    if (live_edge && live_edge != e)
    121  1.1  mrg 	      return NULL;
    122  1.1  mrg 	    live_edge = e;
    123  1.1  mrg 	  }
    124  1.1  mrg     }
    125  1.1  mrg 
    126  1.1  mrg   /* We can sometimes encounter dead code.  Don't try to move it
    127  1.1  mrg      into the exit block.  */
    128  1.1  mrg   if (!live_edge || live_edge->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
    129  1.1  mrg     return NULL;
    130  1.1  mrg 
    131  1.1  mrg   /* Reject targets of abnormal edges.  This is needed for correctness
    132  1.1  mrg      on ports like Alpha and MIPS, whose pic_offset_table_rtx can die on
    133  1.1  mrg      exception edges even though it is generally treated as call-saved
    134  1.1  mrg      for the majority of the compilation.  Moving across abnormal edges
    135  1.1  mrg      isn't going to be interesting for shrink-wrap usage anyway.  */
    136  1.1  mrg   if (live_edge->flags & EDGE_ABNORMAL)
    137  1.1  mrg     return NULL;
    138  1.1  mrg 
    139  1.1  mrg   /* When live_edge->dest->preds == 2, we can create a new block on
    140  1.1  mrg      the edge to make it meet the requirement.  */
    141  1.1  mrg   if (EDGE_COUNT (live_edge->dest->preds) > 2)
    142  1.1  mrg     return NULL;
    143  1.1  mrg 
    144  1.1  mrg   return live_edge;
    145  1.1  mrg }
    146  1.1  mrg 
/* Try to move INSN from BB to a successor.  Return true on success.
   USES and DEFS are the set of registers that are used and defined
   after INSN in BB.  SPLIT_P indicates whether a live edge from BB
   has been split or not.  DEBUG collects debug-insn uses of registers
   that become dead as insns are moved.  */

static bool
move_insn_for_shrink_wrap (basic_block bb, rtx_insn *insn,
			   const_hard_reg_set uses,
			   const_hard_reg_set defs,
			   bool *split_p,
			   struct dead_debug_local *debug)
{
  rtx set, src, dest;
  bitmap live_out, live_in, bb_uses = NULL, bb_defs = NULL;
  unsigned int i, dregno, end_dregno;
  /* Source register range; stays at FIRST_PSEUDO_REGISTER (empty range)
     when SRC is not a register.  */
  unsigned int sregno = FIRST_PSEUDO_REGISTER;
  unsigned int end_sregno = FIRST_PSEUDO_REGISTER;
  basic_block next_block;
  edge live_edge;
  rtx_insn *dinsn;
  df_ref def;

  /* Look for a simple register assignment.  We don't use single_set here
     because we can't deal with any CLOBBERs, USEs, or REG_UNUSED secondary
     destinations.  */
  if (!INSN_P (insn))
    return false;
  set = PATTERN (insn);
  if (GET_CODE (set) != SET)
    return false;
  src = SET_SRC (set);
  dest = SET_DEST (set);

  /* For the destination, we want only a register.  Also disallow STACK
     or FRAME related adjustments.  They are likely part of the prologue,
     so keep them in the entry block.  */
  if (!REG_P (dest)
      || dest == stack_pointer_rtx
      || dest == frame_pointer_rtx
      || dest == hard_frame_pointer_rtx)
    return false;

  /* For the source, we want one of:
      (1) A (non-overlapping) register
      (2) A constant,
      (3) An expression involving no more than one register.

     That last point comes from the code following, which was originally
     written to handle only register move operations, and still only handles
     a single source register when checking for overlaps.  Happily, the
     same checks can be applied to expressions like (plus reg const).  */

  if (CONSTANT_P (src))
    ;
  else if (!REG_P (src))
    {
      rtx src_inner = NULL_RTX;

      /* A throwing insn cannot be moved across basic blocks.  */
      if (can_throw_internal (insn))
	return false;

      /* Walk every sub-rtx of SRC, accepting only side-effect-free
	 codes and at most one inner REG.  */
      subrtx_var_iterator::array_type array;
      FOR_EACH_SUBRTX_VAR (iter, array, src, ALL)
	{
	  rtx x = *iter;
	  switch (GET_RTX_CLASS (GET_CODE (x)))
	    {
	    case RTX_CONST_OBJ:
	    case RTX_COMPARE:
	    case RTX_COMM_COMPARE:
	    case RTX_BIN_ARITH:
	    case RTX_COMM_ARITH:
	    case RTX_UNARY:
	    case RTX_TERNARY:
	      /* Constant or expression.  Continue.  */
	      break;

	    case RTX_OBJ:
	    case RTX_EXTRA:
	      switch (GET_CODE (x))
		{
		case UNSPEC:
		case SUBREG:
		case STRICT_LOW_PART:
		case PC:
		case LO_SUM:
		  /* Ok.  Continue.  */
		  break;

		case REG:
		  /* Fail if we see a second inner register.  */
		  if (src_inner != NULL)
		    return false;
		  src_inner = x;
		  break;

		default:
		  return false;
		}
	      break;

	    default:
	      return false;
	    }
	}

      /* From here on, the overlap checks only need the single inner
	 register (if any).  */
      if (src_inner != NULL)
	src = src_inner;
    }

  /* Make sure that the source register isn't defined later in BB.  */
  if (REG_P (src))
    {
      sregno = REGNO (src);
      end_sregno = END_REGNO (src);
      if (overlaps_hard_reg_set_p (defs, GET_MODE (src), sregno))
	return false;
    }

  /* Make sure that the destination register isn't referenced later in BB.  */
  dregno = REGNO (dest);
  end_dregno = END_REGNO (dest);
  if (overlaps_hard_reg_set_p (uses, GET_MODE (dest), dregno)
      || overlaps_hard_reg_set_p (defs, GET_MODE (dest), dregno))
    return false;

  /* See whether there is a successor block to which we could move INSN.  */
  live_edge = live_edge_for_reg (bb, dregno, end_dregno);
  if (!live_edge)
    return false;

  next_block = live_edge->dest;
  /* Create a new basic block on the edge.  */
  if (EDGE_COUNT (next_block->preds) == 2)
    {
      /* split_edge for a block with only one successor is meaningless.  */
      if (EDGE_COUNT (bb->succs) == 1)
	return false;

      /* If DF_LIVE doesn't exist, i.e. at -O1, just give up.  */
      if (!df_live)
	return false;

      basic_block old_dest = live_edge->dest;
      next_block = split_edge (live_edge);

      /* We create a new basic block.  Call df_grow_bb_info to make sure
	 all data structures are allocated.  */
      df_grow_bb_info (df_live);

      /* Live-in of the new block is what flows out of BB and into the
	 old destination.  */
      bitmap_and (df_get_live_in (next_block), df_get_live_out (bb),
		  df_get_live_in (old_dest));
      df_set_bb_dirty (next_block);

      /* We should not split more than once for a function.  */
      if (*split_p)
	return false;

      *split_p = true;
    }

  /* At this point we are committed to moving INSN, but let's try to
     move it as far as we can.  */
  do
    {
      /* Record debug uses of DEST between INSN and the end of BB, so
	 debug temps can be inserted for them below.  */
      if (MAY_HAVE_DEBUG_BIND_INSNS)
	{
	  FOR_BB_INSNS_REVERSE (bb, dinsn)
	    if (DEBUG_BIND_INSN_P (dinsn))
	      {
		df_ref use;
		FOR_EACH_INSN_USE (use, dinsn)
		  if (refers_to_regno_p (dregno, end_dregno,
					 DF_REF_REG (use), (rtx *) NULL))
		    dead_debug_add (debug, use, DF_REF_REGNO (use));
	      }
	    else if (dinsn == insn)
	      break;
	}
      live_out = df_get_live_out (bb);
      live_in = df_get_live_in (next_block);
      bb = next_block;

      /* Check whether BB uses DEST or clobbers DEST.  We need to add
	 INSN to BB if so.  Either way, DEST is no longer live on entry,
	 except for any part that overlaps SRC (next loop).  */
      if (!*split_p)
	{
	  bb_uses = &DF_LR_BB_INFO (bb)->use;
	  bb_defs = &DF_LR_BB_INFO (bb)->def;
	}
      if (df_live)
	{
	  for (i = dregno; i < end_dregno; i++)
	    {
	      if (*split_p
		  || REGNO_REG_SET_P (bb_uses, i)
		  || REGNO_REG_SET_P (bb_defs, i)
		  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
		next_block = NULL;
	      CLEAR_REGNO_REG_SET (live_out, i);
	      CLEAR_REGNO_REG_SET (live_in, i);
	    }

	  /* Check whether BB clobbers SRC.  We need to add INSN to BB if so.
	     Either way, SRC is now live on entry.  */
	  for (i = sregno; i < end_sregno; i++)
	    {
	      if (*split_p
		  || REGNO_REG_SET_P (bb_defs, i)
		  || REGNO_REG_SET_P (&DF_LIVE_BB_INFO (bb)->gen, i))
		next_block = NULL;
	      SET_REGNO_REG_SET (live_out, i);
	      SET_REGNO_REG_SET (live_in, i);
	    }
	}
      else
	{
	  /* DF_LR_BB_INFO (bb)->def does not comprise the DF_REF_PARTIAL and
	     DF_REF_CONDITIONAL defs.  So if DF_LIVE doesn't exist, i.e.
	     at -O1, just give up searching NEXT_BLOCK.  */
	  next_block = NULL;
	  for (i = dregno; i < end_dregno; i++)
	    {
	      CLEAR_REGNO_REG_SET (live_out, i);
	      CLEAR_REGNO_REG_SET (live_in, i);
	    }

	  for (i = sregno; i < end_sregno; i++)
	    {
	      SET_REGNO_REG_SET (live_out, i);
	      SET_REGNO_REG_SET (live_in, i);
	    }
	}

      /* If we don't need to add the move to BB, look for a single
	 successor block.  */
      if (next_block)
	{
	  live_edge = live_edge_for_reg (next_block, dregno, end_dregno);
	  if (!live_edge || EDGE_COUNT (live_edge->dest->preds) > 1)
	    break;
	  next_block = live_edge->dest;
	}
    }
  while (next_block);

  /* For the newly created basic block, there is no dataflow info at all.
     So skip the following dataflow update and check.  */
  if (!(*split_p))
    {
      /* BB now defines DEST.  It only uses the parts of DEST that overlap SRC
	 (next loop).  */
      for (i = dregno; i < end_dregno; i++)
	{
	  CLEAR_REGNO_REG_SET (bb_uses, i);
	  SET_REGNO_REG_SET (bb_defs, i);
	}

      /* BB now uses SRC.  */
      for (i = sregno; i < end_sregno; i++)
	SET_REGNO_REG_SET (bb_uses, i);
    }

  /* Insert debug temps for dead REGs used in subsequent debug insns.  */
  if (debug->used && !bitmap_empty_p (debug->used))
    FOR_EACH_INSN_DEF (def, insn)
      dead_debug_insert_temp (debug, DF_REF_REGNO (def), insn,
			      DEBUG_TEMP_BEFORE_WITH_VALUE);

  rtx_insn *insn_copy = emit_insn_after (PATTERN (insn), bb_note (bb));
  /* Update the LABEL_NUSES count on any referenced labels.  The ideal
     solution here would be to actually move the instruction instead
     of copying/deleting it as this loses some notations on the
     insn.  */
  mark_jump_label (PATTERN (insn), insn_copy, 0);
  delete_insn (insn);
  return true;
}
    426  1.1  mrg 
    427  1.1  mrg /* Look for register copies in the first block of the function, and move
    428  1.1  mrg    them down into successor blocks if the register is used only on one
    429  1.1  mrg    path.  This exposes more opportunities for shrink-wrapping.  These
    430  1.1  mrg    kinds of sets often occur when incoming argument registers are moved
    431  1.1  mrg    to call-saved registers because their values are live across one or
    432  1.1  mrg    more calls during the function.  */
    433  1.1  mrg 
    434  1.1  mrg static void
    435  1.1  mrg prepare_shrink_wrap (basic_block entry_block)
    436  1.1  mrg {
    437  1.1  mrg   rtx_insn *insn, *curr;
    438  1.1  mrg   rtx x;
    439  1.1  mrg   HARD_REG_SET uses, defs;
    440  1.1  mrg   df_ref def, use;
    441  1.1  mrg   bool split_p = false;
    442  1.1  mrg   unsigned int i;
    443  1.1  mrg   struct dead_debug_local debug;
    444  1.1  mrg 
    445  1.1  mrg   if (JUMP_P (BB_END (entry_block)))
    446  1.1  mrg     {
    447  1.1  mrg       /* To have more shrink-wrapping opportunities, prepare_shrink_wrap tries
    448  1.1  mrg 	 to sink the copies from parameter to callee saved register out of
    449  1.1  mrg 	 entry block.  copyprop_hardreg_forward_bb_without_debug_insn is called
    450  1.1  mrg 	 to release some dependences.  */
    451  1.1  mrg       copyprop_hardreg_forward_bb_without_debug_insn (entry_block);
    452  1.1  mrg     }
    453  1.1  mrg 
    454  1.1  mrg   dead_debug_local_init (&debug, NULL, NULL);
    455  1.1  mrg   CLEAR_HARD_REG_SET (uses);
    456  1.1  mrg   CLEAR_HARD_REG_SET (defs);
    457  1.1  mrg 
    458  1.1  mrg   FOR_BB_INSNS_REVERSE_SAFE (entry_block, insn, curr)
    459  1.1  mrg     if (NONDEBUG_INSN_P (insn)
    460  1.1  mrg 	&& !move_insn_for_shrink_wrap (entry_block, insn, uses, defs,
    461  1.1  mrg 				       &split_p, &debug))
    462  1.1  mrg       {
    463  1.1  mrg 	/* Add all defined registers to DEFs.  */
    464  1.1  mrg 	FOR_EACH_INSN_DEF (def, insn)
    465  1.1  mrg 	  {
    466  1.1  mrg 	    x = DF_REF_REG (def);
    467  1.1  mrg 	    if (REG_P (x) && HARD_REGISTER_P (x))
    468  1.1  mrg 	      for (i = REGNO (x); i < END_REGNO (x); i++)
    469  1.1  mrg 		SET_HARD_REG_BIT (defs, i);
    470  1.1  mrg 	  }
    471  1.1  mrg 
    472  1.1  mrg 	/* Add all used registers to USESs.  */
    473  1.1  mrg 	FOR_EACH_INSN_USE (use, insn)
    474  1.1  mrg 	  {
    475  1.1  mrg 	    x = DF_REF_REG (use);
    476  1.1  mrg 	    if (REG_P (x) && HARD_REGISTER_P (x))
    477  1.1  mrg 	      for (i = REGNO (x); i < END_REGNO (x); i++)
    478  1.1  mrg 		SET_HARD_REG_BIT (uses, i);
    479  1.1  mrg 	  }
    480  1.1  mrg       }
    481  1.1  mrg 
    482  1.1  mrg   dead_debug_local_finish (&debug, NULL);
    483  1.1  mrg }
    484  1.1  mrg 
    485  1.1  mrg /* Return whether basic block PRO can get the prologue.  It cannot if it
    486  1.1  mrg    has incoming complex edges that need a prologue inserted (we make a new
    487  1.1  mrg    block for the prologue, so those edges would need to be redirected, which
    488  1.1  mrg    does not work).  It also cannot if there exist registers live on entry
    489  1.1  mrg    to PRO that are clobbered by the prologue.  */
    490  1.1  mrg 
    491  1.1  mrg static bool
    492  1.1  mrg can_get_prologue (basic_block pro, HARD_REG_SET prologue_clobbered)
    493  1.1  mrg {
    494  1.1  mrg   edge e;
    495  1.1  mrg   edge_iterator ei;
    496  1.1  mrg   FOR_EACH_EDGE (e, ei, pro->preds)
    497  1.1  mrg     if (e->flags & EDGE_COMPLEX
    498  1.1  mrg 	&& !dominated_by_p (CDI_DOMINATORS, e->src, pro))
    499  1.1  mrg       return false;
    500  1.1  mrg 
    501  1.1  mrg   HARD_REG_SET live;
    502  1.1  mrg   REG_SET_TO_HARD_REG_SET (live, df_get_live_in (pro));
    503  1.1  mrg   if (hard_reg_set_intersect_p (live, prologue_clobbered))
    504  1.1  mrg     return false;
    505  1.1  mrg 
    506  1.1  mrg   return true;
    507  1.1  mrg }
    508  1.1  mrg 
    509  1.1  mrg /* Return whether we can duplicate basic block BB for shrink wrapping.  We
    510  1.1  mrg    cannot if the block cannot be duplicated at all, or if any of its incoming
    511  1.1  mrg    edges are complex and come from a block that does not require a prologue
    512  1.1  mrg    (we cannot redirect such edges), or if the block is too big to copy.
    513  1.1  mrg    PRO is the basic block before which we would put the prologue, MAX_SIZE is
    514  1.1  mrg    the maximum size block we allow to be copied.  */
    515  1.1  mrg 
    516  1.1  mrg static bool
    517  1.1  mrg can_dup_for_shrink_wrapping (basic_block bb, basic_block pro, unsigned max_size)
    518  1.1  mrg {
    519  1.1  mrg   if (!can_duplicate_block_p (bb))
    520  1.1  mrg     return false;
    521  1.1  mrg 
    522  1.1  mrg   edge e;
    523  1.1  mrg   edge_iterator ei;
    524  1.1  mrg   FOR_EACH_EDGE (e, ei, bb->preds)
    525  1.1  mrg     if (e->flags & (EDGE_COMPLEX | EDGE_CROSSING)
    526  1.1  mrg 	&& !dominated_by_p (CDI_DOMINATORS, e->src, pro))
    527  1.1  mrg       return false;
    528  1.1  mrg 
    529  1.1  mrg   unsigned size = 0;
    530  1.1  mrg 
    531  1.1  mrg   rtx_insn *insn;
    532  1.1  mrg   FOR_BB_INSNS (bb, insn)
    533  1.1  mrg     if (NONDEBUG_INSN_P (insn))
    534  1.1  mrg       {
    535  1.1  mrg 	size += get_attr_min_length (insn);
    536  1.1  mrg 	if (size > max_size)
    537  1.1  mrg 	  return false;
    538  1.1  mrg       }
    539  1.1  mrg 
    540  1.1  mrg   return true;
    541  1.1  mrg }
    542  1.1  mrg 
    543  1.1  mrg /* Do whatever needs to be done for exits that run without prologue.
    544  1.1  mrg    Sibcalls need nothing done.  Normal exits get a simple_return inserted.  */
    545  1.1  mrg 
    546  1.1  mrg static void
    547  1.1  mrg handle_simple_exit (edge e)
    548  1.1  mrg {
    549  1.1  mrg 
    550  1.1  mrg   if (e->flags & EDGE_SIBCALL)
    551  1.1  mrg     {
    552  1.1  mrg       /* Tell function.cc to take no further action on this edge.  */
    553  1.1  mrg       e->flags |= EDGE_IGNORE;
    554  1.1  mrg 
    555  1.1  mrg       e->flags &= ~EDGE_FALLTHRU;
    556  1.1  mrg       emit_barrier_after_bb (e->src);
    557  1.1  mrg       return;
    558  1.1  mrg     }
    559  1.1  mrg 
    560  1.1  mrg   /* If the basic block the edge comes from has multiple successors,
    561  1.1  mrg      split the edge.  */
    562  1.1  mrg   if (EDGE_COUNT (e->src->succs) > 1)
    563  1.1  mrg     {
    564  1.1  mrg       basic_block old_bb = e->src;
    565  1.1  mrg       rtx_insn *end = BB_END (old_bb);
    566  1.1  mrg       rtx_note *note = emit_note_after (NOTE_INSN_DELETED, end);
    567  1.1  mrg       basic_block new_bb = create_basic_block (note, note, old_bb);
    568  1.1  mrg       BB_COPY_PARTITION (new_bb, old_bb);
    569  1.1  mrg       BB_END (old_bb) = end;
    570  1.1  mrg 
    571  1.1  mrg       redirect_edge_succ (e, new_bb);
    572  1.1  mrg       new_bb->count = e->count ();
    573  1.1  mrg       e->flags |= EDGE_FALLTHRU;
    574  1.1  mrg 
    575  1.1  mrg       e = make_single_succ_edge (new_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
    576  1.1  mrg     }
    577  1.1  mrg 
    578  1.1  mrg   e->flags &= ~EDGE_FALLTHRU;
    579  1.1  mrg   rtx_jump_insn *ret = emit_jump_insn_after (targetm.gen_simple_return (),
    580  1.1  mrg 					     BB_END (e->src));
    581  1.1  mrg   JUMP_LABEL (ret) = simple_return_rtx;
    582  1.1  mrg   emit_barrier_after_bb (e->src);
    583  1.1  mrg 
    584  1.1  mrg   if (dump_file)
    585  1.1  mrg     fprintf (dump_file, "Made simple_return with UID %d in bb %d\n",
    586  1.1  mrg 	     INSN_UID (ret), e->src->index);
    587  1.1  mrg }
    588  1.1  mrg 
    589  1.1  mrg /* Try to perform a kind of shrink-wrapping, making sure the
    590  1.1  mrg    prologue/epilogue is emitted only around those parts of the
    591  1.1  mrg    function that require it.
    592  1.1  mrg 
    593  1.1  mrg    There will be exactly one prologue, and it will be executed either
    594  1.1  mrg    zero or one time, on any path.  Depending on where the prologue is
    595  1.1  mrg    placed, some of the basic blocks can be reached via both paths with
    596  1.1  mrg    and without a prologue.  Such blocks will be duplicated here, and the
    597  1.1  mrg    edges changed to match.
    598  1.1  mrg 
    599  1.1  mrg    Paths that go to the exit without going through the prologue will use
    600  1.1  mrg    a simple_return instead of the epilogue.  We maximize the number of
    601  1.1  mrg    those, making sure to only duplicate blocks that can be duplicated.
    602  1.1  mrg    If the prologue can then still be placed in multiple locations, we
    603  1.1  mrg    place it as early as possible.
    604  1.1  mrg 
    605  1.1  mrg    An example, where we duplicate blocks with control flow (legend:
    606  1.1  mrg    _B_egin, _R_eturn and _S_imple_return; edges without arrowhead should
    607  1.1  mrg    be taken to point down or to the right, to simplify the diagram; here,
    608  1.1  mrg    block 3 needs a prologue, the rest does not):
    609  1.1  mrg 
    610  1.1  mrg 
    611  1.1  mrg        B                 B
    612  1.1  mrg        |                 |
    613  1.1  mrg        2                 2
    614  1.1  mrg        |\                |\
    615  1.1  mrg        | 3    becomes    | 3
    616  1.1  mrg        |/                |  \
    617  1.1  mrg        4                 7   4
    618  1.1  mrg        |\                |\  |\
    619  1.1  mrg        | 5               | 8 | 5
    620  1.1  mrg        |/                |/  |/
    621  1.1  mrg        6                 9   6
    622  1.1  mrg        |                 |   |
    623  1.1  mrg        R                 S   R
    624  1.1  mrg 
    625  1.1  mrg 
    626  1.1  mrg    (bb 4 is duplicated to 7, and so on; the prologue is inserted on the
    627  1.1  mrg    edge 2->3).
    628  1.1  mrg 
    629  1.1  mrg    Another example, where part of a loop is duplicated (again, bb 3 is
    630  1.1  mrg    the only block that needs a prologue):
    631  1.1  mrg 
    632  1.1  mrg 
    633  1.1  mrg        B   3<--              B       ->3<--
    634  1.1  mrg        |   |   |             |      |  |   |
    635  1.1  mrg        |   v   |   becomes   |      |  v   |
    636  1.1  mrg        2---4---              2---5--   4---
    637  1.1  mrg            |                     |     |
    638  1.1  mrg            R                     S     R
    639  1.1  mrg 
    640  1.1  mrg 
    641  1.1  mrg    (bb 4 is duplicated to 5; the prologue is inserted on the edge 5->3).
    642  1.1  mrg 
    643  1.1  mrg    ENTRY_EDGE is the edge where the prologue will be placed, possibly
    644  1.1  mrg    changed by this function.  PROLOGUE_SEQ is the prologue we will insert.  */
    645  1.1  mrg 
void
try_shrink_wrapping (edge *entry_edge, rtx_insn *prologue_seq)
{
  /* If we cannot shrink-wrap, are told not to shrink-wrap, or it makes
     no sense to shrink-wrap: then do not shrink-wrap!  */

  if (!SHRINK_WRAPPING_ENABLED)
    return;

  if (crtl->profile && !targetm.profile_before_prologue ())
    return;

  if (crtl->calls_eh_return)
    return;

  /* A prologue containing only the NOTE_INSN_PROLOGUE_END marker does no
     work, so there is nothing to shrink-wrap.  */
  bool empty_prologue = true;
  for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn))
    if (!(NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_PROLOGUE_END))
      {
	empty_prologue = false;
	break;
      }
  if (empty_prologue)
    return;

  /* Move some code down to expose more shrink-wrapping opportunities.  */

  basic_block entry = (*entry_edge)->dest;
  prepare_shrink_wrap (entry);

  if (dump_file)
    fprintf (dump_file, "Attempting shrink-wrapping optimization.\n");

  /* Compute the registers set and used in the prologue.  */

  HARD_REG_SET prologue_clobbered, prologue_used;
  CLEAR_HARD_REG_SET (prologue_clobbered);
  CLEAR_HARD_REG_SET (prologue_used);
  for (rtx_insn *insn = prologue_seq; insn; insn = NEXT_INSN (insn))
    if (NONDEBUG_INSN_P (insn))
      {
	HARD_REG_SET this_used;
	CLEAR_HARD_REG_SET (this_used);
	note_uses (&PATTERN (insn), record_hard_reg_uses, &this_used);
	/* A register clobbered by an earlier prologue insn is not a use
	   of a value live before the prologue; mask those out.  */
	this_used &= ~prologue_clobbered;
	prologue_used |= this_used;
	note_stores (insn, record_hard_reg_sets, &prologue_clobbered);
      }
  /* The stack pointer (and the frame pointer, if one is used) are set up
     by the prologue by design; do not treat them as clobbers.  */
  CLEAR_HARD_REG_BIT (prologue_clobbered, STACK_POINTER_REGNUM);
  if (frame_pointer_needed)
    CLEAR_HARD_REG_BIT (prologue_clobbered, HARD_FRAME_POINTER_REGNUM);

  /* Find out what registers are set up by the prologue; any use of these
     cannot happen before the prologue.  */

  struct hard_reg_set_container set_up_by_prologue;
  CLEAR_HARD_REG_SET (set_up_by_prologue.set);
  add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, STACK_POINTER_REGNUM);
  add_to_hard_reg_set (&set_up_by_prologue.set, Pmode, ARG_POINTER_REGNUM);
  if (frame_pointer_needed)
    add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
			 HARD_FRAME_POINTER_REGNUM);
  if (pic_offset_table_rtx
      && (unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
    add_to_hard_reg_set (&set_up_by_prologue.set, Pmode,
			 PIC_OFFSET_TABLE_REGNUM);
  if (crtl->drap_reg)
    add_to_hard_reg_set (&set_up_by_prologue.set,
			 GET_MODE (crtl->drap_reg),
			 REGNO (crtl->drap_reg));
  if (targetm.set_up_by_prologue)
    targetm.set_up_by_prologue (&set_up_by_prologue);

  /* We will insert the prologue before the basic block PRO.  PRO should
     dominate all basic blocks that need the prologue to be executed
     before them.  First, make PRO the "tightest wrap" possible.  */

  calculate_dominance_info (CDI_DOMINATORS);

  basic_block pro = 0;

  basic_block bb;
  edge e;
  edge_iterator ei;
  FOR_EACH_BB_FN (bb, cfun)
    {
      rtx_insn *insn;
      FOR_BB_INSNS (bb, insn)
	if (NONDEBUG_INSN_P (insn)
	    && requires_stack_frame_p (insn, prologue_used,
				       set_up_by_prologue.set))
	  {
	    if (dump_file)
	      {
		fprintf (dump_file, "Block %d needs prologue due to insn %d:\n",
			 bb->index, INSN_UID (insn));
		print_rtl_single (dump_file, insn);
	      }
	    /* One frame-requiring insn is enough for this block; widen PRO
	       just enough to also dominate this block.  */
	    pro = nearest_common_dominator (CDI_DOMINATORS, pro, bb);
	    break;
	  }
    }

  /* If nothing needs a prologue, just put it at the start.  This really
     shouldn't happen, but we cannot fix it here.  */

  if (pro == 0)
    {
      if (dump_file)
	fprintf(dump_file, "Nothing needs a prologue, but it isn't empty; "
			   "putting it at the start.\n");
      pro = entry;
    }

  if (dump_file)
    fprintf (dump_file, "After wrapping required blocks, PRO is now %d\n",
	     pro->index);

  /* Now see if we can put the prologue at the start of PRO.  Putting it
     there might require duplicating a block that cannot be duplicated,
     or in some cases we cannot insert the prologue there at all.  If PRO
     won't do, try again with the immediate dominator of PRO, and so on.

     The blocks that need duplicating are those reachable from PRO but
     not dominated by it.  We keep in BB_WITH a bitmap of the blocks
     reachable from PRO that we already found, and in VEC a stack of
     those we still need to consider (to find successors).  */

  auto_bitmap bb_with;
  bitmap_set_bit (bb_with, pro->index);

  vec<basic_block> vec;
  vec.create (n_basic_blocks_for_fn (cfun));
  vec.quick_push (pro);

  /* Limit on how much a duplicated block may grow the function.  */
  unsigned max_grow_size = get_uncond_jump_length ();
  max_grow_size *= param_max_grow_copy_bb_insns;

  /* CHECKED_PRO caches the last PRO that passed can_get_prologue, so the
     inner walk is not repeated when PRO has not moved.  */
  basic_block checked_pro = NULL;

  while (pro != entry)
    {
      if (pro != checked_pro)
	{
	  while (pro != entry && !can_get_prologue (pro, prologue_clobbered))
	    {
	      pro = get_immediate_dominator (CDI_DOMINATORS, pro);

	      if (bitmap_set_bit (bb_with, pro->index))
		vec.quick_push (pro);
	    }
	  checked_pro = pro;
	}

      if (vec.is_empty ())
	break;

      basic_block bb = vec.pop ();
      if (!can_dup_for_shrink_wrapping (bb, pro, max_grow_size))
	/* BB cannot be duplicated: move PRO up the dominator tree until
	   BB no longer needs duplicating (i.e. PRO dominates it).  */
	while (!dominated_by_p (CDI_DOMINATORS, bb, pro))
	  {
	    gcc_assert (pro != entry);

	    pro = get_immediate_dominator (CDI_DOMINATORS, pro);

	    if (bitmap_set_bit (bb_with, pro->index))
	      vec.quick_push (pro);
	  }

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
	    && bitmap_set_bit (bb_with, e->dest->index))
	  vec.quick_push (e->dest);
    }

  if (dump_file)
    fprintf (dump_file, "Avoiding non-duplicatable blocks, PRO is now %d\n",
	     pro->index);

  /* If we can move PRO back without having to duplicate more blocks, do so.
     We do this because putting the prologue earlier is better for scheduling.

     We can move back to a block PRE if every path from PRE will eventually
     need a prologue, that is, PRO is a post-dominator of PRE.  PRE needs
     to dominate every block reachable from itself.  We keep in BB_TMP a
     bitmap of the blocks reachable from PRE that we already found, and in
     VEC a stack of those we still need to consider.

     Any block reachable from PRE is also reachable from all predecessors
     of PRE, so if we find we need to move PRE back further we can leave
     everything not considered so far on the stack.  Any block dominated
     by PRE is also dominated by all other dominators of PRE, so anything
     found good for some PRE does not need to be reconsidered later.

     We don't need to update BB_WITH because none of the new blocks found
     can jump to a block that does not need the prologue.  */

  if (pro != entry)
    {
      calculate_dominance_info (CDI_POST_DOMINATORS);

      auto_bitmap bb_tmp;
      bitmap_copy (bb_tmp, bb_with);
      basic_block last_ok = pro;
      vec.truncate (0);

      while (pro != entry)
	{
	  basic_block pre = get_immediate_dominator (CDI_DOMINATORS, pro);
	  if (!dominated_by_p (CDI_POST_DOMINATORS, pre, pro))
	    break;

	  if (bitmap_set_bit (bb_tmp, pre->index))
	    vec.quick_push (pre);

	  bool ok = true;
	  while (!vec.is_empty ())
	    {
	      if (!dominated_by_p (CDI_DOMINATORS, vec.last (), pre))
		{
		  ok = false;
		  break;
		}

	      basic_block bb = vec.pop ();
	      FOR_EACH_EDGE (e, ei, bb->succs)
		if (bitmap_set_bit (bb_tmp, e->dest->index))
		  vec.quick_push (e->dest);
	    }

	  if (ok && can_get_prologue (pre, prologue_clobbered))
	    last_ok = pre;

	  pro = pre;
	}

      /* Settle on the earliest candidate that was actually acceptable.  */
      pro = last_ok;

      free_dominance_info (CDI_POST_DOMINATORS);
    }

  vec.release ();

  if (dump_file)
    fprintf (dump_file, "Bumping back to anticipatable blocks, PRO is now %d\n",
	     pro->index);

  if (pro == entry)
    {
      free_dominance_info (CDI_DOMINATORS);
      return;
    }

  /* Compute what fraction of the frequency and count of the blocks that run
     both with and without prologue are for running with prologue.  This gives
     the correct answer for reducible flow graphs; for irreducible flow graphs
     our profile is messed up beyond repair anyway.  */

  profile_count num = profile_count::zero ();
  profile_count den = profile_count::zero ();

  FOR_EACH_EDGE (e, ei, pro->preds)
    if (!dominated_by_p (CDI_DOMINATORS, e->src, pro))
      {
	if (e->count ().initialized_p ())
	  num += e->count ();
	if (e->src->count.initialized_p ())
	  den += e->src->count;
      }

  /* All is okay, so do it.  */

  crtl->shrink_wrapped = true;
  if (dump_file)
    fprintf (dump_file, "Performing shrink-wrapping.\n");

  /* Copy the blocks that can run both with and without prologue.  The
     originals run with prologue, the copies without.  Store a pointer to
     the copy in the ->aux field of the original.  */

  FOR_EACH_BB_FN (bb, cfun)
    if (bitmap_bit_p (bb_with, bb->index)
	&& !dominated_by_p (CDI_DOMINATORS, bb, pro))
      {
	basic_block dup = duplicate_block (bb, 0, 0);

	bb->aux = dup;

	if (JUMP_P (BB_END (dup)) && !any_condjump_p (BB_END (dup)))
	  emit_barrier_after_bb (dup);

	if (EDGE_COUNT (dup->succs) == 0)
	  emit_barrier_after_bb (dup);

	if (dump_file)
	  fprintf (dump_file, "Duplicated %d to %d\n", bb->index, dup->index);

	/* Split the execution count between original and copy according
	   to the NUM/DEN fraction computed above.  */
	if (num == profile_count::zero () || den.nonzero_p ())
	  bb->count = bb->count.apply_scale (num, den);
	dup->count -= bb->count;
      }

  /* Now change the edges to point to the copies, where appropriate.  */

  FOR_EACH_BB_FN (bb, cfun)
    if (!dominated_by_p (CDI_DOMINATORS, bb, pro))
      {
	basic_block src = bb;
	if (bitmap_bit_p (bb_with, bb->index))
	  src = (basic_block) bb->aux;

	FOR_EACH_EDGE (e, ei, src->succs)
	  {
	    if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
	      continue;

	    if (bitmap_bit_p (bb_with, e->dest->index)
		&& !dominated_by_p (CDI_DOMINATORS, e->dest, pro))
	      {
		if (dump_file)
		  fprintf (dump_file, "Redirecting edge %d->%d to %d\n",
			   e->src->index, e->dest->index,
			   ((basic_block) e->dest->aux)->index);
		redirect_edge_and_branch_force (e, (basic_block) e->dest->aux);
	      }
	    else if (e->flags & EDGE_FALLTHRU
		     && bitmap_bit_p (bb_with, bb->index))
	      force_nonfallthru (e);
	  }
      }

  /* Also redirect the function entry edge if necessary.  */

  FOR_EACH_EDGE (e, ei, ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs)
    if (bitmap_bit_p (bb_with, e->dest->index)
	&& !dominated_by_p (CDI_DOMINATORS, e->dest, pro))
      {
	basic_block split_bb = split_edge (e);
	e = single_succ_edge (split_bb);
	redirect_edge_and_branch_force (e, (basic_block) e->dest->aux);
      }

  /* Make a simple_return for those exits that run without prologue.  */

  FOR_EACH_BB_REVERSE_FN (bb, cfun)
    if (!bitmap_bit_p (bb_with, bb->index))
      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
	  handle_simple_exit (e);

  /* Finally, we want a single edge to put the prologue on.  Make a new
     block before the PRO block; the edge between them is the edge we want.
     Then redirect those edges into PRO that come from blocks without the
     prologue, to point to the new block instead.  The new prologue block
     is put at the end of the insn chain.  */

  basic_block new_bb = create_empty_bb (EXIT_BLOCK_PTR_FOR_FN (cfun)->prev_bb);
  BB_COPY_PARTITION (new_bb, pro);
  new_bb->count = profile_count::zero ();
  if (dump_file)
    fprintf (dump_file, "Made prologue block %d\n", new_bb->index);

  for (ei = ei_start (pro->preds); (e = ei_safe_edge (ei)); )
    {
      /* Edges from blocks that run the prologue (or are reachable from
	 PRO) keep going straight to PRO.  */
      if (bitmap_bit_p (bb_with, e->src->index)
	  || dominated_by_p (CDI_DOMINATORS, e->src, pro))
	{
	  ei_next (&ei);
	  continue;
	}

      new_bb->count += e->count ();

      redirect_edge_and_branch_force (e, new_bb);
      if (dump_file)
	fprintf (dump_file, "Redirected edge from %d\n", e->src->index);
    }

  *entry_edge = make_single_succ_edge (new_bb, pro, EDGE_FALLTHRU);
  force_nonfallthru (*entry_edge);

  free_dominance_info (CDI_DOMINATORS);
}
   1029  1.1  mrg 
   1030  1.1  mrg /* Separate shrink-wrapping
   1032  1.1  mrg 
   1033  1.1  mrg    Instead of putting all of the prologue and epilogue in one spot, we
   1034  1.1  mrg    can put parts of it in places where those components are executed less
   1035  1.1  mrg    frequently.  The following code does this, for prologue and epilogue
   1036  1.1  mrg    components that can be put in more than one location, and where those
   1037  1.1  mrg    components can be executed more than once (the epilogue component will
   1038  1.1  mrg    always be executed before the prologue component is executed a second
   1039  1.1  mrg    time).
   1040  1.1  mrg 
   1041  1.1  mrg    What exactly is a component is target-dependent.  The more usual
   1042  1.1  mrg    components are simple saves/restores to/from the frame of callee-saved
   1043  1.1  mrg    registers.  This code treats components abstractly (as an sbitmap),
   1044  1.1  mrg    letting the target handle all details.
   1045  1.1  mrg 
   1046  1.1  mrg    Prologue components are placed in such a way that for every component
   1047  1.1  mrg    the prologue is executed as infrequently as possible.  We do this by
   1048  1.1  mrg    walking the dominator tree, comparing the cost of placing a prologue
   1049  1.1  mrg    component before a block to the sum of costs determined for all subtrees
   1050  1.1  mrg    of that block.
   1051  1.1  mrg 
   1052  1.1  mrg    From this placement, we then determine for each component all blocks
   1053  1.1  mrg    where at least one of this block's dominators (including itself) will
   1054  1.1  mrg    get a prologue inserted.  That then is how the components are placed.
   1055  1.1  mrg    We could place the epilogue components a bit smarter (we can save a
   1056  1.1  mrg    bit of code size sometimes); this is a possible future improvement.
   1057  1.1  mrg 
   1058  1.1  mrg    Prologues and epilogues are preferably placed into a block, either at
   1059  1.1  mrg    the beginning or end of it, if it is needed for all predecessor resp.
   1060  1.1  mrg    successor edges; or placed on the edge otherwise.
   1061  1.1  mrg 
   1062  1.1  mrg    If the placement of any prologue/epilogue leads to a situation we cannot
   1063  1.1  mrg    handle (for example, an abnormal edge would need to be split, or some
   1064  1.1  mrg    targets want to use some specific registers that may not be available
   1065  1.1  mrg    where we want to put them), separate shrink-wrapping for the components
   1066  1.1  mrg    in that prologue/epilogue is aborted.  */
   1067  1.1  mrg 
   1068  1.1  mrg 
   1069  1.1  mrg /* Print the sbitmap COMPONENTS to the DUMP_FILE if not empty, with the
   1070  1.1  mrg    label LABEL.  */
   1071  1.1  mrg static void
   1072  1.1  mrg dump_components (const char *label, sbitmap components)
   1073  1.1  mrg {
   1074  1.1  mrg   if (bitmap_empty_p (components))
   1075  1.1  mrg     return;
   1076  1.1  mrg 
   1077  1.1  mrg   fprintf (dump_file, " [%s", label);
   1078  1.1  mrg 
   1079  1.1  mrg   for (unsigned int j = 0; j < components->n_bits; j++)
   1080  1.1  mrg     if (bitmap_bit_p (components, j))
   1081  1.1  mrg       fprintf (dump_file, " %u", j);
   1082  1.1  mrg 
   1083  1.1  mrg   fprintf (dump_file, "]");
   1084  1.1  mrg }
   1085  1.1  mrg 
/* The data we collect for each bb.  Stored in the bb's ->aux field for
   the duration of separate shrink-wrapping; see SW ().  */
struct sw {
  /* What components does this BB need?  */
  sbitmap needs_components;

  /* What components does this BB have?  This is the main decision this
     pass makes.  */
  sbitmap has_components;

  /* The components for which we placed code at the start of the BB (instead
     of on all incoming edges).  */
  sbitmap head_components;

  /* The components for which we placed code at the end of the BB (instead
     of on all outgoing edges).  */
  sbitmap tail_components;

  /* The frequency of executing the prologue for this BB, if a prologue is
     placed on this BB.  This is a pessimistic estimate (no prologue is
     needed for edges from blocks that have the component under consideration
     active already).  */
  gcov_type own_cost;

  /* The frequency of executing the prologue for this BB and all BBs
     dominated by it.  */
  gcov_type total_cost;
};
   1113  1.1  mrg 
   1114  1.1  mrg /* A helper function for accessing the pass-specific info.  */
   1115  1.1  mrg static inline struct sw *
   1116  1.1  mrg SW (basic_block bb)
   1117  1.1  mrg {
   1118  1.1  mrg   gcc_assert (bb->aux);
   1119  1.1  mrg   return (struct sw *) bb->aux;
   1120  1.1  mrg }
   1121  1.1  mrg 
   1122  1.1  mrg /* Create the pass-specific data structures for separately shrink-wrapping
   1123  1.1  mrg    with components COMPONENTS.  */
   1124  1.1  mrg static void
   1125  1.1  mrg init_separate_shrink_wrap (sbitmap components)
   1126  1.1  mrg {
   1127  1.1  mrg   basic_block bb;
   1128  1.1  mrg   FOR_ALL_BB_FN (bb, cfun)
   1129  1.1  mrg     {
   1130  1.1  mrg       bb->aux = xcalloc (1, sizeof (struct sw));
   1131  1.1  mrg 
   1132  1.1  mrg       SW (bb)->needs_components = targetm.shrink_wrap.components_for_bb (bb);
   1133  1.1  mrg 
   1134  1.1  mrg       /* Mark all basic blocks without successor as needing all components.
   1135  1.1  mrg 	 This avoids problems in at least cfgcleanup, sel-sched, and
   1136  1.1  mrg 	 regrename (largely to do with all paths to such a block still
   1137  1.1  mrg 	 needing the same dwarf CFI info).  */
   1138  1.1  mrg       if (EDGE_COUNT (bb->succs) == 0)
   1139  1.1  mrg 	bitmap_copy (SW (bb)->needs_components, components);
   1140  1.1  mrg 
   1141  1.1  mrg       if (dump_file)
   1142  1.1  mrg 	{
   1143  1.1  mrg 	  fprintf (dump_file, "bb %d components:", bb->index);
   1144  1.1  mrg 	  dump_components ("has", SW (bb)->needs_components);
   1145  1.1  mrg 	  fprintf (dump_file, "\n");
   1146  1.1  mrg 	}
   1147  1.1  mrg 
   1148  1.1  mrg       SW (bb)->has_components = sbitmap_alloc (SBITMAP_SIZE (components));
   1149  1.1  mrg       SW (bb)->head_components = sbitmap_alloc (SBITMAP_SIZE (components));
   1150  1.1  mrg       SW (bb)->tail_components = sbitmap_alloc (SBITMAP_SIZE (components));
   1151  1.1  mrg       bitmap_clear (SW (bb)->has_components);
   1152  1.1  mrg     }
   1153  1.1  mrg }
   1154  1.1  mrg 
   1155  1.1  mrg /* Destroy the pass-specific data.  */
   1156  1.1  mrg static void
   1157  1.1  mrg fini_separate_shrink_wrap (void)
   1158  1.1  mrg {
   1159  1.1  mrg   basic_block bb;
   1160  1.1  mrg   FOR_ALL_BB_FN (bb, cfun)
   1161  1.1  mrg     if (bb->aux)
   1162  1.1  mrg       {
   1163  1.1  mrg 	sbitmap_free (SW (bb)->needs_components);
   1164  1.1  mrg 	sbitmap_free (SW (bb)->has_components);
   1165  1.1  mrg 	sbitmap_free (SW (bb)->head_components);
   1166  1.1  mrg 	sbitmap_free (SW (bb)->tail_components);
   1167  1.1  mrg 	free (bb->aux);
   1168  1.1  mrg 	bb->aux = 0;
   1169  1.1  mrg       }
   1170  1.1  mrg }
   1171  1.1  mrg 
/* Place the prologue for component WHICH, in the basic blocks dominated
   by HEAD.  Do a DFS over the dominator tree, and set bit WHICH in the
   HAS_COMPONENTS of a block if either the block has that bit set in
   NEEDS_COMPONENTS, or it is cheaper to place the prologue here than in all
   dominator subtrees separately.  */
static void
place_prologue_for_one_component (unsigned int which, basic_block head)
{
  /* The block we are currently dealing with.  */
  basic_block bb = head;
  /* Is this the first time we visit this block, i.e. have we just gone
     down the tree.  */
  bool first_visit = true;

  /* Walk the dominator tree, visit one block per iteration of this loop.
     Each basic block is visited twice: once before visiting any children
     of the block, and once after visiting all of them (leaf nodes are
     visited only once).  As an optimization, we do not visit subtrees
     that can no longer influence the prologue placement.  */
  for (;;)
    {
      /* First visit of a block: set the (children) cost accumulator to zero;
	 if the block does not have the component itself, walk down.  */
      if (first_visit)
	{
	  /* Initialize the cost.  The cost is the block execution frequency
	     that does not come from backedges.  Calculating this by simply
	     adding the cost of all edges that aren't backedges does not
	     work: this does not always add up to the block frequency at
	     all, and even if it does, rounding error makes for bad
	     decisions.  */
	  SW (bb)->own_cost = bb->count.to_frequency (cfun);

	  /* Subtract the frequency of incoming backedges (edges whose
	     source is dominated by BB), clamping at zero.  */
	  edge e;
	  edge_iterator ei;
	  FOR_EACH_EDGE (e, ei, bb->preds)
	    if (dominated_by_p (CDI_DOMINATORS, e->src, bb))
	      {
		if (SW (bb)->own_cost > EDGE_FREQUENCY (e))
		  SW (bb)->own_cost -= EDGE_FREQUENCY (e);
		else
		  SW (bb)->own_cost = 0;
	      }

	  SW (bb)->total_cost = 0;

	  /* If BB itself does not need the component, descend into the
	     first child subtree before deciding anything about BB.  */
	  if (!bitmap_bit_p (SW (bb)->needs_components, which)
	      && first_dom_son (CDI_DOMINATORS, bb))
	    {
	      bb = first_dom_son (CDI_DOMINATORS, bb);
	      continue;
	    }
	}

      /* If this block does need the component itself, or it is cheaper to
	 put the prologue here than in all the descendants that need it,
	 mark it so.  If this block's immediate post-dominator is dominated
	 by this block, and that needs the prologue, we can put it on this
	 block as well (earlier is better).  */
      if (bitmap_bit_p (SW (bb)->needs_components, which)
	  || SW (bb)->total_cost > SW (bb)->own_cost)
	{
	  SW (bb)->total_cost = SW (bb)->own_cost;
	  bitmap_set_bit (SW (bb)->has_components, which);
	}
      else
	{
	  basic_block kid = get_immediate_dominator (CDI_POST_DOMINATORS, bb);
	  if (dominated_by_p (CDI_DOMINATORS, kid, bb)
	      && bitmap_bit_p (SW (kid)->has_components, which))
	    {
	      SW (bb)->total_cost = SW (bb)->own_cost;
	      bitmap_set_bit (SW (bb)->has_components, which);
	    }
	}

      /* We are back where we started, so we are done now.  */
      if (bb == head)
	return;

      /* We now know the cost of the subtree rooted at the current block.
	 Accumulate this cost in the parent.  */
      basic_block parent = get_immediate_dominator (CDI_DOMINATORS, bb);
      SW (parent)->total_cost += SW (bb)->total_cost;

      /* Don't walk the tree down unless necessary.  */
      if (next_dom_son (CDI_DOMINATORS, bb)
          && SW (parent)->total_cost <= SW (parent)->own_cost)
	{
	  bb = next_dom_son (CDI_DOMINATORS, bb);
	  first_visit = true;
	}
      else
	{
	  bb = parent;
	  first_visit = false;
	}
    }
}
   1271  1.1  mrg 
   1272  1.1  mrg /* Set HAS_COMPONENTS in every block to the maximum it can be set to without
   1273  1.1  mrg    setting it on any path from entry to exit where it was not already set
   1274  1.1  mrg    somewhere (or, for blocks that have no path to the exit, consider only
   1275  1.1  mrg    paths from the entry to the block itself).  Return whether any changes
   1276  1.1  mrg    were made to some HAS_COMPONENTS.  */
   1277  1.1  mrg static bool
   1278  1.1  mrg spread_components (sbitmap components)
   1279  1.1  mrg {
   1280  1.1  mrg   basic_block entry_block = ENTRY_BLOCK_PTR_FOR_FN (cfun);
   1281  1.1  mrg   basic_block exit_block = EXIT_BLOCK_PTR_FOR_FN (cfun);
   1282  1.1  mrg 
   1283  1.1  mrg   /* A stack of all blocks left to consider, and a bitmap of all blocks
   1284  1.1  mrg      on that stack.  */
   1285  1.1  mrg   vec<basic_block> todo;
   1286  1.1  mrg   todo.create (n_basic_blocks_for_fn (cfun));
   1287  1.1  mrg   auto_bitmap seen;
   1288  1.1  mrg 
   1289  1.1  mrg   auto_sbitmap old (SBITMAP_SIZE (components));
   1290  1.1  mrg 
   1291  1.1  mrg   /* Find for every block the components that are *not* needed on some path
   1292  1.1  mrg      from the entry to that block.  Do this with a flood fill from the entry
   1293  1.1  mrg      block.  Every block can be visited at most as often as the number of
   1294  1.1  mrg      components (plus one), and usually much less often.  */
   1295  1.1  mrg 
   1296  1.1  mrg   if (dump_file)
   1297  1.1  mrg     fprintf (dump_file, "Spreading down...\n");
   1298  1.1  mrg 
   1299  1.1  mrg   basic_block bb;
   1300  1.1  mrg   FOR_ALL_BB_FN (bb, cfun)
   1301  1.1  mrg     bitmap_clear (SW (bb)->head_components);
   1302  1.1  mrg 
   1303  1.1  mrg   bitmap_copy (SW (entry_block)->head_components, components);
   1304  1.1  mrg 
   1305  1.1  mrg   edge e;
   1306  1.1  mrg   edge_iterator ei;
   1307  1.1  mrg 
   1308  1.1  mrg   todo.quick_push (single_succ (entry_block));
   1309  1.1  mrg   bitmap_set_bit (seen, single_succ (entry_block)->index);
   1310  1.1  mrg   while (!todo.is_empty ())
   1311  1.1  mrg     {
   1312  1.1  mrg       bb = todo.pop ();
   1313  1.1  mrg 
   1314  1.1  mrg       bitmap_copy (old, SW (bb)->head_components);
   1315  1.1  mrg 
   1316  1.1  mrg       FOR_EACH_EDGE (e, ei, bb->preds)
   1317  1.1  mrg 	bitmap_ior (SW (bb)->head_components, SW (bb)->head_components,
   1318  1.1  mrg 		    SW (e->src)->head_components);
   1319  1.1  mrg 
   1320  1.1  mrg       bitmap_and_compl (SW (bb)->head_components, SW (bb)->head_components,
   1321  1.1  mrg 			SW (bb)->has_components);
   1322  1.1  mrg 
   1323  1.1  mrg       if (!bitmap_equal_p (old, SW (bb)->head_components))
   1324  1.1  mrg 	FOR_EACH_EDGE (e, ei, bb->succs)
   1325  1.1  mrg 	  if (bitmap_set_bit (seen, e->dest->index))
   1326  1.1  mrg 	    todo.quick_push (e->dest);
   1327  1.1  mrg 
   1328  1.1  mrg       bitmap_clear_bit (seen, bb->index);
   1329  1.1  mrg     }
   1330  1.1  mrg 
   1331  1.1  mrg   /* Find for every block the components that are *not* needed on some reverse
   1332  1.1  mrg      path from the exit to that block.  */
   1333  1.1  mrg 
   1334  1.1  mrg   if (dump_file)
   1335  1.1  mrg     fprintf (dump_file, "Spreading up...\n");
   1336  1.1  mrg 
   1337  1.1  mrg   /* First, mark all blocks not reachable from the exit block as not needing
   1338  1.1  mrg      any component on any path to the exit.  Mark everything, and then clear
   1339  1.1  mrg      again by a flood fill.  */
   1340  1.1  mrg 
   1341  1.1  mrg   FOR_ALL_BB_FN (bb, cfun)
   1342  1.1  mrg     bitmap_copy (SW (bb)->tail_components, components);
   1343  1.1  mrg 
   1344  1.1  mrg   FOR_EACH_EDGE (e, ei, exit_block->preds)
   1345  1.1  mrg     {
   1346  1.1  mrg       todo.quick_push (e->src);
   1347  1.1  mrg       bitmap_set_bit (seen, e->src->index);
   1348  1.1  mrg     }
   1349  1.1  mrg 
   1350  1.1  mrg   while (!todo.is_empty ())
   1351  1.1  mrg     {
   1352  1.1  mrg       bb = todo.pop ();
   1353  1.1  mrg 
   1354  1.1  mrg       if (!bitmap_empty_p (SW (bb)->tail_components))
   1355  1.1  mrg 	FOR_EACH_EDGE (e, ei, bb->preds)
   1356  1.1  mrg 	  if (bitmap_set_bit (seen, e->src->index))
   1357  1.1  mrg 	    todo.quick_push (e->src);
   1358  1.1  mrg 
   1359  1.1  mrg       bitmap_clear (SW (bb)->tail_components);
   1360  1.1  mrg 
   1361  1.1  mrg       bitmap_clear_bit (seen, bb->index);
   1362  1.1  mrg     }
   1363  1.1  mrg 
   1364  1.1  mrg   /* And then, flood fill backwards to find for every block the components
   1365  1.1  mrg      not needed on some path to the exit.  */
   1366  1.1  mrg 
   1367  1.1  mrg   bitmap_copy (SW (exit_block)->tail_components, components);
   1368  1.1  mrg 
   1369  1.1  mrg   FOR_EACH_EDGE (e, ei, exit_block->preds)
   1370  1.1  mrg     {
   1371  1.1  mrg       todo.quick_push (e->src);
   1372  1.1  mrg       bitmap_set_bit (seen, e->src->index);
   1373  1.1  mrg     }
   1374  1.1  mrg 
   1375  1.1  mrg   while (!todo.is_empty ())
   1376  1.1  mrg     {
   1377  1.1  mrg       bb = todo.pop ();
   1378  1.1  mrg 
   1379  1.1  mrg       bitmap_copy (old, SW (bb)->tail_components);
   1380  1.1  mrg 
   1381  1.1  mrg       FOR_EACH_EDGE (e, ei, bb->succs)
   1382  1.1  mrg 	bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components,
   1383  1.1  mrg 		    SW (e->dest)->tail_components);
   1384  1.1  mrg 
   1385  1.1  mrg       bitmap_and_compl (SW (bb)->tail_components, SW (bb)->tail_components,
   1386  1.1  mrg 			SW (bb)->has_components);
   1387  1.1  mrg 
   1388  1.1  mrg       if (!bitmap_equal_p (old, SW (bb)->tail_components))
   1389  1.1  mrg 	FOR_EACH_EDGE (e, ei, bb->preds)
   1390  1.1  mrg 	  if (bitmap_set_bit (seen, e->src->index))
   1391  1.1  mrg 	    todo.quick_push (e->src);
   1392  1.1  mrg 
   1393  1.1  mrg       bitmap_clear_bit (seen, bb->index);
   1394  1.1  mrg     }
   1395  1.1  mrg 
   1396  1.1  mrg   todo.release ();
   1397  1.1  mrg 
   1398  1.1  mrg   /* Finally, mark everything not needed both forwards and backwards.  */
   1399  1.1  mrg 
   1400  1.1  mrg   bool did_changes = false;
   1401  1.1  mrg 
   1402  1.1  mrg   FOR_EACH_BB_FN (bb, cfun)
   1403  1.1  mrg     {
   1404  1.1  mrg       bitmap_copy (old, SW (bb)->has_components);
   1405  1.1  mrg 
   1406  1.1  mrg       bitmap_and (SW (bb)->head_components, SW (bb)->head_components,
   1407  1.1  mrg 		  SW (bb)->tail_components);
   1408  1.1  mrg       bitmap_and_compl (SW (bb)->has_components, components,
   1409  1.1  mrg 			SW (bb)->head_components);
   1410  1.1  mrg 
   1411  1.1  mrg       if (!did_changes && !bitmap_equal_p (old, SW (bb)->has_components))
   1412  1.1  mrg 	did_changes = true;
   1413  1.1  mrg     }
   1414  1.1  mrg 
   1415  1.1  mrg   FOR_ALL_BB_FN (bb, cfun)
   1416  1.1  mrg     {
   1417  1.1  mrg       if (dump_file)
   1418  1.1  mrg 	{
   1419  1.1  mrg 	  fprintf (dump_file, "bb %d components:", bb->index);
   1420  1.1  mrg 	  dump_components ("has", SW (bb)->has_components);
   1421  1.1  mrg 	  fprintf (dump_file, "\n");
   1422  1.1  mrg 	}
   1423  1.1  mrg     }
   1424  1.1  mrg 
   1425  1.1  mrg   return did_changes;
   1426  1.1  mrg }
   1427  1.1  mrg 
   1428  1.1  mrg /* If we cannot handle placing some component's prologues or epilogues where
   1429  1.1  mrg    we decided we should place them, unmark that component in COMPONENTS so
   1430  1.1  mrg    that it is not wrapped separately.  */
   1431  1.1  mrg static void
   1432  1.1  mrg disqualify_problematic_components (sbitmap components)
   1433  1.1  mrg {
   1434  1.1  mrg   auto_sbitmap pro (SBITMAP_SIZE (components));
   1435  1.1  mrg   auto_sbitmap epi (SBITMAP_SIZE (components));
   1436  1.1  mrg 
   1437  1.1  mrg   basic_block bb;
   1438  1.1  mrg   FOR_EACH_BB_FN (bb, cfun)
   1439  1.1  mrg     {
   1440  1.1  mrg       edge e;
   1441  1.1  mrg       edge_iterator ei;
   1442  1.1  mrg       FOR_EACH_EDGE (e, ei, bb->succs)
   1443  1.1  mrg 	{
   1444  1.1  mrg 	  /* Find which components we want pro/epilogues for here.  */
   1445  1.1  mrg 	  bitmap_and_compl (epi, SW (e->src)->has_components,
   1446  1.1  mrg 			    SW (e->dest)->has_components);
   1447  1.1  mrg 	  bitmap_and_compl (pro, SW (e->dest)->has_components,
   1448  1.1  mrg 			    SW (e->src)->has_components);
   1449  1.1  mrg 
   1450  1.1  mrg 	  /* Ask the target what it thinks about things.  */
   1451  1.1  mrg 	  if (!bitmap_empty_p (epi))
   1452  1.1  mrg 	    targetm.shrink_wrap.disqualify_components (components, e, epi,
   1453  1.1  mrg 						       false);
   1454  1.1  mrg 	  if (!bitmap_empty_p (pro))
   1455  1.1  mrg 	    targetm.shrink_wrap.disqualify_components (components, e, pro,
   1456  1.1  mrg 						       true);
   1457  1.1  mrg 
   1458  1.1  mrg 	  /* If this edge doesn't need splitting, we're fine.  */
   1459  1.1  mrg 	  if (single_pred_p (e->dest)
   1460  1.1  mrg 	      && e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun))
   1461  1.1  mrg 	    continue;
   1462  1.1  mrg 
   1463  1.1  mrg 	  /* If the edge can be split, that is fine too.  */
   1464  1.1  mrg 	  if ((e->flags & EDGE_ABNORMAL) == 0)
   1465  1.1  mrg 	    continue;
   1466  1.1  mrg 
   1467  1.1  mrg 	  /* We also can handle sibcalls.  */
   1468  1.1  mrg 	  if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
   1469  1.1  mrg 	    {
   1470  1.1  mrg 	      gcc_assert (e->flags & EDGE_SIBCALL);
   1471  1.1  mrg 	      continue;
   1472  1.1  mrg 	    }
   1473  1.1  mrg 
   1474  1.1  mrg 	  /* Remove from consideration those components we would need
   1475  1.1  mrg 	     pro/epilogues for on edges where we cannot insert them.  */
   1476  1.1  mrg 	  bitmap_and_compl (components, components, epi);
   1477  1.1  mrg 	  bitmap_and_compl (components, components, pro);
   1478  1.1  mrg 
   1479  1.1  mrg 	  if (dump_file && !bitmap_subset_p (epi, components))
   1480  1.1  mrg 	    {
   1481  1.1  mrg 	      fprintf (dump_file, "  BAD epi %d->%d", e->src->index,
   1482  1.1  mrg 		       e->dest->index);
   1483  1.1  mrg 	      if (e->flags & EDGE_EH)
   1484  1.1  mrg 		fprintf (dump_file, " for EH");
   1485  1.1  mrg 	      dump_components ("epi", epi);
   1486  1.1  mrg 	      fprintf (dump_file, "\n");
   1487  1.1  mrg 	    }
   1488  1.1  mrg 
   1489  1.1  mrg 	  if (dump_file && !bitmap_subset_p (pro, components))
   1490  1.1  mrg 	    {
   1491  1.1  mrg 	      fprintf (dump_file, "  BAD pro %d->%d", e->src->index,
   1492  1.1  mrg 		       e->dest->index);
   1493  1.1  mrg 	      if (e->flags & EDGE_EH)
   1494  1.1  mrg 		fprintf (dump_file, " for EH");
   1495  1.1  mrg 	      dump_components ("pro", pro);
   1496  1.1  mrg 	      fprintf (dump_file, "\n");
   1497  1.1  mrg 	    }
   1498  1.1  mrg 	}
   1499  1.1  mrg     }
   1500  1.1  mrg }
   1501  1.1  mrg 
   1502  1.1  mrg /* Place code for prologues and epilogues for COMPONENTS where we can put
   1503  1.1  mrg    that code at the start of basic blocks.  */
   1504  1.1  mrg static void
   1505  1.1  mrg emit_common_heads_for_components (sbitmap components)
   1506  1.1  mrg {
   1507  1.1  mrg   auto_sbitmap pro (SBITMAP_SIZE (components));
   1508  1.1  mrg   auto_sbitmap epi (SBITMAP_SIZE (components));
   1509  1.1  mrg   auto_sbitmap tmp (SBITMAP_SIZE (components));
   1510  1.1  mrg 
   1511  1.1  mrg   basic_block bb;
   1512  1.1  mrg   FOR_ALL_BB_FN (bb, cfun)
   1513  1.1  mrg     bitmap_clear (SW (bb)->head_components);
   1514  1.1  mrg 
   1515  1.1  mrg   FOR_EACH_BB_FN (bb, cfun)
   1516  1.1  mrg     {
   1517  1.1  mrg       /* Find which prologue resp. epilogue components are needed for all
   1518  1.1  mrg 	 predecessor edges to this block.  */
   1519  1.1  mrg 
   1520  1.1  mrg       /* First, select all possible components.  */
   1521  1.1  mrg       bitmap_copy (epi, components);
   1522  1.1  mrg       bitmap_copy (pro, components);
   1523  1.1  mrg 
   1524  1.1  mrg       edge e;
   1525  1.1  mrg       edge_iterator ei;
   1526  1.1  mrg       FOR_EACH_EDGE (e, ei, bb->preds)
   1527  1.1  mrg 	{
   1528  1.1  mrg 	  if (e->flags & EDGE_ABNORMAL)
   1529  1.1  mrg 	    {
   1530  1.1  mrg 	      bitmap_clear (epi);
   1531  1.1  mrg 	      bitmap_clear (pro);
   1532  1.1  mrg 	      break;
   1533  1.1  mrg 	    }
   1534  1.1  mrg 
   1535  1.1  mrg 	  /* Deselect those epilogue components that should not be inserted
   1536  1.1  mrg 	     for this edge.  */
   1537  1.1  mrg 	  bitmap_and_compl (tmp, SW (e->src)->has_components,
   1538  1.1  mrg 			    SW (e->dest)->has_components);
   1539  1.1  mrg 	  bitmap_and (epi, epi, tmp);
   1540  1.1  mrg 
   1541  1.1  mrg 	  /* Similar, for the prologue.  */
   1542  1.1  mrg 	  bitmap_and_compl (tmp, SW (e->dest)->has_components,
   1543  1.1  mrg 			    SW (e->src)->has_components);
   1544  1.1  mrg 	  bitmap_and (pro, pro, tmp);
   1545  1.1  mrg 	}
   1546  1.1  mrg 
   1547  1.1  mrg       if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
   1548  1.1  mrg 	fprintf (dump_file, "  bb %d", bb->index);
   1549  1.1  mrg 
   1550  1.1  mrg       if (dump_file && !bitmap_empty_p (epi))
   1551  1.1  mrg 	dump_components ("epi", epi);
   1552  1.1  mrg       if (dump_file && !bitmap_empty_p (pro))
   1553  1.1  mrg 	dump_components ("pro", pro);
   1554  1.1  mrg 
   1555  1.1  mrg       if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
   1556  1.1  mrg 	fprintf (dump_file, "\n");
   1557  1.1  mrg 
   1558  1.1  mrg       /* Place code after the BB note.  */
   1559  1.1  mrg       if (!bitmap_empty_p (pro))
   1560  1.1  mrg 	{
   1561  1.1  mrg 	  start_sequence ();
   1562  1.1  mrg 	  targetm.shrink_wrap.emit_prologue_components (pro);
   1563  1.1  mrg 	  rtx_insn *seq = get_insns ();
   1564  1.1  mrg 	  end_sequence ();
   1565  1.1  mrg 	  record_prologue_seq (seq);
   1566  1.1  mrg 
   1567  1.1  mrg 	  emit_insn_after (seq, bb_note (bb));
   1568  1.1  mrg 
   1569  1.1  mrg 	  bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, pro);
   1570  1.1  mrg 	}
   1571  1.1  mrg 
   1572  1.1  mrg       if (!bitmap_empty_p (epi))
   1573  1.1  mrg 	{
   1574  1.1  mrg 	  start_sequence ();
   1575  1.1  mrg 	  targetm.shrink_wrap.emit_epilogue_components (epi);
   1576  1.1  mrg 	  rtx_insn *seq = get_insns ();
   1577  1.1  mrg 	  end_sequence ();
   1578  1.1  mrg 	  record_epilogue_seq (seq);
   1579  1.1  mrg 
   1580  1.1  mrg 	  emit_insn_after (seq, bb_note (bb));
   1581  1.1  mrg 
   1582  1.1  mrg 	  bitmap_ior (SW (bb)->head_components, SW (bb)->head_components, epi);
   1583  1.1  mrg 	}
   1584  1.1  mrg     }
   1585  1.1  mrg }
   1586  1.1  mrg 
   1587  1.1  mrg /* Place code for prologues and epilogues for COMPONENTS where we can put
   1588  1.1  mrg    that code at the end of basic blocks.  */
   1589  1.1  mrg static void
   1590  1.1  mrg emit_common_tails_for_components (sbitmap components)
   1591  1.1  mrg {
   1592  1.1  mrg   auto_sbitmap pro (SBITMAP_SIZE (components));
   1593  1.1  mrg   auto_sbitmap epi (SBITMAP_SIZE (components));
   1594  1.1  mrg   auto_sbitmap tmp (SBITMAP_SIZE (components));
   1595  1.1  mrg 
   1596  1.1  mrg   basic_block bb;
   1597  1.1  mrg   FOR_ALL_BB_FN (bb, cfun)
   1598  1.1  mrg     bitmap_clear (SW (bb)->tail_components);
   1599  1.1  mrg 
   1600  1.1  mrg   FOR_EACH_BB_FN (bb, cfun)
   1601  1.1  mrg     {
   1602  1.1  mrg       /* Find which prologue resp. epilogue components are needed for all
   1603  1.1  mrg 	 successor edges from this block.  */
   1604  1.1  mrg       if (EDGE_COUNT (bb->succs) == 0)
   1605  1.1  mrg 	continue;
   1606  1.1  mrg 
   1607  1.1  mrg       /* First, select all possible components.  */
   1608  1.1  mrg       bitmap_copy (epi, components);
   1609  1.1  mrg       bitmap_copy (pro, components);
   1610  1.1  mrg 
   1611  1.1  mrg       edge e;
   1612  1.1  mrg       edge_iterator ei;
   1613  1.1  mrg       FOR_EACH_EDGE (e, ei, bb->succs)
   1614  1.1  mrg 	{
   1615  1.1  mrg 	  if (e->flags & EDGE_ABNORMAL)
   1616  1.1  mrg 	    {
   1617  1.1  mrg 	      bitmap_clear (epi);
   1618  1.1  mrg 	      bitmap_clear (pro);
   1619  1.1  mrg 	      break;
   1620  1.1  mrg 	    }
   1621  1.1  mrg 
   1622  1.1  mrg 	  /* Deselect those epilogue components that should not be inserted
   1623  1.1  mrg 	     for this edge, and also those that are already put at the head
   1624  1.1  mrg 	     of the successor block.  */
   1625  1.1  mrg 	  bitmap_and_compl (tmp, SW (e->src)->has_components,
   1626  1.1  mrg 			    SW (e->dest)->has_components);
   1627  1.1  mrg 	  bitmap_and_compl (tmp, tmp, SW (e->dest)->head_components);
   1628  1.1  mrg 	  bitmap_and (epi, epi, tmp);
   1629  1.1  mrg 
   1630  1.1  mrg 	  /* Similarly, for the prologue.  */
   1631  1.1  mrg 	  bitmap_and_compl (tmp, SW (e->dest)->has_components,
   1632  1.1  mrg 			    SW (e->src)->has_components);
   1633  1.1  mrg 	  bitmap_and_compl (tmp, tmp, SW (e->dest)->head_components);
   1634  1.1  mrg 	  bitmap_and (pro, pro, tmp);
   1635  1.1  mrg 	}
   1636  1.1  mrg 
   1637  1.1  mrg       /* If the last insn of this block is a control flow insn we cannot
   1638  1.1  mrg 	 put anything after it.  We can put our code before it instead,
   1639  1.1  mrg 	 but only if that jump insn is a simple jump.  */
   1640  1.1  mrg       rtx_insn *last_insn = BB_END (bb);
   1641  1.1  mrg       if (control_flow_insn_p (last_insn) && !simplejump_p (last_insn))
   1642  1.1  mrg 	{
   1643  1.1  mrg 	  bitmap_clear (epi);
   1644  1.1  mrg 	  bitmap_clear (pro);
   1645  1.1  mrg 	}
   1646  1.1  mrg 
   1647  1.1  mrg       if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
   1648  1.1  mrg 	fprintf (dump_file, "  bb %d", bb->index);
   1649  1.1  mrg 
   1650  1.1  mrg       if (dump_file && !bitmap_empty_p (epi))
   1651  1.1  mrg 	dump_components ("epi", epi);
   1652  1.1  mrg       if (dump_file && !bitmap_empty_p (pro))
   1653  1.1  mrg 	dump_components ("pro", pro);
   1654  1.1  mrg 
   1655  1.1  mrg       if (dump_file && !(bitmap_empty_p (epi) && bitmap_empty_p (pro)))
   1656  1.1  mrg 	fprintf (dump_file, "\n");
   1657  1.1  mrg 
   1658  1.1  mrg       /* Put the code at the end of the BB, but before any final jump.  */
   1659  1.1  mrg       if (!bitmap_empty_p (epi))
   1660  1.1  mrg 	{
   1661  1.1  mrg 	  start_sequence ();
   1662  1.1  mrg 	  targetm.shrink_wrap.emit_epilogue_components (epi);
   1663  1.1  mrg 	  rtx_insn *seq = get_insns ();
   1664  1.1  mrg 	  end_sequence ();
   1665  1.1  mrg 	  record_epilogue_seq (seq);
   1666  1.1  mrg 
   1667  1.1  mrg 	  if (control_flow_insn_p (last_insn))
   1668  1.1  mrg 	    emit_insn_before (seq, last_insn);
   1669  1.1  mrg 	  else
   1670  1.1  mrg 	    emit_insn_after (seq, last_insn);
   1671  1.1  mrg 
   1672  1.1  mrg 	  bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, epi);
   1673  1.1  mrg 	}
   1674  1.1  mrg 
   1675  1.1  mrg       if (!bitmap_empty_p (pro))
   1676  1.1  mrg 	{
   1677  1.1  mrg 	  start_sequence ();
   1678  1.1  mrg 	  targetm.shrink_wrap.emit_prologue_components (pro);
   1679  1.1  mrg 	  rtx_insn *seq = get_insns ();
   1680  1.1  mrg 	  end_sequence ();
   1681  1.1  mrg 	  record_prologue_seq (seq);
   1682  1.1  mrg 
   1683  1.1  mrg 	  if (control_flow_insn_p (last_insn))
   1684  1.1  mrg 	    emit_insn_before (seq, last_insn);
   1685  1.1  mrg 	  else
   1686  1.1  mrg 	    emit_insn_after (seq, last_insn);
   1687  1.1  mrg 
   1688  1.1  mrg 	  bitmap_ior (SW (bb)->tail_components, SW (bb)->tail_components, pro);
   1689  1.1  mrg 	}
   1690  1.1  mrg     }
   1691  1.1  mrg }
   1692  1.1  mrg 
   1693  1.1  mrg /* Place prologues and epilogues for COMPONENTS on edges, if we haven't already
   1694  1.1  mrg    placed them inside blocks directly.  */
   1695  1.1  mrg static void
   1696  1.1  mrg insert_prologue_epilogue_for_components (sbitmap components)
   1697  1.1  mrg {
   1698  1.1  mrg   auto_sbitmap pro (SBITMAP_SIZE (components));
   1699  1.1  mrg   auto_sbitmap epi (SBITMAP_SIZE (components));
   1700  1.1  mrg 
   1701  1.1  mrg   basic_block bb;
   1702  1.1  mrg   FOR_EACH_BB_FN (bb, cfun)
   1703  1.1  mrg     {
   1704  1.1  mrg       if (!bb->aux)
   1705  1.1  mrg 	continue;
   1706  1.1  mrg 
   1707  1.1  mrg       edge e;
   1708  1.1  mrg       edge_iterator ei;
   1709  1.1  mrg       FOR_EACH_EDGE (e, ei, bb->succs)
   1710  1.1  mrg 	{
   1711  1.1  mrg 	  /* Find which pro/epilogue components are needed on this edge.  */
   1712  1.1  mrg 	  bitmap_and_compl (epi, SW (e->src)->has_components,
   1713  1.1  mrg 			    SW (e->dest)->has_components);
   1714  1.1  mrg 	  bitmap_and_compl (pro, SW (e->dest)->has_components,
   1715  1.1  mrg 			    SW (e->src)->has_components);
   1716  1.1  mrg 	  bitmap_and (epi, epi, components);
   1717  1.1  mrg 	  bitmap_and (pro, pro, components);
   1718  1.1  mrg 
   1719  1.1  mrg 	  /* Deselect those we already have put at the head or tail of the
   1720  1.1  mrg 	     edge's dest resp. src.  */
   1721  1.1  mrg 	  bitmap_and_compl (epi, epi, SW (e->dest)->head_components);
   1722  1.1  mrg 	  bitmap_and_compl (pro, pro, SW (e->dest)->head_components);
   1723  1.1  mrg 	  bitmap_and_compl (epi, epi, SW (e->src)->tail_components);
   1724  1.1  mrg 	  bitmap_and_compl (pro, pro, SW (e->src)->tail_components);
   1725  1.1  mrg 
   1726  1.1  mrg 	  if (!bitmap_empty_p (epi) || !bitmap_empty_p (pro))
   1727  1.1  mrg 	    {
   1728  1.1  mrg 	      if (dump_file)
   1729  1.1  mrg 		{
   1730  1.1  mrg 		  fprintf (dump_file, "  %d->%d", e->src->index,
   1731  1.1  mrg 			   e->dest->index);
   1732  1.1  mrg 		  dump_components ("epi", epi);
   1733  1.1  mrg 		  dump_components ("pro", pro);
   1734  1.1  mrg 		  if (e->flags & EDGE_SIBCALL)
   1735  1.1  mrg 		    fprintf (dump_file, "  (SIBCALL)");
   1736  1.1  mrg 		  else if (e->flags & EDGE_ABNORMAL)
   1737  1.1  mrg 		    fprintf (dump_file, "  (ABNORMAL)");
   1738  1.1  mrg 		  fprintf (dump_file, "\n");
   1739  1.1  mrg 		}
   1740  1.1  mrg 
   1741  1.1  mrg 	      /* Put the epilogue components in place.  */
   1742  1.1  mrg 	      start_sequence ();
   1743  1.1  mrg 	      targetm.shrink_wrap.emit_epilogue_components (epi);
   1744  1.1  mrg 	      rtx_insn *seq = get_insns ();
   1745  1.1  mrg 	      end_sequence ();
   1746  1.1  mrg 	      record_epilogue_seq (seq);
   1747  1.1  mrg 
   1748  1.1  mrg 	      if (e->flags & EDGE_SIBCALL)
   1749  1.1  mrg 		{
   1750  1.1  mrg 		  gcc_assert (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun));
   1751  1.1  mrg 
   1752  1.1  mrg 		  rtx_insn *insn = BB_END (e->src);
   1753  1.1  mrg 		  gcc_assert (CALL_P (insn) && SIBLING_CALL_P (insn));
   1754  1.1  mrg 		  emit_insn_before (seq, insn);
   1755  1.1  mrg 		}
   1756  1.1  mrg 	      else if (e->dest == EXIT_BLOCK_PTR_FOR_FN (cfun))
   1757  1.1  mrg 		{
   1758  1.1  mrg 		  gcc_assert (e->flags & EDGE_FALLTHRU);
   1759  1.1  mrg 		  basic_block new_bb = split_edge (e);
   1760  1.1  mrg 		  emit_insn_after (seq, BB_END (new_bb));
   1761  1.1  mrg 		}
   1762  1.1  mrg 	      else
   1763  1.1  mrg 		insert_insn_on_edge (seq, e);
   1764  1.1  mrg 
   1765  1.1  mrg 	      /* Put the prologue components in place.  */
   1766  1.1  mrg 	      start_sequence ();
   1767  1.1  mrg 	      targetm.shrink_wrap.emit_prologue_components (pro);
   1768  1.1  mrg 	      seq = get_insns ();
   1769  1.1  mrg 	      end_sequence ();
   1770  1.1  mrg 	      record_prologue_seq (seq);
   1771  1.1  mrg 
   1772  1.1  mrg 	      insert_insn_on_edge (seq, e);
   1773  1.1  mrg 	    }
   1774  1.1  mrg 	}
   1775  1.1  mrg     }
   1776  1.1  mrg 
   1777  1.1  mrg   commit_edge_insertions ();
   1778  1.1  mrg }
   1779  1.1  mrg 
   1780  1.1  mrg /* The main entry point to this subpass.  FIRST_BB is where the prologue
   1781  1.1  mrg    would be normally put.  */
   1782  1.1  mrg void
   1783  1.1  mrg try_shrink_wrapping_separate (basic_block first_bb)
   1784  1.1  mrg {
   1785  1.1  mrg   if (!(SHRINK_WRAPPING_ENABLED
   1786  1.1  mrg 	&& flag_shrink_wrap_separate
   1787  1.1  mrg 	&& optimize_function_for_speed_p (cfun)
   1788  1.1  mrg 	&& targetm.shrink_wrap.get_separate_components))
   1789  1.1  mrg     return;
   1790  1.1  mrg 
   1791  1.1  mrg   /* We don't handle "strange" functions.  */
   1792  1.1  mrg   if (cfun->calls_alloca
   1793  1.1  mrg       || cfun->calls_setjmp
   1794  1.1  mrg       || cfun->can_throw_non_call_exceptions
   1795  1.1  mrg       || crtl->calls_eh_return
   1796  1.1  mrg       || crtl->has_nonlocal_goto
   1797  1.1  mrg       || crtl->saves_all_registers)
   1798  1.1  mrg     return;
   1799  1.1  mrg 
   1800  1.1  mrg   /* Ask the target what components there are.  If it returns NULL, don't
   1801  1.1  mrg      do anything.  */
   1802  1.1  mrg   sbitmap components = targetm.shrink_wrap.get_separate_components ();
   1803  1.1  mrg   if (!components)
   1804  1.1  mrg     return;
   1805  1.1  mrg 
   1806  1.1  mrg   /* We need LIVE info, not defining anything in the entry block and not
   1807  1.1  mrg      using anything in the exit block.  A block then needs a component if
   1808  1.1  mrg      the register for that component is in the IN or GEN or KILL set for
   1809  1.1  mrg      that block.  */
   1810  1.1  mrg   df_scan->local_flags |= DF_SCAN_EMPTY_ENTRY_EXIT;
   1811  1.1  mrg   df_update_entry_exit_and_calls ();
   1812  1.1  mrg   df_live_add_problem ();
   1813  1.1  mrg   df_live_set_all_dirty ();
   1814  1.1  mrg   df_analyze ();
   1815  1.1  mrg 
   1816  1.1  mrg   calculate_dominance_info (CDI_DOMINATORS);
   1817  1.1  mrg   calculate_dominance_info (CDI_POST_DOMINATORS);
   1818  1.1  mrg 
   1819  1.1  mrg   init_separate_shrink_wrap (components);
   1820  1.1  mrg 
   1821  1.1  mrg   sbitmap_iterator sbi;
   1822  1.1  mrg   unsigned int j;
   1823  1.1  mrg   EXECUTE_IF_SET_IN_BITMAP (components, 0, j, sbi)
   1824  1.1  mrg     place_prologue_for_one_component (j, first_bb);
   1825  1.1  mrg 
   1826  1.1  mrg   /* Try to minimize the number of saves and restores.  Do this as long as
   1827  1.1  mrg      it changes anything.  This does not iterate more than a few times.  */
   1828  1.1  mrg   int spread_times = 0;
   1829  1.1  mrg   while (spread_components (components))
   1830  1.1  mrg     {
   1831  1.1  mrg       spread_times++;
   1832  1.1  mrg 
   1833  1.1  mrg       if (dump_file)
   1834  1.1  mrg 	fprintf (dump_file, "Now spread %d times.\n", spread_times);
   1835  1.1  mrg     }
   1836  1.1  mrg 
   1837  1.1  mrg   disqualify_problematic_components (components);
   1838  1.1  mrg 
   1839  1.1  mrg   /* Don't separately shrink-wrap anything where the "main" prologue will
   1840  1.1  mrg      go; the target code can often optimize things if it is presented with
   1841  1.1  mrg      all components together (say, if it generates store-multiple insns).  */
   1842  1.1  mrg   bitmap_and_compl (components, components, SW (first_bb)->has_components);
   1843  1.1  mrg 
   1844  1.1  mrg   if (bitmap_empty_p (components))
   1845  1.1  mrg     {
   1846  1.1  mrg       if (dump_file)
   1847  1.1  mrg 	fprintf (dump_file, "Not wrapping anything separately.\n");
   1848  1.1  mrg     }
   1849  1.1  mrg   else
   1850  1.1  mrg     {
   1851  1.1  mrg       if (dump_file)
   1852  1.1  mrg 	{
   1853  1.1  mrg 	  fprintf (dump_file, "The components we wrap separately are");
   1854  1.1  mrg 	  dump_components ("sep", components);
   1855  1.1  mrg 	  fprintf (dump_file, "\n");
   1856  1.1  mrg 
   1857  1.1  mrg 	  fprintf (dump_file, "... Inserting common heads...\n");
   1858  1.1  mrg 	}
   1859  1.1  mrg 
   1860  1.1  mrg       emit_common_heads_for_components (components);
   1861  1.1  mrg 
   1862  1.1  mrg       if (dump_file)
   1863  1.1  mrg 	fprintf (dump_file, "... Inserting common tails...\n");
   1864  1.1  mrg 
   1865  1.1  mrg       emit_common_tails_for_components (components);
   1866  1.1  mrg 
   1867  1.1  mrg       if (dump_file)
   1868  1.1  mrg 	fprintf (dump_file, "... Inserting the more difficult ones...\n");
   1869  1.1  mrg 
   1870  1.1  mrg       insert_prologue_epilogue_for_components (components);
   1871  1.1  mrg 
   1872  1.1  mrg       if (dump_file)
   1873  1.1  mrg 	fprintf (dump_file, "... Done.\n");
   1874  1.1  mrg 
   1875  1.1  mrg       targetm.shrink_wrap.set_handled_components (components);
   1876  1.1  mrg 
   1877  1.1  mrg       crtl->shrink_wrapped_separate = true;
   1878  1.1  mrg     }
   1879  1.1  mrg 
   1880  1.1  mrg   fini_separate_shrink_wrap ();
   1881  1.1  mrg 
   1882  1.1  mrg   sbitmap_free (components);
   1883  1.1  mrg   free_dominance_info (CDI_DOMINATORS);
   1884  1.1  mrg   free_dominance_info (CDI_POST_DOMINATORS);
   1885  1.1  mrg 
   1886  1.1  mrg   /* All done.  */
   1887  1.1  mrg   df_scan->local_flags &= ~DF_SCAN_EMPTY_ENTRY_EXIT;
   1888  1.1  mrg   df_update_entry_exit_and_calls ();
   1889  1.1  mrg   df_live_set_all_dirty ();
   1890  1.1  mrg   df_analyze ();
   1891           }
   1892