Home | History | Annotate | Line # | Download | only in gcc
ipa-split.cc revision 1.1.1.1
      1 /* Function splitting pass
      2    Copyright (C) 2010-2022 Free Software Foundation, Inc.
      3    Contributed by Jan Hubicka  <jh (at) suse.cz>
      4 
      5 This file is part of GCC.
      6 
      7 GCC is free software; you can redistribute it and/or modify it under
      8 the terms of the GNU General Public License as published by the Free
      9 Software Foundation; either version 3, or (at your option) any later
     10 version.
     11 
     12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
     13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
     14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
     15 for more details.
     16 
     17 You should have received a copy of the GNU General Public License
     18 along with GCC; see the file COPYING3.  If not see
     19 <http://www.gnu.org/licenses/>.  */
     20 
     21 /* The purpose of this pass is to split function bodies to improve
     22    inlining.  I.e. for function of the form:
     23 
     24    func (...)
     25      {
     26        if (cheap_test)
     27 	 something_small
     28        else
     29 	 something_big
     30      }
     31 
     32    Produce:
     33 
     34    func.part (...)
     35      {
     36 	something_big
     37      }
     38 
     39    func (...)
     40      {
     41        if (cheap_test)
     42 	 something_small
     43        else
     44 	 func.part (...);
     45      }
     46 
     47    When func becomes inlinable and when cheap_test is often true, inlining func,
     48    but not func.part leads to performance improvement similar to inlining
     49    original func while the code size growth is smaller.
     50 
     51    The pass is organized in three stages:
     52    1) Collect local info about basic block into BB_INFO structure and
     53       compute function body estimated size and time.
     54    2) Via DFS walk find all possible basic blocks where we can split
     55       and choose the best one.
     56    3) If split point is found, split at the specified BB by creating a clone
     57       and updating function to call it.
     58 
     59    The decisions what functions to split are in execute_split_functions
     60    and consider_split.
     61 
     62    There are several possible future improvements for this pass including:
     63 
     64    1) Splitting to break up large functions
     65    2) Splitting to reduce stack frame usage
     66    3) Allow split part of function to use values computed in the header part.
     67       The values needs to be passed to split function, perhaps via same
     68       interface as for nested functions or as argument.
     69    4) Support for simple rematerialization.  I.e. when split part use
     70       value computed in header from function parameter in very cheap way, we
     71       can just recompute it.
     72    5) Support splitting of nested functions.
     73    6) Support non-SSA arguments.
     74    7) There is nothing preventing us from producing multiple parts of single function
     75       when needed or splitting also the parts.  */
     76 
     77 #include "config.h"
     78 #include "system.h"
     79 #include "coretypes.h"
     80 #include "backend.h"
     81 #include "rtl.h"
     82 #include "tree.h"
     83 #include "gimple.h"
     84 #include "cfghooks.h"
     85 #include "alloc-pool.h"
     86 #include "tree-pass.h"
     87 #include "ssa.h"
     88 #include "cgraph.h"
     89 #include "diagnostic.h"
     90 #include "fold-const.h"
     91 #include "cfganal.h"
     92 #include "calls.h"
     93 #include "gimplify.h"
     94 #include "gimple-iterator.h"
     95 #include "gimplify-me.h"
     96 #include "gimple-walk.h"
     97 #include "symbol-summary.h"
     98 #include "ipa-prop.h"
     99 #include "tree-cfg.h"
    100 #include "tree-into-ssa.h"
    101 #include "tree-dfa.h"
    102 #include "tree-inline.h"
    103 #include "gimple-pretty-print.h"
    104 #include "ipa-fnsummary.h"
    105 #include "cfgloop.h"
    106 #include "attribs.h"
    107 
/* Per basic block info.  Filled in during stage 1 of the pass, which
   collects local information and computes estimated size and time for
   the whole function body.  */

class split_bb_info
{
public:
  /* Estimated code size of the statements in the BB.  */
  unsigned int size;
  /* Estimated execution time of the statements in the BB.  */
  sreal time;
};
    116 
    117 static vec<split_bb_info> bb_info_vec;
    118 
/* Description of split point.  */

class split_point
{
public:
  /* Size of the partitions.  */
  sreal header_time, split_time;
  unsigned int header_size, split_size;

  /* SSA names that need to be passed into the split function.  */
  bitmap ssa_names_to_pass;

  /* Basic block where we split (that will become entry point of the new
     function).  */
  basic_block entry_bb;

  /* Count for entering the split part.
     This is not count of the entry_bb because it may be in loop.  */
  profile_count count;

  /* Basic blocks we are splitting away.  */
  bitmap split_bbs;

  /* True when return value is computed on split part and thus it needs
     to be returned.  */
  bool split_part_set_retval;
};
    145 
    146 /* Best split point found.  */
    147 
    148 class split_point best_split_point;
    149 
    150 /* Set of basic blocks that are not allowed to dominate a split point.  */
    151 
    152 static bitmap forbidden_dominators;
    153 
    154 static tree find_retval (basic_block return_bb);
    155 
    156 /* Callback for walk_stmt_load_store_addr_ops.  If T is non-SSA automatic
    157    variable, check it if it is present in bitmap passed via DATA.  */
    158 
    159 static bool
    160 test_nonssa_use (gimple *, tree t, tree, void *data)
    161 {
    162   t = get_base_address (t);
    163 
    164   if (!t || is_gimple_reg (t))
    165     return false;
    166 
    167   if (TREE_CODE (t) == PARM_DECL
    168       || (VAR_P (t)
    169 	  && auto_var_in_fn_p (t, current_function_decl))
    170       || TREE_CODE (t) == RESULT_DECL
    171 	 /* Normal labels are part of CFG and will be handled gratefully.
    172 	    Forced labels however can be used directly by statements and
    173 	    need to stay in one partition along with their uses.  */
    174       || (TREE_CODE (t) == LABEL_DECL
    175 	  && FORCED_LABEL (t)))
    176     return bitmap_bit_p ((bitmap)data, DECL_UID (t));
    177 
    178   /* For DECL_BY_REFERENCE, the return value is actually a pointer.  We want
    179      to pretend that the value pointed to is actual result decl.  */
    180   if ((TREE_CODE (t) == MEM_REF || INDIRECT_REF_P (t))
    181       && TREE_CODE (TREE_OPERAND (t, 0)) == SSA_NAME
    182       && SSA_NAME_VAR (TREE_OPERAND (t, 0))
    183       && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND (t, 0))) == RESULT_DECL
    184       && DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
    185     return
    186       bitmap_bit_p ((bitmap)data,
    187 		    DECL_UID (DECL_RESULT (current_function_decl)));
    188 
    189   return false;
    190 }
    191 
    192 /* Dump split point CURRENT.  */
    193 
    194 static void
    195 dump_split_point (FILE * file, class split_point *current)
    196 {
    197   fprintf (file,
    198 	   "Split point at BB %i\n"
    199 	   "  header time: %f header size: %i\n"
    200 	   "  split time: %f split size: %i\n  bbs: ",
    201 	   current->entry_bb->index, current->header_time.to_double (),
    202 	   current->header_size, current->split_time.to_double (),
    203 	   current->split_size);
    204   dump_bitmap (file, current->split_bbs);
    205   fprintf (file, "  SSA names to pass: ");
    206   dump_bitmap (file, current->ssa_names_to_pass);
    207 }
    208 
/* Look for all BBs in header that might lead to the split part and verify
   that they are not defining any non-SSA var used by the split part.
   Parameters are the same as for consider_split.
   Returns true when the header is clean and the split may proceed.  */

static bool
verify_non_ssa_vars (class split_point *current, bitmap non_ssa_vars,
		     basic_block return_bb)
{
  bitmap seen = BITMAP_ALLOC (NULL);
  vec<basic_block> worklist = vNULL;
  edge e;
  edge_iterator ei;
  bool ok = true;
  basic_block bb;

  /* Seed the worklist with the predecessors of the split entry that are
     part of the header (i.e. not in the split region and not the
     function entry block).  */
  FOR_EACH_EDGE (e, ei, current->entry_bb->preds)
    if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
	&& !bitmap_bit_p (current->split_bbs, e->src->index))
      {
        worklist.safe_push (e->src);
	bitmap_set_bit (seen, e->src->index);
      }

  /* Walk backwards through all header blocks reachable from the split
     entry, checking every statement (and relevant return_bb PHI args)
     for uses of vars in NON_SSA_VARS.  */
  while (!worklist.is_empty ())
    {
      bb = worklist.pop ();
      /* bitmap_set_bit returns true only on a 0->1 transition, so each
	 block is pushed at most once.  */
      FOR_EACH_EDGE (e, ei, bb->preds)
	if (e->src != ENTRY_BLOCK_PTR_FOR_FN (cfun)
	    && bitmap_set_bit (seen, e->src->index))
	  {
	    gcc_checking_assert (!bitmap_bit_p (current->split_bbs,
					        e->src->index));
	    worklist.safe_push (e->src);
	  }
      for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	{
	  gimple *stmt = gsi_stmt (bsi);
	  if (is_gimple_debug (stmt))
	    continue;
	  if (walk_stmt_load_store_addr_ops
	      (stmt, non_ssa_vars, test_nonssa_use, test_nonssa_use,
	       test_nonssa_use))
	    {
	      ok = false;
	      goto done;
	    }
	  /* Label definitions count as uses of the label decl too.  */
	  if (glabel *label_stmt = dyn_cast <glabel *> (stmt))
	    if (test_nonssa_use (stmt, gimple_label_label (label_stmt),
				 NULL_TREE, non_ssa_vars))
	      {
		ok = false;
		goto done;
	      }
	}
      for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
	   gsi_next (&bsi))
	{
	  if (walk_stmt_load_store_addr_ops
	      (gsi_stmt (bsi), non_ssa_vars, test_nonssa_use, test_nonssa_use,
	       test_nonssa_use))
	    {
	      ok = false;
	      goto done;
	    }
	}
      /* When the header block feeds the return block, the PHI arguments
	 flowing along that edge are effectively used in the header.  */
      FOR_EACH_EDGE (e, ei, bb->succs)
	{
	  if (e->dest != return_bb)
	    continue;
	  for (gphi_iterator bsi = gsi_start_phis (return_bb);
	       !gsi_end_p (bsi);
	       gsi_next (&bsi))
	    {
	      gphi *stmt = bsi.phi ();
	      tree op = gimple_phi_arg_def (stmt, e->dest_idx);

	      if (virtual_operand_p (gimple_phi_result (stmt)))
		continue;
	      if (TREE_CODE (op) != SSA_NAME
		  && test_nonssa_use (stmt, op, op, non_ssa_vars))
		{
		  ok = false;
		  goto done;
		}
	    }
	}
    }

  /* Verify that the rest of function does not define any label
     used by the split part.  */
  FOR_EACH_BB_FN (bb, cfun)
    if (!bitmap_bit_p (current->split_bbs, bb->index)
	&& !bitmap_bit_p (seen, bb->index))
      {
        gimple_stmt_iterator bsi;
	/* Labels can only appear at the start of a BB, so stop at the
	   first non-label statement.  */
        for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	  if (glabel *label_stmt = dyn_cast <glabel *> (gsi_stmt (bsi)))
	    {
	      if (test_nonssa_use (label_stmt,
				   gimple_label_label (label_stmt),
				   NULL_TREE, non_ssa_vars))
		{
		  ok = false;
		  goto done;
		}
	    }
	  else
	    break;
      }

done:
  BITMAP_FREE (seen);
  worklist.release ();
  return ok;
}
    325 
    326 /* If STMT is a call, check the callee against a list of forbidden
    327    predicate functions.  If a match is found, look for uses of the
    328    call result in condition statements that compare against zero.
    329    For each such use, find the block targeted by the condition
    330    statement for the nonzero result, and set the bit for this block
    331    in the forbidden dominators bitmap.  The purpose of this is to avoid
    332    selecting a split point where we are likely to lose the chance
    333    to optimize away an unused function call.  */
    334 
    335 static void
    336 check_forbidden_calls (gimple *stmt)
    337 {
    338   imm_use_iterator use_iter;
    339   use_operand_p use_p;
    340   tree lhs;
    341 
    342   /* At the moment, __builtin_constant_p is the only forbidden
    343      predicate function call (see PR49642).  */
    344   if (!gimple_call_builtin_p (stmt, BUILT_IN_CONSTANT_P))
    345     return;
    346 
    347   lhs = gimple_call_lhs (stmt);
    348 
    349   if (!lhs || TREE_CODE (lhs) != SSA_NAME)
    350     return;
    351 
    352   FOR_EACH_IMM_USE_FAST (use_p, use_iter, lhs)
    353     {
    354       tree op1;
    355       basic_block use_bb, forbidden_bb;
    356       enum tree_code code;
    357       edge true_edge, false_edge;
    358       gcond *use_stmt;
    359 
    360       use_stmt = dyn_cast <gcond *> (USE_STMT (use_p));
    361       if (!use_stmt)
    362 	continue;
    363 
    364       /* Assuming canonical form for GIMPLE_COND here, with constant
    365 	 in second position.  */
    366       op1 = gimple_cond_rhs (use_stmt);
    367       code = gimple_cond_code (use_stmt);
    368       use_bb = gimple_bb (use_stmt);
    369 
    370       extract_true_false_edges_from_block (use_bb, &true_edge, &false_edge);
    371 
    372       /* We're only interested in comparisons that distinguish
    373 	 unambiguously from zero.  */
    374       if (!integer_zerop (op1) || code == LE_EXPR || code == GE_EXPR)
    375 	continue;
    376 
    377       if (code == EQ_EXPR)
    378 	forbidden_bb = false_edge->dest;
    379       else
    380 	forbidden_bb = true_edge->dest;
    381 
    382       bitmap_set_bit (forbidden_dominators, forbidden_bb->index);
    383     }
    384 }
    385 
    386 /* If BB is dominated by any block in the forbidden dominators set,
    387    return TRUE; else FALSE.  */
    388 
    389 static bool
    390 dominated_by_forbidden (basic_block bb)
    391 {
    392   unsigned dom_bb;
    393   bitmap_iterator bi;
    394 
    395   EXECUTE_IF_SET_IN_BITMAP (forbidden_dominators, 1, dom_bb, bi)
    396     {
    397       if (dominated_by_p (CDI_DOMINATORS, bb,
    398 			  BASIC_BLOCK_FOR_FN (cfun, dom_bb)))
    399 	return true;
    400     }
    401 
    402   return false;
    403 }
    404 
    405 /* For give split point CURRENT and return block RETURN_BB return 1
    406    if ssa name VAL is set by split part and 0 otherwise.  */
    407 static bool
    408 split_part_set_ssa_name_p (tree val, class split_point *current,
    409 			   basic_block return_bb)
    410 {
    411   if (TREE_CODE (val) != SSA_NAME)
    412     return false;
    413 
    414   return (!SSA_NAME_IS_DEFAULT_DEF (val)
    415 	  && (bitmap_bit_p (current->split_bbs,
    416 			    gimple_bb (SSA_NAME_DEF_STMT (val))->index)
    417 	      || gimple_bb (SSA_NAME_DEF_STMT (val)) == return_bb));
    418 }
    419 
/* We found a split_point CURRENT.  NON_SSA_VARS is a bitmap of all non-SSA
   variables used and RETURN_BB is the return basic block.
   See if we can split the function here; if so and the candidate beats
   BEST_SPLIT_POINT, record it there.  */

static void
consider_split (class split_point *current, bitmap non_ssa_vars,
		basic_block return_bb)
{
  tree parm;
  unsigned int num_args = 0;
  unsigned int call_overhead;
  edge e;
  edge_iterator ei;
  gphi_iterator bsi;
  unsigned int i;
  tree retval;
  bool back_edge = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_split_point (dump_file, current);

  /* Sum counts only over edges coming from outside the split region,
     so COUNT reflects entries into the split part rather than the
     count of entry_bb itself (which may sit inside a loop).  */
  current->count = profile_count::zero ();
  FOR_EACH_EDGE (e, ei, current->entry_bb->preds)
    {
      if (e->flags & EDGE_DFS_BACK)
	back_edge = true;
      if (!bitmap_bit_p (current->split_bbs, e->src->index))
	current->count += e->count ();
    }

  /* Do not split when we would end up calling function anyway.
     Compares are three state, use !(...<...) to also give up when outcome
     is unknown.  */
  if (!(current->count
       < (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.apply_scale
	   (param_partial_inlining_entry_probability, 100))))
    {
      /* When profile is guessed, we cannot expect it to give us
	 realistic estimate on likeliness of function taking the
	 complex path.  As a special case, when tail of the function is
	 a loop, enable splitting since inlining code skipping the loop
	 is likely noticeable win.  */
      if (back_edge
	  && profile_status_for_fn (cfun) != PROFILE_READ
	  && current->count
		 < ENTRY_BLOCK_PTR_FOR_FN (cfun)->count)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file,
		       "  Split before loop, accepting despite low counts");
	      current->count.dump (dump_file);
	      fprintf (dump_file, " ");
	      ENTRY_BLOCK_PTR_FOR_FN (cfun)->count.dump (dump_file);
	    }
	}
      else
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file,
		     "  Refused: incoming frequency is too large.\n");
	  return;
	}
    }

  /* Splitting with an empty header would gain nothing.  */
  if (!current->header_size)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  Refused: header empty\n");
      return;
    }

  /* Verify that PHI args on entry are either virtual or all their operands
     incoming from header are the same.  */
  for (bsi = gsi_start_phis (current->entry_bb); !gsi_end_p (bsi); gsi_next (&bsi))
    {
      gphi *stmt = bsi.phi ();
      tree val = NULL;

      if (virtual_operand_p (gimple_phi_result (stmt)))
	continue;
      for (i = 0; i < gimple_phi_num_args (stmt); i++)
	{
	  edge e = gimple_phi_arg_edge (stmt, i);
	  if (!bitmap_bit_p (current->split_bbs, e->src->index))
	    {
	      tree edge_val = gimple_phi_arg_def (stmt, i);
	      if (val && edge_val != val)
	        {
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file,
			     "  Refused: entry BB has PHI with multiple variants\n");
		  return;
	        }
	      val = edge_val;
	    }
	}
    }


  /* See what argument we will pass to the split function and compute
     call overhead.  */
  call_overhead = eni_size_weights.call_cost;
  for (parm = DECL_ARGUMENTS (current_function_decl); parm;
       parm = DECL_CHAIN (parm))
    {
      if (!is_gimple_reg (parm))
	{
	  /* Non-register parameters cannot be passed to the split part;
	     reject when the split part uses one.  */
	  if (bitmap_bit_p (non_ssa_vars, DECL_UID (parm)))
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  Refused: need to pass non-ssa param values\n");
	      return;
	    }
	}
      else
	{
	  tree ddef = ssa_default_def (cfun, parm);
	  if (ddef
	      && bitmap_bit_p (current->ssa_names_to_pass,
			       SSA_NAME_VERSION (ddef)))
	    {
	      if (!VOID_TYPE_P (TREE_TYPE (parm)))
		call_overhead += estimate_move_cost (TREE_TYPE (parm), false);
	      num_args++;
	    }
	}
    }
  /* Account for the cost of moving the return value as well.  */
  if (!VOID_TYPE_P (TREE_TYPE (TREE_TYPE (current_function_decl))))
    call_overhead += estimate_move_cost (TREE_TYPE (TREE_TYPE
						 (current_function_decl)),
					 false);

  if (current->split_size <= call_overhead)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Refused: split size is smaller than call overhead\n");
      return;
    }
  /* FIXME: The logic here is not very precise, because inliner does use
     inline predicates to reduce function body size.  We add 10 to anticipate
     that.  Next stage1 we should try to be more meaningful here.  */
  if (current->header_size + call_overhead
      >= (unsigned int)(DECL_DECLARED_INLINE_P (current_function_decl)
			? param_max_inline_insns_single
			: param_max_inline_insns_auto) + 10)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Refused: header size is too large for inline candidate\n");
      return;
    }

  /* Splitting functions brings the target out of comdat group; this will
     lead to code duplication if the function is reused by other unit.
     Limit this duplication.  This is consistent with limit in tree-sra.cc
     FIXME: with LTO we ought to be able to do better!  */
  if (DECL_ONE_ONLY (current_function_decl)
      && current->split_size >= (unsigned int) param_max_inline_insns_auto + 10)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Refused: function is COMDAT and tail is too large\n");
      return;
    }
  /* For comdat functions also reject very small tails; those will likely get
     inlined back and we do not want to risk the duplication overhead.
     FIXME: with LTO we ought to be able to do better!  */
  if (DECL_ONE_ONLY (current_function_decl)
      && current->split_size
	 <= (unsigned int) param_early_inlining_insns / 2)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Refused: function is COMDAT and tail is too small\n");
      return;
    }

  /* FIXME: we currently can pass only SSA function parameters to the split
     arguments.  Once parm_adjustment infrastructure is supported by cloning,
     we can pass more than that.  */
  if (num_args != bitmap_count_bits (current->ssa_names_to_pass))
    {

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Refused: need to pass non-param values\n");
      return;
    }

  /* When there are non-ssa vars used in the split region, see if they
     are used in the header region.  If so, reject the split.
     FIXME: we can use nested function support to access both.  */
  if (!bitmap_empty_p (non_ssa_vars)
      && !verify_non_ssa_vars (current, non_ssa_vars, return_bb))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Refused: split part has non-ssa uses\n");
      return;
    }

  /* If the split point is dominated by a forbidden block, reject
     the split.  */
  if (!bitmap_empty_p (forbidden_dominators)
      && dominated_by_forbidden (current->entry_bb))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "  Refused: split point dominated by forbidden block\n");
      return;
    }

  /* See if retval used by return bb is computed by header or split part.
     When it is computed by split part, we need to produce return statement
     in the split part and add code to header to pass it around.

     This is bit tricky to test:
       1) When there is no return_bb or no return value, we always pass
          value around.
       2) Invariants are always computed by caller.
       3) For SSA we need to look if defining statement is in header or split part
       4) For non-SSA we need to look where the var is computed. */
  retval = find_retval (return_bb);
  if (!retval)
    {
      /* If there is a return_bb with no return value in function returning
	 value by reference, also make the split part return void, otherwise
	 the expansion would try to create a non-POD temporary, which is
	 invalid.  */
      if (return_bb != EXIT_BLOCK_PTR_FOR_FN (cfun)
	  && DECL_RESULT (current_function_decl)
	  && DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
	current->split_part_set_retval = false;
      else
	current->split_part_set_retval = true;
    }
  else if (is_gimple_min_invariant (retval))
    current->split_part_set_retval = false;
  /* Special case is value returned by reference we record as if it was non-ssa
     set to result_decl.  */
  else if (TREE_CODE (retval) == SSA_NAME
	   && SSA_NAME_VAR (retval)
	   && TREE_CODE (SSA_NAME_VAR (retval)) == RESULT_DECL
	   && DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
    current->split_part_set_retval
       = bitmap_bit_p (non_ssa_vars, DECL_UID (SSA_NAME_VAR (retval)));
  else if (TREE_CODE (retval) == SSA_NAME)
    current->split_part_set_retval
      = split_part_set_ssa_name_p (retval, current, return_bb);
  else if (TREE_CODE (retval) == PARM_DECL)
    current->split_part_set_retval = false;
  else if (VAR_P (retval)
	   || TREE_CODE (retval) == RESULT_DECL)
    current->split_part_set_retval
      = bitmap_bit_p (non_ssa_vars, DECL_UID (retval));
  else
    current->split_part_set_retval = true;

  /* split_function fixes up at most one non-virtual PHI node in return_bb,
     for the return value.  If there are other PHIs, give up.  */
  if (return_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
    {
      gphi_iterator psi;

      for (psi = gsi_start_phis (return_bb); !gsi_end_p (psi); gsi_next (&psi))
	if (!virtual_operand_p (gimple_phi_result (psi.phi ()))
	    && !(retval
		 && current->split_part_set_retval
		 && TREE_CODE (retval) == SSA_NAME
		 && !DECL_BY_REFERENCE (DECL_RESULT (current_function_decl))
		 && SSA_NAME_DEF_STMT (retval) == psi.phi ()))
	  {
	    if (dump_file && (dump_flags & TDF_DETAILS))
	      fprintf (dump_file,
		       "  Refused: return bb has extra PHIs\n");
	    return;
	  }
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  Accepted!\n");

  /* At the moment choose split point with lowest count and that leaves
     out smallest size of header.
     In future we might re-consider this heuristics.  */
  if (!best_split_point.split_bbs
      || best_split_point.count
	 > current->count
      || (best_split_point.count == current->count
	  && best_split_point.split_size < current->split_size))

    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  New best split point!\n");
      if (best_split_point.ssa_names_to_pass)
	{
	  BITMAP_FREE (best_split_point.ssa_names_to_pass);
	  BITMAP_FREE (best_split_point.split_bbs);
	}
      /* Deep-copy the bitmaps; CURRENT's bitmaps are owned by the
	 caller and will be reused.  */
      best_split_point = *current;
      best_split_point.ssa_names_to_pass = BITMAP_ALLOC (NULL);
      bitmap_copy (best_split_point.ssa_names_to_pass,
		   current->ssa_names_to_pass);
      best_split_point.split_bbs = BITMAP_ALLOC (NULL);
      bitmap_copy (best_split_point.split_bbs, current->split_bbs);
    }
}
    730 
/* Return basic block containing RETURN statement.  We allow basic blocks
   of the form:
   <retval> = tmp_var;
   return <retval>
   but return_bb cannot be more complex than this (except for
   -fsanitize=thread we allow TSAN_FUNC_EXIT () internal call in there).
   If nothing is found, return the exit block.

   When there are multiple RETURN statements, choose one with return value,
   since that one is more likely shared by multiple code paths.

   Return BB is special, because for function splitting it is the only
   basic block that is duplicated in between header and split part of the
   function.

   TODO: We might support multiple return blocks.  */

static basic_block
find_return_bb (void)
{
  edge e;
  basic_block return_bb = EXIT_BLOCK_PTR_FOR_FN (cfun);
  gimple_stmt_iterator bsi;
  bool found_return = false;
  tree retval = NULL_TREE;

  if (!single_pred_p (EXIT_BLOCK_PTR_FOR_FN (cfun)))
    return return_bb;

  /* Scan the single exit predecessor backwards; the block qualifies
     only when every statement in it is of an allowed form.  */
  e = single_pred_edge (EXIT_BLOCK_PTR_FOR_FN (cfun));
  for (bsi = gsi_last_bb (e->src); !gsi_end_p (bsi); gsi_prev (&bsi))
    {
      gimple *stmt = gsi_stmt (bsi);
      /* Labels, debug statements and clobbers are always allowed.  */
      if (gimple_code (stmt) == GIMPLE_LABEL
	  || is_gimple_debug (stmt)
	  || gimple_clobber_p (stmt))
	;
      /* A simple copy feeding the already-seen return value, from an
	 auto variable or an invariant, is allowed too.  */
      else if (gimple_code (stmt) == GIMPLE_ASSIGN
	       && found_return
	       && gimple_assign_single_p (stmt)
	       && (auto_var_in_fn_p (gimple_assign_rhs1 (stmt),
				     current_function_decl)
		   || is_gimple_min_invariant (gimple_assign_rhs1 (stmt)))
	       && retval == gimple_assign_lhs (stmt))
	;
      else if (greturn *return_stmt = dyn_cast <greturn *> (stmt))
	{
	  found_return = true;
	  retval = gimple_return_retval (return_stmt);
	}
      /* For -fsanitize=thread, allow also TSAN_FUNC_EXIT () in the return
	 bb.  */
      else if ((flag_sanitize & SANITIZE_THREAD)
	       && gimple_call_internal_p (stmt, IFN_TSAN_FUNC_EXIT))
	;
      else
	break;
    }
  /* The block qualifies only when the scan consumed all statements
     (no early break) and a return was seen.  */
  if (gsi_end_p (bsi) && found_return)
    return_bb = e->src;

  return return_bb;
}
    794 
    795 /* Given return basic block RETURN_BB, see where return value is really
    796    stored.  */
    797 static tree
    798 find_retval (basic_block return_bb)
    799 {
    800   gimple_stmt_iterator bsi;
    801   for (bsi = gsi_start_bb (return_bb); !gsi_end_p (bsi); gsi_next (&bsi))
    802     if (greturn *return_stmt = dyn_cast <greturn *> (gsi_stmt (bsi)))
    803       return gimple_return_retval (return_stmt);
    804     else if (gimple_code (gsi_stmt (bsi)) == GIMPLE_ASSIGN
    805 	     && !gimple_clobber_p (gsi_stmt (bsi)))
    806       return gimple_assign_rhs1 (gsi_stmt (bsi));
    807   return NULL;
    808 }
    809 
    810 /* Callback for walk_stmt_load_store_addr_ops.  If T is non-SSA automatic
    811    variable, mark it as used in bitmap passed via DATA.
    812    Return true when access to T prevents splitting the function.  */
    813 
    814 static bool
    815 mark_nonssa_use (gimple *, tree t, tree, void *data)
    816 {
    817   t = get_base_address (t);
    818 
    819   if (!t || is_gimple_reg (t))
    820     return false;
    821 
    822   /* At present we can't pass non-SSA arguments to split function.
    823      FIXME: this can be relaxed by passing references to arguments.  */
    824   if (TREE_CODE (t) == PARM_DECL)
    825     {
    826       if (dump_file && (dump_flags & TDF_DETAILS))
    827 	fprintf (dump_file,
    828 		 "Cannot split: use of non-ssa function parameter.\n");
    829       return true;
    830     }
    831 
    832   if ((VAR_P (t) && auto_var_in_fn_p (t, current_function_decl))
    833       || TREE_CODE (t) == RESULT_DECL
    834       || (TREE_CODE (t) == LABEL_DECL && FORCED_LABEL (t)))
    835     bitmap_set_bit ((bitmap)data, DECL_UID (t));
    836 
    837   /* For DECL_BY_REFERENCE, the return value is actually a pointer.  We want
    838      to pretend that the value pointed to is actual result decl.  */
    839   if ((TREE_CODE (t) == MEM_REF || INDIRECT_REF_P (t))
    840       && TREE_CODE (TREE_OPERAND (t, 0)) == SSA_NAME
    841       && SSA_NAME_VAR (TREE_OPERAND (t, 0))
    842       && TREE_CODE (SSA_NAME_VAR (TREE_OPERAND (t, 0))) == RESULT_DECL
    843       && DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
    844     return
    845       bitmap_bit_p ((bitmap)data,
    846 		    DECL_UID (DECL_RESULT (current_function_decl)));
    847 
    848   return false;
    849 }
    850 
/* Compute local properties of basic block BB we collect when looking for
   split points.  We look for ssa defs and store them in SET_SSA_NAMES,
   for ssa uses and store them in USED_SSA_NAMES and for any non-SSA automatic
   vars stored in NON_SSA_VARS.

   When BB has edge to RETURN_BB, collect uses in RETURN_BB too.

   Return false when BB contains something that prevents it from being put into
   split function.  */

static bool
visit_bb (basic_block bb, basic_block return_bb,
	  bitmap set_ssa_names, bitmap used_ssa_names,
	  bitmap non_ssa_vars)
{
  edge e;
  edge_iterator ei;
  bool can_split = true;

  /* First pass: scan every real statement of BB.  */
  for (gimple_stmt_iterator bsi = gsi_start_bb (bb); !gsi_end_p (bsi);
       gsi_next (&bsi))
    {
      gimple *stmt = gsi_stmt (bsi);
      tree op;
      ssa_op_iter iter;

      /* Debug statements and clobbers do not affect splittability nor
	 the set of values that must be passed to the split part.  */
      if (is_gimple_debug (stmt))
	continue;

      if (gimple_clobber_p (stmt))
	continue;

      /* FIXME: We can split regions containing EH.  We cannot however
	 split RESX, EH_DISPATCH and EH_POINTER referring to same region
	 into different partitions.  This would require tracking of
	 EH regions and checking in consider_split_point if they
	 are not used elsewhere.  */
      if (gimple_code (stmt) == GIMPLE_RESX)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Cannot split: resx.\n");
	  can_split = false;
	}
      if (gimple_code (stmt) == GIMPLE_EH_DISPATCH)
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    fprintf (dump_file, "Cannot split: eh dispatch.\n");
	  can_split = false;
	}

      /* Check calls that would prevent splitting.  */
      if (gimple_code (stmt) == GIMPLE_CALL)
	{
	  if (tree decl = gimple_call_fndecl (stmt))
	    {
	      /* Check builtins that would prevent splitting.  */
	      if (fndecl_built_in_p (decl, BUILT_IN_NORMAL))
		switch (DECL_FUNCTION_CODE (decl))
		  {
		  /* FIXME: once we will allow passing non-parm values to
		     split part, we need to be sure to handle correct
		     builtin_stack_save and builtin_stack_restore.  At the
		     moment we are safe; there is no way to store
		     builtin_stack_save result in non-SSA variable since all
		     calls to those are compiler generated.  */
		  case BUILT_IN_APPLY:
		  case BUILT_IN_APPLY_ARGS:
		  case BUILT_IN_VA_START:
		    if (dump_file && (dump_flags & TDF_DETAILS))
		      fprintf (dump_file,
			       "Cannot split: builtin_apply and va_start.\n");
		    can_split = false;
		    break;
		  case BUILT_IN_EH_POINTER:
		    if (dump_file && (dump_flags & TDF_DETAILS))
		      fprintf (dump_file,
			       "Cannot split: builtin_eh_pointer.\n");
		    can_split = false;
		    break;
		  default:
		    break;
		  }

	      /* Calls to functions (which have the warning or error
		 attribute on them) should not be split off into another
		 function.  */
	      if (lookup_attribute ("warning", DECL_ATTRIBUTES (decl))
		   || lookup_attribute ("error", DECL_ATTRIBUTES (decl)))
		{
		  if (dump_file && (dump_flags & TDF_DETAILS))
		    fprintf (dump_file,
			     "Cannot split: warning or error attribute.\n");
		  can_split = false;
		}
	    }
	}

      /* Record SSA defs and uses of the statement, and any non-SSA
	 variables it touches (the latter may also veto the split).  */
      FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_DEF)
	bitmap_set_bit (set_ssa_names, SSA_NAME_VERSION (op));
      FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
	bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op));
      can_split &= !walk_stmt_load_store_addr_ops (stmt, non_ssa_vars,
						   mark_nonssa_use,
						   mark_nonssa_use,
						   mark_nonssa_use);
    }
  /* Second pass: collect defs and uses from the PHI nodes of BB,
     ignoring virtual operands.  */
  for (gphi_iterator bsi = gsi_start_phis (bb); !gsi_end_p (bsi);
       gsi_next (&bsi))
    {
      gphi *stmt = bsi.phi ();
      unsigned int i;

      if (virtual_operand_p (gimple_phi_result (stmt)))
	continue;
      bitmap_set_bit (set_ssa_names,
		      SSA_NAME_VERSION (gimple_phi_result (stmt)));
      for (i = 0; i < gimple_phi_num_args (stmt); i++)
	{
	  tree op = gimple_phi_arg_def (stmt, i);
	  if (TREE_CODE (op) == SSA_NAME)
	    bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op));
	}
      can_split &= !walk_stmt_load_store_addr_ops (stmt, non_ssa_vars,
						   mark_nonssa_use,
						   mark_nonssa_use,
						   mark_nonssa_use);
    }
  /* Record also uses coming from PHI operand in return BB.  */
  FOR_EACH_EDGE (e, ei, bb->succs)
    if (e->dest == return_bb)
      {
	for (gphi_iterator bsi = gsi_start_phis (return_bb);
	     !gsi_end_p (bsi);
	     gsi_next (&bsi))
	  {
	    gphi *stmt = bsi.phi ();
	    /* Only the PHI argument flowing in along edge E belongs to
	       this block's use set.  */
	    tree op = gimple_phi_arg_def (stmt, e->dest_idx);

	    if (virtual_operand_p (gimple_phi_result (stmt)))
	      continue;
	    if (TREE_CODE (op) == SSA_NAME)
	      bitmap_set_bit (used_ssa_names, SSA_NAME_VERSION (op));
	    else
	      can_split &= !mark_nonssa_use (stmt, op, op, non_ssa_vars);
	  }
      }
  return can_split;
}
    999 
/* Stack entry for recursive DFS walk in find_split_point.
   One entry is pushed per basic block visited; the accumulated bitmaps and
   totals are merged into the parent entry when the block is popped.  */

class stack_entry
{
public:
  /* Basic block we are examining.  */
  basic_block bb;

  /* SSA names set and used by the BB and all BBs reachable
     from it via DFS walk.  */
  bitmap set_ssa_names, used_ssa_names;
  /* Non-SSA automatic variables used by BB and all BBs reachable from it.  */
  bitmap non_ssa_vars;

  /* All BBS visited from this BB via DFS walk.  */
  bitmap bbs_visited;

  /* Last examined edge in DFS walk.  Since we walk unoriented graph,
     the value is up to sum of incoming and outgoing edges of BB.  */
  unsigned int edge_num;

  /* Stack entry index of earliest BB reachable from current BB
     or any BB visited later in DFS walk.  */
  int earliest;

  /* Overall time and size of all BBs reached from this BB in DFS walk.  */
  sreal overall_time;
  int overall_size;

  /* When false we cannot split on this BB.  */
  bool can_split;
};
   1031 
   1032 
   1033 /* Find all articulations and call consider_split on them.
   1034    OVERALL_TIME and OVERALL_SIZE is time and size of the function.
   1035 
   1036    We perform basic algorithm for finding an articulation in a graph
   1037    created from CFG by considering it to be an unoriented graph.
   1038 
   1039    The articulation is discovered via DFS walk. We collect earliest
   1040    basic block on stack that is reachable via backward edge.  Articulation
   1041    is any basic block such that there is no backward edge bypassing it.
   1042    To reduce stack usage we maintain heap allocated stack in STACK vector.
   1043    AUX pointer of BB is set to index it appears in the stack or -1 once
   1044    it is visited and popped off the stack.
   1045 
   1046    The algorithm finds articulation after visiting the whole component
   1047    reachable by it.  This makes it convenient to collect information about
   1048    the component used by consider_split.  */
   1049 
   1050 static void
   1051 find_split_points (basic_block return_bb, sreal overall_time, int overall_size)
   1052 {
   1053   stack_entry first;
   1054   vec<stack_entry> stack = vNULL;
   1055   basic_block bb;
   1056   class split_point current;
   1057 
   1058   current.header_time = overall_time;
   1059   current.header_size = overall_size;
   1060   current.split_time = 0;
   1061   current.split_size = 0;
   1062   current.ssa_names_to_pass = BITMAP_ALLOC (NULL);
   1063 
   1064   first.bb = ENTRY_BLOCK_PTR_FOR_FN (cfun);
   1065   first.edge_num = 0;
   1066   first.overall_time = 0;
   1067   first.overall_size = 0;
   1068   first.earliest = INT_MAX;
   1069   first.set_ssa_names = 0;
   1070   first.used_ssa_names = 0;
   1071   first.non_ssa_vars = 0;
   1072   first.bbs_visited = 0;
   1073   first.can_split = false;
   1074   stack.safe_push (first);
   1075   ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux = (void *)(intptr_t)-1;
   1076 
   1077   while (!stack.is_empty ())
   1078     {
   1079       stack_entry *entry = &stack.last ();
   1080 
   1081       /* We are walking an acyclic graph, so edge_num counts
   1082 	 succ and pred edges together.  However when considering
   1083          articulation, we want to have processed everything reachable
   1084 	 from articulation but nothing that reaches into it.  */
   1085       if (entry->edge_num == EDGE_COUNT (entry->bb->succs)
   1086 	  && entry->bb != ENTRY_BLOCK_PTR_FOR_FN (cfun))
   1087 	{
   1088 	  int pos = stack.length ();
   1089 	  entry->can_split &= visit_bb (entry->bb, return_bb,
   1090 					entry->set_ssa_names,
   1091 					entry->used_ssa_names,
   1092 					entry->non_ssa_vars);
   1093 	  if (pos <= entry->earliest && !entry->can_split
   1094 	      && dump_file && (dump_flags & TDF_DETAILS))
   1095 	    fprintf (dump_file,
   1096 		     "found articulation at bb %i but cannot split\n",
   1097 		     entry->bb->index);
   1098 	  if (pos <= entry->earliest && entry->can_split)
   1099 	     {
   1100 	       if (dump_file && (dump_flags & TDF_DETAILS))
   1101 		 fprintf (dump_file, "found articulation at bb %i\n",
   1102 			  entry->bb->index);
   1103 	       current.entry_bb = entry->bb;
   1104 	       current.ssa_names_to_pass = BITMAP_ALLOC (NULL);
   1105 	       bitmap_and_compl (current.ssa_names_to_pass,
   1106 				 entry->used_ssa_names, entry->set_ssa_names);
   1107 	       current.header_time = overall_time - entry->overall_time;
   1108 	       current.header_size = overall_size - entry->overall_size;
   1109 	       current.split_time = entry->overall_time;
   1110 	       current.split_size = entry->overall_size;
   1111 	       current.split_bbs = entry->bbs_visited;
   1112 	       consider_split (&current, entry->non_ssa_vars, return_bb);
   1113 	       BITMAP_FREE (current.ssa_names_to_pass);
   1114 	     }
   1115 	}
   1116       /* Do actual DFS walk.  */
   1117       if (entry->edge_num
   1118 	  < (EDGE_COUNT (entry->bb->succs)
   1119 	     + EDGE_COUNT (entry->bb->preds)))
   1120 	{
   1121           edge e;
   1122 	  basic_block dest;
   1123 	  if (entry->edge_num < EDGE_COUNT (entry->bb->succs))
   1124 	    {
   1125 	      e = EDGE_SUCC (entry->bb, entry->edge_num);
   1126 	      dest = e->dest;
   1127 	    }
   1128 	  else
   1129 	    {
   1130 	      e = EDGE_PRED (entry->bb, entry->edge_num
   1131 			     - EDGE_COUNT (entry->bb->succs));
   1132 	      dest = e->src;
   1133 	    }
   1134 
   1135 	  entry->edge_num++;
   1136 
   1137 	  /* New BB to visit, push it to the stack.  */
   1138 	  if (dest != return_bb && dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
   1139 	      && !dest->aux)
   1140 	    {
   1141 	      stack_entry new_entry;
   1142 
   1143 	      new_entry.bb = dest;
   1144 	      new_entry.edge_num = 0;
   1145 	      new_entry.overall_time
   1146 		 = bb_info_vec[dest->index].time;
   1147 	      new_entry.overall_size
   1148 		 = bb_info_vec[dest->index].size;
   1149 	      new_entry.earliest = INT_MAX;
   1150 	      new_entry.set_ssa_names = BITMAP_ALLOC (NULL);
   1151 	      new_entry.used_ssa_names = BITMAP_ALLOC (NULL);
   1152 	      new_entry.bbs_visited = BITMAP_ALLOC (NULL);
   1153 	      new_entry.non_ssa_vars = BITMAP_ALLOC (NULL);
   1154 	      new_entry.can_split = true;
   1155 	      bitmap_set_bit (new_entry.bbs_visited, dest->index);
   1156 	      stack.safe_push (new_entry);
   1157 	      dest->aux = (void *)(intptr_t)stack.length ();
   1158 	    }
   1159 	  /* Back edge found, record the earliest point.  */
   1160 	  else if ((intptr_t)dest->aux > 0
   1161 		   && (intptr_t)dest->aux < entry->earliest)
   1162 	    entry->earliest = (intptr_t)dest->aux;
   1163 	}
   1164       /* We are done with examining the edges.  Pop off the value from stack
   1165 	 and merge stuff we accumulate during the walk.  */
   1166       else if (entry->bb != ENTRY_BLOCK_PTR_FOR_FN (cfun))
   1167 	{
   1168 	  stack_entry *prev = &stack[stack.length () - 2];
   1169 
   1170 	  entry->bb->aux = (void *)(intptr_t)-1;
   1171 	  prev->can_split &= entry->can_split;
   1172 	  if (prev->set_ssa_names)
   1173 	    {
   1174 	      bitmap_ior_into (prev->set_ssa_names, entry->set_ssa_names);
   1175 	      bitmap_ior_into (prev->used_ssa_names, entry->used_ssa_names);
   1176 	      bitmap_ior_into (prev->bbs_visited, entry->bbs_visited);
   1177 	      bitmap_ior_into (prev->non_ssa_vars, entry->non_ssa_vars);
   1178 	    }
   1179 	  if (prev->earliest > entry->earliest)
   1180 	    prev->earliest = entry->earliest;
   1181 	  prev->overall_time += entry->overall_time;
   1182 	  prev->overall_size += entry->overall_size;
   1183 	  BITMAP_FREE (entry->set_ssa_names);
   1184 	  BITMAP_FREE (entry->used_ssa_names);
   1185 	  BITMAP_FREE (entry->bbs_visited);
   1186 	  BITMAP_FREE (entry->non_ssa_vars);
   1187 	  stack.pop ();
   1188 	}
   1189       else
   1190         stack.pop ();
   1191     }
   1192   ENTRY_BLOCK_PTR_FOR_FN (cfun)->aux = NULL;
   1193   FOR_EACH_BB_FN (bb, cfun)
   1194     bb->aux = NULL;
   1195   stack.release ();
   1196   BITMAP_FREE (current.ssa_names_to_pass);
   1197 }
   1198 
   1199 /* Split function at SPLIT_POINT.  */
   1200 
   1201 static void
   1202 split_function (basic_block return_bb, class split_point *split_point,
   1203 		bool add_tsan_func_exit)
   1204 {
   1205   vec<tree> args_to_pass = vNULL;
   1206   bitmap args_to_skip;
   1207   tree parm;
   1208   int num = 0;
   1209   cgraph_node *node, *cur_node = cgraph_node::get (current_function_decl);
   1210   basic_block call_bb;
   1211   gcall *call, *tsan_func_exit_call = NULL;
   1212   edge e;
   1213   edge_iterator ei;
   1214   tree retval = NULL, real_retval = NULL;
   1215   gimple *last_stmt = NULL;
   1216   unsigned int i;
   1217   tree arg, ddef;
   1218 
   1219   if (dump_file)
   1220     {
   1221       fprintf (dump_file, "\n\nSplitting function at:\n");
   1222       dump_split_point (dump_file, split_point);
   1223     }
   1224 
   1225   if (cur_node->can_change_signature)
   1226     args_to_skip = BITMAP_ALLOC (NULL);
   1227   else
   1228     args_to_skip = NULL;
   1229 
   1230   /* Collect the parameters of new function and args_to_skip bitmap.  */
   1231   for (parm = DECL_ARGUMENTS (current_function_decl);
   1232        parm; parm = DECL_CHAIN (parm), num++)
   1233     if (args_to_skip
   1234 	&& (!is_gimple_reg (parm)
   1235 	    || (ddef = ssa_default_def (cfun, parm)) == NULL_TREE
   1236 	    || !bitmap_bit_p (split_point->ssa_names_to_pass,
   1237 			      SSA_NAME_VERSION (ddef))))
   1238       bitmap_set_bit (args_to_skip, num);
   1239     else
   1240       {
   1241 	/* This parm might not have been used up to now, but is going to be
   1242 	   used, hence register it.  */
   1243 	if (is_gimple_reg (parm))
   1244 	  arg = get_or_create_ssa_default_def (cfun, parm);
   1245 	else
   1246 	  arg = parm;
   1247 
   1248 	if (!useless_type_conversion_p (DECL_ARG_TYPE (parm), TREE_TYPE (arg)))
   1249 	  arg = fold_convert (DECL_ARG_TYPE (parm), arg);
   1250 	args_to_pass.safe_push (arg);
   1251       }
   1252 
   1253   /* See if the split function will return.  */
   1254   bool split_part_return_p = false;
   1255   FOR_EACH_EDGE (e, ei, return_bb->preds)
   1256     {
   1257       if (bitmap_bit_p (split_point->split_bbs, e->src->index))
   1258 	split_part_return_p = true;
   1259     }
   1260 
   1261   /* Add return block to what will become the split function.
   1262      We do not return; no return block is needed.  */
   1263   if (!split_part_return_p)
   1264     ;
   1265   /* We have no return block, so nothing is needed.  */
   1266   else if (return_bb == EXIT_BLOCK_PTR_FOR_FN (cfun))
   1267     ;
   1268   /* When we do not want to return value, we need to construct
   1269      new return block with empty return statement.
   1270      FIXME: Once we are able to change return type, we should change function
   1271      to return void instead of just outputting function with undefined return
   1272      value.  For structures this affects quality of codegen.  */
   1273   else if ((retval = find_retval (return_bb))
   1274 	   && !split_point->split_part_set_retval)
   1275     {
   1276       bool redirected = true;
   1277       basic_block new_return_bb = create_basic_block (NULL, 0, return_bb);
   1278       gimple_stmt_iterator gsi = gsi_start_bb (new_return_bb);
   1279       gsi_insert_after (&gsi, gimple_build_return (NULL), GSI_NEW_STMT);
   1280       new_return_bb->count = profile_count::zero ();
   1281       while (redirected)
   1282 	{
   1283 	  redirected = false;
   1284 	  FOR_EACH_EDGE (e, ei, return_bb->preds)
   1285 	    if (bitmap_bit_p (split_point->split_bbs, e->src->index))
   1286 	      {
   1287 		new_return_bb->count += e->count ();
   1288 		redirect_edge_and_branch (e, new_return_bb);
   1289 		redirected = true;
   1290 		break;
   1291 	      }
   1292 	}
   1293       e = make_single_succ_edge (new_return_bb, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
   1294       add_bb_to_loop (new_return_bb, current_loops->tree_root);
   1295       bitmap_set_bit (split_point->split_bbs, new_return_bb->index);
   1296     }
   1297   /* When we pass around the value, use existing return block.  */
   1298   else
   1299     bitmap_set_bit (split_point->split_bbs, return_bb->index);
   1300 
   1301   /* If RETURN_BB has virtual operand PHIs, they must be removed and the
   1302      virtual operand marked for renaming as we change the CFG in a way that
   1303      tree-inline is not able to compensate for.
   1304 
   1305      Note this can happen whether or not we have a return value.  If we have
   1306      a return value, then RETURN_BB may have PHIs for real operands too.  */
   1307   if (return_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
   1308     {
   1309       bool phi_p = false;
   1310       for (gphi_iterator gsi = gsi_start_phis (return_bb);
   1311 	   !gsi_end_p (gsi);)
   1312 	{
   1313 	  gphi *stmt = gsi.phi ();
   1314 	  if (!virtual_operand_p (gimple_phi_result (stmt)))
   1315 	    {
   1316 	      gsi_next (&gsi);
   1317 	      continue;
   1318 	    }
   1319 	  mark_virtual_phi_result_for_renaming (stmt);
   1320 	  remove_phi_node (&gsi, true);
   1321 	  phi_p = true;
   1322 	}
   1323       /* In reality we have to rename the reaching definition of the
   1324 	 virtual operand at return_bb as we will eventually release it
   1325 	 when we remove the code region we outlined.
   1326 	 So we have to rename all immediate virtual uses of that region
   1327 	 if we didn't see a PHI definition yet.  */
   1328       /* ???  In real reality we want to set the reaching vdef of the
   1329          entry of the SESE region as the vuse of the call and the reaching
   1330 	 vdef of the exit of the SESE region as the vdef of the call.  */
   1331       if (!phi_p)
   1332 	for (gimple_stmt_iterator gsi = gsi_start_bb (return_bb);
   1333 	     !gsi_end_p (gsi);
   1334 	     gsi_next (&gsi))
   1335 	  {
   1336 	    gimple *stmt = gsi_stmt (gsi);
   1337 	    if (gimple_vuse (stmt))
   1338 	      {
   1339 		gimple_set_vuse (stmt, NULL_TREE);
   1340 		update_stmt (stmt);
   1341 	      }
   1342 	    if (gimple_vdef (stmt))
   1343 	      break;
   1344 	  }
   1345     }
   1346 
   1347   ipa_param_adjustments *adjustments;
   1348   bool skip_return = (!split_part_return_p
   1349 		      || !split_point->split_part_set_retval);
   1350   /* TODO: Perhaps get rid of args_to_skip entirely, after we make sure the
   1351      debug info generation and discrepancy avoiding works well too.  */
   1352   if ((args_to_skip && !bitmap_empty_p (args_to_skip))
   1353       || skip_return)
   1354     {
   1355       vec<ipa_adjusted_param, va_gc> *new_params = NULL;
   1356       unsigned j;
   1357       for (parm = DECL_ARGUMENTS (current_function_decl), j = 0;
   1358 	   parm; parm = DECL_CHAIN (parm), j++)
   1359 	if (!args_to_skip || !bitmap_bit_p (args_to_skip, j))
   1360 	  {
   1361 	    ipa_adjusted_param adj;
   1362 	    memset (&adj, 0, sizeof (adj));
   1363 	    adj.op = IPA_PARAM_OP_COPY;
   1364 	    adj.base_index = j;
   1365 	    adj.prev_clone_index = j;
   1366 	    vec_safe_push (new_params, adj);
   1367 	  }
   1368       adjustments = new ipa_param_adjustments (new_params, j, skip_return);
   1369     }
   1370   else
   1371     adjustments = NULL;
   1372 
   1373   /* Now create the actual clone.  */
   1374   cgraph_edge::rebuild_edges ();
   1375   node = cur_node->create_version_clone_with_body
   1376     (vNULL, NULL, adjustments,
   1377      split_point->split_bbs, split_point->entry_bb, "part");
   1378   delete adjustments;
   1379   node->split_part = true;
   1380 
   1381   if (cur_node->same_comdat_group)
   1382     {
   1383       /* TODO: call is versionable if we make sure that all
   1384 	 callers are inside of a comdat group.  */
   1385       cur_node->calls_comdat_local = true;
   1386       node->add_to_same_comdat_group (cur_node);
   1387     }
   1388 
   1389 
   1390   /* Let's take a time profile for splitted function.  */
   1391   if (cur_node->tp_first_run)
   1392     node->tp_first_run = cur_node->tp_first_run + 1;
   1393 
   1394   /* For usual cloning it is enough to clear builtin only when signature
   1395      changes.  For partial inlining we however cannot expect the part
   1396      of builtin implementation to have same semantic as the whole.  */
   1397   if (fndecl_built_in_p (node->decl))
   1398     set_decl_built_in_function (node->decl, NOT_BUILT_IN, 0);
   1399 
   1400   /* If return_bb contains any clobbers that refer to SSA_NAMEs
   1401      set in the split part, remove them.  Also reset debug stmts that
   1402      refer to SSA_NAMEs set in the split part.  */
   1403   if (return_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
   1404     {
   1405       gimple_stmt_iterator gsi = gsi_start_bb (return_bb);
   1406       while (!gsi_end_p (gsi))
   1407 	{
   1408 	  tree op;
   1409 	  ssa_op_iter iter;
   1410 	  gimple *stmt = gsi_stmt (gsi);
   1411 	  bool remove = false;
   1412 	  if (gimple_clobber_p (stmt) || is_gimple_debug (stmt))
   1413 	    FOR_EACH_SSA_TREE_OPERAND (op, stmt, iter, SSA_OP_USE)
   1414 	      {
   1415 		basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
   1416 		if (op != retval
   1417 		    && bb
   1418 		    && bb != return_bb
   1419 		    && bitmap_bit_p (split_point->split_bbs, bb->index))
   1420 		  {
   1421 		    if (is_gimple_debug (stmt))
   1422 		      {
   1423 			gimple_debug_bind_reset_value (stmt);
   1424 			update_stmt (stmt);
   1425 		      }
   1426 		    else
   1427 		      remove = true;
   1428 		    break;
   1429 		  }
   1430 	      }
   1431 	  if (remove)
   1432 	    gsi_remove (&gsi, true);
   1433 	  else
   1434 	    gsi_next (&gsi);
   1435 	}
   1436     }
   1437 
   1438   /* If the original function is declared inline, there is no point in issuing
   1439      a warning for the non-inlinable part.  */
   1440   DECL_NO_INLINE_WARNING_P (node->decl) = 1;
   1441   cur_node->remove_callees ();
   1442   cur_node->remove_all_references ();
   1443   if (!split_part_return_p)
   1444     TREE_THIS_VOLATILE (node->decl) = 1;
   1445   if (dump_file)
   1446     dump_function_to_file (node->decl, dump_file, dump_flags);
   1447 
   1448   /* Create the basic block we place call into.  It is the entry basic block
   1449      split after last label.  */
   1450   call_bb = split_point->entry_bb;
   1451   for (gimple_stmt_iterator gsi = gsi_start_bb (call_bb); !gsi_end_p (gsi);)
   1452     if (gimple_code (gsi_stmt (gsi)) == GIMPLE_LABEL)
   1453       {
   1454 	last_stmt = gsi_stmt (gsi);
   1455 	gsi_next (&gsi);
   1456       }
   1457     else
   1458       break;
   1459   call_bb->count = split_point->count;
   1460   e = split_block (split_point->entry_bb, last_stmt);
   1461   remove_edge (e);
   1462 
   1463   /* Produce the call statement.  */
   1464   gimple_stmt_iterator gsi = gsi_last_bb (call_bb);
   1465   FOR_EACH_VEC_ELT (args_to_pass, i, arg)
   1466     if (!is_gimple_val (arg))
   1467       {
   1468 	arg = force_gimple_operand_gsi (&gsi, arg, true, NULL_TREE,
   1469 					false, GSI_CONTINUE_LINKING);
   1470 	args_to_pass[i] = arg;
   1471       }
   1472   call = gimple_build_call_vec (node->decl, args_to_pass);
   1473   gimple_set_block (call, DECL_INITIAL (current_function_decl));
   1474   args_to_pass.release ();
   1475 
   1476   /* For optimized away parameters, add on the caller side
   1477      before the call
   1478      DEBUG D#X => parm_Y(D)
   1479      stmts and associate D#X with parm in decl_debug_args_lookup
   1480      vector to say for debug info that if parameter parm had been passed,
   1481      it would have value parm_Y(D).  */
   1482   if (args_to_skip)
   1483     {
   1484       vec<tree, va_gc> **debug_args = NULL;
   1485       unsigned i = 0, len = 0;
   1486       if (MAY_HAVE_DEBUG_BIND_STMTS)
   1487 	{
   1488 	  debug_args = decl_debug_args_lookup (node->decl);
   1489 	  if (debug_args)
   1490 	    len = vec_safe_length (*debug_args);
   1491 	}
   1492       for (parm = DECL_ARGUMENTS (current_function_decl), num = 0;
   1493 	   parm; parm = DECL_CHAIN (parm), num++)
   1494 	if (bitmap_bit_p (args_to_skip, num) && is_gimple_reg (parm))
   1495 	  {
   1496 	    tree ddecl;
   1497 	    gimple *def_temp;
   1498 
   1499 	    /* This needs to be done even without
   1500 	       MAY_HAVE_DEBUG_BIND_STMTS, otherwise if it didn't exist
   1501 	       before, we'd end up with different SSA_NAME_VERSIONs
   1502 	       between -g and -g0.  */
   1503 	    arg = get_or_create_ssa_default_def (cfun, parm);
   1504 	    if (!MAY_HAVE_DEBUG_BIND_STMTS || debug_args == NULL)
   1505 	      continue;
   1506 
   1507 	    while (i < len && (**debug_args)[i] != DECL_ORIGIN (parm))
   1508 	      i += 2;
   1509 	    if (i >= len)
   1510 	      continue;
   1511 	    ddecl = (**debug_args)[i + 1];
   1512 	    def_temp
   1513 	      = gimple_build_debug_bind (ddecl, unshare_expr (arg), call);
   1514 	    gsi_insert_after (&gsi, def_temp, GSI_NEW_STMT);
   1515 	  }
   1516       BITMAP_FREE (args_to_skip);
   1517     }
   1518 
   1519   /* We avoid address being taken on any variable used by split part,
   1520      so return slot optimization is always possible.  Moreover this is
   1521      required to make DECL_BY_REFERENCE work.  */
   1522   if (aggregate_value_p (DECL_RESULT (current_function_decl),
   1523 			 TREE_TYPE (current_function_decl))
   1524       && (!is_gimple_reg_type (TREE_TYPE (DECL_RESULT (current_function_decl)))
   1525 	  || DECL_BY_REFERENCE (DECL_RESULT (current_function_decl))))
   1526     gimple_call_set_return_slot_opt (call, true);
   1527 
   1528   if (add_tsan_func_exit)
   1529     tsan_func_exit_call = gimple_build_call_internal (IFN_TSAN_FUNC_EXIT, 0);
   1530 
   1531   /* Update return value.  This is bit tricky.  When we do not return,
   1532      do nothing.  When we return we might need to update return_bb
   1533      or produce a new return statement.  */
   1534   if (!split_part_return_p)
   1535     {
   1536       gsi_insert_after (&gsi, call, GSI_NEW_STMT);
   1537       if (tsan_func_exit_call)
   1538 	gsi_insert_after (&gsi, tsan_func_exit_call, GSI_NEW_STMT);
   1539     }
   1540   else
   1541     {
   1542       e = make_single_succ_edge (call_bb, return_bb,
   1543 				 return_bb == EXIT_BLOCK_PTR_FOR_FN (cfun)
   1544 				 ? 0 : EDGE_FALLTHRU);
   1545 
   1546       /* If there is return basic block, see what value we need to store
   1547          return value into and put call just before it.  */
   1548       if (return_bb != EXIT_BLOCK_PTR_FOR_FN (cfun))
   1549 	{
   1550 	  real_retval = retval;
   1551 	  if (real_retval && split_point->split_part_set_retval)
   1552 	    {
   1553 	      gphi_iterator psi;
   1554 
   1555 	      /* See if we need new SSA_NAME for the result.
   1556 		 When DECL_BY_REFERENCE is true, retval is actually pointer to
   1557 		 return value and it is constant in whole function.  */
   1558 	      if (TREE_CODE (retval) == SSA_NAME
   1559 		  && !DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
   1560 		{
   1561 		  retval = copy_ssa_name (retval, call);
   1562 
   1563 		  /* See if there is PHI defining return value.  */
   1564 		  for (psi = gsi_start_phis (return_bb);
   1565 		       !gsi_end_p (psi); gsi_next (&psi))
   1566 		    if (!virtual_operand_p (gimple_phi_result (psi.phi ())))
   1567 		      break;
   1568 
   1569 		  /* When there is PHI, just update its value.  */
   1570 		  if (TREE_CODE (retval) == SSA_NAME
   1571 		      && !gsi_end_p (psi))
   1572 		    add_phi_arg (psi.phi (), retval, e, UNKNOWN_LOCATION);
   1573 		  /* Otherwise update the return BB itself.
   1574 		     find_return_bb allows at most one assignment to return value,
   1575 		     so update first statement.  */
   1576 		  else
   1577 		    {
   1578 		      gimple_stmt_iterator bsi;
   1579 		      for (bsi = gsi_start_bb (return_bb); !gsi_end_p (bsi);
   1580 			   gsi_next (&bsi))
   1581 			if (greturn *return_stmt
   1582 			      = dyn_cast <greturn *> (gsi_stmt (bsi)))
   1583 			  {
   1584 			    gimple_return_set_retval (return_stmt, retval);
   1585 			    break;
   1586 			  }
   1587 			else if (gimple_code (gsi_stmt (bsi)) == GIMPLE_ASSIGN
   1588 				 && !gimple_clobber_p (gsi_stmt (bsi)))
   1589 			  {
   1590 			    gimple_assign_set_rhs1 (gsi_stmt (bsi), retval);
   1591 			    break;
   1592 			  }
   1593 		      update_stmt (gsi_stmt (bsi));
   1594 		      /* Also adjust clobbers and debug stmts in return_bb.  */
   1595 		      for (bsi = gsi_start_bb (return_bb); !gsi_end_p (bsi);
   1596 			   gsi_next (&bsi))
   1597 			{
   1598 			  gimple *stmt = gsi_stmt (bsi);
   1599 			  if (gimple_clobber_p (stmt)
   1600 			      || is_gimple_debug (stmt))
   1601 			    {
   1602 			      ssa_op_iter iter;
   1603 			      use_operand_p use_p;
   1604 			      bool update = false;
   1605 			      FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter,
   1606 							SSA_OP_USE)
   1607 				if (USE_FROM_PTR (use_p) == real_retval)
   1608 				  {
   1609 				    SET_USE (use_p, retval);
   1610 				    update = true;
   1611 				  }
   1612 			      if (update)
   1613 				update_stmt (stmt);
   1614 			    }
   1615 			}
   1616 		    }
   1617 		}
   1618 	      if (DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
   1619 		{
   1620 		  gimple_call_set_lhs (call, build_simple_mem_ref (retval));
   1621 		  gsi_insert_after (&gsi, call, GSI_NEW_STMT);
   1622 		}
   1623 	      else
   1624 		{
   1625 		  tree restype;
   1626 		  restype = TREE_TYPE (DECL_RESULT (current_function_decl));
   1627 		  gsi_insert_after (&gsi, call, GSI_NEW_STMT);
   1628 		  if (!useless_type_conversion_p (TREE_TYPE (retval), restype))
   1629 		    {
   1630 		      gimple *cpy;
   1631 		      tree tem = create_tmp_reg (restype);
   1632 		      tem = make_ssa_name (tem, call);
   1633 		      cpy = gimple_build_assign (retval, NOP_EXPR, tem);
   1634 		      gsi_insert_after (&gsi, cpy, GSI_NEW_STMT);
   1635 		      retval = tem;
   1636 		    }
   1637 		  gimple_call_set_lhs (call, retval);
   1638 		  update_stmt (call);
   1639 		}
   1640 	    }
   1641 	  else
   1642 	    gsi_insert_after (&gsi, call, GSI_NEW_STMT);
   1643 	  if (tsan_func_exit_call)
   1644 	    gsi_insert_after (&gsi, tsan_func_exit_call, GSI_NEW_STMT);
   1645 	}
   1646       /* We don't use return block (there is either no return in function or
   1647 	 multiple of them).  So create new basic block with return statement.
   1648 	 */
   1649       else
   1650 	{
   1651 	  greturn *ret;
   1652 	  if (split_point->split_part_set_retval
   1653 	      && !VOID_TYPE_P (TREE_TYPE (TREE_TYPE (current_function_decl))))
   1654 	    {
   1655 	      retval = DECL_RESULT (current_function_decl);
   1656 
   1657 	      /* We use temporary register to hold value when aggregate_value_p
   1658 		 is false.  Similarly for DECL_BY_REFERENCE we must avoid extra
   1659 		 copy.  */
   1660 	      if (!aggregate_value_p (retval, TREE_TYPE (current_function_decl))
   1661 		  && !DECL_BY_REFERENCE (retval))
   1662 		retval = create_tmp_reg (TREE_TYPE (retval));
   1663 	      if (is_gimple_reg (retval))
   1664 		{
   1665 		  /* When returning by reference, there is only one SSA name
   1666 		     assigned to RESULT_DECL (that is pointer to return value).
   1667 		     Look it up or create new one if it is missing.  */
   1668 		  if (DECL_BY_REFERENCE (retval))
   1669 		    retval = get_or_create_ssa_default_def (cfun, retval);
   1670 		  /* Otherwise produce new SSA name for return value.  */
   1671 		  else
   1672 		    retval = make_ssa_name (retval, call);
   1673 		}
   1674 	      if (DECL_BY_REFERENCE (DECL_RESULT (current_function_decl)))
   1675 	        gimple_call_set_lhs (call, build_simple_mem_ref (retval));
   1676 	      else
   1677 	        gimple_call_set_lhs (call, retval);
   1678 	      gsi_insert_after (&gsi, call, GSI_NEW_STMT);
   1679 	    }
   1680 	  else
   1681 	    {
   1682 	      gsi_insert_after (&gsi, call, GSI_NEW_STMT);
   1683 	      if (retval
   1684 		  && is_gimple_reg_type (TREE_TYPE (retval))
   1685 		  && !is_gimple_val (retval))
   1686 		{
   1687 		  gassign *g
   1688 		    = gimple_build_assign (make_ssa_name (TREE_TYPE (retval)),
   1689 					   retval);
   1690 		  retval = gimple_assign_lhs (g);
   1691 		  gsi_insert_after (&gsi, g, GSI_NEW_STMT);
   1692 		}
   1693 	    }
   1694 	  if (tsan_func_exit_call)
   1695 	    gsi_insert_after (&gsi, tsan_func_exit_call, GSI_NEW_STMT);
   1696 	  ret = gimple_build_return (retval);
   1697 	  gsi_insert_after (&gsi, ret, GSI_NEW_STMT);
   1698 	}
   1699     }
   1700   free_dominance_info (CDI_DOMINATORS);
   1701   free_dominance_info (CDI_POST_DOMINATORS);
   1702   compute_fn_summary (node, true);
   1703 }
   1704 
   1705 /* Execute function splitting pass.  */
   1706 
/* Main worker: decide whether CFUN is worth splitting, find the best split
   point and perform the split.  Returns the TODO flags to run afterwards:
   TODO_update_ssa | TODO_cleanup_cfg when a split was performed, 0 when the
   function was left untouched.  */

static unsigned int
execute_split_functions (void)
{
  gimple_stmt_iterator bsi;
  basic_block bb;
  sreal overall_time = 0;
  int overall_size = 0;
  int todo = 0;
  struct cgraph_node *node = cgraph_node::get (current_function_decl);

  /* Early bail-outs: functions we never split.  Each case explains why in
     the dump file.  */
  if (flags_from_decl_or_type (current_function_decl)
      & (ECF_NORETURN|ECF_MALLOC))
    {
      if (dump_file)
	fprintf (dump_file, "Not splitting: noreturn/malloc function.\n");
      return 0;
    }
  if (MAIN_NAME_P (DECL_NAME (current_function_decl)))
    {
      if (dump_file)
	fprintf (dump_file, "Not splitting: main function.\n");
      return 0;
    }
  if (node->frequency == NODE_FREQUENCY_UNLIKELY_EXECUTED)
    {
      if (dump_file)
	fprintf (dump_file, "Not splitting: function is unlikely executed.\n");
      return 0;
    }
  /* This can be relaxed; function might become inlinable after splitting
     away the uninlinable part.  */
  if (ipa_fn_summaries
      && ipa_fn_summaries->get (node)
      && !ipa_fn_summaries->get (node)->inlinable)
    {
      if (dump_file)
	fprintf (dump_file, "Not splitting: not inlinable.\n");
      return 0;
    }
  if (DECL_DISREGARD_INLINE_LIMITS (node->decl))
    {
      if (dump_file)
	fprintf (dump_file, "Not splitting: disregarding inline limits.\n");
      return 0;
    }
  /* This can be relaxed; most of versioning tests actually prevents
     a duplication.  */
  if (!tree_versionable_function_p (current_function_decl))
    {
      if (dump_file)
	fprintf (dump_file, "Not splitting: not versionable.\n");
      return 0;
    }
  /* FIXME: we could support this.  */
  if (DECL_STRUCT_FUNCTION (current_function_decl)->static_chain_decl)
    {
      if (dump_file)
	fprintf (dump_file, "Not splitting: nested function.\n");
      return 0;
    }

  /* See if it makes sense to try to split.
     It makes sense to split if we inline, that is if we have direct calls to
     handle or direct calls are possibly going to appear as result of indirect
     inlining or LTO.  Also handle -fprofile-generate as LTO to allow non-LTO
     training for LTO -fprofile-use build.

     Note that we are not completely conservative about disqualifying functions
     called once.  It is possible that the caller is called more then once and
     then inlining would still benefit.  */
  if ((!node->callers
       /* Local functions called once will be completely inlined most of time.  */
       || (!node->callers->next_caller && node->local))
      && !node->address_taken
      && !node->has_aliases_p ()
      && (!flag_lto || !node->externally_visible))
    {
      if (dump_file)
	fprintf (dump_file, "Not splitting: not called directly "
		 "or called once.\n");
      return 0;
    }

  /* FIXME: We can actually split if splitting reduces call overhead.  */
  if (!flag_inline_small_functions
      && !DECL_DECLARED_INLINE_P (current_function_decl))
    {
      if (dump_file)
	fprintf (dump_file, "Not splitting: not autoinlining and function"
		 " is not inline.\n");
      return 0;
    }

  if (lookup_attribute ("noinline", DECL_ATTRIBUTES (current_function_decl)))
    {
      if (dump_file)
	fprintf (dump_file, "Not splitting: function is noinline.\n");
      return 0;
    }
  if (lookup_attribute ("section", DECL_ATTRIBUTES (current_function_decl)))
    {
      if (dump_file)
	fprintf (dump_file, "Not splitting: function is in user defined "
		 "section.\n");
      return 0;
    }

  /* We enforce splitting after loop headers when profile info is not
     available.  */
  if (profile_status_for_fn (cfun) != PROFILE_READ)
    mark_dfs_back_edges ();

  /* Initialize bitmap to track forbidden calls.  */
  forbidden_dominators = BITMAP_ALLOC (NULL);
  calculate_dominance_info (CDI_DOMINATORS);

  /* Compute local info about basic blocks and determine function size/time.  */
  bb_info_vec.safe_grow_cleared (last_basic_block_for_fn (cfun) + 1, true);
  best_split_point.split_bbs = NULL;
  basic_block return_bb = find_return_bb ();
  /* Tracks where an IFN_TSAN_FUNC_EXIT call was seen: -1 = none seen yet,
     0 = seen in RETURN_BB itself, 1 = seen in an immediate predecessor of
     RETURN_BB.  Mixing the two placements forbids splitting.  */
  int tsan_exit_found = -1;
  FOR_EACH_BB_FN (bb, cfun)
    {
      sreal time = 0;
      int size = 0;
      /* Execution frequency of BB relative to the function entry block.  */
      sreal freq = bb->count.to_sreal_scale
			 (ENTRY_BLOCK_PTR_FOR_FN (cfun)->count);

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Basic block %i\n", bb->index);

      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	{
	  sreal this_time;
	  int this_size;
	  gimple *stmt = gsi_stmt (bsi);

	  this_size = estimate_num_insns (stmt, &eni_size_weights);
	  this_time = (sreal)estimate_num_insns (stmt, &eni_time_weights)
			 * freq;
	  size += this_size;
	  time += this_time;
	  /* Record dominators of calls that forbid splitting (see
	     forbidden_dominators bitmap above).  */
	  check_forbidden_calls (stmt);

	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "  freq:%4.2f size:%3i time:%4.2f ",
		       freq.to_double (), this_size, this_time.to_double ());
	      print_gimple_stmt (dump_file, stmt, 0);
	    }

	  if ((flag_sanitize & SANITIZE_THREAD)
	      && gimple_call_internal_p (stmt, IFN_TSAN_FUNC_EXIT))
	    {
	      /* We handle TSAN_FUNC_EXIT for splitting either in the
		 return_bb, or in its immediate predecessors.  */
	      if ((bb != return_bb && !find_edge (bb, return_bb))
		  || (tsan_exit_found != -1
		      && tsan_exit_found != (bb != return_bb)))
		{
		  if (dump_file)
		    fprintf (dump_file, "Not splitting: TSAN_FUNC_EXIT"
			     " in unexpected basic block.\n");
		  /* Release resources allocated above before bailing out.  */
		  BITMAP_FREE (forbidden_dominators);
		  bb_info_vec.release ();
		  return 0;
		}
	      tsan_exit_found = bb != return_bb;
	    }
	}
      overall_time += time;
      overall_size += size;
      bb_info_vec[bb->index].time = time;
      bb_info_vec[bb->index].size = size;
    }
  find_split_points (return_bb, overall_time, overall_size);
  if (best_split_point.split_bbs)
    {
      split_function (return_bb, &best_split_point, tsan_exit_found == 1);
      BITMAP_FREE (best_split_point.ssa_names_to_pass);
      BITMAP_FREE (best_split_point.split_bbs);
      /* The body was changed by split_function; request SSA update and
	 CFG cleanup from the pass manager.  */
      todo = TODO_update_ssa | TODO_cleanup_cfg;
    }
  BITMAP_FREE (forbidden_dominators);
  bb_info_vec.release ();
  return todo;
}
   1894 
   1895 namespace {
   1896 
   1897 const pass_data pass_data_split_functions =
   1898 {
   1899   GIMPLE_PASS, /* type */
   1900   "fnsplit", /* name */
   1901   OPTGROUP_NONE, /* optinfo_flags */
   1902   TV_IPA_FNSPLIT, /* tv_id */
   1903   PROP_cfg, /* properties_required */
   1904   0, /* properties_provided */
   1905   0, /* properties_destroyed */
   1906   0, /* todo_flags_start */
   1907   0, /* todo_flags_finish */
   1908 };
   1909 
   1910 class pass_split_functions : public gimple_opt_pass
   1911 {
   1912 public:
   1913   pass_split_functions (gcc::context *ctxt)
   1914     : gimple_opt_pass (pass_data_split_functions, ctxt)
   1915   {}
   1916 
   1917   /* opt_pass methods: */
   1918   virtual bool gate (function *);
   1919   virtual unsigned int execute (function *)
   1920     {
   1921       return execute_split_functions ();
   1922     }
   1923 
   1924 }; // class pass_split_functions
   1925 
   1926 bool
   1927 pass_split_functions::gate (function *)
   1928 {
   1929   /* When doing profile feedback, we want to execute the pass after profiling
   1930      is read.  So disable one in early optimization.  */
   1931   return (flag_partial_inlining
   1932 	  && !profile_arc_flag && !flag_branch_probabilities);
   1933 }
   1934 
   1935 } // anon namespace
   1936 
   1937 gimple_opt_pass *
   1938 make_pass_split_functions (gcc::context *ctxt)
   1939 {
   1940   return new pass_split_functions (ctxt);
   1941 }
   1942 
/* Execute function splitting pass when profile feedback is available.  */
   1944 
   1945 static unsigned int
   1946 execute_feedback_split_functions (void)
   1947 {
   1948   unsigned int retval = execute_split_functions ();
   1949   if (retval)
   1950     retval |= TODO_rebuild_cgraph_edges;
   1951   return retval;
   1952 }
   1953 
   1954 namespace {
   1955 
   1956 const pass_data pass_data_feedback_split_functions =
   1957 {
   1958   GIMPLE_PASS, /* type */
   1959   "feedback_fnsplit", /* name */
   1960   OPTGROUP_NONE, /* optinfo_flags */
   1961   TV_IPA_FNSPLIT, /* tv_id */
   1962   PROP_cfg, /* properties_required */
   1963   0, /* properties_provided */
   1964   0, /* properties_destroyed */
   1965   0, /* todo_flags_start */
   1966   0, /* todo_flags_finish */
   1967 };
   1968 
   1969 class pass_feedback_split_functions : public gimple_opt_pass
   1970 {
   1971 public:
   1972   pass_feedback_split_functions (gcc::context *ctxt)
   1973     : gimple_opt_pass (pass_data_feedback_split_functions, ctxt)
   1974   {}
   1975 
   1976   /* opt_pass methods: */
   1977   virtual bool gate (function *);
   1978   virtual unsigned int execute (function *)
   1979     {
   1980       return execute_feedback_split_functions ();
   1981     }
   1982 
   1983 }; // class pass_feedback_split_functions
   1984 
   1985 bool
   1986 pass_feedback_split_functions::gate (function *)
   1987 {
   1988   /* We don't need to split when profiling at all, we are producing
   1989      lousy code anyway.  */
   1990   return (flag_partial_inlining
   1991 	  && flag_branch_probabilities);
   1992 }
   1993 
   1994 } // anon namespace
   1995 
   1996 gimple_opt_pass *
   1997 make_pass_feedback_split_functions (gcc::context *ctxt)
   1998 {
   1999   return new pass_feedback_split_functions (ctxt);
   2000 }
   2001