1 1.1 mrg /* The tracer pass for the GNU compiler. 2 1.1 mrg Contributed by Jan Hubicka, SuSE Labs. 3 1.1 mrg Adapted to work on GIMPLE instead of RTL by Robert Kidd, UIUC. 4 1.1 mrg Copyright (C) 2001-2022 Free Software Foundation, Inc. 5 1.1 mrg 6 1.1 mrg This file is part of GCC. 7 1.1 mrg 8 1.1 mrg GCC is free software; you can redistribute it and/or modify it 9 1.1 mrg under the terms of the GNU General Public License as published by 10 1.1 mrg the Free Software Foundation; either version 3, or (at your option) 11 1.1 mrg any later version. 12 1.1 mrg 13 1.1 mrg GCC is distributed in the hope that it will be useful, but WITHOUT 14 1.1 mrg ANY WARRANTY; without even the implied warranty of MERCHANTABILITY 15 1.1 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public 16 1.1 mrg License for more details. 17 1.1 mrg 18 1.1 mrg You should have received a copy of the GNU General Public License 19 1.1 mrg along with GCC; see the file COPYING3. If not see 20 1.1 mrg <http://www.gnu.org/licenses/>. */ 21 1.1 mrg 22 1.1 mrg /* This pass performs the tail duplication needed for superblock formation. 23 1.1 mrg For more information see: 24 1.1 mrg 25 1.1 mrg Design and Analysis of Profile-Based Optimization in Compaq's 26 1.1 mrg Compilation Tools for Alpha; Journal of Instruction-Level 27 1.1 mrg Parallelism 3 (2000) 1-25 28 1.1 mrg 29 1.1 mrg Unlike Compaq's implementation we don't do the loop peeling as most 30 1.1 mrg probably a better job can be done by a special pass and we don't 31 1.1 mrg need to worry too much about the code size implications as the tail 32 1.1 mrg duplicates are crossjumped again if optimizations are not 33 1.1 mrg performed. */ 34 1.1 mrg 35 1.1 mrg 36 1.1 mrg #include "config.h" 37 1.1 mrg #include "system.h" 38 1.1 mrg #include "coretypes.h" 39 1.1 mrg #include "backend.h" 40 1.1 mrg #include "rtl.h" 41 1.1 mrg #include "tree.h" 42 1.1 mrg #include "gimple.h" 43 1.1 mrg #include "cfghooks.h" 44 1.1 mrg #include "tree-pass.h" 45 1.1 mrg #include "profile.h" 46 1.1 mrg #include "cfganal.h" 47 1.1 mrg #include "gimple-iterator.h" 48 1.1 mrg #include "tree-cfg.h" 49 1.1 mrg #include "tree-ssa.h" 50 1.1 mrg #include "tree-inline.h" 51 1.1 mrg #include "cfgloop.h" 52 1.1 mrg #include "alloc-pool.h" 53 1.1 mrg #include "fibonacci_heap.h" 54 1.1 mrg #include "tracer.h" 55 1.1 mrg 56 1.1 mrg static void analyze_bb (basic_block, int *); 57 1.1 mrg static bool better_p (const_edge, const_edge); 58 1.1 mrg static edge find_best_successor (basic_block); 59 1.1 mrg static edge find_best_predecessor (basic_block); 60 1.1 mrg static int find_trace (basic_block, basic_block *); 61 1.1 mrg 62 1.1 mrg /* Minimal outgoing edge probability considered for superblock formation. */ 63 1.1 mrg static int probability_cutoff; 64 1.1 mrg static int branch_ratio_cutoff; 65 1.1 mrg 66 1.1 mrg /* A bit BB->index is set if BB has already been seen, i.e. it is 67 1.1 mrg connected to some trace already. */ 68 1.1 mrg static sbitmap bb_seen; 69 1.1 mrg 70 1.1 mrg static inline void 71 1.1 mrg mark_bb_seen (basic_block bb) 72 1.1 mrg { 73 1.1 mrg unsigned int size = SBITMAP_SIZE (bb_seen); 74 1.1 mrg 75 1.1 mrg if ((unsigned int)bb->index >= size) 76 1.1 mrg bb_seen = sbitmap_resize (bb_seen, size * 2, 0); 77 1.1 mrg 78 1.1 mrg bitmap_set_bit (bb_seen, bb->index); 79 1.1 mrg } 80 1.1 mrg 81 1.1 mrg static inline bool 82 1.1 mrg bb_seen_p (basic_block bb) 83 1.1 mrg { 84 1.1 mrg return bitmap_bit_p (bb_seen, bb->index); 85 1.1 mrg } 86 1.1 mrg 87 1.1 mrg static sbitmap can_duplicate_bb; 88 1.1 mrg 89 1.1 mrg /* Cache VAL as value of can_duplicate_bb_p for BB. */ 90 1.1 mrg static inline void 91 1.1 mrg cache_can_duplicate_bb_p (const_basic_block bb, bool val) 92 1.1 mrg { 93 1.1 mrg if (val) 94 1.1 mrg bitmap_set_bit (can_duplicate_bb, bb->index); 95 1.1 mrg } 96 1.1 mrg 97 1.1 mrg /* Return cached value of can_duplicate_bb_p for BB. */ 98 1.1 mrg static bool 99 1.1 mrg cached_can_duplicate_bb_p (const_basic_block bb) 100 1.1 mrg { 101 1.1 mrg if (can_duplicate_bb) 102 1.1 mrg { 103 1.1 mrg unsigned int size = SBITMAP_SIZE (can_duplicate_bb); 104 1.1 mrg if ((unsigned int)bb->index < size) 105 1.1 mrg return bitmap_bit_p (can_duplicate_bb, bb->index); 106 1.1 mrg 107 1.1 mrg /* Assume added bb's should not be duplicated. */ 108 1.1 mrg return false; 109 1.1 mrg } 110 1.1 mrg 111 1.1 mrg return can_duplicate_block_p (bb); 112 1.1 mrg } 113 1.1 mrg 114 1.1 mrg /* Return true if we should ignore the basic block for purposes of tracing. */ 115 1.1 mrg bool 116 1.1 mrg ignore_bb_p (const_basic_block bb) 117 1.1 mrg { 118 1.1 mrg if (bb->index < NUM_FIXED_BLOCKS) 119 1.1 mrg return true; 120 1.1 mrg if (optimize_bb_for_size_p (bb)) 121 1.1 mrg return true; 122 1.1 mrg 123 1.1 mrg return !cached_can_duplicate_bb_p (bb); 124 1.1 mrg } 125 1.1 mrg 126 1.1 mrg /* Return number of instructions in the block. */ 127 1.1 mrg 128 1.1 mrg static void 129 1.1 mrg analyze_bb (basic_block bb, int *count) 130 1.1 mrg { 131 1.1 mrg gimple_stmt_iterator gsi; 132 1.1 mrg gimple *stmt; 133 1.1 mrg int n = 0; 134 1.1 mrg 135 1.1 mrg for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi)) 136 1.1 mrg { 137 1.1 mrg stmt = gsi_stmt (gsi); 138 1.1 mrg n += estimate_num_insns (stmt, &eni_size_weights); 139 1.1 mrg } 140 1.1 mrg *count = n; 141 1.1 mrg 142 1.1 mrg cache_can_duplicate_bb_p (bb, can_duplicate_block_p (CONST_CAST_BB (bb))); 143 1.1 mrg } 144 1.1 mrg 145 1.1 mrg /* Return true if E1 is more frequent than E2. */ 146 1.1 mrg static bool 147 1.1 mrg better_p (const_edge e1, const_edge e2) 148 1.1 mrg { 149 1.1 mrg if ((e1->count () > e2->count ()) || (e1->count () < e2->count ())) 150 1.1 mrg return e1->count () > e2->count (); 151 1.1 mrg /* This is needed to avoid changes in the decision after 152 1.1 mrg CFG is modified. */ 153 1.1 mrg if (e1->src != e2->src) 154 1.1 mrg return e1->src->index > e2->src->index; 155 1.1 mrg return e1->dest->index > e2->dest->index; 156 1.1 mrg } 157 1.1 mrg 158 1.1 mrg /* Return most frequent successor of basic block BB. */ 159 1.1 mrg 160 1.1 mrg static edge 161 1.1 mrg find_best_successor (basic_block bb) 162 1.1 mrg { 163 1.1 mrg edge e; 164 1.1 mrg edge best = NULL; 165 1.1 mrg edge_iterator ei; 166 1.1 mrg 167 1.1 mrg FOR_EACH_EDGE (e, ei, bb->succs) 168 1.1 mrg { 169 1.1 mrg if (!e->count ().initialized_p ()) 170 1.1 mrg return NULL; 171 1.1 mrg if (!best || better_p (e, best)) 172 1.1 mrg best = e; 173 1.1 mrg } 174 1.1 mrg if (!best || ignore_bb_p (best->dest)) 175 1.1 mrg return NULL; 176 1.1 mrg if (!best->probability.initialized_p () 177 1.1 mrg || best->probability.to_reg_br_prob_base () <= probability_cutoff) 178 1.1 mrg return NULL; 179 1.1 mrg return best; 180 1.1 mrg } 181 1.1 mrg 182 1.1 mrg /* Return most frequent predecessor of basic block BB. */ 183 1.1 mrg 184 1.1 mrg static edge 185 1.1 mrg find_best_predecessor (basic_block bb) 186 1.1 mrg { 187 1.1 mrg edge e; 188 1.1 mrg edge best = NULL; 189 1.1 mrg edge_iterator ei; 190 1.1 mrg 191 1.1 mrg FOR_EACH_EDGE (e, ei, bb->preds) 192 1.1 mrg { 193 1.1 mrg if (!e->count ().initialized_p ()) 194 1.1 mrg return NULL; 195 1.1 mrg if (!best || better_p (e, best)) 196 1.1 mrg best = e; 197 1.1 mrg } 198 1.1 mrg if (!best || ignore_bb_p (best->src)) 199 1.1 mrg return NULL; 200 1.1 mrg if (bb->count.initialized_p () 201 1.1 mrg && (best->count ().to_frequency (cfun) * REG_BR_PROB_BASE 202 1.1 mrg < bb->count.to_frequency (cfun) * branch_ratio_cutoff)) 203 1.1 mrg return NULL; 204 1.1 mrg return best; 205 1.1 mrg } 206 1.1 mrg 207 1.1 mrg /* Find the trace using bb and record it in the TRACE array. 208 1.1 mrg Return number of basic blocks recorded. */ 209 1.1 mrg 210 1.1 mrg static int 211 1.1 mrg find_trace (basic_block bb, basic_block *trace) 212 1.1 mrg { 213 1.1 mrg int i = 0; 214 1.1 mrg edge e; 215 1.1 mrg 216 1.1 mrg if (dump_file) 217 1.1 mrg fprintf (dump_file, "Trace seed %i [%i]", bb->index, bb->count.to_frequency (cfun)); 218 1.1 mrg 219 1.1 mrg while ((e = find_best_predecessor (bb)) != NULL) 220 1.1 mrg { 221 1.1 mrg basic_block bb2 = e->src; 222 1.1 mrg if (bb_seen_p (bb2) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX)) 223 1.1 mrg || find_best_successor (bb2) != e) 224 1.1 mrg break; 225 1.1 mrg if (dump_file) 226 1.1 mrg fprintf (dump_file, ",%i [%i]", bb->index, bb->count.to_frequency (cfun)); 227 1.1 mrg bb = bb2; 228 1.1 mrg } 229 1.1 mrg if (dump_file) 230 1.1 mrg fprintf (dump_file, " forward %i [%i]", bb->index, bb->count.to_frequency (cfun)); 231 1.1 mrg trace[i++] = bb; 232 1.1 mrg 233 1.1 mrg /* Follow the trace in forward direction. */ 234 1.1 mrg while ((e = find_best_successor (bb)) != NULL) 235 1.1 mrg { 236 1.1 mrg bb = e->dest; 237 1.1 mrg if (bb_seen_p (bb) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX)) 238 1.1 mrg || find_best_predecessor (bb) != e) 239 1.1 mrg break; 240 1.1 mrg if (dump_file) 241 1.1 mrg fprintf (dump_file, ",%i [%i]", bb->index, bb->count.to_frequency (cfun)); 242 1.1 mrg trace[i++] = bb; 243 1.1 mrg } 244 1.1 mrg if (dump_file) 245 1.1 mrg fprintf (dump_file, "\n"); 246 1.1 mrg return i; 247 1.1 mrg } 248 1.1 mrg 249 1.1 mrg /* Duplicate block BB2, placing it after BB in the CFG. Return the 250 1.1 mrg newly created block. */ 251 1.1 mrg basic_block 252 1.1 mrg transform_duplicate (basic_block bb, basic_block bb2) 253 1.1 mrg { 254 1.1 mrg edge e; 255 1.1 mrg basic_block copy; 256 1.1 mrg 257 1.1 mrg e = find_edge (bb, bb2); 258 1.1 mrg 259 1.1 mrg copy = duplicate_block (bb2, e, bb); 260 1.1 mrg flush_pending_stmts (e); 261 1.1 mrg 262 1.1 mrg add_phi_args_after_copy (©, 1, NULL); 263 1.1 mrg 264 1.1 mrg return (copy); 265 1.1 mrg } 266 1.1 mrg 267 1.1 mrg /* Look for basic blocks in frequency order, construct traces and tail duplicate 268 1.1 mrg if profitable. */ 269 1.1 mrg 270 1.1 mrg static bool 271 1.1 mrg tail_duplicate (void) 272 1.1 mrg { 273 1.1 mrg auto_vec<fibonacci_node<long, basic_block_def>*> blocks; 274 1.1 mrg blocks.safe_grow_cleared (last_basic_block_for_fn (cfun), true); 275 1.1 mrg 276 1.1 mrg basic_block *trace = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun)); 277 1.1 mrg int *counts = XNEWVEC (int, last_basic_block_for_fn (cfun)); 278 1.1 mrg int ninsns = 0, nduplicated = 0; 279 1.1 mrg gcov_type weighted_insns = 0, traced_insns = 0; 280 1.1 mrg fibonacci_heap<long, basic_block_def> heap (LONG_MIN); 281 1.1 mrg gcov_type cover_insns; 282 1.1 mrg int max_dup_insns; 283 1.1 mrg basic_block bb; 284 1.1 mrg bool changed = false; 285 1.1 mrg 286 1.1 mrg /* Create an oversized sbitmap to reduce the chance that we need to 287 1.1 mrg resize it. */ 288 1.1 mrg bb_seen = sbitmap_alloc (last_basic_block_for_fn (cfun) * 2); 289 1.1 mrg bitmap_clear (bb_seen); 290 1.1 mrg can_duplicate_bb = sbitmap_alloc (last_basic_block_for_fn (cfun)); 291 1.1 mrg bitmap_clear (can_duplicate_bb); 292 1.1 mrg initialize_original_copy_tables (); 293 1.1 mrg 294 1.1 mrg if (profile_info && profile_status_for_fn (cfun) == PROFILE_READ) 295 1.1 mrg probability_cutoff = param_tracer_min_branch_probability_feedback; 296 1.1 mrg else 297 1.1 mrg probability_cutoff = param_tracer_min_branch_probability; 298 1.1 mrg probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff; 299 1.1 mrg 300 1.1 mrg branch_ratio_cutoff = 301 1.1 mrg (REG_BR_PROB_BASE / 100 * param_tracer_min_branch_ratio); 302 1.1 mrg 303 1.1 mrg FOR_EACH_BB_FN (bb, cfun) 304 1.1 mrg { 305 1.1 mrg int n; 306 1.1 mrg analyze_bb (bb, &n); 307 1.1 mrg if (!ignore_bb_p (bb)) 308 1.1 mrg blocks[bb->index] = heap.insert (-bb->count.to_frequency (cfun), bb); 309 1.1 mrg 310 1.1 mrg counts [bb->index] = n; 311 1.1 mrg ninsns += n; 312 1.1 mrg weighted_insns += n * bb->count.to_frequency (cfun); 313 1.1 mrg } 314 1.1 mrg 315 1.1 mrg if (profile_info && profile_status_for_fn (cfun) == PROFILE_READ) 316 1.1 mrg cover_insns = param_tracer_dynamic_coverage_feedback; 317 1.1 mrg else 318 1.1 mrg cover_insns = param_tracer_dynamic_coverage; 319 1.1 mrg cover_insns = (weighted_insns * cover_insns + 50) / 100; 320 1.1 mrg max_dup_insns = (ninsns * param_tracer_max_code_growth + 50) / 100; 321 1.1 mrg 322 1.1 mrg while (traced_insns < cover_insns && nduplicated < max_dup_insns 323 1.1 mrg && !heap.empty ()) 324 1.1 mrg { 325 1.1 mrg basic_block bb = heap.extract_min (); 326 1.1 mrg int n, pos; 327 1.1 mrg 328 1.1 mrg if (!bb) 329 1.1 mrg break; 330 1.1 mrg 331 1.1 mrg blocks[bb->index] = NULL; 332 1.1 mrg 333 1.1 mrg if (ignore_bb_p (bb)) 334 1.1 mrg continue; 335 1.1 mrg gcc_assert (!bb_seen_p (bb)); 336 1.1 mrg 337 1.1 mrg n = find_trace (bb, trace); 338 1.1 mrg 339 1.1 mrg bb = trace[0]; 340 1.1 mrg traced_insns += bb->count.to_frequency (cfun) * counts [bb->index]; 341 1.1 mrg if (blocks[bb->index]) 342 1.1 mrg { 343 1.1 mrg heap.delete_node (blocks[bb->index]); 344 1.1 mrg blocks[bb->index] = NULL; 345 1.1 mrg } 346 1.1 mrg 347 1.1 mrg for (pos = 1; pos < n; pos++) 348 1.1 mrg { 349 1.1 mrg basic_block bb2 = trace[pos]; 350 1.1 mrg 351 1.1 mrg if (blocks[bb2->index]) 352 1.1 mrg { 353 1.1 mrg heap.delete_node (blocks[bb2->index]); 354 1.1 mrg blocks[bb2->index] = NULL; 355 1.1 mrg } 356 1.1 mrg traced_insns += bb2->count.to_frequency (cfun) * counts [bb2->index]; 357 1.1 mrg if (EDGE_COUNT (bb2->preds) > 1 358 1.1 mrg && can_duplicate_block_p (bb2) 359 1.1 mrg /* We have the tendency to duplicate the loop header 360 1.1 mrg of all do { } while loops. Do not do that - it is 361 1.1 mrg not profitable and it might create a loop with multiple 362 1.1 mrg entries or at least rotate the loop. */ 363 1.1 mrg && bb2->loop_father->header != bb2) 364 1.1 mrg { 365 1.1 mrg nduplicated += counts [bb2->index]; 366 1.1 mrg basic_block copy = transform_duplicate (bb, bb2); 367 1.1 mrg 368 1.1 mrg /* Reconsider the original copy of block we've duplicated. 369 1.1 mrg Removing the most common predecessor may make it to be 370 1.1 mrg head. */ 371 1.1 mrg blocks[bb2->index] = heap.insert (-bb2->count.to_frequency (cfun), bb2); 372 1.1 mrg 373 1.1 mrg if (dump_file) 374 1.1 mrg fprintf (dump_file, "Duplicated %i as %i [%i]\n", 375 1.1 mrg bb2->index, copy->index, copy->count.to_frequency (cfun)); 376 1.1 mrg 377 1.1 mrg bb2 = copy; 378 1.1 mrg changed = true; 379 1.1 mrg } 380 1.1 mrg mark_bb_seen (bb2); 381 1.1 mrg bb = bb2; 382 1.1 mrg /* In case the trace became infrequent, stop duplicating. */ 383 1.1 mrg if (ignore_bb_p (bb)) 384 1.1 mrg break; 385 1.1 mrg } 386 1.1 mrg if (dump_file) 387 1.1 mrg fprintf (dump_file, " covered now %.1f\n\n", 388 1.1 mrg traced_insns * 100.0 / weighted_insns); 389 1.1 mrg } 390 1.1 mrg if (dump_file) 391 1.1 mrg fprintf (dump_file, "Duplicated %i insns (%i%%)\n", nduplicated, 392 1.1 mrg nduplicated * 100 / ninsns); 393 1.1 mrg 394 1.1 mrg free_original_copy_tables (); 395 1.1 mrg sbitmap_free (bb_seen); 396 1.1 mrg sbitmap_free (can_duplicate_bb); 397 1.1 mrg can_duplicate_bb = NULL; 398 1.1 mrg free (trace); 399 1.1 mrg free (counts); 400 1.1 mrg 401 1.1 mrg return changed; 402 1.1 mrg } 403 1.1 mrg 404 1.1 mrg namespace { 406 1.1 mrg 407 1.1 mrg const pass_data pass_data_tracer = 408 1.1 mrg { 409 1.1 mrg GIMPLE_PASS, /* type */ 410 1.1 mrg "tracer", /* name */ 411 1.1 mrg OPTGROUP_NONE, /* optinfo_flags */ 412 1.1 mrg TV_TRACER, /* tv_id */ 413 1.1 mrg 0, /* properties_required */ 414 1.1 mrg 0, /* properties_provided */ 415 1.1 mrg 0, /* properties_destroyed */ 416 1.1 mrg 0, /* todo_flags_start */ 417 1.1 mrg TODO_update_ssa, /* todo_flags_finish */ 418 1.1 mrg }; 419 1.1 mrg 420 1.1 mrg class pass_tracer : public gimple_opt_pass 421 1.1 mrg { 422 1.1 mrg public: 423 1.1 mrg pass_tracer (gcc::context *ctxt) 424 1.1 mrg : gimple_opt_pass (pass_data_tracer, ctxt) 425 1.1 mrg {} 426 1.1 mrg 427 1.1 mrg /* opt_pass methods: */ 428 1.1 mrg virtual bool gate (function *) 429 1.1 mrg { 430 1.1 mrg return (optimize > 0 && flag_tracer && flag_reorder_blocks); 431 1.1 mrg } 432 1.1 mrg 433 1.1 mrg virtual unsigned int execute (function *); 434 1.1 mrg 435 1.1 mrg }; // class pass_tracer 436 1.1 mrg 437 1.1 mrg unsigned int 438 1.1 mrg pass_tracer::execute (function *fun) 439 1.1 mrg { 440 1.1 mrg bool changed; 441 1.1 mrg 442 1.1 mrg if (n_basic_blocks_for_fn (fun) <= NUM_FIXED_BLOCKS + 1) 443 1.1 mrg return 0; 444 1.1 mrg 445 1.1 mrg mark_dfs_back_edges (); 446 1.1 mrg if (dump_file) 447 1.1 mrg brief_dump_cfg (dump_file, dump_flags); 448 1.1 mrg 449 1.1 mrg /* Trace formation is done on the fly inside tail_duplicate */ 450 1.1 mrg changed = tail_duplicate (); 451 1.1 mrg if (changed) 452 1.1 mrg { 453 1.1 mrg free_dominance_info (CDI_DOMINATORS); 454 1.1 mrg /* If we changed the CFG schedule loops for fixup by cleanup_cfg. */ 455 1.1 mrg loops_state_set (LOOPS_NEED_FIXUP); 456 1.1 mrg } 457 1.1 mrg 458 1.1 mrg if (dump_file) 459 1.1 mrg brief_dump_cfg (dump_file, dump_flags); 460 1.1 mrg 461 1.1 mrg return changed ? TODO_cleanup_cfg : 0; 462 1.1 mrg } 463 1.1 mrg } // anon namespace 464 1.1 mrg 465 1.1 mrg gimple_opt_pass * 466 1.1 mrg make_pass_tracer (gcc::context *ctxt) 467 1.1 mrg { 468 1.1 mrg return new pass_tracer (ctxt); 469 } 470