tracer.cc revision 1.1 1 1.1 mrg /* The tracer pass for the GNU compiler.
2 1.1 mrg Contributed by Jan Hubicka, SuSE Labs.
3 1.1 mrg Adapted to work on GIMPLE instead of RTL by Robert Kidd, UIUC.
4 1.1 mrg Copyright (C) 2001-2022 Free Software Foundation, Inc.
5 1.1 mrg
6 1.1 mrg This file is part of GCC.
7 1.1 mrg
8 1.1 mrg GCC is free software; you can redistribute it and/or modify it
9 1.1 mrg under the terms of the GNU General Public License as published by
10 1.1 mrg the Free Software Foundation; either version 3, or (at your option)
11 1.1 mrg any later version.
12 1.1 mrg
13 1.1 mrg GCC is distributed in the hope that it will be useful, but WITHOUT
14 1.1 mrg ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
15 1.1 mrg or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
16 1.1 mrg License for more details.
17 1.1 mrg
18 1.1 mrg You should have received a copy of the GNU General Public License
19 1.1 mrg along with GCC; see the file COPYING3. If not see
20 1.1 mrg <http://www.gnu.org/licenses/>. */
21 1.1 mrg
22 1.1 mrg /* This pass performs the tail duplication needed for superblock formation.
23 1.1 mrg For more information see:
24 1.1 mrg
25 1.1 mrg Design and Analysis of Profile-Based Optimization in Compaq's
26 1.1 mrg Compilation Tools for Alpha; Journal of Instruction-Level
27 1.1 mrg Parallelism 3 (2000) 1-25
28 1.1 mrg
29 1.1 mrg Unlike Compaq's implementation we don't do the loop peeling as most
30 1.1 mrg probably a better job can be done by a special pass and we don't
31 1.1 mrg need to worry too much about the code size implications as the tail
32 1.1 mrg duplicates are crossjumped again if optimizations are not
33 1.1 mrg performed. */
34 1.1 mrg
35 1.1 mrg
36 1.1 mrg #include "config.h"
37 1.1 mrg #include "system.h"
38 1.1 mrg #include "coretypes.h"
39 1.1 mrg #include "backend.h"
40 1.1 mrg #include "rtl.h"
41 1.1 mrg #include "tree.h"
42 1.1 mrg #include "gimple.h"
43 1.1 mrg #include "cfghooks.h"
44 1.1 mrg #include "tree-pass.h"
45 1.1 mrg #include "profile.h"
46 1.1 mrg #include "cfganal.h"
47 1.1 mrg #include "gimple-iterator.h"
48 1.1 mrg #include "tree-cfg.h"
49 1.1 mrg #include "tree-ssa.h"
50 1.1 mrg #include "tree-inline.h"
51 1.1 mrg #include "cfgloop.h"
52 1.1 mrg #include "alloc-pool.h"
53 1.1 mrg #include "fibonacci_heap.h"
54 1.1 mrg #include "tracer.h"
55 1.1 mrg
56 1.1 mrg static void analyze_bb (basic_block, int *);
57 1.1 mrg static bool better_p (const_edge, const_edge);
58 1.1 mrg static edge find_best_successor (basic_block);
59 1.1 mrg static edge find_best_predecessor (basic_block);
60 1.1 mrg static int find_trace (basic_block, basic_block *);
61 1.1 mrg
62 1.1 mrg /* Minimal outgoing edge probability considered for superblock formation. */
63 1.1 mrg static int probability_cutoff;
64 1.1 mrg static int branch_ratio_cutoff;
65 1.1 mrg
66 1.1 mrg /* A bit BB->index is set if BB has already been seen, i.e. it is
67 1.1 mrg connected to some trace already. */
68 1.1 mrg static sbitmap bb_seen;
69 1.1 mrg
70 1.1 mrg static inline void
71 1.1 mrg mark_bb_seen (basic_block bb)
72 1.1 mrg {
73 1.1 mrg unsigned int size = SBITMAP_SIZE (bb_seen);
74 1.1 mrg
75 1.1 mrg if ((unsigned int)bb->index >= size)
76 1.1 mrg bb_seen = sbitmap_resize (bb_seen, size * 2, 0);
77 1.1 mrg
78 1.1 mrg bitmap_set_bit (bb_seen, bb->index);
79 1.1 mrg }
80 1.1 mrg
81 1.1 mrg static inline bool
82 1.1 mrg bb_seen_p (basic_block bb)
83 1.1 mrg {
84 1.1 mrg return bitmap_bit_p (bb_seen, bb->index);
85 1.1 mrg }
86 1.1 mrg
87 1.1 mrg static sbitmap can_duplicate_bb;
88 1.1 mrg
89 1.1 mrg /* Cache VAL as value of can_duplicate_bb_p for BB. */
90 1.1 mrg static inline void
91 1.1 mrg cache_can_duplicate_bb_p (const_basic_block bb, bool val)
92 1.1 mrg {
93 1.1 mrg if (val)
94 1.1 mrg bitmap_set_bit (can_duplicate_bb, bb->index);
95 1.1 mrg }
96 1.1 mrg
97 1.1 mrg /* Return cached value of can_duplicate_bb_p for BB. */
98 1.1 mrg static bool
99 1.1 mrg cached_can_duplicate_bb_p (const_basic_block bb)
100 1.1 mrg {
101 1.1 mrg if (can_duplicate_bb)
102 1.1 mrg {
103 1.1 mrg unsigned int size = SBITMAP_SIZE (can_duplicate_bb);
104 1.1 mrg if ((unsigned int)bb->index < size)
105 1.1 mrg return bitmap_bit_p (can_duplicate_bb, bb->index);
106 1.1 mrg
107 1.1 mrg /* Assume added bb's should not be duplicated. */
108 1.1 mrg return false;
109 1.1 mrg }
110 1.1 mrg
111 1.1 mrg return can_duplicate_block_p (bb);
112 1.1 mrg }
113 1.1 mrg
114 1.1 mrg /* Return true if we should ignore the basic block for purposes of tracing. */
115 1.1 mrg bool
116 1.1 mrg ignore_bb_p (const_basic_block bb)
117 1.1 mrg {
118 1.1 mrg if (bb->index < NUM_FIXED_BLOCKS)
119 1.1 mrg return true;
120 1.1 mrg if (optimize_bb_for_size_p (bb))
121 1.1 mrg return true;
122 1.1 mrg
123 1.1 mrg return !cached_can_duplicate_bb_p (bb);
124 1.1 mrg }
125 1.1 mrg
126 1.1 mrg /* Return number of instructions in the block. */
127 1.1 mrg
128 1.1 mrg static void
129 1.1 mrg analyze_bb (basic_block bb, int *count)
130 1.1 mrg {
131 1.1 mrg gimple_stmt_iterator gsi;
132 1.1 mrg gimple *stmt;
133 1.1 mrg int n = 0;
134 1.1 mrg
135 1.1 mrg for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
136 1.1 mrg {
137 1.1 mrg stmt = gsi_stmt (gsi);
138 1.1 mrg n += estimate_num_insns (stmt, &eni_size_weights);
139 1.1 mrg }
140 1.1 mrg *count = n;
141 1.1 mrg
142 1.1 mrg cache_can_duplicate_bb_p (bb, can_duplicate_block_p (CONST_CAST_BB (bb)));
143 1.1 mrg }
144 1.1 mrg
145 1.1 mrg /* Return true if E1 is more frequent than E2. */
146 1.1 mrg static bool
147 1.1 mrg better_p (const_edge e1, const_edge e2)
148 1.1 mrg {
149 1.1 mrg if ((e1->count () > e2->count ()) || (e1->count () < e2->count ()))
150 1.1 mrg return e1->count () > e2->count ();
151 1.1 mrg /* This is needed to avoid changes in the decision after
152 1.1 mrg CFG is modified. */
153 1.1 mrg if (e1->src != e2->src)
154 1.1 mrg return e1->src->index > e2->src->index;
155 1.1 mrg return e1->dest->index > e2->dest->index;
156 1.1 mrg }
157 1.1 mrg
158 1.1 mrg /* Return most frequent successor of basic block BB. */
159 1.1 mrg
160 1.1 mrg static edge
161 1.1 mrg find_best_successor (basic_block bb)
162 1.1 mrg {
163 1.1 mrg edge e;
164 1.1 mrg edge best = NULL;
165 1.1 mrg edge_iterator ei;
166 1.1 mrg
167 1.1 mrg FOR_EACH_EDGE (e, ei, bb->succs)
168 1.1 mrg {
169 1.1 mrg if (!e->count ().initialized_p ())
170 1.1 mrg return NULL;
171 1.1 mrg if (!best || better_p (e, best))
172 1.1 mrg best = e;
173 1.1 mrg }
174 1.1 mrg if (!best || ignore_bb_p (best->dest))
175 1.1 mrg return NULL;
176 1.1 mrg if (!best->probability.initialized_p ()
177 1.1 mrg || best->probability.to_reg_br_prob_base () <= probability_cutoff)
178 1.1 mrg return NULL;
179 1.1 mrg return best;
180 1.1 mrg }
181 1.1 mrg
182 1.1 mrg /* Return most frequent predecessor of basic block BB. */
183 1.1 mrg
184 1.1 mrg static edge
185 1.1 mrg find_best_predecessor (basic_block bb)
186 1.1 mrg {
187 1.1 mrg edge e;
188 1.1 mrg edge best = NULL;
189 1.1 mrg edge_iterator ei;
190 1.1 mrg
191 1.1 mrg FOR_EACH_EDGE (e, ei, bb->preds)
192 1.1 mrg {
193 1.1 mrg if (!e->count ().initialized_p ())
194 1.1 mrg return NULL;
195 1.1 mrg if (!best || better_p (e, best))
196 1.1 mrg best = e;
197 1.1 mrg }
198 1.1 mrg if (!best || ignore_bb_p (best->src))
199 1.1 mrg return NULL;
200 1.1 mrg if (bb->count.initialized_p ()
201 1.1 mrg && (best->count ().to_frequency (cfun) * REG_BR_PROB_BASE
202 1.1 mrg < bb->count.to_frequency (cfun) * branch_ratio_cutoff))
203 1.1 mrg return NULL;
204 1.1 mrg return best;
205 1.1 mrg }
206 1.1 mrg
207 1.1 mrg /* Find the trace using bb and record it in the TRACE array.
208 1.1 mrg Return number of basic blocks recorded. */
209 1.1 mrg
210 1.1 mrg static int
211 1.1 mrg find_trace (basic_block bb, basic_block *trace)
212 1.1 mrg {
213 1.1 mrg int i = 0;
214 1.1 mrg edge e;
215 1.1 mrg
216 1.1 mrg if (dump_file)
217 1.1 mrg fprintf (dump_file, "Trace seed %i [%i]", bb->index, bb->count.to_frequency (cfun));
218 1.1 mrg
219 1.1 mrg while ((e = find_best_predecessor (bb)) != NULL)
220 1.1 mrg {
221 1.1 mrg basic_block bb2 = e->src;
222 1.1 mrg if (bb_seen_p (bb2) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX))
223 1.1 mrg || find_best_successor (bb2) != e)
224 1.1 mrg break;
225 1.1 mrg if (dump_file)
226 1.1 mrg fprintf (dump_file, ",%i [%i]", bb->index, bb->count.to_frequency (cfun));
227 1.1 mrg bb = bb2;
228 1.1 mrg }
229 1.1 mrg if (dump_file)
230 1.1 mrg fprintf (dump_file, " forward %i [%i]", bb->index, bb->count.to_frequency (cfun));
231 1.1 mrg trace[i++] = bb;
232 1.1 mrg
233 1.1 mrg /* Follow the trace in forward direction. */
234 1.1 mrg while ((e = find_best_successor (bb)) != NULL)
235 1.1 mrg {
236 1.1 mrg bb = e->dest;
237 1.1 mrg if (bb_seen_p (bb) || (e->flags & (EDGE_DFS_BACK | EDGE_COMPLEX))
238 1.1 mrg || find_best_predecessor (bb) != e)
239 1.1 mrg break;
240 1.1 mrg if (dump_file)
241 1.1 mrg fprintf (dump_file, ",%i [%i]", bb->index, bb->count.to_frequency (cfun));
242 1.1 mrg trace[i++] = bb;
243 1.1 mrg }
244 1.1 mrg if (dump_file)
245 1.1 mrg fprintf (dump_file, "\n");
246 1.1 mrg return i;
247 1.1 mrg }
248 1.1 mrg
249 1.1 mrg /* Duplicate block BB2, placing it after BB in the CFG. Return the
250 1.1 mrg newly created block. */
251 1.1 mrg basic_block
252 1.1 mrg transform_duplicate (basic_block bb, basic_block bb2)
253 1.1 mrg {
254 1.1 mrg edge e;
255 1.1 mrg basic_block copy;
256 1.1 mrg
257 1.1 mrg e = find_edge (bb, bb2);
258 1.1 mrg
259 1.1 mrg copy = duplicate_block (bb2, e, bb);
260 1.1 mrg flush_pending_stmts (e);
261 1.1 mrg
262 1.1 mrg add_phi_args_after_copy (©, 1, NULL);
263 1.1 mrg
264 1.1 mrg return (copy);
265 1.1 mrg }
266 1.1 mrg
267 1.1 mrg /* Look for basic blocks in frequency order, construct traces and tail duplicate
268 1.1 mrg if profitable. */
269 1.1 mrg
270 1.1 mrg static bool
271 1.1 mrg tail_duplicate (void)
272 1.1 mrg {
273 1.1 mrg auto_vec<fibonacci_node<long, basic_block_def>*> blocks;
274 1.1 mrg blocks.safe_grow_cleared (last_basic_block_for_fn (cfun), true);
275 1.1 mrg
276 1.1 mrg basic_block *trace = XNEWVEC (basic_block, n_basic_blocks_for_fn (cfun));
277 1.1 mrg int *counts = XNEWVEC (int, last_basic_block_for_fn (cfun));
278 1.1 mrg int ninsns = 0, nduplicated = 0;
279 1.1 mrg gcov_type weighted_insns = 0, traced_insns = 0;
280 1.1 mrg fibonacci_heap<long, basic_block_def> heap (LONG_MIN);
281 1.1 mrg gcov_type cover_insns;
282 1.1 mrg int max_dup_insns;
283 1.1 mrg basic_block bb;
284 1.1 mrg bool changed = false;
285 1.1 mrg
286 1.1 mrg /* Create an oversized sbitmap to reduce the chance that we need to
287 1.1 mrg resize it. */
288 1.1 mrg bb_seen = sbitmap_alloc (last_basic_block_for_fn (cfun) * 2);
289 1.1 mrg bitmap_clear (bb_seen);
290 1.1 mrg can_duplicate_bb = sbitmap_alloc (last_basic_block_for_fn (cfun));
291 1.1 mrg bitmap_clear (can_duplicate_bb);
292 1.1 mrg initialize_original_copy_tables ();
293 1.1 mrg
294 1.1 mrg if (profile_info && profile_status_for_fn (cfun) == PROFILE_READ)
295 1.1 mrg probability_cutoff = param_tracer_min_branch_probability_feedback;
296 1.1 mrg else
297 1.1 mrg probability_cutoff = param_tracer_min_branch_probability;
298 1.1 mrg probability_cutoff = REG_BR_PROB_BASE / 100 * probability_cutoff;
299 1.1 mrg
300 1.1 mrg branch_ratio_cutoff =
301 1.1 mrg (REG_BR_PROB_BASE / 100 * param_tracer_min_branch_ratio);
302 1.1 mrg
303 1.1 mrg FOR_EACH_BB_FN (bb, cfun)
304 1.1 mrg {
305 1.1 mrg int n;
306 1.1 mrg analyze_bb (bb, &n);
307 1.1 mrg if (!ignore_bb_p (bb))
308 1.1 mrg blocks[bb->index] = heap.insert (-bb->count.to_frequency (cfun), bb);
309 1.1 mrg
310 1.1 mrg counts [bb->index] = n;
311 1.1 mrg ninsns += n;
312 1.1 mrg weighted_insns += n * bb->count.to_frequency (cfun);
313 1.1 mrg }
314 1.1 mrg
315 1.1 mrg if (profile_info && profile_status_for_fn (cfun) == PROFILE_READ)
316 1.1 mrg cover_insns = param_tracer_dynamic_coverage_feedback;
317 1.1 mrg else
318 1.1 mrg cover_insns = param_tracer_dynamic_coverage;
319 1.1 mrg cover_insns = (weighted_insns * cover_insns + 50) / 100;
320 1.1 mrg max_dup_insns = (ninsns * param_tracer_max_code_growth + 50) / 100;
321 1.1 mrg
322 1.1 mrg while (traced_insns < cover_insns && nduplicated < max_dup_insns
323 1.1 mrg && !heap.empty ())
324 1.1 mrg {
325 1.1 mrg basic_block bb = heap.extract_min ();
326 1.1 mrg int n, pos;
327 1.1 mrg
328 1.1 mrg if (!bb)
329 1.1 mrg break;
330 1.1 mrg
331 1.1 mrg blocks[bb->index] = NULL;
332 1.1 mrg
333 1.1 mrg if (ignore_bb_p (bb))
334 1.1 mrg continue;
335 1.1 mrg gcc_assert (!bb_seen_p (bb));
336 1.1 mrg
337 1.1 mrg n = find_trace (bb, trace);
338 1.1 mrg
339 1.1 mrg bb = trace[0];
340 1.1 mrg traced_insns += bb->count.to_frequency (cfun) * counts [bb->index];
341 1.1 mrg if (blocks[bb->index])
342 1.1 mrg {
343 1.1 mrg heap.delete_node (blocks[bb->index]);
344 1.1 mrg blocks[bb->index] = NULL;
345 1.1 mrg }
346 1.1 mrg
347 1.1 mrg for (pos = 1; pos < n; pos++)
348 1.1 mrg {
349 1.1 mrg basic_block bb2 = trace[pos];
350 1.1 mrg
351 1.1 mrg if (blocks[bb2->index])
352 1.1 mrg {
353 1.1 mrg heap.delete_node (blocks[bb2->index]);
354 1.1 mrg blocks[bb2->index] = NULL;
355 1.1 mrg }
356 1.1 mrg traced_insns += bb2->count.to_frequency (cfun) * counts [bb2->index];
357 1.1 mrg if (EDGE_COUNT (bb2->preds) > 1
358 1.1 mrg && can_duplicate_block_p (bb2)
359 1.1 mrg /* We have the tendency to duplicate the loop header
360 1.1 mrg of all do { } while loops. Do not do that - it is
361 1.1 mrg not profitable and it might create a loop with multiple
362 1.1 mrg entries or at least rotate the loop. */
363 1.1 mrg && bb2->loop_father->header != bb2)
364 1.1 mrg {
365 1.1 mrg nduplicated += counts [bb2->index];
366 1.1 mrg basic_block copy = transform_duplicate (bb, bb2);
367 1.1 mrg
368 1.1 mrg /* Reconsider the original copy of block we've duplicated.
369 1.1 mrg Removing the most common predecessor may make it to be
370 1.1 mrg head. */
371 1.1 mrg blocks[bb2->index] = heap.insert (-bb2->count.to_frequency (cfun), bb2);
372 1.1 mrg
373 1.1 mrg if (dump_file)
374 1.1 mrg fprintf (dump_file, "Duplicated %i as %i [%i]\n",
375 1.1 mrg bb2->index, copy->index, copy->count.to_frequency (cfun));
376 1.1 mrg
377 1.1 mrg bb2 = copy;
378 1.1 mrg changed = true;
379 1.1 mrg }
380 1.1 mrg mark_bb_seen (bb2);
381 1.1 mrg bb = bb2;
382 1.1 mrg /* In case the trace became infrequent, stop duplicating. */
383 1.1 mrg if (ignore_bb_p (bb))
384 1.1 mrg break;
385 1.1 mrg }
386 1.1 mrg if (dump_file)
387 1.1 mrg fprintf (dump_file, " covered now %.1f\n\n",
388 1.1 mrg traced_insns * 100.0 / weighted_insns);
389 1.1 mrg }
390 1.1 mrg if (dump_file)
391 1.1 mrg fprintf (dump_file, "Duplicated %i insns (%i%%)\n", nduplicated,
392 1.1 mrg nduplicated * 100 / ninsns);
393 1.1 mrg
394 1.1 mrg free_original_copy_tables ();
395 1.1 mrg sbitmap_free (bb_seen);
396 1.1 mrg sbitmap_free (can_duplicate_bb);
397 1.1 mrg can_duplicate_bb = NULL;
398 1.1 mrg free (trace);
399 1.1 mrg free (counts);
400 1.1 mrg
401 1.1 mrg return changed;
402 1.1 mrg }
403 1.1 mrg
404 1.1 mrg namespace {
406 1.1 mrg
407 1.1 mrg const pass_data pass_data_tracer =
408 1.1 mrg {
409 1.1 mrg GIMPLE_PASS, /* type */
410 1.1 mrg "tracer", /* name */
411 1.1 mrg OPTGROUP_NONE, /* optinfo_flags */
412 1.1 mrg TV_TRACER, /* tv_id */
413 1.1 mrg 0, /* properties_required */
414 1.1 mrg 0, /* properties_provided */
415 1.1 mrg 0, /* properties_destroyed */
416 1.1 mrg 0, /* todo_flags_start */
417 1.1 mrg TODO_update_ssa, /* todo_flags_finish */
418 1.1 mrg };
419 1.1 mrg
420 1.1 mrg class pass_tracer : public gimple_opt_pass
421 1.1 mrg {
422 1.1 mrg public:
423 1.1 mrg pass_tracer (gcc::context *ctxt)
424 1.1 mrg : gimple_opt_pass (pass_data_tracer, ctxt)
425 1.1 mrg {}
426 1.1 mrg
427 1.1 mrg /* opt_pass methods: */
428 1.1 mrg virtual bool gate (function *)
429 1.1 mrg {
430 1.1 mrg return (optimize > 0 && flag_tracer && flag_reorder_blocks);
431 1.1 mrg }
432 1.1 mrg
433 1.1 mrg virtual unsigned int execute (function *);
434 1.1 mrg
435 1.1 mrg }; // class pass_tracer
436 1.1 mrg
437 1.1 mrg unsigned int
438 1.1 mrg pass_tracer::execute (function *fun)
439 1.1 mrg {
440 1.1 mrg bool changed;
441 1.1 mrg
442 1.1 mrg if (n_basic_blocks_for_fn (fun) <= NUM_FIXED_BLOCKS + 1)
443 1.1 mrg return 0;
444 1.1 mrg
445 1.1 mrg mark_dfs_back_edges ();
446 1.1 mrg if (dump_file)
447 1.1 mrg brief_dump_cfg (dump_file, dump_flags);
448 1.1 mrg
449 1.1 mrg /* Trace formation is done on the fly inside tail_duplicate */
450 1.1 mrg changed = tail_duplicate ();
451 1.1 mrg if (changed)
452 1.1 mrg {
453 1.1 mrg free_dominance_info (CDI_DOMINATORS);
454 1.1 mrg /* If we changed the CFG schedule loops for fixup by cleanup_cfg. */
455 1.1 mrg loops_state_set (LOOPS_NEED_FIXUP);
456 1.1 mrg }
457 1.1 mrg
458 1.1 mrg if (dump_file)
459 1.1 mrg brief_dump_cfg (dump_file, dump_flags);
460 1.1 mrg
461 1.1 mrg return changed ? TODO_cleanup_cfg : 0;
462 1.1 mrg }
463 1.1 mrg } // anon namespace
464 1.1 mrg
465 1.1 mrg gimple_opt_pass *
466 1.1 mrg make_pass_tracer (gcc::context *ctxt)
467 1.1 mrg {
468 1.1 mrg return new pass_tracer (ctxt);
469 }
470