cg_print.c revision 1.1.1.8 1 /* cg_print.c - Print routines for displaying call graphs.
2
3 Copyright (C) 2000-2022 Free Software Foundation, Inc.
4
5 This file is part of GNU Binutils.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
20 02110-1301, USA. */
21
22 #include "gprof.h"
24 #include "libiberty.h"
25 #include "filenames.h"
26 #include "search_list.h"
27 #include "source.h"
28 #include "symtab.h"
29 #include "cg_arcs.h"
30 #include "cg_print.h"
31 #include "hist.h"
32 #include "utils.h"
33 #include "corefile.h"
34
35 /* Return value of comparison functions used to sort tables. */
36 #define LESSTHAN -1
37 #define EQUALTO 0
38 #define GREATERTHAN 1
39
40 static void print_header (void);
41 static void print_cycle (Sym *);
42 static int cmp_member (Sym *, Sym *);
43 static void sort_members (Sym *);
44 static void print_members (Sym *);
45 static int cmp_arc (Arc *, Arc *);
46 static void sort_parents (Sym *);
47 static void print_parents (Sym *);
48 static void sort_children (Sym *);
49 static void print_children (Sym *);
50 static void print_line (Sym *);
51 static int cmp_name (const void *, const void *);
52 static int cmp_arc_count (const void *, const void *);
53 static int cmp_fun_nuses (const void *, const void *);
54 static void order_and_dump_functions_by_arcs
55 (Arc **, unsigned long, int, Arc **, unsigned long *);
56
57 /* Declarations of automatically generated functions to output blurbs. */
58 extern void bsd_callg_blurb (FILE * fp);
59 extern void fsf_callg_blurb (FILE * fp);
60
61 double print_time = 0.0;
62
63
64 static void
65 print_header (void)
66 {
67 if (first_output)
68 first_output = false;
69 else
70 printf ("\f\n");
71
72 if (!bsd_style_output)
73 {
74 if (print_descriptions)
75 printf (_("\t\t Call graph (explanation follows)\n\n"));
76 else
77 printf (_("\t\t\tCall graph\n\n"));
78 }
79
80 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
81 (long) hist_scale * (long) sizeof (UNIT));
82
83 if (print_time > 0.0)
84 printf (_(" for %.2f%% of %.2f seconds\n\n"),
85 100.0 / print_time, print_time / hz);
86 else
87 {
88 printf (_(" no time propagated\n\n"));
89
90 /* This doesn't hurt, since all the numerators will be 0.0. */
91 print_time = 1.0;
92 }
93
94 if (bsd_style_output)
95 {
96 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
97 "", "", "", "", _("called"), _("total"), _("parents"));
98 printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n",
99 _("index"),
100 /* xgettext:no-c-format */
101 _("%time"),
102 _("self"), _("descendants"), _("called"), _("self"),
103 _("name"), _("index"));
104 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
105 "", "", "", "", _("called"), _("total"), _("children"));
106 printf ("\n");
107 }
108 else
109 {
110 printf (_("index %% time self children called name\n"));
111 }
112 }
113
114 /* Print a cycle header. */
115
116 static void
117 print_cycle (Sym *cyc)
118 {
119 char buf[BUFSIZ];
120
121 sprintf (buf, "[%d]", cyc->cg.index);
122 printf (bsd_style_output
123 ? "%-6.6s %5.1f %7.2f %11.2f %7lu"
124 : "%-6.6s %5.1f %7.2f %7.2f %7lu", buf,
125 100 * (cyc->cg.prop.self + cyc->cg.prop.child) / print_time,
126 cyc->cg.prop.self / hz, cyc->cg.prop.child / hz, cyc->ncalls);
127
128 if (cyc->cg.self_calls != 0)
129 printf ("+%-7lu", cyc->cg.self_calls);
130 else
131 printf (" %7.7s", "");
132
133 printf (_(" <cycle %d as a whole> [%d]\n"), cyc->cg.cyc.num, cyc->cg.index);
134 }
135
136 /* Compare LEFT and RIGHT membmer. Major comparison key is
137 CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS. */
138
139 static int
140 cmp_member (Sym *left, Sym *right)
141 {
142 double left_time = left->cg.prop.self + left->cg.prop.child;
143 double right_time = right->cg.prop.self + right->cg.prop.child;
144 unsigned long left_calls = left->ncalls + left->cg.self_calls;
145 unsigned long right_calls = right->ncalls + right->cg.self_calls;
146
147 if (left_time > right_time)
148 return GREATERTHAN;
149
150 if (left_time < right_time)
151 return LESSTHAN;
152
153 if (left_calls > right_calls)
154 return GREATERTHAN;
155
156 if (left_calls < right_calls)
157 return LESSTHAN;
158
159 return EQUALTO;
160 }
161
162 /* Sort members of a cycle. */
163
164 static void
165 sort_members (Sym *cyc)
166 {
167 Sym *todo, *doing, *prev;
168
169 /* Detach cycle members from cyclehead,
170 and insertion sort them back on. */
171 todo = cyc->cg.cyc.next;
172 cyc->cg.cyc.next = 0;
173
174 for (doing = todo; doing != NULL; doing = todo)
175 {
176 todo = doing->cg.cyc.next;
177
178 for (prev = cyc; prev->cg.cyc.next; prev = prev->cg.cyc.next)
179 {
180 if (cmp_member (doing, prev->cg.cyc.next) == GREATERTHAN)
181 break;
182 }
183
184 doing->cg.cyc.next = prev->cg.cyc.next;
185 prev->cg.cyc.next = doing;
186 }
187 }
188
189 /* Print the members of a cycle. */
190
191 static void
192 print_members (Sym *cyc)
193 {
194 Sym *member;
195
196 sort_members (cyc);
197
198 for (member = cyc->cg.cyc.next; member; member = member->cg.cyc.next)
199 {
200 printf (bsd_style_output
201 ? "%6.6s %5.5s %7.2f %11.2f %7lu"
202 : "%6.6s %5.5s %7.2f %7.2f %7lu",
203 "", "", member->cg.prop.self / hz, member->cg.prop.child / hz,
204 member->ncalls);
205
206 if (member->cg.self_calls != 0)
207 printf ("+%-7lu", member->cg.self_calls);
208 else
209 printf (" %7.7s", "");
210
211 printf (" ");
212 print_name (member);
213 printf ("\n");
214 }
215 }
216
217 /* Compare two arcs to/from the same child/parent.
218 - if one arc is a self arc, it's least.
219 - if one arc is within a cycle, it's less than.
220 - if both arcs are within a cycle, compare arc counts.
221 - if neither arc is within a cycle, compare with
222 time + child_time as major key
223 arc count as minor key. */
224
225 static int
226 cmp_arc (Arc *left, Arc *right)
227 {
228 Sym *left_parent = left->parent;
229 Sym *left_child = left->child;
230 Sym *right_parent = right->parent;
231 Sym *right_child = right->child;
232 double left_time, right_time;
233
234 DBG (TIMEDEBUG,
235 printf ("[cmp_arc] ");
236 print_name (left_parent);
237 printf (" calls ");
238 print_name (left_child);
239 printf (" %f + %f %lu/%lu\n", left->time, left->child_time,
240 left->count, left_child->ncalls);
241 printf ("[cmp_arc] ");
242 print_name (right_parent);
243 printf (" calls ");
244 print_name (right_child);
245 printf (" %f + %f %lu/%lu\n", right->time, right->child_time,
246 right->count, right_child->ncalls);
247 printf ("\n");
248 );
249
250 if (left_parent == left_child)
251 return LESSTHAN; /* Left is a self call. */
252
253 if (right_parent == right_child)
254 return GREATERTHAN; /* Right is a self call. */
255
256 if (left_parent->cg.cyc.num != 0 && left_child->cg.cyc.num != 0
257 && left_parent->cg.cyc.num == left_child->cg.cyc.num)
258 {
259 /* Left is a call within a cycle. */
260 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
261 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
262 {
263 /* Right is a call within the cycle, too. */
264 if (left->count < right->count)
265 return LESSTHAN;
266
267 if (left->count > right->count)
268 return GREATERTHAN;
269
270 return EQUALTO;
271 }
272 else
273 {
274 /* Right isn't a call within the cycle. */
275 return LESSTHAN;
276 }
277 }
278 else
279 {
280 /* Left isn't a call within a cycle. */
281 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
282 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
283 {
284 /* Right is a call within a cycle. */
285 return GREATERTHAN;
286 }
287 else
288 {
289 /* Neither is a call within a cycle. */
290 left_time = left->time + left->child_time;
291 right_time = right->time + right->child_time;
292
293 if (left_time < right_time)
294 return LESSTHAN;
295
296 if (left_time > right_time)
297 return GREATERTHAN;
298
299 if (left->count < right->count)
300 return LESSTHAN;
301
302 if (left->count > right->count)
303 return GREATERTHAN;
304
305 return EQUALTO;
306 }
307 }
308 }
309
310
311 static void
312 sort_parents (Sym * child)
313 {
314 Arc *arc, *detached, sorted, *prev;
315
316 /* Unlink parents from child, then insertion sort back on to
317 sorted's parents.
318 *arc the arc you have detached and are inserting.
319 *detached the rest of the arcs to be sorted.
320 sorted arc list onto which you insertion sort.
321 *prev arc before the arc you are comparing. */
322 sorted.next_parent = 0;
323
324 for (arc = child->cg.parents; arc; arc = detached)
325 {
326 detached = arc->next_parent;
327
328 /* Consider *arc as disconnected; insert it into sorted. */
329 for (prev = &sorted; prev->next_parent; prev = prev->next_parent)
330 {
331 if (cmp_arc (arc, prev->next_parent) != GREATERTHAN)
332 break;
333 }
334
335 arc->next_parent = prev->next_parent;
336 prev->next_parent = arc;
337 }
338
339 /* Reattach sorted arcs to child. */
340 child->cg.parents = sorted.next_parent;
341 }
342
343
344 static void
345 print_parents (Sym *child)
346 {
347 Sym *parent;
348 Arc *arc;
349 Sym *cycle_head;
350
351 if (child->cg.cyc.head != 0)
352 cycle_head = child->cg.cyc.head;
353 else
354 cycle_head = child;
355
356 if (!child->cg.parents)
357 {
358 printf (bsd_style_output
359 ? _("%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s <spontaneous>\n")
360 : _("%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s <spontaneous>\n"),
361 "", "", "", "", "", "");
362 return;
363 }
364
365 sort_parents (child);
366
367 for (arc = child->cg.parents; arc; arc = arc->next_parent)
368 {
369 parent = arc->parent;
370 if (child == parent || (child->cg.cyc.num != 0
371 && parent->cg.cyc.num == child->cg.cyc.num))
372 {
373 /* Selfcall or call among siblings. */
374 printf (bsd_style_output
375 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
376 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
377 "", "", "", "",
378 arc->count, "");
379 print_name (parent);
380 printf ("\n");
381 }
382 else
383 {
384 /* Regular parent of child. */
385 printf (bsd_style_output
386 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
387 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
388 "", "",
389 arc->time / hz, arc->child_time / hz,
390 arc->count, cycle_head->ncalls);
391 print_name (parent);
392 printf ("\n");
393 }
394 }
395 }
396
397
398 static void
399 sort_children (Sym *parent)
400 {
401 Arc *arc, *detached, sorted, *prev;
402
403 /* Unlink children from parent, then insertion sort back on to
404 sorted's children.
405 *arc the arc you have detached and are inserting.
406 *detached the rest of the arcs to be sorted.
407 sorted arc list onto which you insertion sort.
408 *prev arc before the arc you are comparing. */
409 sorted.next_child = 0;
410
411 for (arc = parent->cg.children; arc; arc = detached)
412 {
413 detached = arc->next_child;
414
415 /* Consider *arc as disconnected; insert it into sorted. */
416 for (prev = &sorted; prev->next_child; prev = prev->next_child)
417 {
418 if (cmp_arc (arc, prev->next_child) != LESSTHAN)
419 break;
420 }
421
422 arc->next_child = prev->next_child;
423 prev->next_child = arc;
424 }
425
426 /* Reattach sorted children to parent. */
427 parent->cg.children = sorted.next_child;
428 }
429
430
431 static void
432 print_children (Sym *parent)
433 {
434 Sym *child;
435 Arc *arc;
436
437 sort_children (parent);
438 arc = parent->cg.children;
439
440 for (arc = parent->cg.children; arc; arc = arc->next_child)
441 {
442 child = arc->child;
443 if (child == parent || (child->cg.cyc.num != 0
444 && child->cg.cyc.num == parent->cg.cyc.num))
445 {
446 /* Self call or call to sibling. */
447 printf (bsd_style_output
448 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
449 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
450 "", "", "", "", arc->count, "");
451 print_name (child);
452 printf ("\n");
453 }
454 else
455 {
456 /* Regular child of parent. */
457 printf (bsd_style_output
458 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
459 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
460 "", "",
461 arc->time / hz, arc->child_time / hz,
462 arc->count, child->cg.cyc.head->ncalls);
463 print_name (child);
464 printf ("\n");
465 }
466 }
467 }
468
469
470 static void
471 print_line (Sym *np)
472 {
473 char buf[BUFSIZ];
474
475 sprintf (buf, "[%d]", np->cg.index);
476 printf (bsd_style_output
477 ? "%-6.6s %5.1f %7.2f %11.2f"
478 : "%-6.6s %5.1f %7.2f %7.2f", buf,
479 100 * (np->cg.prop.self + np->cg.prop.child) / print_time,
480 np->cg.prop.self / hz, np->cg.prop.child / hz);
481
482 if ((np->ncalls + np->cg.self_calls) != 0)
483 {
484 printf (" %7lu", np->ncalls);
485
486 if (np->cg.self_calls != 0)
487 printf ("+%-7lu ", np->cg.self_calls);
488 else
489 printf (" %7.7s ", "");
490 }
491 else
492 {
493 printf (" %7.7s %7.7s ", "", "");
494 }
495
496 print_name (np);
497 printf ("\n");
498 }
499
500
501 /* Print dynamic call graph. */
502
503 void
504 cg_print (Sym ** timesortsym)
505 {
506 unsigned int sym_index;
507 Sym *parent;
508
509 if (print_descriptions && bsd_style_output)
510 bsd_callg_blurb (stdout);
511
512 print_header ();
513
514 for (sym_index = 0; sym_index < symtab.len + num_cycles; ++sym_index)
515 {
516 parent = timesortsym[sym_index];
517
518 if ((ignore_zeros && parent->ncalls == 0
519 && parent->cg.self_calls == 0 && parent->cg.prop.self == 0
520 && parent->cg.prop.child == 0)
521 || !parent->cg.print_flag
522 || (line_granularity && ! parent->is_func))
523 continue;
524
525 if (!parent->name && parent->cg.cyc.num != 0)
526 {
527 /* Cycle header. */
528 print_cycle (parent);
529 print_members (parent);
530 }
531 else
532 {
533 print_parents (parent);
534 print_line (parent);
535 print_children (parent);
536 }
537
538 if (bsd_style_output)
539 printf ("\n");
540
541 printf ("-----------------------------------------------\n");
542
543 if (bsd_style_output)
544 printf ("\n");
545 }
546
547 free (timesortsym);
548
549 if (print_descriptions && !bsd_style_output)
550 fsf_callg_blurb (stdout);
551 }
552
553
554 static int
555 cmp_name (const void *left, const void *right)
556 {
557 const Sym **npp1 = (const Sym **) left;
558 const Sym **npp2 = (const Sym **) right;
559
560 return strcmp ((*npp1)->name, (*npp2)->name);
561 }
562
563
564 void
565 cg_print_index (void)
566 {
567 unsigned int sym_index;
568 unsigned int nnames, todo, i, j;
569 int col, starting_col;
570 Sym **name_sorted_syms, *sym;
571 const char *filename;
572 char buf[20];
573 int column_width = (output_width - 1) / 3; /* Don't write in last col! */
574
575 /* Now, sort regular function name
576 alphabetically to create an index. */
577 name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *));
578
579 for (sym_index = 0, nnames = 0; sym_index < symtab.len; sym_index++)
580 {
581 if (ignore_zeros && symtab.base[sym_index].ncalls == 0
582 && symtab.base[sym_index].hist.time == 0)
583 continue;
584
585 name_sorted_syms[nnames++] = &symtab.base[sym_index];
586 }
587
588 qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name);
589
590 for (sym_index = 1, todo = nnames; sym_index <= num_cycles; sym_index++)
591 name_sorted_syms[todo++] = &cycle_header[sym_index];
592
593 printf ("\f\n");
594 printf (_("Index by function name\n\n"));
595 sym_index = (todo + 2) / 3;
596
597 for (i = 0; i < sym_index; i++)
598 {
599 col = 0;
600 starting_col = 0;
601
602 for (j = i; j < todo; j += sym_index)
603 {
604 sym = name_sorted_syms[j];
605
606 if (sym->cg.print_flag)
607 sprintf (buf, "[%d]", sym->cg.index);
608 else
609 sprintf (buf, "(%d)", sym->cg.index);
610
611 if (j < nnames)
612 {
613 if (bsd_style_output)
614 {
615 printf ("%6.6s %-19.19s", buf, sym->name);
616 }
617 else
618 {
619 col += strlen (buf);
620
621 for (; col < starting_col + 5; ++col)
622 putchar (' ');
623
624 printf (" %s ", buf);
625 col += print_name_only (sym);
626
627 if (!line_granularity && sym->is_static && sym->file)
628 {
629 filename = sym->file->name;
630
631 if (!print_path)
632 {
633 filename = strrchr (filename, '/');
634
635 if (filename)
636 ++filename;
637 else
638 filename = sym->file->name;
639 }
640
641 printf (" (%s)", filename);
642 col += strlen (filename) + 3;
643 }
644 }
645 }
646 else
647 {
648 if (bsd_style_output)
649 {
650 printf ("%6.6s ", buf);
651 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
652 printf ("%-19.19s", buf);
653 }
654 else
655 {
656 col += strlen (buf);
657 for (; col < starting_col + 5; ++col)
658 putchar (' ');
659 printf (" %s ", buf);
660 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
661 printf ("%s", buf);
662 col += strlen (buf);
663 }
664 }
665
666 starting_col += column_width;
667 }
668
669 printf ("\n");
670 }
671
672 free (name_sorted_syms);
673 }
674
675 /* Compare two arcs based on their usage counts.
676 We want to sort in descending order. */
677
678 static int
679 cmp_arc_count (const void *left, const void *right)
680 {
681 const Arc **npp1 = (const Arc **) left;
682 const Arc **npp2 = (const Arc **) right;
683
684 if ((*npp1)->count > (*npp2)->count)
685 return -1;
686 else if ((*npp1)->count < (*npp2)->count)
687 return 1;
688 else
689 return 0;
690 }
691
692 /* Compare two funtions based on their usage counts.
693 We want to sort in descending order. */
694
695 static int
696 cmp_fun_nuses (const void *left, const void *right)
697 {
698 const Sym **npp1 = (const Sym **) left;
699 const Sym **npp2 = (const Sym **) right;
700
701 if ((*npp1)->nuses > (*npp2)->nuses)
702 return -1;
703 else if ((*npp1)->nuses < (*npp2)->nuses)
704 return 1;
705 else
706 return 0;
707 }
708
709 /* Print a suggested function ordering based on the profiling data.
710
711 We perform 4 major steps when ordering functions:
712
713 * Group unused functions together and place them at the
714 end of the function order.
715
716 * Search the highest use arcs (those which account for 90% of
717 the total arc count) for functions which have several parents.
718
719 Group those with the most call sites together (currently the
720 top 1.25% which have at least five different call sites).
721
722 These are emitted at the start of the function order.
723
724 * Use a greedy placement algorithm to place functions which
725 occur in the top 99% of the arcs in the profile. Some provisions
726 are made to handle high usage arcs where the parent and/or
727 child has already been placed.
728
729 * Run the same greedy placement algorithm on the remaining
730 arcs to place the leftover functions.
731
732
733 The various "magic numbers" should (one day) be tuneable by command
734 line options. They were arrived at by benchmarking a few applications
735 with various values to see which values produced better overall function
736 orderings.
737
738 Of course, profiling errors, machine limitations (PA long calls), and
739 poor cutoff values for the placement algorithm may limit the usefullness
740 of the resulting function order. Improvements would be greatly appreciated.
741
742 Suggestions:
743
744 * Place the functions with many callers near the middle of the
745 list to reduce long calls.
746
747 * Propagate arc usage changes as functions are placed. Ie if
748 func1 and func2 are placed together, arcs to/from those arcs
749 to the same parent/child should be combined, then resort the
750 arcs to choose the next one.
751
752 * Implement some global positioning algorithm to place the
753 chains made by the greedy local positioning algorithm. Probably
754 by examining arcs which haven't been placed yet to tie two
755 chains together.
756
757 * Take a function's size and time into account in the algorithm;
758 size in particular is important on the PA (long calls). Placing
759 many small functions onto their own page may be wise.
760
761 * Use better profiling information; many published algorithms
762 are based on call sequences through time, rather than just
763 arc counts.
764
765 * Prodecure cloning could improve performance when a small number
766 of arcs account for most of the calls to a particular function.
767
768 * Use relocation information to avoid moving unused functions
769 completely out of the code stream; this would avoid severe lossage
770 when the profile data bears little resemblance to actual runs.
771
772 * Propagation of arc usages should also improve .o link line
773 ordering which shares the same arc placement algorithm with
774 the function ordering code (in fact it is a degenerate case
775 of function ordering). */
776
777 void
778 cg_print_function_ordering (void)
779 {
780 unsigned long sym_index;
781 unsigned long arc_index;
782 unsigned long used, unused, scratch_index;
783 unsigned long unplaced_arc_count, high_arc_count, scratch_arc_count;
784 #ifdef __GNUC__
785 unsigned long long total_arcs, tmp_arcs_count;
786 #else
787 unsigned long total_arcs, tmp_arcs_count;
788 #endif
789 Sym **unused_syms, **used_syms, **scratch_syms;
790 Arc **unplaced_arcs, **high_arcs, **scratch_arcs;
791
792 sym_index = 0;
793 used = 0;
794 unused = 0;
795 scratch_index = 0;
796 unplaced_arc_count = 0;
797 high_arc_count = 0;
798 scratch_arc_count = 0;
799
800 /* First group all the unused functions together. */
801 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
802 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
803 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
804 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
805 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
806 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
807
808 /* Walk through all the functions; mark those which are never
809 called as placed (we'll emit them as a group later). */
810 for (sym_index = 0, used = 0, unused = 0; sym_index < symtab.len; sym_index++)
811 {
812 if (symtab.base[sym_index].ncalls == 0)
813 {
814 unused_syms[unused++] = &symtab.base[sym_index];
815 symtab.base[sym_index].has_been_placed = 1;
816 }
817 else
818 {
819 used_syms[used++] = &symtab.base[sym_index];
820 symtab.base[sym_index].has_been_placed = 0;
821 symtab.base[sym_index].next = 0;
822 symtab.base[sym_index].prev = 0;
823 symtab.base[sym_index].nuses = 0;
824 }
825 }
826
827 /* Sort the arcs from most used to least used. */
828 qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count);
829
830 /* Compute the total arc count. Also mark arcs as unplaced.
831
832 Note we don't compensate for overflow if that happens!
833 Overflow is much less likely when this file is compiled
834 with GCC as it can double-wide integers via long long. */
835 total_arcs = 0;
836 for (arc_index = 0; arc_index < numarcs; arc_index++)
837 {
838 total_arcs += arcs[arc_index]->count;
839 arcs[arc_index]->has_been_placed = 0;
840 }
841
842 /* We want to pull out those functions which are referenced
843 by many highly used arcs and emit them as a group. This
844 could probably use some tuning. */
845 tmp_arcs_count = 0;
846 for (arc_index = 0; arc_index < numarcs; arc_index++)
847 {
848 tmp_arcs_count += arcs[arc_index]->count;
849
850 /* Count how many times each parent and child are used up
851 to our threshold of arcs (90%). */
852 if ((double)tmp_arcs_count / (double)total_arcs > 0.90)
853 break;
854
855 arcs[arc_index]->child->nuses++;
856 }
857
858 /* Now sort a temporary symbol table based on the number of
859 times each function was used in the highest used arcs. */
860 memcpy (scratch_syms, used_syms, used * sizeof (Sym *));
861 qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses);
862
863 /* Now pick out those symbols we're going to emit as
864 a group. We take up to 1.25% of the used symbols. */
865 for (sym_index = 0; sym_index < used / 80; sym_index++)
866 {
867 Sym *sym = scratch_syms[sym_index];
868 Arc *arc;
869
870 /* If we hit symbols that aren't used from many call sites,
871 then we can quit. We choose five as the low limit for
872 no particular reason. */
873 if (sym->nuses == 5)
874 break;
875
876 /* We're going to need the arcs between these functions.
877 Unfortunately, we don't know all these functions
878 until we're done. So we keep track of all the arcs
879 to the functions we care about, then prune out those
880 which are uninteresting.
881
882 An interesting variation would be to quit when we found
883 multi-call site functions which account for some percentage
884 of the arcs. */
885 arc = sym->cg.children;
886
887 while (arc)
888 {
889 if (arc->parent != arc->child)
890 scratch_arcs[scratch_arc_count++] = arc;
891 arc->has_been_placed = 1;
892 arc = arc->next_child;
893 }
894
895 arc = sym->cg.parents;
896
897 while (arc)
898 {
899 if (arc->parent != arc->child)
900 scratch_arcs[scratch_arc_count++] = arc;
901 arc->has_been_placed = 1;
902 arc = arc->next_parent;
903 }
904
905 /* Keep track of how many symbols we're going to place. */
906 scratch_index = sym_index;
907
908 /* A lie, but it makes identifying
909 these functions easier later. */
910 sym->has_been_placed = 1;
911 }
912
913 /* Now walk through the temporary arcs and copy
914 those we care about into the high arcs array. */
915 for (arc_index = 0; arc_index < scratch_arc_count; arc_index++)
916 {
917 Arc *arc = scratch_arcs[arc_index];
918
919 /* If this arc refers to highly used functions, then
920 then we want to keep it. */
921 if (arc->child->has_been_placed
922 && arc->parent->has_been_placed)
923 {
924 high_arcs[high_arc_count++] = scratch_arcs[arc_index];
925
926 /* We need to turn of has_been_placed since we're going to
927 use the main arc placement algorithm on these arcs. */
928 arc->child->has_been_placed = 0;
929 arc->parent->has_been_placed = 0;
930 }
931 }
932
933 /* Dump the multi-site high usage functions which are not
934 going to be ordered by the main ordering algorithm. */
935 for (sym_index = 0; sym_index < scratch_index; sym_index++)
936 {
937 if (scratch_syms[sym_index]->has_been_placed)
938 printf ("%s\n", scratch_syms[sym_index]->name);
939 }
940
941 /* Now we can order the multi-site high use
942 functions based on the arcs between them. */
943 qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count);
944 order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1,
945 unplaced_arcs, &unplaced_arc_count);
946
947 /* Order and dump the high use functions left,
948 these typically have only a few call sites. */
949 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
950 unplaced_arcs, &unplaced_arc_count);
951
952 /* Now place the rarely used functions. */
953 order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1,
954 scratch_arcs, &scratch_arc_count);
955
956 /* Output any functions not emitted by the order_and_dump calls. */
957 for (sym_index = 0; sym_index < used; sym_index++)
958 if (used_syms[sym_index]->has_been_placed == 0)
959 printf("%s\n", used_syms[sym_index]->name);
960
961 /* Output the unused functions. */
962 for (sym_index = 0; sym_index < unused; sym_index++)
963 printf("%s\n", unused_syms[sym_index]->name);
964
965 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
966 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
967 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
968 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
969 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
970 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
971
972 free (unused_syms);
973 free (used_syms);
974 free (scratch_syms);
975 free (high_arcs);
976 free (scratch_arcs);
977 free (unplaced_arcs);
978 }
979
980 /* Place functions based on the arcs in THE_ARCS with ARC_COUNT entries;
981 place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
982
983 If ALL is nonzero, then place all functions referenced by THE_ARCS,
984 else only place those referenced in the top 99% of the arcs in THE_ARCS. */
985
986 #define MOST 0.99
987 static void
988 order_and_dump_functions_by_arcs (Arc **the_arcs, unsigned long arc_count,
989 int all, Arc **unplaced_arcs,
990 unsigned long *unplaced_arc_count)
991 {
992 #ifdef __GNUC__
993 unsigned long long tmp_arcs, total_arcs;
994 #else
995 unsigned long tmp_arcs, total_arcs;
996 #endif
997 unsigned int arc_index;
998
999 /* If needed, compute the total arc count.
1000
1001 Note we don't compensate for overflow if that happens! */
1002 if (! all)
1003 {
1004 total_arcs = 0;
1005 for (arc_index = 0; arc_index < arc_count; arc_index++)
1006 total_arcs += the_arcs[arc_index]->count;
1007 }
1008 else
1009 total_arcs = 0;
1010
1011 tmp_arcs = 0;
1012
1013 for (arc_index = 0; arc_index < arc_count; arc_index++)
1014 {
1015 Sym *sym1, *sym2;
1016 Sym *child, *parent;
1017
1018 tmp_arcs += the_arcs[arc_index]->count;
1019
1020 /* Ignore this arc if it's already been placed. */
1021 if (the_arcs[arc_index]->has_been_placed)
1022 continue;
1023
1024 child = the_arcs[arc_index]->child;
1025 parent = the_arcs[arc_index]->parent;
1026
1027 /* If we're not using all arcs, and this is a rarely used
1028 arc, then put it on the unplaced_arc list. Similarly
1029 if both the parent and child of this arc have been placed. */
1030 if ((! all && (double)tmp_arcs / (double)total_arcs > MOST)
1031 || child->has_been_placed || parent->has_been_placed)
1032 {
1033 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1034 continue;
1035 }
1036
1037 /* If all slots in the parent and child are full, then there isn't
1038 anything we can do right now. We'll place this arc on the
1039 unplaced arc list in the hope that a global positioning
1040 algorithm can use it to place function chains. */
1041 if (parent->next && parent->prev && child->next && child->prev)
1042 {
1043 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1044 continue;
1045 }
1046
1047 /* If the parent is unattached, then find the closest
1048 place to attach it onto child's chain. Similarly
1049 for the opposite case. */
1050 if (!parent->next && !parent->prev)
1051 {
1052 int next_count = 0;
1053 int prev_count = 0;
1054 Sym *prev = child;
1055 Sym *next = child;
1056
1057 /* Walk to the beginning and end of the child's chain. */
1058 while (next->next)
1059 {
1060 next = next->next;
1061 next_count++;
1062 }
1063
1064 while (prev->prev)
1065 {
1066 prev = prev->prev;
1067 prev_count++;
1068 }
1069
1070 /* Choose the closest. */
1071 child = next_count < prev_count ? next : prev;
1072 }
1073 else if (! child->next && !child->prev)
1074 {
1075 int next_count = 0;
1076 int prev_count = 0;
1077 Sym *prev = parent;
1078 Sym *next = parent;
1079
1080 while (next->next)
1081 {
1082 next = next->next;
1083 next_count++;
1084 }
1085
1086 while (prev->prev)
1087 {
1088 prev = prev->prev;
1089 prev_count++;
1090 }
1091
1092 parent = prev_count < next_count ? prev : next;
1093 }
1094 else
1095 {
1096 /* Couldn't find anywhere to attach the functions,
1097 put the arc on the unplaced arc list. */
1098 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1099 continue;
1100 }
1101
1102 /* Make sure we don't tie two ends together. */
1103 sym1 = parent;
1104 if (sym1->next)
1105 while (sym1->next)
1106 sym1 = sym1->next;
1107 else
1108 while (sym1->prev)
1109 sym1 = sym1->prev;
1110
1111 sym2 = child;
1112 if (sym2->next)
1113 while (sym2->next)
1114 sym2 = sym2->next;
1115 else
1116 while (sym2->prev)
1117 sym2 = sym2->prev;
1118
1119 if (sym1 == child
1120 && sym2 == parent)
1121 {
1122 /* This would tie two ends together. */
1123 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1124 continue;
1125 }
1126
1127 if (parent->next)
1128 {
1129 /* Must attach to the parent's prev field. */
1130 if (! child->next)
1131 {
1132 /* parent-prev and child-next */
1133 parent->prev = child;
1134 child->next = parent;
1135 the_arcs[arc_index]->has_been_placed = 1;
1136 }
1137 }
1138 else if (parent->prev)
1139 {
1140 /* Must attach to the parent's next field. */
1141 if (! child->prev)
1142 {
1143 /* parent-next and child-prev */
1144 parent->next = child;
1145 child->prev = parent;
1146 the_arcs[arc_index]->has_been_placed = 1;
1147 }
1148 }
1149 else
1150 {
1151 /* Can attach to either field in the parent, depends
1152 on where we've got space in the child. */
1153 if (child->prev)
1154 {
1155 /* parent-prev and child-next. */
1156 parent->prev = child;
1157 child->next = parent;
1158 the_arcs[arc_index]->has_been_placed = 1;
1159 }
1160 else
1161 {
1162 /* parent-next and child-prev. */
1163 parent->next = child;
1164 child->prev = parent;
1165 the_arcs[arc_index]->has_been_placed = 1;
1166 }
1167 }
1168 }
1169
1170 /* Dump the chains of functions we've made. */
1171 for (arc_index = 0; arc_index < arc_count; arc_index++)
1172 {
1173 Sym *sym;
1174 if (the_arcs[arc_index]->parent->has_been_placed
1175 || the_arcs[arc_index]->child->has_been_placed)
1176 continue;
1177
1178 sym = the_arcs[arc_index]->parent;
1179
1180 /* If this symbol isn't attached to any other
1181 symbols, then we've got a rarely used arc.
1182
1183 Skip it for now, we'll deal with them later. */
1184 if (sym->next == NULL
1185 && sym->prev == NULL)
1186 continue;
1187
1188 /* Get to the start of this chain. */
1189 while (sym->prev)
1190 sym = sym->prev;
1191
1192 while (sym)
1193 {
1194 /* Mark it as placed. */
1195 sym->has_been_placed = 1;
1196 printf ("%s\n", sym->name);
1197 sym = sym->next;
1198 }
1199 }
1200
1201 /* If we want to place all the arcs, then output
1202 those which weren't placed by the main algorithm. */
1203 if (all)
1204 for (arc_index = 0; arc_index < arc_count; arc_index++)
1205 {
1206 Sym *sym;
1207 if (the_arcs[arc_index]->parent->has_been_placed
1208 || the_arcs[arc_index]->child->has_been_placed)
1209 continue;
1210
1211 sym = the_arcs[arc_index]->parent;
1212
1213 sym->has_been_placed = 1;
1214 printf ("%s\n", sym->name);
1215 }
1216 }
1217
1218 /* Compare two function_map structs based on file name.
1219 We want to sort in ascending order. */
1220
1221 static int
1222 cmp_symbol_map (const void * l, const void * r)
1223 {
1224 return filename_cmp (((struct function_map *) l)->file_name,
1225 ((struct function_map *) r)->file_name);
1226 }
1227
1228 /* Print a suggested .o ordering for files on a link line based
1229 on profiling information. This uses the function placement
1230 code for the bulk of its work. */
1231
1232 void
1233 cg_print_file_ordering (void)
1234 {
1235 unsigned long scratch_arc_count;
1236 unsigned long arc_index;
1237 unsigned long sym_index;
1238 Arc **scratch_arcs;
1239 char *last;
1240
1241 scratch_arc_count = 0;
1242
1243 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
1244 for (arc_index = 0; arc_index < numarcs; arc_index++)
1245 {
1246 if (! arcs[arc_index]->parent->mapped
1247 || ! arcs[arc_index]->child->mapped)
1248 arcs[arc_index]->has_been_placed = 1;
1249 }
1250
1251 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
1252 scratch_arcs, &scratch_arc_count);
1253
1254 /* Output .o's not handled by the main placement algorithm. */
1255 for (sym_index = 0; sym_index < symtab.len; sym_index++)
1256 {
1257 if (symtab.base[sym_index].mapped
1258 && ! symtab.base[sym_index].has_been_placed)
1259 printf ("%s\n", symtab.base[sym_index].name);
1260 }
1261
1262 qsort (symbol_map, symbol_map_count, sizeof (struct function_map), cmp_symbol_map);
1263
1264 /* Now output any .o's that didn't have any text symbols. */
1265 last = NULL;
1266 for (sym_index = 0; sym_index < symbol_map_count; sym_index++)
1267 {
1268 unsigned int index2;
1269
1270 /* Don't bother searching if this symbol
1271 is the same as the previous one. */
1272 if (last && !filename_cmp (last, symbol_map[sym_index].file_name))
1273 continue;
1274
1275 for (index2 = 0; index2 < symtab.len; index2++)
1276 {
1277 if (! symtab.base[index2].mapped)
1278 continue;
1279
1280 if (!filename_cmp (symtab.base[index2].name,
1281 symbol_map[sym_index].file_name))
1282 break;
1283 }
1284
1285 /* If we didn't find it in the symbol table, then it must
1286 be a .o with no text symbols. Output it last. */
1287 if (index2 == symtab.len)
1288 printf ("%s\n", symbol_map[sym_index].file_name);
1289 last = symbol_map[sym_index].file_name;
1290 }
1291 }
1292