cg_print.c revision 1.1 1 /* cg_print.c - Print routines for displaying call graphs.
2
3 Copyright 2000, 2001, 2002, 2004, 2007 Free Software Foundation, Inc.
4
5 This file is part of GNU Binutils.
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
20 02110-1301, USA. */
21
22 #include "gprof.h"
24 #include "libiberty.h"
25 #include "search_list.h"
26 #include "source.h"
27 #include "symtab.h"
28 #include "cg_arcs.h"
29 #include "cg_print.h"
30 #include "hist.h"
31 #include "utils.h"
32 #include "corefile.h"
33
34 /* Return value of comparison functions used to sort tables. */
35 #define LESSTHAN -1
36 #define EQUALTO 0
37 #define GREATERTHAN 1
38
39 static void print_header (void);
40 static void print_cycle (Sym *);
41 static int cmp_member (Sym *, Sym *);
42 static void sort_members (Sym *);
43 static void print_members (Sym *);
44 static int cmp_arc (Arc *, Arc *);
45 static void sort_parents (Sym *);
46 static void print_parents (Sym *);
47 static void sort_children (Sym *);
48 static void print_children (Sym *);
49 static void print_line (Sym *);
50 static int cmp_name (const PTR, const PTR);
51 static int cmp_arc_count (const PTR, const PTR);
52 static int cmp_fun_nuses (const PTR, const PTR);
53 static void order_and_dump_functions_by_arcs
54 (Arc **, unsigned long, int, Arc **, unsigned long *);
55
56 /* Declarations of automatically generated functions to output blurbs. */
57 extern void bsd_callg_blurb (FILE * fp);
58 extern void fsf_callg_blurb (FILE * fp);
59
60 double print_time = 0.0;
61
62
63 static void
64 print_header ()
65 {
66 if (first_output)
67 first_output = FALSE;
68 else
69 printf ("\f\n");
70
71 if (!bsd_style_output)
72 {
73 if (print_descriptions)
74 printf (_("\t\t Call graph (explanation follows)\n\n"));
75 else
76 printf (_("\t\t\tCall graph\n\n"));
77 }
78
79 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
80 (long) hist_scale * (long) sizeof (UNIT));
81
82 if (print_time > 0.0)
83 printf (_(" for %.2f%% of %.2f seconds\n\n"),
84 100.0 / print_time, print_time / hz);
85 else
86 {
87 printf (_(" no time propagated\n\n"));
88
89 /* This doesn't hurt, since all the numerators will be 0.0. */
90 print_time = 1.0;
91 }
92
93 if (bsd_style_output)
94 {
95 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
96 "", "", "", "", _("called"), _("total"), _("parents"));
97 printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n",
98 _("index"), _("%time"), _("self"), _("descendants"),
99 _("called"), _("self"), _("name"), _("index"));
100 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
101 "", "", "", "", _("called"), _("total"), _("children"));
102 printf ("\n");
103 }
104 else
105 {
106 printf (_("index %% time self children called name\n"));
107 }
108 }
109
110 /* Print a cycle header. */
111
112 static void
113 print_cycle (Sym *cyc)
114 {
115 char buf[BUFSIZ];
116
117 sprintf (buf, "[%d]", cyc->cg.index);
118 printf (bsd_style_output
119 ? "%-6.6s %5.1f %7.2f %11.2f %7lu"
120 : "%-6.6s %5.1f %7.2f %7.2f %7lu", buf,
121 100 * (cyc->cg.prop.self + cyc->cg.prop.child) / print_time,
122 cyc->cg.prop.self / hz, cyc->cg.prop.child / hz, cyc->ncalls);
123
124 if (cyc->cg.self_calls != 0)
125 printf ("+%-7lu", cyc->cg.self_calls);
126 else
127 printf (" %7.7s", "");
128
129 printf (_(" <cycle %d as a whole> [%d]\n"), cyc->cg.cyc.num, cyc->cg.index);
130 }
131
132 /* Compare LEFT and RIGHT membmer. Major comparison key is
133 CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS. */
134
135 static int
136 cmp_member (Sym *left, Sym *right)
137 {
138 double left_time = left->cg.prop.self + left->cg.prop.child;
139 double right_time = right->cg.prop.self + right->cg.prop.child;
140 unsigned long left_calls = left->ncalls + left->cg.self_calls;
141 unsigned long right_calls = right->ncalls + right->cg.self_calls;
142
143 if (left_time > right_time)
144 return GREATERTHAN;
145
146 if (left_time < right_time)
147 return LESSTHAN;
148
149 if (left_calls > right_calls)
150 return GREATERTHAN;
151
152 if (left_calls < right_calls)
153 return LESSTHAN;
154
155 return EQUALTO;
156 }
157
158 /* Sort members of a cycle. */
159
160 static void
161 sort_members (Sym *cyc)
162 {
163 Sym *todo, *doing, *prev;
164
165 /* Detach cycle members from cyclehead,
166 and insertion sort them back on. */
167 todo = cyc->cg.cyc.next;
168 cyc->cg.cyc.next = 0;
169
170 for (doing = todo; doing != NULL; doing = todo)
171 {
172 todo = doing->cg.cyc.next;
173
174 for (prev = cyc; prev->cg.cyc.next; prev = prev->cg.cyc.next)
175 {
176 if (cmp_member (doing, prev->cg.cyc.next) == GREATERTHAN)
177 break;
178 }
179
180 doing->cg.cyc.next = prev->cg.cyc.next;
181 prev->cg.cyc.next = doing;
182 }
183 }
184
185 /* Print the members of a cycle. */
186
187 static void
188 print_members (Sym *cyc)
189 {
190 Sym *member;
191
192 sort_members (cyc);
193
194 for (member = cyc->cg.cyc.next; member; member = member->cg.cyc.next)
195 {
196 printf (bsd_style_output
197 ? "%6.6s %5.5s %7.2f %11.2f %7lu"
198 : "%6.6s %5.5s %7.2f %7.2f %7lu",
199 "", "", member->cg.prop.self / hz, member->cg.prop.child / hz,
200 member->ncalls);
201
202 if (member->cg.self_calls != 0)
203 printf ("+%-7lu", member->cg.self_calls);
204 else
205 printf (" %7.7s", "");
206
207 printf (" ");
208 print_name (member);
209 printf ("\n");
210 }
211 }
212
213 /* Compare two arcs to/from the same child/parent.
214 - if one arc is a self arc, it's least.
215 - if one arc is within a cycle, it's less than.
216 - if both arcs are within a cycle, compare arc counts.
217 - if neither arc is within a cycle, compare with
218 time + child_time as major key
219 arc count as minor key. */
220
221 static int
222 cmp_arc (Arc *left, Arc *right)
223 {
224 Sym *left_parent = left->parent;
225 Sym *left_child = left->child;
226 Sym *right_parent = right->parent;
227 Sym *right_child = right->child;
228 double left_time, right_time;
229
230 DBG (TIMEDEBUG,
231 printf ("[cmp_arc] ");
232 print_name (left_parent);
233 printf (" calls ");
234 print_name (left_child);
235 printf (" %f + %f %lu/%lu\n", left->time, left->child_time,
236 left->count, left_child->ncalls);
237 printf ("[cmp_arc] ");
238 print_name (right_parent);
239 printf (" calls ");
240 print_name (right_child);
241 printf (" %f + %f %lu/%lu\n", right->time, right->child_time,
242 right->count, right_child->ncalls);
243 printf ("\n");
244 );
245
246 if (left_parent == left_child)
247 return LESSTHAN; /* Left is a self call. */
248
249 if (right_parent == right_child)
250 return GREATERTHAN; /* Right is a self call. */
251
252 if (left_parent->cg.cyc.num != 0 && left_child->cg.cyc.num != 0
253 && left_parent->cg.cyc.num == left_child->cg.cyc.num)
254 {
255 /* Left is a call within a cycle. */
256 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
257 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
258 {
259 /* Right is a call within the cycle, too. */
260 if (left->count < right->count)
261 return LESSTHAN;
262
263 if (left->count > right->count)
264 return GREATERTHAN;
265
266 return EQUALTO;
267 }
268 else
269 {
270 /* Right isn't a call within the cycle. */
271 return LESSTHAN;
272 }
273 }
274 else
275 {
276 /* Left isn't a call within a cycle. */
277 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
278 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
279 {
280 /* Right is a call within a cycle. */
281 return GREATERTHAN;
282 }
283 else
284 {
285 /* Neither is a call within a cycle. */
286 left_time = left->time + left->child_time;
287 right_time = right->time + right->child_time;
288
289 if (left_time < right_time)
290 return LESSTHAN;
291
292 if (left_time > right_time)
293 return GREATERTHAN;
294
295 if (left->count < right->count)
296 return LESSTHAN;
297
298 if (left->count > right->count)
299 return GREATERTHAN;
300
301 return EQUALTO;
302 }
303 }
304 }
305
306
307 static void
308 sort_parents (Sym * child)
309 {
310 Arc *arc, *detached, sorted, *prev;
311
312 /* Unlink parents from child, then insertion sort back on to
313 sorted's parents.
314 *arc the arc you have detached and are inserting.
315 *detached the rest of the arcs to be sorted.
316 sorted arc list onto which you insertion sort.
317 *prev arc before the arc you are comparing. */
318 sorted.next_parent = 0;
319
320 for (arc = child->cg.parents; arc; arc = detached)
321 {
322 detached = arc->next_parent;
323
324 /* Consider *arc as disconnected; insert it into sorted. */
325 for (prev = &sorted; prev->next_parent; prev = prev->next_parent)
326 {
327 if (cmp_arc (arc, prev->next_parent) != GREATERTHAN)
328 break;
329 }
330
331 arc->next_parent = prev->next_parent;
332 prev->next_parent = arc;
333 }
334
335 /* Reattach sorted arcs to child. */
336 child->cg.parents = sorted.next_parent;
337 }
338
339
340 static void
341 print_parents (Sym *child)
342 {
343 Sym *parent;
344 Arc *arc;
345 Sym *cycle_head;
346
347 if (child->cg.cyc.head != 0)
348 cycle_head = child->cg.cyc.head;
349 else
350 cycle_head = child;
351
352 if (!child->cg.parents)
353 {
354 printf (bsd_style_output
355 ? _("%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s <spontaneous>\n")
356 : _("%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s <spontaneous>\n"),
357 "", "", "", "", "", "");
358 return;
359 }
360
361 sort_parents (child);
362
363 for (arc = child->cg.parents; arc; arc = arc->next_parent)
364 {
365 parent = arc->parent;
366 if (child == parent || (child->cg.cyc.num != 0
367 && parent->cg.cyc.num == child->cg.cyc.num))
368 {
369 /* Selfcall or call among siblings. */
370 printf (bsd_style_output
371 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
372 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
373 "", "", "", "",
374 arc->count, "");
375 print_name (parent);
376 printf ("\n");
377 }
378 else
379 {
380 /* Regular parent of child. */
381 printf (bsd_style_output
382 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
383 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
384 "", "",
385 arc->time / hz, arc->child_time / hz,
386 arc->count, cycle_head->ncalls);
387 print_name (parent);
388 printf ("\n");
389 }
390 }
391 }
392
393
394 static void
395 sort_children (Sym *parent)
396 {
397 Arc *arc, *detached, sorted, *prev;
398
399 /* Unlink children from parent, then insertion sort back on to
400 sorted's children.
401 *arc the arc you have detached and are inserting.
402 *detached the rest of the arcs to be sorted.
403 sorted arc list onto which you insertion sort.
404 *prev arc before the arc you are comparing. */
405 sorted.next_child = 0;
406
407 for (arc = parent->cg.children; arc; arc = detached)
408 {
409 detached = arc->next_child;
410
411 /* Consider *arc as disconnected; insert it into sorted. */
412 for (prev = &sorted; prev->next_child; prev = prev->next_child)
413 {
414 if (cmp_arc (arc, prev->next_child) != LESSTHAN)
415 break;
416 }
417
418 arc->next_child = prev->next_child;
419 prev->next_child = arc;
420 }
421
422 /* Reattach sorted children to parent. */
423 parent->cg.children = sorted.next_child;
424 }
425
426
427 static void
428 print_children (Sym *parent)
429 {
430 Sym *child;
431 Arc *arc;
432
433 sort_children (parent);
434 arc = parent->cg.children;
435
436 for (arc = parent->cg.children; arc; arc = arc->next_child)
437 {
438 child = arc->child;
439 if (child == parent || (child->cg.cyc.num != 0
440 && child->cg.cyc.num == parent->cg.cyc.num))
441 {
442 /* Self call or call to sibling. */
443 printf (bsd_style_output
444 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
445 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
446 "", "", "", "", arc->count, "");
447 print_name (child);
448 printf ("\n");
449 }
450 else
451 {
452 /* Regular child of parent. */
453 printf (bsd_style_output
454 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
455 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
456 "", "",
457 arc->time / hz, arc->child_time / hz,
458 arc->count, child->cg.cyc.head->ncalls);
459 print_name (child);
460 printf ("\n");
461 }
462 }
463 }
464
465
466 static void
467 print_line (Sym *np)
468 {
469 char buf[BUFSIZ];
470
471 sprintf (buf, "[%d]", np->cg.index);
472 printf (bsd_style_output
473 ? "%-6.6s %5.1f %7.2f %11.2f"
474 : "%-6.6s %5.1f %7.2f %7.2f", buf,
475 100 * (np->cg.prop.self + np->cg.prop.child) / print_time,
476 np->cg.prop.self / hz, np->cg.prop.child / hz);
477
478 if ((np->ncalls + np->cg.self_calls) != 0)
479 {
480 printf (" %7lu", np->ncalls);
481
482 if (np->cg.self_calls != 0)
483 printf ("+%-7lu ", np->cg.self_calls);
484 else
485 printf (" %7.7s ", "");
486 }
487 else
488 {
489 printf (" %7.7s %7.7s ", "", "");
490 }
491
492 print_name (np);
493 printf ("\n");
494 }
495
496
497 /* Print dynamic call graph. */
498
499 void
500 cg_print (Sym ** timesortsym)
501 {
502 unsigned int index;
503 Sym *parent;
504
505 if (print_descriptions && bsd_style_output)
506 bsd_callg_blurb (stdout);
507
508 print_header ();
509
510 for (index = 0; index < symtab.len + num_cycles; ++index)
511 {
512 parent = timesortsym[index];
513
514 if ((ignore_zeros && parent->ncalls == 0
515 && parent->cg.self_calls == 0 && parent->cg.prop.self == 0
516 && parent->cg.prop.child == 0)
517 || !parent->cg.print_flag
518 || (line_granularity && ! parent->is_func))
519 continue;
520
521 if (!parent->name && parent->cg.cyc.num != 0)
522 {
523 /* Cycle header. */
524 print_cycle (parent);
525 print_members (parent);
526 }
527 else
528 {
529 print_parents (parent);
530 print_line (parent);
531 print_children (parent);
532 }
533
534 if (bsd_style_output)
535 printf ("\n");
536
537 printf ("-----------------------------------------------\n");
538
539 if (bsd_style_output)
540 printf ("\n");
541 }
542
543 free (timesortsym);
544
545 if (print_descriptions && !bsd_style_output)
546 fsf_callg_blurb (stdout);
547 }
548
549
550 static int
551 cmp_name (const PTR left, const PTR right)
552 {
553 const Sym **npp1 = (const Sym **) left;
554 const Sym **npp2 = (const Sym **) right;
555
556 return strcmp ((*npp1)->name, (*npp2)->name);
557 }
558
559
560 void
561 cg_print_index ()
562 {
563 unsigned int index;
564 unsigned int nnames, todo, i, j;
565 int col, starting_col;
566 Sym **name_sorted_syms, *sym;
567 const char *filename;
568 char buf[20];
569 int column_width = (output_width - 1) / 3; /* Don't write in last col! */
570
571 /* Now, sort regular function name
572 alphabetically to create an index. */
573 name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *));
574
575 for (index = 0, nnames = 0; index < symtab.len; index++)
576 {
577 if (ignore_zeros && symtab.base[index].ncalls == 0
578 && symtab.base[index].hist.time == 0)
579 continue;
580
581 name_sorted_syms[nnames++] = &symtab.base[index];
582 }
583
584 qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name);
585
586 for (index = 1, todo = nnames; index <= num_cycles; index++)
587 name_sorted_syms[todo++] = &cycle_header[index];
588
589 printf ("\f\n");
590 printf (_("Index by function name\n\n"));
591 index = (todo + 2) / 3;
592
593 for (i = 0; i < index; i++)
594 {
595 col = 0;
596 starting_col = 0;
597
598 for (j = i; j < todo; j += index)
599 {
600 sym = name_sorted_syms[j];
601
602 if (sym->cg.print_flag)
603 sprintf (buf, "[%d]", sym->cg.index);
604 else
605 sprintf (buf, "(%d)", sym->cg.index);
606
607 if (j < nnames)
608 {
609 if (bsd_style_output)
610 {
611 printf ("%6.6s %-19.19s", buf, sym->name);
612 }
613 else
614 {
615 col += strlen (buf);
616
617 for (; col < starting_col + 5; ++col)
618 putchar (' ');
619
620 printf (" %s ", buf);
621 col += print_name_only (sym);
622
623 if (!line_granularity && sym->is_static && sym->file)
624 {
625 filename = sym->file->name;
626
627 if (!print_path)
628 {
629 filename = strrchr (filename, '/');
630
631 if (filename)
632 ++filename;
633 else
634 filename = sym->file->name;
635 }
636
637 printf (" (%s)", filename);
638 col += strlen (filename) + 3;
639 }
640 }
641 }
642 else
643 {
644 if (bsd_style_output)
645 {
646 printf ("%6.6s ", buf);
647 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
648 printf ("%-19.19s", buf);
649 }
650 else
651 {
652 col += strlen (buf);
653 for (; col < starting_col + 5; ++col)
654 putchar (' ');
655 printf (" %s ", buf);
656 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
657 printf ("%s", buf);
658 col += strlen (buf);
659 }
660 }
661
662 starting_col += column_width;
663 }
664
665 printf ("\n");
666 }
667
668 free (name_sorted_syms);
669 }
670
671 /* Compare two arcs based on their usage counts.
672 We want to sort in descending order. */
673
674 static int
675 cmp_arc_count (const PTR left, const PTR right)
676 {
677 const Arc **npp1 = (const Arc **) left;
678 const Arc **npp2 = (const Arc **) right;
679
680 if ((*npp1)->count > (*npp2)->count)
681 return -1;
682 else if ((*npp1)->count < (*npp2)->count)
683 return 1;
684 else
685 return 0;
686 }
687
688 /* Compare two funtions based on their usage counts.
689 We want to sort in descending order. */
690
691 static int
692 cmp_fun_nuses (const PTR left, const PTR right)
693 {
694 const Sym **npp1 = (const Sym **) left;
695 const Sym **npp2 = (const Sym **) right;
696
697 if ((*npp1)->nuses > (*npp2)->nuses)
698 return -1;
699 else if ((*npp1)->nuses < (*npp2)->nuses)
700 return 1;
701 else
702 return 0;
703 }
704
705 /* Print a suggested function ordering based on the profiling data.
706
707 We perform 4 major steps when ordering functions:
708
709 * Group unused functions together and place them at the
710 end of the function order.
711
712 * Search the highest use arcs (those which account for 90% of
713 the total arc count) for functions which have several parents.
714
715 Group those with the most call sites together (currently the
716 top 1.25% which have at least five different call sites).
717
718 These are emitted at the start of the function order.
719
720 * Use a greedy placement algorithm to place functions which
721 occur in the top 99% of the arcs in the profile. Some provisions
722 are made to handle high usage arcs where the parent and/or
723 child has already been placed.
724
725 * Run the same greedy placement algorithm on the remaining
726 arcs to place the leftover functions.
727
728
729 The various "magic numbers" should (one day) be tuneable by command
730 line options. They were arrived at by benchmarking a few applications
731 with various values to see which values produced better overall function
732 orderings.
733
734 Of course, profiling errors, machine limitations (PA long calls), and
735 poor cutoff values for the placement algorithm may limit the usefullness
736 of the resulting function order. Improvements would be greatly appreciated.
737
738 Suggestions:
739
740 * Place the functions with many callers near the middle of the
741 list to reduce long calls.
742
743 * Propagate arc usage changes as functions are placed. Ie if
744 func1 and func2 are placed together, arcs to/from those arcs
745 to the same parent/child should be combined, then resort the
746 arcs to choose the next one.
747
748 * Implement some global positioning algorithm to place the
749 chains made by the greedy local positioning algorithm. Probably
750 by examining arcs which haven't been placed yet to tie two
751 chains together.
752
753 * Take a function's size and time into account in the algorithm;
754 size in particular is important on the PA (long calls). Placing
755 many small functions onto their own page may be wise.
756
757 * Use better profiling information; many published algorithms
758 are based on call sequences through time, rather than just
759 arc counts.
760
761 * Prodecure cloning could improve performance when a small number
762 of arcs account for most of the calls to a particular function.
763
764 * Use relocation information to avoid moving unused functions
765 completely out of the code stream; this would avoid severe lossage
766 when the profile data bears little resemblance to actual runs.
767
768 * Propagation of arc usages should also improve .o link line
769 ordering which shares the same arc placement algorithm with
770 the function ordering code (in fact it is a degenerate case
771 of function ordering). */
772
773 void
774 cg_print_function_ordering ()
775 {
776 unsigned long index, used, unused, scratch_index;
777 unsigned long unplaced_arc_count, high_arc_count, scratch_arc_count;
778 #ifdef __GNUC__
779 unsigned long long total_arcs, tmp_arcs_count;
780 #else
781 unsigned long total_arcs, tmp_arcs_count;
782 #endif
783 Sym **unused_syms, **used_syms, **scratch_syms;
784 Arc **unplaced_arcs, **high_arcs, **scratch_arcs;
785
786 index = 0;
787 used = 0;
788 unused = 0;
789 scratch_index = 0;
790 unplaced_arc_count = 0;
791 high_arc_count = 0;
792 scratch_arc_count = 0;
793
794 /* First group all the unused functions together. */
795 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
796 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
797 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
798 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
799 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
800 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
801
802 /* Walk through all the functions; mark those which are never
803 called as placed (we'll emit them as a group later). */
804 for (index = 0, used = 0, unused = 0; index < symtab.len; index++)
805 {
806 if (symtab.base[index].ncalls == 0)
807 {
808 /* Filter out gprof generated names. */
809 if (strcmp (symtab.base[index].name, "<locore>")
810 && strcmp (symtab.base[index].name, "<hicore>"))
811 {
812 unused_syms[unused++] = &symtab.base[index];
813 symtab.base[index].has_been_placed = 1;
814 }
815 }
816 else
817 {
818 used_syms[used++] = &symtab.base[index];
819 symtab.base[index].has_been_placed = 0;
820 symtab.base[index].next = 0;
821 symtab.base[index].prev = 0;
822 symtab.base[index].nuses = 0;
823 }
824 }
825
826 /* Sort the arcs from most used to least used. */
827 qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count);
828
829 /* Compute the total arc count. Also mark arcs as unplaced.
830
831 Note we don't compensate for overflow if that happens!
832 Overflow is much less likely when this file is compiled
833 with GCC as it can double-wide integers via long long. */
834 total_arcs = 0;
835 for (index = 0; index < numarcs; index++)
836 {
837 total_arcs += arcs[index]->count;
838 arcs[index]->has_been_placed = 0;
839 }
840
841 /* We want to pull out those functions which are referenced
842 by many highly used arcs and emit them as a group. This
843 could probably use some tuning. */
844 tmp_arcs_count = 0;
845 for (index = 0; index < numarcs; index++)
846 {
847 tmp_arcs_count += arcs[index]->count;
848
849 /* Count how many times each parent and child are used up
850 to our threshhold of arcs (90%). */
851 if ((double)tmp_arcs_count / (double)total_arcs > 0.90)
852 break;
853
854 arcs[index]->child->nuses++;
855 }
856
857 /* Now sort a temporary symbol table based on the number of
858 times each function was used in the highest used arcs. */
859 memcpy (scratch_syms, used_syms, used * sizeof (Sym *));
860 qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses);
861
862 /* Now pick out those symbols we're going to emit as
863 a group. We take up to 1.25% of the used symbols. */
864 for (index = 0; index < used / 80; index++)
865 {
866 Sym *sym = scratch_syms[index];
867 Arc *arc;
868
869 /* If we hit symbols that aren't used from many call sites,
870 then we can quit. We choose five as the low limit for
871 no particular reason. */
872 if (sym->nuses == 5)
873 break;
874
875 /* We're going to need the arcs between these functions.
876 Unfortunately, we don't know all these functions
877 until we're done. So we keep track of all the arcs
878 to the functions we care about, then prune out those
879 which are uninteresting.
880
881 An interesting variation would be to quit when we found
882 multi-call site functions which account for some percentage
883 of the arcs. */
884 arc = sym->cg.children;
885
886 while (arc)
887 {
888 if (arc->parent != arc->child)
889 scratch_arcs[scratch_arc_count++] = arc;
890 arc->has_been_placed = 1;
891 arc = arc->next_child;
892 }
893
894 arc = sym->cg.parents;
895
896 while (arc)
897 {
898 if (arc->parent != arc->child)
899 scratch_arcs[scratch_arc_count++] = arc;
900 arc->has_been_placed = 1;
901 arc = arc->next_parent;
902 }
903
904 /* Keep track of how many symbols we're going to place. */
905 scratch_index = index;
906
907 /* A lie, but it makes identifying
908 these functions easier later. */
909 sym->has_been_placed = 1;
910 }
911
912 /* Now walk through the temporary arcs and copy
913 those we care about into the high arcs array. */
914 for (index = 0; index < scratch_arc_count; index++)
915 {
916 Arc *arc = scratch_arcs[index];
917
918 /* If this arc refers to highly used functions, then
919 then we want to keep it. */
920 if (arc->child->has_been_placed
921 && arc->parent->has_been_placed)
922 {
923 high_arcs[high_arc_count++] = scratch_arcs[index];
924
925 /* We need to turn of has_been_placed since we're going to
926 use the main arc placement algorithm on these arcs. */
927 arc->child->has_been_placed = 0;
928 arc->parent->has_been_placed = 0;
929 }
930 }
931
932 /* Dump the multi-site high usage functions which are not
933 going to be ordered by the main ordering algorithm. */
934 for (index = 0; index < scratch_index; index++)
935 {
936 if (scratch_syms[index]->has_been_placed)
937 printf ("%s\n", scratch_syms[index]->name);
938 }
939
940 /* Now we can order the multi-site high use
941 functions based on the arcs between them. */
942 qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count);
943 order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1,
944 unplaced_arcs, &unplaced_arc_count);
945
946 /* Order and dump the high use functions left,
947 these typically have only a few call sites. */
948 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
949 unplaced_arcs, &unplaced_arc_count);
950
951 /* Now place the rarely used functions. */
952 order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1,
953 scratch_arcs, &scratch_arc_count);
954
955 /* Output any functions not emitted by the order_and_dump calls. */
956 for (index = 0; index < used; index++)
957 if (used_syms[index]->has_been_placed == 0)
958 printf("%s\n", used_syms[index]->name);
959
960 /* Output the unused functions. */
961 for (index = 0; index < unused; index++)
962 printf("%s\n", unused_syms[index]->name);
963
964 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
965 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
966 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
967 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
968 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
969 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
970
971 free (unused_syms);
972 free (used_syms);
973 free (scratch_syms);
974 free (high_arcs);
975 free (scratch_arcs);
976 free (unplaced_arcs);
977 }
978
979 /* Place functions based on the arcs in THE_ARCS with ARC_COUNT entries;
980 place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
981
982 If ALL is nonzero, then place all functions referenced by THE_ARCS,
983 else only place those referenced in the top 99% of the arcs in THE_ARCS. */
984
985 #define MOST 0.99
986 static void
987 order_and_dump_functions_by_arcs (the_arcs, arc_count, all,
988 unplaced_arcs, unplaced_arc_count)
989 Arc **the_arcs;
990 unsigned long arc_count;
991 int all;
992 Arc **unplaced_arcs;
993 unsigned long *unplaced_arc_count;
994 {
995 #ifdef __GNUC__
996 unsigned long long tmp_arcs, total_arcs;
997 #else
998 unsigned long tmp_arcs, total_arcs;
999 #endif
1000 unsigned int index;
1001
1002 /* If needed, compute the total arc count.
1003
1004 Note we don't compensate for overflow if that happens! */
1005 if (! all)
1006 {
1007 total_arcs = 0;
1008 for (index = 0; index < arc_count; index++)
1009 total_arcs += the_arcs[index]->count;
1010 }
1011 else
1012 total_arcs = 0;
1013
1014 tmp_arcs = 0;
1015
1016 for (index = 0; index < arc_count; index++)
1017 {
1018 Sym *sym1, *sym2;
1019 Sym *child, *parent;
1020
1021 tmp_arcs += the_arcs[index]->count;
1022
1023 /* Ignore this arc if it's already been placed. */
1024 if (the_arcs[index]->has_been_placed)
1025 continue;
1026
1027 child = the_arcs[index]->child;
1028 parent = the_arcs[index]->parent;
1029
1030 /* If we're not using all arcs, and this is a rarely used
1031 arc, then put it on the unplaced_arc list. Similarly
1032 if both the parent and child of this arc have been placed. */
1033 if ((! all && (double)tmp_arcs / (double)total_arcs > MOST)
1034 || child->has_been_placed || parent->has_been_placed)
1035 {
1036 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
1037 continue;
1038 }
1039
1040 /* If all slots in the parent and child are full, then there isn't
1041 anything we can do right now. We'll place this arc on the
1042 unplaced arc list in the hope that a global positioning
1043 algorithm can use it to place function chains. */
1044 if (parent->next && parent->prev && child->next && child->prev)
1045 {
1046 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
1047 continue;
1048 }
1049
1050 /* If the parent is unattached, then find the closest
1051 place to attach it onto child's chain. Similarly
1052 for the opposite case. */
1053 if (!parent->next && !parent->prev)
1054 {
1055 int next_count = 0;
1056 int prev_count = 0;
1057 Sym *prev = child;
1058 Sym *next = child;
1059
1060 /* Walk to the beginning and end of the child's chain. */
1061 while (next->next)
1062 {
1063 next = next->next;
1064 next_count++;
1065 }
1066
1067 while (prev->prev)
1068 {
1069 prev = prev->prev;
1070 prev_count++;
1071 }
1072
1073 /* Choose the closest. */
1074 child = next_count < prev_count ? next : prev;
1075 }
1076 else if (! child->next && !child->prev)
1077 {
1078 int next_count = 0;
1079 int prev_count = 0;
1080 Sym *prev = parent;
1081 Sym *next = parent;
1082
1083 while (next->next)
1084 {
1085 next = next->next;
1086 next_count++;
1087 }
1088
1089 while (prev->prev)
1090 {
1091 prev = prev->prev;
1092 prev_count++;
1093 }
1094
1095 parent = prev_count < next_count ? prev : next;
1096 }
1097 else
1098 {
1099 /* Couldn't find anywhere to attach the functions,
1100 put the arc on the unplaced arc list. */
1101 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
1102 continue;
1103 }
1104
1105 /* Make sure we don't tie two ends together. */
1106 sym1 = parent;
1107 if (sym1->next)
1108 while (sym1->next)
1109 sym1 = sym1->next;
1110 else
1111 while (sym1->prev)
1112 sym1 = sym1->prev;
1113
1114 sym2 = child;
1115 if (sym2->next)
1116 while (sym2->next)
1117 sym2 = sym2->next;
1118 else
1119 while (sym2->prev)
1120 sym2 = sym2->prev;
1121
1122 if (sym1 == child
1123 && sym2 == parent)
1124 {
1125 /* This would tie two ends together. */
1126 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
1127 continue;
1128 }
1129
1130 if (parent->next)
1131 {
1132 /* Must attach to the parent's prev field. */
1133 if (! child->next)
1134 {
1135 /* parent-prev and child-next */
1136 parent->prev = child;
1137 child->next = parent;
1138 the_arcs[index]->has_been_placed = 1;
1139 }
1140 }
1141 else if (parent->prev)
1142 {
1143 /* Must attach to the parent's next field. */
1144 if (! child->prev)
1145 {
1146 /* parent-next and child-prev */
1147 parent->next = child;
1148 child->prev = parent;
1149 the_arcs[index]->has_been_placed = 1;
1150 }
1151 }
1152 else
1153 {
1154 /* Can attach to either field in the parent, depends
1155 on where we've got space in the child. */
1156 if (child->prev)
1157 {
1158 /* parent-prev and child-next. */
1159 parent->prev = child;
1160 child->next = parent;
1161 the_arcs[index]->has_been_placed = 1;
1162 }
1163 else
1164 {
1165 /* parent-next and child-prev. */
1166 parent->next = child;
1167 child->prev = parent;
1168 the_arcs[index]->has_been_placed = 1;
1169 }
1170 }
1171 }
1172
1173 /* Dump the chains of functions we've made. */
1174 for (index = 0; index < arc_count; index++)
1175 {
1176 Sym *sym;
1177 if (the_arcs[index]->parent->has_been_placed
1178 || the_arcs[index]->child->has_been_placed)
1179 continue;
1180
1181 sym = the_arcs[index]->parent;
1182
1183 /* If this symbol isn't attached to any other
1184 symbols, then we've got a rarely used arc.
1185
1186 Skip it for now, we'll deal with them later. */
1187 if (sym->next == NULL
1188 && sym->prev == NULL)
1189 continue;
1190
1191 /* Get to the start of this chain. */
1192 while (sym->prev)
1193 sym = sym->prev;
1194
1195 while (sym)
1196 {
1197 /* Mark it as placed. */
1198 sym->has_been_placed = 1;
1199 printf ("%s\n", sym->name);
1200 sym = sym->next;
1201 }
1202 }
1203
1204 /* If we want to place all the arcs, then output
1205 those which weren't placed by the main algorithm. */
1206 if (all)
1207 for (index = 0; index < arc_count; index++)
1208 {
1209 Sym *sym;
1210 if (the_arcs[index]->parent->has_been_placed
1211 || the_arcs[index]->child->has_been_placed)
1212 continue;
1213
1214 sym = the_arcs[index]->parent;
1215
1216 sym->has_been_placed = 1;
1217 printf ("%s\n", sym->name);
1218 }
1219 }
1220
1221 /* Print a suggested .o ordering for files on a link line based
1222 on profiling information. This uses the function placement
1223 code for the bulk of its work. */
1224
1225 void
1226 cg_print_file_ordering ()
1227 {
1228 unsigned long scratch_arc_count, index;
1229 Arc **scratch_arcs;
1230 char *last;
1231
1232 scratch_arc_count = 0;
1233
1234 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
1235 for (index = 0; index < numarcs; index++)
1236 {
1237 if (! arcs[index]->parent->mapped
1238 || ! arcs[index]->child->mapped)
1239 arcs[index]->has_been_placed = 1;
1240 }
1241
1242 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
1243 scratch_arcs, &scratch_arc_count);
1244
1245 /* Output .o's not handled by the main placement algorithm. */
1246 for (index = 0; index < symtab.len; index++)
1247 {
1248 if (symtab.base[index].mapped
1249 && ! symtab.base[index].has_been_placed)
1250 printf ("%s\n", symtab.base[index].name);
1251 }
1252
1253 /* Now output any .o's that didn't have any text symbols. */
1254 last = NULL;
1255 for (index = 0; index < symbol_map_count; index++)
1256 {
1257 unsigned int index2;
1258
1259 /* Don't bother searching if this symbol
1260 is the same as the previous one. */
1261 if (last && !strcmp (last, symbol_map[index].file_name))
1262 continue;
1263
1264 for (index2 = 0; index2 < symtab.len; index2++)
1265 {
1266 if (! symtab.base[index2].mapped)
1267 continue;
1268
1269 if (!strcmp (symtab.base[index2].name, symbol_map[index].file_name))
1270 break;
1271 }
1272
1273 /* If we didn't find it in the symbol table, then it must
1274 be a .o with no text symbols. Output it last. */
1275 if (index2 == symtab.len)
1276 printf ("%s\n", symbol_map[index].file_name);
1277 last = symbol_map[index].file_name;
1278 }
1279 }
1280