cg_print.c revision 1.1.1.2.2.1 1 /* cg_print.c - Print routines for displaying call graphs.
2
3 Copyright 2000, 2001, 2002, 2004, 2007, 2009, 2011
4 Free Software Foundation, Inc.
5
6 This file is part of GNU Binutils.
7
8 This program is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3 of the License, or
11 (at your option) any later version.
12
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with this program; if not, write to the Free Software
20 Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
21 02110-1301, USA. */
22
23 #include "gprof.h"
25 #include "libiberty.h"
26 #include "filenames.h"
27 #include "search_list.h"
28 #include "source.h"
29 #include "symtab.h"
30 #include "cg_arcs.h"
31 #include "cg_print.h"
32 #include "hist.h"
33 #include "utils.h"
34 #include "corefile.h"
35
36 /* Return value of comparison functions used to sort tables. */
37 #define LESSTHAN -1
38 #define EQUALTO 0
39 #define GREATERTHAN 1
40
41 static void print_header (void);
42 static void print_cycle (Sym *);
43 static int cmp_member (Sym *, Sym *);
44 static void sort_members (Sym *);
45 static void print_members (Sym *);
46 static int cmp_arc (Arc *, Arc *);
47 static void sort_parents (Sym *);
48 static void print_parents (Sym *);
49 static void sort_children (Sym *);
50 static void print_children (Sym *);
51 static void print_line (Sym *);
52 static int cmp_name (const PTR, const PTR);
53 static int cmp_arc_count (const PTR, const PTR);
54 static int cmp_fun_nuses (const PTR, const PTR);
55 static void order_and_dump_functions_by_arcs
56 (Arc **, unsigned long, int, Arc **, unsigned long *);
57
58 /* Declarations of automatically generated functions to output blurbs. */
59 extern void bsd_callg_blurb (FILE * fp);
60 extern void fsf_callg_blurb (FILE * fp);
61
62 double print_time = 0.0;
63
64
65 static void
66 print_header ()
67 {
68 if (first_output)
69 first_output = FALSE;
70 else
71 printf ("\f\n");
72
73 if (!bsd_style_output)
74 {
75 if (print_descriptions)
76 printf (_("\t\t Call graph (explanation follows)\n\n"));
77 else
78 printf (_("\t\t\tCall graph\n\n"));
79 }
80
81 printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
82 (long) hist_scale * (long) sizeof (UNIT));
83
84 if (print_time > 0.0)
85 printf (_(" for %.2f%% of %.2f seconds\n\n"),
86 100.0 / print_time, print_time / hz);
87 else
88 {
89 printf (_(" no time propagated\n\n"));
90
91 /* This doesn't hurt, since all the numerators will be 0.0. */
92 print_time = 1.0;
93 }
94
95 if (bsd_style_output)
96 {
97 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
98 "", "", "", "", _("called"), _("total"), _("parents"));
99 printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n",
100 _("index"),
101 /* xgettext:no-c-format */
102 _("%time"),
103 _("self"), _("descendants"), _("called"), _("self"),
104 _("name"), _("index"));
105 printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s %-8.8s\n",
106 "", "", "", "", _("called"), _("total"), _("children"));
107 printf ("\n");
108 }
109 else
110 {
111 printf (_("index %% time self children called name\n"));
112 }
113 }
114
115 /* Print a cycle header. */
116
117 static void
118 print_cycle (Sym *cyc)
119 {
120 char buf[BUFSIZ];
121
122 sprintf (buf, "[%d]", cyc->cg.index);
123 printf (bsd_style_output
124 ? "%-6.6s %5.1f %7.2f %11.2f %7lu"
125 : "%-6.6s %5.1f %7.2f %7.2f %7lu", buf,
126 100 * (cyc->cg.prop.self + cyc->cg.prop.child) / print_time,
127 cyc->cg.prop.self / hz, cyc->cg.prop.child / hz, cyc->ncalls);
128
129 if (cyc->cg.self_calls != 0)
130 printf ("+%-7lu", cyc->cg.self_calls);
131 else
132 printf (" %7.7s", "");
133
134 printf (_(" <cycle %d as a whole> [%d]\n"), cyc->cg.cyc.num, cyc->cg.index);
135 }
136
137 /* Compare LEFT and RIGHT membmer. Major comparison key is
138 CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS. */
139
140 static int
141 cmp_member (Sym *left, Sym *right)
142 {
143 double left_time = left->cg.prop.self + left->cg.prop.child;
144 double right_time = right->cg.prop.self + right->cg.prop.child;
145 unsigned long left_calls = left->ncalls + left->cg.self_calls;
146 unsigned long right_calls = right->ncalls + right->cg.self_calls;
147
148 if (left_time > right_time)
149 return GREATERTHAN;
150
151 if (left_time < right_time)
152 return LESSTHAN;
153
154 if (left_calls > right_calls)
155 return GREATERTHAN;
156
157 if (left_calls < right_calls)
158 return LESSTHAN;
159
160 return EQUALTO;
161 }
162
163 /* Sort members of a cycle. */
164
165 static void
166 sort_members (Sym *cyc)
167 {
168 Sym *todo, *doing, *prev;
169
170 /* Detach cycle members from cyclehead,
171 and insertion sort them back on. */
172 todo = cyc->cg.cyc.next;
173 cyc->cg.cyc.next = 0;
174
175 for (doing = todo; doing != NULL; doing = todo)
176 {
177 todo = doing->cg.cyc.next;
178
179 for (prev = cyc; prev->cg.cyc.next; prev = prev->cg.cyc.next)
180 {
181 if (cmp_member (doing, prev->cg.cyc.next) == GREATERTHAN)
182 break;
183 }
184
185 doing->cg.cyc.next = prev->cg.cyc.next;
186 prev->cg.cyc.next = doing;
187 }
188 }
189
190 /* Print the members of a cycle. */
191
192 static void
193 print_members (Sym *cyc)
194 {
195 Sym *member;
196
197 sort_members (cyc);
198
199 for (member = cyc->cg.cyc.next; member; member = member->cg.cyc.next)
200 {
201 printf (bsd_style_output
202 ? "%6.6s %5.5s %7.2f %11.2f %7lu"
203 : "%6.6s %5.5s %7.2f %7.2f %7lu",
204 "", "", member->cg.prop.self / hz, member->cg.prop.child / hz,
205 member->ncalls);
206
207 if (member->cg.self_calls != 0)
208 printf ("+%-7lu", member->cg.self_calls);
209 else
210 printf (" %7.7s", "");
211
212 printf (" ");
213 print_name (member);
214 printf ("\n");
215 }
216 }
217
218 /* Compare two arcs to/from the same child/parent.
219 - if one arc is a self arc, it's least.
220 - if one arc is within a cycle, it's less than.
221 - if both arcs are within a cycle, compare arc counts.
222 - if neither arc is within a cycle, compare with
223 time + child_time as major key
224 arc count as minor key. */
225
226 static int
227 cmp_arc (Arc *left, Arc *right)
228 {
229 Sym *left_parent = left->parent;
230 Sym *left_child = left->child;
231 Sym *right_parent = right->parent;
232 Sym *right_child = right->child;
233 double left_time, right_time;
234
235 DBG (TIMEDEBUG,
236 printf ("[cmp_arc] ");
237 print_name (left_parent);
238 printf (" calls ");
239 print_name (left_child);
240 printf (" %f + %f %lu/%lu\n", left->time, left->child_time,
241 left->count, left_child->ncalls);
242 printf ("[cmp_arc] ");
243 print_name (right_parent);
244 printf (" calls ");
245 print_name (right_child);
246 printf (" %f + %f %lu/%lu\n", right->time, right->child_time,
247 right->count, right_child->ncalls);
248 printf ("\n");
249 );
250
251 if (left_parent == left_child)
252 return LESSTHAN; /* Left is a self call. */
253
254 if (right_parent == right_child)
255 return GREATERTHAN; /* Right is a self call. */
256
257 if (left_parent->cg.cyc.num != 0 && left_child->cg.cyc.num != 0
258 && left_parent->cg.cyc.num == left_child->cg.cyc.num)
259 {
260 /* Left is a call within a cycle. */
261 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
262 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
263 {
264 /* Right is a call within the cycle, too. */
265 if (left->count < right->count)
266 return LESSTHAN;
267
268 if (left->count > right->count)
269 return GREATERTHAN;
270
271 return EQUALTO;
272 }
273 else
274 {
275 /* Right isn't a call within the cycle. */
276 return LESSTHAN;
277 }
278 }
279 else
280 {
281 /* Left isn't a call within a cycle. */
282 if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
283 && right_parent->cg.cyc.num == right_child->cg.cyc.num)
284 {
285 /* Right is a call within a cycle. */
286 return GREATERTHAN;
287 }
288 else
289 {
290 /* Neither is a call within a cycle. */
291 left_time = left->time + left->child_time;
292 right_time = right->time + right->child_time;
293
294 if (left_time < right_time)
295 return LESSTHAN;
296
297 if (left_time > right_time)
298 return GREATERTHAN;
299
300 if (left->count < right->count)
301 return LESSTHAN;
302
303 if (left->count > right->count)
304 return GREATERTHAN;
305
306 return EQUALTO;
307 }
308 }
309 }
310
311
312 static void
313 sort_parents (Sym * child)
314 {
315 Arc *arc, *detached, sorted, *prev;
316
317 /* Unlink parents from child, then insertion sort back on to
318 sorted's parents.
319 *arc the arc you have detached and are inserting.
320 *detached the rest of the arcs to be sorted.
321 sorted arc list onto which you insertion sort.
322 *prev arc before the arc you are comparing. */
323 sorted.next_parent = 0;
324
325 for (arc = child->cg.parents; arc; arc = detached)
326 {
327 detached = arc->next_parent;
328
329 /* Consider *arc as disconnected; insert it into sorted. */
330 for (prev = &sorted; prev->next_parent; prev = prev->next_parent)
331 {
332 if (cmp_arc (arc, prev->next_parent) != GREATERTHAN)
333 break;
334 }
335
336 arc->next_parent = prev->next_parent;
337 prev->next_parent = arc;
338 }
339
340 /* Reattach sorted arcs to child. */
341 child->cg.parents = sorted.next_parent;
342 }
343
344
345 static void
346 print_parents (Sym *child)
347 {
348 Sym *parent;
349 Arc *arc;
350 Sym *cycle_head;
351
352 if (child->cg.cyc.head != 0)
353 cycle_head = child->cg.cyc.head;
354 else
355 cycle_head = child;
356
357 if (!child->cg.parents)
358 {
359 printf (bsd_style_output
360 ? _("%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s <spontaneous>\n")
361 : _("%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s <spontaneous>\n"),
362 "", "", "", "", "", "");
363 return;
364 }
365
366 sort_parents (child);
367
368 for (arc = child->cg.parents; arc; arc = arc->next_parent)
369 {
370 parent = arc->parent;
371 if (child == parent || (child->cg.cyc.num != 0
372 && parent->cg.cyc.num == child->cg.cyc.num))
373 {
374 /* Selfcall or call among siblings. */
375 printf (bsd_style_output
376 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
377 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
378 "", "", "", "",
379 arc->count, "");
380 print_name (parent);
381 printf ("\n");
382 }
383 else
384 {
385 /* Regular parent of child. */
386 printf (bsd_style_output
387 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
388 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
389 "", "",
390 arc->time / hz, arc->child_time / hz,
391 arc->count, cycle_head->ncalls);
392 print_name (parent);
393 printf ("\n");
394 }
395 }
396 }
397
398
399 static void
400 sort_children (Sym *parent)
401 {
402 Arc *arc, *detached, sorted, *prev;
403
404 /* Unlink children from parent, then insertion sort back on to
405 sorted's children.
406 *arc the arc you have detached and are inserting.
407 *detached the rest of the arcs to be sorted.
408 sorted arc list onto which you insertion sort.
409 *prev arc before the arc you are comparing. */
410 sorted.next_child = 0;
411
412 for (arc = parent->cg.children; arc; arc = detached)
413 {
414 detached = arc->next_child;
415
416 /* Consider *arc as disconnected; insert it into sorted. */
417 for (prev = &sorted; prev->next_child; prev = prev->next_child)
418 {
419 if (cmp_arc (arc, prev->next_child) != LESSTHAN)
420 break;
421 }
422
423 arc->next_child = prev->next_child;
424 prev->next_child = arc;
425 }
426
427 /* Reattach sorted children to parent. */
428 parent->cg.children = sorted.next_child;
429 }
430
431
432 static void
433 print_children (Sym *parent)
434 {
435 Sym *child;
436 Arc *arc;
437
438 sort_children (parent);
439 arc = parent->cg.children;
440
441 for (arc = parent->cg.children; arc; arc = arc->next_child)
442 {
443 child = arc->child;
444 if (child == parent || (child->cg.cyc.num != 0
445 && child->cg.cyc.num == parent->cg.cyc.num))
446 {
447 /* Self call or call to sibling. */
448 printf (bsd_style_output
449 ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s "
450 : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s ",
451 "", "", "", "", arc->count, "");
452 print_name (child);
453 printf ("\n");
454 }
455 else
456 {
457 /* Regular child of parent. */
458 printf (bsd_style_output
459 ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu "
460 : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu ",
461 "", "",
462 arc->time / hz, arc->child_time / hz,
463 arc->count, child->cg.cyc.head->ncalls);
464 print_name (child);
465 printf ("\n");
466 }
467 }
468 }
469
470
471 static void
472 print_line (Sym *np)
473 {
474 char buf[BUFSIZ];
475
476 sprintf (buf, "[%d]", np->cg.index);
477 printf (bsd_style_output
478 ? "%-6.6s %5.1f %7.2f %11.2f"
479 : "%-6.6s %5.1f %7.2f %7.2f", buf,
480 100 * (np->cg.prop.self + np->cg.prop.child) / print_time,
481 np->cg.prop.self / hz, np->cg.prop.child / hz);
482
483 if ((np->ncalls + np->cg.self_calls) != 0)
484 {
485 printf (" %7lu", np->ncalls);
486
487 if (np->cg.self_calls != 0)
488 printf ("+%-7lu ", np->cg.self_calls);
489 else
490 printf (" %7.7s ", "");
491 }
492 else
493 {
494 printf (" %7.7s %7.7s ", "", "");
495 }
496
497 print_name (np);
498 printf ("\n");
499 }
500
501
502 /* Print dynamic call graph. */
503
504 void
505 cg_print (Sym ** timesortsym)
506 {
507 unsigned int sym_index;
508 Sym *parent;
509
510 if (print_descriptions && bsd_style_output)
511 bsd_callg_blurb (stdout);
512
513 print_header ();
514
515 for (sym_index = 0; sym_index < symtab.len + num_cycles; ++sym_index)
516 {
517 parent = timesortsym[sym_index];
518
519 if ((ignore_zeros && parent->ncalls == 0
520 && parent->cg.self_calls == 0 && parent->cg.prop.self == 0
521 && parent->cg.prop.child == 0)
522 || !parent->cg.print_flag
523 || (line_granularity && ! parent->is_func))
524 continue;
525
526 if (!parent->name && parent->cg.cyc.num != 0)
527 {
528 /* Cycle header. */
529 print_cycle (parent);
530 print_members (parent);
531 }
532 else
533 {
534 print_parents (parent);
535 print_line (parent);
536 print_children (parent);
537 }
538
539 if (bsd_style_output)
540 printf ("\n");
541
542 printf ("-----------------------------------------------\n");
543
544 if (bsd_style_output)
545 printf ("\n");
546 }
547
548 free (timesortsym);
549
550 if (print_descriptions && !bsd_style_output)
551 fsf_callg_blurb (stdout);
552 }
553
554
555 static int
556 cmp_name (const PTR left, const PTR right)
557 {
558 const Sym **npp1 = (const Sym **) left;
559 const Sym **npp2 = (const Sym **) right;
560
561 return strcmp ((*npp1)->name, (*npp2)->name);
562 }
563
564
565 void
566 cg_print_index ()
567 {
568 unsigned int sym_index;
569 unsigned int nnames, todo, i, j;
570 int col, starting_col;
571 Sym **name_sorted_syms, *sym;
572 const char *filename;
573 char buf[20];
574 int column_width = (output_width - 1) / 3; /* Don't write in last col! */
575
576 /* Now, sort regular function name
577 alphabetically to create an index. */
578 name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *));
579
580 for (sym_index = 0, nnames = 0; sym_index < symtab.len; sym_index++)
581 {
582 if (ignore_zeros && symtab.base[sym_index].ncalls == 0
583 && symtab.base[sym_index].hist.time == 0)
584 continue;
585
586 name_sorted_syms[nnames++] = &symtab.base[sym_index];
587 }
588
589 qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name);
590
591 for (sym_index = 1, todo = nnames; sym_index <= num_cycles; sym_index++)
592 name_sorted_syms[todo++] = &cycle_header[sym_index];
593
594 printf ("\f\n");
595 printf (_("Index by function name\n\n"));
596 sym_index = (todo + 2) / 3;
597
598 for (i = 0; i < sym_index; i++)
599 {
600 col = 0;
601 starting_col = 0;
602
603 for (j = i; j < todo; j += sym_index)
604 {
605 sym = name_sorted_syms[j];
606
607 if (sym->cg.print_flag)
608 sprintf (buf, "[%d]", sym->cg.index);
609 else
610 sprintf (buf, "(%d)", sym->cg.index);
611
612 if (j < nnames)
613 {
614 if (bsd_style_output)
615 {
616 printf ("%6.6s %-19.19s", buf, sym->name);
617 }
618 else
619 {
620 col += strlen (buf);
621
622 for (; col < starting_col + 5; ++col)
623 putchar (' ');
624
625 printf (" %s ", buf);
626 col += print_name_only (sym);
627
628 if (!line_granularity && sym->is_static && sym->file)
629 {
630 filename = sym->file->name;
631
632 if (!print_path)
633 {
634 filename = strrchr (filename, '/');
635
636 if (filename)
637 ++filename;
638 else
639 filename = sym->file->name;
640 }
641
642 printf (" (%s)", filename);
643 col += strlen (filename) + 3;
644 }
645 }
646 }
647 else
648 {
649 if (bsd_style_output)
650 {
651 printf ("%6.6s ", buf);
652 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
653 printf ("%-19.19s", buf);
654 }
655 else
656 {
657 col += strlen (buf);
658 for (; col < starting_col + 5; ++col)
659 putchar (' ');
660 printf (" %s ", buf);
661 sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
662 printf ("%s", buf);
663 col += strlen (buf);
664 }
665 }
666
667 starting_col += column_width;
668 }
669
670 printf ("\n");
671 }
672
673 free (name_sorted_syms);
674 }
675
676 /* Compare two arcs based on their usage counts.
677 We want to sort in descending order. */
678
679 static int
680 cmp_arc_count (const PTR left, const PTR right)
681 {
682 const Arc **npp1 = (const Arc **) left;
683 const Arc **npp2 = (const Arc **) right;
684
685 if ((*npp1)->count > (*npp2)->count)
686 return -1;
687 else if ((*npp1)->count < (*npp2)->count)
688 return 1;
689 else
690 return 0;
691 }
692
693 /* Compare two funtions based on their usage counts.
694 We want to sort in descending order. */
695
696 static int
697 cmp_fun_nuses (const PTR left, const PTR right)
698 {
699 const Sym **npp1 = (const Sym **) left;
700 const Sym **npp2 = (const Sym **) right;
701
702 if ((*npp1)->nuses > (*npp2)->nuses)
703 return -1;
704 else if ((*npp1)->nuses < (*npp2)->nuses)
705 return 1;
706 else
707 return 0;
708 }
709
710 /* Print a suggested function ordering based on the profiling data.
711
712 We perform 4 major steps when ordering functions:
713
714 * Group unused functions together and place them at the
715 end of the function order.
716
717 * Search the highest use arcs (those which account for 90% of
718 the total arc count) for functions which have several parents.
719
720 Group those with the most call sites together (currently the
721 top 1.25% which have at least five different call sites).
722
723 These are emitted at the start of the function order.
724
725 * Use a greedy placement algorithm to place functions which
726 occur in the top 99% of the arcs in the profile. Some provisions
727 are made to handle high usage arcs where the parent and/or
728 child has already been placed.
729
730 * Run the same greedy placement algorithm on the remaining
731 arcs to place the leftover functions.
732
733
734 The various "magic numbers" should (one day) be tuneable by command
735 line options. They were arrived at by benchmarking a few applications
736 with various values to see which values produced better overall function
737 orderings.
738
739 Of course, profiling errors, machine limitations (PA long calls), and
740 poor cutoff values for the placement algorithm may limit the usefullness
741 of the resulting function order. Improvements would be greatly appreciated.
742
743 Suggestions:
744
745 * Place the functions with many callers near the middle of the
746 list to reduce long calls.
747
748 * Propagate arc usage changes as functions are placed. Ie if
749 func1 and func2 are placed together, arcs to/from those arcs
750 to the same parent/child should be combined, then resort the
751 arcs to choose the next one.
752
753 * Implement some global positioning algorithm to place the
754 chains made by the greedy local positioning algorithm. Probably
755 by examining arcs which haven't been placed yet to tie two
756 chains together.
757
758 * Take a function's size and time into account in the algorithm;
759 size in particular is important on the PA (long calls). Placing
760 many small functions onto their own page may be wise.
761
762 * Use better profiling information; many published algorithms
763 are based on call sequences through time, rather than just
764 arc counts.
765
766 * Prodecure cloning could improve performance when a small number
767 of arcs account for most of the calls to a particular function.
768
769 * Use relocation information to avoid moving unused functions
770 completely out of the code stream; this would avoid severe lossage
771 when the profile data bears little resemblance to actual runs.
772
773 * Propagation of arc usages should also improve .o link line
774 ordering which shares the same arc placement algorithm with
775 the function ordering code (in fact it is a degenerate case
776 of function ordering). */
777
778 void
779 cg_print_function_ordering (void)
780 {
781 unsigned long sym_index;
782 unsigned long arc_index;
783 unsigned long used, unused, scratch_index;
784 unsigned long unplaced_arc_count, high_arc_count, scratch_arc_count;
785 #ifdef __GNUC__
786 unsigned long long total_arcs, tmp_arcs_count;
787 #else
788 unsigned long total_arcs, tmp_arcs_count;
789 #endif
790 Sym **unused_syms, **used_syms, **scratch_syms;
791 Arc **unplaced_arcs, **high_arcs, **scratch_arcs;
792
793 sym_index = 0;
794 used = 0;
795 unused = 0;
796 scratch_index = 0;
797 unplaced_arc_count = 0;
798 high_arc_count = 0;
799 scratch_arc_count = 0;
800
801 /* First group all the unused functions together. */
802 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
803 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
804 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
805 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
806 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
807 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
808
809 /* Walk through all the functions; mark those which are never
810 called as placed (we'll emit them as a group later). */
811 for (sym_index = 0, used = 0, unused = 0; sym_index < symtab.len; sym_index++)
812 {
813 if (symtab.base[sym_index].ncalls == 0)
814 {
815 unused_syms[unused++] = &symtab.base[sym_index];
816 symtab.base[sym_index].has_been_placed = 1;
817 }
818 else
819 {
820 used_syms[used++] = &symtab.base[sym_index];
821 symtab.base[sym_index].has_been_placed = 0;
822 symtab.base[sym_index].next = 0;
823 symtab.base[sym_index].prev = 0;
824 symtab.base[sym_index].nuses = 0;
825 }
826 }
827
828 /* Sort the arcs from most used to least used. */
829 qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count);
830
831 /* Compute the total arc count. Also mark arcs as unplaced.
832
833 Note we don't compensate for overflow if that happens!
834 Overflow is much less likely when this file is compiled
835 with GCC as it can double-wide integers via long long. */
836 total_arcs = 0;
837 for (arc_index = 0; arc_index < numarcs; arc_index++)
838 {
839 total_arcs += arcs[arc_index]->count;
840 arcs[arc_index]->has_been_placed = 0;
841 }
842
843 /* We want to pull out those functions which are referenced
844 by many highly used arcs and emit them as a group. This
845 could probably use some tuning. */
846 tmp_arcs_count = 0;
847 for (arc_index = 0; arc_index < numarcs; arc_index++)
848 {
849 tmp_arcs_count += arcs[arc_index]->count;
850
851 /* Count how many times each parent and child are used up
852 to our threshhold of arcs (90%). */
853 if ((double)tmp_arcs_count / (double)total_arcs > 0.90)
854 break;
855
856 arcs[arc_index]->child->nuses++;
857 }
858
859 /* Now sort a temporary symbol table based on the number of
860 times each function was used in the highest used arcs. */
861 memcpy (scratch_syms, used_syms, used * sizeof (Sym *));
862 qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses);
863
864 /* Now pick out those symbols we're going to emit as
865 a group. We take up to 1.25% of the used symbols. */
866 for (sym_index = 0; sym_index < used / 80; sym_index++)
867 {
868 Sym *sym = scratch_syms[sym_index];
869 Arc *arc;
870
871 /* If we hit symbols that aren't used from many call sites,
872 then we can quit. We choose five as the low limit for
873 no particular reason. */
874 if (sym->nuses == 5)
875 break;
876
877 /* We're going to need the arcs between these functions.
878 Unfortunately, we don't know all these functions
879 until we're done. So we keep track of all the arcs
880 to the functions we care about, then prune out those
881 which are uninteresting.
882
883 An interesting variation would be to quit when we found
884 multi-call site functions which account for some percentage
885 of the arcs. */
886 arc = sym->cg.children;
887
888 while (arc)
889 {
890 if (arc->parent != arc->child)
891 scratch_arcs[scratch_arc_count++] = arc;
892 arc->has_been_placed = 1;
893 arc = arc->next_child;
894 }
895
896 arc = sym->cg.parents;
897
898 while (arc)
899 {
900 if (arc->parent != arc->child)
901 scratch_arcs[scratch_arc_count++] = arc;
902 arc->has_been_placed = 1;
903 arc = arc->next_parent;
904 }
905
906 /* Keep track of how many symbols we're going to place. */
907 scratch_index = sym_index;
908
909 /* A lie, but it makes identifying
910 these functions easier later. */
911 sym->has_been_placed = 1;
912 }
913
914 /* Now walk through the temporary arcs and copy
915 those we care about into the high arcs array. */
916 for (arc_index = 0; arc_index < scratch_arc_count; arc_index++)
917 {
918 Arc *arc = scratch_arcs[arc_index];
919
920 /* If this arc refers to highly used functions, then
921 then we want to keep it. */
922 if (arc->child->has_been_placed
923 && arc->parent->has_been_placed)
924 {
925 high_arcs[high_arc_count++] = scratch_arcs[arc_index];
926
927 /* We need to turn of has_been_placed since we're going to
928 use the main arc placement algorithm on these arcs. */
929 arc->child->has_been_placed = 0;
930 arc->parent->has_been_placed = 0;
931 }
932 }
933
934 /* Dump the multi-site high usage functions which are not
935 going to be ordered by the main ordering algorithm. */
936 for (sym_index = 0; sym_index < scratch_index; sym_index++)
937 {
938 if (scratch_syms[sym_index]->has_been_placed)
939 printf ("%s\n", scratch_syms[sym_index]->name);
940 }
941
942 /* Now we can order the multi-site high use
943 functions based on the arcs between them. */
944 qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count);
945 order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1,
946 unplaced_arcs, &unplaced_arc_count);
947
948 /* Order and dump the high use functions left,
949 these typically have only a few call sites. */
950 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
951 unplaced_arcs, &unplaced_arc_count);
952
953 /* Now place the rarely used functions. */
954 order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1,
955 scratch_arcs, &scratch_arc_count);
956
957 /* Output any functions not emitted by the order_and_dump calls. */
958 for (sym_index = 0; sym_index < used; sym_index++)
959 if (used_syms[sym_index]->has_been_placed == 0)
960 printf("%s\n", used_syms[sym_index]->name);
961
962 /* Output the unused functions. */
963 for (sym_index = 0; sym_index < unused; sym_index++)
964 printf("%s\n", unused_syms[sym_index]->name);
965
966 unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
967 used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
968 scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
969 high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
970 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
971 unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
972
973 free (unused_syms);
974 free (used_syms);
975 free (scratch_syms);
976 free (high_arcs);
977 free (scratch_arcs);
978 free (unplaced_arcs);
979 }
980
981 /* Place functions based on the arcs in THE_ARCS with ARC_COUNT entries;
982 place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
983
984 If ALL is nonzero, then place all functions referenced by THE_ARCS,
985 else only place those referenced in the top 99% of the arcs in THE_ARCS. */
986
987 #define MOST 0.99
988 static void
989 order_and_dump_functions_by_arcs (the_arcs, arc_count, all,
990 unplaced_arcs, unplaced_arc_count)
991 Arc **the_arcs;
992 unsigned long arc_count;
993 int all;
994 Arc **unplaced_arcs;
995 unsigned long *unplaced_arc_count;
996 {
997 #ifdef __GNUC__
998 unsigned long long tmp_arcs, total_arcs;
999 #else
1000 unsigned long tmp_arcs, total_arcs;
1001 #endif
1002 unsigned int arc_index;
1003
1004 /* If needed, compute the total arc count.
1005
1006 Note we don't compensate for overflow if that happens! */
1007 if (! all)
1008 {
1009 total_arcs = 0;
1010 for (arc_index = 0; arc_index < arc_count; arc_index++)
1011 total_arcs += the_arcs[arc_index]->count;
1012 }
1013 else
1014 total_arcs = 0;
1015
1016 tmp_arcs = 0;
1017
1018 for (arc_index = 0; arc_index < arc_count; arc_index++)
1019 {
1020 Sym *sym1, *sym2;
1021 Sym *child, *parent;
1022
1023 tmp_arcs += the_arcs[arc_index]->count;
1024
1025 /* Ignore this arc if it's already been placed. */
1026 if (the_arcs[arc_index]->has_been_placed)
1027 continue;
1028
1029 child = the_arcs[arc_index]->child;
1030 parent = the_arcs[arc_index]->parent;
1031
1032 /* If we're not using all arcs, and this is a rarely used
1033 arc, then put it on the unplaced_arc list. Similarly
1034 if both the parent and child of this arc have been placed. */
1035 if ((! all && (double)tmp_arcs / (double)total_arcs > MOST)
1036 || child->has_been_placed || parent->has_been_placed)
1037 {
1038 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1039 continue;
1040 }
1041
1042 /* If all slots in the parent and child are full, then there isn't
1043 anything we can do right now. We'll place this arc on the
1044 unplaced arc list in the hope that a global positioning
1045 algorithm can use it to place function chains. */
1046 if (parent->next && parent->prev && child->next && child->prev)
1047 {
1048 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1049 continue;
1050 }
1051
1052 /* If the parent is unattached, then find the closest
1053 place to attach it onto child's chain. Similarly
1054 for the opposite case. */
1055 if (!parent->next && !parent->prev)
1056 {
1057 int next_count = 0;
1058 int prev_count = 0;
1059 Sym *prev = child;
1060 Sym *next = child;
1061
1062 /* Walk to the beginning and end of the child's chain. */
1063 while (next->next)
1064 {
1065 next = next->next;
1066 next_count++;
1067 }
1068
1069 while (prev->prev)
1070 {
1071 prev = prev->prev;
1072 prev_count++;
1073 }
1074
1075 /* Choose the closest. */
1076 child = next_count < prev_count ? next : prev;
1077 }
1078 else if (! child->next && !child->prev)
1079 {
1080 int next_count = 0;
1081 int prev_count = 0;
1082 Sym *prev = parent;
1083 Sym *next = parent;
1084
1085 while (next->next)
1086 {
1087 next = next->next;
1088 next_count++;
1089 }
1090
1091 while (prev->prev)
1092 {
1093 prev = prev->prev;
1094 prev_count++;
1095 }
1096
1097 parent = prev_count < next_count ? prev : next;
1098 }
1099 else
1100 {
1101 /* Couldn't find anywhere to attach the functions,
1102 put the arc on the unplaced arc list. */
1103 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1104 continue;
1105 }
1106
1107 /* Make sure we don't tie two ends together. */
1108 sym1 = parent;
1109 if (sym1->next)
1110 while (sym1->next)
1111 sym1 = sym1->next;
1112 else
1113 while (sym1->prev)
1114 sym1 = sym1->prev;
1115
1116 sym2 = child;
1117 if (sym2->next)
1118 while (sym2->next)
1119 sym2 = sym2->next;
1120 else
1121 while (sym2->prev)
1122 sym2 = sym2->prev;
1123
1124 if (sym1 == child
1125 && sym2 == parent)
1126 {
1127 /* This would tie two ends together. */
1128 unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
1129 continue;
1130 }
1131
1132 if (parent->next)
1133 {
1134 /* Must attach to the parent's prev field. */
1135 if (! child->next)
1136 {
1137 /* parent-prev and child-next */
1138 parent->prev = child;
1139 child->next = parent;
1140 the_arcs[arc_index]->has_been_placed = 1;
1141 }
1142 }
1143 else if (parent->prev)
1144 {
1145 /* Must attach to the parent's next field. */
1146 if (! child->prev)
1147 {
1148 /* parent-next and child-prev */
1149 parent->next = child;
1150 child->prev = parent;
1151 the_arcs[arc_index]->has_been_placed = 1;
1152 }
1153 }
1154 else
1155 {
1156 /* Can attach to either field in the parent, depends
1157 on where we've got space in the child. */
1158 if (child->prev)
1159 {
1160 /* parent-prev and child-next. */
1161 parent->prev = child;
1162 child->next = parent;
1163 the_arcs[arc_index]->has_been_placed = 1;
1164 }
1165 else
1166 {
1167 /* parent-next and child-prev. */
1168 parent->next = child;
1169 child->prev = parent;
1170 the_arcs[arc_index]->has_been_placed = 1;
1171 }
1172 }
1173 }
1174
1175 /* Dump the chains of functions we've made. */
1176 for (arc_index = 0; arc_index < arc_count; arc_index++)
1177 {
1178 Sym *sym;
1179 if (the_arcs[arc_index]->parent->has_been_placed
1180 || the_arcs[arc_index]->child->has_been_placed)
1181 continue;
1182
1183 sym = the_arcs[arc_index]->parent;
1184
1185 /* If this symbol isn't attached to any other
1186 symbols, then we've got a rarely used arc.
1187
1188 Skip it for now, we'll deal with them later. */
1189 if (sym->next == NULL
1190 && sym->prev == NULL)
1191 continue;
1192
1193 /* Get to the start of this chain. */
1194 while (sym->prev)
1195 sym = sym->prev;
1196
1197 while (sym)
1198 {
1199 /* Mark it as placed. */
1200 sym->has_been_placed = 1;
1201 printf ("%s\n", sym->name);
1202 sym = sym->next;
1203 }
1204 }
1205
1206 /* If we want to place all the arcs, then output
1207 those which weren't placed by the main algorithm. */
1208 if (all)
1209 for (arc_index = 0; arc_index < arc_count; arc_index++)
1210 {
1211 Sym *sym;
1212 if (the_arcs[arc_index]->parent->has_been_placed
1213 || the_arcs[arc_index]->child->has_been_placed)
1214 continue;
1215
1216 sym = the_arcs[arc_index]->parent;
1217
1218 sym->has_been_placed = 1;
1219 printf ("%s\n", sym->name);
1220 }
1221 }
1222
1223 /* Compare two function_map structs based on file name.
1224 We want to sort in ascending order. */
1225
1226 static int
1227 cmp_symbol_map (const void * l, const void * r)
1228 {
1229 return filename_cmp (((struct function_map *) l)->file_name,
1230 ((struct function_map *) r)->file_name);
1231 }
1232
1233 /* Print a suggested .o ordering for files on a link line based
1234 on profiling information. This uses the function placement
1235 code for the bulk of its work. */
1236
1237 void
1238 cg_print_file_ordering (void)
1239 {
1240 unsigned long scratch_arc_count;
1241 unsigned long arc_index;
1242 unsigned long sym_index;
1243 Arc **scratch_arcs;
1244 char *last;
1245
1246 scratch_arc_count = 0;
1247
1248 scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
1249 for (arc_index = 0; arc_index < numarcs; arc_index++)
1250 {
1251 if (! arcs[arc_index]->parent->mapped
1252 || ! arcs[arc_index]->child->mapped)
1253 arcs[arc_index]->has_been_placed = 1;
1254 }
1255
1256 order_and_dump_functions_by_arcs (arcs, numarcs, 0,
1257 scratch_arcs, &scratch_arc_count);
1258
1259 /* Output .o's not handled by the main placement algorithm. */
1260 for (sym_index = 0; sym_index < symtab.len; sym_index++)
1261 {
1262 if (symtab.base[sym_index].mapped
1263 && ! symtab.base[sym_index].has_been_placed)
1264 printf ("%s\n", symtab.base[sym_index].name);
1265 }
1266
1267 qsort (symbol_map, symbol_map_count, sizeof (struct function_map), cmp_symbol_map);
1268
1269 /* Now output any .o's that didn't have any text symbols. */
1270 last = NULL;
1271 for (sym_index = 0; sym_index < symbol_map_count; sym_index++)
1272 {
1273 unsigned int index2;
1274
1275 /* Don't bother searching if this symbol
1276 is the same as the previous one. */
1277 if (last && !filename_cmp (last, symbol_map[sym_index].file_name))
1278 continue;
1279
1280 for (index2 = 0; index2 < symtab.len; index2++)
1281 {
1282 if (! symtab.base[index2].mapped)
1283 continue;
1284
1285 if (!filename_cmp (symtab.base[index2].name,
1286 symbol_map[sym_index].file_name))
1287 break;
1288 }
1289
1290 /* If we didn't find it in the symbol table, then it must
1291 be a .o with no text symbols. Output it last. */
1292 if (index2 == symtab.len)
1293 printf ("%s\n", symbol_map[sym_index].file_name);
1294 last = symbol_map[sym_index].file_name;
1295 }
1296 }
1297