Home | History | Annotate | Line # | Download | only in gprof
cg_print.c revision 1.1
      1 /* cg_print.c -  Print routines for displaying call graphs.
      2 
      3    Copyright 2000, 2001, 2002, 2004, 2007 Free Software Foundation, Inc.
      4 
      5    This file is part of GNU Binutils.
      6 
      7    This program is free software; you can redistribute it and/or modify
      8    it under the terms of the GNU General Public License as published by
      9    the Free Software Foundation; either version 3 of the License, or
     10    (at your option) any later version.
     11 
     12    This program is distributed in the hope that it will be useful,
     13    but WITHOUT ANY WARRANTY; without even the implied warranty of
     14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     15    GNU General Public License for more details.
     16 
     17    You should have received a copy of the GNU General Public License
     18    along with this program; if not, write to the Free Software
     19    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
     20    02110-1301, USA.  */
     21 
     22 #include "gprof.h"
     24 #include "libiberty.h"
     25 #include "search_list.h"
     26 #include "source.h"
     27 #include "symtab.h"
     28 #include "cg_arcs.h"
     29 #include "cg_print.h"
     30 #include "hist.h"
     31 #include "utils.h"
     32 #include "corefile.h"
     33 
     34 /* Return value of comparison functions used to sort tables.  */
     35 #define	LESSTHAN	-1
     36 #define	EQUALTO		0
     37 #define	GREATERTHAN	1
     38 
     39 static void print_header (void);
     40 static void print_cycle (Sym *);
     41 static int cmp_member (Sym *, Sym *);
     42 static void sort_members (Sym *);
     43 static void print_members (Sym *);
     44 static int cmp_arc (Arc *, Arc *);
     45 static void sort_parents (Sym *);
     46 static void print_parents (Sym *);
     47 static void sort_children (Sym *);
     48 static void print_children (Sym *);
     49 static void print_line (Sym *);
     50 static int cmp_name (const PTR, const PTR);
     51 static int cmp_arc_count (const PTR, const PTR);
     52 static int cmp_fun_nuses (const PTR, const PTR);
     53 static void order_and_dump_functions_by_arcs
     54   (Arc **, unsigned long, int, Arc **, unsigned long *);
     55 
     56 /* Declarations of automatically generated functions to output blurbs.  */
     57 extern void bsd_callg_blurb (FILE * fp);
     58 extern void fsf_callg_blurb (FILE * fp);
     59 
     60 double print_time = 0.0;
     61 
     62 
     63 static void
     64 print_header ()
     65 {
     66   if (first_output)
     67     first_output = FALSE;
     68   else
     69     printf ("\f\n");
     70 
     71   if (!bsd_style_output)
     72     {
     73       if (print_descriptions)
     74 	printf (_("\t\t     Call graph (explanation follows)\n\n"));
     75       else
     76 	printf (_("\t\t\tCall graph\n\n"));
     77     }
     78 
     79   printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
     80 	  (long) hist_scale * (long) sizeof (UNIT));
     81 
     82   if (print_time > 0.0)
     83     printf (_(" for %.2f%% of %.2f seconds\n\n"),
     84 	    100.0 / print_time, print_time / hz);
     85   else
     86     {
     87       printf (_(" no time propagated\n\n"));
     88 
     89       /* This doesn't hurt, since all the numerators will be 0.0.  */
     90       print_time = 1.0;
     91     }
     92 
     93   if (bsd_style_output)
     94     {
     95       printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s     %-8.8s\n",
     96 	      "", "", "", "", _("called"), _("total"), _("parents"));
     97       printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n",
     98 	      _("index"), _("%time"), _("self"), _("descendants"),
     99 	      _("called"), _("self"), _("name"), _("index"));
    100       printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s     %-8.8s\n",
    101 	      "", "", "", "", _("called"), _("total"), _("children"));
    102       printf ("\n");
    103     }
    104   else
    105     {
    106       printf (_("index %% time    self  children    called     name\n"));
    107     }
    108 }
    109 
    110 /* Print a cycle header.  */
    111 
    112 static void
    113 print_cycle (Sym *cyc)
    114 {
    115   char buf[BUFSIZ];
    116 
    117   sprintf (buf, "[%d]", cyc->cg.index);
    118   printf (bsd_style_output
    119 	  ? "%-6.6s %5.1f %7.2f %11.2f %7lu"
    120 	  : "%-6.6s %5.1f %7.2f %7.2f %7lu", buf,
    121 	  100 * (cyc->cg.prop.self + cyc->cg.prop.child) / print_time,
    122 	  cyc->cg.prop.self / hz, cyc->cg.prop.child / hz, cyc->ncalls);
    123 
    124   if (cyc->cg.self_calls != 0)
    125     printf ("+%-7lu", cyc->cg.self_calls);
    126   else
    127     printf (" %7.7s", "");
    128 
    129   printf (_(" <cycle %d as a whole> [%d]\n"), cyc->cg.cyc.num, cyc->cg.index);
    130 }
    131 
    132 /* Compare LEFT and RIGHT membmer.  Major comparison key is
    133    CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS.  */
    134 
    135 static int
    136 cmp_member (Sym *left, Sym *right)
    137 {
    138   double left_time = left->cg.prop.self + left->cg.prop.child;
    139   double right_time = right->cg.prop.self + right->cg.prop.child;
    140   unsigned long left_calls = left->ncalls + left->cg.self_calls;
    141   unsigned long right_calls = right->ncalls + right->cg.self_calls;
    142 
    143   if (left_time > right_time)
    144     return GREATERTHAN;
    145 
    146   if (left_time < right_time)
    147     return LESSTHAN;
    148 
    149   if (left_calls > right_calls)
    150     return GREATERTHAN;
    151 
    152   if (left_calls < right_calls)
    153     return LESSTHAN;
    154 
    155   return EQUALTO;
    156 }
    157 
    158 /* Sort members of a cycle.  */
    159 
    160 static void
    161 sort_members (Sym *cyc)
    162 {
    163   Sym *todo, *doing, *prev;
    164 
    165   /* Detach cycle members from cyclehead,
    166      and insertion sort them back on.  */
    167   todo = cyc->cg.cyc.next;
    168   cyc->cg.cyc.next = 0;
    169 
    170   for (doing = todo; doing != NULL; doing = todo)
    171     {
    172       todo = doing->cg.cyc.next;
    173 
    174       for (prev = cyc; prev->cg.cyc.next; prev = prev->cg.cyc.next)
    175 	{
    176 	  if (cmp_member (doing, prev->cg.cyc.next) == GREATERTHAN)
    177 	    break;
    178 	}
    179 
    180       doing->cg.cyc.next = prev->cg.cyc.next;
    181       prev->cg.cyc.next = doing;
    182     }
    183 }
    184 
    185 /* Print the members of a cycle.  */
    186 
    187 static void
    188 print_members (Sym *cyc)
    189 {
    190   Sym *member;
    191 
    192   sort_members (cyc);
    193 
    194   for (member = cyc->cg.cyc.next; member; member = member->cg.cyc.next)
    195     {
    196       printf (bsd_style_output
    197 	      ? "%6.6s %5.5s %7.2f %11.2f %7lu"
    198 	      : "%6.6s %5.5s %7.2f %7.2f %7lu",
    199 	      "", "", member->cg.prop.self / hz, member->cg.prop.child / hz,
    200 	      member->ncalls);
    201 
    202       if (member->cg.self_calls != 0)
    203 	printf ("+%-7lu", member->cg.self_calls);
    204       else
    205 	printf (" %7.7s", "");
    206 
    207       printf ("     ");
    208       print_name (member);
    209       printf ("\n");
    210     }
    211 }
    212 
    213 /* Compare two arcs to/from the same child/parent.
    214 	- if one arc is a self arc, it's least.
    215 	- if one arc is within a cycle, it's less than.
    216 	- if both arcs are within a cycle, compare arc counts.
    217 	- if neither arc is within a cycle, compare with
    218 		time + child_time as major key
    219 		arc count as minor key.  */
    220 
    221 static int
    222 cmp_arc (Arc *left, Arc *right)
    223 {
    224   Sym *left_parent = left->parent;
    225   Sym *left_child = left->child;
    226   Sym *right_parent = right->parent;
    227   Sym *right_child = right->child;
    228   double left_time, right_time;
    229 
    230   DBG (TIMEDEBUG,
    231        printf ("[cmp_arc] ");
    232        print_name (left_parent);
    233        printf (" calls ");
    234        print_name (left_child);
    235        printf (" %f + %f %lu/%lu\n", left->time, left->child_time,
    236 	       left->count, left_child->ncalls);
    237        printf ("[cmp_arc] ");
    238        print_name (right_parent);
    239        printf (" calls ");
    240        print_name (right_child);
    241        printf (" %f + %f %lu/%lu\n", right->time, right->child_time,
    242 	       right->count, right_child->ncalls);
    243        printf ("\n");
    244     );
    245 
    246   if (left_parent == left_child)
    247     return LESSTHAN;		/* Left is a self call.  */
    248 
    249   if (right_parent == right_child)
    250     return GREATERTHAN;		/* Right is a self call.  */
    251 
    252   if (left_parent->cg.cyc.num != 0 && left_child->cg.cyc.num != 0
    253       && left_parent->cg.cyc.num == left_child->cg.cyc.num)
    254     {
    255       /* Left is a call within a cycle.  */
    256       if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
    257 	  && right_parent->cg.cyc.num == right_child->cg.cyc.num)
    258 	{
    259 	  /* Right is a call within the cycle, too.  */
    260 	  if (left->count < right->count)
    261 	    return LESSTHAN;
    262 
    263 	  if (left->count > right->count)
    264 	    return GREATERTHAN;
    265 
    266 	  return EQUALTO;
    267 	}
    268       else
    269 	{
    270 	  /* Right isn't a call within the cycle.  */
    271 	  return LESSTHAN;
    272 	}
    273     }
    274   else
    275     {
    276       /* Left isn't a call within a cycle.  */
    277       if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
    278 	  && right_parent->cg.cyc.num == right_child->cg.cyc.num)
    279 	{
    280 	  /* Right is a call within a cycle.  */
    281 	  return GREATERTHAN;
    282 	}
    283       else
    284 	{
    285 	  /* Neither is a call within a cycle.  */
    286 	  left_time = left->time + left->child_time;
    287 	  right_time = right->time + right->child_time;
    288 
    289 	  if (left_time < right_time)
    290 	    return LESSTHAN;
    291 
    292 	  if (left_time > right_time)
    293 	    return GREATERTHAN;
    294 
    295 	  if (left->count < right->count)
    296 	    return LESSTHAN;
    297 
    298 	  if (left->count > right->count)
    299 	    return GREATERTHAN;
    300 
    301 	  return EQUALTO;
    302 	}
    303     }
    304 }
    305 
    306 
    307 static void
    308 sort_parents (Sym * child)
    309 {
    310   Arc *arc, *detached, sorted, *prev;
    311 
    312   /* Unlink parents from child, then insertion sort back on to
    313      sorted's parents.
    314 	  *arc        the arc you have detached and are inserting.
    315 	  *detached   the rest of the arcs to be sorted.
    316 	  sorted      arc list onto which you insertion sort.
    317 	  *prev       arc before the arc you are comparing.  */
    318   sorted.next_parent = 0;
    319 
    320   for (arc = child->cg.parents; arc; arc = detached)
    321     {
    322       detached = arc->next_parent;
    323 
    324       /* Consider *arc as disconnected; insert it into sorted.  */
    325       for (prev = &sorted; prev->next_parent; prev = prev->next_parent)
    326 	{
    327 	  if (cmp_arc (arc, prev->next_parent) != GREATERTHAN)
    328 	    break;
    329 	}
    330 
    331       arc->next_parent = prev->next_parent;
    332       prev->next_parent = arc;
    333     }
    334 
    335   /* Reattach sorted arcs to child.  */
    336   child->cg.parents = sorted.next_parent;
    337 }
    338 
    339 
    340 static void
    341 print_parents (Sym *child)
    342 {
    343   Sym *parent;
    344   Arc *arc;
    345   Sym *cycle_head;
    346 
    347   if (child->cg.cyc.head != 0)
    348     cycle_head = child->cg.cyc.head;
    349   else
    350     cycle_head = child;
    351 
    352   if (!child->cg.parents)
    353     {
    354       printf (bsd_style_output
    355 	      ? _("%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s     <spontaneous>\n")
    356 	      : _("%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s     <spontaneous>\n"),
    357 	      "", "", "", "", "", "");
    358       return;
    359     }
    360 
    361   sort_parents (child);
    362 
    363   for (arc = child->cg.parents; arc; arc = arc->next_parent)
    364     {
    365       parent = arc->parent;
    366       if (child == parent || (child->cg.cyc.num != 0
    367 			      && parent->cg.cyc.num == child->cg.cyc.num))
    368 	{
    369 	  /* Selfcall or call among siblings.  */
    370 	  printf (bsd_style_output
    371 		  ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s     "
    372 		  : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s     ",
    373 		  "", "", "", "",
    374 		  arc->count, "");
    375 	  print_name (parent);
    376 	  printf ("\n");
    377 	}
    378       else
    379 	{
    380 	  /* Regular parent of child.  */
    381 	  printf (bsd_style_output
    382 		  ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu     "
    383 		  : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu     ",
    384 		  "", "",
    385 		  arc->time / hz, arc->child_time / hz,
    386 		  arc->count, cycle_head->ncalls);
    387 	  print_name (parent);
    388 	  printf ("\n");
    389 	}
    390     }
    391 }
    392 
    393 
    394 static void
    395 sort_children (Sym *parent)
    396 {
    397   Arc *arc, *detached, sorted, *prev;
    398 
    399   /* Unlink children from parent, then insertion sort back on to
    400      sorted's children.
    401 	  *arc        the arc you have detached and are inserting.
    402 	  *detached   the rest of the arcs to be sorted.
    403 	  sorted      arc list onto which you insertion sort.
    404 	  *prev       arc before the arc you are comparing.  */
    405   sorted.next_child = 0;
    406 
    407   for (arc = parent->cg.children; arc; arc = detached)
    408     {
    409       detached = arc->next_child;
    410 
    411       /* Consider *arc as disconnected; insert it into sorted.  */
    412       for (prev = &sorted; prev->next_child; prev = prev->next_child)
    413 	{
    414 	  if (cmp_arc (arc, prev->next_child) != LESSTHAN)
    415 	    break;
    416 	}
    417 
    418       arc->next_child = prev->next_child;
    419       prev->next_child = arc;
    420     }
    421 
    422   /* Reattach sorted children to parent.  */
    423   parent->cg.children = sorted.next_child;
    424 }
    425 
    426 
    427 static void
    428 print_children (Sym *parent)
    429 {
    430   Sym *child;
    431   Arc *arc;
    432 
    433   sort_children (parent);
    434   arc = parent->cg.children;
    435 
    436   for (arc = parent->cg.children; arc; arc = arc->next_child)
    437     {
    438       child = arc->child;
    439       if (child == parent || (child->cg.cyc.num != 0
    440 			      && child->cg.cyc.num == parent->cg.cyc.num))
    441 	{
    442 	  /* Self call or call to sibling.  */
    443 	  printf (bsd_style_output
    444 		  ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s     "
    445 		  : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s     ",
    446 		  "", "", "", "", arc->count, "");
    447 	  print_name (child);
    448 	  printf ("\n");
    449 	}
    450       else
    451 	{
    452 	  /* Regular child of parent.  */
    453 	  printf (bsd_style_output
    454 		  ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu     "
    455 		  : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu     ",
    456 		  "", "",
    457 		  arc->time / hz, arc->child_time / hz,
    458 		  arc->count, child->cg.cyc.head->ncalls);
    459 	  print_name (child);
    460 	  printf ("\n");
    461 	}
    462     }
    463 }
    464 
    465 
    466 static void
    467 print_line (Sym *np)
    468 {
    469   char buf[BUFSIZ];
    470 
    471   sprintf (buf, "[%d]", np->cg.index);
    472   printf (bsd_style_output
    473 	  ? "%-6.6s %5.1f %7.2f %11.2f"
    474 	  : "%-6.6s %5.1f %7.2f %7.2f", buf,
    475 	  100 * (np->cg.prop.self + np->cg.prop.child) / print_time,
    476 	  np->cg.prop.self / hz, np->cg.prop.child / hz);
    477 
    478   if ((np->ncalls + np->cg.self_calls) != 0)
    479     {
    480       printf (" %7lu", np->ncalls);
    481 
    482       if (np->cg.self_calls != 0)
    483 	  printf ("+%-7lu ", np->cg.self_calls);
    484       else
    485 	  printf (" %7.7s ", "");
    486     }
    487   else
    488     {
    489       printf (" %7.7s %7.7s ", "", "");
    490     }
    491 
    492   print_name (np);
    493   printf ("\n");
    494 }
    495 
    496 
    497 /* Print dynamic call graph.  */
    498 
    499 void
    500 cg_print (Sym ** timesortsym)
    501 {
    502   unsigned int index;
    503   Sym *parent;
    504 
    505   if (print_descriptions && bsd_style_output)
    506     bsd_callg_blurb (stdout);
    507 
    508   print_header ();
    509 
    510   for (index = 0; index < symtab.len + num_cycles; ++index)
    511     {
    512       parent = timesortsym[index];
    513 
    514       if ((ignore_zeros && parent->ncalls == 0
    515 	   && parent->cg.self_calls == 0 && parent->cg.prop.self == 0
    516 	   && parent->cg.prop.child == 0)
    517 	  || !parent->cg.print_flag
    518 	  || (line_granularity && ! parent->is_func))
    519 	continue;
    520 
    521       if (!parent->name && parent->cg.cyc.num != 0)
    522 	{
    523 	  /* Cycle header.  */
    524 	  print_cycle (parent);
    525 	  print_members (parent);
    526 	}
    527       else
    528 	{
    529 	  print_parents (parent);
    530 	  print_line (parent);
    531 	  print_children (parent);
    532 	}
    533 
    534       if (bsd_style_output)
    535 	printf ("\n");
    536 
    537       printf ("-----------------------------------------------\n");
    538 
    539       if (bsd_style_output)
    540 	printf ("\n");
    541     }
    542 
    543   free (timesortsym);
    544 
    545   if (print_descriptions && !bsd_style_output)
    546     fsf_callg_blurb (stdout);
    547 }
    548 
    549 
    550 static int
    551 cmp_name (const PTR left, const PTR right)
    552 {
    553   const Sym **npp1 = (const Sym **) left;
    554   const Sym **npp2 = (const Sym **) right;
    555 
    556   return strcmp ((*npp1)->name, (*npp2)->name);
    557 }
    558 
    559 
    560 void
    561 cg_print_index ()
    562 {
    563   unsigned int index;
    564   unsigned int nnames, todo, i, j;
    565   int col, starting_col;
    566   Sym **name_sorted_syms, *sym;
    567   const char *filename;
    568   char buf[20];
    569   int column_width = (output_width - 1) / 3;	/* Don't write in last col!  */
    570 
    571   /* Now, sort regular function name
    572      alphabetically to create an index.  */
    573   name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *));
    574 
    575   for (index = 0, nnames = 0; index < symtab.len; index++)
    576     {
    577       if (ignore_zeros && symtab.base[index].ncalls == 0
    578 	  && symtab.base[index].hist.time == 0)
    579 	continue;
    580 
    581       name_sorted_syms[nnames++] = &symtab.base[index];
    582     }
    583 
    584   qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name);
    585 
    586   for (index = 1, todo = nnames; index <= num_cycles; index++)
    587     name_sorted_syms[todo++] = &cycle_header[index];
    588 
    589   printf ("\f\n");
    590   printf (_("Index by function name\n\n"));
    591   index = (todo + 2) / 3;
    592 
    593   for (i = 0; i < index; i++)
    594     {
    595       col = 0;
    596       starting_col = 0;
    597 
    598       for (j = i; j < todo; j += index)
    599 	{
    600 	  sym = name_sorted_syms[j];
    601 
    602 	  if (sym->cg.print_flag)
    603 	    sprintf (buf, "[%d]", sym->cg.index);
    604 	  else
    605 	    sprintf (buf, "(%d)", sym->cg.index);
    606 
    607 	  if (j < nnames)
    608 	    {
    609 	      if (bsd_style_output)
    610 		{
    611 		  printf ("%6.6s %-19.19s", buf, sym->name);
    612 		}
    613 	      else
    614 		{
    615 		  col += strlen (buf);
    616 
    617 		  for (; col < starting_col + 5; ++col)
    618 		    putchar (' ');
    619 
    620 		  printf (" %s ", buf);
    621 		  col += print_name_only (sym);
    622 
    623 		  if (!line_granularity && sym->is_static && sym->file)
    624 		    {
    625 		      filename = sym->file->name;
    626 
    627 		      if (!print_path)
    628 			{
    629 			  filename = strrchr (filename, '/');
    630 
    631 			  if (filename)
    632 			    ++filename;
    633 			  else
    634 			    filename = sym->file->name;
    635 			}
    636 
    637 		      printf (" (%s)", filename);
    638 		      col += strlen (filename) + 3;
    639 		    }
    640 		}
    641 	    }
    642 	  else
    643 	    {
    644 	      if (bsd_style_output)
    645 		{
    646 		  printf ("%6.6s ", buf);
    647 		  sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
    648 		  printf ("%-19.19s", buf);
    649 		}
    650 	      else
    651 		{
    652 		  col += strlen (buf);
    653 		  for (; col < starting_col + 5; ++col)
    654 		    putchar (' ');
    655 		  printf (" %s ", buf);
    656 		  sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
    657 		  printf ("%s", buf);
    658 		  col += strlen (buf);
    659 		}
    660 	    }
    661 
    662 	  starting_col += column_width;
    663 	}
    664 
    665       printf ("\n");
    666     }
    667 
    668   free (name_sorted_syms);
    669 }
    670 
    671 /* Compare two arcs based on their usage counts.
    672    We want to sort in descending order.  */
    673 
    674 static int
    675 cmp_arc_count (const PTR left, const PTR right)
    676 {
    677   const Arc **npp1 = (const Arc **) left;
    678   const Arc **npp2 = (const Arc **) right;
    679 
    680   if ((*npp1)->count > (*npp2)->count)
    681     return -1;
    682   else if ((*npp1)->count < (*npp2)->count)
    683     return 1;
    684   else
    685     return 0;
    686 }
    687 
    688 /* Compare two funtions based on their usage counts.
    689    We want to sort in descending order.  */
    690 
    691 static int
    692 cmp_fun_nuses (const PTR left, const PTR right)
    693 {
    694   const Sym **npp1 = (const Sym **) left;
    695   const Sym **npp2 = (const Sym **) right;
    696 
    697   if ((*npp1)->nuses > (*npp2)->nuses)
    698     return -1;
    699   else if ((*npp1)->nuses < (*npp2)->nuses)
    700     return 1;
    701   else
    702     return 0;
    703 }
    704 
    705 /* Print a suggested function ordering based on the profiling data.
    706 
    707    We perform 4 major steps when ordering functions:
    708 
    709 	* Group unused functions together and place them at the
    710 	end of the function order.
    711 
    712 	* Search the highest use arcs (those which account for 90% of
    713 	the total arc count) for functions which have several parents.
    714 
    715 	Group those with the most call sites together (currently the
    716 	top 1.25% which have at least five different call sites).
    717 
    718 	These are emitted at the start of the function order.
    719 
    720 	* Use a greedy placement algorithm to place functions which
    721 	occur in the top 99% of the arcs in the profile.  Some provisions
    722 	are made to handle high usage arcs where the parent and/or
    723 	child has already been placed.
    724 
    725 	* Run the same greedy placement algorithm on the remaining
    726 	arcs to place the leftover functions.
    727 
    728 
    729    The various "magic numbers" should (one day) be tuneable by command
    730    line options.  They were arrived at by benchmarking a few applications
    731    with various values to see which values produced better overall function
    732    orderings.
    733 
    734    Of course, profiling errors, machine limitations (PA long calls), and
    735    poor cutoff values for the placement algorithm may limit the usefullness
    736    of the resulting function order.  Improvements would be greatly appreciated.
    737 
    738    Suggestions:
    739 
    740 	* Place the functions with many callers near the middle of the
    741 	list to reduce long calls.
    742 
    743 	* Propagate arc usage changes as functions are placed.  Ie if
    744 	func1 and func2 are placed together, arcs to/from those arcs
    745 	to the same parent/child should be combined, then resort the
    746 	arcs to choose the next one.
    747 
    748 	* Implement some global positioning algorithm to place the
    749 	chains made by the greedy local positioning algorithm.  Probably
    750 	by examining arcs which haven't been placed yet to tie two
    751 	chains together.
    752 
    753 	* Take a function's size and time into account in the algorithm;
    754 	size in particular is important on the PA (long calls).  Placing
    755 	many small functions onto their own page may be wise.
    756 
    757 	* Use better profiling information; many published algorithms
    758 	are based on call sequences through time, rather than just
    759 	arc counts.
    760 
    761 	* Prodecure cloning could improve performance when a small number
    762 	of arcs account for most of the calls to a particular function.
    763 
    764 	* Use relocation information to avoid moving unused functions
    765 	completely out of the code stream; this would avoid severe lossage
    766 	when the profile data bears little resemblance to actual runs.
    767 
    768 	* Propagation of arc usages should also improve .o link line
    769 	ordering which shares the same arc placement algorithm with
    770 	the function ordering code (in fact it is a degenerate case
    771 	of function ordering).  */
    772 
    773 void
    774 cg_print_function_ordering ()
    775 {
    776   unsigned long index, used, unused, scratch_index;
    777   unsigned long  unplaced_arc_count, high_arc_count, scratch_arc_count;
    778 #ifdef __GNUC__
    779   unsigned long long total_arcs, tmp_arcs_count;
    780 #else
    781   unsigned long total_arcs, tmp_arcs_count;
    782 #endif
    783   Sym **unused_syms, **used_syms, **scratch_syms;
    784   Arc **unplaced_arcs, **high_arcs, **scratch_arcs;
    785 
    786   index = 0;
    787   used = 0;
    788   unused = 0;
    789   scratch_index = 0;
    790   unplaced_arc_count = 0;
    791   high_arc_count = 0;
    792   scratch_arc_count = 0;
    793 
    794   /* First group all the unused functions together.  */
    795   unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
    796   used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
    797   scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
    798   high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
    799   scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
    800   unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
    801 
    802   /* Walk through all the functions; mark those which are never
    803      called as placed (we'll emit them as a group later).  */
    804   for (index = 0, used = 0, unused = 0; index < symtab.len; index++)
    805     {
    806       if (symtab.base[index].ncalls == 0)
    807 	{
    808 	  /* Filter out gprof generated names.  */
    809 	  if (strcmp (symtab.base[index].name, "<locore>")
    810 	      && strcmp (symtab.base[index].name, "<hicore>"))
    811 	    {
    812 	      unused_syms[unused++] = &symtab.base[index];
    813 	      symtab.base[index].has_been_placed = 1;
    814 	    }
    815 	}
    816       else
    817 	{
    818 	  used_syms[used++] = &symtab.base[index];
    819 	  symtab.base[index].has_been_placed = 0;
    820 	  symtab.base[index].next = 0;
    821 	  symtab.base[index].prev = 0;
    822 	  symtab.base[index].nuses = 0;
    823 	}
    824     }
    825 
    826   /* Sort the arcs from most used to least used.  */
    827   qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count);
    828 
    829   /* Compute the total arc count.  Also mark arcs as unplaced.
    830 
    831      Note we don't compensate for overflow if that happens!
    832      Overflow is much less likely when this file is compiled
    833      with GCC as it can double-wide integers via long long.  */
    834   total_arcs = 0;
    835   for (index = 0; index < numarcs; index++)
    836     {
    837       total_arcs += arcs[index]->count;
    838       arcs[index]->has_been_placed = 0;
    839     }
    840 
    841   /* We want to pull out those functions which are referenced
    842      by many highly used arcs and emit them as a group.  This
    843      could probably use some tuning.  */
    844   tmp_arcs_count = 0;
    845   for (index = 0; index < numarcs; index++)
    846     {
    847       tmp_arcs_count += arcs[index]->count;
    848 
    849       /* Count how many times each parent and child are used up
    850 	 to our threshhold of arcs (90%).  */
    851       if ((double)tmp_arcs_count / (double)total_arcs > 0.90)
    852 	break;
    853 
    854       arcs[index]->child->nuses++;
    855     }
    856 
    857   /* Now sort a temporary symbol table based on the number of
    858      times each function was used in the highest used arcs.  */
    859   memcpy (scratch_syms, used_syms, used * sizeof (Sym *));
    860   qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses);
    861 
    862   /* Now pick out those symbols we're going to emit as
    863      a group.  We take up to 1.25% of the used symbols.  */
    864   for (index = 0; index < used / 80; index++)
    865     {
    866       Sym *sym = scratch_syms[index];
    867       Arc *arc;
    868 
    869       /* If we hit symbols that aren't used from many call sites,
    870 	 then we can quit.  We choose five as the low limit for
    871 	 no particular reason.  */
    872       if (sym->nuses == 5)
    873 	break;
    874 
    875       /* We're going to need the arcs between these functions.
    876 	 Unfortunately, we don't know all these functions
    877 	 until we're done.  So we keep track of all the arcs
    878 	 to the functions we care about, then prune out those
    879 	 which are uninteresting.
    880 
    881 	 An interesting variation would be to quit when we found
    882 	 multi-call site functions which account for some percentage
    883 	 of the arcs.  */
    884       arc = sym->cg.children;
    885 
    886       while (arc)
    887 	{
    888 	  if (arc->parent != arc->child)
    889 	    scratch_arcs[scratch_arc_count++] = arc;
    890 	  arc->has_been_placed = 1;
    891 	  arc = arc->next_child;
    892 	}
    893 
    894       arc = sym->cg.parents;
    895 
    896       while (arc)
    897 	{
    898 	  if (arc->parent != arc->child)
    899 	    scratch_arcs[scratch_arc_count++] = arc;
    900 	  arc->has_been_placed = 1;
    901 	  arc = arc->next_parent;
    902 	}
    903 
    904       /* Keep track of how many symbols we're going to place.  */
    905       scratch_index = index;
    906 
    907       /* A lie, but it makes identifying
    908 	 these functions easier later.  */
    909       sym->has_been_placed = 1;
    910     }
    911 
    912   /* Now walk through the temporary arcs and copy
    913      those we care about into the high arcs array.  */
    914   for (index = 0; index < scratch_arc_count; index++)
    915     {
    916       Arc *arc = scratch_arcs[index];
    917 
    918       /* If this arc refers to highly used functions, then
    919 	 then we want to keep it.  */
    920       if (arc->child->has_been_placed
    921 	  && arc->parent->has_been_placed)
    922 	{
    923 	  high_arcs[high_arc_count++] = scratch_arcs[index];
    924 
    925 	  /* We need to turn of has_been_placed since we're going to
    926 	     use the main arc placement algorithm on these arcs.  */
    927 	  arc->child->has_been_placed = 0;
    928 	  arc->parent->has_been_placed = 0;
    929 	}
    930     }
    931 
    932   /* Dump the multi-site high usage functions which are not
    933      going to be ordered by the main ordering algorithm.  */
    934   for (index = 0; index < scratch_index; index++)
    935     {
    936       if (scratch_syms[index]->has_been_placed)
    937 	printf ("%s\n", scratch_syms[index]->name);
    938     }
    939 
    940   /* Now we can order the multi-site high use
    941      functions based on the arcs between them.  */
    942   qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count);
    943   order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1,
    944 				    unplaced_arcs, &unplaced_arc_count);
    945 
    946   /* Order and dump the high use functions left,
    947      these typically have only a few call sites.  */
    948   order_and_dump_functions_by_arcs (arcs, numarcs, 0,
    949 				    unplaced_arcs, &unplaced_arc_count);
    950 
    951   /* Now place the rarely used functions.  */
    952   order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1,
    953 				    scratch_arcs, &scratch_arc_count);
    954 
    955   /* Output any functions not emitted by the order_and_dump calls.  */
    956   for (index = 0; index < used; index++)
    957     if (used_syms[index]->has_been_placed == 0)
    958       printf("%s\n", used_syms[index]->name);
    959 
    960   /* Output the unused functions.  */
    961   for (index = 0; index < unused; index++)
    962     printf("%s\n", unused_syms[index]->name);
    963 
    964   unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
    965   used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
    966   scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
    967   high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
    968   scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
    969   unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
    970 
    971   free (unused_syms);
    972   free (used_syms);
    973   free (scratch_syms);
    974   free (high_arcs);
    975   free (scratch_arcs);
    976   free (unplaced_arcs);
    977 }
    978 
    979 /* Place functions based on the arcs in THE_ARCS with ARC_COUNT entries;
    980    place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
    981 
    982    If ALL is nonzero, then place all functions referenced by THE_ARCS,
    983    else only place those referenced in the top 99% of the arcs in THE_ARCS.  */
    984 
    985 #define MOST 0.99
    986 static void
    987 order_and_dump_functions_by_arcs (the_arcs, arc_count, all,
    988 				  unplaced_arcs, unplaced_arc_count)
    989      Arc **the_arcs;
    990      unsigned long arc_count;
    991      int all;
    992      Arc **unplaced_arcs;
    993      unsigned long *unplaced_arc_count;
    994 {
    995 #ifdef __GNUC__
    996   unsigned long long tmp_arcs, total_arcs;
    997 #else
    998   unsigned long tmp_arcs, total_arcs;
    999 #endif
   1000   unsigned int index;
   1001 
   1002   /* If needed, compute the total arc count.
   1003 
   1004      Note we don't compensate for overflow if that happens!  */
   1005   if (! all)
   1006     {
   1007       total_arcs = 0;
   1008       for (index = 0; index < arc_count; index++)
   1009 	total_arcs += the_arcs[index]->count;
   1010     }
   1011   else
   1012     total_arcs = 0;
   1013 
   1014   tmp_arcs = 0;
   1015 
   1016   for (index = 0; index < arc_count; index++)
   1017     {
   1018       Sym *sym1, *sym2;
   1019       Sym *child, *parent;
   1020 
   1021       tmp_arcs += the_arcs[index]->count;
   1022 
   1023       /* Ignore this arc if it's already been placed.  */
   1024       if (the_arcs[index]->has_been_placed)
   1025 	continue;
   1026 
   1027       child = the_arcs[index]->child;
   1028       parent = the_arcs[index]->parent;
   1029 
   1030       /* If we're not using all arcs, and this is a rarely used
   1031 	 arc, then put it on the unplaced_arc list.  Similarly
   1032 	 if both the parent and child of this arc have been placed.  */
   1033       if ((! all && (double)tmp_arcs / (double)total_arcs > MOST)
   1034 	  || child->has_been_placed || parent->has_been_placed)
   1035 	{
   1036 	  unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
   1037 	  continue;
   1038 	}
   1039 
   1040       /* If all slots in the parent and child are full, then there isn't
   1041 	 anything we can do right now.  We'll place this arc on the
   1042 	 unplaced arc list in the hope that a global positioning
   1043 	 algorithm can use it to place function chains.  */
   1044       if (parent->next && parent->prev && child->next && child->prev)
   1045 	{
   1046 	  unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
   1047 	  continue;
   1048 	}
   1049 
   1050       /* If the parent is unattached, then find the closest
   1051 	 place to attach it onto child's chain.   Similarly
   1052 	 for the opposite case.  */
   1053       if (!parent->next && !parent->prev)
   1054 	{
   1055 	  int next_count = 0;
   1056 	  int prev_count = 0;
   1057 	  Sym *prev = child;
   1058 	  Sym *next = child;
   1059 
   1060 	  /* Walk to the beginning and end of the child's chain.  */
   1061 	  while (next->next)
   1062 	    {
   1063 	      next = next->next;
   1064 	      next_count++;
   1065 	    }
   1066 
   1067 	  while (prev->prev)
   1068 	    {
   1069 	      prev = prev->prev;
   1070 	      prev_count++;
   1071 	    }
   1072 
   1073 	  /* Choose the closest.  */
   1074 	  child = next_count < prev_count ? next : prev;
   1075 	}
   1076       else if (! child->next && !child->prev)
   1077 	{
   1078 	  int next_count = 0;
   1079 	  int prev_count = 0;
   1080 	  Sym *prev = parent;
   1081 	  Sym *next = parent;
   1082 
   1083 	  while (next->next)
   1084 	    {
   1085 	      next = next->next;
   1086 	      next_count++;
   1087 	    }
   1088 
   1089 	  while (prev->prev)
   1090 	    {
   1091 	      prev = prev->prev;
   1092 	      prev_count++;
   1093 	    }
   1094 
   1095 	  parent = prev_count < next_count ? prev : next;
   1096 	}
   1097       else
   1098 	{
   1099 	  /* Couldn't find anywhere to attach the functions,
   1100 	     put the arc on the unplaced arc list.  */
   1101 	  unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
   1102 	  continue;
   1103 	}
   1104 
   1105       /* Make sure we don't tie two ends together.  */
   1106       sym1 = parent;
   1107       if (sym1->next)
   1108 	while (sym1->next)
   1109 	  sym1 = sym1->next;
   1110       else
   1111 	while (sym1->prev)
   1112 	  sym1 = sym1->prev;
   1113 
   1114       sym2 = child;
   1115       if (sym2->next)
   1116 	while (sym2->next)
   1117 	  sym2 = sym2->next;
   1118       else
   1119 	while (sym2->prev)
   1120 	  sym2 = sym2->prev;
   1121 
   1122       if (sym1 == child
   1123 	  && sym2 == parent)
   1124 	{
   1125 	  /* This would tie two ends together.  */
   1126 	  unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[index];
   1127 	  continue;
   1128 	}
   1129 
   1130       if (parent->next)
   1131 	{
   1132 	  /* Must attach to the parent's prev field.  */
   1133 	  if (! child->next)
   1134 	    {
   1135 	      /* parent-prev and child-next */
   1136 	      parent->prev = child;
   1137 	      child->next = parent;
   1138 	      the_arcs[index]->has_been_placed = 1;
   1139 	    }
   1140 	}
   1141       else if (parent->prev)
   1142 	{
   1143 	  /* Must attach to the parent's next field.  */
   1144 	  if (! child->prev)
   1145 	    {
   1146 	      /* parent-next and child-prev */
   1147 	      parent->next = child;
   1148 	      child->prev = parent;
   1149 	      the_arcs[index]->has_been_placed = 1;
   1150 	    }
   1151 	}
   1152       else
   1153 	{
   1154 	  /* Can attach to either field in the parent, depends
   1155 	     on where we've got space in the child.  */
   1156 	  if (child->prev)
   1157 	    {
   1158 	      /* parent-prev and child-next.  */
   1159 	      parent->prev = child;
   1160 	      child->next = parent;
   1161 	      the_arcs[index]->has_been_placed = 1;
   1162 	    }
   1163 	  else
   1164 	    {
   1165 	      /* parent-next and child-prev.  */
   1166 	      parent->next = child;
   1167 	      child->prev = parent;
   1168 	      the_arcs[index]->has_been_placed = 1;
   1169 	    }
   1170 	}
   1171     }
   1172 
   1173   /* Dump the chains of functions we've made.  */
   1174   for (index = 0; index < arc_count; index++)
   1175     {
   1176       Sym *sym;
   1177       if (the_arcs[index]->parent->has_been_placed
   1178 	  || the_arcs[index]->child->has_been_placed)
   1179 	continue;
   1180 
   1181       sym = the_arcs[index]->parent;
   1182 
   1183       /* If this symbol isn't attached to any other
   1184 	 symbols, then we've got a rarely used arc.
   1185 
   1186 	 Skip it for now, we'll deal with them later.  */
   1187       if (sym->next == NULL
   1188 	  && sym->prev == NULL)
   1189 	continue;
   1190 
   1191       /* Get to the start of this chain.  */
   1192       while (sym->prev)
   1193 	sym = sym->prev;
   1194 
   1195       while (sym)
   1196 	{
   1197 	  /* Mark it as placed.  */
   1198 	  sym->has_been_placed = 1;
   1199 	  printf ("%s\n", sym->name);
   1200 	  sym = sym->next;
   1201 	}
   1202     }
   1203 
   1204   /* If we want to place all the arcs, then output
   1205      those which weren't placed by the main algorithm.  */
   1206   if (all)
   1207     for (index = 0; index < arc_count; index++)
   1208       {
   1209 	Sym *sym;
   1210 	if (the_arcs[index]->parent->has_been_placed
   1211 	    || the_arcs[index]->child->has_been_placed)
   1212 	  continue;
   1213 
   1214 	sym = the_arcs[index]->parent;
   1215 
   1216 	sym->has_been_placed = 1;
   1217 	printf ("%s\n", sym->name);
   1218       }
   1219 }
   1220 
   1221 /* Print a suggested .o ordering for files on a link line based
   1222    on profiling information.  This uses the function placement
   1223    code for the bulk of its work.  */
   1224 
   1225 void
   1226 cg_print_file_ordering ()
   1227 {
   1228   unsigned long scratch_arc_count, index;
   1229   Arc **scratch_arcs;
   1230   char *last;
   1231 
   1232   scratch_arc_count = 0;
   1233 
   1234   scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
   1235   for (index = 0; index < numarcs; index++)
   1236     {
   1237       if (! arcs[index]->parent->mapped
   1238 	  || ! arcs[index]->child->mapped)
   1239 	arcs[index]->has_been_placed = 1;
   1240     }
   1241 
   1242   order_and_dump_functions_by_arcs (arcs, numarcs, 0,
   1243 				    scratch_arcs, &scratch_arc_count);
   1244 
   1245   /* Output .o's not handled by the main placement algorithm.  */
   1246   for (index = 0; index < symtab.len; index++)
   1247     {
   1248       if (symtab.base[index].mapped
   1249 	  && ! symtab.base[index].has_been_placed)
   1250 	printf ("%s\n", symtab.base[index].name);
   1251     }
   1252 
   1253   /* Now output any .o's that didn't have any text symbols.  */
   1254   last = NULL;
   1255   for (index = 0; index < symbol_map_count; index++)
   1256     {
   1257       unsigned int index2;
   1258 
   1259       /* Don't bother searching if this symbol
   1260 	 is the same as the previous one.  */
   1261       if (last && !strcmp (last, symbol_map[index].file_name))
   1262 	continue;
   1263 
   1264       for (index2 = 0; index2 < symtab.len; index2++)
   1265 	{
   1266 	  if (! symtab.base[index2].mapped)
   1267 	    continue;
   1268 
   1269 	  if (!strcmp (symtab.base[index2].name, symbol_map[index].file_name))
   1270 	    break;
   1271 	}
   1272 
   1273       /* If we didn't find it in the symbol table, then it must
   1274 	 be a .o with no text symbols.  Output it last.  */
   1275       if (index2 == symtab.len)
   1276 	printf ("%s\n", symbol_map[index].file_name);
   1277       last = symbol_map[index].file_name;
   1278     }
   1279 }
   1280