Home | History | Annotate | Line # | Download | only in gprof
cg_print.c revision 1.1.1.2.2.1
      1 /* cg_print.c -  Print routines for displaying call graphs.
      2 
      3    Copyright 2000, 2001, 2002, 2004, 2007, 2009, 2011
      4    Free Software Foundation, Inc.
      5 
      6    This file is part of GNU Binutils.
      7 
      8    This program is free software; you can redistribute it and/or modify
      9    it under the terms of the GNU General Public License as published by
     10    the Free Software Foundation; either version 3 of the License, or
     11    (at your option) any later version.
     12 
     13    This program is distributed in the hope that it will be useful,
     14    but WITHOUT ANY WARRANTY; without even the implied warranty of
     15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     16    GNU General Public License for more details.
     17 
     18    You should have received a copy of the GNU General Public License
     19    along with this program; if not, write to the Free Software
     20    Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA
     21    02110-1301, USA.  */
     22 
     23 #include "gprof.h"
     25 #include "libiberty.h"
     26 #include "filenames.h"
     27 #include "search_list.h"
     28 #include "source.h"
     29 #include "symtab.h"
     30 #include "cg_arcs.h"
     31 #include "cg_print.h"
     32 #include "hist.h"
     33 #include "utils.h"
     34 #include "corefile.h"
     35 
     36 /* Return value of comparison functions used to sort tables.  */
     37 #define	LESSTHAN	-1
     38 #define	EQUALTO		0
     39 #define	GREATERTHAN	1
     40 
     41 static void print_header (void);
     42 static void print_cycle (Sym *);
     43 static int cmp_member (Sym *, Sym *);
     44 static void sort_members (Sym *);
     45 static void print_members (Sym *);
     46 static int cmp_arc (Arc *, Arc *);
     47 static void sort_parents (Sym *);
     48 static void print_parents (Sym *);
     49 static void sort_children (Sym *);
     50 static void print_children (Sym *);
     51 static void print_line (Sym *);
     52 static int cmp_name (const PTR, const PTR);
     53 static int cmp_arc_count (const PTR, const PTR);
     54 static int cmp_fun_nuses (const PTR, const PTR);
     55 static void order_and_dump_functions_by_arcs
     56   (Arc **, unsigned long, int, Arc **, unsigned long *);
     57 
     58 /* Declarations of automatically generated functions to output blurbs.  */
     59 extern void bsd_callg_blurb (FILE * fp);
     60 extern void fsf_callg_blurb (FILE * fp);
     61 
     62 double print_time = 0.0;
     63 
     64 
     65 static void
     66 print_header ()
     67 {
     68   if (first_output)
     69     first_output = FALSE;
     70   else
     71     printf ("\f\n");
     72 
     73   if (!bsd_style_output)
     74     {
     75       if (print_descriptions)
     76 	printf (_("\t\t     Call graph (explanation follows)\n\n"));
     77       else
     78 	printf (_("\t\t\tCall graph\n\n"));
     79     }
     80 
     81   printf (_("\ngranularity: each sample hit covers %ld byte(s)"),
     82 	  (long) hist_scale * (long) sizeof (UNIT));
     83 
     84   if (print_time > 0.0)
     85     printf (_(" for %.2f%% of %.2f seconds\n\n"),
     86 	    100.0 / print_time, print_time / hz);
     87   else
     88     {
     89       printf (_(" no time propagated\n\n"));
     90 
     91       /* This doesn't hurt, since all the numerators will be 0.0.  */
     92       print_time = 1.0;
     93     }
     94 
     95   if (bsd_style_output)
     96     {
     97       printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s     %-8.8s\n",
     98 	      "", "", "", "", _("called"), _("total"), _("parents"));
     99       printf ("%-6.6s %5.5s %7.7s %11.11s %7.7s+%-7.7s %-8.8s\t%5.5s\n",
    100 	      _("index"),
    101 	      /* xgettext:no-c-format */
    102 	      _("%time"),
    103 	      _("self"), _("descendants"), _("called"), _("self"),
    104 	      _("name"), _("index"));
    105       printf ("%6.6s %5.5s %7.7s %11.11s %7.7s/%-7.7s     %-8.8s\n",
    106 	      "", "", "", "", _("called"), _("total"), _("children"));
    107       printf ("\n");
    108     }
    109   else
    110     {
    111       printf (_("index %% time    self  children    called     name\n"));
    112     }
    113 }
    114 
    115 /* Print a cycle header.  */
    116 
    117 static void
    118 print_cycle (Sym *cyc)
    119 {
    120   char buf[BUFSIZ];
    121 
    122   sprintf (buf, "[%d]", cyc->cg.index);
    123   printf (bsd_style_output
    124 	  ? "%-6.6s %5.1f %7.2f %11.2f %7lu"
    125 	  : "%-6.6s %5.1f %7.2f %7.2f %7lu", buf,
    126 	  100 * (cyc->cg.prop.self + cyc->cg.prop.child) / print_time,
    127 	  cyc->cg.prop.self / hz, cyc->cg.prop.child / hz, cyc->ncalls);
    128 
    129   if (cyc->cg.self_calls != 0)
    130     printf ("+%-7lu", cyc->cg.self_calls);
    131   else
    132     printf (" %7.7s", "");
    133 
    134   printf (_(" <cycle %d as a whole> [%d]\n"), cyc->cg.cyc.num, cyc->cg.index);
    135 }
    136 
    137 /* Compare LEFT and RIGHT membmer.  Major comparison key is
    138    CG.PROP.SELF+CG.PROP.CHILD, secondary key is NCALLS+CG.SELF_CALLS.  */
    139 
    140 static int
    141 cmp_member (Sym *left, Sym *right)
    142 {
    143   double left_time = left->cg.prop.self + left->cg.prop.child;
    144   double right_time = right->cg.prop.self + right->cg.prop.child;
    145   unsigned long left_calls = left->ncalls + left->cg.self_calls;
    146   unsigned long right_calls = right->ncalls + right->cg.self_calls;
    147 
    148   if (left_time > right_time)
    149     return GREATERTHAN;
    150 
    151   if (left_time < right_time)
    152     return LESSTHAN;
    153 
    154   if (left_calls > right_calls)
    155     return GREATERTHAN;
    156 
    157   if (left_calls < right_calls)
    158     return LESSTHAN;
    159 
    160   return EQUALTO;
    161 }
    162 
    163 /* Sort members of a cycle.  */
    164 
    165 static void
    166 sort_members (Sym *cyc)
    167 {
    168   Sym *todo, *doing, *prev;
    169 
    170   /* Detach cycle members from cyclehead,
    171      and insertion sort them back on.  */
    172   todo = cyc->cg.cyc.next;
    173   cyc->cg.cyc.next = 0;
    174 
    175   for (doing = todo; doing != NULL; doing = todo)
    176     {
    177       todo = doing->cg.cyc.next;
    178 
    179       for (prev = cyc; prev->cg.cyc.next; prev = prev->cg.cyc.next)
    180 	{
    181 	  if (cmp_member (doing, prev->cg.cyc.next) == GREATERTHAN)
    182 	    break;
    183 	}
    184 
    185       doing->cg.cyc.next = prev->cg.cyc.next;
    186       prev->cg.cyc.next = doing;
    187     }
    188 }
    189 
    190 /* Print the members of a cycle.  */
    191 
    192 static void
    193 print_members (Sym *cyc)
    194 {
    195   Sym *member;
    196 
    197   sort_members (cyc);
    198 
    199   for (member = cyc->cg.cyc.next; member; member = member->cg.cyc.next)
    200     {
    201       printf (bsd_style_output
    202 	      ? "%6.6s %5.5s %7.2f %11.2f %7lu"
    203 	      : "%6.6s %5.5s %7.2f %7.2f %7lu",
    204 	      "", "", member->cg.prop.self / hz, member->cg.prop.child / hz,
    205 	      member->ncalls);
    206 
    207       if (member->cg.self_calls != 0)
    208 	printf ("+%-7lu", member->cg.self_calls);
    209       else
    210 	printf (" %7.7s", "");
    211 
    212       printf ("     ");
    213       print_name (member);
    214       printf ("\n");
    215     }
    216 }
    217 
    218 /* Compare two arcs to/from the same child/parent.
    219 	- if one arc is a self arc, it's least.
    220 	- if one arc is within a cycle, it's less than.
    221 	- if both arcs are within a cycle, compare arc counts.
    222 	- if neither arc is within a cycle, compare with
    223 		time + child_time as major key
    224 		arc count as minor key.  */
    225 
    226 static int
    227 cmp_arc (Arc *left, Arc *right)
    228 {
    229   Sym *left_parent = left->parent;
    230   Sym *left_child = left->child;
    231   Sym *right_parent = right->parent;
    232   Sym *right_child = right->child;
    233   double left_time, right_time;
    234 
    235   DBG (TIMEDEBUG,
    236        printf ("[cmp_arc] ");
    237        print_name (left_parent);
    238        printf (" calls ");
    239        print_name (left_child);
    240        printf (" %f + %f %lu/%lu\n", left->time, left->child_time,
    241 	       left->count, left_child->ncalls);
    242        printf ("[cmp_arc] ");
    243        print_name (right_parent);
    244        printf (" calls ");
    245        print_name (right_child);
    246        printf (" %f + %f %lu/%lu\n", right->time, right->child_time,
    247 	       right->count, right_child->ncalls);
    248        printf ("\n");
    249     );
    250 
    251   if (left_parent == left_child)
    252     return LESSTHAN;		/* Left is a self call.  */
    253 
    254   if (right_parent == right_child)
    255     return GREATERTHAN;		/* Right is a self call.  */
    256 
    257   if (left_parent->cg.cyc.num != 0 && left_child->cg.cyc.num != 0
    258       && left_parent->cg.cyc.num == left_child->cg.cyc.num)
    259     {
    260       /* Left is a call within a cycle.  */
    261       if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
    262 	  && right_parent->cg.cyc.num == right_child->cg.cyc.num)
    263 	{
    264 	  /* Right is a call within the cycle, too.  */
    265 	  if (left->count < right->count)
    266 	    return LESSTHAN;
    267 
    268 	  if (left->count > right->count)
    269 	    return GREATERTHAN;
    270 
    271 	  return EQUALTO;
    272 	}
    273       else
    274 	{
    275 	  /* Right isn't a call within the cycle.  */
    276 	  return LESSTHAN;
    277 	}
    278     }
    279   else
    280     {
    281       /* Left isn't a call within a cycle.  */
    282       if (right_parent->cg.cyc.num != 0 && right_child->cg.cyc.num != 0
    283 	  && right_parent->cg.cyc.num == right_child->cg.cyc.num)
    284 	{
    285 	  /* Right is a call within a cycle.  */
    286 	  return GREATERTHAN;
    287 	}
    288       else
    289 	{
    290 	  /* Neither is a call within a cycle.  */
    291 	  left_time = left->time + left->child_time;
    292 	  right_time = right->time + right->child_time;
    293 
    294 	  if (left_time < right_time)
    295 	    return LESSTHAN;
    296 
    297 	  if (left_time > right_time)
    298 	    return GREATERTHAN;
    299 
    300 	  if (left->count < right->count)
    301 	    return LESSTHAN;
    302 
    303 	  if (left->count > right->count)
    304 	    return GREATERTHAN;
    305 
    306 	  return EQUALTO;
    307 	}
    308     }
    309 }
    310 
    311 
    312 static void
    313 sort_parents (Sym * child)
    314 {
    315   Arc *arc, *detached, sorted, *prev;
    316 
    317   /* Unlink parents from child, then insertion sort back on to
    318      sorted's parents.
    319 	  *arc        the arc you have detached and are inserting.
    320 	  *detached   the rest of the arcs to be sorted.
    321 	  sorted      arc list onto which you insertion sort.
    322 	  *prev       arc before the arc you are comparing.  */
    323   sorted.next_parent = 0;
    324 
    325   for (arc = child->cg.parents; arc; arc = detached)
    326     {
    327       detached = arc->next_parent;
    328 
    329       /* Consider *arc as disconnected; insert it into sorted.  */
    330       for (prev = &sorted; prev->next_parent; prev = prev->next_parent)
    331 	{
    332 	  if (cmp_arc (arc, prev->next_parent) != GREATERTHAN)
    333 	    break;
    334 	}
    335 
    336       arc->next_parent = prev->next_parent;
    337       prev->next_parent = arc;
    338     }
    339 
    340   /* Reattach sorted arcs to child.  */
    341   child->cg.parents = sorted.next_parent;
    342 }
    343 
    344 
    345 static void
    346 print_parents (Sym *child)
    347 {
    348   Sym *parent;
    349   Arc *arc;
    350   Sym *cycle_head;
    351 
    352   if (child->cg.cyc.head != 0)
    353     cycle_head = child->cg.cyc.head;
    354   else
    355     cycle_head = child;
    356 
    357   if (!child->cg.parents)
    358     {
    359       printf (bsd_style_output
    360 	      ? _("%6.6s %5.5s %7.7s %11.11s %7.7s %7.7s     <spontaneous>\n")
    361 	      : _("%6.6s %5.5s %7.7s %7.7s %7.7s %7.7s     <spontaneous>\n"),
    362 	      "", "", "", "", "", "");
    363       return;
    364     }
    365 
    366   sort_parents (child);
    367 
    368   for (arc = child->cg.parents; arc; arc = arc->next_parent)
    369     {
    370       parent = arc->parent;
    371       if (child == parent || (child->cg.cyc.num != 0
    372 			      && parent->cg.cyc.num == child->cg.cyc.num))
    373 	{
    374 	  /* Selfcall or call among siblings.  */
    375 	  printf (bsd_style_output
    376 		  ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s     "
    377 		  : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s     ",
    378 		  "", "", "", "",
    379 		  arc->count, "");
    380 	  print_name (parent);
    381 	  printf ("\n");
    382 	}
    383       else
    384 	{
    385 	  /* Regular parent of child.  */
    386 	  printf (bsd_style_output
    387 		  ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu     "
    388 		  : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu     ",
    389 		  "", "",
    390 		  arc->time / hz, arc->child_time / hz,
    391 		  arc->count, cycle_head->ncalls);
    392 	  print_name (parent);
    393 	  printf ("\n");
    394 	}
    395     }
    396 }
    397 
    398 
    399 static void
    400 sort_children (Sym *parent)
    401 {
    402   Arc *arc, *detached, sorted, *prev;
    403 
    404   /* Unlink children from parent, then insertion sort back on to
    405      sorted's children.
    406 	  *arc        the arc you have detached and are inserting.
    407 	  *detached   the rest of the arcs to be sorted.
    408 	  sorted      arc list onto which you insertion sort.
    409 	  *prev       arc before the arc you are comparing.  */
    410   sorted.next_child = 0;
    411 
    412   for (arc = parent->cg.children; arc; arc = detached)
    413     {
    414       detached = arc->next_child;
    415 
    416       /* Consider *arc as disconnected; insert it into sorted.  */
    417       for (prev = &sorted; prev->next_child; prev = prev->next_child)
    418 	{
    419 	  if (cmp_arc (arc, prev->next_child) != LESSTHAN)
    420 	    break;
    421 	}
    422 
    423       arc->next_child = prev->next_child;
    424       prev->next_child = arc;
    425     }
    426 
    427   /* Reattach sorted children to parent.  */
    428   parent->cg.children = sorted.next_child;
    429 }
    430 
    431 
    432 static void
    433 print_children (Sym *parent)
    434 {
    435   Sym *child;
    436   Arc *arc;
    437 
    438   sort_children (parent);
    439   arc = parent->cg.children;
    440 
    441   for (arc = parent->cg.children; arc; arc = arc->next_child)
    442     {
    443       child = arc->child;
    444       if (child == parent || (child->cg.cyc.num != 0
    445 			      && child->cg.cyc.num == parent->cg.cyc.num))
    446 	{
    447 	  /* Self call or call to sibling.  */
    448 	  printf (bsd_style_output
    449 		  ? "%6.6s %5.5s %7.7s %11.11s %7lu %7.7s     "
    450 		  : "%6.6s %5.5s %7.7s %7.7s %7lu %7.7s     ",
    451 		  "", "", "", "", arc->count, "");
    452 	  print_name (child);
    453 	  printf ("\n");
    454 	}
    455       else
    456 	{
    457 	  /* Regular child of parent.  */
    458 	  printf (bsd_style_output
    459 		  ? "%6.6s %5.5s %7.2f %11.2f %7lu/%-7lu     "
    460 		  : "%6.6s %5.5s %7.2f %7.2f %7lu/%-7lu     ",
    461 		  "", "",
    462 		  arc->time / hz, arc->child_time / hz,
    463 		  arc->count, child->cg.cyc.head->ncalls);
    464 	  print_name (child);
    465 	  printf ("\n");
    466 	}
    467     }
    468 }
    469 
    470 
    471 static void
    472 print_line (Sym *np)
    473 {
    474   char buf[BUFSIZ];
    475 
    476   sprintf (buf, "[%d]", np->cg.index);
    477   printf (bsd_style_output
    478 	  ? "%-6.6s %5.1f %7.2f %11.2f"
    479 	  : "%-6.6s %5.1f %7.2f %7.2f", buf,
    480 	  100 * (np->cg.prop.self + np->cg.prop.child) / print_time,
    481 	  np->cg.prop.self / hz, np->cg.prop.child / hz);
    482 
    483   if ((np->ncalls + np->cg.self_calls) != 0)
    484     {
    485       printf (" %7lu", np->ncalls);
    486 
    487       if (np->cg.self_calls != 0)
    488 	  printf ("+%-7lu ", np->cg.self_calls);
    489       else
    490 	  printf (" %7.7s ", "");
    491     }
    492   else
    493     {
    494       printf (" %7.7s %7.7s ", "", "");
    495     }
    496 
    497   print_name (np);
    498   printf ("\n");
    499 }
    500 
    501 
    502 /* Print dynamic call graph.  */
    503 
    504 void
    505 cg_print (Sym ** timesortsym)
    506 {
    507   unsigned int sym_index;
    508   Sym *parent;
    509 
    510   if (print_descriptions && bsd_style_output)
    511     bsd_callg_blurb (stdout);
    512 
    513   print_header ();
    514 
    515   for (sym_index = 0; sym_index < symtab.len + num_cycles; ++sym_index)
    516     {
    517       parent = timesortsym[sym_index];
    518 
    519       if ((ignore_zeros && parent->ncalls == 0
    520 	   && parent->cg.self_calls == 0 && parent->cg.prop.self == 0
    521 	   && parent->cg.prop.child == 0)
    522 	  || !parent->cg.print_flag
    523 	  || (line_granularity && ! parent->is_func))
    524 	continue;
    525 
    526       if (!parent->name && parent->cg.cyc.num != 0)
    527 	{
    528 	  /* Cycle header.  */
    529 	  print_cycle (parent);
    530 	  print_members (parent);
    531 	}
    532       else
    533 	{
    534 	  print_parents (parent);
    535 	  print_line (parent);
    536 	  print_children (parent);
    537 	}
    538 
    539       if (bsd_style_output)
    540 	printf ("\n");
    541 
    542       printf ("-----------------------------------------------\n");
    543 
    544       if (bsd_style_output)
    545 	printf ("\n");
    546     }
    547 
    548   free (timesortsym);
    549 
    550   if (print_descriptions && !bsd_style_output)
    551     fsf_callg_blurb (stdout);
    552 }
    553 
    554 
    555 static int
    556 cmp_name (const PTR left, const PTR right)
    557 {
    558   const Sym **npp1 = (const Sym **) left;
    559   const Sym **npp2 = (const Sym **) right;
    560 
    561   return strcmp ((*npp1)->name, (*npp2)->name);
    562 }
    563 
    564 
    565 void
    566 cg_print_index ()
    567 {
    568   unsigned int sym_index;
    569   unsigned int nnames, todo, i, j;
    570   int col, starting_col;
    571   Sym **name_sorted_syms, *sym;
    572   const char *filename;
    573   char buf[20];
    574   int column_width = (output_width - 1) / 3;	/* Don't write in last col!  */
    575 
    576   /* Now, sort regular function name
    577      alphabetically to create an index.  */
    578   name_sorted_syms = (Sym **) xmalloc ((symtab.len + num_cycles) * sizeof (Sym *));
    579 
    580   for (sym_index = 0, nnames = 0; sym_index < symtab.len; sym_index++)
    581     {
    582       if (ignore_zeros && symtab.base[sym_index].ncalls == 0
    583 	  && symtab.base[sym_index].hist.time == 0)
    584 	continue;
    585 
    586       name_sorted_syms[nnames++] = &symtab.base[sym_index];
    587     }
    588 
    589   qsort (name_sorted_syms, nnames, sizeof (Sym *), cmp_name);
    590 
    591   for (sym_index = 1, todo = nnames; sym_index <= num_cycles; sym_index++)
    592     name_sorted_syms[todo++] = &cycle_header[sym_index];
    593 
    594   printf ("\f\n");
    595   printf (_("Index by function name\n\n"));
    596   sym_index = (todo + 2) / 3;
    597 
    598   for (i = 0; i < sym_index; i++)
    599     {
    600       col = 0;
    601       starting_col = 0;
    602 
    603       for (j = i; j < todo; j += sym_index)
    604 	{
    605 	  sym = name_sorted_syms[j];
    606 
    607 	  if (sym->cg.print_flag)
    608 	    sprintf (buf, "[%d]", sym->cg.index);
    609 	  else
    610 	    sprintf (buf, "(%d)", sym->cg.index);
    611 
    612 	  if (j < nnames)
    613 	    {
    614 	      if (bsd_style_output)
    615 		{
    616 		  printf ("%6.6s %-19.19s", buf, sym->name);
    617 		}
    618 	      else
    619 		{
    620 		  col += strlen (buf);
    621 
    622 		  for (; col < starting_col + 5; ++col)
    623 		    putchar (' ');
    624 
    625 		  printf (" %s ", buf);
    626 		  col += print_name_only (sym);
    627 
    628 		  if (!line_granularity && sym->is_static && sym->file)
    629 		    {
    630 		      filename = sym->file->name;
    631 
    632 		      if (!print_path)
    633 			{
    634 			  filename = strrchr (filename, '/');
    635 
    636 			  if (filename)
    637 			    ++filename;
    638 			  else
    639 			    filename = sym->file->name;
    640 			}
    641 
    642 		      printf (" (%s)", filename);
    643 		      col += strlen (filename) + 3;
    644 		    }
    645 		}
    646 	    }
    647 	  else
    648 	    {
    649 	      if (bsd_style_output)
    650 		{
    651 		  printf ("%6.6s ", buf);
    652 		  sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
    653 		  printf ("%-19.19s", buf);
    654 		}
    655 	      else
    656 		{
    657 		  col += strlen (buf);
    658 		  for (; col < starting_col + 5; ++col)
    659 		    putchar (' ');
    660 		  printf (" %s ", buf);
    661 		  sprintf (buf, _("<cycle %d>"), sym->cg.cyc.num);
    662 		  printf ("%s", buf);
    663 		  col += strlen (buf);
    664 		}
    665 	    }
    666 
    667 	  starting_col += column_width;
    668 	}
    669 
    670       printf ("\n");
    671     }
    672 
    673   free (name_sorted_syms);
    674 }
    675 
    676 /* Compare two arcs based on their usage counts.
    677    We want to sort in descending order.  */
    678 
    679 static int
    680 cmp_arc_count (const PTR left, const PTR right)
    681 {
    682   const Arc **npp1 = (const Arc **) left;
    683   const Arc **npp2 = (const Arc **) right;
    684 
    685   if ((*npp1)->count > (*npp2)->count)
    686     return -1;
    687   else if ((*npp1)->count < (*npp2)->count)
    688     return 1;
    689   else
    690     return 0;
    691 }
    692 
    693 /* Compare two funtions based on their usage counts.
    694    We want to sort in descending order.  */
    695 
    696 static int
    697 cmp_fun_nuses (const PTR left, const PTR right)
    698 {
    699   const Sym **npp1 = (const Sym **) left;
    700   const Sym **npp2 = (const Sym **) right;
    701 
    702   if ((*npp1)->nuses > (*npp2)->nuses)
    703     return -1;
    704   else if ((*npp1)->nuses < (*npp2)->nuses)
    705     return 1;
    706   else
    707     return 0;
    708 }
    709 
    710 /* Print a suggested function ordering based on the profiling data.
    711 
    712    We perform 4 major steps when ordering functions:
    713 
    714 	* Group unused functions together and place them at the
    715 	end of the function order.
    716 
    717 	* Search the highest use arcs (those which account for 90% of
    718 	the total arc count) for functions which have several parents.
    719 
    720 	Group those with the most call sites together (currently the
    721 	top 1.25% which have at least five different call sites).
    722 
    723 	These are emitted at the start of the function order.
    724 
    725 	* Use a greedy placement algorithm to place functions which
    726 	occur in the top 99% of the arcs in the profile.  Some provisions
    727 	are made to handle high usage arcs where the parent and/or
    728 	child has already been placed.
    729 
    730 	* Run the same greedy placement algorithm on the remaining
    731 	arcs to place the leftover functions.
    732 
    733 
    734    The various "magic numbers" should (one day) be tuneable by command
    735    line options.  They were arrived at by benchmarking a few applications
    736    with various values to see which values produced better overall function
    737    orderings.
    738 
    739    Of course, profiling errors, machine limitations (PA long calls), and
    740    poor cutoff values for the placement algorithm may limit the usefullness
    741    of the resulting function order.  Improvements would be greatly appreciated.
    742 
    743    Suggestions:
    744 
    745 	* Place the functions with many callers near the middle of the
    746 	list to reduce long calls.
    747 
    748 	* Propagate arc usage changes as functions are placed.  Ie if
    749 	func1 and func2 are placed together, arcs to/from those arcs
    750 	to the same parent/child should be combined, then resort the
    751 	arcs to choose the next one.
    752 
    753 	* Implement some global positioning algorithm to place the
    754 	chains made by the greedy local positioning algorithm.  Probably
    755 	by examining arcs which haven't been placed yet to tie two
    756 	chains together.
    757 
    758 	* Take a function's size and time into account in the algorithm;
    759 	size in particular is important on the PA (long calls).  Placing
    760 	many small functions onto their own page may be wise.
    761 
    762 	* Use better profiling information; many published algorithms
    763 	are based on call sequences through time, rather than just
    764 	arc counts.
    765 
    766 	* Prodecure cloning could improve performance when a small number
    767 	of arcs account for most of the calls to a particular function.
    768 
    769 	* Use relocation information to avoid moving unused functions
    770 	completely out of the code stream; this would avoid severe lossage
    771 	when the profile data bears little resemblance to actual runs.
    772 
    773 	* Propagation of arc usages should also improve .o link line
    774 	ordering which shares the same arc placement algorithm with
    775 	the function ordering code (in fact it is a degenerate case
    776 	of function ordering).  */
    777 
    778 void
    779 cg_print_function_ordering (void)
    780 {
    781   unsigned long sym_index;
    782   unsigned long arc_index;
    783   unsigned long used, unused, scratch_index;
    784   unsigned long  unplaced_arc_count, high_arc_count, scratch_arc_count;
    785 #ifdef __GNUC__
    786   unsigned long long total_arcs, tmp_arcs_count;
    787 #else
    788   unsigned long total_arcs, tmp_arcs_count;
    789 #endif
    790   Sym **unused_syms, **used_syms, **scratch_syms;
    791   Arc **unplaced_arcs, **high_arcs, **scratch_arcs;
    792 
    793   sym_index = 0;
    794   used = 0;
    795   unused = 0;
    796   scratch_index = 0;
    797   unplaced_arc_count = 0;
    798   high_arc_count = 0;
    799   scratch_arc_count = 0;
    800 
    801   /* First group all the unused functions together.  */
    802   unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
    803   used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
    804   scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
    805   high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
    806   scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
    807   unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
    808 
    809   /* Walk through all the functions; mark those which are never
    810      called as placed (we'll emit them as a group later).  */
    811   for (sym_index = 0, used = 0, unused = 0; sym_index < symtab.len; sym_index++)
    812     {
    813       if (symtab.base[sym_index].ncalls == 0)
    814 	{
    815 	  unused_syms[unused++] = &symtab.base[sym_index];
    816 	  symtab.base[sym_index].has_been_placed = 1;
    817 	}
    818       else
    819 	{
    820 	  used_syms[used++] = &symtab.base[sym_index];
    821 	  symtab.base[sym_index].has_been_placed = 0;
    822 	  symtab.base[sym_index].next = 0;
    823 	  symtab.base[sym_index].prev = 0;
    824 	  symtab.base[sym_index].nuses = 0;
    825 	}
    826     }
    827 
    828   /* Sort the arcs from most used to least used.  */
    829   qsort (arcs, numarcs, sizeof (Arc *), cmp_arc_count);
    830 
    831   /* Compute the total arc count.  Also mark arcs as unplaced.
    832 
    833      Note we don't compensate for overflow if that happens!
    834      Overflow is much less likely when this file is compiled
    835      with GCC as it can double-wide integers via long long.  */
    836   total_arcs = 0;
    837   for (arc_index = 0; arc_index < numarcs; arc_index++)
    838     {
    839       total_arcs += arcs[arc_index]->count;
    840       arcs[arc_index]->has_been_placed = 0;
    841     }
    842 
    843   /* We want to pull out those functions which are referenced
    844      by many highly used arcs and emit them as a group.  This
    845      could probably use some tuning.  */
    846   tmp_arcs_count = 0;
    847   for (arc_index = 0; arc_index < numarcs; arc_index++)
    848     {
    849       tmp_arcs_count += arcs[arc_index]->count;
    850 
    851       /* Count how many times each parent and child are used up
    852 	 to our threshhold of arcs (90%).  */
    853       if ((double)tmp_arcs_count / (double)total_arcs > 0.90)
    854 	break;
    855 
    856       arcs[arc_index]->child->nuses++;
    857     }
    858 
    859   /* Now sort a temporary symbol table based on the number of
    860      times each function was used in the highest used arcs.  */
    861   memcpy (scratch_syms, used_syms, used * sizeof (Sym *));
    862   qsort (scratch_syms, used, sizeof (Sym *), cmp_fun_nuses);
    863 
    864   /* Now pick out those symbols we're going to emit as
    865      a group.  We take up to 1.25% of the used symbols.  */
    866   for (sym_index = 0; sym_index < used / 80; sym_index++)
    867     {
    868       Sym *sym = scratch_syms[sym_index];
    869       Arc *arc;
    870 
    871       /* If we hit symbols that aren't used from many call sites,
    872 	 then we can quit.  We choose five as the low limit for
    873 	 no particular reason.  */
    874       if (sym->nuses == 5)
    875 	break;
    876 
    877       /* We're going to need the arcs between these functions.
    878 	 Unfortunately, we don't know all these functions
    879 	 until we're done.  So we keep track of all the arcs
    880 	 to the functions we care about, then prune out those
    881 	 which are uninteresting.
    882 
    883 	 An interesting variation would be to quit when we found
    884 	 multi-call site functions which account for some percentage
    885 	 of the arcs.  */
    886       arc = sym->cg.children;
    887 
    888       while (arc)
    889 	{
    890 	  if (arc->parent != arc->child)
    891 	    scratch_arcs[scratch_arc_count++] = arc;
    892 	  arc->has_been_placed = 1;
    893 	  arc = arc->next_child;
    894 	}
    895 
    896       arc = sym->cg.parents;
    897 
    898       while (arc)
    899 	{
    900 	  if (arc->parent != arc->child)
    901 	    scratch_arcs[scratch_arc_count++] = arc;
    902 	  arc->has_been_placed = 1;
    903 	  arc = arc->next_parent;
    904 	}
    905 
    906       /* Keep track of how many symbols we're going to place.  */
    907       scratch_index = sym_index;
    908 
    909       /* A lie, but it makes identifying
    910 	 these functions easier later.  */
    911       sym->has_been_placed = 1;
    912     }
    913 
    914   /* Now walk through the temporary arcs and copy
    915      those we care about into the high arcs array.  */
    916   for (arc_index = 0; arc_index < scratch_arc_count; arc_index++)
    917     {
    918       Arc *arc = scratch_arcs[arc_index];
    919 
    920       /* If this arc refers to highly used functions, then
    921 	 then we want to keep it.  */
    922       if (arc->child->has_been_placed
    923 	  && arc->parent->has_been_placed)
    924 	{
    925 	  high_arcs[high_arc_count++] = scratch_arcs[arc_index];
    926 
    927 	  /* We need to turn of has_been_placed since we're going to
    928 	     use the main arc placement algorithm on these arcs.  */
    929 	  arc->child->has_been_placed = 0;
    930 	  arc->parent->has_been_placed = 0;
    931 	}
    932     }
    933 
    934   /* Dump the multi-site high usage functions which are not
    935      going to be ordered by the main ordering algorithm.  */
    936   for (sym_index = 0; sym_index < scratch_index; sym_index++)
    937     {
    938       if (scratch_syms[sym_index]->has_been_placed)
    939 	printf ("%s\n", scratch_syms[sym_index]->name);
    940     }
    941 
    942   /* Now we can order the multi-site high use
    943      functions based on the arcs between them.  */
    944   qsort (high_arcs, high_arc_count, sizeof (Arc *), cmp_arc_count);
    945   order_and_dump_functions_by_arcs (high_arcs, high_arc_count, 1,
    946 				    unplaced_arcs, &unplaced_arc_count);
    947 
    948   /* Order and dump the high use functions left,
    949      these typically have only a few call sites.  */
    950   order_and_dump_functions_by_arcs (arcs, numarcs, 0,
    951 				    unplaced_arcs, &unplaced_arc_count);
    952 
    953   /* Now place the rarely used functions.  */
    954   order_and_dump_functions_by_arcs (unplaced_arcs, unplaced_arc_count, 1,
    955 				    scratch_arcs, &scratch_arc_count);
    956 
    957   /* Output any functions not emitted by the order_and_dump calls.  */
    958   for (sym_index = 0; sym_index < used; sym_index++)
    959     if (used_syms[sym_index]->has_been_placed == 0)
    960       printf("%s\n", used_syms[sym_index]->name);
    961 
    962   /* Output the unused functions.  */
    963   for (sym_index = 0; sym_index < unused; sym_index++)
    964     printf("%s\n", unused_syms[sym_index]->name);
    965 
    966   unused_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
    967   used_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
    968   scratch_syms = (Sym **) xmalloc (symtab.len * sizeof (Sym *));
    969   high_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
    970   scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
    971   unplaced_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
    972 
    973   free (unused_syms);
    974   free (used_syms);
    975   free (scratch_syms);
    976   free (high_arcs);
    977   free (scratch_arcs);
    978   free (unplaced_arcs);
    979 }
    980 
    981 /* Place functions based on the arcs in THE_ARCS with ARC_COUNT entries;
    982    place unused arcs into UNPLACED_ARCS/UNPLACED_ARC_COUNT.
    983 
    984    If ALL is nonzero, then place all functions referenced by THE_ARCS,
    985    else only place those referenced in the top 99% of the arcs in THE_ARCS.  */
    986 
    987 #define MOST 0.99
    988 static void
    989 order_and_dump_functions_by_arcs (the_arcs, arc_count, all,
    990 				  unplaced_arcs, unplaced_arc_count)
    991      Arc **the_arcs;
    992      unsigned long arc_count;
    993      int all;
    994      Arc **unplaced_arcs;
    995      unsigned long *unplaced_arc_count;
    996 {
    997 #ifdef __GNUC__
    998   unsigned long long tmp_arcs, total_arcs;
    999 #else
   1000   unsigned long tmp_arcs, total_arcs;
   1001 #endif
   1002   unsigned int arc_index;
   1003 
   1004   /* If needed, compute the total arc count.
   1005 
   1006      Note we don't compensate for overflow if that happens!  */
   1007   if (! all)
   1008     {
   1009       total_arcs = 0;
   1010       for (arc_index = 0; arc_index < arc_count; arc_index++)
   1011 	total_arcs += the_arcs[arc_index]->count;
   1012     }
   1013   else
   1014     total_arcs = 0;
   1015 
   1016   tmp_arcs = 0;
   1017 
   1018   for (arc_index = 0; arc_index < arc_count; arc_index++)
   1019     {
   1020       Sym *sym1, *sym2;
   1021       Sym *child, *parent;
   1022 
   1023       tmp_arcs += the_arcs[arc_index]->count;
   1024 
   1025       /* Ignore this arc if it's already been placed.  */
   1026       if (the_arcs[arc_index]->has_been_placed)
   1027 	continue;
   1028 
   1029       child = the_arcs[arc_index]->child;
   1030       parent = the_arcs[arc_index]->parent;
   1031 
   1032       /* If we're not using all arcs, and this is a rarely used
   1033 	 arc, then put it on the unplaced_arc list.  Similarly
   1034 	 if both the parent and child of this arc have been placed.  */
   1035       if ((! all && (double)tmp_arcs / (double)total_arcs > MOST)
   1036 	  || child->has_been_placed || parent->has_been_placed)
   1037 	{
   1038 	  unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
   1039 	  continue;
   1040 	}
   1041 
   1042       /* If all slots in the parent and child are full, then there isn't
   1043 	 anything we can do right now.  We'll place this arc on the
   1044 	 unplaced arc list in the hope that a global positioning
   1045 	 algorithm can use it to place function chains.  */
   1046       if (parent->next && parent->prev && child->next && child->prev)
   1047 	{
   1048 	  unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
   1049 	  continue;
   1050 	}
   1051 
   1052       /* If the parent is unattached, then find the closest
   1053 	 place to attach it onto child's chain.   Similarly
   1054 	 for the opposite case.  */
   1055       if (!parent->next && !parent->prev)
   1056 	{
   1057 	  int next_count = 0;
   1058 	  int prev_count = 0;
   1059 	  Sym *prev = child;
   1060 	  Sym *next = child;
   1061 
   1062 	  /* Walk to the beginning and end of the child's chain.  */
   1063 	  while (next->next)
   1064 	    {
   1065 	      next = next->next;
   1066 	      next_count++;
   1067 	    }
   1068 
   1069 	  while (prev->prev)
   1070 	    {
   1071 	      prev = prev->prev;
   1072 	      prev_count++;
   1073 	    }
   1074 
   1075 	  /* Choose the closest.  */
   1076 	  child = next_count < prev_count ? next : prev;
   1077 	}
   1078       else if (! child->next && !child->prev)
   1079 	{
   1080 	  int next_count = 0;
   1081 	  int prev_count = 0;
   1082 	  Sym *prev = parent;
   1083 	  Sym *next = parent;
   1084 
   1085 	  while (next->next)
   1086 	    {
   1087 	      next = next->next;
   1088 	      next_count++;
   1089 	    }
   1090 
   1091 	  while (prev->prev)
   1092 	    {
   1093 	      prev = prev->prev;
   1094 	      prev_count++;
   1095 	    }
   1096 
   1097 	  parent = prev_count < next_count ? prev : next;
   1098 	}
   1099       else
   1100 	{
   1101 	  /* Couldn't find anywhere to attach the functions,
   1102 	     put the arc on the unplaced arc list.  */
   1103 	  unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
   1104 	  continue;
   1105 	}
   1106 
   1107       /* Make sure we don't tie two ends together.  */
   1108       sym1 = parent;
   1109       if (sym1->next)
   1110 	while (sym1->next)
   1111 	  sym1 = sym1->next;
   1112       else
   1113 	while (sym1->prev)
   1114 	  sym1 = sym1->prev;
   1115 
   1116       sym2 = child;
   1117       if (sym2->next)
   1118 	while (sym2->next)
   1119 	  sym2 = sym2->next;
   1120       else
   1121 	while (sym2->prev)
   1122 	  sym2 = sym2->prev;
   1123 
   1124       if (sym1 == child
   1125 	  && sym2 == parent)
   1126 	{
   1127 	  /* This would tie two ends together.  */
   1128 	  unplaced_arcs[(*unplaced_arc_count)++] = the_arcs[arc_index];
   1129 	  continue;
   1130 	}
   1131 
   1132       if (parent->next)
   1133 	{
   1134 	  /* Must attach to the parent's prev field.  */
   1135 	  if (! child->next)
   1136 	    {
   1137 	      /* parent-prev and child-next */
   1138 	      parent->prev = child;
   1139 	      child->next = parent;
   1140 	      the_arcs[arc_index]->has_been_placed = 1;
   1141 	    }
   1142 	}
   1143       else if (parent->prev)
   1144 	{
   1145 	  /* Must attach to the parent's next field.  */
   1146 	  if (! child->prev)
   1147 	    {
   1148 	      /* parent-next and child-prev */
   1149 	      parent->next = child;
   1150 	      child->prev = parent;
   1151 	      the_arcs[arc_index]->has_been_placed = 1;
   1152 	    }
   1153 	}
   1154       else
   1155 	{
   1156 	  /* Can attach to either field in the parent, depends
   1157 	     on where we've got space in the child.  */
   1158 	  if (child->prev)
   1159 	    {
   1160 	      /* parent-prev and child-next.  */
   1161 	      parent->prev = child;
   1162 	      child->next = parent;
   1163 	      the_arcs[arc_index]->has_been_placed = 1;
   1164 	    }
   1165 	  else
   1166 	    {
   1167 	      /* parent-next and child-prev.  */
   1168 	      parent->next = child;
   1169 	      child->prev = parent;
   1170 	      the_arcs[arc_index]->has_been_placed = 1;
   1171 	    }
   1172 	}
   1173     }
   1174 
   1175   /* Dump the chains of functions we've made.  */
   1176   for (arc_index = 0; arc_index < arc_count; arc_index++)
   1177     {
   1178       Sym *sym;
   1179       if (the_arcs[arc_index]->parent->has_been_placed
   1180 	  || the_arcs[arc_index]->child->has_been_placed)
   1181 	continue;
   1182 
   1183       sym = the_arcs[arc_index]->parent;
   1184 
   1185       /* If this symbol isn't attached to any other
   1186 	 symbols, then we've got a rarely used arc.
   1187 
   1188 	 Skip it for now, we'll deal with them later.  */
   1189       if (sym->next == NULL
   1190 	  && sym->prev == NULL)
   1191 	continue;
   1192 
   1193       /* Get to the start of this chain.  */
   1194       while (sym->prev)
   1195 	sym = sym->prev;
   1196 
   1197       while (sym)
   1198 	{
   1199 	  /* Mark it as placed.  */
   1200 	  sym->has_been_placed = 1;
   1201 	  printf ("%s\n", sym->name);
   1202 	  sym = sym->next;
   1203 	}
   1204     }
   1205 
   1206   /* If we want to place all the arcs, then output
   1207      those which weren't placed by the main algorithm.  */
   1208   if (all)
   1209     for (arc_index = 0; arc_index < arc_count; arc_index++)
   1210       {
   1211 	Sym *sym;
   1212 	if (the_arcs[arc_index]->parent->has_been_placed
   1213 	    || the_arcs[arc_index]->child->has_been_placed)
   1214 	  continue;
   1215 
   1216 	sym = the_arcs[arc_index]->parent;
   1217 
   1218 	sym->has_been_placed = 1;
   1219 	printf ("%s\n", sym->name);
   1220       }
   1221 }
   1222 
   1223 /* Compare two function_map structs based on file name.
   1224    We want to sort in ascending order.  */
   1225 
   1226 static int
   1227 cmp_symbol_map (const void * l, const void * r)
   1228 {
   1229   return filename_cmp (((struct function_map *) l)->file_name,
   1230 		       ((struct function_map *) r)->file_name);
   1231 }
   1232 
   1233 /* Print a suggested .o ordering for files on a link line based
   1234    on profiling information.  This uses the function placement
   1235    code for the bulk of its work.  */
   1236 
   1237 void
   1238 cg_print_file_ordering (void)
   1239 {
   1240   unsigned long scratch_arc_count;
   1241   unsigned long arc_index;
   1242   unsigned long sym_index;
   1243   Arc **scratch_arcs;
   1244   char *last;
   1245 
   1246   scratch_arc_count = 0;
   1247 
   1248   scratch_arcs = (Arc **) xmalloc (numarcs * sizeof (Arc *));
   1249   for (arc_index = 0; arc_index < numarcs; arc_index++)
   1250     {
   1251       if (! arcs[arc_index]->parent->mapped
   1252 	  || ! arcs[arc_index]->child->mapped)
   1253 	arcs[arc_index]->has_been_placed = 1;
   1254     }
   1255 
   1256   order_and_dump_functions_by_arcs (arcs, numarcs, 0,
   1257 				    scratch_arcs, &scratch_arc_count);
   1258 
   1259   /* Output .o's not handled by the main placement algorithm.  */
   1260   for (sym_index = 0; sym_index < symtab.len; sym_index++)
   1261     {
   1262       if (symtab.base[sym_index].mapped
   1263 	  && ! symtab.base[sym_index].has_been_placed)
   1264 	printf ("%s\n", symtab.base[sym_index].name);
   1265     }
   1266 
   1267   qsort (symbol_map, symbol_map_count, sizeof (struct function_map), cmp_symbol_map);
   1268 
   1269   /* Now output any .o's that didn't have any text symbols.  */
   1270   last = NULL;
   1271   for (sym_index = 0; sym_index < symbol_map_count; sym_index++)
   1272     {
   1273       unsigned int index2;
   1274 
   1275       /* Don't bother searching if this symbol
   1276 	 is the same as the previous one.  */
   1277       if (last && !filename_cmp (last, symbol_map[sym_index].file_name))
   1278 	continue;
   1279 
   1280       for (index2 = 0; index2 < symtab.len; index2++)
   1281 	{
   1282 	  if (! symtab.base[index2].mapped)
   1283 	    continue;
   1284 
   1285 	  if (!filename_cmp (symtab.base[index2].name,
   1286 			     symbol_map[sym_index].file_name))
   1287 	    break;
   1288 	}
   1289 
   1290       /* If we didn't find it in the symbol table, then it must
   1291 	 be a .o with no text symbols.  Output it last.  */
   1292       if (index2 == symtab.len)
   1293 	printf ("%s\n", symbol_map[sym_index].file_name);
   1294       last = symbol_map[sym_index].file_name;
   1295     }
   1296 }
   1297