Home | History | Annotate | Line # | Download | only in libcpp
      1  1.1  mrg /* Hash tables.
      2  1.1  mrg    Copyright (C) 2000-2022 Free Software Foundation, Inc.
      3  1.1  mrg 
      4  1.1  mrg This program is free software; you can redistribute it and/or modify it
      5  1.1  mrg under the terms of the GNU General Public License as published by the
      6  1.1  mrg Free Software Foundation; either version 3, or (at your option) any
      7  1.1  mrg later version.
      8  1.1  mrg 
      9  1.1  mrg This program is distributed in the hope that it will be useful,
     10  1.1  mrg but WITHOUT ANY WARRANTY; without even the implied warranty of
     11  1.1  mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
     12  1.1  mrg GNU General Public License for more details.
     13  1.1  mrg 
     14  1.1  mrg You should have received a copy of the GNU General Public License
     15  1.1  mrg along with this program; see the file COPYING3.  If not see
     16  1.1  mrg <http://www.gnu.org/licenses/>.
     17  1.1  mrg 
     18  1.1  mrg  In other words, you are welcome to use, share and improve this program.
     19  1.1  mrg  You are forbidden to forbid anyone else to use, share and improve
     20  1.1  mrg  what you give them.   Help stamp out software-hoarding!  */
     21  1.1  mrg 
     22  1.1  mrg #include "config.h"
     23  1.1  mrg #include "system.h"
     24  1.1  mrg #include "symtab.h"
     25  1.1  mrg 
     26  1.1  mrg /* The code below is a specialization of Vladimir Makarov's expandable
     27  1.1  mrg    hash tables (see libiberty/hashtab.c).  The abstraction penalty was
     28  1.1  mrg    too high to continue using the generic form.  This code knows
     29  1.1  mrg    intrinsically how to calculate a hash value, and how to compare an
     30  1.1  mrg    existing entry with a potential new one.  */
     31  1.1  mrg 
     32  1.1  mrg static unsigned int calc_hash (const unsigned char *, size_t);
     33  1.1  mrg static void ht_expand (cpp_hash_table *);
     34  1.1  mrg static double approx_sqrt (double);
     35  1.1  mrg 
     36  1.1  mrg /* A deleted entry.  */
     37  1.1  mrg #define DELETED ((hashnode) -1)
     38  1.1  mrg 
     39  1.1  mrg /* Calculate the hash of the string STR of length LEN.  */
     40  1.1  mrg 
     41  1.1  mrg static unsigned int
     42  1.1  mrg calc_hash (const unsigned char *str, size_t len)
     43  1.1  mrg {
     44  1.1  mrg   size_t n = len;
     45  1.1  mrg   unsigned int r = 0;
     46  1.1  mrg 
     47  1.1  mrg   while (n--)
     48  1.1  mrg     r = HT_HASHSTEP (r, *str++);
     49  1.1  mrg 
     50  1.1  mrg   return HT_HASHFINISH (r, len);
     51  1.1  mrg }
     52  1.1  mrg 
     53  1.1  mrg /* Initialize an identifier hashtable.  */
     54  1.1  mrg 
     55  1.1  mrg cpp_hash_table *
     56  1.1  mrg ht_create (unsigned int order)
     57  1.1  mrg {
     58  1.1  mrg   unsigned int nslots = 1 << order;
     59  1.1  mrg   cpp_hash_table *table;
     60  1.1  mrg 
     61  1.1  mrg   table = XCNEW (cpp_hash_table);
     62  1.1  mrg 
     63  1.1  mrg   /* Strings need no alignment.  */
     64  1.1  mrg   obstack_specify_allocation (&table->stack, 0, 0, xmalloc, free);
     65  1.1  mrg 
     66  1.1  mrg   obstack_alignment_mask (&table->stack) = 0;
     67  1.1  mrg 
     68  1.1  mrg   table->entries = XCNEWVEC (hashnode, nslots);
     69  1.1  mrg   table->entries_owned = true;
     70  1.1  mrg   table->nslots = nslots;
     71  1.1  mrg   return table;
     72  1.1  mrg }
     73  1.1  mrg 
     74  1.1  mrg /* Frees all memory associated with a hash table.  */
     75  1.1  mrg 
     76  1.1  mrg void
     77  1.1  mrg ht_destroy (cpp_hash_table *table)
     78  1.1  mrg {
     79  1.1  mrg   obstack_free (&table->stack, NULL);
     80  1.1  mrg   if (table->entries_owned)
     81  1.1  mrg     free (table->entries);
     82  1.1  mrg   free (table);
     83  1.1  mrg }
     84  1.1  mrg 
     85  1.1  mrg /* Returns the hash entry for the a STR of length LEN.  If that string
     86  1.1  mrg    already exists in the table, returns the existing entry.  If the
     87  1.1  mrg    identifier hasn't been seen before, and INSERT is CPP_NO_INSERT,
     88  1.1  mrg    returns NULL.  Otherwise insert and returns a new entry.  A new
     89  1.1  mrg    string is allocated.  */
     90  1.1  mrg hashnode
     91  1.1  mrg ht_lookup (cpp_hash_table *table, const unsigned char *str, size_t len,
     92  1.1  mrg 	   enum ht_lookup_option insert)
     93  1.1  mrg {
     94  1.1  mrg   return ht_lookup_with_hash (table, str, len, calc_hash (str, len),
     95  1.1  mrg 			      insert);
     96  1.1  mrg }
     97  1.1  mrg 
     98  1.1  mrg hashnode
     99  1.1  mrg ht_lookup_with_hash (cpp_hash_table *table, const unsigned char *str,
    100  1.1  mrg 		     size_t len, unsigned int hash,
    101  1.1  mrg 		     enum ht_lookup_option insert)
    102  1.1  mrg {
    103  1.1  mrg   unsigned int hash2;
    104  1.1  mrg   unsigned int index;
    105  1.1  mrg   unsigned int deleted_index = table->nslots;
    106  1.1  mrg   size_t sizemask;
    107  1.1  mrg   hashnode node;
    108  1.1  mrg 
    109  1.1  mrg   sizemask = table->nslots - 1;
    110  1.1  mrg   index = hash & sizemask;
    111  1.1  mrg   table->searches++;
    112  1.1  mrg 
    113  1.1  mrg   node = table->entries[index];
    114  1.1  mrg 
    115  1.1  mrg   if (node != NULL)
    116  1.1  mrg     {
    117  1.1  mrg       if (node == DELETED)
    118  1.1  mrg 	deleted_index = index;
    119  1.1  mrg       else if (node->hash_value == hash
    120  1.1  mrg 	       && HT_LEN (node) == (unsigned int) len
    121  1.1  mrg 	       && !memcmp (HT_STR (node), str, len))
    122  1.1  mrg 	return node;
    123  1.1  mrg 
    124  1.1  mrg       /* hash2 must be odd, so we're guaranteed to visit every possible
    125  1.1  mrg 	 location in the table during rehashing.  */
    126  1.1  mrg       hash2 = ((hash * 17) & sizemask) | 1;
    127  1.1  mrg 
    128  1.1  mrg       for (;;)
    129  1.1  mrg 	{
    130  1.1  mrg 	  table->collisions++;
    131  1.1  mrg 	  index = (index + hash2) & sizemask;
    132  1.1  mrg 	  node = table->entries[index];
    133  1.1  mrg 	  if (node == NULL)
    134  1.1  mrg 	    break;
    135  1.1  mrg 
    136  1.1  mrg 	  if (node == DELETED)
    137  1.1  mrg 	    {
    138  1.1  mrg 	      if (deleted_index != table->nslots)
    139  1.1  mrg 		deleted_index = index;
    140  1.1  mrg 	    }
    141  1.1  mrg 	  else if (node->hash_value == hash
    142  1.1  mrg 		   && HT_LEN (node) == (unsigned int) len
    143  1.1  mrg 		   && !memcmp (HT_STR (node), str, len))
    144  1.1  mrg 	    return node;
    145  1.1  mrg 	}
    146  1.1  mrg     }
    147  1.1  mrg 
    148  1.1  mrg   if (insert == HT_NO_INSERT)
    149  1.1  mrg     return NULL;
    150  1.1  mrg 
    151  1.1  mrg   /* We prefer to overwrite the first deleted slot we saw.  */
    152  1.1  mrg   if (deleted_index != table->nslots)
    153  1.1  mrg     index = deleted_index;
    154  1.1  mrg 
    155  1.1  mrg   node = (*table->alloc_node) (table);
    156  1.1  mrg   table->entries[index] = node;
    157  1.1  mrg 
    158  1.1  mrg   HT_LEN (node) = (unsigned int) len;
    159  1.1  mrg   node->hash_value = hash;
    160  1.1  mrg 
    161  1.1  mrg   if (table->alloc_subobject)
    162  1.1  mrg     {
    163  1.1  mrg       char *chars = (char *) table->alloc_subobject (len + 1);
    164  1.1  mrg       memcpy (chars, str, len);
    165  1.1  mrg       chars[len] = '\0';
    166  1.1  mrg       HT_STR (node) = (const unsigned char *) chars;
    167  1.1  mrg     }
    168  1.1  mrg   else
    169  1.1  mrg     HT_STR (node) = (const unsigned char *) obstack_copy0 (&table->stack,
    170  1.1  mrg 							   str, len);
    171  1.1  mrg 
    172  1.1  mrg   if (++table->nelements * 4 >= table->nslots * 3)
    173  1.1  mrg     /* Must expand the string table.  */
    174  1.1  mrg     ht_expand (table);
    175  1.1  mrg 
    176  1.1  mrg   return node;
    177  1.1  mrg }
    178  1.1  mrg 
    179  1.1  mrg /* Double the size of a hash table, re-hashing existing entries.  */
    180  1.1  mrg 
    181  1.1  mrg static void
    182  1.1  mrg ht_expand (cpp_hash_table *table)
    183  1.1  mrg {
    184  1.1  mrg   hashnode *nentries, *p, *limit;
    185  1.1  mrg   unsigned int size, sizemask;
    186  1.1  mrg 
    187  1.1  mrg   size = table->nslots * 2;
    188  1.1  mrg   nentries = XCNEWVEC (hashnode, size);
    189  1.1  mrg   sizemask = size - 1;
    190  1.1  mrg 
    191  1.1  mrg   p = table->entries;
    192  1.1  mrg   limit = p + table->nslots;
    193  1.1  mrg   do
    194  1.1  mrg     if (*p && *p != DELETED)
    195  1.1  mrg       {
    196  1.1  mrg 	unsigned int index, hash, hash2;
    197  1.1  mrg 
    198  1.1  mrg 	hash = (*p)->hash_value;
    199  1.1  mrg 	index = hash & sizemask;
    200  1.1  mrg 
    201  1.1  mrg 	if (nentries[index])
    202  1.1  mrg 	  {
    203  1.1  mrg 	    hash2 = ((hash * 17) & sizemask) | 1;
    204  1.1  mrg 	    do
    205  1.1  mrg 	      {
    206  1.1  mrg 		index = (index + hash2) & sizemask;
    207  1.1  mrg 	      }
    208  1.1  mrg 	    while (nentries[index]);
    209  1.1  mrg 	  }
    210  1.1  mrg 	nentries[index] = *p;
    211  1.1  mrg       }
    212  1.1  mrg   while (++p < limit);
    213  1.1  mrg 
    214  1.1  mrg   if (table->entries_owned)
    215  1.1  mrg     free (table->entries);
    216  1.1  mrg   table->entries_owned = true;
    217  1.1  mrg   table->entries = nentries;
    218  1.1  mrg   table->nslots = size;
    219  1.1  mrg }
    220  1.1  mrg 
    221  1.1  mrg /* For all nodes in TABLE, callback CB with parameters TABLE->PFILE,
    222  1.1  mrg    the node, and V.  */
    223  1.1  mrg void
    224  1.1  mrg ht_forall (cpp_hash_table *table, ht_cb cb, const void *v)
    225  1.1  mrg {
    226  1.1  mrg   hashnode *p, *limit;
    227  1.1  mrg 
    228  1.1  mrg   p = table->entries;
    229  1.1  mrg   limit = p + table->nslots;
    230  1.1  mrg   do
    231  1.1  mrg     if (*p && *p != DELETED)
    232  1.1  mrg       {
    233  1.1  mrg 	if ((*cb) (table->pfile, *p, v) == 0)
    234  1.1  mrg 	  break;
    235  1.1  mrg       }
    236  1.1  mrg   while (++p < limit);
    237  1.1  mrg }
    238  1.1  mrg 
    239  1.1  mrg /* Like ht_forall, but a nonzero return from the callback means that
    240  1.1  mrg    the entry should be removed from the table.  */
    241  1.1  mrg void
    242  1.1  mrg ht_purge (cpp_hash_table *table, ht_cb cb, const void *v)
    243  1.1  mrg {
    244  1.1  mrg   hashnode *p, *limit;
    245  1.1  mrg 
    246  1.1  mrg   p = table->entries;
    247  1.1  mrg   limit = p + table->nslots;
    248  1.1  mrg   do
    249  1.1  mrg     if (*p && *p != DELETED)
    250  1.1  mrg       {
    251  1.1  mrg 	if ((*cb) (table->pfile, *p, v))
    252  1.1  mrg 	  *p = DELETED;
    253  1.1  mrg       }
    254  1.1  mrg   while (++p < limit);
    255  1.1  mrg }
    256  1.1  mrg 
    257  1.1  mrg /* Restore the hash table.  */
    258  1.1  mrg void
    259  1.1  mrg ht_load (cpp_hash_table *ht, hashnode *entries,
    260  1.1  mrg 	 unsigned int nslots, unsigned int nelements,
    261  1.1  mrg 	 bool own)
    262  1.1  mrg {
    263  1.1  mrg   if (ht->entries_owned)
    264  1.1  mrg     free (ht->entries);
    265  1.1  mrg   ht->entries = entries;
    266  1.1  mrg   ht->nslots = nslots;
    267  1.1  mrg   ht->nelements = nelements;
    268  1.1  mrg   ht->entries_owned = own;
    269  1.1  mrg }
    270  1.1  mrg 
    271  1.1  mrg /* Dump allocation statistics to stderr.  */
    272  1.1  mrg 
    273  1.1  mrg void
    274  1.1  mrg ht_dump_statistics (cpp_hash_table *table)
    275  1.1  mrg {
    276  1.1  mrg   size_t nelts, nids, overhead, headers;
    277  1.1  mrg   size_t total_bytes, longest, deleted = 0;
    278  1.1  mrg   double sum_of_squares, exp_len, exp_len2, exp2_len;
    279  1.1  mrg   hashnode *p, *limit;
    280  1.1  mrg 
    281  1.1  mrg #define SCALE(x) ((unsigned long) ((x) < 1024*10 \
    282  1.1  mrg 		  ? (x) \
    283  1.1  mrg 		  : ((x) < 1024*1024*10 \
    284  1.1  mrg 		     ? (x) / 1024 \
    285  1.1  mrg 		     : (x) / (1024*1024))))
    286  1.1  mrg #define LABEL(x) ((x) < 1024*10 ? ' ' : ((x) < 1024*1024*10 ? 'k' : 'M'))
    287  1.1  mrg 
    288  1.1  mrg   total_bytes = longest = sum_of_squares = nids = 0;
    289  1.1  mrg   p = table->entries;
    290  1.1  mrg   limit = p + table->nslots;
    291  1.1  mrg   do
    292  1.1  mrg     if (*p == DELETED)
    293  1.1  mrg       ++deleted;
    294  1.1  mrg     else if (*p)
    295  1.1  mrg       {
    296  1.1  mrg 	size_t n = HT_LEN (*p);
    297  1.1  mrg 
    298  1.1  mrg 	total_bytes += n;
    299  1.1  mrg 	sum_of_squares += (double) n * n;
    300  1.1  mrg 	if (n > longest)
    301  1.1  mrg 	  longest = n;
    302  1.1  mrg 	nids++;
    303  1.1  mrg       }
    304  1.1  mrg   while (++p < limit);
    305  1.1  mrg 
    306  1.1  mrg   nelts = table->nelements;
    307  1.1  mrg   headers = table->nslots * sizeof (hashnode);
    308  1.1  mrg 
    309  1.1  mrg   fprintf (stderr, "\nString pool\n%-32s%lu\n", "entries:",
    310  1.1  mrg 	   (unsigned long) nelts);
    311  1.1  mrg   fprintf (stderr, "%-32s%lu (%.2f%%)\n", "identifiers:",
    312  1.1  mrg 	   (unsigned long) nids, nids * 100.0 / nelts);
    313  1.1  mrg   fprintf (stderr, "%-32s%lu\n", "slots:",
    314  1.1  mrg 	   (unsigned long) table->nslots);
    315  1.1  mrg   fprintf (stderr, "%-32s%lu\n", "deleted:",
    316  1.1  mrg 	   (unsigned long) deleted);
    317  1.1  mrg 
    318  1.1  mrg   if (table->alloc_subobject)
    319  1.1  mrg     fprintf (stderr, "%-32s%lu%c\n", "GGC bytes:",
    320  1.1  mrg 	     SCALE (total_bytes), LABEL (total_bytes));
    321  1.1  mrg   else
    322  1.1  mrg     {
    323  1.1  mrg       overhead = obstack_memory_used (&table->stack) - total_bytes;
    324  1.1  mrg       fprintf (stderr, "%-32s%lu%c (%lu%c overhead)\n",
    325  1.1  mrg 	       "obstack bytes:",
    326  1.1  mrg 	       SCALE (total_bytes), LABEL (total_bytes),
    327  1.1  mrg 	       SCALE (overhead), LABEL (overhead));
    328  1.1  mrg     }
    329  1.1  mrg   fprintf (stderr, "%-32s%lu%c\n", "table size:",
    330  1.1  mrg 	   SCALE (headers), LABEL (headers));
    331  1.1  mrg 
    332  1.1  mrg   exp_len = (double)total_bytes / (double)nelts;
    333  1.1  mrg   exp2_len = exp_len * exp_len;
    334  1.1  mrg   exp_len2 = (double) sum_of_squares / (double) nelts;
    335  1.1  mrg 
    336  1.1  mrg   fprintf (stderr, "%-32s%.4f\n", "coll/search:",
    337  1.1  mrg 	   (double) table->collisions / (double) table->searches);
    338  1.1  mrg   fprintf (stderr, "%-32s%.4f\n", "ins/search:",
    339  1.1  mrg 	   (double) nelts / (double) table->searches);
    340  1.1  mrg   fprintf (stderr, "%-32s%.2f bytes (+/- %.2f)\n",
    341  1.1  mrg 	   "avg. entry:",
    342  1.1  mrg 	   exp_len, approx_sqrt (exp_len2 - exp2_len));
    343  1.1  mrg   fprintf (stderr, "%-32s%lu\n", "longest entry:",
    344  1.1  mrg 	   (unsigned long) longest);
    345  1.1  mrg #undef SCALE
    346  1.1  mrg #undef LABEL
    347  1.1  mrg }
    348  1.1  mrg 
    349  1.1  mrg /* Return the approximate positive square root of a number N.  This is for
    350  1.1  mrg    statistical reports, not code generation.  */
    351  1.1  mrg static double
    352  1.1  mrg approx_sqrt (double x)
    353  1.1  mrg {
    354  1.1  mrg   double s, d;
    355  1.1  mrg 
    356  1.1  mrg   if (x < 0)
    357  1.1  mrg     abort ();
    358  1.1  mrg   if (x == 0)
    359  1.1  mrg     return 0;
    360  1.1  mrg 
    361  1.1  mrg   s = x;
    362  1.1  mrg   do
    363  1.1  mrg     {
    364  1.1  mrg       d = (s * s - x) / (2 * s);
    365  1.1  mrg       s -= d;
    366  1.1  mrg     }
    367  1.1  mrg   while (d > .0001);
    368  1.1  mrg   return s;
    369  1.1  mrg }
    370