1 1.1 mrg /* Hash tables. 2 1.1 mrg Copyright (C) 2000-2022 Free Software Foundation, Inc. 3 1.1 mrg 4 1.1 mrg This program is free software; you can redistribute it and/or modify it 5 1.1 mrg under the terms of the GNU General Public License as published by the 6 1.1 mrg Free Software Foundation; either version 3, or (at your option) any 7 1.1 mrg later version. 8 1.1 mrg 9 1.1 mrg This program is distributed in the hope that it will be useful, 10 1.1 mrg but WITHOUT ANY WARRANTY; without even the implied warranty of 11 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 12 1.1 mrg GNU General Public License for more details. 13 1.1 mrg 14 1.1 mrg You should have received a copy of the GNU General Public License 15 1.1 mrg along with this program; see the file COPYING3. If not see 16 1.1 mrg <http://www.gnu.org/licenses/>. 17 1.1 mrg 18 1.1 mrg In other words, you are welcome to use, share and improve this program. 19 1.1 mrg You are forbidden to forbid anyone else to use, share and improve 20 1.1 mrg what you give them. Help stamp out software-hoarding! */ 21 1.1 mrg 22 1.1 mrg #include "config.h" 23 1.1 mrg #include "system.h" 24 1.1 mrg #include "symtab.h" 25 1.1 mrg 26 1.1 mrg /* The code below is a specialization of Vladimir Makarov's expandable 27 1.1 mrg hash tables (see libiberty/hashtab.c). The abstraction penalty was 28 1.1 mrg too high to continue using the generic form. This code knows 29 1.1 mrg intrinsically how to calculate a hash value, and how to compare an 30 1.1 mrg existing entry with a potential new one. */ 31 1.1 mrg 32 1.1 mrg static unsigned int calc_hash (const unsigned char *, size_t); 33 1.1 mrg static void ht_expand (cpp_hash_table *); 34 1.1 mrg static double approx_sqrt (double); 35 1.1 mrg 36 1.1 mrg /* A deleted entry. */ 37 1.1 mrg #define DELETED ((hashnode) -1) 38 1.1 mrg 39 1.1 mrg /* Calculate the hash of the string STR of length LEN. */ 40 1.1 mrg 41 1.1 mrg static unsigned int 42 1.1 mrg calc_hash (const unsigned char *str, size_t len) 43 1.1 mrg { 44 1.1 mrg size_t n = len; 45 1.1 mrg unsigned int r = 0; 46 1.1 mrg 47 1.1 mrg while (n--) 48 1.1 mrg r = HT_HASHSTEP (r, *str++); 49 1.1 mrg 50 1.1 mrg return HT_HASHFINISH (r, len); 51 1.1 mrg } 52 1.1 mrg 53 1.1 mrg /* Initialize an identifier hashtable. */ 54 1.1 mrg 55 1.1 mrg cpp_hash_table * 56 1.1 mrg ht_create (unsigned int order) 57 1.1 mrg { 58 1.1 mrg unsigned int nslots = 1 << order; 59 1.1 mrg cpp_hash_table *table; 60 1.1 mrg 61 1.1 mrg table = XCNEW (cpp_hash_table); 62 1.1 mrg 63 1.1 mrg /* Strings need no alignment. */ 64 1.1 mrg obstack_specify_allocation (&table->stack, 0, 0, xmalloc, free); 65 1.1 mrg 66 1.1 mrg obstack_alignment_mask (&table->stack) = 0; 67 1.1 mrg 68 1.1 mrg table->entries = XCNEWVEC (hashnode, nslots); 69 1.1 mrg table->entries_owned = true; 70 1.1 mrg table->nslots = nslots; 71 1.1 mrg return table; 72 1.1 mrg } 73 1.1 mrg 74 1.1 mrg /* Frees all memory associated with a hash table. */ 75 1.1 mrg 76 1.1 mrg void 77 1.1 mrg ht_destroy (cpp_hash_table *table) 78 1.1 mrg { 79 1.1 mrg obstack_free (&table->stack, NULL); 80 1.1 mrg if (table->entries_owned) 81 1.1 mrg free (table->entries); 82 1.1 mrg free (table); 83 1.1 mrg } 84 1.1 mrg 85 1.1 mrg /* Returns the hash entry for the a STR of length LEN. If that string 86 1.1 mrg already exists in the table, returns the existing entry. If the 87 1.1 mrg identifier hasn't been seen before, and INSERT is CPP_NO_INSERT, 88 1.1 mrg returns NULL. Otherwise insert and returns a new entry. A new 89 1.1 mrg string is allocated. */ 90 1.1 mrg hashnode 91 1.1 mrg ht_lookup (cpp_hash_table *table, const unsigned char *str, size_t len, 92 1.1 mrg enum ht_lookup_option insert) 93 1.1 mrg { 94 1.1 mrg return ht_lookup_with_hash (table, str, len, calc_hash (str, len), 95 1.1 mrg insert); 96 1.1 mrg } 97 1.1 mrg 98 1.1 mrg hashnode 99 1.1 mrg ht_lookup_with_hash (cpp_hash_table *table, const unsigned char *str, 100 1.1 mrg size_t len, unsigned int hash, 101 1.1 mrg enum ht_lookup_option insert) 102 1.1 mrg { 103 1.1 mrg unsigned int hash2; 104 1.1 mrg unsigned int index; 105 1.1 mrg unsigned int deleted_index = table->nslots; 106 1.1 mrg size_t sizemask; 107 1.1 mrg hashnode node; 108 1.1 mrg 109 1.1 mrg sizemask = table->nslots - 1; 110 1.1 mrg index = hash & sizemask; 111 1.1 mrg table->searches++; 112 1.1 mrg 113 1.1 mrg node = table->entries[index]; 114 1.1 mrg 115 1.1 mrg if (node != NULL) 116 1.1 mrg { 117 1.1 mrg if (node == DELETED) 118 1.1 mrg deleted_index = index; 119 1.1 mrg else if (node->hash_value == hash 120 1.1 mrg && HT_LEN (node) == (unsigned int) len 121 1.1 mrg && !memcmp (HT_STR (node), str, len)) 122 1.1 mrg return node; 123 1.1 mrg 124 1.1 mrg /* hash2 must be odd, so we're guaranteed to visit every possible 125 1.1 mrg location in the table during rehashing. */ 126 1.1 mrg hash2 = ((hash * 17) & sizemask) | 1; 127 1.1 mrg 128 1.1 mrg for (;;) 129 1.1 mrg { 130 1.1 mrg table->collisions++; 131 1.1 mrg index = (index + hash2) & sizemask; 132 1.1 mrg node = table->entries[index]; 133 1.1 mrg if (node == NULL) 134 1.1 mrg break; 135 1.1 mrg 136 1.1 mrg if (node == DELETED) 137 1.1 mrg { 138 1.1 mrg if (deleted_index != table->nslots) 139 1.1 mrg deleted_index = index; 140 1.1 mrg } 141 1.1 mrg else if (node->hash_value == hash 142 1.1 mrg && HT_LEN (node) == (unsigned int) len 143 1.1 mrg && !memcmp (HT_STR (node), str, len)) 144 1.1 mrg return node; 145 1.1 mrg } 146 1.1 mrg } 147 1.1 mrg 148 1.1 mrg if (insert == HT_NO_INSERT) 149 1.1 mrg return NULL; 150 1.1 mrg 151 1.1 mrg /* We prefer to overwrite the first deleted slot we saw. */ 152 1.1 mrg if (deleted_index != table->nslots) 153 1.1 mrg index = deleted_index; 154 1.1 mrg 155 1.1 mrg node = (*table->alloc_node) (table); 156 1.1 mrg table->entries[index] = node; 157 1.1 mrg 158 1.1 mrg HT_LEN (node) = (unsigned int) len; 159 1.1 mrg node->hash_value = hash; 160 1.1 mrg 161 1.1 mrg if (table->alloc_subobject) 162 1.1 mrg { 163 1.1 mrg char *chars = (char *) table->alloc_subobject (len + 1); 164 1.1 mrg memcpy (chars, str, len); 165 1.1 mrg chars[len] = '\0'; 166 1.1 mrg HT_STR (node) = (const unsigned char *) chars; 167 1.1 mrg } 168 1.1 mrg else 169 1.1 mrg HT_STR (node) = (const unsigned char *) obstack_copy0 (&table->stack, 170 1.1 mrg str, len); 171 1.1 mrg 172 1.1 mrg if (++table->nelements * 4 >= table->nslots * 3) 173 1.1 mrg /* Must expand the string table. */ 174 1.1 mrg ht_expand (table); 175 1.1 mrg 176 1.1 mrg return node; 177 1.1 mrg } 178 1.1 mrg 179 1.1 mrg /* Double the size of a hash table, re-hashing existing entries. */ 180 1.1 mrg 181 1.1 mrg static void 182 1.1 mrg ht_expand (cpp_hash_table *table) 183 1.1 mrg { 184 1.1 mrg hashnode *nentries, *p, *limit; 185 1.1 mrg unsigned int size, sizemask; 186 1.1 mrg 187 1.1 mrg size = table->nslots * 2; 188 1.1 mrg nentries = XCNEWVEC (hashnode, size); 189 1.1 mrg sizemask = size - 1; 190 1.1 mrg 191 1.1 mrg p = table->entries; 192 1.1 mrg limit = p + table->nslots; 193 1.1 mrg do 194 1.1 mrg if (*p && *p != DELETED) 195 1.1 mrg { 196 1.1 mrg unsigned int index, hash, hash2; 197 1.1 mrg 198 1.1 mrg hash = (*p)->hash_value; 199 1.1 mrg index = hash & sizemask; 200 1.1 mrg 201 1.1 mrg if (nentries[index]) 202 1.1 mrg { 203 1.1 mrg hash2 = ((hash * 17) & sizemask) | 1; 204 1.1 mrg do 205 1.1 mrg { 206 1.1 mrg index = (index + hash2) & sizemask; 207 1.1 mrg } 208 1.1 mrg while (nentries[index]); 209 1.1 mrg } 210 1.1 mrg nentries[index] = *p; 211 1.1 mrg } 212 1.1 mrg while (++p < limit); 213 1.1 mrg 214 1.1 mrg if (table->entries_owned) 215 1.1 mrg free (table->entries); 216 1.1 mrg table->entries_owned = true; 217 1.1 mrg table->entries = nentries; 218 1.1 mrg table->nslots = size; 219 1.1 mrg } 220 1.1 mrg 221 1.1 mrg /* For all nodes in TABLE, callback CB with parameters TABLE->PFILE, 222 1.1 mrg the node, and V. */ 223 1.1 mrg void 224 1.1 mrg ht_forall (cpp_hash_table *table, ht_cb cb, const void *v) 225 1.1 mrg { 226 1.1 mrg hashnode *p, *limit; 227 1.1 mrg 228 1.1 mrg p = table->entries; 229 1.1 mrg limit = p + table->nslots; 230 1.1 mrg do 231 1.1 mrg if (*p && *p != DELETED) 232 1.1 mrg { 233 1.1 mrg if ((*cb) (table->pfile, *p, v) == 0) 234 1.1 mrg break; 235 1.1 mrg } 236 1.1 mrg while (++p < limit); 237 1.1 mrg } 238 1.1 mrg 239 1.1 mrg /* Like ht_forall, but a nonzero return from the callback means that 240 1.1 mrg the entry should be removed from the table. */ 241 1.1 mrg void 242 1.1 mrg ht_purge (cpp_hash_table *table, ht_cb cb, const void *v) 243 1.1 mrg { 244 1.1 mrg hashnode *p, *limit; 245 1.1 mrg 246 1.1 mrg p = table->entries; 247 1.1 mrg limit = p + table->nslots; 248 1.1 mrg do 249 1.1 mrg if (*p && *p != DELETED) 250 1.1 mrg { 251 1.1 mrg if ((*cb) (table->pfile, *p, v)) 252 1.1 mrg *p = DELETED; 253 1.1 mrg } 254 1.1 mrg while (++p < limit); 255 1.1 mrg } 256 1.1 mrg 257 1.1 mrg /* Restore the hash table. */ 258 1.1 mrg void 259 1.1 mrg ht_load (cpp_hash_table *ht, hashnode *entries, 260 1.1 mrg unsigned int nslots, unsigned int nelements, 261 1.1 mrg bool own) 262 1.1 mrg { 263 1.1 mrg if (ht->entries_owned) 264 1.1 mrg free (ht->entries); 265 1.1 mrg ht->entries = entries; 266 1.1 mrg ht->nslots = nslots; 267 1.1 mrg ht->nelements = nelements; 268 1.1 mrg ht->entries_owned = own; 269 1.1 mrg } 270 1.1 mrg 271 1.1 mrg /* Dump allocation statistics to stderr. */ 272 1.1 mrg 273 1.1 mrg void 274 1.1 mrg ht_dump_statistics (cpp_hash_table *table) 275 1.1 mrg { 276 1.1 mrg size_t nelts, nids, overhead, headers; 277 1.1 mrg size_t total_bytes, longest, deleted = 0; 278 1.1 mrg double sum_of_squares, exp_len, exp_len2, exp2_len; 279 1.1 mrg hashnode *p, *limit; 280 1.1 mrg 281 1.1 mrg #define SCALE(x) ((unsigned long) ((x) < 1024*10 \ 282 1.1 mrg ? (x) \ 283 1.1 mrg : ((x) < 1024*1024*10 \ 284 1.1 mrg ? (x) / 1024 \ 285 1.1 mrg : (x) / (1024*1024)))) 286 1.1 mrg #define LABEL(x) ((x) < 1024*10 ? ' ' : ((x) < 1024*1024*10 ? 'k' : 'M')) 287 1.1 mrg 288 1.1 mrg total_bytes = longest = sum_of_squares = nids = 0; 289 1.1 mrg p = table->entries; 290 1.1 mrg limit = p + table->nslots; 291 1.1 mrg do 292 1.1 mrg if (*p == DELETED) 293 1.1 mrg ++deleted; 294 1.1 mrg else if (*p) 295 1.1 mrg { 296 1.1 mrg size_t n = HT_LEN (*p); 297 1.1 mrg 298 1.1 mrg total_bytes += n; 299 1.1 mrg sum_of_squares += (double) n * n; 300 1.1 mrg if (n > longest) 301 1.1 mrg longest = n; 302 1.1 mrg nids++; 303 1.1 mrg } 304 1.1 mrg while (++p < limit); 305 1.1 mrg 306 1.1 mrg nelts = table->nelements; 307 1.1 mrg headers = table->nslots * sizeof (hashnode); 308 1.1 mrg 309 1.1 mrg fprintf (stderr, "\nString pool\n%-32s%lu\n", "entries:", 310 1.1 mrg (unsigned long) nelts); 311 1.1 mrg fprintf (stderr, "%-32s%lu (%.2f%%)\n", "identifiers:", 312 1.1 mrg (unsigned long) nids, nids * 100.0 / nelts); 313 1.1 mrg fprintf (stderr, "%-32s%lu\n", "slots:", 314 1.1 mrg (unsigned long) table->nslots); 315 1.1 mrg fprintf (stderr, "%-32s%lu\n", "deleted:", 316 1.1 mrg (unsigned long) deleted); 317 1.1 mrg 318 1.1 mrg if (table->alloc_subobject) 319 1.1 mrg fprintf (stderr, "%-32s%lu%c\n", "GGC bytes:", 320 1.1 mrg SCALE (total_bytes), LABEL (total_bytes)); 321 1.1 mrg else 322 1.1 mrg { 323 1.1 mrg overhead = obstack_memory_used (&table->stack) - total_bytes; 324 1.1 mrg fprintf (stderr, "%-32s%lu%c (%lu%c overhead)\n", 325 1.1 mrg "obstack bytes:", 326 1.1 mrg SCALE (total_bytes), LABEL (total_bytes), 327 1.1 mrg SCALE (overhead), LABEL (overhead)); 328 1.1 mrg } 329 1.1 mrg fprintf (stderr, "%-32s%lu%c\n", "table size:", 330 1.1 mrg SCALE (headers), LABEL (headers)); 331 1.1 mrg 332 1.1 mrg exp_len = (double)total_bytes / (double)nelts; 333 1.1 mrg exp2_len = exp_len * exp_len; 334 1.1 mrg exp_len2 = (double) sum_of_squares / (double) nelts; 335 1.1 mrg 336 1.1 mrg fprintf (stderr, "%-32s%.4f\n", "coll/search:", 337 1.1 mrg (double) table->collisions / (double) table->searches); 338 1.1 mrg fprintf (stderr, "%-32s%.4f\n", "ins/search:", 339 1.1 mrg (double) nelts / (double) table->searches); 340 1.1 mrg fprintf (stderr, "%-32s%.2f bytes (+/- %.2f)\n", 341 1.1 mrg "avg. entry:", 342 1.1 mrg exp_len, approx_sqrt (exp_len2 - exp2_len)); 343 1.1 mrg fprintf (stderr, "%-32s%lu\n", "longest entry:", 344 1.1 mrg (unsigned long) longest); 345 1.1 mrg #undef SCALE 346 1.1 mrg #undef LABEL 347 1.1 mrg } 348 1.1 mrg 349 1.1 mrg /* Return the approximate positive square root of a number N. This is for 350 1.1 mrg statistical reports, not code generation. */ 351 1.1 mrg static double 352 1.1 mrg approx_sqrt (double x) 353 1.1 mrg { 354 1.1 mrg double s, d; 355 1.1 mrg 356 1.1 mrg if (x < 0) 357 1.1 mrg abort (); 358 1.1 mrg if (x == 0) 359 1.1 mrg return 0; 360 1.1 mrg 361 1.1 mrg s = x; 362 1.1 mrg do 363 1.1 mrg { 364 1.1 mrg d = (s * s - x) / (2 * s); 365 1.1 mrg s -= d; 366 1.1 mrg } 367 1.1 mrg while (d > .0001); 368 1.1 mrg return s; 369 1.1 mrg } 370