symtab.cc revision 1.1.1.2 1 1.1 mrg /* Hash tables.
2 1.1.1.2 mrg Copyright (C) 2000-2024 Free Software Foundation, Inc.
3 1.1 mrg
4 1.1 mrg This program is free software; you can redistribute it and/or modify it
5 1.1 mrg under the terms of the GNU General Public License as published by the
6 1.1 mrg Free Software Foundation; either version 3, or (at your option) any
7 1.1 mrg later version.
8 1.1 mrg
9 1.1 mrg This program is distributed in the hope that it will be useful,
10 1.1 mrg but WITHOUT ANY WARRANTY; without even the implied warranty of
11 1.1 mrg MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 1.1 mrg GNU General Public License for more details.
13 1.1 mrg
14 1.1 mrg You should have received a copy of the GNU General Public License
15 1.1 mrg along with this program; see the file COPYING3. If not see
16 1.1 mrg <http://www.gnu.org/licenses/>.
17 1.1 mrg
18 1.1 mrg In other words, you are welcome to use, share and improve this program.
19 1.1 mrg You are forbidden to forbid anyone else to use, share and improve
20 1.1 mrg what you give them. Help stamp out software-hoarding! */
21 1.1 mrg
22 1.1 mrg #include "config.h"
23 1.1 mrg #include "system.h"
24 1.1 mrg #include "symtab.h"
25 1.1 mrg
26 1.1 mrg /* The code below is a specialization of Vladimir Makarov's expandable
27 1.1 mrg hash tables (see libiberty/hashtab.c). The abstraction penalty was
28 1.1 mrg too high to continue using the generic form. This code knows
29 1.1 mrg intrinsically how to calculate a hash value, and how to compare an
30 1.1 mrg existing entry with a potential new one. */
31 1.1 mrg
32 1.1 mrg static unsigned int calc_hash (const unsigned char *, size_t);
33 1.1 mrg static void ht_expand (cpp_hash_table *);
34 1.1 mrg static double approx_sqrt (double);
35 1.1 mrg
36 1.1 mrg /* A deleted entry. */
37 1.1 mrg #define DELETED ((hashnode) -1)
38 1.1 mrg
39 1.1 mrg /* Calculate the hash of the string STR of length LEN. */
40 1.1 mrg
41 1.1 mrg static unsigned int
42 1.1 mrg calc_hash (const unsigned char *str, size_t len)
43 1.1 mrg {
44 1.1 mrg size_t n = len;
45 1.1 mrg unsigned int r = 0;
46 1.1 mrg
47 1.1 mrg while (n--)
48 1.1 mrg r = HT_HASHSTEP (r, *str++);
49 1.1 mrg
50 1.1 mrg return HT_HASHFINISH (r, len);
51 1.1 mrg }
52 1.1 mrg
53 1.1 mrg /* Initialize an identifier hashtable. */
54 1.1 mrg
55 1.1 mrg cpp_hash_table *
56 1.1 mrg ht_create (unsigned int order)
57 1.1 mrg {
58 1.1 mrg unsigned int nslots = 1 << order;
59 1.1 mrg cpp_hash_table *table;
60 1.1 mrg
61 1.1 mrg table = XCNEW (cpp_hash_table);
62 1.1 mrg
63 1.1 mrg /* Strings need no alignment. */
64 1.1 mrg obstack_specify_allocation (&table->stack, 0, 0, xmalloc, free);
65 1.1 mrg
66 1.1 mrg obstack_alignment_mask (&table->stack) = 0;
67 1.1 mrg
68 1.1 mrg table->entries = XCNEWVEC (hashnode, nslots);
69 1.1 mrg table->entries_owned = true;
70 1.1 mrg table->nslots = nslots;
71 1.1 mrg return table;
72 1.1 mrg }
73 1.1 mrg
74 1.1 mrg /* Frees all memory associated with a hash table. */
75 1.1 mrg
76 1.1 mrg void
77 1.1 mrg ht_destroy (cpp_hash_table *table)
78 1.1 mrg {
79 1.1 mrg obstack_free (&table->stack, NULL);
80 1.1 mrg if (table->entries_owned)
81 1.1 mrg free (table->entries);
82 1.1 mrg free (table);
83 1.1 mrg }
84 1.1 mrg
85 1.1 mrg /* Returns the hash entry for the a STR of length LEN. If that string
86 1.1 mrg already exists in the table, returns the existing entry. If the
87 1.1 mrg identifier hasn't been seen before, and INSERT is CPP_NO_INSERT,
88 1.1 mrg returns NULL. Otherwise insert and returns a new entry. A new
89 1.1 mrg string is allocated. */
90 1.1 mrg hashnode
91 1.1 mrg ht_lookup (cpp_hash_table *table, const unsigned char *str, size_t len,
92 1.1 mrg enum ht_lookup_option insert)
93 1.1 mrg {
94 1.1 mrg return ht_lookup_with_hash (table, str, len, calc_hash (str, len),
95 1.1 mrg insert);
96 1.1 mrg }
97 1.1 mrg
98 1.1 mrg hashnode
99 1.1 mrg ht_lookup_with_hash (cpp_hash_table *table, const unsigned char *str,
100 1.1 mrg size_t len, unsigned int hash,
101 1.1 mrg enum ht_lookup_option insert)
102 1.1 mrg {
103 1.1 mrg unsigned int hash2;
104 1.1 mrg unsigned int index;
105 1.1 mrg unsigned int deleted_index = table->nslots;
106 1.1 mrg size_t sizemask;
107 1.1 mrg hashnode node;
108 1.1 mrg
109 1.1 mrg sizemask = table->nslots - 1;
110 1.1 mrg index = hash & sizemask;
111 1.1 mrg table->searches++;
112 1.1 mrg
113 1.1 mrg node = table->entries[index];
114 1.1 mrg
115 1.1 mrg if (node != NULL)
116 1.1 mrg {
117 1.1 mrg if (node == DELETED)
118 1.1 mrg deleted_index = index;
119 1.1 mrg else if (node->hash_value == hash
120 1.1 mrg && HT_LEN (node) == (unsigned int) len
121 1.1 mrg && !memcmp (HT_STR (node), str, len))
122 1.1 mrg return node;
123 1.1 mrg
124 1.1 mrg /* hash2 must be odd, so we're guaranteed to visit every possible
125 1.1 mrg location in the table during rehashing. */
126 1.1 mrg hash2 = ((hash * 17) & sizemask) | 1;
127 1.1 mrg
128 1.1 mrg for (;;)
129 1.1 mrg {
130 1.1 mrg table->collisions++;
131 1.1 mrg index = (index + hash2) & sizemask;
132 1.1 mrg node = table->entries[index];
133 1.1 mrg if (node == NULL)
134 1.1 mrg break;
135 1.1 mrg
136 1.1 mrg if (node == DELETED)
137 1.1 mrg {
138 1.1 mrg if (deleted_index != table->nslots)
139 1.1 mrg deleted_index = index;
140 1.1 mrg }
141 1.1 mrg else if (node->hash_value == hash
142 1.1 mrg && HT_LEN (node) == (unsigned int) len
143 1.1 mrg && !memcmp (HT_STR (node), str, len))
144 1.1 mrg return node;
145 1.1 mrg }
146 1.1 mrg }
147 1.1 mrg
148 1.1 mrg if (insert == HT_NO_INSERT)
149 1.1 mrg return NULL;
150 1.1 mrg
151 1.1 mrg /* We prefer to overwrite the first deleted slot we saw. */
152 1.1 mrg if (deleted_index != table->nslots)
153 1.1 mrg index = deleted_index;
154 1.1 mrg
155 1.1 mrg node = (*table->alloc_node) (table);
156 1.1 mrg table->entries[index] = node;
157 1.1 mrg
158 1.1 mrg HT_LEN (node) = (unsigned int) len;
159 1.1 mrg node->hash_value = hash;
160 1.1 mrg
161 1.1 mrg if (table->alloc_subobject)
162 1.1 mrg {
163 1.1 mrg char *chars = (char *) table->alloc_subobject (len + 1);
164 1.1 mrg memcpy (chars, str, len);
165 1.1 mrg chars[len] = '\0';
166 1.1 mrg HT_STR (node) = (const unsigned char *) chars;
167 1.1 mrg }
168 1.1 mrg else
169 1.1 mrg HT_STR (node) = (const unsigned char *) obstack_copy0 (&table->stack,
170 1.1 mrg str, len);
171 1.1 mrg
172 1.1 mrg if (++table->nelements * 4 >= table->nslots * 3)
173 1.1 mrg /* Must expand the string table. */
174 1.1 mrg ht_expand (table);
175 1.1 mrg
176 1.1 mrg return node;
177 1.1 mrg }
178 1.1 mrg
179 1.1 mrg /* Double the size of a hash table, re-hashing existing entries. */
180 1.1 mrg
181 1.1 mrg static void
182 1.1 mrg ht_expand (cpp_hash_table *table)
183 1.1 mrg {
184 1.1 mrg hashnode *nentries, *p, *limit;
185 1.1 mrg unsigned int size, sizemask;
186 1.1 mrg
187 1.1 mrg size = table->nslots * 2;
188 1.1 mrg nentries = XCNEWVEC (hashnode, size);
189 1.1 mrg sizemask = size - 1;
190 1.1 mrg
191 1.1 mrg p = table->entries;
192 1.1 mrg limit = p + table->nslots;
193 1.1 mrg do
194 1.1 mrg if (*p && *p != DELETED)
195 1.1 mrg {
196 1.1 mrg unsigned int index, hash, hash2;
197 1.1 mrg
198 1.1 mrg hash = (*p)->hash_value;
199 1.1 mrg index = hash & sizemask;
200 1.1 mrg
201 1.1 mrg if (nentries[index])
202 1.1 mrg {
203 1.1 mrg hash2 = ((hash * 17) & sizemask) | 1;
204 1.1 mrg do
205 1.1 mrg {
206 1.1 mrg index = (index + hash2) & sizemask;
207 1.1 mrg }
208 1.1 mrg while (nentries[index]);
209 1.1 mrg }
210 1.1 mrg nentries[index] = *p;
211 1.1 mrg }
212 1.1 mrg while (++p < limit);
213 1.1 mrg
214 1.1 mrg if (table->entries_owned)
215 1.1 mrg free (table->entries);
216 1.1 mrg table->entries_owned = true;
217 1.1 mrg table->entries = nentries;
218 1.1 mrg table->nslots = size;
219 1.1 mrg }
220 1.1 mrg
221 1.1 mrg /* For all nodes in TABLE, callback CB with parameters TABLE->PFILE,
222 1.1 mrg the node, and V. */
223 1.1 mrg void
224 1.1 mrg ht_forall (cpp_hash_table *table, ht_cb cb, const void *v)
225 1.1 mrg {
226 1.1 mrg hashnode *p, *limit;
227 1.1 mrg
228 1.1 mrg p = table->entries;
229 1.1 mrg limit = p + table->nslots;
230 1.1 mrg do
231 1.1 mrg if (*p && *p != DELETED)
232 1.1 mrg {
233 1.1 mrg if ((*cb) (table->pfile, *p, v) == 0)
234 1.1 mrg break;
235 1.1 mrg }
236 1.1 mrg while (++p < limit);
237 1.1 mrg }
238 1.1 mrg
239 1.1 mrg /* Like ht_forall, but a nonzero return from the callback means that
240 1.1 mrg the entry should be removed from the table. */
241 1.1 mrg void
242 1.1 mrg ht_purge (cpp_hash_table *table, ht_cb cb, const void *v)
243 1.1 mrg {
244 1.1 mrg hashnode *p, *limit;
245 1.1 mrg
246 1.1 mrg p = table->entries;
247 1.1 mrg limit = p + table->nslots;
248 1.1 mrg do
249 1.1 mrg if (*p && *p != DELETED)
250 1.1 mrg {
251 1.1 mrg if ((*cb) (table->pfile, *p, v))
252 1.1 mrg *p = DELETED;
253 1.1 mrg }
254 1.1 mrg while (++p < limit);
255 1.1 mrg }
256 1.1 mrg
257 1.1 mrg /* Restore the hash table. */
258 1.1 mrg void
259 1.1 mrg ht_load (cpp_hash_table *ht, hashnode *entries,
260 1.1 mrg unsigned int nslots, unsigned int nelements,
261 1.1 mrg bool own)
262 1.1 mrg {
263 1.1 mrg if (ht->entries_owned)
264 1.1 mrg free (ht->entries);
265 1.1 mrg ht->entries = entries;
266 1.1 mrg ht->nslots = nslots;
267 1.1 mrg ht->nelements = nelements;
268 1.1 mrg ht->entries_owned = own;
269 1.1 mrg }
270 1.1 mrg
271 1.1 mrg /* Dump allocation statistics to stderr. */
272 1.1 mrg
273 1.1 mrg void
274 1.1 mrg ht_dump_statistics (cpp_hash_table *table)
275 1.1 mrg {
276 1.1 mrg size_t nelts, nids, overhead, headers;
277 1.1 mrg size_t total_bytes, longest, deleted = 0;
278 1.1 mrg double sum_of_squares, exp_len, exp_len2, exp2_len;
279 1.1 mrg hashnode *p, *limit;
280 1.1 mrg
281 1.1 mrg #define SCALE(x) ((unsigned long) ((x) < 1024*10 \
282 1.1 mrg ? (x) \
283 1.1 mrg : ((x) < 1024*1024*10 \
284 1.1 mrg ? (x) / 1024 \
285 1.1 mrg : (x) / (1024*1024))))
286 1.1 mrg #define LABEL(x) ((x) < 1024*10 ? ' ' : ((x) < 1024*1024*10 ? 'k' : 'M'))
287 1.1 mrg
288 1.1 mrg total_bytes = longest = sum_of_squares = nids = 0;
289 1.1 mrg p = table->entries;
290 1.1 mrg limit = p + table->nslots;
291 1.1 mrg do
292 1.1 mrg if (*p == DELETED)
293 1.1 mrg ++deleted;
294 1.1 mrg else if (*p)
295 1.1 mrg {
296 1.1 mrg size_t n = HT_LEN (*p);
297 1.1 mrg
298 1.1 mrg total_bytes += n;
299 1.1 mrg sum_of_squares += (double) n * n;
300 1.1 mrg if (n > longest)
301 1.1 mrg longest = n;
302 1.1 mrg nids++;
303 1.1 mrg }
304 1.1 mrg while (++p < limit);
305 1.1 mrg
306 1.1 mrg nelts = table->nelements;
307 1.1 mrg headers = table->nslots * sizeof (hashnode);
308 1.1 mrg
309 1.1 mrg fprintf (stderr, "\nString pool\n%-32s%lu\n", "entries:",
310 1.1 mrg (unsigned long) nelts);
311 1.1 mrg fprintf (stderr, "%-32s%lu (%.2f%%)\n", "identifiers:",
312 1.1 mrg (unsigned long) nids, nids * 100.0 / nelts);
313 1.1 mrg fprintf (stderr, "%-32s%lu\n", "slots:",
314 1.1 mrg (unsigned long) table->nslots);
315 1.1 mrg fprintf (stderr, "%-32s%lu\n", "deleted:",
316 1.1 mrg (unsigned long) deleted);
317 1.1 mrg
318 1.1 mrg if (table->alloc_subobject)
319 1.1 mrg fprintf (stderr, "%-32s%lu%c\n", "GGC bytes:",
320 1.1 mrg SCALE (total_bytes), LABEL (total_bytes));
321 1.1 mrg else
322 1.1 mrg {
323 1.1 mrg overhead = obstack_memory_used (&table->stack) - total_bytes;
324 1.1 mrg fprintf (stderr, "%-32s%lu%c (%lu%c overhead)\n",
325 1.1 mrg "obstack bytes:",
326 1.1 mrg SCALE (total_bytes), LABEL (total_bytes),
327 1.1 mrg SCALE (overhead), LABEL (overhead));
328 1.1 mrg }
329 1.1 mrg fprintf (stderr, "%-32s%lu%c\n", "table size:",
330 1.1 mrg SCALE (headers), LABEL (headers));
331 1.1 mrg
332 1.1 mrg exp_len = (double)total_bytes / (double)nelts;
333 1.1 mrg exp2_len = exp_len * exp_len;
334 1.1 mrg exp_len2 = (double) sum_of_squares / (double) nelts;
335 1.1 mrg
336 1.1 mrg fprintf (stderr, "%-32s%.4f\n", "coll/search:",
337 1.1 mrg (double) table->collisions / (double) table->searches);
338 1.1 mrg fprintf (stderr, "%-32s%.4f\n", "ins/search:",
339 1.1 mrg (double) nelts / (double) table->searches);
340 1.1 mrg fprintf (stderr, "%-32s%.2f bytes (+/- %.2f)\n",
341 1.1 mrg "avg. entry:",
342 1.1 mrg exp_len, approx_sqrt (exp_len2 - exp2_len));
343 1.1 mrg fprintf (stderr, "%-32s%lu\n", "longest entry:",
344 1.1 mrg (unsigned long) longest);
345 1.1 mrg #undef SCALE
346 1.1 mrg #undef LABEL
347 1.1 mrg }
348 1.1 mrg
349 1.1 mrg /* Return the approximate positive square root of a number N. This is for
350 1.1 mrg statistical reports, not code generation. */
351 1.1 mrg static double
352 1.1 mrg approx_sqrt (double x)
353 1.1 mrg {
354 1.1 mrg double s, d;
355 1.1 mrg
356 1.1 mrg if (x < 0)
357 1.1 mrg abort ();
358 1.1 mrg if (x == 0)
359 1.1 mrg return 0;
360 1.1 mrg
361 1.1 mrg s = x;
362 1.1 mrg do
363 1.1 mrg {
364 1.1 mrg d = (s * s - x) / (2 * s);
365 1.1 mrg s -= d;
366 1.1 mrg }
367 1.1 mrg while (d > .0001);
368 1.1 mrg return s;
369 1.1 mrg }
370