hash.c revision 1.60 1 /* $NetBSD: hash.c,v 1.60 2020/12/30 10:03:16 rillig Exp $ */
2
3 /*
4 * Copyright (c) 1988, 1989, 1990 The Regents of the University of California.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * Adam de Boor.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 */
34
35 /*
36 * Copyright (c) 1988, 1989 by Adam de Boor
37 * Copyright (c) 1989 by Berkeley Softworks
38 * All rights reserved.
39 *
40 * This code is derived from software contributed to Berkeley by
41 * Adam de Boor.
42 *
43 * Redistribution and use in source and binary forms, with or without
44 * modification, are permitted provided that the following conditions
45 * are met:
46 * 1. Redistributions of source code must retain the above copyright
47 * notice, this list of conditions and the following disclaimer.
48 * 2. Redistributions in binary form must reproduce the above copyright
49 * notice, this list of conditions and the following disclaimer in the
50 * documentation and/or other materials provided with the distribution.
51 * 3. All advertising materials mentioning features or use of this software
52 * must display the following acknowledgement:
53 * This product includes software developed by the University of
54 * California, Berkeley and its contributors.
55 * 4. Neither the name of the University nor the names of its contributors
56 * may be used to endorse or promote products derived from this software
57 * without specific prior written permission.
58 *
59 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
60 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
62 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
63 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
64 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
65 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
66 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
67 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
68 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
69 * SUCH DAMAGE.
70 */
71
72 /* Hash tables with string keys. */
73
74 #include "make.h"
75
76 /* "@(#)hash.c 8.1 (Berkeley) 6/6/93" */
77 MAKE_RCSID("$NetBSD: hash.c,v 1.60 2020/12/30 10:03:16 rillig Exp $");
78
79 /*
80 * The ratio of # entries to # buckets at which we rebuild the table to
81 * make it larger.
82 */
83 #define rebuildLimit 3
84
85 /* This hash function matches Gosling's emacs and java.lang.String. */
86 static unsigned int
87 hash(const char *key, size_t *out_keylen)
88 {
89 unsigned int h;
90 const char *p;
91
92 h = 0;
93 for (p = key; *p != '\0'; p++)
94 h = 31 * h + (unsigned char)*p;
95
96 if (out_keylen != NULL)
97 *out_keylen = (size_t)(p - key);
98 return h;
99 }
100
101 unsigned int
102 Hash_Hash(const char *key)
103 {
104 return hash(key, NULL);
105 }
106
107 static HashEntry *
108 HashTable_Find(HashTable *t, unsigned int h, const char *key)
109 {
110 HashEntry *e;
111 unsigned int chainlen = 0;
112
113 #ifdef DEBUG_HASH_LOOKUP
114 DEBUG4(HASH, "%s: %p h=%08x key=%s\n", __func__, t, h, key);
115 #endif
116
117 for (e = t->buckets[h & t->bucketsMask]; e != NULL; e = e->next) {
118 chainlen++;
119 if (e->key_hash == h && strcmp(e->key, key) == 0)
120 break;
121 }
122
123 if (chainlen > t->maxchain)
124 t->maxchain = chainlen;
125
126 return e;
127 }
128
129 /* Set up the hash table. */
130 void
131 HashTable_Init(HashTable *t)
132 {
133 unsigned int n = 16, i;
134 HashEntry **buckets = bmake_malloc(sizeof *buckets * n);
135 for (i = 0; i < n; i++)
136 buckets[i] = NULL;
137
138 t->buckets = buckets;
139 t->bucketsSize = n;
140 t->numEntries = 0;
141 t->bucketsMask = n - 1;
142 t->maxchain = 0;
143 }
144
145 /* Remove everything from the hash table and frees up the memory. */
146 void
147 HashTable_Done(HashTable *t)
148 {
149 HashEntry **buckets = t->buckets;
150 size_t i, n = t->bucketsSize;
151
152 for (i = 0; i < n; i++) {
153 HashEntry *he = buckets[i];
154 while (he != NULL) {
155 HashEntry *next = he->next;
156 free(he);
157 he = next;
158 }
159 }
160 free(t->buckets);
161
162 #ifdef CLEANUP
163 t->buckets = NULL;
164 #endif
165 }
166
167 /* Find the entry corresponding to the key, or return NULL. */
168 HashEntry *
169 HashTable_FindEntry(HashTable *t, const char *key)
170 {
171 unsigned int h = hash(key, NULL);
172 return HashTable_Find(t, h, key);
173 }
174
175 /* Find the value corresponding to the key, or return NULL. */
176 void *
177 HashTable_FindValue(HashTable *t, const char *key)
178 {
179 HashEntry *he = HashTable_FindEntry(t, key);
180 return he != NULL ? he->value : NULL;
181 }
182
183 /*
184 * Find the value corresponding to the key and the precomputed hash,
185 * or return NULL.
186 */
187 void *
188 HashTable_FindValueHash(HashTable *t, const char *key, unsigned int h)
189 {
190 HashEntry *he = HashTable_Find(t, h, key);
191 return he != NULL ? he->value : NULL;
192 }
193
194 /*
195 * Make the hash table larger. Any bucket numbers from the old table become
196 * invalid; the hash codes stay valid though.
197 */
198 static void
199 HashTable_Enlarge(HashTable *t)
200 {
201 unsigned int oldSize = t->bucketsSize;
202 HashEntry **oldBuckets = t->buckets;
203 unsigned int newSize = 2 * oldSize;
204 unsigned int newMask = newSize - 1;
205 HashEntry **newBuckets = bmake_malloc(sizeof *newBuckets * newSize);
206 size_t i;
207
208 for (i = 0; i < newSize; i++)
209 newBuckets[i] = NULL;
210
211 for (i = 0; i < oldSize; i++) {
212 HashEntry *he = oldBuckets[i];
213 while (he != NULL) {
214 HashEntry *next = he->next;
215 he->next = newBuckets[he->key_hash & newMask];
216 newBuckets[he->key_hash & newMask] = he;
217 he = next;
218 }
219 }
220
221 free(oldBuckets);
222
223 t->bucketsSize = newSize;
224 t->bucketsMask = newMask;
225 t->buckets = newBuckets;
226 DEBUG5(HASH, "%s: %p size=%d entries=%d maxchain=%d\n",
227 __func__, t, t->bucketsSize, t->numEntries, t->maxchain);
228 t->maxchain = 0;
229 }
230
231 /*
232 * Find or create an entry corresponding to the key.
233 * Return in out_isNew whether a new entry has been created.
234 */
235 HashEntry *
236 HashTable_CreateEntry(HashTable *t, const char *key, Boolean *out_isNew)
237 {
238 size_t keylen;
239 unsigned int h = hash(key, &keylen);
240 HashEntry *he = HashTable_Find(t, h, key);
241
242 if (he != NULL) {
243 if (out_isNew != NULL)
244 *out_isNew = FALSE;
245 return he;
246 }
247
248 if (t->numEntries >= rebuildLimit * t->bucketsSize)
249 HashTable_Enlarge(t);
250
251 he = bmake_malloc(sizeof *he + keylen);
252 he->value = NULL;
253 he->key_hash = h;
254 memcpy(he->key, key, keylen + 1);
255
256 he->next = t->buckets[h & t->bucketsMask];
257 t->buckets[h & t->bucketsMask] = he;
258 t->numEntries++;
259
260 if (out_isNew != NULL)
261 *out_isNew = TRUE;
262 return he;
263 }
264
265 HashEntry *
266 HashTable_Set(HashTable *t, const char *key, void *value)
267 {
268 HashEntry *he = HashTable_CreateEntry(t, key, NULL);
269 HashEntry_Set(he, value);
270 return he;
271 }
272
273 /* Delete the entry from the table and free the associated memory. */
274 void
275 HashTable_DeleteEntry(HashTable *t, HashEntry *he)
276 {
277 HashEntry **ref = &t->buckets[he->key_hash & t->bucketsMask];
278 HashEntry *p;
279
280 for (; (p = *ref) != NULL; ref = &p->next) {
281 if (p == he) {
282 *ref = p->next;
283 free(p);
284 t->numEntries--;
285 return;
286 }
287 }
288 abort();
289 }
290
291 /* Set things up for iterating over all entries in the hash table. */
292 void
293 HashIter_Init(HashIter *hi, HashTable *t)
294 {
295 hi->table = t;
296 hi->nextBucket = 0;
297 hi->entry = NULL;
298 }
299
300 /*
301 * Return the next entry in the hash table, or NULL if the end of the table
302 * is reached.
303 */
304 HashEntry *
305 HashIter_Next(HashIter *hi)
306 {
307 HashTable *t = hi->table;
308 HashEntry *he = hi->entry;
309 HashEntry **buckets = t->buckets;
310 unsigned int bucketsSize = t->bucketsSize;
311
312 if (he != NULL)
313 he = he->next; /* skip the most recently returned entry */
314
315 while (he == NULL) { /* find the next nonempty chain */
316 if (hi->nextBucket >= bucketsSize)
317 return NULL;
318 he = buckets[hi->nextBucket++];
319 }
320 hi->entry = he;
321 return he;
322 }
323
324 void
325 HashTable_DebugStats(HashTable *t, const char *name)
326 {
327 DEBUG4(HASH, "HashTable %s: size=%u numEntries=%u maxchain=%u\n",
328 name, t->bucketsSize, t->numEntries, t->maxchain);
329 }
330