1 1.80 rillig /* $NetBSD: hash.c,v 1.80 2025/04/22 19:28:50 rillig Exp $ */ 2 1.5 christos 3 1.1 cgd /* 4 1.1 cgd * Copyright (c) 1988, 1989, 1990 The Regents of the University of California. 5 1.12 agc * All rights reserved. 6 1.12 agc * 7 1.12 agc * This code is derived from software contributed to Berkeley by 8 1.12 agc * Adam de Boor. 9 1.12 agc * 10 1.12 agc * Redistribution and use in source and binary forms, with or without 11 1.12 agc * modification, are permitted provided that the following conditions 12 1.12 agc * are met: 13 1.12 agc * 1. Redistributions of source code must retain the above copyright 14 1.12 agc * notice, this list of conditions and the following disclaimer. 15 1.12 agc * 2. Redistributions in binary form must reproduce the above copyright 16 1.12 agc * notice, this list of conditions and the following disclaimer in the 17 1.12 agc * documentation and/or other materials provided with the distribution. 18 1.12 agc * 3. Neither the name of the University nor the names of its contributors 19 1.12 agc * may be used to endorse or promote products derived from this software 20 1.12 agc * without specific prior written permission. 21 1.12 agc * 22 1.12 agc * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 1.12 agc * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 1.12 agc * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 1.12 agc * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 1.12 agc * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 1.12 agc * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 1.12 agc * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 1.12 agc * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 1.12 agc * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 1.12 agc * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 1.12 agc * SUCH DAMAGE. 33 1.12 agc */ 34 1.12 agc 35 1.12 agc /* 36 1.1 cgd * Copyright (c) 1988, 1989 by Adam de Boor 37 1.1 cgd * Copyright (c) 1989 by Berkeley Softworks 38 1.1 cgd * All rights reserved. 39 1.1 cgd * 40 1.1 cgd * This code is derived from software contributed to Berkeley by 41 1.1 cgd * Adam de Boor. 42 1.1 cgd * 43 1.1 cgd * Redistribution and use in source and binary forms, with or without 44 1.1 cgd * modification, are permitted provided that the following conditions 45 1.1 cgd * are met: 46 1.1 cgd * 1. Redistributions of source code must retain the above copyright 47 1.1 cgd * notice, this list of conditions and the following disclaimer. 48 1.1 cgd * 2. Redistributions in binary form must reproduce the above copyright 49 1.1 cgd * notice, this list of conditions and the following disclaimer in the 50 1.1 cgd * documentation and/or other materials provided with the distribution. 51 1.1 cgd * 3. All advertising materials mentioning features or use of this software 52 1.1 cgd * must display the following acknowledgement: 53 1.1 cgd * This product includes software developed by the University of 54 1.1 cgd * California, Berkeley and its contributors. 55 1.1 cgd * 4. Neither the name of the University nor the names of its contributors 56 1.1 cgd * may be used to endorse or promote products derived from this software 57 1.1 cgd * without specific prior written permission. 58 1.1 cgd * 59 1.1 cgd * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 60 1.1 cgd * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 61 1.1 cgd * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 62 1.1 cgd * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 63 1.1 cgd * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 64 1.1 cgd * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 65 1.1 cgd * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 66 1.1 cgd * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 67 1.1 cgd * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 68 1.1 cgd * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 69 1.1 cgd * SUCH DAMAGE. 70 1.1 cgd */ 71 1.1 cgd 72 1.74 rillig /* Hash tables with string keys and pointer values. */ 73 1.34 rillig 74 1.4 cgd #include "make.h" 75 1.1 cgd 76 1.32 rillig /* "@(#)hash.c 8.1 (Berkeley) 6/6/93" */ 77 1.80 rillig MAKE_RCSID("$NetBSD: hash.c,v 1.80 2025/04/22 19:28:50 rillig Exp $"); 78 1.32 rillig 79 1.1 cgd /* 80 1.43 rillig * The ratio of # entries to # buckets at which we rebuild the table to 81 1.43 rillig * make it larger. 82 1.1 cgd */ 83 1.43 rillig #define rebuildLimit 3 84 1.1 cgd 85 1.61 rillig /* This hash function matches Gosling's Emacs and java.lang.String. */ 86 1.80 rillig static unsigned 87 1.71 rillig Hash_String(const char *key, const char **out_keyEnd) 88 1.43 rillig { 89 1.80 rillig unsigned h; 90 1.59 rillig const char *p; 91 1.59 rillig 92 1.59 rillig h = 0; 93 1.59 rillig for (p = key; *p != '\0'; p++) 94 1.59 rillig h = 31 * h + (unsigned char)*p; 95 1.59 rillig 96 1.71 rillig *out_keyEnd = p; 97 1.43 rillig return h; 98 1.43 rillig } 99 1.23 sjg 100 1.64 rillig /* This hash function matches Gosling's Emacs and java.lang.String. */ 101 1.80 rillig unsigned 102 1.64 rillig Hash_Substring(Substring key) 103 1.48 rillig { 104 1.80 rillig unsigned h; 105 1.64 rillig const char *p; 106 1.64 rillig 107 1.64 rillig h = 0; 108 1.64 rillig for (p = key.start; p != key.end; p++) 109 1.64 rillig h = 31 * h + (unsigned char)*p; 110 1.64 rillig return h; 111 1.48 rillig } 112 1.48 rillig 113 1.46 rillig static HashEntry * 114 1.80 rillig HashTable_Find(HashTable *t, Substring key, unsigned h) 115 1.43 rillig { 116 1.73 rillig HashEntry *he; 117 1.71 rillig size_t keyLen = Substring_Length(key); 118 1.41 rillig 119 1.43 rillig #ifdef DEBUG_HASH_LOOKUP 120 1.71 rillig DEBUG4(HASH, "HashTable_Find: %p h=%08x key=%.*s\n", 121 1.71 rillig t, h, (int)keyLen, key.start); 122 1.64 rillig #endif 123 1.64 rillig 124 1.73 rillig for (he = t->buckets[h & t->bucketsMask]; he != NULL; he = he->next) { 125 1.73 rillig if (he->hash == h && 126 1.73 rillig strncmp(he->key, key.start, keyLen) == 0 && 127 1.73 rillig he->key[keyLen] == '\0') 128 1.64 rillig break; 129 1.64 rillig } 130 1.64 rillig 131 1.73 rillig return he; 132 1.64 rillig } 133 1.64 rillig 134 1.54 rillig /* Set up the hash table. */ 135 1.1 cgd void 136 1.54 rillig HashTable_Init(HashTable *t) 137 1.1 cgd { 138 1.80 rillig unsigned n = 16, i; 139 1.56 rillig HashEntry **buckets = bmake_malloc(sizeof *buckets * n); 140 1.50 rillig for (i = 0; i < n; i++) 141 1.50 rillig buckets[i] = NULL; 142 1.1 cgd 143 1.50 rillig t->buckets = buckets; 144 1.50 rillig t->bucketsSize = n; 145 1.1 cgd t->numEntries = 0; 146 1.50 rillig t->bucketsMask = n - 1; 147 1.1 cgd } 148 1.1 cgd 149 1.61 rillig /* 150 1.61 rillig * Remove everything from the hash table and free up the memory for the keys 151 1.61 rillig * of the hash table, but not for the values associated to these keys. 152 1.61 rillig */ 153 1.1 cgd void 154 1.54 rillig HashTable_Done(HashTable *t) 155 1.1 cgd { 156 1.51 rillig HashEntry **buckets = t->buckets; 157 1.51 rillig size_t i, n = t->bucketsSize; 158 1.1 cgd 159 1.51 rillig for (i = 0; i < n; i++) { 160 1.51 rillig HashEntry *he = buckets[i]; 161 1.51 rillig while (he != NULL) { 162 1.51 rillig HashEntry *next = he->next; 163 1.51 rillig free(he); 164 1.51 rillig he = next; 165 1.1 cgd } 166 1.1 cgd } 167 1.61 rillig 168 1.29 rillig free(t->buckets); 169 1.55 rillig #ifdef CLEANUP 170 1.29 rillig t->buckets = NULL; 171 1.55 rillig #endif 172 1.1 cgd } 173 1.1 cgd 174 1.54 rillig /* Find the entry corresponding to the key, or return NULL. */ 175 1.46 rillig HashEntry * 176 1.54 rillig HashTable_FindEntry(HashTable *t, const char *key) 177 1.1 cgd { 178 1.71 rillig const char *keyEnd; 179 1.80 rillig unsigned h = Hash_String(key, &keyEnd); 180 1.71 rillig return HashTable_Find(t, Substring_Init(key, keyEnd), h); 181 1.1 cgd } 182 1.1 cgd 183 1.54 rillig /* Find the value corresponding to the key, or return NULL. */ 184 1.33 rillig void * 185 1.54 rillig HashTable_FindValue(HashTable *t, const char *key) 186 1.33 rillig { 187 1.54 rillig HashEntry *he = HashTable_FindEntry(t, key); 188 1.43 rillig return he != NULL ? he->value : NULL; 189 1.43 rillig } 190 1.43 rillig 191 1.60 rillig /* 192 1.60 rillig * Find the value corresponding to the key and the precomputed hash, 193 1.60 rillig * or return NULL. 194 1.60 rillig */ 195 1.48 rillig void * 196 1.80 rillig HashTable_FindValueBySubstringHash(HashTable *t, Substring key, unsigned h) 197 1.48 rillig { 198 1.71 rillig HashEntry *he = HashTable_Find(t, key, h); 199 1.48 rillig return he != NULL ? he->value : NULL; 200 1.48 rillig } 201 1.48 rillig 202 1.79 rillig static unsigned 203 1.79 rillig HashTable_MaxChain(const HashTable *t) 204 1.79 rillig { 205 1.79 rillig unsigned b, cl, max_cl = 0; 206 1.79 rillig for (b = 0; b < t->bucketsSize; b++) { 207 1.79 rillig const HashEntry *he = t->buckets[b]; 208 1.79 rillig for (cl = 0; he != NULL; he = he->next) 209 1.79 rillig cl++; 210 1.79 rillig if (cl > max_cl) 211 1.79 rillig max_cl = cl; 212 1.79 rillig } 213 1.79 rillig return max_cl; 214 1.79 rillig } 215 1.79 rillig 216 1.60 rillig /* 217 1.60 rillig * Make the hash table larger. Any bucket numbers from the old table become 218 1.74 rillig * invalid; the hash values stay valid though. 219 1.60 rillig */ 220 1.43 rillig static void 221 1.54 rillig HashTable_Enlarge(HashTable *t) 222 1.43 rillig { 223 1.80 rillig unsigned oldSize = t->bucketsSize; 224 1.49 rillig HashEntry **oldBuckets = t->buckets; 225 1.80 rillig unsigned newSize = 2 * oldSize; 226 1.80 rillig unsigned newMask = newSize - 1; 227 1.56 rillig HashEntry **newBuckets = bmake_malloc(sizeof *newBuckets * newSize); 228 1.49 rillig size_t i; 229 1.49 rillig 230 1.49 rillig for (i = 0; i < newSize; i++) 231 1.49 rillig newBuckets[i] = NULL; 232 1.49 rillig 233 1.49 rillig for (i = 0; i < oldSize; i++) { 234 1.49 rillig HashEntry *he = oldBuckets[i]; 235 1.49 rillig while (he != NULL) { 236 1.49 rillig HashEntry *next = he->next; 237 1.73 rillig he->next = newBuckets[he->hash & newMask]; 238 1.73 rillig newBuckets[he->hash & newMask] = he; 239 1.49 rillig he = next; 240 1.43 rillig } 241 1.43 rillig } 242 1.49 rillig 243 1.49 rillig free(oldBuckets); 244 1.49 rillig 245 1.49 rillig t->bucketsSize = newSize; 246 1.49 rillig t->bucketsMask = newMask; 247 1.49 rillig t->buckets = newBuckets; 248 1.69 rillig DEBUG4(HASH, "HashTable_Enlarge: %p size=%d entries=%d maxchain=%d\n", 249 1.79 rillig (void *)t, t->bucketsSize, t->numEntries, HashTable_MaxChain(t)); 250 1.33 rillig } 251 1.33 rillig 252 1.60 rillig /* 253 1.60 rillig * Find or create an entry corresponding to the key. 254 1.60 rillig * Return in out_isNew whether a new entry has been created. 255 1.60 rillig */ 256 1.46 rillig HashEntry * 257 1.62 rillig HashTable_CreateEntry(HashTable *t, const char *key, bool *out_isNew) 258 1.1 cgd { 259 1.71 rillig const char *keyEnd; 260 1.80 rillig unsigned h = Hash_String(key, &keyEnd); 261 1.71 rillig HashEntry *he = HashTable_Find(t, Substring_Init(key, keyEnd), h); 262 1.1 cgd 263 1.52 rillig if (he != NULL) { 264 1.52 rillig if (out_isNew != NULL) 265 1.62 rillig *out_isNew = false; 266 1.52 rillig return he; 267 1.42 rillig } 268 1.1 cgd 269 1.29 rillig if (t->numEntries >= rebuildLimit * t->bucketsSize) 270 1.54 rillig HashTable_Enlarge(t); 271 1.43 rillig 272 1.71 rillig he = bmake_malloc(sizeof *he + (size_t)(keyEnd - key)); 273 1.52 rillig he->value = NULL; 274 1.73 rillig he->hash = h; 275 1.71 rillig memcpy(he->key, key, (size_t)(keyEnd - key) + 1); 276 1.52 rillig 277 1.52 rillig he->next = t->buckets[h & t->bucketsMask]; 278 1.52 rillig t->buckets[h & t->bucketsMask] = he; 279 1.1 cgd t->numEntries++; 280 1.1 cgd 281 1.52 rillig if (out_isNew != NULL) 282 1.62 rillig *out_isNew = true; 283 1.52 rillig return he; 284 1.1 cgd } 285 1.1 cgd 286 1.67 rillig void 287 1.57 rillig HashTable_Set(HashTable *t, const char *key, void *value) 288 1.57 rillig { 289 1.57 rillig HashEntry *he = HashTable_CreateEntry(t, key, NULL); 290 1.57 rillig HashEntry_Set(he, value); 291 1.57 rillig } 292 1.57 rillig 293 1.72 rillig /* Delete the entry from the table, don't free the value of the entry. */ 294 1.1 cgd void 295 1.54 rillig HashTable_DeleteEntry(HashTable *t, HashEntry *he) 296 1.1 cgd { 297 1.73 rillig HashEntry **ref = &t->buckets[he->hash & t->bucketsMask]; 298 1.1 cgd 299 1.75 rillig for (; *ref != he; ref = &(*ref)->next) 300 1.75 rillig continue; 301 1.75 rillig *ref = he->next; 302 1.75 rillig free(he); 303 1.75 rillig t->numEntries--; 304 1.1 cgd } 305 1.1 cgd 306 1.60 rillig /* 307 1.78 rillig * Place the next entry from the hash table in hi->entry, or return false if 308 1.78 rillig * the end of the table is reached. 309 1.60 rillig */ 310 1.76 rillig bool 311 1.45 rillig HashIter_Next(HashIter *hi) 312 1.1 cgd { 313 1.46 rillig HashTable *t = hi->table; 314 1.52 rillig HashEntry *he = hi->entry; 315 1.52 rillig HashEntry **buckets = t->buckets; 316 1.80 rillig unsigned bucketsSize = t->bucketsSize; 317 1.1 cgd 318 1.52 rillig if (he != NULL) 319 1.52 rillig he = he->next; /* skip the most recently returned entry */ 320 1.52 rillig 321 1.52 rillig while (he == NULL) { /* find the next nonempty chain */ 322 1.52 rillig if (hi->nextBucket >= bucketsSize) 323 1.76 rillig return false; 324 1.52 rillig he = buckets[hi->nextBucket++]; 325 1.1 cgd } 326 1.52 rillig hi->entry = he; 327 1.77 rillig return true; 328 1.1 cgd } 329 1.1 cgd 330 1.28 rillig void 331 1.79 rillig HashTable_DebugStats(const HashTable *t, const char *name) 332 1.25 sjg { 333 1.79 rillig DEBUG4(HASH, "HashTable \"%s\": size=%u entries=%u maxchain=%u\n", 334 1.79 rillig name, t->bucketsSize, t->numEntries, HashTable_MaxChain(t)); 335 1.25 sjg } 336