hash.c revision 1.2 1 /* $NetBSD: hash.c,v 1.2 2006/08/26 18:17:13 christos Exp $ */
2
3 /*
4 * Copyright (c) 1992, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This software was developed by the Computer Systems Engineering group
8 * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
9 * contributed to Berkeley.
10 *
11 * All advertising materials mentioning features or use of this software
12 * must display the following acknowledgement:
13 * This product includes software developed by the University of
14 * California, Lawrence Berkeley Laboratories.
15 *
16 * Redistribution and use in source and binary forms, with or without
17 * modification, are permitted provided that the following conditions
18 * are met:
19 * 1. Redistributions of source code must retain the above copyright
20 * notice, this list of conditions and the following disclaimer.
21 * 2. Redistributions in binary form must reproduce the above copyright
22 * notice, this list of conditions and the following disclaimer in the
23 * documentation and/or other materials provided with the distribution.
24 * 3. Neither the name of the University nor the names of its contributors
25 * may be used to endorse or promote products derived from this software
26 * without specific prior written permission.
27 *
28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
38 * SUCH DAMAGE.
39 *
40 * from: @(#)hash.c 8.1 (Berkeley) 6/6/93
41 */
42
43 #if HAVE_NBTOOL_CONFIG_H
44 #include "nbtool_config.h"
45 #endif
46
47 #include <sys/param.h>
48 #include <stdlib.h>
49 #include <string.h>
50 #include <util.h>
51 #include "defs.h"
52
53 /*
54 * Interned strings are kept in a hash table. By making each string
55 * unique, the program can compare strings by comparing pointers.
56 */
57 struct hashent {
58 // XXXLUKEM: a SIMPLEQ might be more appropriate
59 TAILQ_ENTRY(hashent) h_next;
60 const char *h_name; /* the string */
61 u_int h_hash; /* its hash value */
62 void *h_value; /* other values (for name=value) */
63 };
64 struct hashtab {
65 size_t ht_size; /* size (power of 2) */
66 u_int ht_mask; /* == ht_size - 1 */
67 u_int ht_used; /* number of entries used */
68 u_int ht_lim; /* when to expand */
69 TAILQ_HEAD(hashenthead, hashent) *ht_tab;
70 };
71
72 static struct hashtab strings;
73
74 static struct hashenthead hefreelist = TAILQ_HEAD_INITIALIZER(hefreelist);
75
/*
 * HASHFRACTION maps a bucket count to the entry count (ht_lim) beyond
 * which the table is grown, and so bounds the average chain length:
 * here, one and a half entries per bucket.  Slightly long chains are
 * acceptable because comparing the stored h_hash values usually avoids
 * the strcmp().
 */
#define	HASHFRACTION(sz)	(((sz) * 3) / 2)
82
83 static void ht_expand(struct hashtab *);
84 static void ht_init(struct hashtab *, size_t);
85 static inline u_int hash(const char *);
86 static inline struct hashent *newhashent(const char *, u_int);
87
88 /*
89 * Initialize a new hash table. The size must be a power of 2.
90 */
91 static void
92 ht_init(struct hashtab *ht, size_t sz)
93 {
94 u_int n;
95
96 ht->ht_tab = emalloc(sz * sizeof (ht->ht_tab[0]));
97 ht->ht_size = sz;
98 ht->ht_mask = sz - 1;
99 for (n = 0; n < sz; n++)
100 TAILQ_INIT(&ht->ht_tab[n]);
101 ht->ht_used = 0;
102 ht->ht_lim = HASHFRACTION(sz);
103 }
104
105 /*
106 * Expand an existing hash table.
107 */
108 static void
109 ht_expand(struct hashtab *ht)
110 {
111 struct hashenthead *h, *oldh;
112 struct hashent *p;
113 u_int n, i;
114
115 n = ht->ht_size * 2;
116 h = emalloc(n * sizeof *h);
117 for (i = 0; i < n; i++)
118 TAILQ_INIT(&h[i]);
119 oldh = ht->ht_tab;
120 n--;
121 for (i = 0; i < ht->ht_size; i++) {
122 while ((p = TAILQ_FIRST(&oldh[i])) != NULL) {
123 TAILQ_REMOVE(&oldh[i], p, h_next);
124 // XXXLUKEM: really should be TAILQ_INSERT_TAIL
125 TAILQ_INSERT_HEAD(&h[p->h_hash & n], p, h_next);
126 }
127 }
128 free(ht->ht_tab);
129 ht->ht_tab = h;
130 ht->ht_mask = n;
131 ht->ht_size = ++n;
132 ht->ht_lim = HASHFRACTION(n);
133 }
134
135 /*
136 * Make a new hash entry, setting its h_next to NULL.
137 * If the free list is not empty, use the first entry from there,
138 * otherwise allocate a new entry.
139 */
140 static inline struct hashent *
141 newhashent(const char *name, u_int h)
142 {
143 struct hashent *hp;
144
145 if (TAILQ_EMPTY(&hefreelist))
146 hp = ecalloc(1, sizeof(*hp));
147 else {
148 hp = TAILQ_FIRST(&hefreelist);
149 TAILQ_REMOVE(&hefreelist, hp, h_next);
150 }
151
152 hp->h_name = name;
153 hp->h_hash = h;
154 return (hp);
155 }
156
157 /*
158 * Hash a string.
159 */
160 static inline u_int
161 hash(const char *str)
162 {
163 u_int h;
164
165 for (h = 0; *str;)
166 h = (h << 5) + h + *str++;
167 return (h);
168 }
169
170 void
171 initintern(void)
172 {
173
174 ht_init(&strings, 128);
175 }
176
177 /*
178 * Generate a single unique copy of the given string. We expect this
179 * function to be used frequently, so it should be fast.
180 */
181 const char *
182 intern(const char *s)
183 {
184 struct hashtab *ht;
185 struct hashent *hp;
186 struct hashenthead *hpp;
187 u_int h;
188 char *p;
189
190 ht = &strings;
191 h = hash(s);
192 hpp = &ht->ht_tab[h & ht->ht_mask];
193 TAILQ_FOREACH(hp, hpp, h_next) {
194 if (hp->h_hash == h && strcmp(hp->h_name, s) == 0)
195 return (hp->h_name);
196 }
197 p = estrdup(s);
198 hp = newhashent(p, h);
199 TAILQ_INSERT_TAIL(hpp, hp, h_next);
200 if (++ht->ht_used > ht->ht_lim)
201 ht_expand(ht);
202 return (p);
203 }
204
205 struct hashtab *
206 ht_new(void)
207 {
208 struct hashtab *ht;
209
210 ht = ecalloc(1, sizeof *ht);
211 ht_init(ht, 8);
212 return (ht);
213 }
214
215 /*
216 * Insert and/or replace.
217 */
218 int
219 ht_insrep(struct hashtab *ht, const char *nam, void *val, int replace)
220 {
221 struct hashent *hp;
222 struct hashenthead *hpp;
223 u_int h;
224
225 h = hash(nam);
226 hpp = &ht->ht_tab[h & ht->ht_mask];
227 TAILQ_FOREACH(hp, hpp, h_next) {
228 if (hp->h_name == nam) {
229 if (replace)
230 hp->h_value = val;
231 return (1);
232 }
233 }
234 hp = newhashent(nam, h);
235 TAILQ_INSERT_TAIL(hpp, hp, h_next);
236 hp->h_value = val;
237 if (++ht->ht_used > ht->ht_lim)
238 ht_expand(ht);
239 return (0);
240 }
241
242 /*
243 * Remove.
244 */
245 int
246 ht_remove(struct hashtab *ht, const char *name)
247 {
248 struct hashent *hp;
249 struct hashenthead *hpp;
250 u_int h;
251
252 h = hash(name);
253 hpp = &ht->ht_tab[h & ht->ht_mask];
254
255 TAILQ_FOREACH(hp, hpp, h_next) {
256 if (hp->h_name != name)
257 continue;
258 TAILQ_REMOVE(hpp, hp, h_next);
259
260 memset(hp, 0, sizeof(*hp));
261 TAILQ_INSERT_TAIL(&hefreelist, hp, h_next);
262 ht->ht_used--;
263 return (0);
264 }
265 return (1);
266 }
267
268 void *
269 ht_lookup(struct hashtab *ht, const char *nam)
270 {
271 struct hashent *hp;
272 struct hashenthead *hpp;
273 u_int h;
274
275 h = hash(nam);
276 hpp = &ht->ht_tab[h & ht->ht_mask];
277 TAILQ_FOREACH(hp, hpp, h_next)
278 if (hp->h_name == nam)
279 return (hp->h_value);
280 return (NULL);
281 }
282
283 /*
284 * first parameter to callback is the entry name from the hash table
285 * second parameter is the value from the hash table
286 * third argument is passed through from the "arg" parameter to ht_enumerate()
287 */
288
289 int
290 ht_enumerate(struct hashtab *ht, ht_callback cbfunc, void *arg)
291 {
292 struct hashent *hp;
293 struct hashenthead *hpp;
294 u_int i;
295 int rval = 0;
296
297 for (i = 0; i < ht->ht_size; i++) {
298 hpp = &ht->ht_tab[i];
299 TAILQ_FOREACH(hp, hpp, h_next)
300 rval += (*cbfunc)(hp->h_name, hp->h_value, arg);
301 }
302 return rval;
303 }
304