hash_table.c revision af69d88d
1af69d88dSmrg/*
2af69d88dSmrg * Copyright © 2009,2012 Intel Corporation
3af69d88dSmrg * Copyright © 1988-2004 Keith Packard and Bart Massey.
4af69d88dSmrg *
5af69d88dSmrg * Permission is hereby granted, free of charge, to any person obtaining a
6af69d88dSmrg * copy of this software and associated documentation files (the "Software"),
7af69d88dSmrg * to deal in the Software without restriction, including without limitation
8af69d88dSmrg * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9af69d88dSmrg * and/or sell copies of the Software, and to permit persons to whom the
10af69d88dSmrg * Software is furnished to do so, subject to the following conditions:
11af69d88dSmrg *
12af69d88dSmrg * The above copyright notice and this permission notice (including the next
13af69d88dSmrg * paragraph) shall be included in all copies or substantial portions of the
14af69d88dSmrg * Software.
15af69d88dSmrg *
16af69d88dSmrg * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17af69d88dSmrg * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18af69d88dSmrg * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19af69d88dSmrg * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20af69d88dSmrg * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21af69d88dSmrg * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22af69d88dSmrg * IN THE SOFTWARE.
23af69d88dSmrg *
24af69d88dSmrg * Except as contained in this notice, the names of the authors
25af69d88dSmrg * or their institutions shall not be used in advertising or
26af69d88dSmrg * otherwise to promote the sale, use or other dealings in this
27af69d88dSmrg * Software without prior written authorization from the
28af69d88dSmrg * authors.
29af69d88dSmrg *
30af69d88dSmrg * Authors:
31af69d88dSmrg *    Eric Anholt <eric@anholt.net>
32af69d88dSmrg *    Keith Packard <keithp@keithp.com>
33af69d88dSmrg */
34af69d88dSmrg
35af69d88dSmrg/**
36af69d88dSmrg * Implements an open-addressing, linear-reprobing hash table.
37af69d88dSmrg *
38af69d88dSmrg * For more information, see:
39af69d88dSmrg *
40af69d88dSmrg * http://cgit.freedesktop.org/~anholt/hash_table/tree/README
41af69d88dSmrg */
42af69d88dSmrg
43af69d88dSmrg#include <stdlib.h>
44af69d88dSmrg#include <string.h>
45af69d88dSmrg
46af69d88dSmrg#include "hash_table.h"
47af69d88dSmrg#include "ralloc.h"
48af69d88dSmrg#include "macros.h"
49af69d88dSmrg
/* Storage whose address is the default "deleted entry" key marker installed
 * by _mesa_hash_table_create(); only the address matters, the value is never
 * read in this file.
 */
static const uint32_t deleted_key_value = 0;
51af69d88dSmrg
/**
 * Growth schedule for the table, one row per size step.
 *
 * Following Knuth's advice, every row pairs a table size p with a rehash
 * modulus p - 2, both intended to be prime.  max_entries is the entry count
 * at which an insert moves the table on to the next row; it is deliberately
 * smaller than size so that some slots always stay free, which avoids the
 * sharp performance degradation of a nearly full open-addressing table.
 */
static const struct {
   uint32_t max_entries, size, rehash;
} hash_sizes[] = {
   { .max_entries = 2,            .size = 5,            .rehash = 3            },
   { .max_entries = 4,            .size = 7,            .rehash = 5            },
   { .max_entries = 8,            .size = 13,           .rehash = 11           },
   { .max_entries = 16,           .size = 19,           .rehash = 17           },
   { .max_entries = 32,           .size = 43,           .rehash = 41           },
   { .max_entries = 64,           .size = 73,           .rehash = 71           },
   { .max_entries = 128,          .size = 151,          .rehash = 149          },
   { .max_entries = 256,          .size = 283,          .rehash = 281          },
   { .max_entries = 512,          .size = 571,          .rehash = 569          },
   { .max_entries = 1024,         .size = 1153,         .rehash = 1151         },
   { .max_entries = 2048,         .size = 2269,         .rehash = 2267         },
   { .max_entries = 4096,         .size = 4519,         .rehash = 4517         },
   { .max_entries = 8192,         .size = 9013,         .rehash = 9011         },
   { .max_entries = 16384,        .size = 18043,        .rehash = 18041        },
   { .max_entries = 32768,        .size = 36109,        .rehash = 36107        },
   { .max_entries = 65536,        .size = 72091,        .rehash = 72089        },
   { .max_entries = 131072,       .size = 144409,       .rehash = 144407       },
   { .max_entries = 262144,       .size = 288361,       .rehash = 288359       },
   { .max_entries = 524288,       .size = 576883,       .rehash = 576881       },
   { .max_entries = 1048576,      .size = 1153459,      .rehash = 1153457      },
   { .max_entries = 2097152,      .size = 2307163,      .rehash = 2307161      },
   { .max_entries = 4194304,      .size = 4613893,      .rehash = 4613891      },
   { .max_entries = 8388608,      .size = 9227641,      .rehash = 9227639      },
   { .max_entries = 16777216,     .size = 18455029,     .rehash = 18455027     },
   { .max_entries = 33554432,     .size = 36911011,     .rehash = 36911009     },
   { .max_entries = 67108864,     .size = 73819861,     .rehash = 73819859     },
   { .max_entries = 134217728,    .size = 147639589,    .rehash = 147639587    },
   { .max_entries = 268435456,    .size = 295279081,    .rehash = 295279079    },
   { .max_entries = 536870912,    .size = 590559793,    .rehash = 590559791    },
   { .max_entries = 1073741824,   .size = 1181116273,   .rehash = 1181116271   },
   { .max_entries = 2147483648ul, .size = 2362232233ul, .rehash = 2362232231ul }
};
92af69d88dSmrg
93af69d88dSmrgstatic int
94af69d88dSmrgentry_is_free(const struct hash_entry *entry)
95af69d88dSmrg{
96af69d88dSmrg   return entry->key == NULL;
97af69d88dSmrg}
98af69d88dSmrg
99af69d88dSmrgstatic int
100af69d88dSmrgentry_is_deleted(const struct hash_table *ht, struct hash_entry *entry)
101af69d88dSmrg{
102af69d88dSmrg   return entry->key == ht->deleted_key;
103af69d88dSmrg}
104af69d88dSmrg
105af69d88dSmrgstatic int
106af69d88dSmrgentry_is_present(const struct hash_table *ht, struct hash_entry *entry)
107af69d88dSmrg{
108af69d88dSmrg   return entry->key != NULL && entry->key != ht->deleted_key;
109af69d88dSmrg}
110af69d88dSmrg
111af69d88dSmrgstruct hash_table *
112af69d88dSmrg_mesa_hash_table_create(void *mem_ctx,
113af69d88dSmrg                        bool (*key_equals_function)(const void *a,
114af69d88dSmrg                                                    const void *b))
115af69d88dSmrg{
116af69d88dSmrg   struct hash_table *ht;
117af69d88dSmrg
118af69d88dSmrg   ht = ralloc(mem_ctx, struct hash_table);
119af69d88dSmrg   if (ht == NULL)
120af69d88dSmrg      return NULL;
121af69d88dSmrg
122af69d88dSmrg   ht->size_index = 0;
123af69d88dSmrg   ht->size = hash_sizes[ht->size_index].size;
124af69d88dSmrg   ht->rehash = hash_sizes[ht->size_index].rehash;
125af69d88dSmrg   ht->max_entries = hash_sizes[ht->size_index].max_entries;
126af69d88dSmrg   ht->key_equals_function = key_equals_function;
127af69d88dSmrg   ht->table = rzalloc_array(ht, struct hash_entry, ht->size);
128af69d88dSmrg   ht->entries = 0;
129af69d88dSmrg   ht->deleted_entries = 0;
130af69d88dSmrg   ht->deleted_key = &deleted_key_value;
131af69d88dSmrg
132af69d88dSmrg   if (ht->table == NULL) {
133af69d88dSmrg      ralloc_free(ht);
134af69d88dSmrg      return NULL;
135af69d88dSmrg   }
136af69d88dSmrg
137af69d88dSmrg   return ht;
138af69d88dSmrg}
139af69d88dSmrg
140af69d88dSmrg/**
141af69d88dSmrg * Frees the given hash table.
142af69d88dSmrg *
143af69d88dSmrg * If delete_function is passed, it gets called on each entry present before
144af69d88dSmrg * freeing.
145af69d88dSmrg */
146af69d88dSmrgvoid
147af69d88dSmrg_mesa_hash_table_destroy(struct hash_table *ht,
148af69d88dSmrg                         void (*delete_function)(struct hash_entry *entry))
149af69d88dSmrg{
150af69d88dSmrg   if (!ht)
151af69d88dSmrg      return;
152af69d88dSmrg
153af69d88dSmrg   if (delete_function) {
154af69d88dSmrg      struct hash_entry *entry;
155af69d88dSmrg
156af69d88dSmrg      hash_table_foreach(ht, entry) {
157af69d88dSmrg         delete_function(entry);
158af69d88dSmrg      }
159af69d88dSmrg   }
160af69d88dSmrg   ralloc_free(ht);
161af69d88dSmrg}
162af69d88dSmrg
163af69d88dSmrg/** Sets the value of the key pointer used for deleted entries in the table.
164af69d88dSmrg *
165af69d88dSmrg * The assumption is that usually keys are actual pointers, so we use a
166af69d88dSmrg * default value of a pointer to an arbitrary piece of storage in the library.
167af69d88dSmrg * But in some cases a consumer wants to store some other sort of value in the
168af69d88dSmrg * table, like a uint32_t, in which case that pointer may conflict with one of
169af69d88dSmrg * their valid keys.  This lets that user select a safe value.
170af69d88dSmrg *
171af69d88dSmrg * This must be called before any keys are actually deleted from the table.
172af69d88dSmrg */
173af69d88dSmrgvoid
174af69d88dSmrg_mesa_hash_table_set_deleted_key(struct hash_table *ht, const void *deleted_key)
175af69d88dSmrg{
176af69d88dSmrg   ht->deleted_key = deleted_key;
177af69d88dSmrg}
178af69d88dSmrg
179af69d88dSmrg/**
180af69d88dSmrg * Finds a hash table entry with the given key and hash of that key.
181af69d88dSmrg *
182af69d88dSmrg * Returns NULL if no entry is found.  Note that the data pointer may be
183af69d88dSmrg * modified by the user.
184af69d88dSmrg */
185af69d88dSmrgstruct hash_entry *
186af69d88dSmrg_mesa_hash_table_search(struct hash_table *ht, uint32_t hash,
187af69d88dSmrg                        const void *key)
188af69d88dSmrg{
189af69d88dSmrg   uint32_t start_hash_address = hash % ht->size;
190af69d88dSmrg   uint32_t hash_address = start_hash_address;
191af69d88dSmrg
192af69d88dSmrg   do {
193af69d88dSmrg      uint32_t double_hash;
194af69d88dSmrg
195af69d88dSmrg      struct hash_entry *entry = ht->table + hash_address;
196af69d88dSmrg
197af69d88dSmrg      if (entry_is_free(entry)) {
198af69d88dSmrg         return NULL;
199af69d88dSmrg      } else if (entry_is_present(ht, entry) && entry->hash == hash) {
200af69d88dSmrg         if (ht->key_equals_function(key, entry->key)) {
201af69d88dSmrg            return entry;
202af69d88dSmrg         }
203af69d88dSmrg      }
204af69d88dSmrg
205af69d88dSmrg      double_hash = 1 + hash % ht->rehash;
206af69d88dSmrg
207af69d88dSmrg      hash_address = (hash_address + double_hash) % ht->size;
208af69d88dSmrg   } while (hash_address != start_hash_address);
209af69d88dSmrg
210af69d88dSmrg   return NULL;
211af69d88dSmrg}
212af69d88dSmrg
213af69d88dSmrgstatic void
214af69d88dSmrg_mesa_hash_table_rehash(struct hash_table *ht, int new_size_index)
215af69d88dSmrg{
216af69d88dSmrg   struct hash_table old_ht;
217af69d88dSmrg   struct hash_entry *table, *entry;
218af69d88dSmrg
219af69d88dSmrg   if (new_size_index >= ARRAY_SIZE(hash_sizes))
220af69d88dSmrg      return;
221af69d88dSmrg
222af69d88dSmrg   table = rzalloc_array(ht, struct hash_entry,
223af69d88dSmrg                         hash_sizes[new_size_index].size);
224af69d88dSmrg   if (table == NULL)
225af69d88dSmrg      return;
226af69d88dSmrg
227af69d88dSmrg   old_ht = *ht;
228af69d88dSmrg
229af69d88dSmrg   ht->table = table;
230af69d88dSmrg   ht->size_index = new_size_index;
231af69d88dSmrg   ht->size = hash_sizes[ht->size_index].size;
232af69d88dSmrg   ht->rehash = hash_sizes[ht->size_index].rehash;
233af69d88dSmrg   ht->max_entries = hash_sizes[ht->size_index].max_entries;
234af69d88dSmrg   ht->entries = 0;
235af69d88dSmrg   ht->deleted_entries = 0;
236af69d88dSmrg
237af69d88dSmrg   hash_table_foreach(&old_ht, entry) {
238af69d88dSmrg      _mesa_hash_table_insert(ht, entry->hash,
239af69d88dSmrg                              entry->key, entry->data);
240af69d88dSmrg   }
241af69d88dSmrg
242af69d88dSmrg   ralloc_free(old_ht.table);
243af69d88dSmrg}
244af69d88dSmrg
245af69d88dSmrg/**
246af69d88dSmrg * Inserts the key with the given hash into the table.
247af69d88dSmrg *
248af69d88dSmrg * Note that insertion may rearrange the table on a resize or rehash,
249af69d88dSmrg * so previously found hash_entries are no longer valid after this function.
250af69d88dSmrg */
251af69d88dSmrgstruct hash_entry *
252af69d88dSmrg_mesa_hash_table_insert(struct hash_table *ht, uint32_t hash,
253af69d88dSmrg                        const void *key, void *data)
254af69d88dSmrg{
255af69d88dSmrg   uint32_t start_hash_address, hash_address;
256af69d88dSmrg
257af69d88dSmrg   if (ht->entries >= ht->max_entries) {
258af69d88dSmrg      _mesa_hash_table_rehash(ht, ht->size_index + 1);
259af69d88dSmrg   } else if (ht->deleted_entries + ht->entries >= ht->max_entries) {
260af69d88dSmrg      _mesa_hash_table_rehash(ht, ht->size_index);
261af69d88dSmrg   }
262af69d88dSmrg
263af69d88dSmrg   start_hash_address = hash % ht->size;
264af69d88dSmrg   hash_address = start_hash_address;
265af69d88dSmrg   do {
266af69d88dSmrg      struct hash_entry *entry = ht->table + hash_address;
267af69d88dSmrg      uint32_t double_hash;
268af69d88dSmrg
269af69d88dSmrg      if (!entry_is_present(ht, entry)) {
270af69d88dSmrg         if (entry_is_deleted(ht, entry))
271af69d88dSmrg            ht->deleted_entries--;
272af69d88dSmrg         entry->hash = hash;
273af69d88dSmrg         entry->key = key;
274af69d88dSmrg         entry->data = data;
275af69d88dSmrg         ht->entries++;
276af69d88dSmrg         return entry;
277af69d88dSmrg      }
278af69d88dSmrg
279af69d88dSmrg      /* Implement replacement when another insert happens
280af69d88dSmrg       * with a matching key.  This is a relatively common
281af69d88dSmrg       * feature of hash tables, with the alternative
282af69d88dSmrg       * generally being "insert the new value as well, and
283af69d88dSmrg       * return it first when the key is searched for".
284af69d88dSmrg       *
285af69d88dSmrg       * Note that the hash table doesn't have a delete
286af69d88dSmrg       * callback.  If freeing of old data pointers is
287af69d88dSmrg       * required to avoid memory leaks, perform a search
288af69d88dSmrg       * before inserting.
289af69d88dSmrg       */
290af69d88dSmrg      if (entry->hash == hash &&
291af69d88dSmrg          ht->key_equals_function(key, entry->key)) {
292af69d88dSmrg         entry->key = key;
293af69d88dSmrg         entry->data = data;
294af69d88dSmrg         return entry;
295af69d88dSmrg      }
296af69d88dSmrg
297af69d88dSmrg
298af69d88dSmrg      double_hash = 1 + hash % ht->rehash;
299af69d88dSmrg
300af69d88dSmrg      hash_address = (hash_address + double_hash) % ht->size;
301af69d88dSmrg   } while (hash_address != start_hash_address);
302af69d88dSmrg
303af69d88dSmrg   /* We could hit here if a required resize failed. An unchecked-malloc
304af69d88dSmrg    * application could ignore this result.
305af69d88dSmrg    */
306af69d88dSmrg   return NULL;
307af69d88dSmrg}
308af69d88dSmrg
309af69d88dSmrg/**
310af69d88dSmrg * This function deletes the given hash table entry.
311af69d88dSmrg *
312af69d88dSmrg * Note that deletion doesn't otherwise modify the table, so an iteration over
313af69d88dSmrg * the table deleting entries is safe.
314af69d88dSmrg */
315af69d88dSmrgvoid
316af69d88dSmrg_mesa_hash_table_remove(struct hash_table *ht,
317af69d88dSmrg                        struct hash_entry *entry)
318af69d88dSmrg{
319af69d88dSmrg   if (!entry)
320af69d88dSmrg      return;
321af69d88dSmrg
322af69d88dSmrg   entry->key = ht->deleted_key;
323af69d88dSmrg   ht->entries--;
324af69d88dSmrg   ht->deleted_entries++;
325af69d88dSmrg}
326af69d88dSmrg
327af69d88dSmrg/**
328af69d88dSmrg * This function is an iterator over the hash table.
329af69d88dSmrg *
330af69d88dSmrg * Pass in NULL for the first entry, as in the start of a for loop.  Note that
331af69d88dSmrg * an iteration over the table is O(table_size) not O(entries).
332af69d88dSmrg */
333af69d88dSmrgstruct hash_entry *
334af69d88dSmrg_mesa_hash_table_next_entry(struct hash_table *ht,
335af69d88dSmrg                            struct hash_entry *entry)
336af69d88dSmrg{
337af69d88dSmrg   if (entry == NULL)
338af69d88dSmrg      entry = ht->table;
339af69d88dSmrg   else
340af69d88dSmrg      entry = entry + 1;
341af69d88dSmrg
342af69d88dSmrg   for (; entry != ht->table + ht->size; entry++) {
343af69d88dSmrg      if (entry_is_present(ht, entry)) {
344af69d88dSmrg         return entry;
345af69d88dSmrg      }
346af69d88dSmrg   }
347af69d88dSmrg
348af69d88dSmrg   return NULL;
349af69d88dSmrg}
350af69d88dSmrg
351af69d88dSmrg/**
352af69d88dSmrg * Returns a random entry from the hash table.
353af69d88dSmrg *
354af69d88dSmrg * This may be useful in implementing random replacement (as opposed
355af69d88dSmrg * to just removing everything) in caches based on this hash table
356af69d88dSmrg * implementation.  @predicate may be used to filter entries, or may
357af69d88dSmrg * be set to NULL for no filtering.
358af69d88dSmrg */
359af69d88dSmrgstruct hash_entry *
360af69d88dSmrg_mesa_hash_table_random_entry(struct hash_table *ht,
361af69d88dSmrg                              bool (*predicate)(struct hash_entry *entry))
362af69d88dSmrg{
363af69d88dSmrg   struct hash_entry *entry;
364af69d88dSmrg   uint32_t i = rand() % ht->size;
365af69d88dSmrg
366af69d88dSmrg   if (ht->entries == 0)
367af69d88dSmrg      return NULL;
368af69d88dSmrg
369af69d88dSmrg   for (entry = ht->table + i; entry != ht->table + ht->size; entry++) {
370af69d88dSmrg      if (entry_is_present(ht, entry) &&
371af69d88dSmrg          (!predicate || predicate(entry))) {
372af69d88dSmrg         return entry;
373af69d88dSmrg      }
374af69d88dSmrg   }
375af69d88dSmrg
376af69d88dSmrg   for (entry = ht->table; entry != ht->table + i; entry++) {
377af69d88dSmrg      if (entry_is_present(ht, entry) &&
378af69d88dSmrg          (!predicate || predicate(entry))) {
379af69d88dSmrg         return entry;
380af69d88dSmrg      }
381af69d88dSmrg   }
382af69d88dSmrg
383af69d88dSmrg   return NULL;
384af69d88dSmrg}
385af69d88dSmrg
386af69d88dSmrg
/**
 * Quick FNV-1a hash (each byte is XORed in, then the hash is multiplied by
 * the FNV prime) over an arbitrary block of memory, based on:
 * http://www.isthe.com/chongo/tech/comp/fnv/
 *
 * FNV may not be the best hash out there -- Jenkins's lookup3 is supposed to
 * be quite good, and it probably beats FNV.  But FNV has the advantage that
 * it involves almost no code.  For an improvement on both, see Paul
 * Hsieh's http://www.azillionmonkeys.com/qed/hash.html
 *
 * Returns the 32-bit hash of the size bytes starting at data; a size of 0
 * yields the FNV offset basis.
 */
uint32_t
_mesa_hash_data(const void *data, size_t size)
{
   const uint8_t *bytes = data;
   uint32_t hash = 2166136261ul;
   size_t i;

   for (i = 0; i < size; i++)
      hash = (hash ^ bytes[i]) * 0x01000193;

   return hash;
}
410af69d88dSmrg
/**
 * FNV-1a hash of a NUL-terminated string (the terminator is not hashed).
 *
 * The characters are read as unsigned bytes.  Reading them as plain char
 * would sign-extend bytes >= 0x80 on platforms where char is signed, which
 * would make the result platform-dependent and different from what
 * _mesa_hash_data() returns for the same bytes.
 */
uint32_t
_mesa_hash_string(const char *key)
{
   const unsigned char *bytes = (const unsigned char *)key;
   uint32_t hash = 2166136261ul;

   while (*bytes != 0) {
      hash ^= *bytes;
      hash = hash * 0x01000193;
      bytes++;
   }

   return hash;
}
425af69d88dSmrg
/**
 * Key comparison callback for _mesa_hash_table_create() when the keys are
 * NUL-terminated strings: returns true when both strings have identical
 * contents.
 */
bool
_mesa_key_string_equal(const void *a, const void *b)
{
   const char *lhs = a;
   const char *rhs = b;

   return !strcmp(lhs, rhs);
}
435af69d88dSmrg
/**
 * Key comparison callback that treats keys as opaque pointers: returns true
 * only when both arguments are the same address.
 */
bool
_mesa_key_pointer_equal(const void *a, const void *b)
{
   if (a != b)
      return false;

   return true;
}
441