skipset.h revision 1.1.1.1 1 1.1 christos /* skipset.h -- set operations using a skiplist
2 1.1 christos // Copyright (C) 2024-2026 Mark Adler
3 1.1 christos // See MiniZip_info.txt for the license.
4 1.1 christos
5 1.1 christos // This implements a skiplist set, i.e. just keys, no data, with ~O(log n) time
6 1.1 christos // insert and search operations. The application defines the type of a key, and
7 1.1 christos // provides a function to compare two keys.
8 1.1 christos
9 1.1 christos // This header is not definitions of functions found in another source file --
10 1.1 christos // it creates the set functions, with the application's key type, right where
11 1.1 christos // the #include is. Before this header is #included, these must be defined:
12 1.1 christos //
13 1.1 christos // 1. A macro or typedef for set_key_t, the type of a key.
14 1.1 christos // 2. A macro or function set_cmp(a, b) to compare two keys. The return values
15 1.1 christos // are < 0 for a < b, 0 for a == b, and > 0 for a > b.
16 1.1 christos // 3. A macro or function set_drop(s, k) to release the key k's resources, if
17 1.1 christos // any, when doing a set_end() or set_clear(). s is a pointer to the set
18 1.1 christos // that key is in, for use with set_free() if desired.
19 1.1 christos //
20 1.1 christos // Example usage:
21 1.1 christos //
22 1.1 christos // typedef int set_key_t;
23 1.1 christos // #define set_cmp(a, b) ((a) < (b) ? -1 : (a) == (b) ? 0 : 1)
24 1.1 christos // #define set_drop(s, k)
25 1.1 christos // #include "skipset.h"
26 1.1 christos //
27 1.1 christos // int test(void) { // return 0: good, 1: bad, -1: out of memory
28 1.1 christos // set_t set;
29 1.1 christos // if (setjmp(set.env))
30 1.1 christos // return -1;
31 1.1 christos // set_start(&set);
32 1.1 christos // set_insert(&set, 2);
33 1.1 christos // set_insert(&set, 1);
34 1.1 christos // set_insert(&set, 7);
35 1.1 christos // int bad = !set_found(&set, 2);
36 1.1 christos // bad = bad || set_found(&set, 5);
37 1.1 christos // set_end(&set);
38 1.1 christos // return bad;
39 1.1 christos // }
40 1.1 christos //
41 1.1 christos // Interface summary (see more details below):
42 1.1 christos // - set_t is the type of the set being operated on (a set_t pointer is passed)
43 1.1 christos // - set_start() initializes a new, empty set (initialize set.env first)
44 1.1 christos // - set_insert() inserts a new key into the set, or not if it's already there
45 1.1 christos // - set_found() determines whether or not a key is in the set
46 1.1 christos // - set_end() ends the use of the set, freeing all memory
47 1.1 christos // - set_clear() empties the set, equivalent to set_end() and then set_start()
48 1.1 christos // - set_ok() checks if set appears to be usable, i.e. started and not ended
49 1.1 christos //
50 1.1 christos // Auxiliary functions available to the application:
51 1.1 christos // - set_alloc() allocates memory with optional tracking (#define SET_TRACK)
52 1.1 christos // - set_free() deallocates memory allocated by set_alloc()
53 1.1 christos // - set_rand() returns 32 random bits (seeded by set_start()) */
54 1.1 christos
55 1.1 christos #ifndef SKIPSET_H
56 1.1 christos #define SKIPSET_H
57 1.1 christos
58 1.1 christos #include <stdlib.h> /* realloc(), free(), NULL, size_t */
59 1.1 christos #include <stddef.h> /* ptrdiff_t */
60 1.1 christos #include <setjmp.h> /* jmp_buf, longjmp() */
61 1.1 christos #include <errno.h> /* ENOMEM */
62 1.1 christos #include <time.h> /* time(), clock() */
63 1.1 christos #include <assert.h> /* assert.h */
64 1.1 christos #include "ints.h" /* i16_t, ui32_t, ui64_t */
65 1.1 christos
66 1.1 christos /* Structures and functions below noted as "--private--" should not be used by
67 1.1 christos // the application. set_t is partially private and partially public -- see the
68 1.1 christos // comments there.
69 1.1 christos
70 1.1 christos // There is no POSIX random() in MSVC, and rand() is awful. For portability, we
71 1.1 christos // cannot rely on a library function for random numbers. Instead we use the
72 1.1 christos // fast and effective algorithm below, invented by Melissa O'Neill.
73 1.1 christos
74 1.1 christos // *Really* minimal PCG32 code / (c) 2014 M.E. O'Neill / www.pcg-random.org
75 1.1 christos // Licensed under Apache License 2.0 (NO WARRANTY, etc. see website)
76 1.1 christos // --private-- Random number generator state. */
77 1.1 christos typedef struct {
78 1.1 christos ui64_t state; /* 64-bit generator state */
79 1.1 christos ui64_t inc; /* 63-bit sequence id */
80 1.1 christos } set_rand_t;
81 1.1 christos /* --private-- Initialize the state *gen using seed and seq. seed seeds the
82 1.1 christos // advancing 64-bit state. seq is a sequence selection constant. */
83 1.1 christos void set_seed(set_rand_t *gen, ui64_t seed, ui64_t seq) {
84 1.1 christos gen->inc = (seq << 1) | 1;
85 1.1 christos gen->state = (seed + gen->inc) * 6364136223846793005ULL + gen->inc;
86 1.1 christos }
87 1.1 christos /* Start a unique random number sequence using bits from noise sources. */
88 1.1 christos void set_uniq(set_rand_t *gen, const void *ptr) {
89 1.1 christos set_seed(gen, ((ui64_t)(ptrdiff_t)ptr << 32) ^
90 1.1 christos ((ui64_t)time(NULL) << 12) ^ clock(), 0);
91 1.1 christos }
92 1.1 christos /* Return 32 random bits, advancing the state *gen. */
93 1.1 christos ui32_t set_rand(set_rand_t *gen) {
94 1.1 christos ui64_t state = gen->state;
95 1.1 christos gen->state = state * 6364136223846793005ULL + gen->inc;
96 1.1 christos ui32_t mix = (ui32_t)(((state >> 18) ^ state) >> 27);
97 1.1 christos int rot = state >> 59;
98 1.1 christos return (mix >> rot) | (mix << ((-rot) & 31));
99 1.1 christos }
100 1.1 christos /* End of PCG32 code. */
101 1.1 christos
102 1.1 christos /* --private-- Linked-list node. */
103 1.1 christos typedef struct set_node_s set_node_t;
104 1.1 christos struct set_node_s {
105 1.1 christos set_key_t key; /* the key (not used for head or path) */
106 1.1 christos i16_t size; /* number of allocated pointers in right[] */
107 1.1 christos i16_t fill; /* number of pointers in right[] filled in */
108 1.1 christos set_node_t **right; /* pointer for each level, each to the right */
109 1.1 christos };
110 1.1 christos
111 1.1 christos /* A set. The application sets env, may use gen with set_rand(), and may read
112 1.1 christos // allocs and memory. The remaining variables are --private-- . */
113 1.1 christos typedef struct set_s {
114 1.1 christos set_node_t *head; /* skiplist head -- no key, just links */
115 1.1 christos set_node_t *path; /* right[] is path to key from set_found() */
116 1.1 christos set_node_t *node; /* node under construction, in case of longjmp() */
117 1.1 christos i16_t depth; /* maximum depth of the skiplist */
118 1.1 christos ui64_t ran; /* a precious trove of random bits */
119 1.1 christos set_rand_t gen; /* random number generator state */
120 1.1 christos jmp_buf env; /* setjmp() environment for allocation errors */
121 1.1 christos #ifdef SET_TRACK
122 1.1 christos size_t allocs; /* number of allocations */
123 1.1 christos size_t memory; /* total amount of allocated memory (>= requests) */
124 1.1 christos #endif
125 1.1 christos } set_t;
126 1.1 christos
127 1.1 christos /* Memory allocation and deallocation. set_alloc(set, ptr, size) returns a
128 1.1 christos // pointer to an allocation of size bytes if ptr is NULL, or the previous
129 1.1 christos // allocation ptr resized to size bytes. set_alloc() will never return NULL.
130 1.1 christos // set_free(set, ptr) frees an allocation created by set_alloc(). These may be
131 1.1 christos // used by the application. e.g. if allocation tracking is desired. */
132 1.1 christos #ifdef SET_TRACK
133 1.1 christos /* Track the number of allocations and the total backing memory size. */
134 1.1 christos # if defined(_WIN32)
135 1.1 christos # include <malloc.h>
136 1.1 christos # define SET_ALLOC_SIZE(ptr) _msize(ptr)
137 1.1 christos # elif defined(__MACH__)
138 1.1 christos # include <malloc/malloc.h>
139 1.1 christos # define SET_ALLOC_SIZE(ptr) malloc_size(ptr)
140 1.1 christos # elif defined(__linux__)
141 1.1 christos # include <malloc.h>
142 1.1 christos # define SET_ALLOC_SIZE(ptr) malloc_usable_size(ptr)
143 1.1 christos # elif defined(__FreeBSD__)
144 1.1 christos # include <malloc_np.h>
145 1.1 christos # define SET_ALLOC_SIZE(ptr) malloc_usable_size(ptr)
146 1.1 christos # elif defined(__NetBSD__)
147 1.1 christos # include <jemalloc/jemalloc.h>
148 1.1 christos # define SET_ALLOC_SIZE(ptr) malloc_usable_size(ptr)
149 1.1 christos # else // e.g. OpenBSD
150 1.1 christos # define SET_ALLOC_SIZE(ptr) 0
151 1.1 christos # endif
152 1.1 christos // With tracking.
153 1.1 christos void *set_alloc(set_t *set, void *ptr, size_t size) {
154 1.1 christos size_t had = ptr == NULL ? 0 : SET_ALLOC_SIZE(ptr);
155 1.1 christos void *mem = realloc(ptr, size);
156 1.1 christos if (mem == NULL)
157 1.1 christos longjmp(set->env, ENOMEM);
158 1.1 christos set->allocs += ptr == NULL;
159 1.1 christos set->memory += SET_ALLOC_SIZE(mem) - had;
160 1.1 christos return mem;
161 1.1 christos }
162 1.1 christos void set_free(set_t *set, void *ptr) {
163 1.1 christos if (ptr != NULL) {
164 1.1 christos set->allocs--;
165 1.1 christos set->memory -= SET_ALLOC_SIZE(ptr);
166 1.1 christos free(ptr);
167 1.1 christos }
168 1.1 christos }
169 1.1 christos #else
170 1.1 christos /* Without tracking. */
171 1.1 christos void *set_alloc(set_t *set, void *ptr, size_t size) {
172 1.1 christos void *mem = realloc(ptr, size);
173 1.1 christos if (mem == NULL)
174 1.1 christos longjmp(set->env, ENOMEM);
175 1.1 christos return mem;
176 1.1 christos }
177 1.1 christos void set_free(set_t *set, void *ptr) {
178 1.1 christos (void)set;
179 1.1 christos free(ptr);
180 1.1 christos }
181 1.1 christos #endif
182 1.1 christos
183 1.1 christos /* --private-- Grow node's array right[] as needed to be able to hold at least
184 1.1 christos // want links. If fill is true, assure that the first want links are filled in,
185 1.1 christos // setting them to set->head if not previously filled in. Otherwise it is
186 1.1 christos // assumed that the first want links are about to be filled in. */
187 1.1 christos void set_grow(set_t *set, set_node_t *node, int want, int fill) {
188 1.1 christos if (node->size < want) {
189 1.1 christos int more = node->size ? node->size : 1;
190 1.1 christos while (more < want)
191 1.1 christos more <<= 1;
192 1.1 christos node->right = set_alloc(set, node->right,
193 1.1 christos (size_t)more * sizeof(set_node_t *));
194 1.1 christos node->size = (i16_t)more;
195 1.1 christos }
196 1.1 christos int i;
197 1.1 christos if (fill)
198 1.1 christos for (i = node->fill; i < want; i++)
199 1.1 christos node->right[i] = set->head;
200 1.1 christos node->fill = (i16_t)want;
201 1.1 christos }
202 1.1 christos
203 1.1 christos /* --private-- Return a new node. key is left uninitialized. */
204 1.1 christos set_node_t *set_node(set_t *set) {
205 1.1 christos set_node_t *node = set_alloc(set, NULL, sizeof(set_node_t));
206 1.1 christos node->size = 0;
207 1.1 christos node->fill = 0;
208 1.1 christos node->right = NULL;
209 1.1 christos return node;
210 1.1 christos }
211 1.1 christos
212 1.1 christos /* --private-- Free the list linked from head, along with the keys. */
213 1.1 christos void set_sweep(set_t *set) {
214 1.1 christos set_node_t *step = set->head->right[0];
215 1.1 christos while (step != set->head) {
216 1.1 christos set_node_t *next = step->right[0]; /* save link to next node */
217 1.1 christos set_drop(set, step->key);
218 1.1 christos set_free(set, step->right);
219 1.1 christos set_free(set, step);
220 1.1 christos step = next;
221 1.1 christos }
222 1.1 christos }
223 1.1 christos
224 1.1 christos /* Initialize a new set. set->env must be initialized using setjmp() before
225 1.1 christos // set_start() is called. A longjmp(set->env, ENOMEM) will be used to handle a
226 1.1 christos // memory allocation failure during any of the operations. (See setjmp.h and
227 1.1 christos // errno.h.) The set can still be used if this happens, assuming that it didn't
228 1.1 christos // happen during set_start(). Whether set_start() completed or not, set_end()
229 1.1 christos // can be used to free the set's memory after a longjmp(). */
230 1.1 christos void set_start(set_t *set) {
231 1.1 christos #ifdef SET_TRACK
232 1.1 christos set->allocs = 0;
233 1.1 christos set->memory = 0;
234 1.1 christos #endif
235 1.1 christos set->head = set->path = set->node = NULL; /* in case set_node() fails */
236 1.1 christos set->path = set_node(set);
237 1.1 christos set->head = set_node(set);
238 1.1 christos set_grow(set, set->head, 1, 1); /* one link back to head for an empty set */
239 1.1 christos *(unsigned char *)&set->head->key = 137; /* set id */
240 1.1 christos set->depth = 0;
241 1.1 christos set_uniq(&set->gen, set);
242 1.1 christos set->ran = 1;
243 1.1 christos }
244 1.1 christos
245 1.1 christos /* Return true if *set appears to be in a usable state. If *set has been zeroed
246 1.1 christos // out, then set_ok(set) will be false and set_end(set) will be safe. */
247 1.1 christos int set_ok(set_t *set) {
248 1.1 christos return set->head != NULL &&
249 1.1 christos set->head->right != NULL &&
250 1.1 christos *(unsigned char *)&set->head->key == 137;
251 1.1 christos }
252 1.1 christos
253 1.1 christos /* Empty the set. This frees the memory used for the previous set contents.
254 1.1 christos // After set_clear(), *set is ready for use, as if after a set_start(). */
255 1.1 christos void set_clear(set_t *set) {
256 1.1 christos assert(set_ok(set) && "improper use");
257 1.1 christos
258 1.1 christos /* Free all the keys and their nodes. */
259 1.1 christos set_sweep(set);
260 1.1 christos
261 1.1 christos /* Leave the head and path allocations as is. Clear their contents, with
262 1.1 christos // head pointing to itself and setting depth to zero, for an empty set. */
263 1.1 christos set->head->right[0] = set->head;
264 1.1 christos set->head->fill = 1;
265 1.1 christos set->path->fill = 0;
266 1.1 christos set->depth = 0;
267 1.1 christos }
268 1.1 christos
269 1.1 christos /* Done using the set -- free all allocations. The only operation on *set
270 1.1 christos // permitted after this is set_start(). Though another set_end() would do no
271 1.1 christos // harm. This can be done at any time after a set_start(), or after a longjmp()
272 1.1 christos // on any allocation failure, including during a set_start(). */
273 1.1 christos void set_end(set_t *set) {
274 1.1 christos if (set->head != NULL) {
275 1.1 christos /* Empty the set and free the head node. */
276 1.1 christos if (set->head->right != NULL) {
277 1.1 christos set_sweep(set);
278 1.1 christos set_free(set, set->head->right);
279 1.1 christos }
280 1.1 christos set_free(set, set->head);
281 1.1 christos set->head = NULL;
282 1.1 christos }
283 1.1 christos if (set->path != NULL) {
284 1.1 christos /* Free the path work area. */
285 1.1 christos set_free(set, set->path->right);
286 1.1 christos set_free(set, set->path);
287 1.1 christos set->path = NULL;
288 1.1 christos }
289 1.1 christos if (set->node != NULL) {
290 1.1 christos /* Free the node that was under construction when longjmp() hit. */
291 1.1 christos set_drop(set, set->node->key);
292 1.1 christos set_free(set, set->node->right);
293 1.1 christos set_free(set, set->node);
294 1.1 christos set->node = NULL;
295 1.1 christos }
296 1.1 christos }
297 1.1 christos
298 1.1 christos /* Look for key. Return 1 if found or 0 if not. This also puts the path to get
299 1.1 christos // there in set->path, for use by set_insert(). */
300 1.1 christos int set_found(set_t *set, set_key_t key) {
301 1.1 christos assert(set_ok(set) && "improper use");
302 1.1 christos
303 1.1 christos /* Start at depth and work down and right as determined by key comparisons. */
304 1.1 christos set_node_t *head = set->head, *here = head;
305 1.1 christos int i = set->depth;
306 1.1 christos set_grow(set, set->path, i + 1, 0);
307 1.1 christos do {
308 1.1 christos while (here->right[i] != head &&
309 1.1 christos set_cmp(here->right[i]->key, key) < 0)
310 1.1 christos here = here->right[i];
311 1.1 christos set->path->right[i] = here;
312 1.1 christos } while (i--);
313 1.1 christos
314 1.1 christos /* See if the key matches. */
315 1.1 christos here = here->right[0];
316 1.1 christos return here != head && set_cmp(here->key, key) == 0;
317 1.1 christos }
318 1.1 christos
319 1.1 christos /* Insert the key key. Return 0 on success, or 1 if key is already in the set. */
320 1.1 christos int set_insert(set_t *set, set_key_t key) {
321 1.1 christos assert(set_ok(set) && "improper use");
322 1.1 christos
323 1.1 christos if (set_found(set, key))
324 1.1 christos /* That key is already in the set. */
325 1.1 christos return 1;
326 1.1 christos
327 1.1 christos /* Randomly generate a new level-- level 0 with probability 1/2, 1 with
328 1.1 christos // probability 1/4, 2 with probability 1/8, etc. */
329 1.1 christos int level = 0;
330 1.1 christos for (;;) {
331 1.1 christos if (set->ran == 1)
332 1.1 christos /* Ran out. Get another 32 random bits. */
333 1.1 christos set->ran = set_rand(&set->gen) | (1ULL << 32);
334 1.1 christos int bit = set->ran & 1;
335 1.1 christos set->ran >>= 1;
336 1.1 christos if (bit)
337 1.1 christos break;
338 1.1 christos assert(level < 32767 &&
339 1.1 christos "Overhead, without any fuss, the stars were going out.");
340 1.1 christos level++;
341 1.1 christos }
342 1.1 christos if (level > set->depth) {
343 1.1 christos /* The maximum depth is now deeper. Update the structures. */
344 1.1 christos set_grow(set, set->path, level + 1, 1);
345 1.1 christos set_grow(set, set->head, level + 1, 1);
346 1.1 christos set->depth = (i16_t)level;
347 1.1 christos }
348 1.1 christos
349 1.1 christos /* Make a new node for the provided key, and insert it in the lists up to
350 1.1 christos // and including level. */
351 1.1 christos set->node = set_node(set);
352 1.1 christos set->node->key = key;
353 1.1 christos set_grow(set, set->node, level + 1, 0);
354 1.1 christos int i;
355 1.1 christos for (i = 0; i <= level; i++) {
356 1.1 christos set->node->right[i] = set->path->right[i]->right[i];
357 1.1 christos set->path->right[i]->right[i] = set->node;
358 1.1 christos }
359 1.1 christos set->node = NULL;
360 1.1 christos return 0;
361 1.1 christos }
362 1.1 christos
363 1.1 christos #else
364 1.1 christos #error ** another skiplist set already created here
365 1.1 christos /* Would need to implement a prefix in order to support multiple sets. */
366 1.1 christos #endif
367