1 1.1 christos /* skipset.h -- set operations using a skiplist 2 1.1 christos // Copyright (C) 2024-2026 Mark Adler 3 1.1 christos // See MiniZip_info.txt for the license. 4 1.1 christos 5 1.1 christos // This implements a skiplist set, i.e. just keys, no data, with ~O(log n) time 6 1.1 christos // insert and search operations. The application defines the type of a key, and 7 1.1 christos // provides a function to compare two keys. 8 1.1 christos 9 1.1 christos // This header is not definitions of functions found in another source file -- 10 1.1 christos // it creates the set functions, with the application's key type, right where 11 1.1 christos // the #include is. Before this header is #included, these must be defined: 12 1.1 christos // 13 1.1 christos // 1. A macro or typedef for set_key_t, the type of a key. 14 1.1 christos // 2. A macro or function set_cmp(a, b) to compare two keys. The return values 15 1.1 christos // are < 0 for a < b, 0 for a == b, and > 0 for a > b. 16 1.1 christos // 3. A macro or function set_drop(s, k) to release the key k's resources, if 17 1.1 christos // any, when doing a set_end() or set_clear(). s is a pointer to the set 18 1.1 christos // that key is in, for use with set_free() if desired. 19 1.1 christos // 20 1.1 christos // Example usage: 21 1.1 christos // 22 1.1 christos // typedef int set_key_t; 23 1.1 christos // #define set_cmp(a, b) ((a) < (b) ? -1 : (a) == (b) ? 0 : 1) 24 1.1 christos // #define set_drop(s, k) 25 1.1 christos // #include "skipset.h" 26 1.1 christos // 27 1.1 christos // int test(void) { // return 0: good, 1: bad, -1: out of memory 28 1.1 christos // set_t set; 29 1.1 christos // if (setjmp(set.env)) 30 1.1 christos // return -1; 31 1.1 christos // set_start(&set); 32 1.1 christos // set_insert(&set, 2); 33 1.1 christos // set_insert(&set, 1); 34 1.1 christos // set_insert(&set, 7); 35 1.1 christos // int bad = !set_found(&set, 2); 36 1.1 christos // bad = bad || set_found(&set, 5); 37 1.1 christos // set_end(&set); 38 1.1 christos // return bad; 39 1.1 christos // } 40 1.1 christos // 41 1.1 christos // Interface summary (see more details below): 42 1.1 christos // - set_t is the type of the set being operated on (a set_t pointer is passed) 43 1.1 christos // - set_start() initializes a new, empty set (initialize set.env first) 44 1.1 christos // - set_insert() inserts a new key into the set, or not if it's already there 45 1.1 christos // - set_found() determines whether or not a key is in the set 46 1.1 christos // - set_end() ends the use of the set, freeing all memory 47 1.1 christos // - set_clear() empties the set, equivalent to set_end() and then set_start() 48 1.1 christos // - set_ok() checks if set appears to be usable, i.e. started and not ended 49 1.1 christos // 50 1.1 christos // Auxiliary functions available to the application: 51 1.1 christos // - set_alloc() allocates memory with optional tracking (#define SET_TRACK) 52 1.1 christos // - set_free() deallocates memory allocated by set_alloc() 53 1.1 christos // - set_rand() returns 32 random bits (seeded by set_start()) */ 54 1.1 christos 55 1.1 christos #ifndef SKIPSET_H 56 1.1 christos #define SKIPSET_H 57 1.1 christos 58 1.1 christos #include <stdlib.h> /* realloc(), free(), NULL, size_t */ 59 1.1 christos #include <stddef.h> /* ptrdiff_t */ 60 1.1 christos #include <setjmp.h> /* jmp_buf, longjmp() */ 61 1.1 christos #include <errno.h> /* ENOMEM */ 62 1.1 christos #include <time.h> /* time(), clock() */ 63 1.1 christos #include <assert.h> /* assert.h */ 64 1.1 christos #include "ints.h" /* i16_t, ui32_t, ui64_t */ 65 1.1 christos 66 1.1 christos /* Structures and functions below noted as "--private--" should not be used by 67 1.1 christos // the application. set_t is partially private and partially public -- see the 68 1.1 christos // comments there. 69 1.1 christos 70 1.1 christos // There is no POSIX random() in MSVC, and rand() is awful. For portability, we 71 1.1 christos // cannot rely on a library function for random numbers. Instead we use the 72 1.1 christos // fast and effective algorithm below, invented by Melissa O'Neill. 73 1.1 christos 74 1.1 christos // *Really* minimal PCG32 code / (c) 2014 M.E. O'Neill / www.pcg-random.org 75 1.1 christos // Licensed under Apache License 2.0 (NO WARRANTY, etc. see website) 76 1.1 christos // --private-- Random number generator state. */ 77 1.1 christos typedef struct { 78 1.1 christos ui64_t state; /* 64-bit generator state */ 79 1.1 christos ui64_t inc; /* 63-bit sequence id */ 80 1.1 christos } set_rand_t; 81 1.1 christos /* --private-- Initialize the state *gen using seed and seq. seed seeds the 82 1.1 christos // advancing 64-bit state. seq is a sequence selection constant. */ 83 1.1 christos void set_seed(set_rand_t *gen, ui64_t seed, ui64_t seq) { 84 1.1 christos gen->inc = (seq << 1) | 1; 85 1.1 christos gen->state = (seed + gen->inc) * 6364136223846793005ULL + gen->inc; 86 1.1 christos } 87 1.1 christos /* Start a unique random number sequence using bits from noise sources. */ 88 1.1 christos void set_uniq(set_rand_t *gen, const void *ptr) { 89 1.1 christos set_seed(gen, ((ui64_t)(ptrdiff_t)ptr << 32) ^ 90 1.1 christos ((ui64_t)time(NULL) << 12) ^ clock(), 0); 91 1.1 christos } 92 1.1 christos /* Return 32 random bits, advancing the state *gen. */ 93 1.1 christos ui32_t set_rand(set_rand_t *gen) { 94 1.1 christos ui64_t state = gen->state; 95 1.1 christos gen->state = state * 6364136223846793005ULL + gen->inc; 96 1.1 christos ui32_t mix = (ui32_t)(((state >> 18) ^ state) >> 27); 97 1.1 christos int rot = state >> 59; 98 1.1 christos return (mix >> rot) | (mix << ((-rot) & 31)); 99 1.1 christos } 100 1.1 christos /* End of PCG32 code. */ 101 1.1 christos 102 1.1 christos /* --private-- Linked-list node. */ 103 1.1 christos typedef struct set_node_s set_node_t; 104 1.1 christos struct set_node_s { 105 1.1 christos set_key_t key; /* the key (not used for head or path) */ 106 1.1 christos i16_t size; /* number of allocated pointers in right[] */ 107 1.1 christos i16_t fill; /* number of pointers in right[] filled in */ 108 1.1 christos set_node_t **right; /* pointer for each level, each to the right */ 109 1.1 christos }; 110 1.1 christos 111 1.1 christos /* A set. The application sets env, may use gen with set_rand(), and may read 112 1.1 christos // allocs and memory. The remaining variables are --private-- . */ 113 1.1 christos typedef struct set_s { 114 1.1 christos set_node_t *head; /* skiplist head -- no key, just links */ 115 1.1 christos set_node_t *path; /* right[] is path to key from set_found() */ 116 1.1 christos set_node_t *node; /* node under construction, in case of longjmp() */ 117 1.1 christos i16_t depth; /* maximum depth of the skiplist */ 118 1.1 christos ui64_t ran; /* a precious trove of random bits */ 119 1.1 christos set_rand_t gen; /* random number generator state */ 120 1.1 christos jmp_buf env; /* setjmp() environment for allocation errors */ 121 1.1 christos #ifdef SET_TRACK 122 1.1 christos size_t allocs; /* number of allocations */ 123 1.1 christos size_t memory; /* total amount of allocated memory (>= requests) */ 124 1.1 christos #endif 125 1.1 christos } set_t; 126 1.1 christos 127 1.1 christos /* Memory allocation and deallocation. set_alloc(set, ptr, size) returns a 128 1.1 christos // pointer to an allocation of size bytes if ptr is NULL, or the previous 129 1.1 christos // allocation ptr resized to size bytes. set_alloc() will never return NULL. 130 1.1 christos // set_free(set, ptr) frees an allocation created by set_alloc(). These may be 131 1.1 christos // used by the application. e.g. if allocation tracking is desired. */ 132 1.1 christos #ifdef SET_TRACK 133 1.1 christos /* Track the number of allocations and the total backing memory size. */ 134 1.1 christos # if defined(_WIN32) 135 1.1 christos # include <malloc.h> 136 1.1 christos # define SET_ALLOC_SIZE(ptr) _msize(ptr) 137 1.1 christos # elif defined(__MACH__) 138 1.1 christos # include <malloc/malloc.h> 139 1.1 christos # define SET_ALLOC_SIZE(ptr) malloc_size(ptr) 140 1.1 christos # elif defined(__linux__) 141 1.1 christos # include <malloc.h> 142 1.1 christos # define SET_ALLOC_SIZE(ptr) malloc_usable_size(ptr) 143 1.1 christos # elif defined(__FreeBSD__) 144 1.1 christos # include <malloc_np.h> 145 1.1 christos # define SET_ALLOC_SIZE(ptr) malloc_usable_size(ptr) 146 1.1 christos # elif defined(__NetBSD__) 147 1.1 christos # include <jemalloc/jemalloc.h> 148 1.1 christos # define SET_ALLOC_SIZE(ptr) malloc_usable_size(ptr) 149 1.1 christos # else // e.g. OpenBSD 150 1.1 christos # define SET_ALLOC_SIZE(ptr) 0 151 1.1 christos # endif 152 1.1 christos // With tracking. 153 1.1 christos void *set_alloc(set_t *set, void *ptr, size_t size) { 154 1.1 christos size_t had = ptr == NULL ? 0 : SET_ALLOC_SIZE(ptr); 155 1.1 christos void *mem = realloc(ptr, size); 156 1.1 christos if (mem == NULL) 157 1.1 christos longjmp(set->env, ENOMEM); 158 1.1 christos set->allocs += ptr == NULL; 159 1.1 christos set->memory += SET_ALLOC_SIZE(mem) - had; 160 1.1 christos return mem; 161 1.1 christos } 162 1.1 christos void set_free(set_t *set, void *ptr) { 163 1.1 christos if (ptr != NULL) { 164 1.1 christos set->allocs--; 165 1.1 christos set->memory -= SET_ALLOC_SIZE(ptr); 166 1.1 christos free(ptr); 167 1.1 christos } 168 1.1 christos } 169 1.1 christos #else 170 1.1 christos /* Without tracking. */ 171 1.1 christos void *set_alloc(set_t *set, void *ptr, size_t size) { 172 1.1 christos void *mem = realloc(ptr, size); 173 1.1 christos if (mem == NULL) 174 1.1 christos longjmp(set->env, ENOMEM); 175 1.1 christos return mem; 176 1.1 christos } 177 1.1 christos void set_free(set_t *set, void *ptr) { 178 1.1 christos (void)set; 179 1.1 christos free(ptr); 180 1.1 christos } 181 1.1 christos #endif 182 1.1 christos 183 1.1 christos /* --private-- Grow node's array right[] as needed to be able to hold at least 184 1.1 christos // want links. If fill is true, assure that the first want links are filled in, 185 1.1 christos // setting them to set->head if not previously filled in. Otherwise it is 186 1.1 christos // assumed that the first want links are about to be filled in. */ 187 1.1 christos void set_grow(set_t *set, set_node_t *node, int want, int fill) { 188 1.1 christos if (node->size < want) { 189 1.1 christos int more = node->size ? node->size : 1; 190 1.1 christos while (more < want) 191 1.1 christos more <<= 1; 192 1.1 christos node->right = set_alloc(set, node->right, 193 1.1 christos (size_t)more * sizeof(set_node_t *)); 194 1.1 christos node->size = (i16_t)more; 195 1.1 christos } 196 1.1 christos int i; 197 1.1 christos if (fill) 198 1.1 christos for (i = node->fill; i < want; i++) 199 1.1 christos node->right[i] = set->head; 200 1.1 christos node->fill = (i16_t)want; 201 1.1 christos } 202 1.1 christos 203 1.1 christos /* --private-- Return a new node. key is left uninitialized. */ 204 1.1 christos set_node_t *set_node(set_t *set) { 205 1.1 christos set_node_t *node = set_alloc(set, NULL, sizeof(set_node_t)); 206 1.1 christos node->size = 0; 207 1.1 christos node->fill = 0; 208 1.1 christos node->right = NULL; 209 1.1 christos return node; 210 1.1 christos } 211 1.1 christos 212 1.1 christos /* --private-- Free the list linked from head, along with the keys. */ 213 1.1 christos void set_sweep(set_t *set) { 214 1.1 christos set_node_t *step = set->head->right[0]; 215 1.1 christos while (step != set->head) { 216 1.1 christos set_node_t *next = step->right[0]; /* save link to next node */ 217 1.1 christos set_drop(set, step->key); 218 1.1 christos set_free(set, step->right); 219 1.1 christos set_free(set, step); 220 1.1 christos step = next; 221 1.1 christos } 222 1.1 christos } 223 1.1 christos 224 1.1 christos /* Initialize a new set. set->env must be initialized using setjmp() before 225 1.1 christos // set_start() is called. A longjmp(set->env, ENOMEM) will be used to handle a 226 1.1 christos // memory allocation failure during any of the operations. (See setjmp.h and 227 1.1 christos // errno.h.) The set can still be used if this happens, assuming that it didn't 228 1.1 christos // happen during set_start(). Whether set_start() completed or not, set_end() 229 1.1 christos // can be used to free the set's memory after a longjmp(). */ 230 1.1 christos void set_start(set_t *set) { 231 1.1 christos #ifdef SET_TRACK 232 1.1 christos set->allocs = 0; 233 1.1 christos set->memory = 0; 234 1.1 christos #endif 235 1.1 christos set->head = set->path = set->node = NULL; /* in case set_node() fails */ 236 1.1 christos set->path = set_node(set); 237 1.1 christos set->head = set_node(set); 238 1.1 christos set_grow(set, set->head, 1, 1); /* one link back to head for an empty set */ 239 1.1 christos *(unsigned char *)&set->head->key = 137; /* set id */ 240 1.1 christos set->depth = 0; 241 1.1 christos set_uniq(&set->gen, set); 242 1.1 christos set->ran = 1; 243 1.1 christos } 244 1.1 christos 245 1.1 christos /* Return true if *set appears to be in a usable state. If *set has been zeroed 246 1.1 christos // out, then set_ok(set) will be false and set_end(set) will be safe. */ 247 1.1 christos int set_ok(set_t *set) { 248 1.1 christos return set->head != NULL && 249 1.1 christos set->head->right != NULL && 250 1.1 christos *(unsigned char *)&set->head->key == 137; 251 1.1 christos } 252 1.1 christos 253 1.1 christos /* Empty the set. This frees the memory used for the previous set contents. 254 1.1 christos // After set_clear(), *set is ready for use, as if after a set_start(). */ 255 1.1 christos void set_clear(set_t *set) { 256 1.1 christos assert(set_ok(set) && "improper use"); 257 1.1 christos 258 1.1 christos /* Free all the keys and their nodes. */ 259 1.1 christos set_sweep(set); 260 1.1 christos 261 1.1 christos /* Leave the head and path allocations as is. Clear their contents, with 262 1.1 christos // head pointing to itself and setting depth to zero, for an empty set. */ 263 1.1 christos set->head->right[0] = set->head; 264 1.1 christos set->head->fill = 1; 265 1.1 christos set->path->fill = 0; 266 1.1 christos set->depth = 0; 267 1.1 christos } 268 1.1 christos 269 1.1 christos /* Done using the set -- free all allocations. The only operation on *set 270 1.1 christos // permitted after this is set_start(). Though another set_end() would do no 271 1.1 christos // harm. This can be done at any time after a set_start(), or after a longjmp() 272 1.1 christos // on any allocation failure, including during a set_start(). */ 273 1.1 christos void set_end(set_t *set) { 274 1.1 christos if (set->head != NULL) { 275 1.1 christos /* Empty the set and free the head node. */ 276 1.1 christos if (set->head->right != NULL) { 277 1.1 christos set_sweep(set); 278 1.1 christos set_free(set, set->head->right); 279 1.1 christos } 280 1.1 christos set_free(set, set->head); 281 1.1 christos set->head = NULL; 282 1.1 christos } 283 1.1 christos if (set->path != NULL) { 284 1.1 christos /* Free the path work area. */ 285 1.1 christos set_free(set, set->path->right); 286 1.1 christos set_free(set, set->path); 287 1.1 christos set->path = NULL; 288 1.1 christos } 289 1.1 christos if (set->node != NULL) { 290 1.1 christos /* Free the node that was under construction when longjmp() hit. */ 291 1.1 christos set_drop(set, set->node->key); 292 1.1 christos set_free(set, set->node->right); 293 1.1 christos set_free(set, set->node); 294 1.1 christos set->node = NULL; 295 1.1 christos } 296 1.1 christos } 297 1.1 christos 298 1.1 christos /* Look for key. Return 1 if found or 0 if not. This also puts the path to get 299 1.1 christos // there in set->path, for use by set_insert(). */ 300 1.1 christos int set_found(set_t *set, set_key_t key) { 301 1.1 christos assert(set_ok(set) && "improper use"); 302 1.1 christos 303 1.1 christos /* Start at depth and work down and right as determined by key comparisons. */ 304 1.1 christos set_node_t *head = set->head, *here = head; 305 1.1 christos int i = set->depth; 306 1.1 christos set_grow(set, set->path, i + 1, 0); 307 1.1 christos do { 308 1.1 christos while (here->right[i] != head && 309 1.1 christos set_cmp(here->right[i]->key, key) < 0) 310 1.1 christos here = here->right[i]; 311 1.1 christos set->path->right[i] = here; 312 1.1 christos } while (i--); 313 1.1 christos 314 1.1 christos /* See if the key matches. */ 315 1.1 christos here = here->right[0]; 316 1.1 christos return here != head && set_cmp(here->key, key) == 0; 317 1.1 christos } 318 1.1 christos 319 1.1 christos /* Insert the key key. Return 0 on success, or 1 if key is already in the set. */ 320 1.1 christos int set_insert(set_t *set, set_key_t key) { 321 1.1 christos assert(set_ok(set) && "improper use"); 322 1.1 christos 323 1.1 christos if (set_found(set, key)) 324 1.1 christos /* That key is already in the set. */ 325 1.1 christos return 1; 326 1.1 christos 327 1.1 christos /* Randomly generate a new level-- level 0 with probability 1/2, 1 with 328 1.1 christos // probability 1/4, 2 with probability 1/8, etc. */ 329 1.1 christos int level = 0; 330 1.1 christos for (;;) { 331 1.1 christos if (set->ran == 1) 332 1.1 christos /* Ran out. Get another 32 random bits. */ 333 1.1 christos set->ran = set_rand(&set->gen) | (1ULL << 32); 334 1.1 christos int bit = set->ran & 1; 335 1.1 christos set->ran >>= 1; 336 1.1 christos if (bit) 337 1.1 christos break; 338 1.1 christos assert(level < 32767 && 339 1.1 christos "Overhead, without any fuss, the stars were going out."); 340 1.1 christos level++; 341 1.1 christos } 342 1.1 christos if (level > set->depth) { 343 1.1 christos /* The maximum depth is now deeper. Update the structures. */ 344 1.1 christos set_grow(set, set->path, level + 1, 1); 345 1.1 christos set_grow(set, set->head, level + 1, 1); 346 1.1 christos set->depth = (i16_t)level; 347 1.1 christos } 348 1.1 christos 349 1.1 christos /* Make a new node for the provided key, and insert it in the lists up to 350 1.1 christos // and including level. */ 351 1.1 christos set->node = set_node(set); 352 1.1 christos set->node->key = key; 353 1.1 christos set_grow(set, set->node, level + 1, 0); 354 1.1 christos int i; 355 1.1 christos for (i = 0; i <= level; i++) { 356 1.1 christos set->node->right[i] = set->path->right[i]->right[i]; 357 1.1 christos set->path->right[i]->right[i] = set->node; 358 1.1 christos } 359 1.1 christos set->node = NULL; 360 1.1 christos return 0; 361 1.1 christos } 362 1.1 christos 363 1.1 christos #else 364 1.1 christos #error ** another skiplist set already created here 365 1.1 christos /* Would need to implement a prefix in order to support multiple sets. */ 366 1.1 christos #endif 367