1 /* skipset.h -- set operations using a skiplist 2 // Copyright (C) 2024-2026 Mark Adler 3 // See MiniZip_info.txt for the license. 4 5 // This implements a skiplist set, i.e. just keys, no data, with ~O(log n) time 6 // insert and search operations. The application defines the type of a key, and 7 // provides a function to compare two keys. 8 9 // This header is not definitions of functions found in another source file -- 10 // it creates the set functions, with the application's key type, right where 11 // the #include is. Before this header is #included, these must be defined: 12 // 13 // 1. A macro or typedef for set_key_t, the type of a key. 14 // 2. A macro or function set_cmp(a, b) to compare two keys. The return values 15 // are < 0 for a < b, 0 for a == b, and > 0 for a > b. 16 // 3. A macro or function set_drop(s, k) to release the key k's resources, if 17 // any, when doing a set_end() or set_clear(). s is a pointer to the set 18 // that key is in, for use with set_free() if desired. 19 // 20 // Example usage: 21 // 22 // typedef int set_key_t; 23 // #define set_cmp(a, b) ((a) < (b) ? -1 : (a) == (b) ? 0 : 1) 24 // #define set_drop(s, k) 25 // #include "skipset.h" 26 // 27 // int test(void) { // return 0: good, 1: bad, -1: out of memory 28 // set_t set; 29 // if (setjmp(set.env)) 30 // return -1; 31 // set_start(&set); 32 // set_insert(&set, 2); 33 // set_insert(&set, 1); 34 // set_insert(&set, 7); 35 // int bad = !set_found(&set, 2); 36 // bad = bad || set_found(&set, 5); 37 // set_end(&set); 38 // return bad; 39 // } 40 // 41 // Interface summary (see more details below): 42 // - set_t is the type of the set being operated on (a set_t pointer is passed) 43 // - set_start() initializes a new, empty set (initialize set.env first) 44 // - set_insert() inserts a new key into the set, or not if it's already there 45 // - set_found() determines whether or not a key is in the set 46 // - set_end() ends the use of the set, freeing all memory 47 // - set_clear() empties the set, equivalent to set_end() and then set_start() 48 // - set_ok() checks if set appears to be usable, i.e. started and not ended 49 // 50 // Auxiliary functions available to the application: 51 // - set_alloc() allocates memory with optional tracking (#define SET_TRACK) 52 // - set_free() deallocates memory allocated by set_alloc() 53 // - set_rand() returns 32 random bits (seeded by set_start()) */ 54 55 #ifndef SKIPSET_H 56 #define SKIPSET_H 57 58 #include <stdlib.h> /* realloc(), free(), NULL, size_t */ 59 #include <stddef.h> /* ptrdiff_t */ 60 #include <setjmp.h> /* jmp_buf, longjmp() */ 61 #include <errno.h> /* ENOMEM */ 62 #include <time.h> /* time(), clock() */ 63 #include <assert.h> /* assert.h */ 64 #include "ints.h" /* i16_t, ui32_t, ui64_t */ 65 66 /* Structures and functions below noted as "--private--" should not be used by 67 // the application. set_t is partially private and partially public -- see the 68 // comments there. 69 70 // There is no POSIX random() in MSVC, and rand() is awful. For portability, we 71 // cannot rely on a library function for random numbers. Instead we use the 72 // fast and effective algorithm below, invented by Melissa O'Neill. 73 74 // *Really* minimal PCG32 code / (c) 2014 M.E. O'Neill / www.pcg-random.org 75 // Licensed under Apache License 2.0 (NO WARRANTY, etc. see website) 76 // --private-- Random number generator state. */ 77 typedef struct { 78 ui64_t state; /* 64-bit generator state */ 79 ui64_t inc; /* 63-bit sequence id */ 80 } set_rand_t; 81 /* --private-- Initialize the state *gen using seed and seq. seed seeds the 82 // advancing 64-bit state. seq is a sequence selection constant. */ 83 void set_seed(set_rand_t *gen, ui64_t seed, ui64_t seq) { 84 gen->inc = (seq << 1) | 1; 85 gen->state = (seed + gen->inc) * 6364136223846793005ULL + gen->inc; 86 } 87 /* Start a unique random number sequence using bits from noise sources. */ 88 void set_uniq(set_rand_t *gen, const void *ptr) { 89 set_seed(gen, ((ui64_t)(ptrdiff_t)ptr << 32) ^ 90 ((ui64_t)time(NULL) << 12) ^ clock(), 0); 91 } 92 /* Return 32 random bits, advancing the state *gen. */ 93 ui32_t set_rand(set_rand_t *gen) { 94 ui64_t state = gen->state; 95 gen->state = state * 6364136223846793005ULL + gen->inc; 96 ui32_t mix = (ui32_t)(((state >> 18) ^ state) >> 27); 97 int rot = state >> 59; 98 return (mix >> rot) | (mix << ((-rot) & 31)); 99 } 100 /* End of PCG32 code. */ 101 102 /* --private-- Linked-list node. */ 103 typedef struct set_node_s set_node_t; 104 struct set_node_s { 105 set_key_t key; /* the key (not used for head or path) */ 106 i16_t size; /* number of allocated pointers in right[] */ 107 i16_t fill; /* number of pointers in right[] filled in */ 108 set_node_t **right; /* pointer for each level, each to the right */ 109 }; 110 111 /* A set. The application sets env, may use gen with set_rand(), and may read 112 // allocs and memory. The remaining variables are --private-- . */ 113 typedef struct set_s { 114 set_node_t *head; /* skiplist head -- no key, just links */ 115 set_node_t *path; /* right[] is path to key from set_found() */ 116 set_node_t *node; /* node under construction, in case of longjmp() */ 117 i16_t depth; /* maximum depth of the skiplist */ 118 ui64_t ran; /* a precious trove of random bits */ 119 set_rand_t gen; /* random number generator state */ 120 jmp_buf env; /* setjmp() environment for allocation errors */ 121 #ifdef SET_TRACK 122 size_t allocs; /* number of allocations */ 123 size_t memory; /* total amount of allocated memory (>= requests) */ 124 #endif 125 } set_t; 126 127 /* Memory allocation and deallocation. set_alloc(set, ptr, size) returns a 128 // pointer to an allocation of size bytes if ptr is NULL, or the previous 129 // allocation ptr resized to size bytes. set_alloc() will never return NULL. 130 // set_free(set, ptr) frees an allocation created by set_alloc(). These may be 131 // used by the application. e.g. if allocation tracking is desired. */ 132 #ifdef SET_TRACK 133 /* Track the number of allocations and the total backing memory size. */ 134 # if defined(_WIN32) 135 # include <malloc.h> 136 # define SET_ALLOC_SIZE(ptr) _msize(ptr) 137 # elif defined(__MACH__) 138 # include <malloc/malloc.h> 139 # define SET_ALLOC_SIZE(ptr) malloc_size(ptr) 140 # elif defined(__linux__) 141 # include <malloc.h> 142 # define SET_ALLOC_SIZE(ptr) malloc_usable_size(ptr) 143 # elif defined(__FreeBSD__) 144 # include <malloc_np.h> 145 # define SET_ALLOC_SIZE(ptr) malloc_usable_size(ptr) 146 # elif defined(__NetBSD__) 147 # include <jemalloc/jemalloc.h> 148 # define SET_ALLOC_SIZE(ptr) malloc_usable_size(ptr) 149 # else // e.g. OpenBSD 150 # define SET_ALLOC_SIZE(ptr) 0 151 # endif 152 // With tracking. 153 void *set_alloc(set_t *set, void *ptr, size_t size) { 154 size_t had = ptr == NULL ? 0 : SET_ALLOC_SIZE(ptr); 155 void *mem = realloc(ptr, size); 156 if (mem == NULL) 157 longjmp(set->env, ENOMEM); 158 set->allocs += ptr == NULL; 159 set->memory += SET_ALLOC_SIZE(mem) - had; 160 return mem; 161 } 162 void set_free(set_t *set, void *ptr) { 163 if (ptr != NULL) { 164 set->allocs--; 165 set->memory -= SET_ALLOC_SIZE(ptr); 166 free(ptr); 167 } 168 } 169 #else 170 /* Without tracking. */ 171 void *set_alloc(set_t *set, void *ptr, size_t size) { 172 void *mem = realloc(ptr, size); 173 if (mem == NULL) 174 longjmp(set->env, ENOMEM); 175 return mem; 176 } 177 void set_free(set_t *set, void *ptr) { 178 (void)set; 179 free(ptr); 180 } 181 #endif 182 183 /* --private-- Grow node's array right[] as needed to be able to hold at least 184 // want links. If fill is true, assure that the first want links are filled in, 185 // setting them to set->head if not previously filled in. Otherwise it is 186 // assumed that the first want links are about to be filled in. */ 187 void set_grow(set_t *set, set_node_t *node, int want, int fill) { 188 if (node->size < want) { 189 int more = node->size ? node->size : 1; 190 while (more < want) 191 more <<= 1; 192 node->right = set_alloc(set, node->right, 193 (size_t)more * sizeof(set_node_t *)); 194 node->size = (i16_t)more; 195 } 196 int i; 197 if (fill) 198 for (i = node->fill; i < want; i++) 199 node->right[i] = set->head; 200 node->fill = (i16_t)want; 201 } 202 203 /* --private-- Return a new node. key is left uninitialized. */ 204 set_node_t *set_node(set_t *set) { 205 set_node_t *node = set_alloc(set, NULL, sizeof(set_node_t)); 206 node->size = 0; 207 node->fill = 0; 208 node->right = NULL; 209 return node; 210 } 211 212 /* --private-- Free the list linked from head, along with the keys. */ 213 void set_sweep(set_t *set) { 214 set_node_t *step = set->head->right[0]; 215 while (step != set->head) { 216 set_node_t *next = step->right[0]; /* save link to next node */ 217 set_drop(set, step->key); 218 set_free(set, step->right); 219 set_free(set, step); 220 step = next; 221 } 222 } 223 224 /* Initialize a new set. set->env must be initialized using setjmp() before 225 // set_start() is called. A longjmp(set->env, ENOMEM) will be used to handle a 226 // memory allocation failure during any of the operations. (See setjmp.h and 227 // errno.h.) The set can still be used if this happens, assuming that it didn't 228 // happen during set_start(). Whether set_start() completed or not, set_end() 229 // can be used to free the set's memory after a longjmp(). */ 230 void set_start(set_t *set) { 231 #ifdef SET_TRACK 232 set->allocs = 0; 233 set->memory = 0; 234 #endif 235 set->head = set->path = set->node = NULL; /* in case set_node() fails */ 236 set->path = set_node(set); 237 set->head = set_node(set); 238 set_grow(set, set->head, 1, 1); /* one link back to head for an empty set */ 239 *(unsigned char *)&set->head->key = 137; /* set id */ 240 set->depth = 0; 241 set_uniq(&set->gen, set); 242 set->ran = 1; 243 } 244 245 /* Return true if *set appears to be in a usable state. If *set has been zeroed 246 // out, then set_ok(set) will be false and set_end(set) will be safe. */ 247 int set_ok(set_t *set) { 248 return set->head != NULL && 249 set->head->right != NULL && 250 *(unsigned char *)&set->head->key == 137; 251 } 252 253 /* Empty the set. This frees the memory used for the previous set contents. 254 // After set_clear(), *set is ready for use, as if after a set_start(). */ 255 void set_clear(set_t *set) { 256 assert(set_ok(set) && "improper use"); 257 258 /* Free all the keys and their nodes. */ 259 set_sweep(set); 260 261 /* Leave the head and path allocations as is. Clear their contents, with 262 // head pointing to itself and setting depth to zero, for an empty set. */ 263 set->head->right[0] = set->head; 264 set->head->fill = 1; 265 set->path->fill = 0; 266 set->depth = 0; 267 } 268 269 /* Done using the set -- free all allocations. The only operation on *set 270 // permitted after this is set_start(). Though another set_end() would do no 271 // harm. This can be done at any time after a set_start(), or after a longjmp() 272 // on any allocation failure, including during a set_start(). */ 273 void set_end(set_t *set) { 274 if (set->head != NULL) { 275 /* Empty the set and free the head node. */ 276 if (set->head->right != NULL) { 277 set_sweep(set); 278 set_free(set, set->head->right); 279 } 280 set_free(set, set->head); 281 set->head = NULL; 282 } 283 if (set->path != NULL) { 284 /* Free the path work area. */ 285 set_free(set, set->path->right); 286 set_free(set, set->path); 287 set->path = NULL; 288 } 289 if (set->node != NULL) { 290 /* Free the node that was under construction when longjmp() hit. */ 291 set_drop(set, set->node->key); 292 set_free(set, set->node->right); 293 set_free(set, set->node); 294 set->node = NULL; 295 } 296 } 297 298 /* Look for key. Return 1 if found or 0 if not. This also puts the path to get 299 // there in set->path, for use by set_insert(). */ 300 int set_found(set_t *set, set_key_t key) { 301 assert(set_ok(set) && "improper use"); 302 303 /* Start at depth and work down and right as determined by key comparisons. */ 304 set_node_t *head = set->head, *here = head; 305 int i = set->depth; 306 set_grow(set, set->path, i + 1, 0); 307 do { 308 while (here->right[i] != head && 309 set_cmp(here->right[i]->key, key) < 0) 310 here = here->right[i]; 311 set->path->right[i] = here; 312 } while (i--); 313 314 /* See if the key matches. */ 315 here = here->right[0]; 316 return here != head && set_cmp(here->key, key) == 0; 317 } 318 319 /* Insert the key key. Return 0 on success, or 1 if key is already in the set. */ 320 int set_insert(set_t *set, set_key_t key) { 321 assert(set_ok(set) && "improper use"); 322 323 if (set_found(set, key)) 324 /* That key is already in the set. */ 325 return 1; 326 327 /* Randomly generate a new level-- level 0 with probability 1/2, 1 with 328 // probability 1/4, 2 with probability 1/8, etc. */ 329 int level = 0; 330 for (;;) { 331 if (set->ran == 1) 332 /* Ran out. Get another 32 random bits. */ 333 set->ran = set_rand(&set->gen) | (1ULL << 32); 334 int bit = set->ran & 1; 335 set->ran >>= 1; 336 if (bit) 337 break; 338 assert(level < 32767 && 339 "Overhead, without any fuss, the stars were going out."); 340 level++; 341 } 342 if (level > set->depth) { 343 /* The maximum depth is now deeper. Update the structures. */ 344 set_grow(set, set->path, level + 1, 1); 345 set_grow(set, set->head, level + 1, 1); 346 set->depth = (i16_t)level; 347 } 348 349 /* Make a new node for the provided key, and insert it in the lists up to 350 // and including level. */ 351 set->node = set_node(set); 352 set->node->key = key; 353 set_grow(set, set->node, level + 1, 0); 354 int i; 355 for (i = 0; i <= level; i++) { 356 set->node->right[i] = set->path->right[i]->right[i]; 357 set->path->right[i]->right[i] = set->node; 358 } 359 set->node = NULL; 360 return 0; 361 } 362 363 #else 364 #error ** another skiplist set already created here 365 /* Would need to implement a prefix in order to support multiple sets. */ 366 #endif 367