Home | History | Annotate | Line # | Download | only in minizip
skipset.h revision 1.1.1.1
      1  1.1  christos /* skipset.h -- set operations using a skiplist
      2  1.1  christos // Copyright (C) 2024-2026 Mark Adler
      3  1.1  christos // See MiniZip_info.txt for the license.
      4  1.1  christos 
      5  1.1  christos // This implements a skiplist set, i.e. just keys, no data, with ~O(log n) time
      6  1.1  christos // insert and search operations. The application defines the type of a key, and
      7  1.1  christos // provides a function to compare two keys.
      8  1.1  christos 
      9  1.1  christos // This header is not definitions of functions found in another source file --
     10  1.1  christos // it creates the set functions, with the application's key type, right where
     11  1.1  christos // the #include is. Before this header is #included, these must be defined:
     12  1.1  christos //
     13  1.1  christos // 1. A macro or typedef for set_key_t, the type of a key.
     14  1.1  christos // 2. A macro or function set_cmp(a, b) to compare two keys. The return values
     15  1.1  christos //    are < 0 for a < b, 0 for a == b, and > 0 for a > b.
     16  1.1  christos // 3. A macro or function set_drop(s, k) to release the key k's resources, if
     17  1.1  christos //    any, when doing a set_end() or set_clear(). s is a pointer to the set
     18  1.1  christos //    that key is in, for use with set_free() if desired.
     19  1.1  christos //
     20  1.1  christos // Example usage:
     21  1.1  christos //
     22  1.1  christos //      typedef int set_key_t;
     23  1.1  christos //      #define set_cmp(a, b) ((a) < (b) ? -1 : (a) == (b) ? 0 : 1)
     24  1.1  christos //      #define set_drop(s, k)
     25  1.1  christos //      #include "skipset.h"
     26  1.1  christos //
     27  1.1  christos //      int test(void) {        // return 0: good, 1: bad, -1: out of memory
     28  1.1  christos //          set_t set;
     29  1.1  christos //          if (setjmp(set.env))
     30  1.1  christos //              return -1;
     31  1.1  christos //          set_start(&set);
     32  1.1  christos //          set_insert(&set, 2);
     33  1.1  christos //          set_insert(&set, 1);
     34  1.1  christos //          set_insert(&set, 7);
     35  1.1  christos //          int bad = !set_found(&set, 2);
     36  1.1  christos //          bad = bad || set_found(&set, 5);
     37  1.1  christos //          set_end(&set);
     38  1.1  christos //          return bad;
     39  1.1  christos //      }
     40  1.1  christos //
     41  1.1  christos // Interface summary (see more details below):
     42  1.1  christos // - set_t is the type of the set being operated on (a set_t pointer is passed)
     43  1.1  christos // - set_start() initializes a new, empty set (initialize set.env first)
     44  1.1  christos // - set_insert() inserts a new key into the set, or not if it's already there
     45  1.1  christos // - set_found() determines whether or not a key is in the set
     46  1.1  christos // - set_end() ends the use of the set, freeing all memory
     47  1.1  christos // - set_clear() empties the set, equivalent to set_end() and then set_start()
     48  1.1  christos // - set_ok() checks if set appears to be usable, i.e. started and not ended
     49  1.1  christos //
     50  1.1  christos // Auxiliary functions available to the application:
     51  1.1  christos // - set_alloc() allocates memory with optional tracking (#define SET_TRACK)
     52  1.1  christos // - set_free() deallocates memory allocated by set_alloc()
     53  1.1  christos // - set_rand() returns 32 random bits (seeded by set_start()) */
     54  1.1  christos 
     55  1.1  christos #ifndef SKIPSET_H
     56  1.1  christos #define SKIPSET_H
     57  1.1  christos 
     58  1.1  christos #include <stdlib.h>     /* realloc(), free(), NULL, size_t */
     59  1.1  christos #include <stddef.h>     /* ptrdiff_t */
     60  1.1  christos #include <setjmp.h>     /* jmp_buf, longjmp() */
     61  1.1  christos #include <errno.h>      /* ENOMEM */
     62  1.1  christos #include <time.h>       /* time(), clock() */
     63  1.1  christos #include <assert.h>     /* assert.h */
     64  1.1  christos #include "ints.h"       /* i16_t, ui32_t, ui64_t */
     65  1.1  christos 
     66  1.1  christos /* Structures and functions below noted as "--private--" should not be used by
     67  1.1  christos // the application. set_t is partially private and partially public -- see the
     68  1.1  christos // comments there.
     69  1.1  christos 
     70  1.1  christos // There is no POSIX random() in MSVC, and rand() is awful. For portability, we
     71  1.1  christos // cannot rely on a library function for random numbers. Instead we use the
     72  1.1  christos // fast and effective algorithm below, invented by Melissa O'Neill.
     73  1.1  christos 
     74  1.1  christos // *Really* minimal PCG32 code / (c) 2014 M.E. O'Neill / www.pcg-random.org
     75  1.1  christos // Licensed under Apache License 2.0 (NO WARRANTY, etc. see website)
     76  1.1  christos // --private-- Random number generator state. */
     77  1.1  christos typedef struct {
     78  1.1  christos     ui64_t state;       /* 64-bit generator state */
     79  1.1  christos     ui64_t inc;         /* 63-bit sequence id */
     80  1.1  christos } set_rand_t;
     81  1.1  christos /* --private-- Initialize the state *gen using seed and seq. seed seeds the
     82  1.1  christos // advancing 64-bit state. seq is a sequence selection constant. */
     83  1.1  christos void set_seed(set_rand_t *gen, ui64_t seed, ui64_t seq) {
     84  1.1  christos     gen->inc = (seq << 1) | 1;
     85  1.1  christos     gen->state = (seed + gen->inc) * 6364136223846793005ULL + gen->inc;
     86  1.1  christos }
     87  1.1  christos /* Start a unique random number sequence using bits from noise sources. */
     88  1.1  christos void set_uniq(set_rand_t *gen, const void *ptr) {
     89  1.1  christos     set_seed(gen, ((ui64_t)(ptrdiff_t)ptr << 32) ^
     90  1.1  christos                   ((ui64_t)time(NULL) << 12) ^ clock(), 0);
     91  1.1  christos }
     92  1.1  christos /* Return 32 random bits, advancing the state *gen. */
     93  1.1  christos ui32_t set_rand(set_rand_t *gen) {
     94  1.1  christos     ui64_t state = gen->state;
     95  1.1  christos     gen->state = state * 6364136223846793005ULL + gen->inc;
     96  1.1  christos     ui32_t mix = (ui32_t)(((state >> 18) ^ state) >> 27);
     97  1.1  christos     int rot = state >> 59;
     98  1.1  christos     return (mix >> rot) | (mix << ((-rot) & 31));
     99  1.1  christos }
    100  1.1  christos /* End of PCG32 code. */
    101  1.1  christos 
    102  1.1  christos /* --private-- Linked-list node. */
    103  1.1  christos typedef struct set_node_s set_node_t;
    104  1.1  christos struct set_node_s {
    105  1.1  christos     set_key_t key;          /* the key (not used for head or path) */
    106  1.1  christos     i16_t size;             /* number of allocated pointers in right[] */
    107  1.1  christos     i16_t fill;             /* number of pointers in right[] filled in */
    108  1.1  christos     set_node_t **right;     /* pointer for each level, each to the right */
    109  1.1  christos };
    110  1.1  christos 
    111  1.1  christos /* A set. The application sets env, may use gen with set_rand(), and may read
    112  1.1  christos // allocs and memory. The remaining variables are --private-- . */
    113  1.1  christos typedef struct set_s {
    114  1.1  christos     set_node_t *head;       /* skiplist head -- no key, just links */
    115  1.1  christos     set_node_t *path;       /* right[] is path to key from set_found() */
    116  1.1  christos     set_node_t *node;       /* node under construction, in case of longjmp() */
    117  1.1  christos     i16_t depth;            /* maximum depth of the skiplist */
    118  1.1  christos     ui64_t ran;             /* a precious trove of random bits */
    119  1.1  christos     set_rand_t gen;         /* random number generator state */
    120  1.1  christos     jmp_buf env;            /* setjmp() environment for allocation errors */
    121  1.1  christos #ifdef SET_TRACK
    122  1.1  christos     size_t allocs;          /* number of allocations */
    123  1.1  christos     size_t memory;          /* total amount of allocated memory (>= requests) */
    124  1.1  christos #endif
    125  1.1  christos } set_t;
    126  1.1  christos 
    127  1.1  christos /* Memory allocation and deallocation. set_alloc(set, ptr, size) returns a
    128  1.1  christos // pointer to an allocation of size bytes if ptr is NULL, or the previous
    129  1.1  christos // allocation ptr resized to size bytes. set_alloc() will never return NULL.
    130  1.1  christos // set_free(set, ptr) frees an allocation created by set_alloc(). These may be
    131  1.1  christos // used by the application. e.g. if allocation tracking is desired. */
    132  1.1  christos #ifdef SET_TRACK
    133  1.1  christos /* Track the number of allocations and the total backing memory size. */
    134  1.1  christos #  if defined(_WIN32)
    135  1.1  christos #    include <malloc.h>
    136  1.1  christos #    define SET_ALLOC_SIZE(ptr) _msize(ptr)
    137  1.1  christos #  elif defined(__MACH__)
    138  1.1  christos #    include <malloc/malloc.h>
    139  1.1  christos #    define SET_ALLOC_SIZE(ptr) malloc_size(ptr)
    140  1.1  christos #  elif defined(__linux__)
    141  1.1  christos #    include <malloc.h>
    142  1.1  christos #    define SET_ALLOC_SIZE(ptr) malloc_usable_size(ptr)
    143  1.1  christos #  elif defined(__FreeBSD__)
    144  1.1  christos #    include <malloc_np.h>
    145  1.1  christos #    define SET_ALLOC_SIZE(ptr) malloc_usable_size(ptr)
    146  1.1  christos #  elif defined(__NetBSD__)
    147  1.1  christos #    include <jemalloc/jemalloc.h>
    148  1.1  christos #    define SET_ALLOC_SIZE(ptr) malloc_usable_size(ptr)
    149  1.1  christos #  else     // e.g. OpenBSD
    150  1.1  christos #    define SET_ALLOC_SIZE(ptr) 0
    151  1.1  christos #  endif
    152  1.1  christos // With tracking.
    153  1.1  christos void *set_alloc(set_t *set, void *ptr, size_t size) {
    154  1.1  christos     size_t had = ptr == NULL ? 0 : SET_ALLOC_SIZE(ptr);
    155  1.1  christos     void *mem = realloc(ptr, size);
    156  1.1  christos     if (mem == NULL)
    157  1.1  christos         longjmp(set->env, ENOMEM);
    158  1.1  christos     set->allocs += ptr == NULL;
    159  1.1  christos     set->memory += SET_ALLOC_SIZE(mem) - had;
    160  1.1  christos     return mem;
    161  1.1  christos }
    162  1.1  christos void set_free(set_t *set, void *ptr) {
    163  1.1  christos     if (ptr != NULL) {
    164  1.1  christos         set->allocs--;
    165  1.1  christos         set->memory -= SET_ALLOC_SIZE(ptr);
    166  1.1  christos         free(ptr);
    167  1.1  christos     }
    168  1.1  christos }
    169  1.1  christos #else
    170  1.1  christos /* Without tracking. */
    171  1.1  christos void *set_alloc(set_t *set, void *ptr, size_t size) {
    172  1.1  christos     void *mem = realloc(ptr, size);
    173  1.1  christos     if (mem == NULL)
    174  1.1  christos         longjmp(set->env, ENOMEM);
    175  1.1  christos     return mem;
    176  1.1  christos }
    177  1.1  christos void set_free(set_t *set, void *ptr) {
    178  1.1  christos     (void)set;
    179  1.1  christos     free(ptr);
    180  1.1  christos }
    181  1.1  christos #endif
    182  1.1  christos 
    183  1.1  christos /* --private-- Grow node's array right[] as needed to be able to hold at least
    184  1.1  christos // want links. If fill is true, assure that the first want links are filled in,
    185  1.1  christos // setting them to set->head if not previously filled in. Otherwise it is
    186  1.1  christos // assumed that the first want links are about to be filled in. */
    187  1.1  christos void set_grow(set_t *set, set_node_t *node, int want, int fill) {
    188  1.1  christos     if (node->size < want) {
    189  1.1  christos         int more = node->size ? node->size : 1;
    190  1.1  christos         while (more < want)
    191  1.1  christos             more <<= 1;
    192  1.1  christos         node->right = set_alloc(set, node->right,
    193  1.1  christos                                 (size_t)more * sizeof(set_node_t *));
    194  1.1  christos         node->size = (i16_t)more;
    195  1.1  christos     }
    196  1.1  christos     int i;
    197  1.1  christos     if (fill)
    198  1.1  christos         for (i = node->fill; i < want; i++)
    199  1.1  christos             node->right[i] = set->head;
    200  1.1  christos     node->fill = (i16_t)want;
    201  1.1  christos }
    202  1.1  christos 
    203  1.1  christos /* --private-- Return a new node. key is left uninitialized. */
    204  1.1  christos set_node_t *set_node(set_t *set) {
    205  1.1  christos     set_node_t *node = set_alloc(set, NULL, sizeof(set_node_t));
    206  1.1  christos     node->size = 0;
    207  1.1  christos     node->fill = 0;
    208  1.1  christos     node->right = NULL;
    209  1.1  christos     return node;
    210  1.1  christos }
    211  1.1  christos 
    212  1.1  christos /* --private-- Free the list linked from head, along with the keys. */
    213  1.1  christos void set_sweep(set_t *set) {
    214  1.1  christos     set_node_t *step = set->head->right[0];
    215  1.1  christos     while (step != set->head) {
    216  1.1  christos         set_node_t *next = step->right[0];      /* save link to next node */
    217  1.1  christos         set_drop(set, step->key);
    218  1.1  christos         set_free(set, step->right);
    219  1.1  christos         set_free(set, step);
    220  1.1  christos         step = next;
    221  1.1  christos     }
    222  1.1  christos }
    223  1.1  christos 
    224  1.1  christos /* Initialize a new set. set->env must be initialized using setjmp() before
    225  1.1  christos // set_start() is called. A longjmp(set->env, ENOMEM) will be used to handle a
    226  1.1  christos // memory allocation failure during any of the operations. (See setjmp.h and
    227  1.1  christos // errno.h.) The set can still be used if this happens, assuming that it didn't
    228  1.1  christos // happen during set_start(). Whether set_start() completed or not, set_end()
    229  1.1  christos // can be used to free the set's memory after a longjmp(). */
    230  1.1  christos void set_start(set_t *set) {
    231  1.1  christos #ifdef SET_TRACK
    232  1.1  christos     set->allocs = 0;
    233  1.1  christos     set->memory = 0;
    234  1.1  christos #endif
    235  1.1  christos     set->head = set->path = set->node = NULL;   /* in case set_node() fails */
    236  1.1  christos     set->path = set_node(set);
    237  1.1  christos     set->head = set_node(set);
    238  1.1  christos     set_grow(set, set->head, 1, 1); /* one link back to head for an empty set */
    239  1.1  christos     *(unsigned char *)&set->head->key = 137;    /* set id */
    240  1.1  christos     set->depth = 0;
    241  1.1  christos     set_uniq(&set->gen, set);
    242  1.1  christos     set->ran = 1;
    243  1.1  christos }
    244  1.1  christos 
    245  1.1  christos /* Return true if *set appears to be in a usable state. If *set has been zeroed
    246  1.1  christos // out, then set_ok(set) will be false and set_end(set) will be safe. */
    247  1.1  christos int set_ok(set_t *set) {
    248  1.1  christos     return set->head != NULL &&
    249  1.1  christos            set->head->right != NULL &&
    250  1.1  christos            *(unsigned char *)&set->head->key == 137;
    251  1.1  christos }
    252  1.1  christos 
    253  1.1  christos /* Empty the set. This frees the memory used for the previous set contents.
    254  1.1  christos // After set_clear(), *set is ready for use, as if after a set_start(). */
    255  1.1  christos void set_clear(set_t *set) {
    256  1.1  christos     assert(set_ok(set) && "improper use");
    257  1.1  christos 
    258  1.1  christos     /* Free all the keys and their nodes. */
    259  1.1  christos     set_sweep(set);
    260  1.1  christos 
    261  1.1  christos     /* Leave the head and path allocations as is. Clear their contents, with
    262  1.1  christos     // head pointing to itself and setting depth to zero, for an empty set. */
    263  1.1  christos     set->head->right[0] = set->head;
    264  1.1  christos     set->head->fill = 1;
    265  1.1  christos     set->path->fill = 0;
    266  1.1  christos     set->depth = 0;
    267  1.1  christos }
    268  1.1  christos 
    269  1.1  christos /* Done using the set -- free all allocations. The only operation on *set
    270  1.1  christos // permitted after this is set_start(). Though another set_end() would do no
    271  1.1  christos // harm. This can be done at any time after a set_start(), or after a longjmp()
    272  1.1  christos // on any allocation failure, including during a set_start(). */
    273  1.1  christos void set_end(set_t *set) {
    274  1.1  christos     if (set->head != NULL) {
    275  1.1  christos         /* Empty the set and free the head node. */
    276  1.1  christos         if (set->head->right != NULL) {
    277  1.1  christos             set_sweep(set);
    278  1.1  christos             set_free(set, set->head->right);
    279  1.1  christos         }
    280  1.1  christos         set_free(set, set->head);
    281  1.1  christos         set->head = NULL;
    282  1.1  christos     }
    283  1.1  christos     if (set->path != NULL) {
    284  1.1  christos         /* Free the path work area. */
    285  1.1  christos         set_free(set, set->path->right);
    286  1.1  christos         set_free(set, set->path);
    287  1.1  christos         set->path = NULL;
    288  1.1  christos     }
    289  1.1  christos     if (set->node != NULL) {
    290  1.1  christos         /* Free the node that was under construction when longjmp() hit. */
    291  1.1  christos         set_drop(set, set->node->key);
    292  1.1  christos         set_free(set, set->node->right);
    293  1.1  christos         set_free(set, set->node);
    294  1.1  christos         set->node = NULL;
    295  1.1  christos     }
    296  1.1  christos }
    297  1.1  christos 
    298  1.1  christos /* Look for key. Return 1 if found or 0 if not. This also puts the path to get
    299  1.1  christos // there in set->path, for use by set_insert(). */
    300  1.1  christos int set_found(set_t *set, set_key_t key) {
    301  1.1  christos     assert(set_ok(set) && "improper use");
    302  1.1  christos 
    303  1.1  christos     /* Start at depth and work down and right as determined by key comparisons. */
    304  1.1  christos     set_node_t *head = set->head, *here = head;
    305  1.1  christos     int i = set->depth;
    306  1.1  christos     set_grow(set, set->path, i + 1, 0);
    307  1.1  christos     do {
    308  1.1  christos         while (here->right[i] != head &&
    309  1.1  christos                set_cmp(here->right[i]->key, key) < 0)
    310  1.1  christos             here = here->right[i];
    311  1.1  christos         set->path->right[i] = here;
    312  1.1  christos     } while (i--);
    313  1.1  christos 
    314  1.1  christos     /* See if the key matches. */
    315  1.1  christos     here = here->right[0];
    316  1.1  christos     return here != head && set_cmp(here->key, key) == 0;
    317  1.1  christos }
    318  1.1  christos 
    319  1.1  christos /* Insert the key key. Return 0 on success, or 1 if key is already in the set. */
    320  1.1  christos int set_insert(set_t *set, set_key_t key) {
    321  1.1  christos     assert(set_ok(set) && "improper use");
    322  1.1  christos 
    323  1.1  christos     if (set_found(set, key))
    324  1.1  christos         /* That key is already in the set. */
    325  1.1  christos         return 1;
    326  1.1  christos 
    327  1.1  christos     /* Randomly generate a new level-- level 0 with probability 1/2, 1 with
    328  1.1  christos     // probability 1/4, 2 with probability 1/8, etc. */
    329  1.1  christos     int level = 0;
    330  1.1  christos     for (;;) {
    331  1.1  christos         if (set->ran == 1)
    332  1.1  christos             /* Ran out. Get another 32 random bits. */
    333  1.1  christos             set->ran = set_rand(&set->gen) | (1ULL << 32);
    334  1.1  christos         int bit = set->ran & 1;
    335  1.1  christos         set->ran >>= 1;
    336  1.1  christos         if (bit)
    337  1.1  christos             break;
    338  1.1  christos         assert(level < 32767 &&
    339  1.1  christos                "Overhead, without any fuss, the stars were going out.");
    340  1.1  christos         level++;
    341  1.1  christos     }
    342  1.1  christos     if (level > set->depth) {
    343  1.1  christos         /* The maximum depth is now deeper. Update the structures. */
    344  1.1  christos         set_grow(set, set->path, level + 1, 1);
    345  1.1  christos         set_grow(set, set->head, level + 1, 1);
    346  1.1  christos         set->depth = (i16_t)level;
    347  1.1  christos     }
    348  1.1  christos 
    349  1.1  christos     /* Make a new node for the provided key, and insert it in the lists up to
    350  1.1  christos     // and including level. */
    351  1.1  christos     set->node = set_node(set);
    352  1.1  christos     set->node->key = key;
    353  1.1  christos     set_grow(set, set->node, level + 1, 0);
    354  1.1  christos     int i;
    355  1.1  christos     for (i = 0; i <= level; i++) {
    356  1.1  christos         set->node->right[i] = set->path->right[i]->right[i];
    357  1.1  christos         set->path->right[i]->right[i] = set->node;
    358  1.1  christos     }
    359  1.1  christos     set->node = NULL;
    360  1.1  christos     return 0;
    361  1.1  christos }
    362  1.1  christos 
    363  1.1  christos #else
    364  1.1  christos #error ** another skiplist set already created here
    365  1.1  christos /* Would need to implement a prefix in order to support multiple sets. */
    366  1.1  christos #endif
    367