Home | History | Annotate | Line # | Download | only in isc
      1 /*	$NetBSD: histo.h,v 1.2 2025/01/26 16:25:41 christos Exp $	*/
      2 
      3 /*
      4  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
      5  *
      6  * SPDX-License-Identifier: MPL-2.0
      7  *
      8  * This Source Code Form is subject to the terms of the Mozilla Public
      9  * License, v. 2.0. If a copy of the MPL was not distributed with this
     10  * file, you can obtain one at https://mozilla.org/MPL/2.0/.
     11  *
     12  * See the COPYRIGHT file distributed with this work for additional
     13  * information regarding copyright ownership.
     14  */
     15 
     16 #pragma once
     17 
     18 #include <sys/types.h>
     19 
     20 #include <isc/mem.h>
     21 
     22 /*
     23  * An `isc_histo_t` is a thread-safe histogram of `uint64_t` values.
     24  * It keeps a count of how many values land in each bucket. Use the
     25  * `isc_histo_inc()`, `isc_histo_acc()`, and `isc_histo_put()`
     26  * functions to add values to the histogram.
     27  *
     28  * Values are mapped to buckets by rounding them according to a
     29  * configurable precision, expressed as a number of significant bits.
     30  * The bits <-> digits functions convert betwen decimal significant
     31  * digits (as in scientific notation) and binary significant bits.
     32  *
     33  * You can use the `isc_histo_get()` function to export data from the
     34  * histogram. The range of a bucket is returned as its minimum and
     35  * maximum values, inclusive, i.e. a closed interval. We use closed
     36  * intervals so we are able to express the maximum of the last bucket,
     37  * UINT64_MAX, although half-open intervals are more common in C.
     38  *
     39  * You can calculate some basic statistics directly from a histogram.
     40  * The `isc_histo_quantiles()` function can get a histogram's median,
     41  * 99th percentile, etc. The `isc_histo_moments()` function gets a
     42  * histogram's population, mean, and standard deviation.
     43  *
     44  * The size of a histogram depends on the range of values in the
     45  * stream of samples, not the number of samples. Bucket counters are
     46  * 64 bits each, and are allocated in chunks of `1 << sigbits` where
     47  * `sigbits` is the histogram's configured precision. There are at
     48  * most 64 chunks, one for each bit of a 64 bit value. Histograms with
     49  * greater precision have larger chunks.
     50  *
     51  * At the low end (values near zero) there is one value per bucket,
     52  * then two values, four, eight, etc. The number of values that map to
     53  * a bucket is the same in each chunk. Chunks 0 and 1 have one value
     54  * per bucket, (see `ISC_HISTO_UNITBUCKETS()` below), chunk 2 has 2
     55  * values per bucket, chunk 3 has 4, etc.
     56  *
     57  * The update cost is roughly constant and very small (not much more
     58  * than an atomic increment). It mostly depends on cache locality and
     59  * thread contention.
     60  *
     61  * There is no overflow checking for the 64 bit bucket counters. It
     62  * takes a few nanoseconds to add a sample to the histogram, so it
     63  * would take at least a few CPU-centuries to cause an overflow.
     64  * Aggregate statistics from a quarter of a million CPUs might
     65  * overflow in a day. (Provided that in both examples the CPUs are
     66  * doing nothing apart from repeatedly adding 1 to histogram buckets.)
     67  */
     68 
     69 typedef struct isc_histo      isc_histo_t;
     70 typedef struct isc_histomulti isc_histomulti_t;
     71 
     72 #define ISC_HISTO_MINBITS      1
     73 #define ISC_HISTO_MAXBITS      18
     74 #define ISC_HISTO_MINDIGITS    1
     75 #define ISC_HISTO_MAXDIGITS    6
     76 #define ISC_HISTO_MAXQUANTILES 101 /* enough for all the percentiles */
     77 
     78 /*
     79  * How many values map 1:1 to buckets for a given number of sigbits?
     80  * These are the buckets at the low end, starting from zero.
     81  */
     82 #define ISC_HISTO_UNITBUCKETS(sigbits) (2 << (sigbits))
     83 
     84 void
     85 isc_histo_create(isc_mem_t *mctx, uint sigbits, isc_histo_t **hgp);
     86 /*%<
     87  * Create a histogram.
     88  *
     89  * The relative error of values stored in the histogram is less than
     90  * `pow(2.0, -sigbits)`.
     91  *
     92  * Requires:
     93  *\li	`sigbits >= ISC_HISTO_MINBITS`
     94  *\li	`sigbits <= ISC_HISTO_MAXBITS`
     95  *\li	`hgp != NULL`
     96  *\li	`*hgp == NULL`
     97  *
     98  * Ensures:
     99  *\li	`*hgp` is a pointer to a histogram.
    100  */
    101 
    102 void
    103 isc_histo_destroy(isc_histo_t **hgp);
    104 /*%<
    105  * Destroy a histogram
    106  *
    107  * Requires:
    108  *\li	`hgp != NULL`
    109  *\li	`*hgp` is a pointer to a valid histogram
    110  *
    111  * Ensures:
    112  *\li	all memory allocated by the histogram has been released
    113  *\li	`*hgp` is NULL
    114  */
    115 
    116 uint
    117 isc_histo_sigbits(isc_histo_t *hg);
    118 /*%<
    119  * Get the histogram's `sigbits` setting
    120  *
    121  * Requires:
    122  *\li	`hg` is a pointer to a valid histogram
    123  */
    124 
    125 uint
    126 isc_histo_bits_to_digits(uint bits);
    127 /*%<
    128  * Convert binary significant figures to decimal significant figures,
    129  * rounding down, i.e. get the decimal precision you can expect from a
    130  * given number of significant bits.
    131  *
    132  * Requires:
    133  *\li	`bits >= ISC_HISTO_MINBITS`
    134  *\li	`bits <= ISC_HISTO_MAXBITS`
    135  */
    136 
    137 uint
    138 isc_histo_digits_to_bits(uint digits);
    139 /*%<
    140  * Convert decimal significant figures to binary significant figures,
    141  * rounding up, i.e. get the number of significant bits required to
    142  * achieve the given decimal precision.
    143  *
    144  * Requires:
    145  *\li	`digits >= ISC_HISTO_MINDIGS`
    146  *\li	`digits <= ISC_HISTO_MAXDIGS`
    147  */
    148 
    149 /**********************************************************************/
    150 
    151 void
    152 isc_histo_inc(isc_histo_t *hg, uint64_t value);
    153 /*%<
    154  * Add 1 to the value's bucket
    155  *
    156  * Requires:
    157  *\li	`hg` is a pointer to a valid histogram
    158  */
    159 
    160 void
    161 isc_histo_add(isc_histo_t *hg, uint64_t value, uint64_t inc);
    162 /*%<
    163  * Add an arbitrary increment to the value's bucket
    164  *
    165  * Note: there is no counter overflow checking
    166  *
    167  * Requires:
    168  *\li	`hg` is a pointer to a valid histogram
    169  */
    170 
    171 void
    172 isc_histo_put(isc_histo_t *hg, uint64_t min, uint64_t max, uint64_t count);
    173 /*
    174  * Import a collection of samples, where values between `min` and
    175  * `max` inclusive occurred `count` times. This function is a
    176  * counterpart to `isc_histo_get()`.
    177  *
    178  * Note: there is no counter overflow checking
    179  *
    180  * Requires:
    181  *\li	`min <= max`
    182  *\li	`hg` is a pointer to a valid histogram
    183  */
    184 
    185 isc_result_t
    186 isc_histo_get(const isc_histo_t *hg, uint key, uint64_t *minp, uint64_t *maxp,
    187 	      uint64_t *countp);
    188 /*%<
    189  * Export information about a bucket.
    190  *
    191  * This can be used as an iterator, by initializing `key` to zero
    192  * and incrementing by one or using `isc_histo_next()` until
    193  * `isc_histo_get()` returns ISC_R_RANGE. The number of iterations is
    194  * less than `64 << sigbits`. (64 for the maximum number of chunks,
    195  * multiplied by the size of each chunk.)
    196  *
    197  * It is also a counterpart to `isc_histo_put()`.
    198  *
    199  * If `minp` is non-NULL it is set to the minimum inclusive value
    200  * that maps to this bucket.
    201  *
    202  * If `maxp` is non-NULL it is set to the maximum inclusive value
    203  * that maps to this bucket.
    204  *
    205  * If `countp` is non-NULL it is set to the bucket's counter,
    206  * which can be zero.
    207  *
    208  * Requires:
    209  *\li	`hg` is a pointer to a valid histogram
    210  *
    211  * Returns:
    212  *\li	ISC_R_SUCCESS, if `key` is valid
    213  *\li	ISC_R_RANGE, otherwise
    214  */
    215 
    216 void
    217 isc_histo_next(const isc_histo_t *hg, uint *keyp);
    218 /*%<
    219  * Skip to the next key, omitting chunks of unallocated buckets.
    220  *
    221  * This function does not skip buckets that have been allocated but
    222  * are zero. A chunk contains `1 << sigbits` buckets, and buckets
    223  * are created in bulk one chunk at a time.
    224  *
    225  * Example:
    226  *
    227  *	uint64_t min, max, count;
    228  *	for (uint key = 0;
    229  *	     isc_histo_get(hg, key, &min, &max, &count) == ISC_R_SUCCESS;
    230  *	     isc_histo_next(hg, &key))
    231  *	{
    232  *		// do something with the bucket
    233  *	}
    234  *
    235  * Requires:
    236  *\li	`hg` is a pointer to a valid histogram
    237  *\li	`keyp != NULL`
    238  */
    239 
    240 void
    241 isc_histo_merge(isc_histo_t **targetp, const isc_histo_t *source);
    242 /*%<
    243  * Increase the counts in `*ptarget` by the counts recorded in `source`
    244  *
    245  * If `*targetp == NULL` then `*ptarget` is set to point to a new
    246  * histogram with the same `sigbits` as the `source`.
    247  *
    248  * This function uses `isc_histo_get()` and `isc_histo_next()` to
    249  * export the data from `source`, and `isc_histo_put()` to import it
    250  * into `*ptarget`.
    251  *
    252  * Requires:
    253  *\li	`targetp != NULL`
    254  *\li	`*targetp` is NULL or a pointer to a valid histogram
    255  *\li	`source` is a pointer to a valid histogram
    256  *
    257  * Ensures:
    258  *\li	`*targetp` is a pointer to a valid histogram
    259  */
    260 
    261 /**********************************************************************/
    262 
    263 void
    264 isc_histomulti_create(isc_mem_t *mctx, uint sigbits, isc_histomulti_t **hmp);
    265 /*%<
    266  * Create a multithreaded sharded histogram.
    267  *
    268  * Although an `isc_histo_t` is thread-safe, it can suffer
    269  * from cache contention under heavy load. To avoid this,
    270  * an `isc_histomulti_t` contains a histogram per thread,
    271  * so updates are local and low-contention.
    272  *
    273  * Requires:
    274  *\li	`sigbits >= ISC_HISTO_MINBITS`
    275  *\li	`sigbits <= ISC_HISTO_MAXBITS`
    276  *\li	`hmp != NULL`
    277  *\li	`*hmp == NULL`
    278  *
    279  * Ensures:
    280  *\li	`*hmp` is a pointer to a multithreaded sharded histogram.
    281  */
    282 
    283 void
    284 isc_histomulti_destroy(isc_histomulti_t **hmp);
    285 /*%<
    286  * Destroy a multithreaded sharded histogram
    287  *
    288  * Requires:
    289  *\li	`hmp != NULL`
    290  *\li	`*hmp` is a pointer to a valid multithreaded sharded histogram
    291  *
    292  * Ensures:
    293  *\li	all memory allocated by the histogram has been released
    294  *\li	`*hmp == NULL`
    295  */
    296 
    297 void
    298 isc_histomulti_merge(isc_histo_t **targetp, const isc_histomulti_t *source);
    299 /*%<
    300  * Increase the counts in `*targetp` by the counts recorded in `source`
    301  *
    302  * The target histogram is created if `*targetp` is NULL.
    303  *
    304  * Requires:
    305  *\li	`targetp != NULL`
    306  *\li	`*targetp` is NULL or a pointer to a valid histogram
    307  *\li	`source` is a pointer to a valid multithreaded sharded histogram
    308  *
    309  * Ensures:
    310  *\li	`*targetp` is a pointer to a valid histogram
    311  */
    312 
    313 void
    314 isc_histomulti_inc(isc_histomulti_t *hm, uint64_t value);
    315 /*%<
    316  * Add 1 to the value's bucket
    317  *
    318  * Requires:
    319  *\li	`hm` is a pointer to a valid histomulti
    320  */
    321 
    322 void
    323 isc_histomulti_add(isc_histomulti_t *hm, uint64_t value, uint64_t inc);
    324 /*%<
    325  * Add an arbitrary increment to the value's bucket
    326  *
    327  * Requires:
    328  *\li	`hm` is a pointer to a valid histomulti
    329  */
    330 
    331 /**********************************************************************/
    332 
    333 void
    334 isc_histo_moments(const isc_histo_t *hg, double *pm0, double *pm1, double *pm2);
    335 /*%<
    336  * Get the population, mean, and standard deviation of a histogram.
    337  *
    338  * If `pm0` is non-NULL it is set to the population of the histogram.
    339  * (Strictly speaking, the zeroth moment is `pop / pop == 1`.)
    340  *
    341  * If `pm1` is non-NULL it is set to the mean (first moment) of the
    342  * recorded data.
    343  *
    344  * If `pm2` is non-NULL it is set to the standard deviation of the
    345  * recorded data. The standard deviation is the square root of the
    346  * variance, which is the second moment about the mean.
    347  *
    348  * It is safe if the histogram is concurrently modified.
    349  *
    350  * Requires:
    351  *\li	`hg` is a pointer to a valid histogram
    352  */
    353 
    354 isc_result_t
    355 isc_histo_quantiles(const isc_histo_t *hg, uint size, const double *fraction,
    356 		    uint64_t *value);
    357 /*%<
    358  * The quantile function (aka inverse cumulative distribution function)
    359  * of the histogram. What value is greater than the given fraction of
    360  * the population?
    361  *
    362  * A fraction of 0.5 gets the median value: it is greater than half
    363  * the population. 0.75 gets the third quartile value, and 0.99 gets
    364  * the 99th percentile value. The fraction must be between 0.0 and 1.0
    365  * inclusive.
    366  *
    367  * https://enwp.org/Quantile_function
    368  *
    369  * This implementation allows you to query quantile values for
    370  * multiple fractions in one function call. Internally, it makes one
    371  * linear scan over the histogram's buckets to find all the fractions.
    372  * Buckets are scanned from high to low, so that querying large
    373  * quantiles is more efficient. The `fraction` array must be sorted in
    374  * decreasing order. The results are stored in the `value` array. Both
    375  * arrays have `size` elements.
    376  *
    377  * The results may be nonsense if the histogram is concurrently
    378  * modified. To get a stable copy you can call `isc_histo_merge()`.
    379  *
    380  * Requires:
    381  *\li	`hg` is a pointer to a valid histogram
    382  *\li	`0 < size && size <= ISC_HISTO_MAXQUANTILES`
    383  *\li	`fraction != NULL`
    384  *\li	`value != NULL`
    385  *\li	`0.0 <= fraction[i] && fraction[i] <= 1.0` for every element
    386  *\li	`fraction[i - 1] > fraction[i]` for every pair of elements
    387  *
    388  * Returns:
    389  *\li	ISC_R_SUCCESS, if results were stored in the `value` array
    390  *\li	ISC_R_UNSET, if the histogram is empty
    391  */
    392 
    393 /**********************************************************************/
    394