1 /* $NetBSD: histo.h,v 1.2 2025/01/26 16:25:41 christos Exp $ */ 2 3 /* 4 * Copyright (C) Internet Systems Consortium, Inc. ("ISC") 5 * 6 * SPDX-License-Identifier: MPL-2.0 7 * 8 * This Source Code Form is subject to the terms of the Mozilla Public 9 * License, v. 2.0. If a copy of the MPL was not distributed with this 10 * file, you can obtain one at https://mozilla.org/MPL/2.0/. 11 * 12 * See the COPYRIGHT file distributed with this work for additional 13 * information regarding copyright ownership. 14 */ 15 16 #pragma once 17 18 #include <sys/types.h> 19 20 #include <isc/mem.h> 21 22 /* 23 * An `isc_histo_t` is a thread-safe histogram of `uint64_t` values. 24 * It keeps a count of how many values land in each bucket. Use the 25 * `isc_histo_inc()`, `isc_histo_acc()`, and `isc_histo_put()` 26 * functions to add values to the histogram. 27 * 28 * Values are mapped to buckets by rounding them according to a 29 * configurable precision, expressed as a number of significant bits. 30 * The bits <-> digits functions convert betwen decimal significant 31 * digits (as in scientific notation) and binary significant bits. 32 * 33 * You can use the `isc_histo_get()` function to export data from the 34 * histogram. The range of a bucket is returned as its minimum and 35 * maximum values, inclusive, i.e. a closed interval. We use closed 36 * intervals so we are able to express the maximum of the last bucket, 37 * UINT64_MAX, although half-open intervals are more common in C. 38 * 39 * You can calculate some basic statistics directly from a histogram. 40 * The `isc_histo_quantiles()` function can get a histogram's median, 41 * 99th percentile, etc. The `isc_histo_moments()` function gets a 42 * histogram's population, mean, and standard deviation. 43 * 44 * The size of a histogram depends on the range of values in the 45 * stream of samples, not the number of samples. Bucket counters are 46 * 64 bits each, and are allocated in chunks of `1 << sigbits` where 47 * `sigbits` is the histogram's configured precision. There are at 48 * most 64 chunks, one for each bit of a 64 bit value. Histograms with 49 * greater precision have larger chunks. 50 * 51 * At the low end (values near zero) there is one value per bucket, 52 * then two values, four, eight, etc. The number of values that map to 53 * a bucket is the same in each chunk. Chunks 0 and 1 have one value 54 * per bucket, (see `ISC_HISTO_UNITBUCKETS()` below), chunk 2 has 2 55 * values per bucket, chunk 3 has 4, etc. 56 * 57 * The update cost is roughly constant and very small (not much more 58 * than an atomic increment). It mostly depends on cache locality and 59 * thread contention. 60 * 61 * There is no overflow checking for the 64 bit bucket counters. It 62 * takes a few nanoseconds to add a sample to the histogram, so it 63 * would take at least a few CPU-centuries to cause an overflow. 64 * Aggregate statistics from a quarter of a million CPUs might 65 * overflow in a day. (Provided that in both examples the CPUs are 66 * doing nothing apart from repeatedly adding 1 to histogram buckets.) 67 */ 68 69 typedef struct isc_histo isc_histo_t; 70 typedef struct isc_histomulti isc_histomulti_t; 71 72 #define ISC_HISTO_MINBITS 1 73 #define ISC_HISTO_MAXBITS 18 74 #define ISC_HISTO_MINDIGITS 1 75 #define ISC_HISTO_MAXDIGITS 6 76 #define ISC_HISTO_MAXQUANTILES 101 /* enough for all the percentiles */ 77 78 /* 79 * How many values map 1:1 to buckets for a given number of sigbits? 80 * These are the buckets at the low end, starting from zero. 81 */ 82 #define ISC_HISTO_UNITBUCKETS(sigbits) (2 << (sigbits)) 83 84 void 85 isc_histo_create(isc_mem_t *mctx, uint sigbits, isc_histo_t **hgp); 86 /*%< 87 * Create a histogram. 88 * 89 * The relative error of values stored in the histogram is less than 90 * `pow(2.0, -sigbits)`. 91 * 92 * Requires: 93 *\li `sigbits >= ISC_HISTO_MINBITS` 94 *\li `sigbits <= ISC_HISTO_MAXBITS` 95 *\li `hgp != NULL` 96 *\li `*hgp == NULL` 97 * 98 * Ensures: 99 *\li `*hgp` is a pointer to a histogram. 100 */ 101 102 void 103 isc_histo_destroy(isc_histo_t **hgp); 104 /*%< 105 * Destroy a histogram 106 * 107 * Requires: 108 *\li `hgp != NULL` 109 *\li `*hgp` is a pointer to a valid histogram 110 * 111 * Ensures: 112 *\li all memory allocated by the histogram has been released 113 *\li `*hgp` is NULL 114 */ 115 116 uint 117 isc_histo_sigbits(isc_histo_t *hg); 118 /*%< 119 * Get the histogram's `sigbits` setting 120 * 121 * Requires: 122 *\li `hg` is a pointer to a valid histogram 123 */ 124 125 uint 126 isc_histo_bits_to_digits(uint bits); 127 /*%< 128 * Convert binary significant figures to decimal significant figures, 129 * rounding down, i.e. get the decimal precision you can expect from a 130 * given number of significant bits. 131 * 132 * Requires: 133 *\li `bits >= ISC_HISTO_MINBITS` 134 *\li `bits <= ISC_HISTO_MAXBITS` 135 */ 136 137 uint 138 isc_histo_digits_to_bits(uint digits); 139 /*%< 140 * Convert decimal significant figures to binary significant figures, 141 * rounding up, i.e. get the number of significant bits required to 142 * achieve the given decimal precision. 143 * 144 * Requires: 145 *\li `digits >= ISC_HISTO_MINDIGS` 146 *\li `digits <= ISC_HISTO_MAXDIGS` 147 */ 148 149 /**********************************************************************/ 150 151 void 152 isc_histo_inc(isc_histo_t *hg, uint64_t value); 153 /*%< 154 * Add 1 to the value's bucket 155 * 156 * Requires: 157 *\li `hg` is a pointer to a valid histogram 158 */ 159 160 void 161 isc_histo_add(isc_histo_t *hg, uint64_t value, uint64_t inc); 162 /*%< 163 * Add an arbitrary increment to the value's bucket 164 * 165 * Note: there is no counter overflow checking 166 * 167 * Requires: 168 *\li `hg` is a pointer to a valid histogram 169 */ 170 171 void 172 isc_histo_put(isc_histo_t *hg, uint64_t min, uint64_t max, uint64_t count); 173 /* 174 * Import a collection of samples, where values between `min` and 175 * `max` inclusive occurred `count` times. This function is a 176 * counterpart to `isc_histo_get()`. 177 * 178 * Note: there is no counter overflow checking 179 * 180 * Requires: 181 *\li `min <= max` 182 *\li `hg` is a pointer to a valid histogram 183 */ 184 185 isc_result_t 186 isc_histo_get(const isc_histo_t *hg, uint key, uint64_t *minp, uint64_t *maxp, 187 uint64_t *countp); 188 /*%< 189 * Export information about a bucket. 190 * 191 * This can be used as an iterator, by initializing `key` to zero 192 * and incrementing by one or using `isc_histo_next()` until 193 * `isc_histo_get()` returns ISC_R_RANGE. The number of iterations is 194 * less than `64 << sigbits`. (64 for the maximum number of chunks, 195 * multiplied by the size of each chunk.) 196 * 197 * It is also a counterpart to `isc_histo_put()`. 198 * 199 * If `minp` is non-NULL it is set to the minimum inclusive value 200 * that maps to this bucket. 201 * 202 * If `maxp` is non-NULL it is set to the maximum inclusive value 203 * that maps to this bucket. 204 * 205 * If `countp` is non-NULL it is set to the bucket's counter, 206 * which can be zero. 207 * 208 * Requires: 209 *\li `hg` is a pointer to a valid histogram 210 * 211 * Returns: 212 *\li ISC_R_SUCCESS, if `key` is valid 213 *\li ISC_R_RANGE, otherwise 214 */ 215 216 void 217 isc_histo_next(const isc_histo_t *hg, uint *keyp); 218 /*%< 219 * Skip to the next key, omitting chunks of unallocated buckets. 220 * 221 * This function does not skip buckets that have been allocated but 222 * are zero. A chunk contains `1 << sigbits` buckets, and buckets 223 * are created in bulk one chunk at a time. 224 * 225 * Example: 226 * 227 * uint64_t min, max, count; 228 * for (uint key = 0; 229 * isc_histo_get(hg, key, &min, &max, &count) == ISC_R_SUCCESS; 230 * isc_histo_next(hg, &key)) 231 * { 232 * // do something with the bucket 233 * } 234 * 235 * Requires: 236 *\li `hg` is a pointer to a valid histogram 237 *\li `keyp != NULL` 238 */ 239 240 void 241 isc_histo_merge(isc_histo_t **targetp, const isc_histo_t *source); 242 /*%< 243 * Increase the counts in `*ptarget` by the counts recorded in `source` 244 * 245 * If `*targetp == NULL` then `*ptarget` is set to point to a new 246 * histogram with the same `sigbits` as the `source`. 247 * 248 * This function uses `isc_histo_get()` and `isc_histo_next()` to 249 * export the data from `source`, and `isc_histo_put()` to import it 250 * into `*ptarget`. 251 * 252 * Requires: 253 *\li `targetp != NULL` 254 *\li `*targetp` is NULL or a pointer to a valid histogram 255 *\li `source` is a pointer to a valid histogram 256 * 257 * Ensures: 258 *\li `*targetp` is a pointer to a valid histogram 259 */ 260 261 /**********************************************************************/ 262 263 void 264 isc_histomulti_create(isc_mem_t *mctx, uint sigbits, isc_histomulti_t **hmp); 265 /*%< 266 * Create a multithreaded sharded histogram. 267 * 268 * Although an `isc_histo_t` is thread-safe, it can suffer 269 * from cache contention under heavy load. To avoid this, 270 * an `isc_histomulti_t` contains a histogram per thread, 271 * so updates are local and low-contention. 272 * 273 * Requires: 274 *\li `sigbits >= ISC_HISTO_MINBITS` 275 *\li `sigbits <= ISC_HISTO_MAXBITS` 276 *\li `hmp != NULL` 277 *\li `*hmp == NULL` 278 * 279 * Ensures: 280 *\li `*hmp` is a pointer to a multithreaded sharded histogram. 281 */ 282 283 void 284 isc_histomulti_destroy(isc_histomulti_t **hmp); 285 /*%< 286 * Destroy a multithreaded sharded histogram 287 * 288 * Requires: 289 *\li `hmp != NULL` 290 *\li `*hmp` is a pointer to a valid multithreaded sharded histogram 291 * 292 * Ensures: 293 *\li all memory allocated by the histogram has been released 294 *\li `*hmp == NULL` 295 */ 296 297 void 298 isc_histomulti_merge(isc_histo_t **targetp, const isc_histomulti_t *source); 299 /*%< 300 * Increase the counts in `*targetp` by the counts recorded in `source` 301 * 302 * The target histogram is created if `*targetp` is NULL. 303 * 304 * Requires: 305 *\li `targetp != NULL` 306 *\li `*targetp` is NULL or a pointer to a valid histogram 307 *\li `source` is a pointer to a valid multithreaded sharded histogram 308 * 309 * Ensures: 310 *\li `*targetp` is a pointer to a valid histogram 311 */ 312 313 void 314 isc_histomulti_inc(isc_histomulti_t *hm, uint64_t value); 315 /*%< 316 * Add 1 to the value's bucket 317 * 318 * Requires: 319 *\li `hm` is a pointer to a valid histomulti 320 */ 321 322 void 323 isc_histomulti_add(isc_histomulti_t *hm, uint64_t value, uint64_t inc); 324 /*%< 325 * Add an arbitrary increment to the value's bucket 326 * 327 * Requires: 328 *\li `hm` is a pointer to a valid histomulti 329 */ 330 331 /**********************************************************************/ 332 333 void 334 isc_histo_moments(const isc_histo_t *hg, double *pm0, double *pm1, double *pm2); 335 /*%< 336 * Get the population, mean, and standard deviation of a histogram. 337 * 338 * If `pm0` is non-NULL it is set to the population of the histogram. 339 * (Strictly speaking, the zeroth moment is `pop / pop == 1`.) 340 * 341 * If `pm1` is non-NULL it is set to the mean (first moment) of the 342 * recorded data. 343 * 344 * If `pm2` is non-NULL it is set to the standard deviation of the 345 * recorded data. The standard deviation is the square root of the 346 * variance, which is the second moment about the mean. 347 * 348 * It is safe if the histogram is concurrently modified. 349 * 350 * Requires: 351 *\li `hg` is a pointer to a valid histogram 352 */ 353 354 isc_result_t 355 isc_histo_quantiles(const isc_histo_t *hg, uint size, const double *fraction, 356 uint64_t *value); 357 /*%< 358 * The quantile function (aka inverse cumulative distribution function) 359 * of the histogram. What value is greater than the given fraction of 360 * the population? 361 * 362 * A fraction of 0.5 gets the median value: it is greater than half 363 * the population. 0.75 gets the third quartile value, and 0.99 gets 364 * the 99th percentile value. The fraction must be between 0.0 and 1.0 365 * inclusive. 366 * 367 * https://enwp.org/Quantile_function 368 * 369 * This implementation allows you to query quantile values for 370 * multiple fractions in one function call. Internally, it makes one 371 * linear scan over the histogram's buckets to find all the fractions. 372 * Buckets are scanned from high to low, so that querying large 373 * quantiles is more efficient. The `fraction` array must be sorted in 374 * decreasing order. The results are stored in the `value` array. Both 375 * arrays have `size` elements. 376 * 377 * The results may be nonsense if the histogram is concurrently 378 * modified. To get a stable copy you can call `isc_histo_merge()`. 379 * 380 * Requires: 381 *\li `hg` is a pointer to a valid histogram 382 *\li `0 < size && size <= ISC_HISTO_MAXQUANTILES` 383 *\li `fraction != NULL` 384 *\li `value != NULL` 385 *\li `0.0 <= fraction[i] && fraction[i] <= 1.0` for every element 386 *\li `fraction[i - 1] > fraction[i]` for every pair of elements 387 * 388 * Returns: 389 *\li ISC_R_SUCCESS, if results were stored in the `value` array 390 *\li ISC_R_UNSET, if the histogram is empty 391 */ 392 393 /**********************************************************************/ 394