Home | History | Annotate | Line # | Download | only in isc
      1 /*	$NetBSD: histo.c,v 1.4 2026/06/19 20:10:01 christos Exp $	*/
      2 
      3 /*
      4  * Copyright (C) Internet Systems Consortium, Inc. ("ISC")
      5  *
      6  * SPDX-License-Identifier: MPL-2.0
      7  *
      8  * This Source Code Form is subject to the terms of the Mozilla Public
      9  * License, v. 2.0. If a copy of the MPL was not distributed with this
     10  * file, you can obtain one at https://mozilla.org/MPL/2.0/.
     11  *
     12  * See the COPYRIGHT file distributed with this work for additional
     13  * information regarding copyright ownership.
     14  */
     15 
     16 #include <assert.h>
     17 #include <errno.h>
     18 #include <math.h>
     19 #include <stdbool.h>
     20 #include <stdint.h>
     21 #include <stdio.h>
     22 #include <stdlib.h>
     23 #include <string.h>
     24 
     25 #include <isc/atomic.h>
     26 #include <isc/histo.h>
     27 #include <isc/magic.h>
     28 #include <isc/mem.h>
     29 #include <isc/tid.h>
     30 
     31 #define HISTO_MAGIC	    ISC_MAGIC('H', 's', 't', 'o')
     32 #define HISTO_VALID(p)	    ISC_MAGIC_VALID(p, HISTO_MAGIC)
     33 #define HISTOMULTI_MAGIC    ISC_MAGIC('H', 'g', 'M', 't')
     34 #define HISTOMULTI_VALID(p) ISC_MAGIC_VALID(p, HISTOMULTI_MAGIC)
     35 
     36 /*
     37  * Natural logarithms of 2 and 10 for converting precisions between
     38  * binary and decimal significant figures
     39  */
     40 #define LN_2  0.693147180559945309
     41 #define LN_10 2.302585092994045684
     42 
     43 /*
     44  * The chunks array has a static size for simplicity, fixed as the
     45  * number of bits in a value. That means we waste a little extra space
     46  * that could be saved by omitting the exponents that are covered by
     47  * `sigbits`. The following macros calculate (at run time) the exact
     48  * number of buckets when we need to do accurate bounds checks.
     49  *
     50  * For a discussion of the floating point terminology, see the
     51  * commmentary on `value_to_key()` below.
     52  *
     53  * We often use the variable names `c` for chunk and `b` for bucket.
     54  */
     55 #define CHUNKS 64
     56 
     57 #define DENORMALS(hg) ((hg)->sigbits - 1)
     58 #define MANTISSAS(hg) (1 << (hg)->sigbits)
     59 #define EXPONENTS(hg) (CHUNKS - DENORMALS(hg))
     60 #define BUCKETS(hg)   (EXPONENTS(hg) * MANTISSAS(hg))
     61 
     62 #define MAXCHUNK(hg)  EXPONENTS(hg)
     63 #define CHUNKSIZE(hg) MANTISSAS(hg)
     64 
     65 #ifdef _LP64
     66 typedef atomic_uint_fast64_t hg_bucket_t;
     67 #else
     68 typedef atomic_uint_fast32_t hg_bucket_t;
     69 #endif
     70 typedef atomic_ptr(hg_bucket_t) hg_chunk_t;
     71 
     72 struct isc_histo {
     73 	uint magic;
     74 	uint sigbits;
     75 	isc_mem_t *mctx;
     76 	hg_chunk_t chunk[CHUNKS];
     77 };
     78 
     79 struct isc_histomulti {
     80 	uint magic;
     81 	uint size;
     82 	isc_histo_t *hg[];
     83 };
     84 
     85 /**********************************************************************/
     86 
     87 void
     88 isc_histo_create(isc_mem_t *mctx, uint sigbits, isc_histo_t **hgp) {
     89 	REQUIRE(sigbits >= ISC_HISTO_MINBITS);
     90 	REQUIRE(sigbits <= ISC_HISTO_MAXBITS);
     91 	REQUIRE(hgp != NULL);
     92 	REQUIRE(*hgp == NULL);
     93 
     94 	isc_histo_t *hg = isc_mem_get(mctx, sizeof(*hg));
     95 	*hg = (isc_histo_t){
     96 		.magic = HISTO_MAGIC,
     97 		.sigbits = sigbits,
     98 	};
     99 	isc_mem_attach(mctx, &hg->mctx);
    100 
    101 	*hgp = hg;
    102 }
    103 
    104 void
    105 isc_histo_destroy(isc_histo_t **hgp) {
    106 	REQUIRE(hgp != NULL);
    107 	REQUIRE(HISTO_VALID(*hgp));
    108 
    109 	isc_histo_t *hg = *hgp;
    110 	*hgp = NULL;
    111 
    112 	for (uint c = 0; c < CHUNKS; c++) {
    113 		if (hg->chunk[c] != NULL) {
    114 			isc_mem_cput(hg->mctx, hg->chunk[c], CHUNKSIZE(hg),
    115 				     sizeof(hg_bucket_t));
    116 		}
    117 	}
    118 	isc_mem_putanddetach(&hg->mctx, hg, sizeof(*hg));
    119 }
    120 
    121 /**********************************************************************/
    122 
    123 uint
    124 isc_histo_sigbits(isc_histo_t *hg) {
    125 	REQUIRE(HISTO_VALID(hg));
    126 	return hg->sigbits;
    127 }
    128 
    129 /*
    130  * use precomputed logs and builtins to avoid linking with libm
    131  */
    132 
    133 uint
    134 isc_histo_bits_to_digits(uint bits) {
    135 	REQUIRE(bits >= ISC_HISTO_MINBITS);
    136 	REQUIRE(bits <= ISC_HISTO_MAXBITS);
    137 	return floor(1.0 - (1.0 - bits) * LN_2 / LN_10);
    138 }
    139 
    140 uint
    141 isc_histo_digits_to_bits(uint digits) {
    142 	REQUIRE(digits >= ISC_HISTO_MINDIGITS);
    143 	REQUIRE(digits <= ISC_HISTO_MAXDIGITS);
    144 	return ceil(1.0 - (1.0 - digits) * LN_10 / LN_2);
    145 }
    146 
    147 /**********************************************************************/
    148 
    149 /*
    150  * The way we map buckets to keys is what gives the histogram a
    151  * consistent relative error across the whole range of `uint64_t`.
    152  * The mapping is log-linear: a chunk key is the logarithm of part
    153  * of the value (in other words, chunks are spaced exponentially);
    154  * and a bucket within a chunk is a linear function of another part
    155  * of the value.
    156  *
    157  * This log-linear spacing is similar to the size classes used by
    158  * jemalloc. It is also the way floating point numbers work: the
    159  * exponent is the log part, and the mantissa is the linear part.
    160  *
    161  * So, a chunk number is the log (base 2) of a `uint64_t`, which is
    162  * between 0 and 63, which is why there are up to 64 chunks. In
    163  * floating point terms the chunk number is the exponent. The
    164  * histogram's number of significant bits is the size of the
    165  * mantissa, which indexes buckets within each chunk.
    166  *
    167  * A fast way to get the logarithm of a positive integer is CLZ,
    168  * count leading zeroes.
    169  *
    170  * Chunk zero is special. Chunk 1 covers values between `CHUNKSIZE`
    171  * and `CHUNKSIZE * 2 - 1`, where `CHUNKSIZE == exponent << sigbits
    172  * == 1 << sigbits`. Each chunk has CHUNKSIZE buckets, so chunk 1 has
    173  * one value per bucket. There are CHUNKSIZE values before chunk 1
    174  * which map to chunk 0, so it also has one value per bucket. (Hence
    175  * the first two chunks have one value per bucket.) The values in
    176  * chunk 0 correspond to denormal numbers in floating point terms.
    177  * They are also the values where `63 - sigbits - clz` would be less
    178  * than one if denormals were not handled specially.
    179  *
    180  * This branchless conversion is due to Paul Khuong: see bin_down_of() in
    181  * https://pvk.ca/Blog/2015/06/27/linear-log-bucketing-fast-versatile-simple/
    182  *
    183  * This function is in the `isc_histo_inc()` fast path.
    184  */
    185 static inline uint
    186 value_to_key(const isc_histo_t *hg, uint64_t value) {
    187 	/* ensure that denormal numbers are all in chunk zero */
    188 	uint64_t chunked = value | CHUNKSIZE(hg);
    189 	int clz = __builtin_clzll((unsigned long long)(chunked));
    190 	/* actually 1 less than the exponent except for denormals */
    191 	uint exponent = 63 - hg->sigbits - clz;
    192 	/* mantissa has leading bit set except for denormals */
    193 	uint mantissa = value >> exponent;
    194 	/* leading bit of mantissa adds one to exponent */
    195 	return (exponent << hg->sigbits) + mantissa;
    196 }
    197 
    198 /*
    199  * Inverse functions of `value_to_key()`, to get the minimum and
    200  * maximum values that map to a particular key.
    201  *
    202  * We must not cause undefined behaviour by hitting integer limits,
    203  * which is a risk when we aim to cover the entire range of `uint64_t`.
    204  *
    205  * The maximum value in the last bucket is UINT64_MAX, which
    206  * `key_to_maxval()` gets by deliberately subtracting `0 - 1`,
    207  * undeflowing a `uint64_t`. That is OK when unsigned.
    208  *
    209  * We must take care not to shift too much in `key_to_minval()`.
    210  * The largest key passed by `key_to_maxval()` is `BUCKETS(hg)`, so
    211  *	`exponent == EXPONENTS(hg) - 1 == 64 - sigbits`
    212  * which is always less than 64, so the size of the shift is OK.
    213  *
    214  * The `mantissa` in this edge case is just `chunksize`, which when
    215  * shifted becomes `1 << 64` which overflows `uint64_t` Again this is
    216  * OK when unsigned, so the return value is zero.
    217  */
    218 
    219 static inline uint64_t
    220 key_to_minval(const isc_histo_t *hg, uint key) {
    221 	uint chunksize = CHUNKSIZE(hg);
    222 	uint exponent = (key / chunksize) - 1;
    223 	uint64_t mantissa = (key % chunksize) + chunksize;
    224 	return key < chunksize ? key : mantissa << exponent;
    225 }
    226 
    227 static inline uint64_t
    228 key_to_maxval(const isc_histo_t *hg, uint key) {
    229 	return key_to_minval(hg, key + 1) - 1;
    230 }
    231 
    232 /**********************************************************************/
    233 
    234 static hg_bucket_t *
    235 key_to_new_bucket(isc_histo_t *hg, uint key) {
    236 	/* slow path */
    237 	uint chunksize = CHUNKSIZE(hg);
    238 	uint chunk = key / chunksize;
    239 	uint bucket = key % chunksize;
    240 	hg_bucket_t *old_cp = NULL;
    241 	hg_bucket_t *new_cp = isc_mem_cget(hg->mctx, CHUNKSIZE(hg),
    242 					   sizeof(hg_bucket_t));
    243 	hg_chunk_t *cpp = &hg->chunk[chunk];
    244 	if (atomic_compare_exchange_strong_acq_rel(cpp, &old_cp, new_cp)) {
    245 		return &new_cp[bucket];
    246 	} else {
    247 		/* lost the race, so use the winner's chunk */
    248 		isc_mem_cput(hg->mctx, new_cp, CHUNKSIZE(hg),
    249 			     sizeof(hg_bucket_t));
    250 		return &old_cp[bucket];
    251 	}
    252 }
    253 
    254 static hg_bucket_t *
    255 get_chunk(const isc_histo_t *hg, uint chunk) {
    256 	return atomic_load_acquire(&hg->chunk[chunk]);
    257 }
    258 
    259 static inline hg_bucket_t *
    260 key_to_bucket(const isc_histo_t *hg, uint key) {
    261 	/* fast path */
    262 	uint chunksize = CHUNKSIZE(hg);
    263 	uint chunk = key / chunksize;
    264 	uint bucket = key % chunksize;
    265 	hg_bucket_t *cp = get_chunk(hg, chunk);
    266 	return cp == NULL ? NULL : &cp[bucket];
    267 }
    268 
    269 static inline uint64_t
    270 bucket_count(const hg_bucket_t *bp) {
    271 	return bp == NULL ? 0 : atomic_load_relaxed(bp);
    272 }
    273 
    274 static inline uint64_t
    275 get_key_count(const isc_histo_t *hg, uint key) {
    276 	return bucket_count(key_to_bucket(hg, key));
    277 }
    278 
    279 static inline void
    280 add_key_count(isc_histo_t *hg, uint key, uint64_t inc) {
    281 	/* fast path */
    282 	if (inc > 0) {
    283 		hg_bucket_t *bp = key_to_bucket(hg, key);
    284 		bp = bp != NULL ? bp : key_to_new_bucket(hg, key);
    285 		atomic_fetch_add_relaxed(bp, inc);
    286 	}
    287 }
    288 
    289 /**********************************************************************/
    290 
    291 void
    292 isc_histo_add(isc_histo_t *hg, uint64_t value, uint64_t inc) {
    293 	REQUIRE(HISTO_VALID(hg));
    294 	add_key_count(hg, value_to_key(hg, value), inc);
    295 }
    296 
    297 void
    298 isc_histo_inc(isc_histo_t *hg, uint64_t value) {
    299 	isc_histo_add(hg, value, 1);
    300 }
    301 
    302 void
    303 isc_histo_put(isc_histo_t *hg, uint64_t min, uint64_t max, uint64_t count) {
    304 	REQUIRE(HISTO_VALID(hg));
    305 
    306 	uint kmin = value_to_key(hg, min);
    307 	uint kmax = value_to_key(hg, max);
    308 
    309 	for (uint key = kmin; key <= kmax; key++) {
    310 		uint64_t mid = ISC_MIN(max, key_to_maxval(hg, key));
    311 		double in_bucket = mid - min + 1;
    312 		double remaining = max - min + 1;
    313 		uint64_t inc = ceil(count * in_bucket / remaining);
    314 		add_key_count(hg, key, inc);
    315 		count -= inc;
    316 		min = mid + 1;
    317 	}
    318 }
    319 
    320 isc_result_t
    321 isc_histo_get(const isc_histo_t *hg, uint key, uint64_t *minp, uint64_t *maxp,
    322 	      uint64_t *countp) {
    323 	REQUIRE(HISTO_VALID(hg));
    324 
    325 	if (key < BUCKETS(hg)) {
    326 		SET_IF_NOT_NULL(minp, key_to_minval(hg, key));
    327 		SET_IF_NOT_NULL(maxp, key_to_maxval(hg, key));
    328 		SET_IF_NOT_NULL(countp, get_key_count(hg, key));
    329 		return ISC_R_SUCCESS;
    330 	} else {
    331 		return ISC_R_RANGE;
    332 	}
    333 }
    334 
    335 void
    336 isc_histo_next(const isc_histo_t *hg, uint *keyp) {
    337 	REQUIRE(HISTO_VALID(hg));
    338 	REQUIRE(keyp != NULL);
    339 
    340 	uint chunksize = CHUNKSIZE(hg);
    341 	uint buckets = BUCKETS(hg);
    342 	uint key = *keyp;
    343 
    344 	key++;
    345 	while (key < buckets && key % chunksize == 0 &&
    346 	       key_to_bucket(hg, key) == NULL)
    347 	{
    348 		key += chunksize;
    349 	}
    350 	*keyp = key;
    351 }
    352 
    353 void
    354 isc_histo_merge(isc_histo_t **targetp, const isc_histo_t *source) {
    355 	REQUIRE(HISTO_VALID(source));
    356 	REQUIRE(targetp != NULL);
    357 
    358 	if (*targetp != NULL) {
    359 		REQUIRE(HISTO_VALID(*targetp));
    360 	} else {
    361 		isc_histo_create(source->mctx, source->sigbits, targetp);
    362 	}
    363 
    364 	uint64_t min, max, count;
    365 	for (uint key = 0;
    366 	     isc_histo_get(source, key, &min, &max, &count) == ISC_R_SUCCESS;
    367 	     isc_histo_next(source, &key))
    368 	{
    369 		isc_histo_put(*targetp, min, max, count);
    370 	}
    371 }
    372 
    373 /**********************************************************************/
    374 
    375 void
    376 isc_histomulti_create(isc_mem_t *mctx, uint sigbits, isc_histomulti_t **hmp) {
    377 	REQUIRE(hmp != NULL);
    378 	REQUIRE(*hmp == NULL);
    379 
    380 	uint size = isc_tid_count();
    381 	INSIST(size > 0);
    382 
    383 	isc_histomulti_t *hm = isc_mem_cget(mctx, 1,
    384 					    STRUCT_FLEX_SIZE(hm, hg, size));
    385 	*hm = (isc_histomulti_t){
    386 		.magic = HISTOMULTI_MAGIC,
    387 		.size = size,
    388 	};
    389 
    390 	for (uint i = 0; i < hm->size; i++) {
    391 		isc_histo_create(mctx, sigbits, &hm->hg[i]);
    392 	}
    393 
    394 	*hmp = hm;
    395 }
    396 
    397 void
    398 isc_histomulti_destroy(isc_histomulti_t **hmp) {
    399 	REQUIRE(hmp != NULL);
    400 	REQUIRE(HISTOMULTI_VALID(*hmp));
    401 
    402 	isc_histomulti_t *hm = *hmp;
    403 	isc_mem_t *mctx = hm->hg[0]->mctx;
    404 	*hmp = NULL;
    405 
    406 	for (uint i = 0; i < hm->size; i++) {
    407 		isc_histo_destroy(&hm->hg[i]);
    408 	}
    409 
    410 	isc_mem_put(mctx, hm, STRUCT_FLEX_SIZE(hm, hg, hm->size));
    411 }
    412 
    413 void
    414 isc_histomulti_merge(isc_histo_t **hgp, const isc_histomulti_t *hm) {
    415 	REQUIRE(HISTOMULTI_VALID(hm));
    416 
    417 	for (uint i = 0; i < hm->size; i++) {
    418 		isc_histo_merge(hgp, hm->hg[i]);
    419 	}
    420 }
    421 
    422 void
    423 isc_histomulti_add(isc_histomulti_t *hm, uint64_t value, uint64_t inc) {
    424 	REQUIRE(HISTOMULTI_VALID(hm));
    425 	isc_histo_t *hg = hm->hg[isc_tid()];
    426 	add_key_count(hg, value_to_key(hg, value), inc);
    427 }
    428 
    429 void
    430 isc_histomulti_inc(isc_histomulti_t *hm, uint64_t value) {
    431 	isc_histomulti_add(hm, value, 1);
    432 }
    433 
    434 /**********************************************************************/
    435 
    436 /*
    437  * https://fanf2.user.srcf.net/hermes/doc/antiforgery/stats.pdf
    438  * equation 4 (incremental mean) and equation 44 (incremental variance)
    439  */
    440 void
    441 isc_histo_moments(const isc_histo_t *hg, double *pm0, double *pm1,
    442 		  double *pm2) {
    443 	REQUIRE(HISTO_VALID(hg));
    444 
    445 	uint64_t pop = 0;
    446 	double mean = 0.0;
    447 	double sigma = 0.0;
    448 
    449 	uint64_t min, max, count;
    450 	for (uint key = 0;
    451 	     isc_histo_get(hg, key, &min, &max, &count) == ISC_R_SUCCESS;
    452 	     isc_histo_next(hg, &key))
    453 	{
    454 		if (count == 0) { /* avoid division by zero */
    455 			continue;
    456 		}
    457 		double value = min / 2.0 + max / 2.0;
    458 		double delta = value - mean;
    459 		pop += count;
    460 		mean += count * delta / pop;
    461 		sigma += count * delta * (value - mean);
    462 	}
    463 
    464 	SET_IF_NOT_NULL(pm0, pop);
    465 	SET_IF_NOT_NULL(pm1, mean);
    466 	SET_IF_NOT_NULL(pm2, (pop > 0) ? sqrt(sigma / pop) : 0.0);
    467 }
    468 
    469 /*
    470  * Clamped linear interpolation
    471  *
    472  * `outrange` should be `((1 << n) - 1)` for some `n`; when `n` is larger
    473  * than 53, `outrange` can get rounded up to a power of 2, so we clamp the
    474  * result to keep within bounds (extra important when `max == UINT64_MAX`)
    475  */
    476 static inline uint64_t
    477 lerp(uint64_t min, uint64_t max, uint64_t lo, uint64_t in, uint64_t hi) {
    478 	double inrange = (double)(hi - lo);
    479 	double inpart = (double)(in - lo);
    480 	double outrange = (double)(max - min);
    481 	double outpart = round(outrange * inpart / inrange);
    482 	return min + ISC_MIN((uint64_t)outpart, max - min);
    483 }
    484 
    485 /*
    486  * There is non-zero space for the inner value, and it is inside the bounds
    487  */
    488 static inline bool
    489 inside(uint64_t lo, uint64_t in, uint64_t hi) {
    490 	return lo < hi && lo <= in && in <= hi;
    491 }
    492 
    493 isc_result_t
    494 isc_histo_quantiles(const isc_histo_t *hg, uint size, const double *fraction,
    495 		    uint64_t *value) {
    496 	hg_bucket_t *chunk[CHUNKS];
    497 	uint64_t total[CHUNKS];
    498 	uint64_t rank[ISC_HISTO_MAXQUANTILES];
    499 
    500 	REQUIRE(HISTO_VALID(hg));
    501 	REQUIRE(0 < size && size <= ISC_HISTO_MAXQUANTILES);
    502 	REQUIRE(fraction != NULL);
    503 	REQUIRE(value != NULL);
    504 
    505 	const uint maxchunk = MAXCHUNK(hg);
    506 	const uint chunksize = CHUNKSIZE(hg);
    507 
    508 	/*
    509 	 * Find out which chunks exist and what their totals are. We take a
    510 	 * copy of the chunk pointers to reduce the need for atomic ops
    511 	 * later on. Scan from low to high so that higher buckets are more
    512 	 * likely to be in the CPU cache when we scan from high to low.
    513 	 */
    514 	uint64_t population = 0;
    515 	for (uint c = 0; c < maxchunk; c++) {
    516 		chunk[c] = get_chunk(hg, c);
    517 		total[c] = 0;
    518 		if (chunk[c] != NULL) {
    519 			for (uint b = chunksize; b-- > 0;) {
    520 				total[c] += bucket_count(&chunk[c][b]);
    521 			}
    522 			population += total[c];
    523 		}
    524 	}
    525 
    526 	/*
    527 	 * Now we know the population, we can convert fractions to ranks.
    528 	 * Also ensure they are within bounds and in decreasing order.
    529 	 */
    530 	for (uint i = 0; i < size; i++) {
    531 		REQUIRE(0.0 <= fraction[i] && fraction[i] <= 1.0);
    532 		REQUIRE(i == 0 || fraction[i - 1] > fraction[i]);
    533 		rank[i] = round(fraction[i] * population);
    534 	}
    535 
    536 	/*
    537 	 * Scan chunks from high to low, keeping track of the bounds on
    538 	 * each chunk's ranks. Each time we match `rank[i]`, move on to the
    539 	 * next rank and continue the scan from the same place.
    540 	 */
    541 	uint i = 0;
    542 	uint64_t chunk_lo = population;
    543 	for (uint c = maxchunk; c-- > 0;) {
    544 		uint64_t chunk_hi = chunk_lo;
    545 		chunk_lo = chunk_hi - total[c];
    546 
    547 		/*
    548 		 * Scan buckets backwards within this chunk, in a similar
    549 		 * manner to the chunk scan. Skip all or part of the loop
    550 		 * if the current rank is not in the chunk.
    551 		 */
    552 		uint64_t bucket_lo = chunk_hi;
    553 		for (uint b = chunksize;
    554 		     b-- > 0 && inside(chunk_lo, rank[i], chunk_hi);)
    555 		{
    556 			uint64_t bucket_hi = bucket_lo;
    557 			bucket_lo = bucket_hi - bucket_count(&chunk[c][b]);
    558 
    559 			/*
    560 			 * Convert all ranks that fall in this bucket.
    561 			 */
    562 			while (inside(bucket_lo, rank[i], bucket_hi)) {
    563 				uint key = chunksize * c + b;
    564 				value[i] = lerp(key_to_minval(hg, key),
    565 						key_to_maxval(hg, key),
    566 						bucket_lo, rank[i], bucket_hi);
    567 				if (++i == size) {
    568 					return ISC_R_SUCCESS;
    569 				}
    570 			}
    571 		}
    572 	}
    573 
    574 	return ISC_R_UNSET;
    575 }
    576 
    577 /**********************************************************************/
    578