      1 /*	$NetBSD: arc4random.c,v 1.49 2025/03/11 13:35:54 riastradh Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2014 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * This code is derived from software contributed to The NetBSD Foundation
      8  * by Taylor R. Campbell.
      9  *
     10  * Redistribution and use in source and binary forms, with or without
     11  * modification, are permitted provided that the following conditions
     12  * are met:
     13  * 1. Redistributions of source code must retain the above copyright
     14  *    notice, this list of conditions and the following disclaimer.
     15  * 2. Redistributions in binary form must reproduce the above copyright
     16  *    notice, this list of conditions and the following disclaimer in the
     17  *    documentation and/or other materials provided with the distribution.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  * POSSIBILITY OF SUCH DAMAGE.
     30  */
     31 
     32 /*
     33  * Legacy arc4random(3) API from OpenBSD reimplemented using the
     34  * ChaCha20 PRF, with per-thread state.
     35  *
     36  * Security model:
     37  * - An attacker who sees some outputs cannot predict past or future
     38  *   outputs.
     39  * - An attacker who sees the PRNG state cannot predict past outputs.
     40  * - An attacker who sees a child's PRNG state cannot predict past or
     41  *   future outputs in the parent, or in other children.
     42  *
     43  * The arc4random(3) API may abort the process if:
     44  *
     45  * (a) the crypto self-test fails, or
     46  * (b) sysctl(KERN_ARND) fails when reseeding the PRNG.
     47  *
 * The crypto self-test runs only once, on the first use of any
 * arc4random(3) function.  KERN_ARND is unlikely to fail later unless
 * the kernel is seriously broken.
     51  */
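
/*
 * Illustrative usage (not part of this file's build): callers just
 * invoke the functions below directly, with no seeding or error
 * handling of their own, e.g.
 *
 *	uint32_t die = 1 + arc4random_uniform(6);	/- unbiased 1..6 -/
 *	uint8_t key[32];
 *	arc4random_buf(key, sizeof key);
 */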
     52 
     53 #include <sys/cdefs.h>
     54 __RCSID("$NetBSD: arc4random.c,v 1.49 2025/03/11 13:35:54 riastradh Exp $");
     55 
     56 #include "namespace.h"
     57 #include "reentrant.h"
     58 
     59 #include <sys/bitops.h>
     60 #include <sys/endian.h>
     61 #include <sys/errno.h>
     62 #include <sys/mman.h>
     63 #include <sys/sysctl.h>
     64 
     65 #include <assert.h>
     66 #include <sha2.h>
     67 #include <stdatomic.h>
     68 #include <stdbool.h>
     69 #include <stdint.h>
     70 #include <stdlib.h>
     71 #include <string.h>
     72 #include <unistd.h>
     73 
     74 #include "arc4random.h"
     75 #include "reentrant.h"
     76 
     77 #ifdef __weak_alias
     78 __weak_alias(arc4random,_arc4random)
     79 __weak_alias(arc4random_addrandom,_arc4random_addrandom)
     80 __weak_alias(arc4random_buf,_arc4random_buf)
     81 __weak_alias(arc4random_stir,_arc4random_stir)
     82 __weak_alias(arc4random_uniform,_arc4random_uniform)
     83 #endif
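
/*
 * Per the usual NetBSD libc convention, namespace.h maps the public
 * names onto the reserved _-prefixed names used in this file, and the
 * weak aliases above re-export the public arc4random* names so that
 * applications link against them as usual.
 */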
     84 
/*
 * Standard ChaCha encodes 32-bit words in little-endian order
 * (le32dec/le32enc).  We don't need that for the purposes of a
 * nondeterministic random number generator -- we never have to be
 * bit-for-bit compatible with anything over a wire -- so the helpers
 * below simply load and store in native byte order.
 */
     90 
     91 static inline uint32_t
     92 crypto_le32dec(const void *p)
     93 {
     94 	uint32_t v;
     95 
     96 	(void)memcpy(&v, p, sizeof v);
     97 
     98 	return v;
     99 }
    100 
    101 static inline void
    102 crypto_le32enc(void *p, uint32_t v)
    103 {
    104 
    105 	(void)memcpy(p, &v, sizeof v);
    106 }
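
/*
 * For reference, a byte-order-independent little-endian decode, as a
 * wire-compatible ChaCha implementation would use, might look like the
 * following sketch (le32dec_portable is a hypothetical name); we
 * deliberately use the cheaper native-order memcpy above instead:
 *
 *	static inline uint32_t
 *	le32dec_portable(const uint8_t *p)
 *	{
 *		return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
 *		    ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
 *	}
 */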
    107 
    108 /* ChaCha core */
    109 
    110 #define	crypto_core_OUTPUTBYTES	64
    111 #define	crypto_core_INPUTBYTES	16
    112 #define	crypto_core_KEYBYTES	32
    113 #define	crypto_core_CONSTBYTES	16
    114 
    115 #define	crypto_core_ROUNDS	20
    116 
    117 static uint32_t
    118 rotate(uint32_t u, unsigned c)
    119 {
    120 
    121 	return (u << c) | (u >> (32 - c));
    122 }
    123 
    124 #define	QUARTERROUND(a, b, c, d) do {					      \
    125 	(a) += (b); (d) ^= (a); (d) = rotate((d), 16);			      \
    126 	(c) += (d); (b) ^= (c); (b) = rotate((b), 12);			      \
    127 	(a) += (b); (d) ^= (a); (d) = rotate((d),  8);			      \
    128 	(c) += (d); (b) ^= (c); (b) = rotate((b),  7);			      \
    129 } while (0)
    130 
    131 static const uint8_t crypto_core_constant32[16] = "expand 32-byte k";
    132 
    133 static void
    134 crypto_core(uint8_t *out, const uint8_t *in, const uint8_t *k,
    135     const uint8_t *c)
    136 {
    137 	uint32_t x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15;
    138 	uint32_t j0,j1,j2,j3,j4,j5,j6,j7,j8,j9,j10,j11,j12,j13,j14,j15;
    139 	int i;
    140 
    141 	j0 = x0 = crypto_le32dec(c + 0);
    142 	j1 = x1 = crypto_le32dec(c + 4);
    143 	j2 = x2 = crypto_le32dec(c + 8);
    144 	j3 = x3 = crypto_le32dec(c + 12);
    145 	j4 = x4 = crypto_le32dec(k + 0);
    146 	j5 = x5 = crypto_le32dec(k + 4);
    147 	j6 = x6 = crypto_le32dec(k + 8);
    148 	j7 = x7 = crypto_le32dec(k + 12);
    149 	j8 = x8 = crypto_le32dec(k + 16);
    150 	j9 = x9 = crypto_le32dec(k + 20);
    151 	j10 = x10 = crypto_le32dec(k + 24);
    152 	j11 = x11 = crypto_le32dec(k + 28);
    153 	j12 = x12 = crypto_le32dec(in + 0);
    154 	j13 = x13 = crypto_le32dec(in + 4);
    155 	j14 = x14 = crypto_le32dec(in + 8);
    156 	j15 = x15 = crypto_le32dec(in + 12);
    157 
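	/*
	 * Each iteration performs a column round (quarterrounds down
	 * the four columns of the 4x4 state) followed by a diagonal
	 * round, i.e. two of the crypto_core_ROUNDS rounds, hence the
	 * step of 2.
	 */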
    158 	for (i = crypto_core_ROUNDS; i > 0; i -= 2) {
    159 		QUARTERROUND( x0, x4, x8,x12);
    160 		QUARTERROUND( x1, x5, x9,x13);
    161 		QUARTERROUND( x2, x6,x10,x14);
    162 		QUARTERROUND( x3, x7,x11,x15);
    163 		QUARTERROUND( x0, x5,x10,x15);
    164 		QUARTERROUND( x1, x6,x11,x12);
    165 		QUARTERROUND( x2, x7, x8,x13);
    166 		QUARTERROUND( x3, x4, x9,x14);
    167 	}
    168 
    169 	crypto_le32enc(out + 0, x0 + j0);
    170 	crypto_le32enc(out + 4, x1 + j1);
    171 	crypto_le32enc(out + 8, x2 + j2);
    172 	crypto_le32enc(out + 12, x3 + j3);
    173 	crypto_le32enc(out + 16, x4 + j4);
    174 	crypto_le32enc(out + 20, x5 + j5);
    175 	crypto_le32enc(out + 24, x6 + j6);
    176 	crypto_le32enc(out + 28, x7 + j7);
    177 	crypto_le32enc(out + 32, x8 + j8);
    178 	crypto_le32enc(out + 36, x9 + j9);
    179 	crypto_le32enc(out + 40, x10 + j10);
    180 	crypto_le32enc(out + 44, x11 + j11);
    181 	crypto_le32enc(out + 48, x12 + j12);
    182 	crypto_le32enc(out + 52, x13 + j13);
    183 	crypto_le32enc(out + 56, x14 + j14);
    184 	crypto_le32enc(out + 60, x15 + j15);
    185 }
    186 
    187 /* ChaCha self-test */
    188 
    189 /*
    190  * Test vector for ChaCha20 from
    191  * <http://tools.ietf.org/html/draft-strombergson-chacha-test-vectors-00>,
    192  * test vectors for ChaCha12 and ChaCha8 and for big-endian machines
    193  * generated by the same crypto_core code with crypto_core_ROUNDS and
    194  * crypto_le32enc/dec varied.
    195  */
    196 
    197 static const uint8_t crypto_core_selftest_vector[64] = {
    198 #if _BYTE_ORDER == _LITTLE_ENDIAN
    199 #  if crypto_core_ROUNDS == 8
    200 	0x3e,0x00,0xef,0x2f,0x89,0x5f,0x40,0xd6,
    201 	0x7f,0x5b,0xb8,0xe8,0x1f,0x09,0xa5,0xa1,
    202 	0x2c,0x84,0x0e,0xc3,0xce,0x9a,0x7f,0x3b,
    203 	0x18,0x1b,0xe1,0x88,0xef,0x71,0x1a,0x1e,
    204 	0x98,0x4c,0xe1,0x72,0xb9,0x21,0x6f,0x41,
    205 	0x9f,0x44,0x53,0x67,0x45,0x6d,0x56,0x19,
    206 	0x31,0x4a,0x42,0xa3,0xda,0x86,0xb0,0x01,
    207 	0x38,0x7b,0xfd,0xb8,0x0e,0x0c,0xfe,0x42,
    208 #  elif crypto_core_ROUNDS == 12
    209 	0x9b,0xf4,0x9a,0x6a,0x07,0x55,0xf9,0x53,
    210 	0x81,0x1f,0xce,0x12,0x5f,0x26,0x83,0xd5,
    211 	0x04,0x29,0xc3,0xbb,0x49,0xe0,0x74,0x14,
    212 	0x7e,0x00,0x89,0xa5,0x2e,0xae,0x15,0x5f,
    213 	0x05,0x64,0xf8,0x79,0xd2,0x7a,0xe3,0xc0,
    214 	0x2c,0xe8,0x28,0x34,0xac,0xfa,0x8c,0x79,
    215 	0x3a,0x62,0x9f,0x2c,0xa0,0xde,0x69,0x19,
    216 	0x61,0x0b,0xe8,0x2f,0x41,0x13,0x26,0xbe,
    217 #  elif crypto_core_ROUNDS == 20
    218 	0x76,0xb8,0xe0,0xad,0xa0,0xf1,0x3d,0x90,
    219 	0x40,0x5d,0x6a,0xe5,0x53,0x86,0xbd,0x28,
    220 	0xbd,0xd2,0x19,0xb8,0xa0,0x8d,0xed,0x1a,
    221 	0xa8,0x36,0xef,0xcc,0x8b,0x77,0x0d,0xc7,
    222 	0xda,0x41,0x59,0x7c,0x51,0x57,0x48,0x8d,
    223 	0x77,0x24,0xe0,0x3f,0xb8,0xd8,0x4a,0x37,
    224 	0x6a,0x43,0xb8,0xf4,0x15,0x18,0xa1,0x1c,
    225 	0xc3,0x87,0xb6,0x69,0xb2,0xee,0x65,0x86,
    226 #  else
    227 #    error crypto_core_ROUNDS must be 8, 12, or 20.
    228 #  endif
    229 #elif _BYTE_ORDER == _BIG_ENDIAN
    230 #  if crypto_core_ROUNDS == 8
    231 	0x9a,0x13,0x07,0xe3,0x38,0x18,0x9e,0x99,
    232 	0x15,0x37,0x16,0x4d,0x04,0xe6,0x48,0x9a,
    233 	0x07,0xd6,0xe8,0x7a,0x02,0xf9,0xf5,0xc7,
    234 	0x3f,0xa9,0xc2,0x0a,0xe1,0xc6,0x62,0xea,
    235 	0x80,0xaf,0xb6,0x51,0xca,0x52,0x43,0x87,
    236 	0xe3,0xa6,0xa6,0x61,0x11,0xf5,0xe6,0xcf,
    237 	0x09,0x0f,0xdc,0x9d,0xc3,0xc3,0xbb,0x43,
    238 	0xd7,0xfa,0x70,0x42,0xbf,0xa5,0xee,0xa2,
    239 #  elif crypto_core_ROUNDS == 12
    240 	0xcf,0x6c,0x16,0x48,0xbf,0xf4,0xba,0x85,
    241 	0x32,0x69,0xd3,0x98,0xc8,0x7d,0xcd,0x3f,
    242 	0xdc,0x76,0x6b,0xa2,0x7b,0xcb,0x17,0x4d,
    243 	0x05,0xda,0xdd,0xd8,0x62,0x54,0xbf,0xe0,
    244 	0x65,0xed,0x0e,0xf4,0x01,0x7e,0x3c,0x05,
    245 	0x35,0xb2,0x7a,0x60,0xf3,0x8f,0x12,0x33,
    246 	0x24,0x60,0xcd,0x85,0xfe,0x4c,0xf3,0x39,
    247 	0xb1,0x0e,0x3e,0xe0,0xba,0xa6,0x2f,0xa9,
    248 #  elif crypto_core_ROUNDS == 20
    249 	0x83,0x8b,0xf8,0x75,0xf7,0xde,0x9d,0x8c,
    250 	0x33,0x14,0x72,0x28,0xd1,0xbe,0x88,0xe5,
    251 	0x94,0xb5,0xed,0xb8,0x56,0xb5,0x9e,0x0c,
    252 	0x64,0x6a,0xaf,0xd9,0xa7,0x49,0x10,0x59,
    253 	0xba,0x3a,0x82,0xf8,0x4a,0x70,0x9c,0x00,
    254 	0x82,0x2c,0xae,0xc6,0xd7,0x1c,0x2e,0xda,
    255 	0x2a,0xfb,0x61,0x70,0x2b,0xd1,0xbf,0x8b,
    256 	0x95,0xbc,0x23,0xb6,0x4b,0x60,0x02,0xec,
    257 #  else
    258 #    error crypto_core_ROUNDS must be 8, 12, or 20.
    259 #  endif
    260 #else
    261 #  error Byte order must be little-endian or big-endian.
    262 #endif
    263 };
    264 
    265 static int
    266 crypto_core_selftest(void)
    267 {
    268 	const uint8_t nonce[crypto_core_INPUTBYTES] = {0};
    269 	const uint8_t key[crypto_core_KEYBYTES] = {0};
    270 	uint8_t block[64];
    271 	unsigned i;
    272 
    273 	crypto_core(block, nonce, key, crypto_core_constant32);
    274 	for (i = 0; i < 64; i++) {
    275 		if (block[i] != crypto_core_selftest_vector[i])
    276 			return EIO;
    277 	}
    278 
    279 	return 0;
    280 }
    281 
    282 /* PRNG */
    283 
    284 /*
    285  * For a state s, rather than use ChaCha20 as a stream cipher to
    286  * generate the concatenation ChaCha20_s(0) || ChaCha20_s(1) || ..., we
    287  * split ChaCha20_s(0) into s' || x and yield x for the first request,
    288  * split ChaCha20_s'(0) into s'' || y and yield y for the second
    289  * request, &c.  This provides backtracking resistance: an attacker who
    290  * finds s'' can't recover s' or x.
    291  */
    292 
    293 #define	crypto_prng_SEEDBYTES		crypto_core_KEYBYTES
    294 #define	crypto_prng_MAXOUTPUTBYTES	\
    295 	(crypto_core_OUTPUTBYTES - crypto_prng_SEEDBYTES)
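
/*
 * Concretely: each 64-byte ChaCha output block is split into a 32-byte
 * replacement state and up to 32 bytes (crypto_prng_MAXOUTPUTBYTES)
 * returned to the caller, so every request costs exactly one
 * crypto_core call.
 */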
    296 
    297 __CTASSERT(sizeof(struct crypto_prng) == crypto_prng_SEEDBYTES);
    298 
    299 static void
    300 crypto_prng_seed(struct crypto_prng *prng, const void *seed)
    301 {
    302 
    303 	(void)memcpy(prng->state, seed, crypto_prng_SEEDBYTES);
    304 }
    305 
    306 static void
    307 crypto_prng_buf(struct crypto_prng *prng, void *buf, size_t n)
    308 {
    309 	const uint8_t nonce[crypto_core_INPUTBYTES] = {0};
    310 	uint8_t output[crypto_core_OUTPUTBYTES];
    311 
    312 	_DIAGASSERT(n <= crypto_prng_MAXOUTPUTBYTES);
    313 	__CTASSERT(sizeof prng->state + crypto_prng_MAXOUTPUTBYTES
    314 	    <= sizeof output);
    315 
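	/*
	 * Generate one block keyed by the current state, overwrite the
	 * state with the first SEEDBYTES of it, hand the remainder to
	 * the caller, and wipe the block -- the `fast key erasure'
	 * construction, so a later compromise of the state cannot
	 * reveal earlier outputs.
	 */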
    316 	crypto_core(output, nonce, prng->state, crypto_core_constant32);
    317 	(void)memcpy(prng->state, output, sizeof prng->state);
    318 	(void)memcpy(buf, output + sizeof prng->state, n);
    319 	(void)explicit_memset(output, 0, sizeof output);
    320 }
    321 
    322 static int
    323 crypto_prng_selftest(void)
    324 {
    325 	const uint8_t expected[32] = {
    326 #if _BYTE_ORDER == _LITTLE_ENDIAN
    327 #  if crypto_core_ROUNDS == 20
    328 		0x2b,	/* first call */
    329 		0x2d,0x41,0xa5,0x9c,0x90,0xe4,0x1a,0x8e, /* second call */
    330 		0x7a,0x4d,0xcc,0xaa,0x1c,0x46,0x06,0x99,
    331 		0x83,0xb1,0xa3,0x33,0xce,0x25,0x71,0x9e,
    332 		0xc3,0x43,0x77,0x68,0xab,0x57,
    333 		0x5f,	/* third call */
    334 #  else
    335 #    error crypto_core_ROUNDS other than 20 left as exercise for reader.
    336 #  endif
    337 #elif _BYTE_ORDER == _BIG_ENDIAN
    338 #  if crypto_core_ROUNDS == 20
    339 		0xae,	/* first call */
    340 		0x97,0x14,0x5a,0x05,0xad,0xa8,0x48,0xf1, /* second call */
    341 		0x3a,0x81,0x84,0xd7,0x05,0xda,0x20,0x5d,
    342 		0xc0,0xef,0x86,0x65,0x98,0xbd,0xb0,0x16,
    343 		0x1b,0xfc,0xff,0xc4,0xc2,0xfd,
    344 		0xa0,	/* third call */
    345 #  else
    346 #    error crypto_core_ROUNDS other than 20 left as exercise for reader.
    347 #  endif
    348 #else
    349 #  error Byte order must be little-endian or big-endian.
    350 #endif
    351 	};
    352 	uint8_t seed[crypto_prng_SEEDBYTES];
    353 	struct crypto_prng prng;
    354 	uint8_t output[32];
    355 	unsigned i;
    356 
    357 	for (i = 0; i < __arraycount(seed); i++)
    358 		seed[i] = i;
    359 	crypto_prng_seed(&prng, seed);
    360 	crypto_prng_buf(&prng, output, 1);
    361 	crypto_prng_buf(&prng, output + 1, 30);
    362 	crypto_prng_buf(&prng, output + 31, 1);
    363 	if (memcmp(output, expected, 32) != 0)
    364 		return EIO;
    365 	return 0;
    366 }
    367 
    368 /* One-time stream: expand short single-use secret into long secret */
    369 
    370 #define	crypto_onetimestream_SEEDBYTES	crypto_core_KEYBYTES
    371 
    372 static void
    373 crypto_onetimestream(const void *seed, void *buf, size_t n)
    374 {
    375 	uint32_t nonce[crypto_core_INPUTBYTES / sizeof(uint32_t)] = {0};
    376 	uint8_t block[crypto_core_OUTPUTBYTES];
    377 	uint8_t *p8, *p32;
    378 	const uint8_t *nonce8 = (const uint8_t *)(void *)nonce;
    379 	size_t ni, nb, nf;
    380 
	/*
	 * Guarantee we can generate up to n bytes.  We have
	 * 2^(8*INPUTBYTES) possible inputs, each yielding OUTPUTBYTES
	 * bytes, i.e. a total output stream of
	 * o 2^(8 i) = OUTPUTBYTES*2^(8*INPUTBYTES) bytes.  Since any
	 * size_t n satisfies n < 2^(CHAR_BIT * sizeof n), it suffices
	 * to require that CHAR_BIT * sizeof n not exceed the log of
	 * the total output stream length:
	 *
	 *	log_2 (o 2^(8 i)) = log_2 o + log_2 2^(8 i)
	 *	  = log_2 o + 8 i,
	 *
	 * which is what the assertion below checks.
	 */
    392 #ifndef __lint__
    393 	__CTASSERT(CHAR_BIT * sizeof n <= (ilog2(crypto_core_OUTPUTBYTES) +
    394 		8 * crypto_core_INPUTBYTES));
    395 #endif
    396 
    397 	p8 = buf;
    398 	p32 = (uint8_t *)roundup2((uintptr_t)p8, 4);
    399 	ni = p32 - p8;
    400 	if (n < ni)
    401 		ni = n;
    402 	nb = (n - ni) / sizeof block;
    403 	nf = (n - ni) % sizeof block;
    404 
    405 	_DIAGASSERT(((uintptr_t)p32 & 3) == 0);
    406 	_DIAGASSERT(ni <= n);
    407 	_DIAGASSERT(nb <= (n / sizeof block));
    408 	_DIAGASSERT(nf <= n);
    409 	_DIAGASSERT(n == (ni + (nb * sizeof block) + nf));
    410 	_DIAGASSERT(ni < 4);
    411 	_DIAGASSERT(nf < sizeof block);
    412 
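	/*
	 * Three phases: up to three initial bytes (ni < 4) via the
	 * temporary block to reach 4-byte alignment, then nb full
	 * 64-byte blocks generated directly into the caller's buffer,
	 * then nf trailing bytes via the temporary block again.  The
	 * nonce serves as a little-endian block counter, carried from
	 * nonce[0] into nonce[1] on overflow.
	 */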
    413 	if (ni) {
    414 		crypto_core(block, nonce8, seed, crypto_core_constant32);
    415 		crypto_le32enc(&nonce[0], 1 + crypto_le32dec(&nonce[0]));
    416 		(void)memcpy(p8, block, ni);
    417 	}
    418 	while (nb--) {
    419 		crypto_core(p32, nonce8, seed, crypto_core_constant32);
    420 		crypto_le32enc(&nonce[0], 1 + crypto_le32dec(&nonce[0]));
    421 		if (crypto_le32dec(&nonce[0]) == 0) {
    422 			crypto_le32enc(&nonce[1],
    423 			    1 + crypto_le32dec(&nonce[1]));
    424 		}
    425 		p32 += crypto_core_OUTPUTBYTES;
    426 	}
    427 	if (nf) {
    428 		crypto_core(block, nonce8, seed, crypto_core_constant32);
    429 		crypto_le32enc(&nonce[0], 1 + crypto_le32dec(&nonce[0]));
    430 		if (crypto_le32dec(&nonce[0]) == 0) {
    431 			crypto_le32enc(&nonce[1],
    432 			    1 + crypto_le32dec(&nonce[1]));
    433 		}
    434 		(void)memcpy(p32, block, nf);
    435 	}
    436 
    437 	if (ni | nf)
    438 		(void)explicit_memset(block, 0, sizeof block);
    439 }
    440 
    441 static int
    442 crypto_onetimestream_selftest(void)
    443 {
    444 	const uint8_t expected[70] = {
    445 		0x5a,			/* guard byte */
    446 #if _BYTE_ORDER == _LITTLE_ENDIAN
    447 #  if crypto_core_ROUNDS == 20
    448 		0x39,0xfd,0x2b,		/* initial block */
    449 		0x18,0xb8,0x42,0x31,0xad,0xe6,0xa6,0xd1,
    450 		0x13,0x61,0x5c,0x61,0xaf,0x43,0x4e,0x27,
    451 		0xf8,0xb1,0xf3,0xf5,0xe1,0xad,0x5b,0x5c,
    452 		0xec,0xf8,0xfc,0x12,0x2a,0x35,0x75,0x5c,
    453 		0x72,0x08,0x08,0x6d,0xd1,0xee,0x3c,0x5d,
    454 		0x9d,0x81,0x58,0x24,0x64,0x0e,0x00,0x3c,
    455 		0x9b,0xa0,0xf6,0x5e,0xde,0x5d,0x59,0xce,
    456 		0x0d,0x2a,0x4a,0x7f,0x31,0x95,0x5a,0xcd,
    457 		0x42,			/* final block */
    458 #  else
    459 #    error crypto_core_ROUNDS other than 20 left as exercise for reader.
    460 #  endif
    461 #elif _BYTE_ORDER == _BIG_ENDIAN
    462 #  if crypto_core_ROUNDS == 20
    463 		0x20,0xf0,0x66,		/* initial block */
    464 		0x1a,0x82,0xda,0xb6,0xba,0x90,0x42,0x19,
    465 		0x39,0xc2,0x4e,0x4d,0xaf,0xbc,0x67,0xcf,
    466 		0xe3,0xe4,0xe2,0x80,0x38,0x80,0x8e,0x53,
    467 		0x19,0x25,0x37,0x67,0x66,0x57,0x7c,0x78,
    468 		0xac,0xb3,0x8b,0x97,0x54,0x20,0xc4,0x46,
    469 		0xff,0x90,0x76,0x56,0xcc,0xde,0xe5,0xb9,
    470 		0xdf,0x82,0x8c,0x05,0x9d,0xf0,0x69,0x99,
    471 		0x42,0x53,0x74,0x5e,0x80,0x81,0xdb,0x9b,
    472 		0xb1,			/* final block */
    473 #  else
    474 #    error crypto_core_ROUNDS other than 20 left as exercise for reader.
    475 #  endif
    476 #else
    477 #  error Byte order must be little-endian or big-endian.
    478 #endif
    479 		0xcc,			/* guard byte */
    480 	};
    481 	uint8_t seed[crypto_prng_SEEDBYTES];
    482 	uint8_t output[70] __aligned(4);
    483 	unsigned i;
    484 
    485 	for (i = 0; i < __arraycount(seed); i++)
    486 		seed[i] = i;
    487 	output[0] = 0x5a;
    488 	output[69] = 0xcc;
    489 	crypto_onetimestream(seed, output + 1, 68);
    490 	if (memcmp(output, expected, 70) != 0)
    491 		return EIO;
    492 	return 0;
    493 }
    494 
    495 /*
    496  * entropy_epoch()
    497  *
    498  *	Return the current entropy epoch, from the sysctl node
    499  *	kern.entropy.epoch.
    500  *
    501  *	The entropy epoch is never zero.  Initially, or on error, it is
    502  *	(unsigned)-1.  It may wrap around but it skips (unsigned)-1 and
    503  *	0 when it does.  Changes happen less than once per second, so
    504  *	wraparound will only affect systems after 136 years of uptime.
    505  *
    506  *	XXX This should get it from a page shared read-only by kernel
    507  *	with userland, but until we implement such a mechanism, this
    508  *	sysctl -- incurring the cost of a syscall -- will have to
    509  *	serve.
    510  */
    511 static unsigned
    512 entropy_epoch(void)
    513 {
    514 	static atomic_int mib0[3];
    515 	static atomic_bool initialized = false;
    516 	int mib[3];
    517 	unsigned epoch = (unsigned)-1;
    518 	size_t epochlen = sizeof(epoch);
    519 
    520 	/*
    521 	 * Resolve kern.entropy.epoch if we haven't already.  Cache it
    522 	 * for the next caller.  Initialization is idempotent, so it's
    523 	 * OK if two threads do it at once.
    524 	 */
    525 	if (atomic_load_explicit(&initialized, memory_order_acquire)) {
    526 		mib[0] = atomic_load_explicit(&mib0[0], memory_order_relaxed);
    527 		mib[1] = atomic_load_explicit(&mib0[1], memory_order_relaxed);
    528 		mib[2] = atomic_load_explicit(&mib0[2], memory_order_relaxed);
    529 	} else {
    530 		size_t nmib = __arraycount(mib);
    531 
    532 		if (sysctlnametomib("kern.entropy.epoch", mib, &nmib) == -1)
    533 			return (unsigned)-1;
    534 		if (nmib != __arraycount(mib))
    535 			return (unsigned)-1;
    536 		atomic_store_explicit(&mib0[0], mib[0], memory_order_relaxed);
    537 		atomic_store_explicit(&mib0[1], mib[1], memory_order_relaxed);
    538 		atomic_store_explicit(&mib0[2], mib[2], memory_order_relaxed);
    539 		atomic_store_explicit(&initialized, true,
    540 		    memory_order_release);
    541 	}
    542 
    543 	if (sysctl(mib, __arraycount(mib), &epoch, &epochlen, NULL, 0) == -1)
    544 		return (unsigned)-1;
    545 	if (epochlen != sizeof(epoch))
    546 		return (unsigned)-1;
    547 
    548 	return epoch;
    549 }
    550 
    551 /* arc4random state: per-thread, per-process (zeroed in child on fork) */
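
/*
 * Re-seeding policy: each PRNG instance records in arc4_epoch the
 * entropy epoch at which it was last seeded.  The epoch is never zero,
 * and fork leaves the state zeroed (MAP_INHERIT_ZERO for the
 * per-thread state, the atfork prepare handler for the global state),
 * so after a fork the stale arc4_epoch can never match
 * entropy_epoch() and the next use reseeds from the kernel.  The same
 * comparison also reseeds whenever the kernel bumps
 * kern.entropy.epoch.
 */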
    552 
    553 static void
    554 arc4random_prng_addrandom(struct arc4random_prng *prng, const void *data,
    555     size_t datalen)
    556 {
    557 	const int mib[] = { CTL_KERN, KERN_ARND };
    558 	SHA256_CTX ctx;
    559 	uint8_t buf[crypto_prng_SEEDBYTES];
    560 	size_t buflen = sizeof buf;
    561 	unsigned epoch = entropy_epoch();
    562 
    563 	__CTASSERT(sizeof buf == SHA256_DIGEST_LENGTH);
    564 
    565 	SHA256_Init(&ctx);
    566 
    567 	crypto_prng_buf(&prng->arc4_prng, buf, sizeof buf);
    568 	SHA256_Update(&ctx, buf, sizeof buf);
    569 
    570 	if (sysctl(mib, (u_int)__arraycount(mib), buf, &buflen, NULL, 0) == -1)
    571 		abort();
    572 	if (buflen != sizeof buf)
    573 		abort();
    574 	SHA256_Update(&ctx, buf, sizeof buf);
    575 
    576 	if (data != NULL)
    577 		SHA256_Update(&ctx, data, datalen);
    578 
    579 	SHA256_Final(buf, &ctx);
    580 	(void)explicit_memset(&ctx, 0, sizeof ctx);
    581 
    582 	/* reseed(SHA256(prng() || sysctl(KERN_ARND) || data)) */
    583 	crypto_prng_seed(&prng->arc4_prng, buf);
    584 	(void)explicit_memset(buf, 0, sizeof buf);
    585 	prng->arc4_epoch = epoch;
    586 }
    587 
    588 #ifdef _REENTRANT
    589 static struct arc4random_prng *
    590 arc4random_prng_create(void)
    591 {
    592 	struct arc4random_prng *prng;
    593 	const size_t size = roundup(sizeof(*prng), sysconf(_SC_PAGESIZE));
    594 
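	/*
	 * Use a dedicated, page-rounded mmap rather than malloc so the
	 * mapping can be marked MAP_INHERIT_ZERO on its own: minherit
	 * operates on whole pages, and this way a forked child gets a
	 * zeroed copy of the PRNG state without clobbering unrelated
	 * heap data.
	 */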
    595 	prng = mmap(NULL, size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, -1,
    596 	    0);
    597 	if (prng == MAP_FAILED)
    598 		goto fail0;
    599 	if (minherit(prng, size, MAP_INHERIT_ZERO) == -1)
    600 		goto fail1;
    601 
    602 	return prng;
    603 
    604 fail1:	(void)munmap(prng, size);
    605 fail0:	return NULL;
    606 }
    607 #endif
    608 
    609 #ifdef _REENTRANT
    610 static void
    611 arc4random_prng_destroy(struct arc4random_prng *prng)
    612 {
    613 	const size_t size = roundup(sizeof(*prng), sysconf(_SC_PAGESIZE));
    614 
    615 	(void)explicit_memset(prng, 0, sizeof(*prng));
    616 	(void)munmap(prng, size);
    617 }
    618 #endif
    619 
    620 /* Library state */
    621 
    622 struct arc4random_global_state arc4random_global = {
    623 #ifdef _REENTRANT
    624 	.lock		= MUTEX_INITIALIZER,
    625 #endif
    626 	.once		= ONCE_INITIALIZER,
    627 };
    628 
    629 static void
    630 arc4random_atfork_prepare(void)
    631 {
    632 
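	/*
	 * Hold the lock across fork and wipe the global PRNG state
	 * before forking, so the child never inherits it; afterwards
	 * both parent and child fail the entropy-epoch check and
	 * reseed before producing any more output.
	 */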
    633 	mutex_lock(&arc4random_global.lock);
    634 	(void)explicit_memset(&arc4random_global.prng, 0,
    635 	    sizeof arc4random_global.prng);
    636 }
    637 
    638 static void
    639 arc4random_atfork_parent(void)
    640 {
    641 
    642 	mutex_unlock(&arc4random_global.lock);
    643 }
    644 
    645 static void
    646 arc4random_atfork_child(void)
    647 {
    648 
    649 	mutex_unlock(&arc4random_global.lock);
    650 }
    651 
    652 #ifdef _REENTRANT
    653 static void
    654 arc4random_tsd_destructor(void *p)
    655 {
    656 	struct arc4random_prng *const prng = p;
    657 
    658 	arc4random_prng_destroy(prng);
    659 }
    660 #endif
    661 
    662 static void
    663 arc4random_initialize(void)
    664 {
    665 
    666 	/*
    667 	 * If the crypto software is broken, abort -- something is
    668 	 * severely wrong with this process image.
    669 	 */
    670 	if (crypto_core_selftest() != 0 ||
    671 	    crypto_prng_selftest() != 0 ||
    672 	    crypto_onetimestream_selftest() != 0)
    673 		abort();
    674 
	/*
	 * Set up pthread_atfork handlers to hold the global-state lock
	 * across fork, so that a forked child which can't use the
	 * per-thread state can still take the lock and use the global
	 * state without deadlocking on a lock held at the time of
	 * fork.  If this fails, we will fall back to PRNG state on the
	 * stack, reinitialized from the kernel entropy pool at every
	 * call.
	 */
    683 	if (pthread_atfork(&arc4random_atfork_prepare,
    684 		&arc4random_atfork_parent, &arc4random_atfork_child)
    685 	    == 0)
    686 		arc4random_global.forksafe = true;
    687 
    688 	/*
    689 	 * For multithreaded builds, try to allocate a per-thread PRNG
    690 	 * state to avoid contention due to arc4random.
    691 	 */
    692 #ifdef _REENTRANT
    693 	if (thr_keycreate(&arc4random_global.thread_key,
    694 		&arc4random_tsd_destructor) == 0)
    695 		arc4random_global.per_thread = true;
    696 #endif
    697 
	/*
	 * Record, for the benefit of the automatic tests, that the
	 * arc4random library state has been initialized.
	 */
    702 	arc4random_global.initialized = true;
    703 }
    704 
    705 static struct arc4random_prng *
    706 arc4random_prng_get(struct arc4random_prng *fallback)
    707 {
    708 	struct arc4random_prng *prng = NULL;
    709 
    710 	/* Make sure the library is initialized.  */
    711 	thr_once(&arc4random_global.once, &arc4random_initialize);
    712 
    713 #ifdef _REENTRANT
    714 	/* Get or create the per-thread PRNG state.  */
    715 	prng = __predict_true(arc4random_global.per_thread)
    716 	    ? thr_getspecific(arc4random_global.thread_key)
    717 	    : NULL;
    718 	if (__predict_false(prng == NULL) && arc4random_global.per_thread) {
    719 		prng = arc4random_prng_create();
    720 		thr_setspecific(arc4random_global.thread_key, prng);
    721 	}
    722 #endif
    723 
	/*
	 * If we can't create it, fall back to the global PRNG -- or,
	 * in the unlikely event that pthread_atfork failed, to an
	 * on-stack PRNG that we must seed from scratch on every call
	 * (suboptimal, but unlikely, so not worth optimizing).
	 */
    730 	if (__predict_false(prng == NULL)) {
    731 		if (__predict_true(arc4random_global.forksafe)) {
    732 			mutex_lock(&arc4random_global.lock);
    733 			prng = &arc4random_global.prng;
    734 		} else {
    735 			prng = fallback;
    736 			memset(prng, 0, sizeof(*prng));
    737 		}
    738 	}
    739 
    740 	/* Guarantee the PRNG is seeded.  */
    741 	if (__predict_false(prng->arc4_epoch != entropy_epoch()))
    742 		arc4random_prng_addrandom(prng, NULL, 0);
    743 
    744 	return prng;
    745 }
    746 
    747 static void
    748 arc4random_prng_put(struct arc4random_prng *prng,
    749     struct arc4random_prng *fallback)
    750 {
    751 
	/*
	 * If we had to use the stack fallback, zero it before we
	 * return so we don't leave secrets on the stack from which an
	 * unprivileged forked child could recover the parent's future
	 * outputs (of course, we can't guarantee the compiler hasn't
	 * spilled anything, so this is best-effort, not a guarantee).
	 */
    760 	if (__predict_false(prng == fallback))
    761 		explicit_memset(fallback, 0, sizeof(*fallback));
    762 
    763 	/* If we had fallen back to the global PRNG, unlock it.  */
    764 	if (__predict_false(prng == &arc4random_global.prng))
    765 		mutex_unlock(&arc4random_global.lock);
    766 }
    767 
    768 /* Public API */
    769 
    770 uint32_t
    771 arc4random(void)
    772 {
    773 	struct arc4random_prng *prng, fallback;
    774 	uint32_t v;
    775 
    776 	prng = arc4random_prng_get(&fallback);
    777 	crypto_prng_buf(&prng->arc4_prng, &v, sizeof v);
    778 	arc4random_prng_put(prng, &fallback);
    779 
    780 	return v;
    781 }
    782 
    783 void
    784 arc4random_buf(void *buf, size_t len)
    785 {
    786 	struct arc4random_prng *prng, fallback;
    787 
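	/*
	 * The per-thread PRNG yields at most
	 * crypto_prng_MAXOUTPUTBYTES (32) bytes per request, so for
	 * larger requests we draw a single one-time seed from it and
	 * expand that into the caller's buffer with
	 * crypto_onetimestream, rather than holding the PRNG (and
	 * possibly the global lock) across the whole generation.
	 */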
    788 	if (len <= crypto_prng_MAXOUTPUTBYTES) {
    789 		prng = arc4random_prng_get(&fallback);
    790 		crypto_prng_buf(&prng->arc4_prng, buf, len);
    791 		arc4random_prng_put(prng, &fallback);
    792 	} else {
    793 		uint8_t seed[crypto_onetimestream_SEEDBYTES];
    794 
    795 		prng = arc4random_prng_get(&fallback);
    796 		crypto_prng_buf(&prng->arc4_prng, seed, sizeof seed);
    797 		arc4random_prng_put(prng, &fallback);
    798 
    799 		crypto_onetimestream(seed, buf, len);
    800 		(void)explicit_memset(seed, 0, sizeof seed);
    801 	}
    802 }
    803 
    804 uint32_t
    805 arc4random_uniform(uint32_t bound)
    806 {
    807 	struct arc4random_prng *prng, fallback;
    808 	uint32_t minimum, r;
    809 
	/*
	 * We want a uniform random choice in [0, n), and arc4random()
	 * makes a uniform random choice in [0, 2^32).  If we simply
	 * reduced that modulo n, values in [0, 2^32 mod n) would occur
	 * slightly more often than values in [2^32 mod n, n).  Instead
	 * we choose only from [2^32 mod n, 2^32), rejecting samples in
	 * [0, 2^32 mod n), so that every residue modulo n has exactly
	 * the same number of representatives.  To compute 2^32 mod n
	 * in 32-bit arithmetic, note that
	 *
	 *	2^32 mod n = 2^32 mod n - 0
	 *	  = 2^32 mod n - n mod n
	 *	  = (2^32 - n) mod n,
	 *
	 * the last of which is what `-bound % bound' computes below.
	 */
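	/*
	 * For example (illustrative numbers): with bound = 10,
	 * 2^32 mod 10 = 6, so minimum = 6; we reject r in [0, 6), and
	 * each residue modulo 10 then has exactly
	 * (2^32 - 6)/10 = 429496729 accepted representatives.
	 */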
    825 	minimum = (-bound % bound);
    826 
    827 	prng = arc4random_prng_get(&fallback);
    828 	do crypto_prng_buf(&prng->arc4_prng, &r, sizeof r);
    829 	while (__predict_false(r < minimum));
    830 	arc4random_prng_put(prng, &fallback);
    831 
    832 	return (r % bound);
    833 }
    834 
    835 void
    836 arc4random_stir(void)
    837 {
    838 	struct arc4random_prng *prng, fallback;
    839 
    840 	prng = arc4random_prng_get(&fallback);
    841 	arc4random_prng_addrandom(prng, NULL, 0);
    842 	arc4random_prng_put(prng, &fallback);
    843 }
    844 
    845 /*
    846  * Silly signature here is for hysterical raisins.  Should instead be
    847  * const void *data and size_t datalen.
    848  */
    849 void
    850 arc4random_addrandom(u_char *data, int datalen)
    851 {
    852 	struct arc4random_prng *prng, fallback;
    853 
    854 	_DIAGASSERT(0 <= datalen);
    855 
    856 	prng = arc4random_prng_get(&fallback);
    857 	arc4random_prng_addrandom(prng, data, datalen);
    858 	arc4random_prng_put(prng, &fallback);
    859 }
    860 
    861 #ifdef _ARC4RANDOM_TEST
    862 
    863 #include <sys/wait.h>
    864 
    865 #include <err.h>
#include <inttypes.h>
#include <stdio.h>
    867 
    868 int
    869 main(int argc __unused, char **argv __unused)
    870 {
    871 	unsigned char gubbish[] = "random gubbish";
    872 	const uint8_t zero64[64] = {0};
    873 	uint8_t buf[2048];
    874 	unsigned i, a, n;
    875 
    876 	/* Test arc4random: should not be deterministic.  */
    877 	if (printf("arc4random: %08"PRIx32"\n", arc4random()) < 0)
    878 		err(1, "printf");
    879 
    880 	/* Test stirring: should definitely not be deterministic.  */
    881 	arc4random_stir();
    882 
    883 	/* Test small buffer.  */
    884 	arc4random_buf(buf, 8);
    885 	if (printf("arc4randombuf small:") < 0)
    886 		err(1, "printf");
    887 	for (i = 0; i < 8; i++)
    888 		if (printf(" %02x", buf[i]) < 0)
    889 			err(1, "printf");
    890 	if (printf("\n") < 0)
    891 		err(1, "printf");
    892 
    893 	/* Test addrandom: should not make the rest deterministic.  */
    894 	arc4random_addrandom(gubbish, sizeof gubbish);
    895 
    896 	/* Test large buffer.  */
    897 	arc4random_buf(buf, sizeof buf);
    898 	if (printf("arc4randombuf_large:") < 0)
    899 		err(1, "printf");
    900 	for (i = 0; i < sizeof buf; i++)
    901 		if (printf(" %02x", buf[i]) < 0)
    902 			err(1, "printf");
    903 	if (printf("\n") < 0)
    904 		err(1, "printf");
    905 
    906 	/* Test misaligned small and large.  */
    907 	for (a = 0; a < 64; a++) {
    908 		for (n = a; n < sizeof buf; n++) {
    909 			(void)memset(buf, 0, sizeof buf);
    910 			arc4random_buf(buf, n - a);
    911 			if (memcmp(buf + n - a, zero64, a) != 0)
    912 				errx(1, "arc4random buffer overflow 0");
    913 
    914 			(void)memset(buf, 0, sizeof buf);
    915 			arc4random_buf(buf + a, n - a);
    916 			if (memcmp(buf, zero64, a) != 0)
    917 				errx(1, "arc4random buffer overflow 1");
    918 
    919 			if ((2*a) <= n) {
    920 				(void)memset(buf, 0, sizeof buf);
    921 				arc4random_buf(buf + a, n - a - a);
    922 				if (memcmp(buf + n - a, zero64, a) != 0)
    923 					errx(1,
    924 					    "arc4random buffer overflow 2");
    925 			}
    926 		}
    927 	}
    928 
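	/*
	 * Quick range check of arc4random_uniform (a sanity check,
	 * not a statistical test): every result must lie strictly
	 * below the requested bound.
	 */
    {
	uint32_t b, x;

	for (i = 0; i < 10000; i++) {
		b = 1 + arc4random_uniform(1024);
		x = arc4random_uniform(b);
		if (x >= b)
			errx(1, "arc4random_uniform out of range");
	}
    }
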
    929 	/* Test fork-safety.  */
    930     {
    931 	pid_t pid, rpid;
    932 	int status;
    933 
    934 	pid = fork();
    935 	switch (pid) {
    936 	case -1:
    937 		err(1, "fork");
    938 	case 0: {
    939 		/*
    940 		 * Verify the epoch has been set to zero by fork.
    941 		 */
    942 		struct arc4random_prng *prng = NULL;
    943 #ifdef _REENTRANT
    944 		prng = arc4random_global.per_thread
    945 		    ? thr_getspecific(arc4random_global.thread_key)
    946 		    : NULL;
    947 #endif
    948 		if (prng == NULL)
    949 			prng = &arc4random_global.prng;
    950 		_exit(prng->arc4_epoch != 0);
    951 	}
    952 	default:
    953 		rpid = waitpid(pid, &status, 0);
    954 		if (rpid == -1)
    955 			err(1, "waitpid");
    956 		if (rpid != pid)
    957 			errx(1, "waitpid returned wrong pid"
    958 			    ": %"PRIdMAX" != %"PRIdMAX,
    959 			    (intmax_t)rpid,
    960 			    (intmax_t)pid);
    961 		if (WIFEXITED(status)) {
    962 			if (WEXITSTATUS(status) != 0)
    963 				errx(1, "child exited with %d",
    964 				    WEXITSTATUS(status));
    965 		} else if (WIFSIGNALED(status)) {
    966 			errx(1, "child terminated on signal %d",
    967 			    WTERMSIG(status));
    968 		} else {
    969 			errx(1, "child died mysteriously: %d", status);
    970 		}
    971 	}
    972     }
    973 
    974 	/* XXX Test multithreaded fork safety...?  */
    975 
    976 	return 0;
    977 }
    978 #endif
    979