Home | History | Annotate | Line # | Download | only in cprng_fast
cprng_fast.c revision 1.7
      1  1.7  riastrad /*	$NetBSD: cprng_fast.c,v 1.7 2014/08/11 13:01:58 riastradh Exp $	*/
      2  1.2       tls 
      3  1.2       tls /*-
      4  1.2       tls  * Copyright (c) 2014 The NetBSD Foundation, Inc.
      5  1.2       tls  * All rights reserved.
      6  1.2       tls  *
      7  1.2       tls  * This code is derived from software contributed to The NetBSD Foundation
      8  1.2       tls  * by Taylor R. Campbell.
      9  1.2       tls  *
     10  1.2       tls  * Redistribution and use in source and binary forms, with or without
     11  1.2       tls  * modification, are permitted provided that the following conditions
     12  1.2       tls  * are met:
     13  1.2       tls  * 1. Redistributions of source code must retain the above copyright
     14  1.2       tls  *    notice, this list of conditions and the following disclaimer.
     15  1.2       tls  * 2. Redistributions in binary form must reproduce the above copyright
     16  1.2       tls  *    notice, this list of conditions and the following disclaimer in the
     17  1.2       tls  *    documentation and/or other materials provided with the distribution.
     18  1.2       tls  *
     19  1.2       tls  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  1.2       tls  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  1.2       tls  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  1.2       tls  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  1.2       tls  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  1.2       tls  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  1.2       tls  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  1.2       tls  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  1.2       tls  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  1.2       tls  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  1.2       tls  * POSSIBILITY OF SUCH DAMAGE.
     30  1.2       tls  */
     31  1.2       tls 
     32  1.2       tls #include <sys/cdefs.h>
     33  1.7  riastrad __KERNEL_RCSID(0, "$NetBSD: cprng_fast.c,v 1.7 2014/08/11 13:01:58 riastradh Exp $");
     34  1.2       tls 
     35  1.2       tls #include <sys/types.h>
     36  1.4  riastrad #include <sys/param.h>
     37  1.2       tls #include <sys/bitops.h>
     38  1.4  riastrad #include <sys/cprng.h>
     39  1.2       tls #include <sys/cpu.h>
     40  1.2       tls #include <sys/intr.h>
     41  1.2       tls #include <sys/percpu.h>
     42  1.5  riastrad #include <sys/rnd.h>
     43  1.2       tls 
     44  1.2       tls /* ChaCha core */
     46  1.2       tls 
/* Operand sizes of the ChaCha core, counted in 32-bit words.  */
#define	crypto_core_CONSTWORDS	4	/* constant, `c' */
#define	crypto_core_KEYWORDS	8	/* key, `k' */
#define	crypto_core_INPUTWORDS	4	/* input block, `in' */
#define	crypto_core_OUTPUTWORDS	16	/* output block, `out' */

/*
 * Number of ChaCha rounds.  crypto_core performs them in pairs, and
 * crypto_core_selftest has known answers only for 8, 12, and 20.
 * This must stay a preprocessor macro: the self-test selects its test
 * vector with #if.
 */
#define	crypto_core_ROUNDS	8
     53  1.2       tls 
/*
 * rotate(u, c): Rotate the 32-bit word u left by c bit positions.
 *
 * Both shift counts are reduced modulo 32, so the result is defined
 * for every c: c == 0 (and any multiple of 32) returns u unchanged
 * instead of shifting a 32-bit value by 32, which C leaves undefined.
 * For 0 < c < 32 the result is the same as (u << c) | (u >> (32 - c)).
 */
static uint32_t
rotate(uint32_t u, unsigned c)
{

	/* -c is computed in unsigned arithmetic, so (-c & 31) == (32 - c) % 32. */
	return (u << (c & 31)) | (u >> (-c & 31));
}
     60  1.2       tls 
/*
 * QUARTERROUND(a, b, c, d): One ChaCha quarter-round, updating the
 * four 32-bit words in place with add / xor / rotate-left by 16, 12,
 * 8, and 7.  Every argument is evaluated several times, so each must
 * be a side-effect-free lvalue.
 */
#define	QUARTERROUND(a, b, c, d) do {					      \
	(a) += (b);							      \
	(d) = rotate((d) ^ (a), 16);					      \
	(c) += (d);							      \
	(b) = rotate((b) ^ (c), 12);					      \
	(a) += (b);							      \
	(d) = rotate((d) ^ (a), 8);					      \
	(c) += (d);							      \
	(b) = rotate((b) ^ (c), 7);					      \
} while (0)
     67  1.2       tls 
     68  1.2       tls static void
     69  1.2       tls crypto_core(uint32_t *out, const uint32_t *in, const uint32_t *k,
     70  1.2       tls     const uint32_t *c)
     71  1.2       tls {
     72  1.2       tls 	uint32_t x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15;
     73  1.2       tls 	int i;
     74  1.2       tls 
     75  1.2       tls 	x0 = c[0];
     76  1.2       tls 	x1 = c[1];
     77  1.2       tls 	x2 = c[2];
     78  1.2       tls 	x3 = c[3];
     79  1.2       tls 	x4 = k[0];
     80  1.2       tls 	x5 = k[1];
     81  1.2       tls 	x6 = k[2];
     82  1.2       tls 	x7 = k[3];
     83  1.2       tls 	x8 = k[4];
     84  1.2       tls 	x9 = k[5];
     85  1.2       tls 	x10 = k[6];
     86  1.2       tls 	x11 = k[7];
     87  1.2       tls 	x12 = in[0];
     88  1.2       tls 	x13 = in[1];
     89  1.2       tls 	x14 = in[2];
     90  1.2       tls 	x15 = in[3];
     91  1.2       tls 
     92  1.2       tls 	for (i = crypto_core_ROUNDS; i > 0; i -= 2) {
     93  1.2       tls 		QUARTERROUND( x0, x4, x8,x12);
     94  1.2       tls 		QUARTERROUND( x1, x5, x9,x13);
     95  1.2       tls 		QUARTERROUND( x2, x6,x10,x14);
     96  1.2       tls 		QUARTERROUND( x3, x7,x11,x15);
     97  1.2       tls 		QUARTERROUND( x0, x5,x10,x15);
     98  1.2       tls 		QUARTERROUND( x1, x6,x11,x12);
     99  1.2       tls 		QUARTERROUND( x2, x7, x8,x13);
    100  1.2       tls 		QUARTERROUND( x3, x4, x9,x14);
    101  1.2       tls 	}
    102  1.2       tls 
    103  1.2       tls 	out[0] = x0 + c[0];
    104  1.2       tls 	out[1] = x1 + c[1];
    105  1.2       tls 	out[2] = x2 + c[2];
    106  1.2       tls 	out[3] = x3 + c[3];
    107  1.2       tls 	out[4] = x4 + k[0];
    108  1.2       tls 	out[5] = x5 + k[1];
    109  1.2       tls 	out[6] = x6 + k[2];
    110  1.2       tls 	out[7] = x7 + k[3];
    111  1.2       tls 	out[8] = x8 + k[4];
    112  1.2       tls 	out[9] = x9 + k[5];
    113  1.2       tls 	out[10] = x10 + k[6];
    114  1.2       tls 	out[11] = x11 + k[7];
    115  1.2       tls 	out[12] = x12 + in[0];
    116  1.2       tls 	out[13] = x13 + in[1];
    117  1.2       tls 	out[14] = x14 + in[2];
    118  1.2       tls 	out[15] = x15 + in[3];
    119  1.2       tls }
    120  1.2       tls 
/*
 * The ChaCha constant: the 16 bytes of `expand 32-byte k', four at a
 * time, each group read as a little-endian 32-bit word.
 */
static const uint32_t crypto_core_constant32[4] = {
	0x61707865U,	/* "expa" */
	0x3320646eU,	/* "nd 3" */
	0x79622d32U,	/* "2-by" */
	0x6b206574U,	/* "te k" */
};
    126  1.2       tls 
/*
 * The ChaCha20 test vector is taken from
 * <http://tools.ietf.org/html/draft-strombergson-chacha-test-vectors-00>.
 * The ChaCha12 and ChaCha8 test vectors were generated by this same
 * crypto_core code with crypto_core_ROUNDS varied.
 */
    133  1.2       tls 
/*
 * check(E): Self-test assertion.  If E is false, panic with a message
 * that quotes the source text of E; otherwise do nothing.
 */
#define	check(E) do {							\
	if (E)								\
		break;							\
	panic("crypto self-test failed: %s", #E);			\
} while (0)
    139  1.2       tls 
    140  1.2       tls static void
    141  1.2       tls crypto_core_selftest(void)
    142  1.2       tls {
    143  1.2       tls 	const uint32_t zero32[8] = {0};
    144  1.2       tls 	const uint8_t sigma[] = "expand 32-byte k";
    145  1.2       tls 	uint32_t block[16];
    146  1.2       tls 	unsigned i;
    147  1.2       tls 
    148  1.2       tls #if crypto_core_ROUNDS == 8
    149  1.2       tls 	static const uint8_t out[64] = {
    150  1.2       tls 		0x3e,0x00,0xef,0x2f,0x89,0x5f,0x40,0xd6,
    151  1.2       tls 		0x7f,0x5b,0xb8,0xe8,0x1f,0x09,0xa5,0xa1,
    152  1.2       tls 		0x2c,0x84,0x0e,0xc3,0xce,0x9a,0x7f,0x3b,
    153  1.2       tls 		0x18,0x1b,0xe1,0x88,0xef,0x71,0x1a,0x1e,
    154  1.2       tls 		0x98,0x4c,0xe1,0x72,0xb9,0x21,0x6f,0x41,
    155  1.2       tls 		0x9f,0x44,0x53,0x67,0x45,0x6d,0x56,0x19,
    156  1.2       tls 		0x31,0x4a,0x42,0xa3,0xda,0x86,0xb0,0x01,
    157  1.2       tls 		0x38,0x7b,0xfd,0xb8,0x0e,0x0c,0xfe,0x42,
    158  1.2       tls 	};
    159  1.2       tls #elif crypto_core_ROUNDS == 12
    160  1.2       tls 	static const uint8_t out[64] = {
    161  1.2       tls 		0x9b,0xf4,0x9a,0x6a,0x07,0x55,0xf9,0x53,
    162  1.2       tls 		0x81,0x1f,0xce,0x12,0x5f,0x26,0x83,0xd5,
    163  1.2       tls 		0x04,0x29,0xc3,0xbb,0x49,0xe0,0x74,0x14,
    164  1.2       tls 		0x7e,0x00,0x89,0xa5,0x2e,0xae,0x15,0x5f,
    165  1.2       tls 		0x05,0x64,0xf8,0x79,0xd2,0x7a,0xe3,0xc0,
    166  1.2       tls 		0x2c,0xe8,0x28,0x34,0xac,0xfa,0x8c,0x79,
    167  1.2       tls 		0x3a,0x62,0x9f,0x2c,0xa0,0xde,0x69,0x19,
    168  1.2       tls 		0x61,0x0b,0xe8,0x2f,0x41,0x13,0x26,0xbe,
    169  1.2       tls 	};
    170  1.2       tls #elif crypto_core_ROUNDS == 20
    171  1.2       tls 	static const uint8_t out[64] = {
    172  1.2       tls 		0x76,0xb8,0xe0,0xad,0xa0,0xf1,0x3d,0x90,
    173  1.2       tls 		0x40,0x5d,0x6a,0xe5,0x53,0x86,0xbd,0x28,
    174  1.2       tls 		0xbd,0xd2,0x19,0xb8,0xa0,0x8d,0xed,0x1a,
    175  1.2       tls 		0xa8,0x36,0xef,0xcc,0x8b,0x77,0x0d,0xc7,
    176  1.2       tls 		0xda,0x41,0x59,0x7c,0x51,0x57,0x48,0x8d,
    177  1.2       tls 		0x77,0x24,0xe0,0x3f,0xb8,0xd8,0x4a,0x37,
    178  1.2       tls 		0x6a,0x43,0xb8,0xf4,0x15,0x18,0xa1,0x1c,
    179  1.2       tls 		0xc3,0x87,0xb6,0x69,0xb2,0xee,0x65,0x86,
    180  1.2       tls 	};
    181  1.2       tls #else
    182  1.2       tls #error crypto_core_ROUNDS must be 8, 12, or 20.
    183  1.2       tls #endif
    184  1.2       tls 
    185  1.2       tls 	check(crypto_core_constant32[0] == le32dec(&sigma[0]));
    186  1.2       tls 	check(crypto_core_constant32[1] == le32dec(&sigma[4]));
    187  1.2       tls 	check(crypto_core_constant32[2] == le32dec(&sigma[8]));
    188  1.2       tls 	check(crypto_core_constant32[3] == le32dec(&sigma[12]));
    189  1.2       tls 
    190  1.2       tls 	crypto_core(block, zero32, zero32, crypto_core_constant32);
    191  1.2       tls 	for (i = 0; i < 16; i++)
    192  1.2       tls 		check(block[i] == le32dec(&out[i*4]));
    193  1.2       tls }
    194  1.2       tls 
    195  1.2       tls #undef check
    196  1.2       tls 
    197  1.2       tls #define	CPRNG_FAST_SEED_BYTES	(crypto_core_KEYWORDS * sizeof(uint32_t))
    199  1.2       tls 
    200  1.2       tls struct cprng_fast {
    201  1.2       tls 	uint32_t 	buffer[crypto_core_OUTPUTWORDS];
    202  1.2       tls 	uint32_t 	key[crypto_core_KEYWORDS];
    203  1.2       tls 	uint32_t 	nonce[crypto_core_INPUTWORDS];
    204  1.2       tls 	bool		have_initial;
    205  1.2       tls };
    206  1.2       tls 
    207  1.2       tls __CTASSERT(sizeof ((struct cprng_fast *)0)->key == CPRNG_FAST_SEED_BYTES);
    208  1.2       tls 
    209  1.6  riastrad static void	cprng_fast_schedule_reseed(struct cprng_fast *);
    210  1.2       tls static void	cprng_fast_intr(void *);
    211  1.2       tls 
    212  1.2       tls static void	cprng_fast_seed(struct cprng_fast *, const void *);
    213  1.2       tls static void	cprng_fast_buf(struct cprng_fast *, void *, unsigned);
    214  1.2       tls 
    215  1.2       tls static void	cprng_fast_buf_short(void *, size_t);
    216  1.2       tls static void	cprng_fast_buf_long(void *, size_t);
    217  1.2       tls 
    218  1.2       tls static percpu_t	*cprng_fast_percpu	__read_mostly;
    219  1.2       tls static void	*cprng_fast_softint	__read_mostly;
    220  1.2       tls 
    221  1.2       tls void
    222  1.2       tls cprng_fast_init(void)
    223  1.2       tls {
    224  1.2       tls 	struct cpu_info *ci;
    225  1.2       tls 	CPU_INFO_ITERATOR cii;
    226  1.2       tls 
    227  1.2       tls 	crypto_core_selftest();
    228  1.2       tls 	cprng_fast_percpu = percpu_alloc(sizeof(struct cprng_fast));
    229  1.2       tls 	for (CPU_INFO_FOREACH(cii, ci)) {
    230  1.2       tls 		struct cprng_fast *cprng;
    231  1.2       tls 		uint8_t seed[CPRNG_FAST_SEED_BYTES];
    232  1.2       tls 
    233  1.2       tls 		percpu_traverse_enter();
    234  1.2       tls 		cprng = percpu_getptr_remote(cprng_fast_percpu, ci);
    235  1.2       tls 		cprng_strong(kern_cprng, seed, sizeof(seed), FASYNC);
    236  1.2       tls 		/* Can't do anything about it if not full entropy.  */
    237  1.2       tls 		cprng_fast_seed(cprng, seed);
    238  1.2       tls 		explicit_memset(seed, 0, sizeof(seed));
    239  1.2       tls 		percpu_traverse_exit();
    240  1.2       tls 	}
    241  1.2       tls 	cprng_fast_softint = softint_establish(SOFTINT_SERIAL|SOFTINT_MPSAFE,
    242  1.2       tls 	    &cprng_fast_intr, NULL);
    243  1.2       tls }
    244  1.2       tls 
    245  1.2       tls static inline int
    246  1.2       tls cprng_fast_get(struct cprng_fast **cprngp)
    247  1.2       tls {
    248  1.2       tls 
    249  1.2       tls 	*cprngp = percpu_getref(cprng_fast_percpu);
    250  1.2       tls 	return splvm();
    251  1.2       tls }
    252  1.2       tls 
    253  1.2       tls static inline void
    254  1.2       tls cprng_fast_put(struct cprng_fast *cprng, int s)
    255  1.2       tls {
    256  1.2       tls 
    257  1.2       tls 	KASSERT((cprng == percpu_getref(cprng_fast_percpu)) &&
    258  1.2       tls 	    (percpu_putref(cprng_fast_percpu), true));
    259  1.2       tls 	splx(s);
    260  1.2       tls 	percpu_putref(cprng_fast_percpu);
    261  1.2       tls }
    262  1.2       tls 
    263  1.2       tls static inline void
    265  1.2       tls cprng_fast_schedule_reseed(struct cprng_fast *cprng __unused)
    266  1.2       tls {
    267  1.2       tls 
    268  1.2       tls 	softint_schedule(cprng_fast_softint);
    269  1.2       tls }
    270  1.2       tls 
    271  1.2       tls static void
    272  1.7  riastrad cprng_fast_intr(void *cookie __unused)
    273  1.2       tls {
    274  1.2       tls 	struct cprng_fast *cprng;
    275  1.2       tls 	uint8_t seed[CPRNG_FAST_SEED_BYTES];
    276  1.2       tls 	int s;
    277  1.7  riastrad 
    278  1.2       tls 	cprng_strong(kern_cprng, seed, sizeof(seed), FASYNC);
    279  1.7  riastrad 
    280  1.2       tls 	cprng = percpu_getref(cprng_fast_percpu);
    281  1.2       tls 	s = splvm();
    282  1.2       tls 	cprng_fast_seed(cprng, seed);
    283  1.2       tls 	splx(s);
    284  1.2       tls 	percpu_putref(cprng_fast_percpu);
    285  1.2       tls 
    286  1.2       tls 	explicit_memset(seed, 0, sizeof(seed));
    287  1.2       tls }
    288  1.2       tls 
    289  1.2       tls /* CPRNG algorithm */
    291  1.2       tls 
/*
 * The state consists of a key, the current nonce, and a 64-byte buffer
 * of output.  Output is handed out one 32-bit word at a time, and the
 * buffer is refilled only at the moment a word is needed, so the
 * refill itself immediately returns one of the fresh words -- the
 * last one.  That word's slot would otherwise sit idle until the next
 * refill, so it is reused to hold the number of words still available
 * in the buffer.  Words are consumed from the highest index down,
 * which lets a comparison of that count against zero detect when the
 * buffer needs refilling.
 */
#define	CPRNG_FAST_BUFIDX	(crypto_core_OUTPUTWORDS - 1)
    301  1.2       tls 
    302  1.2       tls static void
    303  1.2       tls cprng_fast_seed(struct cprng_fast *cprng, const void *seed)
    304  1.2       tls {
    305  1.2       tls 
    306  1.2       tls 	(void)memset(cprng->buffer, 0, sizeof cprng->buffer);
    307  1.2       tls 	(void)memcpy(cprng->key, seed, sizeof cprng->key);
    308  1.2       tls 	(void)memset(cprng->nonce, 0, sizeof cprng->nonce);
    309  1.2       tls 
    310  1.2       tls 	if (__predict_true(rnd_initial_entropy)) {
    311  1.2       tls 		cprng->have_initial = true;
    312  1.2       tls 	} else {
    313  1.2       tls 		cprng->have_initial = false;
    314  1.2       tls 	}
    315  1.2       tls }
    316  1.2       tls 
    317  1.2       tls static inline uint32_t
    318  1.2       tls cprng_fast_word(struct cprng_fast *cprng)
    319  1.2       tls {
    320  1.2       tls 	uint32_t v;
    321  1.2       tls 
    322  1.2       tls 	if (__predict_true(0 < cprng->buffer[CPRNG_FAST_BUFIDX])) {
    323  1.2       tls 		v = cprng->buffer[--cprng->buffer[CPRNG_FAST_BUFIDX]];
    324  1.2       tls 	} else {
    325  1.2       tls 		/* If we don't have enough words, refill the buffer.  */
    326  1.2       tls 		crypto_core(cprng->buffer, cprng->nonce, cprng->key,
    327  1.2       tls 		    crypto_core_constant32);
    328  1.2       tls 		if (__predict_false(++cprng->nonce[0] == 0)) {
    329  1.2       tls 			cprng->nonce[1]++;
    330  1.2       tls 			cprng_fast_schedule_reseed(cprng);
    331  1.2       tls 		} else {
    332  1.2       tls 			if (__predict_false(false == cprng->have_initial)) {
    333  1.2       tls 				if (rnd_initial_entropy) {
    334  1.2       tls 					cprng_fast_schedule_reseed(cprng);
    335  1.2       tls 				}
    336  1.2       tls 			}
    337  1.2       tls 		}
    338  1.2       tls 		v = cprng->buffer[CPRNG_FAST_BUFIDX];
    339  1.2       tls 		cprng->buffer[CPRNG_FAST_BUFIDX] = CPRNG_FAST_BUFIDX;
    340  1.2       tls 	}
    341  1.2       tls 
    342  1.2       tls 	return v;
    343  1.2       tls }
    344  1.2       tls 
/*
 * cprng_fast_buf(cprng, buf, n): Store n bytes of output at buf.
 *
 * Output is drawn one 32-bit word at a time and written low-order
 * byte first.  When n is not a multiple of four, the unused high bytes
 * of the final word are discarded.
 */
static inline void
cprng_fast_buf(struct cprng_fast *cprng, void *buf, unsigned n)
{
	uint8_t *dst = buf;
	unsigned left, take, k;
	uint32_t word;

	for (left = n; left > 0; left -= take) {
		take = MIN(left, 4);
		word = cprng_fast_word(cprng);
		for (k = 0; k < take; k++) {
			dst[k] = (word & 0xff);
			word >>= 8;
		}
		dst += take;
	}
}
    362  1.2       tls 
    363  1.2       tls /*
    365  1.2       tls  * crypto_onetimestream: Expand a short unpredictable one-time seed
    366  1.2       tls  * into a long unpredictable output.
    367  1.2       tls  */
    368  1.2       tls static void
    369  1.2       tls crypto_onetimestream(const uint32_t seed[crypto_core_KEYWORDS], void *buf,
    370  1.2       tls     size_t n)
    371  1.2       tls {
    372  1.2       tls 	uint32_t block[crypto_core_OUTPUTWORDS];
    373  1.2       tls 	uint32_t nonce[crypto_core_INPUTWORDS] = {0};
    374  1.2       tls 	uint8_t *p8;
    375  1.2       tls 	uint32_t *p32;
    376  1.2       tls 	size_t ni, nb, nf;
    377  1.2       tls 
    378  1.2       tls 	/*
    379  1.2       tls 	 * Guarantee we can generate up to n bytes.  We have
    380  1.2       tls 	 * 2^(32*INPUTWORDS) possible inputs yielding output of
    381  1.2       tls 	 * 4*OUTPUTWORDS*2^(32*INPUTWORDS) bytes.  It suffices to
    382  1.2       tls 	 * require that sizeof n > (1/CHAR_BIT) log_2 n be less than
    383  1.2       tls 	 * (1/CHAR_BIT) log_2 of the total output stream length.  We
    384  1.2       tls 	 * have
    385  1.2       tls 	 *
    386  1.2       tls 	 *	log_2 (4 o 2^(32 i)) = log_2 (4 o) + log_2 2^(32 i)
    387  1.2       tls 	 *	  = 2 + log_2 o + 32 i.
    388  1.2       tls 	 */
    389  1.2       tls 	__CTASSERT(CHAR_BIT*sizeof n <=
    390  1.2       tls 	    (2 + ilog2(crypto_core_OUTPUTWORDS) + 32*crypto_core_INPUTWORDS));
    391  1.2       tls 
    392  1.2       tls 	p8 = buf;
    393  1.2       tls 	p32 = (uint32_t *)roundup2((uintptr_t)p8, sizeof(uint32_t));
    394  1.2       tls 	ni = (uint8_t *)p32 - p8;
    395  1.2       tls 	if (n < ni)
    396  1.2       tls 		ni = n;
    397  1.2       tls 	nb = (n - ni) / sizeof block;
    398  1.2       tls 	nf = (n - ni) % sizeof block;
    399  1.2       tls 
    400  1.2       tls 	KASSERT(((uintptr_t)p32 & 3) == 0);
    401  1.2       tls 	KASSERT(ni <= n);
    402  1.2       tls 	KASSERT(nb <= (n / sizeof block));
    403  1.2       tls 	KASSERT(nf <= n);
    404  1.2       tls 	KASSERT(n == (ni + (nb * sizeof block) + nf));
    405  1.2       tls 	KASSERT(ni < sizeof(uint32_t));
    406  1.2       tls 	KASSERT(nf < sizeof block);
    407  1.2       tls 
    408  1.2       tls 	if (ni) {
    409  1.2       tls 		crypto_core(block, nonce, seed, crypto_core_constant32);
    410  1.2       tls 		nonce[0]++;
    411  1.2       tls 		(void)memcpy(p8, block, ni);
    412  1.2       tls 	}
    413  1.2       tls 	while (nb--) {
    414  1.2       tls 		crypto_core(p32, nonce, seed, crypto_core_constant32);
    415  1.2       tls 		if (++nonce[0] == 0)
    416  1.2       tls 			nonce[1]++;
    417  1.2       tls 		p32 += crypto_core_OUTPUTWORDS;
    418  1.2       tls 	}
    419  1.2       tls 	if (nf) {
    420  1.2       tls 		crypto_core(block, nonce, seed, crypto_core_constant32);
    421  1.2       tls 		if (++nonce[0] == 0)
    422  1.2       tls 			nonce[1]++;
    423  1.2       tls 		(void)memcpy(p32, block, nf);
    424  1.2       tls 	}
    425  1.2       tls 
    426  1.2       tls 	if (ni | nf)
    427  1.2       tls 		(void)explicit_memset(block, 0, sizeof block);
    428  1.2       tls }
    429  1.2       tls 
    430  1.2       tls /* Public API */
    432  1.2       tls 
/*
 * cprng_fast32: Return one 32-bit word of output from the generator
 * state obtained through cprng_fast_get().
 */
uint32_t
cprng_fast32(void)
{
	struct cprng_fast *rng;
	uint32_t word;
	int ipl;

	ipl = cprng_fast_get(&rng);
	word = cprng_fast_word(rng);
	cprng_fast_put(rng, ipl);

	return word;
}
    446  1.2       tls 
/*
 * cprng_fast64: Return 64 bits of output, assembled from two
 * consecutive 32-bit words of the generator state obtained through
 * cprng_fast_get().  The first word drawn forms the high half.
 */
uint64_t
cprng_fast64(void)
{
	struct cprng_fast *rng;
	uint64_t result;
	int ipl;

	ipl = cprng_fast_get(&rng);
	result = (uint64_t)cprng_fast_word(rng) << 32;
	result |= cprng_fast_word(rng);
	cprng_fast_put(rng, ipl);

	return result;
}
    461  1.2       tls 
/*
 * cprng_fast_buf_short(buf, len): Fill buf with len bytes taken
 * directly from the generator state, holding the state (and the raised
 * priority level from cprng_fast_get) for the whole copy.  cprng_fast()
 * only calls this for requests of at most one output block.
 */
static void
cprng_fast_buf_short(void *buf, size_t len)
{
	struct cprng_fast *rng;
	int ipl;

	ipl = cprng_fast_get(&rng);
	cprng_fast_buf(rng, buf, len);
	cprng_fast_put(rng, ipl);
}
    472  1.2       tls 
    473  1.2       tls static __noinline void
    474  1.2       tls cprng_fast_buf_long(void *buf, size_t len)
    475  1.2       tls {
    476  1.2       tls 	uint32_t seed[crypto_core_KEYWORDS];
    477  1.2       tls 	struct cprng_fast *cprng;
    478  1.2       tls 	int s;
    479  1.2       tls 
    480  1.2       tls 	s = cprng_fast_get(&cprng);
    481  1.2       tls 	cprng_fast_buf(cprng, seed, sizeof seed);
    482  1.2       tls 	cprng_fast_put(cprng, s);
    483  1.2       tls 
    484  1.2       tls 	crypto_onetimestream(seed, buf, len);
    485  1.2       tls 
    486  1.2       tls 	(void)explicit_memset(seed, 0, sizeof seed);
    487  1.2       tls }
    488  1.2       tls 
    489  1.2       tls size_t
    490  1.2       tls cprng_fast(void *buf, size_t len)
    491  1.2       tls {
    492  1.2       tls 
    493  1.2       tls 	/*
    494  1.2       tls 	 * We don't want to hog the CPU, so we use the short version,
    495  1.2       tls 	 * to generate output without preemption, only if we can do it
    496  1.2       tls 	 * with at most one crypto_core.
    497  1.2       tls 	 */
    498                	if (len <= (sizeof(uint32_t) * crypto_core_OUTPUTWORDS))
    499                		cprng_fast_buf_short(buf, len);
    500                	else
    501                		cprng_fast_buf_long(buf, len);
    502                
    503                	return len;
    504                }
    505