Home | History | Annotate | Line # | Download | only in cprng_fast
cprng_fast.c revision 1.4
      1  1.4  riastrad /*	$NetBSD: cprng_fast.c,v 1.4 2014/08/11 03:46:54 riastradh Exp $	*/
      2  1.2       tls 
      3  1.2       tls /*-
      4  1.2       tls  * Copyright (c) 2014 The NetBSD Foundation, Inc.
      5  1.2       tls  * All rights reserved.
      6  1.2       tls  *
      7  1.2       tls  * This code is derived from software contributed to The NetBSD Foundation
      8  1.2       tls  * by Taylor R. Campbell.
      9  1.2       tls  *
     10  1.2       tls  * Redistribution and use in source and binary forms, with or without
     11  1.2       tls  * modification, are permitted provided that the following conditions
     12  1.2       tls  * are met:
     13  1.2       tls  * 1. Redistributions of source code must retain the above copyright
     14  1.2       tls  *    notice, this list of conditions and the following disclaimer.
     15  1.2       tls  * 2. Redistributions in binary form must reproduce the above copyright
     16  1.2       tls  *    notice, this list of conditions and the following disclaimer in the
     17  1.2       tls  *    documentation and/or other materials provided with the distribution.
     18  1.2       tls  *
     19  1.2       tls  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  1.2       tls  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  1.2       tls  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  1.2       tls  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  1.2       tls  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  1.2       tls  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  1.2       tls  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  1.2       tls  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  1.2       tls  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  1.2       tls  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  1.2       tls  * POSSIBILITY OF SUCH DAMAGE.
     30  1.2       tls  */
     31  1.2       tls 
     32  1.2       tls #include <sys/cdefs.h>
     33  1.4  riastrad __KERNEL_RCSID(0, "$NetBSD: cprng_fast.c,v 1.4 2014/08/11 03:46:54 riastradh Exp $");
     34  1.2       tls 
     35  1.2       tls #include <sys/types.h>
     36  1.4  riastrad #include <sys/param.h>
     37  1.2       tls #include <sys/bitops.h>
     38  1.4  riastrad #include <sys/cprng.h>
     39  1.2       tls #include <sys/cpu.h>
     40  1.2       tls #include <sys/intr.h>
     41  1.2       tls #include <sys/percpu.h>
     42  1.2       tls 
     43  1.2       tls /* ChaCha core */
     45  1.2       tls 
/*
 * Sizes of the four pieces the ChaCha core works with, each counted in
 * 32-bit words.
 */
#define	crypto_core_OUTPUTWORDS	16	/* words written to `out' */
#define	crypto_core_INPUTWORDS	4	/* words read from `in' */
#define	crypto_core_KEYWORDS	8	/* words read from `k' */
#define	crypto_core_CONSTWORDS	4	/* words read from `c' */

/*
 * Number of rounds.  This has to remain a preprocessor macro because
 * the self-test selects its expected output with #if on this value;
 * only 8, 12 and 20 are accepted there.
 */
#define	crypto_core_ROUNDS	8
     52  1.2       tls 
/*
 * rotate(u, c)
 *
 *	Return the 32-bit word u rotated left by c bit positions.
 *
 *	Both shift counts are reduced modulo 32.  For 0 < c < 32 this is
 *	exactly (u << c) | (u >> (32 - c)); for c == 0 (or any multiple
 *	of 32) it returns u unchanged rather than shifting a 32-bit value
 *	by 32, which C leaves undefined.
 */
static uint32_t
rotate(uint32_t u, unsigned c)
{

	/* -c is computed in unsigned arithmetic, so (-c & 31) == (32 - c) % 32. */
	return (u << (c & 31)) | (u >> (-c & 31));
}
     59  1.2       tls 
/*
 * QUARTERROUND_STEP(x, y, z, r)
 *
 *	One add-xor-rotate step: x += y, z ^= x, then z is rotated left
 *	by r bits.
 */
#define	QUARTERROUND_STEP(x, y, z, r) do {				      \
	(x) += (y);							      \
	(z) ^= (x);							      \
	(z) = rotate((z), (r));						      \
} while (0)

/*
 * QUARTERROUND(a, b, c, d)
 *
 *	Update the four 32-bit lvalues a, b, c, d in place with four
 *	add-xor-rotate steps using rotation counts 16, 12, 8 and 7.
 *	Every argument is expanded several times, so pass plain
 *	variables without side effects.
 */
#define	QUARTERROUND(a, b, c, d) do {					      \
	QUARTERROUND_STEP(a, b, d, 16);					      \
	QUARTERROUND_STEP(c, d, b, 12);					      \
	QUARTERROUND_STEP(a, b, d,  8);					      \
	QUARTERROUND_STEP(c, d, b,  7);					      \
} while (0)
     66  1.2       tls 
     67  1.2       tls static void
     68  1.2       tls crypto_core(uint32_t *out, const uint32_t *in, const uint32_t *k,
     69  1.2       tls     const uint32_t *c)
     70  1.2       tls {
     71  1.2       tls 	uint32_t x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15;
     72  1.2       tls 	int i;
     73  1.2       tls 
     74  1.2       tls 	x0 = c[0];
     75  1.2       tls 	x1 = c[1];
     76  1.2       tls 	x2 = c[2];
     77  1.2       tls 	x3 = c[3];
     78  1.2       tls 	x4 = k[0];
     79  1.2       tls 	x5 = k[1];
     80  1.2       tls 	x6 = k[2];
     81  1.2       tls 	x7 = k[3];
     82  1.2       tls 	x8 = k[4];
     83  1.2       tls 	x9 = k[5];
     84  1.2       tls 	x10 = k[6];
     85  1.2       tls 	x11 = k[7];
     86  1.2       tls 	x12 = in[0];
     87  1.2       tls 	x13 = in[1];
     88  1.2       tls 	x14 = in[2];
     89  1.2       tls 	x15 = in[3];
     90  1.2       tls 
     91  1.2       tls 	for (i = crypto_core_ROUNDS; i > 0; i -= 2) {
     92  1.2       tls 		QUARTERROUND( x0, x4, x8,x12);
     93  1.2       tls 		QUARTERROUND( x1, x5, x9,x13);
     94  1.2       tls 		QUARTERROUND( x2, x6,x10,x14);
     95  1.2       tls 		QUARTERROUND( x3, x7,x11,x15);
     96  1.2       tls 		QUARTERROUND( x0, x5,x10,x15);
     97  1.2       tls 		QUARTERROUND( x1, x6,x11,x12);
     98  1.2       tls 		QUARTERROUND( x2, x7, x8,x13);
     99  1.2       tls 		QUARTERROUND( x3, x4, x9,x14);
    100  1.2       tls 	}
    101  1.2       tls 
    102  1.2       tls 	out[0] = x0 + c[0];
    103  1.2       tls 	out[1] = x1 + c[1];
    104  1.2       tls 	out[2] = x2 + c[2];
    105  1.2       tls 	out[3] = x3 + c[3];
    106  1.2       tls 	out[4] = x4 + k[0];
    107  1.2       tls 	out[5] = x5 + k[1];
    108  1.2       tls 	out[6] = x6 + k[2];
    109  1.2       tls 	out[7] = x7 + k[3];
    110  1.2       tls 	out[8] = x8 + k[4];
    111  1.2       tls 	out[9] = x9 + k[5];
    112  1.2       tls 	out[10] = x10 + k[6];
    113  1.2       tls 	out[11] = x11 + k[7];
    114  1.2       tls 	out[12] = x12 + in[0];
    115  1.2       tls 	out[13] = x13 + in[1];
    116  1.2       tls 	out[14] = x14 + in[2];
    117  1.2       tls 	out[15] = x15 + in[3];
    118  1.2       tls }
    119  1.2       tls 
/*
 * The 16-byte string `expand 32-byte k', taken four bytes at a time as
 * little-endian 32-bit words.
 */
static const uint32_t crypto_core_constant32[4] = {
	0x61707865U,	/* "expa" */
	0x3320646eU,	/* "nd 3" */
	0x79622d32U,	/* "2-by" */
	0x6b206574U,	/* "te k" */
};
    125  1.2       tls 
    126  1.2       tls /*
    127  1.2       tls  * Test vector for ChaCha20 from
    128  1.2       tls  * <http://tools.ietf.org/html/draft-strombergson-chacha-test-vectors-00>,
    129  1.2       tls  * test vectors for ChaCha12 and ChaCha8 generated by the same
    130  1.2       tls  * crypto_core code with crypto_core_ROUNDS varied.
    131  1.2       tls  */
    132  1.2       tls 
/*
 * check(E)
 *
 *	Panic, naming the source text of E, unless E evaluates to true.
 */
#define	check(E) do {							\
	if (!(E)) {							\
		panic("crypto self-test failed: %s", #E);		\
	}								\
} while (0)
    138  1.2       tls 
    139  1.2       tls static void
    140  1.2       tls crypto_core_selftest(void)
    141  1.2       tls {
    142  1.2       tls 	const uint32_t zero32[8] = {0};
    143  1.2       tls 	const uint8_t sigma[] = "expand 32-byte k";
    144  1.2       tls 	uint32_t block[16];
    145  1.2       tls 	unsigned i;
    146  1.2       tls 
    147  1.2       tls #if crypto_core_ROUNDS == 8
    148  1.2       tls 	static const uint8_t out[64] = {
    149  1.2       tls 		0x3e,0x00,0xef,0x2f,0x89,0x5f,0x40,0xd6,
    150  1.2       tls 		0x7f,0x5b,0xb8,0xe8,0x1f,0x09,0xa5,0xa1,
    151  1.2       tls 		0x2c,0x84,0x0e,0xc3,0xce,0x9a,0x7f,0x3b,
    152  1.2       tls 		0x18,0x1b,0xe1,0x88,0xef,0x71,0x1a,0x1e,
    153  1.2       tls 		0x98,0x4c,0xe1,0x72,0xb9,0x21,0x6f,0x41,
    154  1.2       tls 		0x9f,0x44,0x53,0x67,0x45,0x6d,0x56,0x19,
    155  1.2       tls 		0x31,0x4a,0x42,0xa3,0xda,0x86,0xb0,0x01,
    156  1.2       tls 		0x38,0x7b,0xfd,0xb8,0x0e,0x0c,0xfe,0x42,
    157  1.2       tls 	};
    158  1.2       tls #elif crypto_core_ROUNDS == 12
    159  1.2       tls 	static const uint8_t out[64] = {
    160  1.2       tls 		0x9b,0xf4,0x9a,0x6a,0x07,0x55,0xf9,0x53,
    161  1.2       tls 		0x81,0x1f,0xce,0x12,0x5f,0x26,0x83,0xd5,
    162  1.2       tls 		0x04,0x29,0xc3,0xbb,0x49,0xe0,0x74,0x14,
    163  1.2       tls 		0x7e,0x00,0x89,0xa5,0x2e,0xae,0x15,0x5f,
    164  1.2       tls 		0x05,0x64,0xf8,0x79,0xd2,0x7a,0xe3,0xc0,
    165  1.2       tls 		0x2c,0xe8,0x28,0x34,0xac,0xfa,0x8c,0x79,
    166  1.2       tls 		0x3a,0x62,0x9f,0x2c,0xa0,0xde,0x69,0x19,
    167  1.2       tls 		0x61,0x0b,0xe8,0x2f,0x41,0x13,0x26,0xbe,
    168  1.2       tls 	};
    169  1.2       tls #elif crypto_core_ROUNDS == 20
    170  1.2       tls 	static const uint8_t out[64] = {
    171  1.2       tls 		0x76,0xb8,0xe0,0xad,0xa0,0xf1,0x3d,0x90,
    172  1.2       tls 		0x40,0x5d,0x6a,0xe5,0x53,0x86,0xbd,0x28,
    173  1.2       tls 		0xbd,0xd2,0x19,0xb8,0xa0,0x8d,0xed,0x1a,
    174  1.2       tls 		0xa8,0x36,0xef,0xcc,0x8b,0x77,0x0d,0xc7,
    175  1.2       tls 		0xda,0x41,0x59,0x7c,0x51,0x57,0x48,0x8d,
    176  1.2       tls 		0x77,0x24,0xe0,0x3f,0xb8,0xd8,0x4a,0x37,
    177  1.2       tls 		0x6a,0x43,0xb8,0xf4,0x15,0x18,0xa1,0x1c,
    178  1.2       tls 		0xc3,0x87,0xb6,0x69,0xb2,0xee,0x65,0x86,
    179  1.2       tls 	};
    180  1.2       tls #else
    181  1.2       tls #error crypto_core_ROUNDS must be 8, 12, or 20.
    182  1.2       tls #endif
    183  1.2       tls 
    184  1.2       tls 	check(crypto_core_constant32[0] == le32dec(&sigma[0]));
    185  1.2       tls 	check(crypto_core_constant32[1] == le32dec(&sigma[4]));
    186  1.2       tls 	check(crypto_core_constant32[2] == le32dec(&sigma[8]));
    187  1.2       tls 	check(crypto_core_constant32[3] == le32dec(&sigma[12]));
    188  1.2       tls 
    189  1.2       tls 	crypto_core(block, zero32, zero32, crypto_core_constant32);
    190  1.2       tls 	for (i = 0; i < 16; i++)
    191  1.2       tls 		check(block[i] == le32dec(&out[i*4]));
    192  1.2       tls }
    193  1.2       tls 
    194  1.2       tls #undef check
    195  1.2       tls 
    196  1.2       tls #define	CPRNG_FAST_SEED_BYTES	(crypto_core_KEYWORDS * sizeof(uint32_t))
    198  1.2       tls 
    199  1.2       tls struct cprng_fast {
    200  1.2       tls 	uint32_t 	buffer[crypto_core_OUTPUTWORDS];
    201  1.2       tls 	uint32_t 	key[crypto_core_KEYWORDS];
    202  1.2       tls 	uint32_t 	nonce[crypto_core_INPUTWORDS];
    203  1.2       tls 	bool		have_initial;
    204  1.2       tls };
    205  1.2       tls 
    206  1.2       tls __CTASSERT(sizeof ((struct cprng_fast *)0)->key == CPRNG_FAST_SEED_BYTES);
    207  1.2       tls 
    208  1.3    justin static void	cprng_fast_schedule_reseed(struct cprng_fast *);
    209  1.2       tls static void	cprng_fast_intr(void *);
    210  1.2       tls 
    211  1.2       tls static inline void	cprng_fast_seed(struct cprng_fast *, const void *);
    212  1.2       tls static void	cprng_fast_buf(struct cprng_fast *, void *, unsigned);
    213  1.2       tls 
    214  1.2       tls static void	cprng_fast_buf_short(void *, size_t);
    215  1.2       tls static void	cprng_fast_buf_long(void *, size_t);
    216  1.2       tls 
    217  1.2       tls static percpu_t	*cprng_fast_percpu	__read_mostly;
    218  1.2       tls static void	*cprng_fast_softint	__read_mostly;
    219  1.2       tls 
    220  1.2       tls extern int	rnd_initial_entropy;
    221  1.2       tls 
    222  1.2       tls void
    223  1.2       tls cprng_fast_init(void)
    224  1.2       tls {
    225  1.2       tls 	struct cpu_info *ci;
    226  1.2       tls 	CPU_INFO_ITERATOR cii;
    227  1.2       tls 
    228  1.2       tls 	crypto_core_selftest();
    229  1.2       tls 	cprng_fast_percpu = percpu_alloc(sizeof(struct cprng_fast));
    230  1.2       tls 	for (CPU_INFO_FOREACH(cii, ci)) {
    231  1.2       tls 		struct cprng_fast *cprng;
    232  1.2       tls 		uint8_t seed[CPRNG_FAST_SEED_BYTES];
    233  1.2       tls 
    234  1.2       tls 		percpu_traverse_enter();
    235  1.2       tls 		cprng = percpu_getptr_remote(cprng_fast_percpu, ci);
    236  1.2       tls 		cprng_strong(kern_cprng, seed, sizeof(seed), FASYNC);
    237  1.2       tls 		/* Can't do anything about it if not full entropy.  */
    238  1.2       tls 		cprng_fast_seed(cprng, seed);
    239  1.2       tls 		explicit_memset(seed, 0, sizeof(seed));
    240  1.2       tls 		percpu_traverse_exit();
    241  1.2       tls 	}
    242  1.2       tls 	cprng_fast_softint = softint_establish(SOFTINT_SERIAL|SOFTINT_MPSAFE,
    243  1.2       tls 	    &cprng_fast_intr, NULL);
    244  1.2       tls }
    245  1.2       tls 
    246  1.2       tls static inline int
    247  1.2       tls cprng_fast_get(struct cprng_fast **cprngp)
    248  1.2       tls {
    249  1.2       tls 
    250  1.2       tls 	*cprngp = percpu_getref(cprng_fast_percpu);
    251  1.2       tls 	return splvm();
    252  1.2       tls }
    253  1.2       tls 
    254  1.2       tls static inline void
    255  1.2       tls cprng_fast_put(struct cprng_fast *cprng, int s)
    256  1.2       tls {
    257  1.2       tls 
    258  1.2       tls 	KASSERT((cprng == percpu_getref(cprng_fast_percpu)) &&
    259  1.2       tls 	    (percpu_putref(cprng_fast_percpu), true));
    260  1.2       tls 	splx(s);
    261  1.2       tls 	percpu_putref(cprng_fast_percpu);
    262  1.2       tls }
    263  1.2       tls 
    264  1.2       tls static inline void
    266  1.2       tls cprng_fast_schedule_reseed(struct cprng_fast *cprng __unused)
    267  1.2       tls {
    268  1.2       tls 
    269  1.2       tls 	softint_schedule(cprng_fast_softint);
    270  1.2       tls }
    271  1.2       tls 
    272  1.2       tls static void
    273  1.2       tls cprng_fast_intr(void *cookie __unused)
    274  1.2       tls {
    275  1.2       tls 	struct cprng_fast *cprng;
    276  1.2       tls 	uint8_t seed[CPRNG_FAST_SEED_BYTES];
    277  1.2       tls 
    278  1.2       tls 	cprng_strong(kern_cprng, seed, sizeof(seed), FASYNC);
    279  1.2       tls 
    280  1.2       tls 	cprng = percpu_getref(cprng_fast_percpu);
    281  1.2       tls 	cprng_fast_seed(cprng, seed);
    282  1.2       tls 	percpu_putref(cprng_fast_percpu);
    283  1.2       tls 
    284  1.2       tls 	explicit_memset(seed, 0, sizeof(seed));
    285  1.2       tls }
    286  1.2       tls 
    287  1.2       tls /* CPRNG algorithm */
    289  1.2       tls 
/*
 * A generator's state is its key, its current nonce, and one 64-byte
 * block of buffered output.  The buffer is refilled only when a word is
 * needed, and that word is handed out immediately, so one 32-bit slot
 * of the buffer would otherwise sit unused.  That slot -- the last word
 * -- is used instead to hold the number of buffered words still
 * unread.  The count runs from high to low, so "needs a refill" is a
 * plain comparison with zero.
 */
#define	CPRNG_FAST_BUFIDX	(crypto_core_OUTPUTWORDS - 1)
    299  1.2       tls 
    300  1.2       tls static inline void
    301  1.2       tls cprng_fast_seed(struct cprng_fast *cprng, const void *seed)
    302  1.2       tls {
    303  1.2       tls 
    304  1.2       tls 	(void)memset(cprng->buffer, 0, sizeof cprng->buffer);
    305  1.2       tls 	(void)memcpy(cprng->key, seed, sizeof cprng->key);
    306  1.2       tls 	(void)memset(cprng->nonce, 0, sizeof cprng->nonce);
    307  1.2       tls 
    308  1.2       tls 	if (__predict_true(rnd_initial_entropy)) {
    309  1.2       tls 		cprng->have_initial = true;
    310  1.2       tls 	} else {
    311  1.2       tls 		cprng->have_initial = false;
    312  1.2       tls 	}
    313  1.2       tls }
    314  1.2       tls 
    315  1.2       tls static inline uint32_t
    316  1.2       tls cprng_fast_word(struct cprng_fast *cprng)
    317  1.2       tls {
    318  1.2       tls 	uint32_t v;
    319  1.2       tls 
    320  1.2       tls 	if (__predict_true(0 < cprng->buffer[CPRNG_FAST_BUFIDX])) {
    321  1.2       tls 		v = cprng->buffer[--cprng->buffer[CPRNG_FAST_BUFIDX]];
    322  1.2       tls 	} else {
    323  1.2       tls 		/* If we don't have enough words, refill the buffer.  */
    324  1.2       tls 		crypto_core(cprng->buffer, cprng->nonce, cprng->key,
    325  1.2       tls 		    crypto_core_constant32);
    326  1.2       tls 		if (__predict_false(++cprng->nonce[0] == 0)) {
    327  1.2       tls 			cprng->nonce[1]++;
    328  1.2       tls 			cprng_fast_schedule_reseed(cprng);
    329  1.2       tls 		} else {
    330  1.2       tls 			if (__predict_false(false == cprng->have_initial)) {
    331  1.2       tls 				if (rnd_initial_entropy) {
    332  1.2       tls 					cprng_fast_schedule_reseed(cprng);
    333  1.2       tls 				}
    334  1.2       tls 			}
    335  1.2       tls 		}
    336  1.2       tls 		v = cprng->buffer[CPRNG_FAST_BUFIDX];
    337  1.2       tls 		cprng->buffer[CPRNG_FAST_BUFIDX] = CPRNG_FAST_BUFIDX;
    338  1.2       tls 	}
    339  1.2       tls 
    340  1.2       tls 	return v;
    341  1.2       tls }
    342  1.2       tls 
/*
 * cprng_fast_buf(cprng, buf, n)
 *
 *	Store n bytes of output at buf.  One generator word is consumed
 *	for every group of up to four bytes and is written least
 *	significant byte first; when n is not a multiple of four the
 *	unused high bytes of the last word are discarded.
 */
static inline void
cprng_fast_buf(struct cprng_fast *cprng, void *buf, unsigned n)
{
	uint8_t *dst = buf;

	while (n > 0) {
		unsigned take = MIN(n, 4);
		uint32_t word = cprng_fast_word(cprng);
		unsigned k;

		for (k = 0; k < take; k++) {
			*dst++ = (word & 0xff);
			word >>= 8;
		}
		n -= take;
	}
}
    360  1.2       tls 
    361  1.2       tls /*
    363  1.2       tls  * crypto_onetimestream: Expand a short unpredictable one-time seed
    364  1.2       tls  * into a long unpredictable output.
    365  1.2       tls  */
    366  1.2       tls static void
    367  1.2       tls crypto_onetimestream(const uint32_t seed[crypto_core_KEYWORDS], void *buf,
    368  1.2       tls     size_t n)
    369  1.2       tls {
    370  1.2       tls 	uint32_t block[crypto_core_OUTPUTWORDS];
    371  1.2       tls 	uint32_t nonce[crypto_core_INPUTWORDS] = {0};
    372  1.2       tls 	uint8_t *p8;
    373  1.2       tls 	uint32_t *p32;
    374  1.2       tls 	size_t ni, nb, nf;
    375  1.2       tls 
    376  1.2       tls 	/*
    377  1.2       tls 	 * Guarantee we can generate up to n bytes.  We have
    378  1.2       tls 	 * 2^(32*INPUTWORDS) possible inputs yielding output of
    379  1.2       tls 	 * 4*OUTPUTWORDS*2^(32*INPUTWORDS) bytes.  It suffices to
    380  1.2       tls 	 * require that sizeof n > (1/CHAR_BIT) log_2 n be less than
    381  1.2       tls 	 * (1/CHAR_BIT) log_2 of the total output stream length.  We
    382  1.2       tls 	 * have
    383  1.2       tls 	 *
    384  1.2       tls 	 *	log_2 (4 o 2^(32 i)) = log_2 (4 o) + log_2 2^(32 i)
    385  1.2       tls 	 *	  = 2 + log_2 o + 32 i.
    386  1.2       tls 	 */
    387  1.2       tls 	__CTASSERT(CHAR_BIT*sizeof n <=
    388  1.2       tls 	    (2 + ilog2(crypto_core_OUTPUTWORDS) + 32*crypto_core_INPUTWORDS));
    389  1.2       tls 
    390  1.2       tls 	p8 = buf;
    391  1.2       tls 	p32 = (uint32_t *)roundup2((uintptr_t)p8, sizeof(uint32_t));
    392  1.2       tls 	ni = (uint8_t *)p32 - p8;
    393  1.2       tls 	if (n < ni)
    394  1.2       tls 		ni = n;
    395  1.2       tls 	nb = (n - ni) / sizeof block;
    396  1.2       tls 	nf = (n - ni) % sizeof block;
    397  1.2       tls 
    398  1.2       tls 	KASSERT(((uintptr_t)p32 & 3) == 0);
    399  1.2       tls 	KASSERT(ni <= n);
    400  1.2       tls 	KASSERT(nb <= (n / sizeof block));
    401  1.2       tls 	KASSERT(nf <= n);
    402  1.2       tls 	KASSERT(n == (ni + (nb * sizeof block) + nf));
    403  1.2       tls 	KASSERT(ni < sizeof(uint32_t));
    404  1.2       tls 	KASSERT(nf < sizeof block);
    405  1.2       tls 
    406  1.2       tls 	if (ni) {
    407  1.2       tls 		crypto_core(block, nonce, seed, crypto_core_constant32);
    408  1.2       tls 		nonce[0]++;
    409  1.2       tls 		(void)memcpy(p8, block, ni);
    410  1.2       tls 	}
    411  1.2       tls 	while (nb--) {
    412  1.2       tls 		crypto_core(p32, nonce, seed, crypto_core_constant32);
    413  1.2       tls 		if (++nonce[0] == 0)
    414  1.2       tls 			nonce[1]++;
    415  1.2       tls 		p32 += crypto_core_OUTPUTWORDS;
    416  1.2       tls 	}
    417  1.2       tls 	if (nf) {
    418  1.2       tls 		crypto_core(block, nonce, seed, crypto_core_constant32);
    419  1.2       tls 		if (++nonce[0] == 0)
    420  1.2       tls 			nonce[1]++;
    421  1.2       tls 		(void)memcpy(p32, block, nf);
    422  1.2       tls 	}
    423  1.2       tls 
    424  1.2       tls 	if (ni | nf)
    425  1.2       tls 		(void)explicit_memset(block, 0, sizeof block);
    426  1.2       tls }
    427  1.2       tls 
    428  1.2       tls /* Public API */
    430  1.2       tls 
/*
 * cprng_fast32()
 *
 *	Return one 32-bit word from the current CPU's generator.
 */
uint32_t
cprng_fast32(void)
{
	struct cprng_fast *state;
	uint32_t word;
	int ipl;

	ipl = cprng_fast_get(&state);
	word = cprng_fast_word(state);
	cprng_fast_put(state, ipl);

	return word;
}
    444  1.2       tls 
/*
 * cprng_fast64()
 *
 *	Return 64 bits from the current CPU's generator, built from two
 *	consecutive words: the first becomes the high half, the second
 *	the low half.
 */
uint64_t
cprng_fast64(void)
{
	struct cprng_fast *state;
	uint64_t result;
	int ipl;

	ipl = cprng_fast_get(&state);
	result = (uint64_t)cprng_fast_word(state) << 32;
	result |= cprng_fast_word(state);
	cprng_fast_put(state, ipl);

	return result;
}
    459  1.2       tls 
/*
 * cprng_fast_buf_short(buf, len)
 *
 *	Fill buf with len bytes taken directly from the current CPU's
 *	generator, holding its state for the whole operation.
 *	cprng_fast uses this only for requests of at most one block.
 */
static void
cprng_fast_buf_short(void *buf, size_t len)
{
	struct cprng_fast *state;
	int ipl;

	ipl = cprng_fast_get(&state);
	cprng_fast_buf(state, buf, len);
	cprng_fast_put(state, ipl);
}
    470  1.2       tls 
    471  1.2       tls static __noinline void
    472  1.2       tls cprng_fast_buf_long(void *buf, size_t len)
    473  1.2       tls {
    474  1.2       tls 	uint32_t seed[crypto_core_KEYWORDS];
    475  1.2       tls 	struct cprng_fast *cprng;
    476  1.2       tls 	int s;
    477  1.2       tls 
    478  1.2       tls 	s = cprng_fast_get(&cprng);
    479  1.2       tls 	cprng_fast_buf(cprng, seed, sizeof seed);
    480  1.2       tls 	cprng_fast_put(cprng, s);
    481  1.2       tls 
    482  1.2       tls 	crypto_onetimestream(seed, buf, len);
    483  1.2       tls 
    484  1.2       tls 	(void)explicit_memset(seed, 0, sizeof seed);
    485  1.2       tls }
    486  1.2       tls 
    487  1.2       tls size_t
    488  1.2       tls cprng_fast(void *buf, size_t len)
    489  1.2       tls {
    490  1.2       tls 
    491  1.2       tls 	/*
    492  1.2       tls 	 * We don't want to hog the CPU, so we use the short version,
    493  1.2       tls 	 * to generate output without preemption, only if we can do it
    494  1.2       tls 	 * with at most one crypto_core.
    495  1.2       tls 	 */
    496                	if (len <= (sizeof(uint32_t) * crypto_core_OUTPUTWORDS))
    497                		cprng_fast_buf_short(buf, len);
    498                	else
    499                		cprng_fast_buf_long(buf, len);
    500                
    501                	return len;
    502                }
    503