Home | History | Annotate | Line # | Download | only in cprng_fast
cprng_fast.c revision 1.9
      1  1.9  riastrad /*	$NetBSD: cprng_fast.c,v 1.9 2014/08/11 13:12:53 riastradh Exp $	*/
      2  1.2       tls 
      3  1.2       tls /*-
      4  1.2       tls  * Copyright (c) 2014 The NetBSD Foundation, Inc.
      5  1.2       tls  * All rights reserved.
      6  1.2       tls  *
      7  1.2       tls  * This code is derived from software contributed to The NetBSD Foundation
      8  1.2       tls  * by Taylor R. Campbell.
      9  1.2       tls  *
     10  1.2       tls  * Redistribution and use in source and binary forms, with or without
     11  1.2       tls  * modification, are permitted provided that the following conditions
     12  1.2       tls  * are met:
     13  1.2       tls  * 1. Redistributions of source code must retain the above copyright
     14  1.2       tls  *    notice, this list of conditions and the following disclaimer.
     15  1.2       tls  * 2. Redistributions in binary form must reproduce the above copyright
     16  1.2       tls  *    notice, this list of conditions and the following disclaimer in the
     17  1.2       tls  *    documentation and/or other materials provided with the distribution.
     18  1.2       tls  *
     19  1.2       tls  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20  1.2       tls  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21  1.2       tls  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22  1.2       tls  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23  1.2       tls  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24  1.2       tls  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25  1.2       tls  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26  1.2       tls  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27  1.2       tls  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28  1.2       tls  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29  1.2       tls  * POSSIBILITY OF SUCH DAMAGE.
     30  1.2       tls  */
     31  1.2       tls 
     32  1.2       tls #include <sys/cdefs.h>
     33  1.9  riastrad __KERNEL_RCSID(0, "$NetBSD: cprng_fast.c,v 1.9 2014/08/11 13:12:53 riastradh Exp $");
     34  1.2       tls 
     35  1.2       tls #include <sys/types.h>
     36  1.4  riastrad #include <sys/param.h>
     37  1.2       tls #include <sys/bitops.h>
     38  1.4  riastrad #include <sys/cprng.h>
     39  1.2       tls #include <sys/cpu.h>
     40  1.2       tls #include <sys/intr.h>
     41  1.2       tls #include <sys/percpu.h>
     42  1.5  riastrad #include <sys/rnd.h>
     43  1.2       tls 
     44  1.2       tls /* ChaCha core */
     46  1.2       tls 
/*
 * Sizes of the ChaCha core's operands, in 32-bit words: the output
 * block, the input (nonce/counter) block, the key, and the constant.
 * The input, key, and constant together fill the 16-word state.
 */
#define	crypto_core_OUTPUTWORDS	16
#define	crypto_core_INPUTWORDS	4
#define	crypto_core_KEYWORDS	8
#define	crypto_core_CONSTWORDS	4

/* Number of ChaCha rounds performed by crypto_core (two per loop pass).  */
#define	crypto_core_ROUNDS	8
     53  1.2       tls 
/*
 * rotate: Rotate the 32-bit word u left by c bit positions.
 *
 * Both shift counts are reduced modulo 32 so that c == 0 (or any
 * multiple of 32) never shifts a 32-bit value by 32 bits, which is
 * undefined in C.  For 0 < c < 32 the result is the ordinary rotation.
 */
static uint32_t
rotate(uint32_t u, unsigned c)
{

	c &= 31;
	return (u << c) | (u >> ((32 - c) & 31));
}
     60  1.2       tls 
/*
 * QUARTERROUND(a, b, c, d): Mix four 32-bit state words in place with
 * add / xor / rotate steps using rotation distances 16, 12, 8, and 7.
 *
 * Every argument is both read and written and is expanded several
 * times, so each must be a side-effect-free lvalue.
 */
#define	QUARTERROUND(a, b, c, d) do {					      \
	(a) += (b); (d) ^= (a); (d) = rotate((d), 16);			      \
	(c) += (d); (b) ^= (c); (b) = rotate((b), 12);			      \
	(a) += (b); (d) ^= (a); (d) = rotate((d),  8);			      \
	(c) += (d); (b) ^= (c); (b) = rotate((b),  7);			      \
} while (0)
     67  1.2       tls 
     68  1.2       tls static void
     69  1.2       tls crypto_core(uint32_t *out, const uint32_t *in, const uint32_t *k,
     70  1.2       tls     const uint32_t *c)
     71  1.2       tls {
     72  1.2       tls 	uint32_t x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15;
     73  1.2       tls 	int i;
     74  1.2       tls 
     75  1.2       tls 	x0 = c[0];
     76  1.2       tls 	x1 = c[1];
     77  1.2       tls 	x2 = c[2];
     78  1.2       tls 	x3 = c[3];
     79  1.2       tls 	x4 = k[0];
     80  1.2       tls 	x5 = k[1];
     81  1.2       tls 	x6 = k[2];
     82  1.2       tls 	x7 = k[3];
     83  1.2       tls 	x8 = k[4];
     84  1.2       tls 	x9 = k[5];
     85  1.2       tls 	x10 = k[6];
     86  1.2       tls 	x11 = k[7];
     87  1.2       tls 	x12 = in[0];
     88  1.2       tls 	x13 = in[1];
     89  1.2       tls 	x14 = in[2];
     90  1.2       tls 	x15 = in[3];
     91  1.2       tls 
     92  1.2       tls 	for (i = crypto_core_ROUNDS; i > 0; i -= 2) {
     93  1.2       tls 		QUARTERROUND( x0, x4, x8,x12);
     94  1.2       tls 		QUARTERROUND( x1, x5, x9,x13);
     95  1.2       tls 		QUARTERROUND( x2, x6,x10,x14);
     96  1.2       tls 		QUARTERROUND( x3, x7,x11,x15);
     97  1.2       tls 		QUARTERROUND( x0, x5,x10,x15);
     98  1.2       tls 		QUARTERROUND( x1, x6,x11,x12);
     99  1.2       tls 		QUARTERROUND( x2, x7, x8,x13);
    100  1.2       tls 		QUARTERROUND( x3, x4, x9,x14);
    101  1.2       tls 	}
    102  1.2       tls 
    103  1.2       tls 	out[0] = x0 + c[0];
    104  1.2       tls 	out[1] = x1 + c[1];
    105  1.2       tls 	out[2] = x2 + c[2];
    106  1.2       tls 	out[3] = x3 + c[3];
    107  1.2       tls 	out[4] = x4 + k[0];
    108  1.2       tls 	out[5] = x5 + k[1];
    109  1.2       tls 	out[6] = x6 + k[2];
    110  1.2       tls 	out[7] = x7 + k[3];
    111  1.2       tls 	out[8] = x8 + k[4];
    112  1.2       tls 	out[9] = x9 + k[5];
    113  1.2       tls 	out[10] = x10 + k[6];
    114  1.2       tls 	out[11] = x11 + k[7];
    115  1.2       tls 	out[12] = x12 + in[0];
    116  1.2       tls 	out[13] = x13 + in[1];
    117  1.2       tls 	out[14] = x14 + in[2];
    118  1.2       tls 	out[15] = x15 + in[3];
    119  1.2       tls }
    120  1.2       tls 
/*
 * The 16-byte string `expand 32-byte k', encoded as four little-endian
 * 32-bit words.  Passed as the constant operand of crypto_core.
 */
static const uint32_t crypto_core_constant32[4] = {
	0x61707865U, 0x3320646eU, 0x79622d32U, 0x6b206574U,
};
    126  1.2       tls 
    127  1.2       tls /*
    128  1.2       tls  * Test vector for ChaCha20 from
    129  1.2       tls  * <http://tools.ietf.org/html/draft-strombergson-chacha-test-vectors-00>,
    130  1.2       tls  * test vectors for ChaCha12 and ChaCha8 generated by the same
    131  1.2       tls  * crypto_core code with crypto_core_ROUNDS varied.
    132  1.2       tls  */
    133  1.2       tls 
/*
 * check(E): Panic with the text of expression E if E evaluates false.
 * Used only by the self-test below and #undef'd right after it.
 */
#define	check(E)	do						\
{									\
	if (!(E))							\
		panic("crypto self-test failed: %s", #E);		\
} while (0)
    139  1.2       tls 
    140  1.2       tls static void
    141  1.2       tls crypto_core_selftest(void)
    142  1.2       tls {
    143  1.2       tls 	const uint32_t zero32[8] = {0};
    144  1.2       tls 	const uint8_t sigma[] = "expand 32-byte k";
    145  1.2       tls 	uint32_t block[16];
    146  1.2       tls 	unsigned i;
    147  1.2       tls 
    148  1.2       tls #if crypto_core_ROUNDS == 8
    149  1.2       tls 	static const uint8_t out[64] = {
    150  1.2       tls 		0x3e,0x00,0xef,0x2f,0x89,0x5f,0x40,0xd6,
    151  1.2       tls 		0x7f,0x5b,0xb8,0xe8,0x1f,0x09,0xa5,0xa1,
    152  1.2       tls 		0x2c,0x84,0x0e,0xc3,0xce,0x9a,0x7f,0x3b,
    153  1.2       tls 		0x18,0x1b,0xe1,0x88,0xef,0x71,0x1a,0x1e,
    154  1.2       tls 		0x98,0x4c,0xe1,0x72,0xb9,0x21,0x6f,0x41,
    155  1.2       tls 		0x9f,0x44,0x53,0x67,0x45,0x6d,0x56,0x19,
    156  1.2       tls 		0x31,0x4a,0x42,0xa3,0xda,0x86,0xb0,0x01,
    157  1.2       tls 		0x38,0x7b,0xfd,0xb8,0x0e,0x0c,0xfe,0x42,
    158  1.2       tls 	};
    159  1.2       tls #elif crypto_core_ROUNDS == 12
    160  1.2       tls 	static const uint8_t out[64] = {
    161  1.2       tls 		0x9b,0xf4,0x9a,0x6a,0x07,0x55,0xf9,0x53,
    162  1.2       tls 		0x81,0x1f,0xce,0x12,0x5f,0x26,0x83,0xd5,
    163  1.2       tls 		0x04,0x29,0xc3,0xbb,0x49,0xe0,0x74,0x14,
    164  1.2       tls 		0x7e,0x00,0x89,0xa5,0x2e,0xae,0x15,0x5f,
    165  1.2       tls 		0x05,0x64,0xf8,0x79,0xd2,0x7a,0xe3,0xc0,
    166  1.2       tls 		0x2c,0xe8,0x28,0x34,0xac,0xfa,0x8c,0x79,
    167  1.2       tls 		0x3a,0x62,0x9f,0x2c,0xa0,0xde,0x69,0x19,
    168  1.2       tls 		0x61,0x0b,0xe8,0x2f,0x41,0x13,0x26,0xbe,
    169  1.2       tls 	};
    170  1.2       tls #elif crypto_core_ROUNDS == 20
    171  1.2       tls 	static const uint8_t out[64] = {
    172  1.2       tls 		0x76,0xb8,0xe0,0xad,0xa0,0xf1,0x3d,0x90,
    173  1.2       tls 		0x40,0x5d,0x6a,0xe5,0x53,0x86,0xbd,0x28,
    174  1.2       tls 		0xbd,0xd2,0x19,0xb8,0xa0,0x8d,0xed,0x1a,
    175  1.2       tls 		0xa8,0x36,0xef,0xcc,0x8b,0x77,0x0d,0xc7,
    176  1.2       tls 		0xda,0x41,0x59,0x7c,0x51,0x57,0x48,0x8d,
    177  1.2       tls 		0x77,0x24,0xe0,0x3f,0xb8,0xd8,0x4a,0x37,
    178  1.2       tls 		0x6a,0x43,0xb8,0xf4,0x15,0x18,0xa1,0x1c,
    179  1.2       tls 		0xc3,0x87,0xb6,0x69,0xb2,0xee,0x65,0x86,
    180  1.2       tls 	};
    181  1.2       tls #else
    182  1.2       tls #error crypto_core_ROUNDS must be 8, 12, or 20.
    183  1.2       tls #endif
    184  1.2       tls 
    185  1.2       tls 	check(crypto_core_constant32[0] == le32dec(&sigma[0]));
    186  1.2       tls 	check(crypto_core_constant32[1] == le32dec(&sigma[4]));
    187  1.2       tls 	check(crypto_core_constant32[2] == le32dec(&sigma[8]));
    188  1.2       tls 	check(crypto_core_constant32[3] == le32dec(&sigma[12]));
    189  1.2       tls 
    190  1.2       tls 	crypto_core(block, zero32, zero32, crypto_core_constant32);
    191  1.2       tls 	for (i = 0; i < 16; i++)
    192  1.2       tls 		check(block[i] == le32dec(&out[i*4]));
    193  1.2       tls }
    194  1.2       tls 
    195  1.2       tls #undef check
    196  1.2       tls 
    197  1.2       tls #define	CPRNG_FAST_SEED_BYTES	(crypto_core_KEYWORDS * sizeof(uint32_t))
    199  1.2       tls 
    200  1.2       tls struct cprng_fast {
    201  1.2       tls 	uint32_t 	buffer[crypto_core_OUTPUTWORDS];
    202  1.2       tls 	uint32_t 	key[crypto_core_KEYWORDS];
    203  1.2       tls 	uint32_t 	nonce[crypto_core_INPUTWORDS];
    204  1.2       tls 	bool		have_initial;
    205  1.2       tls };
    206  1.8  riastrad 
    207  1.2       tls __CTASSERT(sizeof ((struct cprng_fast *)0)->key == CPRNG_FAST_SEED_BYTES);
    208  1.2       tls 
    209  1.2       tls static void	cprng_fast_init_cpu(void *, void *, struct cpu_info *);
    210  1.6  riastrad static void	cprng_fast_schedule_reseed(struct cprng_fast *);
    211  1.2       tls static void	cprng_fast_intr(void *);
    212  1.2       tls 
    213  1.2       tls static void	cprng_fast_seed(struct cprng_fast *, const void *);
    214  1.2       tls static void	cprng_fast_buf(struct cprng_fast *, void *, unsigned);
    215  1.2       tls 
    216  1.2       tls static void	cprng_fast_buf_short(void *, size_t);
    217  1.2       tls static void	cprng_fast_buf_long(void *, size_t);
    218  1.2       tls 
    219  1.2       tls static percpu_t	*cprng_fast_percpu	__read_mostly;
    220  1.2       tls static void	*cprng_fast_softint	__read_mostly;
    221  1.2       tls 
    222  1.2       tls void
    223  1.2       tls cprng_fast_init(void)
    224  1.2       tls {
    225  1.8  riastrad 
    226  1.2       tls 	crypto_core_selftest();
    227  1.2       tls 	cprng_fast_percpu = percpu_alloc(sizeof(struct cprng_fast));
    228  1.2       tls 	percpu_foreach(cprng_fast_percpu, &cprng_fast_init_cpu, NULL);
    229  1.2       tls 	cprng_fast_softint = softint_establish(SOFTINT_SERIAL|SOFTINT_MPSAFE,
    230  1.8  riastrad 	    &cprng_fast_intr, NULL);
    231  1.8  riastrad }
    232  1.8  riastrad 
    233  1.8  riastrad static void
    234  1.8  riastrad cprng_fast_init_cpu(void *p, void *arg __unused, struct cpu_info *ci __unused)
    235  1.8  riastrad {
    236  1.8  riastrad 	struct cprng_fast *const cprng = p;
    237  1.8  riastrad 	uint8_t seed[CPRNG_FAST_SEED_BYTES];
    238  1.9  riastrad 
    239  1.8  riastrad 	cprng_strong(kern_cprng, seed, sizeof seed, FASYNC);
    240  1.8  riastrad 	cprng_fast_seed(cprng, seed);
    241  1.9  riastrad 	cprng->have_initial = rnd_initial_entropy;
    242  1.2       tls 	(void)explicit_memset(seed, 0, sizeof seed);
    243  1.2       tls }
    244  1.2       tls 
    245  1.9  riastrad static inline int
    247  1.9  riastrad cprng_fast_get(struct cprng_fast **cprngp)
    248  1.9  riastrad {
    249  1.9  riastrad 	struct cprng_fast *cprng;
    250  1.2       tls 	int s;
    251  1.9  riastrad 
    252  1.9  riastrad 	*cprngp = cprng = percpu_getref(cprng_fast_percpu);
    253  1.9  riastrad 	s = splvm();
    254  1.9  riastrad 
    255  1.2       tls 	if (__predict_false(!cprng->have_initial))
    256  1.2       tls 		cprng_fast_schedule_reseed(cprng);
    257  1.2       tls 
    258  1.2       tls 	return s;
    259  1.2       tls }
    260  1.2       tls 
    261  1.2       tls static inline void
    262  1.2       tls cprng_fast_put(struct cprng_fast *cprng, int s)
    263  1.2       tls {
    264  1.2       tls 
    265  1.2       tls 	KASSERT((cprng == percpu_getref(cprng_fast_percpu)) &&
    266  1.9  riastrad 	    (percpu_putref(cprng_fast_percpu), true));
    267  1.2       tls 	splx(s);
    268  1.2       tls 	percpu_putref(cprng_fast_percpu);
    269  1.2       tls }
    270  1.2       tls 
    271  1.2       tls static inline void
    272  1.2       tls cprng_fast_schedule_reseed(struct cprng_fast *cprng __unused)
    273  1.2       tls {
    274  1.2       tls 
    275  1.2       tls 	softint_schedule(cprng_fast_softint);
    276  1.2       tls }
    277  1.2       tls 
    278  1.2       tls static void
    279  1.7  riastrad cprng_fast_intr(void *cookie __unused)
    280  1.2       tls {
    281  1.2       tls 	struct cprng_fast *cprng;
    282  1.2       tls 	uint8_t seed[CPRNG_FAST_SEED_BYTES];
    283  1.2       tls 	int s;
    284  1.7  riastrad 
    285  1.2       tls 	cprng_strong(kern_cprng, seed, sizeof(seed), FASYNC);
    286  1.9  riastrad 
    287  1.7  riastrad 	cprng = percpu_getref(cprng_fast_percpu);
    288  1.2       tls 	s = splvm();
    289  1.2       tls 	cprng_fast_seed(cprng, seed);
    290  1.2       tls 	cprng->have_initial = rnd_initial_entropy;
    291  1.2       tls 	splx(s);
    292  1.2       tls 	percpu_putref(cprng_fast_percpu);
    293  1.2       tls 
    294  1.2       tls 	explicit_memset(seed, 0, sizeof(seed));
    295  1.2       tls }
    296  1.2       tls 
    297  1.2       tls /* CPRNG algorithm */
    299  1.2       tls 
/*
 * The state consists of a key, the current nonce, and a 64-byte buffer
 * of output.  Since we fill the buffer only when we need output, and
 * eat a 32-bit word at a time, one 32-bit word of the buffer would be
 * wasted.  Instead, we repurpose it to count the number of entries in
 * the buffer remaining, counting from high to low in order to allow
 * comparison to zero to detect when we need to refill it.
 *
 * CPRNG_FAST_BUFIDX is the index of that repurposed word: the last
 * word of the buffer.  On a refill its fresh contents are handed out
 * first, and the slot then holds the count of words still unread.
 */
#define	CPRNG_FAST_BUFIDX	(crypto_core_OUTPUTWORDS - 1)
    309  1.2       tls 
    310  1.2       tls static void
    311  1.2       tls cprng_fast_seed(struct cprng_fast *cprng, const void *seed)
    312  1.2       tls {
    313  1.2       tls 
    314  1.2       tls 	(void)memset(cprng->buffer, 0, sizeof cprng->buffer);
    315  1.2       tls 	(void)memcpy(cprng->key, seed, sizeof cprng->key);
    316  1.2       tls 	(void)memset(cprng->nonce, 0, sizeof cprng->nonce);
    317  1.2       tls }
    318  1.2       tls 
    319  1.2       tls static inline uint32_t
    320  1.2       tls cprng_fast_word(struct cprng_fast *cprng)
    321  1.2       tls {
    322  1.2       tls 	uint32_t v;
    323  1.2       tls 
    324  1.2       tls 	if (__predict_true(0 < cprng->buffer[CPRNG_FAST_BUFIDX])) {
    325  1.2       tls 		v = cprng->buffer[--cprng->buffer[CPRNG_FAST_BUFIDX]];
    326  1.2       tls 	} else {
    327  1.2       tls 		/* If we don't have enough words, refill the buffer.  */
    328  1.2       tls 		crypto_core(cprng->buffer, cprng->nonce, cprng->key,
    329  1.2       tls 		    crypto_core_constant32);
    330  1.2       tls 		if (__predict_false(++cprng->nonce[0] == 0)) {
    331  1.2       tls 			cprng->nonce[1]++;
    332  1.2       tls 			cprng_fast_schedule_reseed(cprng);
    333  1.2       tls 		}
    334  1.2       tls 		v = cprng->buffer[CPRNG_FAST_BUFIDX];
    335  1.2       tls 		cprng->buffer[CPRNG_FAST_BUFIDX] = CPRNG_FAST_BUFIDX;
    336  1.2       tls 	}
    337  1.2       tls 
    338  1.2       tls 	return v;
    339  1.2       tls }
    340  1.2       tls 
/*
 * cprng_fast_buf: Fill buf with n bytes of output from cprng.
 *
 * Output is drawn one 32-bit word at a time and stored low-order byte
 * first.  If n is not a multiple of 4, the unused high-order bytes of
 * the final word are discarded.
 */
static inline void
cprng_fast_buf(struct cprng_fast *cprng, void *buf, unsigned n)
{
	uint8_t *p = buf;
	uint32_t v;
	unsigned r;

	while (n) {
		r = MIN(n, 4);
		n -= r;
		v = cprng_fast_word(cprng);
		while (r--) {
			*p++ = (v & 0xff);
			v >>= 8;
		}
	}
}
    358  1.2       tls 
    359  1.2       tls /*
    361  1.2       tls  * crypto_onetimestream: Expand a short unpredictable one-time seed
    362  1.2       tls  * into a long unpredictable output.
    363  1.2       tls  */
    364  1.2       tls static void
    365  1.2       tls crypto_onetimestream(const uint32_t seed[crypto_core_KEYWORDS], void *buf,
    366  1.2       tls     size_t n)
    367  1.2       tls {
    368  1.2       tls 	uint32_t block[crypto_core_OUTPUTWORDS];
    369  1.2       tls 	uint32_t nonce[crypto_core_INPUTWORDS] = {0};
    370  1.2       tls 	uint8_t *p8;
    371  1.2       tls 	uint32_t *p32;
    372  1.2       tls 	size_t ni, nb, nf;
    373  1.2       tls 
    374  1.2       tls 	/*
    375  1.2       tls 	 * Guarantee we can generate up to n bytes.  We have
    376  1.2       tls 	 * 2^(32*INPUTWORDS) possible inputs yielding output of
    377  1.2       tls 	 * 4*OUTPUTWORDS*2^(32*INPUTWORDS) bytes.  It suffices to
    378  1.2       tls 	 * require that sizeof n > (1/CHAR_BIT) log_2 n be less than
    379  1.2       tls 	 * (1/CHAR_BIT) log_2 of the total output stream length.  We
    380  1.2       tls 	 * have
    381  1.2       tls 	 *
    382  1.2       tls 	 *	log_2 (4 o 2^(32 i)) = log_2 (4 o) + log_2 2^(32 i)
    383  1.2       tls 	 *	  = 2 + log_2 o + 32 i.
    384  1.2       tls 	 */
    385  1.2       tls 	__CTASSERT(CHAR_BIT*sizeof n <=
    386  1.2       tls 	    (2 + ilog2(crypto_core_OUTPUTWORDS) + 32*crypto_core_INPUTWORDS));
    387  1.2       tls 
    388  1.2       tls 	p8 = buf;
    389  1.2       tls 	p32 = (uint32_t *)roundup2((uintptr_t)p8, sizeof(uint32_t));
    390  1.2       tls 	ni = (uint8_t *)p32 - p8;
    391  1.2       tls 	if (n < ni)
    392  1.2       tls 		ni = n;
    393  1.2       tls 	nb = (n - ni) / sizeof block;
    394  1.2       tls 	nf = (n - ni) % sizeof block;
    395  1.2       tls 
    396  1.2       tls 	KASSERT(((uintptr_t)p32 & 3) == 0);
    397  1.2       tls 	KASSERT(ni <= n);
    398  1.2       tls 	KASSERT(nb <= (n / sizeof block));
    399  1.2       tls 	KASSERT(nf <= n);
    400  1.2       tls 	KASSERT(n == (ni + (nb * sizeof block) + nf));
    401  1.2       tls 	KASSERT(ni < sizeof(uint32_t));
    402  1.2       tls 	KASSERT(nf < sizeof block);
    403  1.2       tls 
    404  1.2       tls 	if (ni) {
    405  1.2       tls 		crypto_core(block, nonce, seed, crypto_core_constant32);
    406  1.2       tls 		nonce[0]++;
    407  1.2       tls 		(void)memcpy(p8, block, ni);
    408  1.2       tls 	}
    409  1.2       tls 	while (nb--) {
    410  1.2       tls 		crypto_core(p32, nonce, seed, crypto_core_constant32);
    411  1.2       tls 		if (++nonce[0] == 0)
    412  1.2       tls 			nonce[1]++;
    413  1.2       tls 		p32 += crypto_core_OUTPUTWORDS;
    414  1.2       tls 	}
    415  1.2       tls 	if (nf) {
    416  1.2       tls 		crypto_core(block, nonce, seed, crypto_core_constant32);
    417  1.2       tls 		if (++nonce[0] == 0)
    418  1.2       tls 			nonce[1]++;
    419  1.2       tls 		(void)memcpy(p32, block, nf);
    420  1.2       tls 	}
    421  1.2       tls 
    422  1.2       tls 	if (ni | nf)
    423  1.2       tls 		(void)explicit_memset(block, 0, sizeof block);
    424  1.2       tls }
    425  1.2       tls 
    426  1.2       tls /* Public API */
    428  1.2       tls 
/*
 * cprng_fast32: Return 32 bits of output from the current CPU's
 * generator.
 */
uint32_t
cprng_fast32(void)
{
	struct cprng_fast *cprng;
	uint32_t v;
	int s;

	s = cprng_fast_get(&cprng);
	v = cprng_fast_word(cprng);
	cprng_fast_put(cprng, s);

	return v;
}
    442  1.2       tls 
/*
 * cprng_fast64: Return 64 bits of output from the current CPU's
 * generator.  The first word drawn becomes the high half of the
 * result and the second the low half.
 */
uint64_t
cprng_fast64(void)
{
	struct cprng_fast *cprng;
	uint32_t hi, lo;
	int s;

	s = cprng_fast_get(&cprng);
	hi = cprng_fast_word(cprng);
	lo = cprng_fast_word(cprng);
	cprng_fast_put(cprng, s);

	return ((uint64_t)hi << 32) | lo;
}
    457  1.2       tls 
    458  1.2       tls static void
    459  1.2       tls cprng_fast_buf_short(void *buf, size_t len)
    460  1.2       tls {
    461  1.2       tls 	struct cprng_fast *cprng;
    462  1.2       tls 	int s;
    463  1.2       tls 
    464  1.2       tls 	s = cprng_fast_get(&cprng);
    465  1.2       tls 	cprng_fast_buf(cprng, buf, len);
    466  1.2       tls 	cprng_fast_put(cprng, s);
    467  1.2       tls }
    468  1.2       tls 
    469  1.2       tls static __noinline void
    470  1.2       tls cprng_fast_buf_long(void *buf, size_t len)
    471  1.2       tls {
    472  1.2       tls 	uint32_t seed[crypto_core_KEYWORDS];
    473  1.2       tls 	struct cprng_fast *cprng;
    474  1.2       tls 	int s;
    475  1.2       tls 
    476  1.2       tls 	s = cprng_fast_get(&cprng);
    477  1.2       tls 	cprng_fast_buf(cprng, seed, sizeof seed);
    478  1.2       tls 	cprng_fast_put(cprng, s);
    479  1.2       tls 
    480  1.2       tls 	crypto_onetimestream(seed, buf, len);
    481  1.2       tls 
    482  1.2       tls 	(void)explicit_memset(seed, 0, sizeof seed);
    483  1.2       tls }
    484  1.2       tls 
    485  1.2       tls size_t
    486  1.2       tls cprng_fast(void *buf, size_t len)
    487  1.2       tls {
    488  1.2       tls 
    489  1.2       tls 	/*
    490  1.2       tls 	 * We don't want to hog the CPU, so we use the short version,
    491  1.2       tls 	 * to generate output without preemption, only if we can do it
    492  1.2       tls 	 * with at most one crypto_core.
    493  1.2       tls 	 */
    494                	if (len <= (sizeof(uint32_t) * crypto_core_OUTPUTWORDS))
    495                		cprng_fast_buf_short(buf, len);
    496                	else
    497                		cprng_fast_buf_long(buf, len);
    498                
    499                	return len;
    500                }
    501