cprng_fast.c revision 1.4 1 1.4 riastrad /* $NetBSD: cprng_fast.c,v 1.4 2014/08/11 03:46:54 riastradh Exp $ */
2 1.2 tls
3 1.2 tls /*-
4 1.2 tls * Copyright (c) 2014 The NetBSD Foundation, Inc.
5 1.2 tls * All rights reserved.
6 1.2 tls *
7 1.2 tls * This code is derived from software contributed to The NetBSD Foundation
8 1.2 tls * by Taylor R. Campbell.
9 1.2 tls *
10 1.2 tls * Redistribution and use in source and binary forms, with or without
11 1.2 tls * modification, are permitted provided that the following conditions
12 1.2 tls * are met:
13 1.2 tls * 1. Redistributions of source code must retain the above copyright
14 1.2 tls * notice, this list of conditions and the following disclaimer.
15 1.2 tls * 2. Redistributions in binary form must reproduce the above copyright
16 1.2 tls * notice, this list of conditions and the following disclaimer in the
17 1.2 tls * documentation and/or other materials provided with the distribution.
18 1.2 tls *
19 1.2 tls * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20 1.2 tls * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21 1.2 tls * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22 1.2 tls * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23 1.2 tls * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24 1.2 tls * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25 1.2 tls * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26 1.2 tls * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27 1.2 tls * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28 1.2 tls * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29 1.2 tls * POSSIBILITY OF SUCH DAMAGE.
30 1.2 tls */
31 1.2 tls
32 1.2 tls #include <sys/cdefs.h>
33 1.4 riastrad __KERNEL_RCSID(0, "$NetBSD: cprng_fast.c,v 1.4 2014/08/11 03:46:54 riastradh Exp $");
34 1.2 tls
35 1.2 tls #include <sys/types.h>
36 1.4 riastrad #include <sys/param.h>
37 1.2 tls #include <sys/bitops.h>
38 1.4 riastrad #include <sys/cprng.h>
39 1.2 tls #include <sys/cpu.h>
40 1.2 tls #include <sys/intr.h>
41 1.2 tls #include <sys/percpu.h>
42 1.2 tls
43 1.2 tls /* ChaCha core */
45 1.2 tls
/*
 * Sizes, in 32-bit words, of the arrays crypto_core works on: the output
 * block it writes (out[0..15]) and its three inputs -- the input/nonce
 * words (in[0..3]), the key (k[0..7]) and the constant (c[0..3]).
 */
46 1.2 tls #define crypto_core_OUTPUTWORDS 16
47 1.2 tls #define crypto_core_INPUTWORDS 4
48 1.2 tls #define crypto_core_KEYWORDS 8
49 1.2 tls #define crypto_core_CONSTWORDS 4
50 1.2 tls
/*
 * Number of ChaCha rounds.  crypto_core performs them two at a time, and
 * crypto_core_selftest only carries expected output for 8, 12 and 20
 * (any other value is rejected at compile time with #error).
 */
51 1.2 tls #define crypto_core_ROUNDS 8
52 1.2 tls
/*
 * rotate(u, c): Rotate the 32-bit word u left by c bit positions and
 * return the result.
 *
 * Both shift counts are reduced modulo 32, so c == 0 (or any multiple
 * of 32) returns u unchanged instead of shifting a 32-bit value by 32
 * bits, which is undefined behaviour in C.  For c in 1..31 the result
 * is identical to (u << c) | (u >> (32 - c)).
 */
static uint32_t
rotate(uint32_t u, unsigned c)
{

	/* -c is computed in unsigned arithmetic, so (-c & 31) == (32 - c) % 32. */
	return (u << (c & 31)) | (u >> (-c & 31));
}
59 1.2 tls
/*
 * QUARTERROUND(a, b, c, d): One ChaCha quarter-round, updating the four
 * 32-bit lvalues a, b, c, d in place.  Each step adds one word into
 * another, xors the sum into a third word and rotates that word left,
 * by 16, 12, 8 and 7 bits in turn.  Arguments are evaluated more than
 * once, so they must be free of side effects.
 */
#define	QUARTERROUND(a, b, c, d) do {					      \
	(a) += (b);	(d) = rotate((d) ^ (a), 16);			      \
	(c) += (d);	(b) = rotate((b) ^ (c), 12);			      \
	(a) += (b);	(d) = rotate((d) ^ (a), 8);			      \
	(c) += (d);	(b) = rotate((b) ^ (c), 7);			      \
} while (0)
66 1.2 tls
67 1.2 tls static void
68 1.2 tls crypto_core(uint32_t *out, const uint32_t *in, const uint32_t *k,
69 1.2 tls const uint32_t *c)
70 1.2 tls {
71 1.2 tls uint32_t x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15;
72 1.2 tls int i;
73 1.2 tls
74 1.2 tls x0 = c[0];
75 1.2 tls x1 = c[1];
76 1.2 tls x2 = c[2];
77 1.2 tls x3 = c[3];
78 1.2 tls x4 = k[0];
79 1.2 tls x5 = k[1];
80 1.2 tls x6 = k[2];
81 1.2 tls x7 = k[3];
82 1.2 tls x8 = k[4];
83 1.2 tls x9 = k[5];
84 1.2 tls x10 = k[6];
85 1.2 tls x11 = k[7];
86 1.2 tls x12 = in[0];
87 1.2 tls x13 = in[1];
88 1.2 tls x14 = in[2];
89 1.2 tls x15 = in[3];
90 1.2 tls
91 1.2 tls for (i = crypto_core_ROUNDS; i > 0; i -= 2) {
92 1.2 tls QUARTERROUND( x0, x4, x8,x12);
93 1.2 tls QUARTERROUND( x1, x5, x9,x13);
94 1.2 tls QUARTERROUND( x2, x6,x10,x14);
95 1.2 tls QUARTERROUND( x3, x7,x11,x15);
96 1.2 tls QUARTERROUND( x0, x5,x10,x15);
97 1.2 tls QUARTERROUND( x1, x6,x11,x12);
98 1.2 tls QUARTERROUND( x2, x7, x8,x13);
99 1.2 tls QUARTERROUND( x3, x4, x9,x14);
100 1.2 tls }
101 1.2 tls
102 1.2 tls out[0] = x0 + c[0];
103 1.2 tls out[1] = x1 + c[1];
104 1.2 tls out[2] = x2 + c[2];
105 1.2 tls out[3] = x3 + c[3];
106 1.2 tls out[4] = x4 + k[0];
107 1.2 tls out[5] = x5 + k[1];
108 1.2 tls out[6] = x6 + k[2];
109 1.2 tls out[7] = x7 + k[3];
110 1.2 tls out[8] = x8 + k[4];
111 1.2 tls out[9] = x9 + k[5];
112 1.2 tls out[10] = x10 + k[6];
113 1.2 tls out[11] = x11 + k[7];
114 1.2 tls out[12] = x12 + in[0];
115 1.2 tls out[13] = x13 + in[1];
116 1.2 tls out[14] = x14 + in[2];
117 1.2 tls out[15] = x15 + in[3];
118 1.2 tls }
119 1.2 tls
/*
 * Constant words passed as the `c' argument of crypto_core throughout
 * this file.  crypto_core_selftest verifies that they are the
 * little-endian 32-bit decoding of the 16-byte ASCII string below.
 */
120 1.2 tls /* `expand 32-byte k' */
122 1.2 tls static const uint32_t crypto_core_constant32[4] = {
123 1.2 tls 0x61707865U, 0x3320646eU, 0x79622d32U, 0x6b206574U,
124 1.2 tls };
125 1.2 tls
126 1.2 tls /*
127 1.2 tls * Test vector for ChaCha20 from
128 1.2 tls * <http://tools.ietf.org/html/draft-strombergson-chacha-test-vectors-00>,
129 1.2 tls * test vectors for ChaCha12 and ChaCha8 generated by the same
130 1.2 tls * crypto_core code with crypto_core_ROUNDS varied.
131 1.2 tls */
132 1.2 tls
/*
 * check(E): Self-test assertion.  If E evaluates false, panic with a
 * message that includes the source text of E.
 */
#define	check(E)							      \
	((E) ? (void)0 : panic("crypto self-test failed: %s", #E))
138 1.2 tls
/*
 * crypto_core_selftest: Known-answer test for crypto_core.
 *
 * First verifies that crypto_core_constant32 matches the little-endian
 * decoding of "expand 32-byte k".  Then runs crypto_core with an
 * all-zero key and all-zero input words and compares every output word
 * against the expected block for the configured crypto_core_ROUNDS.
 * Any mismatch panics through check().
 */
139 1.2 tls static void
140 1.2 tls crypto_core_selftest(void)
141 1.2 tls {
/* zero32 serves as both the 8-word key and the 4-word input below. */
142 1.2 tls const uint32_t zero32[8] = {0};
143 1.2 tls const uint8_t sigma[] = "expand 32-byte k";
144 1.2 tls uint32_t block[16];
145 1.2 tls unsigned i;
146 1.2 tls
/*
 * Expected 64-byte output block, selected at compile time by the round
 * count; bytes are compared as little-endian 32-bit words.
 */
147 1.2 tls #if crypto_core_ROUNDS == 8
148 1.2 tls static const uint8_t out[64] = {
149 1.2 tls 0x3e,0x00,0xef,0x2f,0x89,0x5f,0x40,0xd6,
150 1.2 tls 0x7f,0x5b,0xb8,0xe8,0x1f,0x09,0xa5,0xa1,
151 1.2 tls 0x2c,0x84,0x0e,0xc3,0xce,0x9a,0x7f,0x3b,
152 1.2 tls 0x18,0x1b,0xe1,0x88,0xef,0x71,0x1a,0x1e,
153 1.2 tls 0x98,0x4c,0xe1,0x72,0xb9,0x21,0x6f,0x41,
154 1.2 tls 0x9f,0x44,0x53,0x67,0x45,0x6d,0x56,0x19,
155 1.2 tls 0x31,0x4a,0x42,0xa3,0xda,0x86,0xb0,0x01,
156 1.2 tls 0x38,0x7b,0xfd,0xb8,0x0e,0x0c,0xfe,0x42,
157 1.2 tls };
158 1.2 tls #elif crypto_core_ROUNDS == 12
159 1.2 tls static const uint8_t out[64] = {
160 1.2 tls 0x9b,0xf4,0x9a,0x6a,0x07,0x55,0xf9,0x53,
161 1.2 tls 0x81,0x1f,0xce,0x12,0x5f,0x26,0x83,0xd5,
162 1.2 tls 0x04,0x29,0xc3,0xbb,0x49,0xe0,0x74,0x14,
163 1.2 tls 0x7e,0x00,0x89,0xa5,0x2e,0xae,0x15,0x5f,
164 1.2 tls 0x05,0x64,0xf8,0x79,0xd2,0x7a,0xe3,0xc0,
165 1.2 tls 0x2c,0xe8,0x28,0x34,0xac,0xfa,0x8c,0x79,
166 1.2 tls 0x3a,0x62,0x9f,0x2c,0xa0,0xde,0x69,0x19,
167 1.2 tls 0x61,0x0b,0xe8,0x2f,0x41,0x13,0x26,0xbe,
168 1.2 tls };
169 1.2 tls #elif crypto_core_ROUNDS == 20
170 1.2 tls static const uint8_t out[64] = {
171 1.2 tls 0x76,0xb8,0xe0,0xad,0xa0,0xf1,0x3d,0x90,
172 1.2 tls 0x40,0x5d,0x6a,0xe5,0x53,0x86,0xbd,0x28,
173 1.2 tls 0xbd,0xd2,0x19,0xb8,0xa0,0x8d,0xed,0x1a,
174 1.2 tls 0xa8,0x36,0xef,0xcc,0x8b,0x77,0x0d,0xc7,
175 1.2 tls 0xda,0x41,0x59,0x7c,0x51,0x57,0x48,0x8d,
176 1.2 tls 0x77,0x24,0xe0,0x3f,0xb8,0xd8,0x4a,0x37,
177 1.2 tls 0x6a,0x43,0xb8,0xf4,0x15,0x18,0xa1,0x1c,
178 1.2 tls 0xc3,0x87,0xb6,0x69,0xb2,0xee,0x65,0x86,
179 1.2 tls };
180 1.2 tls #else
181 1.2 tls #error crypto_core_ROUNDS must be 8, 12, or 20.
182 1.2 tls #endif
183 1.2 tls
/* The word constants must spell out the ASCII string, little-endian. */
184 1.2 tls check(crypto_core_constant32[0] == le32dec(&sigma[0]));
185 1.2 tls check(crypto_core_constant32[1] == le32dec(&sigma[4]));
186 1.2 tls check(crypto_core_constant32[2] == le32dec(&sigma[8]));
187 1.2 tls check(crypto_core_constant32[3] == le32dec(&sigma[12]));
188 1.2 tls
/* Zero key, zero input: the output block must match the table above. */
189 1.2 tls crypto_core(block, zero32, zero32, crypto_core_constant32);
190 1.2 tls for (i = 0; i < 16; i++)
191 1.2 tls check(block[i] == le32dec(&out[i*4]));
192 1.2 tls }
193 1.2 tls
/* check() is private to the self-test above. */
194 1.2 tls #undef check
195 1.2 tls
/* Size in bytes of a seed: exactly one crypto_core key. */
196 1.2 tls #define CPRNG_FAST_SEED_BYTES (crypto_core_KEYWORDS * sizeof(uint32_t))
198 1.2 tls
/*
 * Generator state; one instance is allocated per CPU in cprng_fast_init.
 *
 *	buffer		most recent crypto_core output block; its last
 *			word (index CPRNG_FAST_BUFIDX) is reused as the
 *			count of unconsumed words
 *	key		crypto_core key, installed by cprng_fast_seed
 *	nonce		crypto_core input words; nonce[0] is incremented
 *			after every refill, carrying into nonce[1]
 *	have_initial	value of rnd_initial_entropy, as a boolean, at
 *			the time of the last cprng_fast_seed; while
 *			false, cprng_fast_word requests a reseed as soon
 *			as rnd_initial_entropy becomes nonzero
 */
199 1.2 tls struct cprng_fast {
200 1.2 tls uint32_t buffer[crypto_core_OUTPUTWORDS];
201 1.2 tls uint32_t key[crypto_core_KEYWORDS];
202 1.2 tls uint32_t nonce[crypto_core_INPUTWORDS];
203 1.2 tls bool have_initial;
204 1.2 tls };
205 1.2 tls
/* cprng_fast_seed copies sizeof key bytes from the seed, so the two must agree. */
206 1.2 tls __CTASSERT(sizeof ((struct cprng_fast *)0)->key == CPRNG_FAST_SEED_BYTES);
207 1.2 tls
/* Reseeding: request a reseed, and the soft interrupt handler that performs it. */
208 1.3 justin static void cprng_fast_schedule_reseed(struct cprng_fast *);
209 1.2 tls static void cprng_fast_intr(void *);
210 1.2 tls
/* Operations on a single generator state. */
211 1.2 tls static inline void cprng_fast_seed(struct cprng_fast *, const void *);
212 1.2 tls static void cprng_fast_buf(struct cprng_fast *, void *, unsigned);
213 1.2 tls
/* The two strategies cprng_fast chooses between, by request length. */
214 1.2 tls static void cprng_fast_buf_short(void *, size_t);
215 1.2 tls static void cprng_fast_buf_long(void *, size_t);
216 1.2 tls
/* Per-CPU struct cprng_fast storage, allocated in cprng_fast_init. */
217 1.2 tls static percpu_t *cprng_fast_percpu __read_mostly;
/* Soft interrupt handle for cprng_fast_intr, established in cprng_fast_init. */
218 1.2 tls static void *cprng_fast_softint __read_mostly;
219 1.2 tls
/*
 * Defined outside this file.  NOTE(review): presumably becomes nonzero
 * once the entropy pool has been initially seeded -- confirm against
 * its definition.
 */
220 1.2 tls extern int rnd_initial_entropy;
221 1.2 tls
222 1.2 tls void
223 1.2 tls cprng_fast_init(void)
224 1.2 tls {
225 1.2 tls struct cpu_info *ci;
226 1.2 tls CPU_INFO_ITERATOR cii;
227 1.2 tls
228 1.2 tls crypto_core_selftest();
229 1.2 tls cprng_fast_percpu = percpu_alloc(sizeof(struct cprng_fast));
230 1.2 tls for (CPU_INFO_FOREACH(cii, ci)) {
231 1.2 tls struct cprng_fast *cprng;
232 1.2 tls uint8_t seed[CPRNG_FAST_SEED_BYTES];
233 1.2 tls
234 1.2 tls percpu_traverse_enter();
235 1.2 tls cprng = percpu_getptr_remote(cprng_fast_percpu, ci);
236 1.2 tls cprng_strong(kern_cprng, seed, sizeof(seed), FASYNC);
237 1.2 tls /* Can't do anything about it if not full entropy. */
238 1.2 tls cprng_fast_seed(cprng, seed);
239 1.2 tls explicit_memset(seed, 0, sizeof(seed));
240 1.2 tls percpu_traverse_exit();
241 1.2 tls }
242 1.2 tls cprng_fast_softint = softint_establish(SOFTINT_SERIAL|SOFTINT_MPSAFE,
243 1.2 tls &cprng_fast_intr, NULL);
244 1.2 tls }
245 1.2 tls
246 1.2 tls static inline int
247 1.2 tls cprng_fast_get(struct cprng_fast **cprngp)
248 1.2 tls {
249 1.2 tls
250 1.2 tls *cprngp = percpu_getref(cprng_fast_percpu);
251 1.2 tls return splvm();
252 1.2 tls }
253 1.2 tls
/*
 * cprng_fast_put(cprng, s): Undo cprng_fast_get.
 *
 * Restores the interrupt priority level s returned by cprng_fast_get,
 * then releases the per-CPU reference.  cprng must be the pointer that
 * cprng_fast_get handed out.
 */
254 1.2 tls static inline void
255 1.2 tls cprng_fast_put(struct cprng_fast *cprng, int s)
256 1.2 tls {
257 1.2 tls
/*
 * Sanity check that cprng really is this CPU's state.  The check takes
 * an extra reference to look the pointer up, so the comma expression
 * drops it again to keep getref/putref balanced.
 */
258 1.2 tls KASSERT((cprng == percpu_getref(cprng_fast_percpu)) &&
259 1.2 tls (percpu_putref(cprng_fast_percpu), true));
/* Release in the reverse order of cprng_fast_get: level first, then reference. */
260 1.2 tls splx(s);
261 1.2 tls percpu_putref(cprng_fast_percpu);
262 1.2 tls }
263 1.2 tls
/*
 * cprng_fast_schedule_reseed(cprng): Request a reseed by scheduling the
 * cprng_fast_softint soft interrupt, whose handler is cprng_fast_intr.
 * The cprng argument is unused.
 */
264 1.2 tls static inline void
266 1.2 tls cprng_fast_schedule_reseed(struct cprng_fast *cprng __unused)
267 1.2 tls {
268 1.2 tls
269 1.2 tls softint_schedule(cprng_fast_softint);
270 1.2 tls }
271 1.2 tls
272 1.2 tls static void
273 1.2 tls cprng_fast_intr(void *cookie __unused)
274 1.2 tls {
275 1.2 tls struct cprng_fast *cprng;
276 1.2 tls uint8_t seed[CPRNG_FAST_SEED_BYTES];
277 1.2 tls
278 1.2 tls cprng_strong(kern_cprng, seed, sizeof(seed), FASYNC);
279 1.2 tls
280 1.2 tls cprng = percpu_getref(cprng_fast_percpu);
281 1.2 tls cprng_fast_seed(cprng, seed);
282 1.2 tls percpu_putref(cprng_fast_percpu);
283 1.2 tls
284 1.2 tls explicit_memset(seed, 0, sizeof(seed));
285 1.2 tls }
286 1.2 tls
287 1.2 tls /* CPRNG algorithm */
289 1.2 tls
290 1.2 tls /*
291 1.2 tls * The state consists of a key, the current nonce, and a 64-byte buffer
292 1.2 tls * of output. Since we fill the buffer only when we need output, and
293 1.2 tls * eat a 32-bit word at a time, one 32-bit word of the buffer would be
294 1.2 tls * wasted. Instead, we repurpose it to count the number of entries in
295 1.2 tls * the buffer remaining, counting from high to low in order to allow
296 1.2 tls * comparison to zero to detect when we need to refill it.
297 1.2 tls */
298 1.2 tls #define CPRNG_FAST_BUFIDX (crypto_core_OUTPUTWORDS - 1)
299 1.2 tls
300 1.2 tls static inline void
301 1.2 tls cprng_fast_seed(struct cprng_fast *cprng, const void *seed)
302 1.2 tls {
303 1.2 tls
304 1.2 tls (void)memset(cprng->buffer, 0, sizeof cprng->buffer);
305 1.2 tls (void)memcpy(cprng->key, seed, sizeof cprng->key);
306 1.2 tls (void)memset(cprng->nonce, 0, sizeof cprng->nonce);
307 1.2 tls
308 1.2 tls if (__predict_true(rnd_initial_entropy)) {
309 1.2 tls cprng->have_initial = true;
310 1.2 tls } else {
311 1.2 tls cprng->have_initial = false;
312 1.2 tls }
313 1.2 tls }
314 1.2 tls
315 1.2 tls static inline uint32_t
316 1.2 tls cprng_fast_word(struct cprng_fast *cprng)
317 1.2 tls {
318 1.2 tls uint32_t v;
319 1.2 tls
320 1.2 tls if (__predict_true(0 < cprng->buffer[CPRNG_FAST_BUFIDX])) {
321 1.2 tls v = cprng->buffer[--cprng->buffer[CPRNG_FAST_BUFIDX]];
322 1.2 tls } else {
323 1.2 tls /* If we don't have enough words, refill the buffer. */
324 1.2 tls crypto_core(cprng->buffer, cprng->nonce, cprng->key,
325 1.2 tls crypto_core_constant32);
326 1.2 tls if (__predict_false(++cprng->nonce[0] == 0)) {
327 1.2 tls cprng->nonce[1]++;
328 1.2 tls cprng_fast_schedule_reseed(cprng);
329 1.2 tls } else {
330 1.2 tls if (__predict_false(false == cprng->have_initial)) {
331 1.2 tls if (rnd_initial_entropy) {
332 1.2 tls cprng_fast_schedule_reseed(cprng);
333 1.2 tls }
334 1.2 tls }
335 1.2 tls }
336 1.2 tls v = cprng->buffer[CPRNG_FAST_BUFIDX];
337 1.2 tls cprng->buffer[CPRNG_FAST_BUFIDX] = CPRNG_FAST_BUFIDX;
338 1.2 tls }
339 1.2 tls
340 1.2 tls return v;
341 1.2 tls }
342 1.2 tls
/*
 * cprng_fast_buf(cprng, buf, n): Fill buf with n bytes of output.
 *
 * One word is drawn with cprng_fast_word for every group of up to four
 * bytes and stored least significant byte first; unused bytes of the
 * final word are discarded.
 */
static inline void
cprng_fast_buf(struct cprng_fast *cprng, void *buf, unsigned n)
{
	uint8_t *dst = buf;

	while (n != 0) {
		const unsigned chunk = MIN(n, 4);
		uint32_t word = cprng_fast_word(cprng);
		unsigned j;

		for (j = 0; j < chunk; j++, word >>= 8)
			dst[j] = (word & 0xff);
		dst += chunk;
		n -= chunk;
	}
}
360 1.2 tls
361 1.2 tls /*
363 1.2 tls * crypto_onetimestream: Expand a short unpredictable one-time seed
364 1.2 tls * into a long unpredictable output.
365 1.2 tls */
366 1.2 tls static void
367 1.2 tls crypto_onetimestream(const uint32_t seed[crypto_core_KEYWORDS], void *buf,
368 1.2 tls size_t n)
369 1.2 tls {
370 1.2 tls uint32_t block[crypto_core_OUTPUTWORDS];
371 1.2 tls uint32_t nonce[crypto_core_INPUTWORDS] = {0};
372 1.2 tls uint8_t *p8;
373 1.2 tls uint32_t *p32;
374 1.2 tls size_t ni, nb, nf;
375 1.2 tls
376 1.2 tls /*
377 1.2 tls * Guarantee we can generate up to n bytes. We have
378 1.2 tls * 2^(32*INPUTWORDS) possible inputs yielding output of
379 1.2 tls * 4*OUTPUTWORDS*2^(32*INPUTWORDS) bytes. It suffices to
380 1.2 tls * require that sizeof n > (1/CHAR_BIT) log_2 n be less than
381 1.2 tls * (1/CHAR_BIT) log_2 of the total output stream length. We
382 1.2 tls * have
383 1.2 tls *
384 1.2 tls * log_2 (4 o 2^(32 i)) = log_2 (4 o) + log_2 2^(32 i)
385 1.2 tls * = 2 + log_2 o + 32 i.
386 1.2 tls */
387 1.2 tls __CTASSERT(CHAR_BIT*sizeof n <=
388 1.2 tls (2 + ilog2(crypto_core_OUTPUTWORDS) + 32*crypto_core_INPUTWORDS));
389 1.2 tls
390 1.2 tls p8 = buf;
391 1.2 tls p32 = (uint32_t *)roundup2((uintptr_t)p8, sizeof(uint32_t));
392 1.2 tls ni = (uint8_t *)p32 - p8;
393 1.2 tls if (n < ni)
394 1.2 tls ni = n;
395 1.2 tls nb = (n - ni) / sizeof block;
396 1.2 tls nf = (n - ni) % sizeof block;
397 1.2 tls
398 1.2 tls KASSERT(((uintptr_t)p32 & 3) == 0);
399 1.2 tls KASSERT(ni <= n);
400 1.2 tls KASSERT(nb <= (n / sizeof block));
401 1.2 tls KASSERT(nf <= n);
402 1.2 tls KASSERT(n == (ni + (nb * sizeof block) + nf));
403 1.2 tls KASSERT(ni < sizeof(uint32_t));
404 1.2 tls KASSERT(nf < sizeof block);
405 1.2 tls
406 1.2 tls if (ni) {
407 1.2 tls crypto_core(block, nonce, seed, crypto_core_constant32);
408 1.2 tls nonce[0]++;
409 1.2 tls (void)memcpy(p8, block, ni);
410 1.2 tls }
411 1.2 tls while (nb--) {
412 1.2 tls crypto_core(p32, nonce, seed, crypto_core_constant32);
413 1.2 tls if (++nonce[0] == 0)
414 1.2 tls nonce[1]++;
415 1.2 tls p32 += crypto_core_OUTPUTWORDS;
416 1.2 tls }
417 1.2 tls if (nf) {
418 1.2 tls crypto_core(block, nonce, seed, crypto_core_constant32);
419 1.2 tls if (++nonce[0] == 0)
420 1.2 tls nonce[1]++;
421 1.2 tls (void)memcpy(p32, block, nf);
422 1.2 tls }
423 1.2 tls
424 1.2 tls if (ni | nf)
425 1.2 tls (void)explicit_memset(block, 0, sizeof block);
426 1.2 tls }
427 1.2 tls
428 1.2 tls /* Public API */
430 1.2 tls
/*
 * cprng_fast32: Return one 32-bit word from the current CPU's
 * generator, drawn between cprng_fast_get and cprng_fast_put.
 */
uint32_t
cprng_fast32(void)
{
	struct cprng_fast *state;
	uint32_t word;
	int ipl;

	ipl = cprng_fast_get(&state);
	word = cprng_fast_word(state);
	cprng_fast_put(state, ipl);

	return word;
}
444 1.2 tls
/*
 * cprng_fast64: Return 64 bits from the current CPU's generator.  Two
 * words are drawn in one get/put bracket; the first becomes the high
 * half of the result and the second the low half.
 */
uint64_t
cprng_fast64(void)
{
	struct cprng_fast *state;
	uint64_t v;
	int ipl;

	ipl = cprng_fast_get(&state);
	v = (uint64_t)cprng_fast_word(state) << 32;
	v |= cprng_fast_word(state);
	cprng_fast_put(state, ipl);

	return v;
}
459 1.2 tls
/*
 * cprng_fast_buf_short(buf, len): Fill buf with len bytes taken
 * directly from the current CPU's generator, entirely inside one
 * cprng_fast_get/cprng_fast_put bracket.  cprng_fast uses this only
 * for requests of at most one output block.
 */
static void
cprng_fast_buf_short(void *buf, size_t len)
{
	struct cprng_fast *state;
	int ipl;

	ipl = cprng_fast_get(&state);
	cprng_fast_buf(state, buf, len);
	cprng_fast_put(state, ipl);
}
470 1.2 tls
471 1.2 tls static __noinline void
472 1.2 tls cprng_fast_buf_long(void *buf, size_t len)
473 1.2 tls {
474 1.2 tls uint32_t seed[crypto_core_KEYWORDS];
475 1.2 tls struct cprng_fast *cprng;
476 1.2 tls int s;
477 1.2 tls
478 1.2 tls s = cprng_fast_get(&cprng);
479 1.2 tls cprng_fast_buf(cprng, seed, sizeof seed);
480 1.2 tls cprng_fast_put(cprng, s);
481 1.2 tls
482 1.2 tls crypto_onetimestream(seed, buf, len);
483 1.2 tls
484 1.2 tls (void)explicit_memset(seed, 0, sizeof seed);
485 1.2 tls }
486 1.2 tls
487 1.2 tls size_t
488 1.2 tls cprng_fast(void *buf, size_t len)
489 1.2 tls {
490 1.2 tls
491 1.2 tls /*
492 1.2 tls * We don't want to hog the CPU, so we use the short version,
493 1.2 tls * to generate output without preemption, only if we can do it
494 1.2 tls * with at most one crypto_core.
495 1.2 tls */
496 if (len <= (sizeof(uint32_t) * crypto_core_OUTPUTWORDS))
497 cprng_fast_buf_short(buf, len);
498 else
499 cprng_fast_buf_long(buf, len);
500
501 return len;
502 }
503