chacha_ref.c revision 1.1 1 /* $NetBSD: chacha_ref.c,v 1.1 2020/07/25 22:46:34 riastradh Exp $ */
2
3 /*-
4 * Copyright (c) 2020 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /*
30 * ChaCha pseudorandom function family and stream cipher portable C
31 * implementation. Derived from the specification,
32 *
33 * Daniel J. Bernstein, `ChaCha, a variant of Salsa20', Workshop
34 * Record of the State of the Art in Stream Ciphers -- SASC 2008.
35 * https://cr.yp.to/papers.html#chacha
36 *
37 * which in turn builds on the specification of Salsa20 available at
38 * <https://cr.yp.to/snuffle.html>. The particular parametrization of
39 * the stream cipher, with a 32-bit block counter and 96-bit nonce, is
40 * described in
41 *
42 * Y. Nir and A. Langley, `ChaCha20 and Poly1305 for IETF
43 * Protocols', IETF RFC 8439, June 2018.
44 * https://tools.ietf.org/html/rfc8439
45 */
46
47 #include "chacha_ref.h"
48
/*
 * rol32(u, c)
 *
 *	Rotate the 32-bit word u left by c bit positions and return the
 *	result.
 *
 *	Both shift counts are reduced modulo 32, so c = 0 (or any
 *	multiple of 32) returns u unchanged.  The naive form
 *	`u >> (32 - c)' would shift a 32-bit operand by 32 bits when
 *	c = 0, which is undefined behaviour in C.  For 0 < c < 32 the
 *	result is the same as the naive form.
 */
static uint32_t
rol32(uint32_t u, unsigned c)
{

	/* -c is computed in unsigned arithmetic, so (-c & 31) == (32 - c) % 32. */
	return (u << (c & 31)) | (u >> (-c & 31));
}
56
/*
 * CHACHA_QUARTERROUND(a, b, c, d)
 *
 *	Update the four 32-bit words a, b, c, d in place with four
 *	add/xor/rotate steps, using left-rotation counts 16, 12, 8, 7 in
 *	that order.
 *
 *	Every argument is expanded more than once, so pass only plain
 *	lvalues whose evaluation has no side effects.
 */
#define	CHACHA_QUARTERROUND(a, b, c, d) do				\
{									\
	(a) += (b);							\
	(d) = rol32((d) ^ (a), 16);					\
	(c) += (d);							\
	(b) = rol32((b) ^ (c), 12);					\
	(a) += (b);							\
	(d) = rol32((d) ^ (a), 8);					\
	(c) += (d);							\
	(b) = rol32((b) ^ (c), 7);					\
} while (/*CONSTCOND*/0)
64
/*
 * 16-byte constant that occupies the first four state words of every
 * block in this file.  The array is exactly as long as the text, so no
 * terminating NUL is stored.
 */
const uint8_t chacha_const32[16] =
    "expand 32-byte k";
66
/*
 * chacha_core_ref(out, in, k, c, nr)
 *
 *	Compute one 64-byte ChaCha block into out.
 *
 *	The 16-word state is decoded with le32dec from the 16-byte
 *	constant c (words 0-3), the 32-byte key k (words 4-11), and the
 *	16-byte input in (words 12-15).  A working copy is run through
 *	nr rounds, two per loop iteration, and the word-wise sum of the
 *	initial and permuted states is stored with le32enc.
 *
 *	nr is decremented by two per iteration, so it must be even: an
 *	odd value never reaches zero.
 */
static void
chacha_core_ref(uint8_t out[restrict static 64], const uint8_t in[static 16],
    const uint8_t k[static 32], const uint8_t c[static 16], unsigned nr)
{
	uint32_t init[16], work[16];
	unsigned j;

	/* Assemble the initial state: constant, key, input. */
	for (j = 0; j < 4; j++)
		init[j] = le32dec(c + 4*j);
	for (j = 0; j < 8; j++)
		init[4 + j] = le32dec(k + 4*j);
	for (j = 0; j < 4; j++)
		init[12 + j] = le32dec(in + 4*j);

	for (j = 0; j < 16; j++)
		work[j] = init[j];

	while (nr > 0) {
		/* Column round. */
		CHACHA_QUARTERROUND(work[0], work[4], work[8], work[12]);
		CHACHA_QUARTERROUND(work[1], work[5], work[9], work[13]);
		CHACHA_QUARTERROUND(work[2], work[6], work[10], work[14]);
		CHACHA_QUARTERROUND(work[3], work[7], work[11], work[15]);
		/* Diagonal round. */
		CHACHA_QUARTERROUND(work[0], work[5], work[10], work[15]);
		CHACHA_QUARTERROUND(work[1], work[6], work[11], work[12]);
		CHACHA_QUARTERROUND(work[2], work[7], work[8], work[13]);
		CHACHA_QUARTERROUND(work[3], work[4], work[9], work[14]);
		nr -= 2;
	}

	/* Feed the initial state forward and serialize. */
	for (j = 0; j < 16; j++)
		le32enc(out + 4*j, init[j] + work[j]);
}
119
120 /* ChaCha stream cipher (IETF style, 96-bit nonce and 32-bit block counter) */
122
123 static void
124 chacha_stream_ref(uint8_t *restrict s, size_t nbytes,
125 uint32_t blkno,
126 const uint8_t nonce[static 12],
127 const uint8_t k[static 32],
128 unsigned nr)
129 {
130 const uint8_t *c = chacha_const32;
131 uint32_t x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15;
132 uint32_t y0,y1,y2,y3,y4,y5,y6,y7,y8,y9,y10,y11,y12,y13,y14,y15;
133 unsigned i;
134
135 x0 = le32dec(c + 0);
136 x1 = le32dec(c + 4);
137 x2 = le32dec(c + 8);
138 x3 = le32dec(c + 12);
139 x4 = le32dec(k + 0);
140 x5 = le32dec(k + 4);
141 x6 = le32dec(k + 8);
142 x7 = le32dec(k + 12);
143 x8 = le32dec(k + 16);
144 x9 = le32dec(k + 20);
145 x10 = le32dec(k + 24);
146 x11 = le32dec(k + 28);
147 /* x12 = blkno */
148 x13 = le32dec(nonce + 0);
149 x14 = le32dec(nonce + 4);
150 x15 = le32dec(nonce + 8);
151
152 for (; nbytes >= 64; nbytes -= 64, s += 64, blkno++) {
153 y0 = x0;
154 y1 = x1;
155 y2 = x2;
156 y3 = x3;
157 y4 = x4;
158 y5 = x5;
159 y6 = x6;
160 y7 = x7;
161 y8 = x8;
162 y9 = x9;
163 y10 = x10;
164 y11 = x11;
165 y12 = x12 = blkno;
166 y13 = x13;
167 y14 = x14;
168 y15 = x15;
169 for (i = nr; i > 0; i -= 2) {
170 CHACHA_QUARTERROUND( y0, y4, y8,y12);
171 CHACHA_QUARTERROUND( y1, y5, y9,y13);
172 CHACHA_QUARTERROUND( y2, y6,y10,y14);
173 CHACHA_QUARTERROUND( y3, y7,y11,y15);
174 CHACHA_QUARTERROUND( y0, y5,y10,y15);
175 CHACHA_QUARTERROUND( y1, y6,y11,y12);
176 CHACHA_QUARTERROUND( y2, y7, y8,y13);
177 CHACHA_QUARTERROUND( y3, y4, y9,y14);
178 }
179 le32enc(s + 0, x0 + y0);
180 le32enc(s + 4, x1 + y1);
181 le32enc(s + 8, x2 + y2);
182 le32enc(s + 12, x3 + y3);
183 le32enc(s + 16, x4 + y4);
184 le32enc(s + 20, x5 + y5);
185 le32enc(s + 24, x6 + y6);
186 le32enc(s + 28, x7 + y7);
187 le32enc(s + 32, x8 + y8);
188 le32enc(s + 36, x9 + y9);
189 le32enc(s + 40, x10 + y10);
190 le32enc(s + 44, x11 + y11);
191 le32enc(s + 48, x12 + y12);
192 le32enc(s + 52, x13 + y13);
193 le32enc(s + 56, x14 + y14);
194 le32enc(s + 60, x15 + y15);
195 }
196
197 if (nbytes) {
199 uint8_t buf[64];
200
201 y0 = x0;
202 y1 = x1;
203 y2 = x2;
204 y3 = x3;
205 y4 = x4;
206 y5 = x5;
207 y6 = x6;
208 y7 = x7;
209 y8 = x8;
210 y9 = x9;
211 y10 = x10;
212 y11 = x11;
213 y12 = x12 = blkno;
214 y13 = x13;
215 y14 = x14;
216 y15 = x15;
217 for (i = nr; i > 0; i -= 2) {
218 CHACHA_QUARTERROUND( y0, y4, y8,y12);
219 CHACHA_QUARTERROUND( y1, y5, y9,y13);
220 CHACHA_QUARTERROUND( y2, y6,y10,y14);
221 CHACHA_QUARTERROUND( y3, y7,y11,y15);
222 CHACHA_QUARTERROUND( y0, y5,y10,y15);
223 CHACHA_QUARTERROUND( y1, y6,y11,y12);
224 CHACHA_QUARTERROUND( y2, y7, y8,y13);
225 CHACHA_QUARTERROUND( y3, y4, y9,y14);
226 }
227 le32enc(buf + 0, x0 + y0);
228 le32enc(buf + 4, x1 + y1);
229 le32enc(buf + 8, x2 + y2);
230 le32enc(buf + 12, x3 + y3);
231 le32enc(buf + 16, x4 + y4);
232 le32enc(buf + 20, x5 + y5);
233 le32enc(buf + 24, x6 + y6);
234 le32enc(buf + 28, x7 + y7);
235 le32enc(buf + 32, x8 + y8);
236 le32enc(buf + 36, x9 + y9);
237 le32enc(buf + 40, x10 + y10);
238 le32enc(buf + 44, x11 + y11);
239 le32enc(buf + 48, x12 + y12);
240 le32enc(buf + 52, x13 + y13);
241 le32enc(buf + 56, x14 + y14);
242 le32enc(buf + 60, x15 + y15);
243 memcpy(s, buf, nbytes);
244 }
245 }
246
247 static void
249 chacha_stream_xor_ref(uint8_t *s, const uint8_t *p, size_t nbytes,
250 uint32_t blkno,
251 const uint8_t nonce[static 12],
252 const uint8_t k[static 32],
253 unsigned nr)
254 {
255 const uint8_t *c = chacha_const32;
256 uint32_t x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15;
257 uint32_t y0,y1,y2,y3,y4,y5,y6,y7,y8,y9,y10,y11,y12,y13,y14,y15;
258 unsigned i;
259
260 x0 = le32dec(c + 0);
261 x1 = le32dec(c + 4);
262 x2 = le32dec(c + 8);
263 x3 = le32dec(c + 12);
264 x4 = le32dec(k + 0);
265 x5 = le32dec(k + 4);
266 x6 = le32dec(k + 8);
267 x7 = le32dec(k + 12);
268 x8 = le32dec(k + 16);
269 x9 = le32dec(k + 20);
270 x10 = le32dec(k + 24);
271 x11 = le32dec(k + 28);
272 /* x12 = blkno */
273 x13 = le32dec(nonce + 0);
274 x14 = le32dec(nonce + 4);
275 x15 = le32dec(nonce + 8);
276
277 for (; nbytes >= 64; nbytes -= 64, s += 64, p += 64, blkno++) {
278 y0 = x0;
279 y1 = x1;
280 y2 = x2;
281 y3 = x3;
282 y4 = x4;
283 y5 = x5;
284 y6 = x6;
285 y7 = x7;
286 y8 = x8;
287 y9 = x9;
288 y10 = x10;
289 y11 = x11;
290 y12 = x12 = blkno;
291 y13 = x13;
292 y14 = x14;
293 y15 = x15;
294 for (i = nr; i > 0; i -= 2) {
295 CHACHA_QUARTERROUND( y0, y4, y8,y12);
296 CHACHA_QUARTERROUND( y1, y5, y9,y13);
297 CHACHA_QUARTERROUND( y2, y6,y10,y14);
298 CHACHA_QUARTERROUND( y3, y7,y11,y15);
299 CHACHA_QUARTERROUND( y0, y5,y10,y15);
300 CHACHA_QUARTERROUND( y1, y6,y11,y12);
301 CHACHA_QUARTERROUND( y2, y7, y8,y13);
302 CHACHA_QUARTERROUND( y3, y4, y9,y14);
303 }
304 le32enc(s + 0, (x0 + y0) ^ le32dec(p + 0));
305 le32enc(s + 4, (x1 + y1) ^ le32dec(p + 4));
306 le32enc(s + 8, (x2 + y2) ^ le32dec(p + 8));
307 le32enc(s + 12, (x3 + y3) ^ le32dec(p + 12));
308 le32enc(s + 16, (x4 + y4) ^ le32dec(p + 16));
309 le32enc(s + 20, (x5 + y5) ^ le32dec(p + 20));
310 le32enc(s + 24, (x6 + y6) ^ le32dec(p + 24));
311 le32enc(s + 28, (x7 + y7) ^ le32dec(p + 28));
312 le32enc(s + 32, (x8 + y8) ^ le32dec(p + 32));
313 le32enc(s + 36, (x9 + y9) ^ le32dec(p + 36));
314 le32enc(s + 40, (x10 + y10) ^ le32dec(p + 40));
315 le32enc(s + 44, (x11 + y11) ^ le32dec(p + 44));
316 le32enc(s + 48, (x12 + y12) ^ le32dec(p + 48));
317 le32enc(s + 52, (x13 + y13) ^ le32dec(p + 52));
318 le32enc(s + 56, (x14 + y14) ^ le32dec(p + 56));
319 le32enc(s + 60, (x15 + y15) ^ le32dec(p + 60));
320 }
321
322 if (nbytes) {
324 uint8_t buf[64];
325
326 y0 = x0;
327 y1 = x1;
328 y2 = x2;
329 y3 = x3;
330 y4 = x4;
331 y5 = x5;
332 y6 = x6;
333 y7 = x7;
334 y8 = x8;
335 y9 = x9;
336 y10 = x10;
337 y11 = x11;
338 y12 = x12 = blkno;
339 y13 = x13;
340 y14 = x14;
341 y15 = x15;
342 for (i = nr; i > 0; i -= 2) {
343 CHACHA_QUARTERROUND( y0, y4, y8,y12);
344 CHACHA_QUARTERROUND( y1, y5, y9,y13);
345 CHACHA_QUARTERROUND( y2, y6,y10,y14);
346 CHACHA_QUARTERROUND( y3, y7,y11,y15);
347 CHACHA_QUARTERROUND( y0, y5,y10,y15);
348 CHACHA_QUARTERROUND( y1, y6,y11,y12);
349 CHACHA_QUARTERROUND( y2, y7, y8,y13);
350 CHACHA_QUARTERROUND( y3, y4, y9,y14);
351 }
352 le32enc(buf + 0, x0 + y0);
353 le32enc(buf + 4, x1 + y1);
354 le32enc(buf + 8, x2 + y2);
355 le32enc(buf + 12, x3 + y3);
356 le32enc(buf + 16, x4 + y4);
357 le32enc(buf + 20, x5 + y5);
358 le32enc(buf + 24, x6 + y6);
359 le32enc(buf + 28, x7 + y7);
360 le32enc(buf + 32, x8 + y8);
361 le32enc(buf + 36, x9 + y9);
362 le32enc(buf + 40, x10 + y10);
363 le32enc(buf + 44, x11 + y11);
364 le32enc(buf + 48, x12 + y12);
365 le32enc(buf + 52, x13 + y13);
366 le32enc(buf + 56, x14 + y14);
367 le32enc(buf + 60, x15 + y15);
368 for (i = 0; i < nbytes - nbytes%4; i += 4)
369 le32enc(s + i, le32dec(p + i) ^ le32dec(buf + i));
370 for (; i < nbytes; i++)
371 s[i] = p[i] ^ buf[i];
372 }
373 }
374
375 /* HChaCha */
377
/*
 * hchacha_ref(out, in, k, c, nr)
 *
 *	Build the 16-word state from the 16-byte constant c (words
 *	0-3), the 32-byte key k (words 4-11), and the 16-byte input in
 *	(words 12-15), apply nr rounds, and store words 0-3 followed by
 *	words 12-15 of the result in the 32-byte buffer out.  Unlike
 *	chacha_core_ref, the initial state is not added back in.
 *
 *	nr is decremented by two per iteration, so it must be even: an
 *	odd value never reaches zero.
 */
static void
hchacha_ref(uint8_t out[restrict static 32], const uint8_t in[static 16],
    const uint8_t k[static 32], const uint8_t c[static 16], unsigned nr)
{
	uint32_t st[16];
	unsigned j;

	for (j = 0; j < 4; j++)
		st[j] = le32dec(c + 4*j);
	for (j = 0; j < 8; j++)
		st[4 + j] = le32dec(k + 4*j);
	for (j = 0; j < 4; j++)
		st[12 + j] = le32dec(in + 4*j);

	while (nr > 0) {
		/* Column round. */
		CHACHA_QUARTERROUND(st[0], st[4], st[8], st[12]);
		CHACHA_QUARTERROUND(st[1], st[5], st[9], st[13]);
		CHACHA_QUARTERROUND(st[2], st[6], st[10], st[14]);
		CHACHA_QUARTERROUND(st[3], st[7], st[11], st[15]);
		/* Diagonal round. */
		CHACHA_QUARTERROUND(st[0], st[5], st[10], st[15]);
		CHACHA_QUARTERROUND(st[1], st[6], st[11], st[12]);
		CHACHA_QUARTERROUND(st[2], st[7], st[8], st[13]);
		CHACHA_QUARTERROUND(st[3], st[4], st[9], st[14]);
		nr -= 2;
	}

	/* Output is the first and last rows of the permuted state. */
	for (j = 0; j < 4; j++)
		le32enc(out + 4*j, st[j]);
	for (j = 0; j < 4; j++)
		le32enc(out + 16 + 4*j, st[12 + j]);
}
421
422 /* XChaCha stream cipher */
424
425 /* https://tools.ietf.org/html/draft-irtf-cfrg-xchacha-03 */
426
427 static void
428 xchacha_stream_ref(uint8_t *restrict s, size_t nbytes, uint32_t blkno,
429 const uint8_t nonce[static 24], const uint8_t k[static 32], unsigned nr)
430 {
431 uint8_t subkey[32];
432 uint8_t subnonce[12];
433
434 hchacha_ref(subkey, nonce/*[0:16)*/, k, chacha_const32, nr);
435 memset(subnonce, 0, 4);
436 memcpy(subnonce + 4, nonce + 16, 8);
437 chacha_stream_ref(s, nbytes, blkno, subnonce, subkey, nr);
438 }
439
440 static void
441 xchacha_stream_xor_ref(uint8_t *restrict c, const uint8_t *p, size_t nbytes,
442 uint32_t blkno,
443 const uint8_t nonce[static 24],
444 const uint8_t k[static 32],
445 unsigned nr)
446 {
447 uint8_t subkey[32];
448 uint8_t subnonce[12];
449
450 hchacha_ref(subkey, nonce/*[0:16)*/, k, chacha_const32, nr);
451 memset(subnonce, 0, 4);
452 memcpy(subnonce + 4, nonce + 16, 8);
453 chacha_stream_xor_ref(c, p, nbytes, blkno, subnonce, subkey, nr);
454 }
455
/*
 * chacha_probe_ref()
 *
 *	Probe routine for the portable C implementation.  It has no
 *	hardware requirements to check, so it unconditionally returns 0.
 */
static int
chacha_probe_ref(void)
{
	/* The reference implementation is always available. */
	const int available = 0;

	return available;
}
463
464 const struct chacha_impl chacha_ref_impl = {
465 .ci_name = "Portable C ChaCha",
466 .ci_probe = chacha_probe_ref,
467 .ci_chacha_core = chacha_core_ref,
468 .ci_hchacha = hchacha_ref,
469 .ci_chacha_stream = chacha_stream_ref,
470 .ci_chacha_stream_xor = chacha_stream_xor_ref,
471 .ci_xchacha_stream = xchacha_stream_ref,
472 .ci_xchacha_stream_xor = xchacha_stream_xor_ref,
473 };
474