/*	$NetBSD: chacha_ref.c,v 1.1 2020/07/25 22:46:34 riastradh Exp $	*/

/*-
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * ChaCha pseudorandom function family and stream cipher portable C
 * implementation.  Derived from the specification,
 *
 *	Daniel J. Bernstein, `ChaCha, a variant of Salsa20', Workshop
 *	Record of the State of the Art in Stream Ciphers -- SASC 2008.
 *	https://cr.yp.to/papers.html#chacha
 *
 * which in turn builds on the specification of Salsa20 available at
 * <https://cr.yp.to/snuffle.html>.  The particular parametrization of
 * the stream cipher, with a 32-bit block counter and 96-bit nonce, is
 * described in
 *
 *	Y. Nir and A. Langley, `ChaCha20 and Poly1305 for IETF
 *	Protocols', IETF RFC 8439, June 2018.
 *	https://tools.ietf.org/html/rfc8439
 */
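
/*
 * The ChaCha core acts on a 4x4 matrix of 32-bit little-endian words.
 * In the parametrization used here (RFC 8439), the initial state is
 *
 *	constant constant constant constant
 *	key      key      key      key
 *	key      key      key      key
 *	counter  nonce    nonce    nonce
 *
 * i.e. the 16-byte constant `expand 32-byte k', the 256-bit key, the
 * 32-bit block counter, and the 96-bit nonce, in that order; the
 * routines below load their inputs into x0..x15/y0..y15 accordingly.
 */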

#include "chacha_ref.h"

static uint32_t
rol32(uint32_t u, unsigned c)
{

	return (u << c) | (u >> (32 - c));
}

#define	CHACHA_QUARTERROUND(a, b, c, d) do \
{ \
	(a) += (b); (d) ^= (a); (d) = rol32((d), 16); \
	(c) += (d); (b) ^= (c); (b) = rol32((b), 12); \
	(a) += (b); (d) ^= (a); (d) = rol32((d), 8); \
	(c) += (d); (b) ^= (c); (b) = rol32((b), 7); \
} while (/*CONSTCOND*/0)

const uint8_t chacha_const32[16] = "expand 32-byte k";

static void
chacha_core_ref(uint8_t out[restrict static 64], const uint8_t in[static 16],
    const uint8_t k[static 32], const uint8_t c[static 16], unsigned nr)
{
	uint32_t x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15;
	uint32_t y0,y1,y2,y3,y4,y5,y6,y7,y8,y9,y10,y11,y12,y13,y14,y15;

	x0 = y0 = le32dec(c + 0);
	x1 = y1 = le32dec(c + 4);
	x2 = y2 = le32dec(c + 8);
	x3 = y3 = le32dec(c + 12);
	x4 = y4 = le32dec(k + 0);
	x5 = y5 = le32dec(k + 4);
	x6 = y6 = le32dec(k + 8);
	x7 = y7 = le32dec(k + 12);
	x8 = y8 = le32dec(k + 16);
	x9 = y9 = le32dec(k + 20);
	x10 = y10 = le32dec(k + 24);
	x11 = y11 = le32dec(k + 28);
	x12 = y12 = le32dec(in + 0);
	x13 = y13 = le32dec(in + 4);
	x14 = y14 = le32dec(in + 8);
	x15 = y15 = le32dec(in + 12);

	for (; nr > 0; nr -= 2) {
		CHACHA_QUARTERROUND( y0, y4, y8,y12);
		CHACHA_QUARTERROUND( y1, y5, y9,y13);
		CHACHA_QUARTERROUND( y2, y6,y10,y14);
		CHACHA_QUARTERROUND( y3, y7,y11,y15);
		CHACHA_QUARTERROUND( y0, y5,y10,y15);
		CHACHA_QUARTERROUND( y1, y6,y11,y12);
		CHACHA_QUARTERROUND( y2, y7, y8,y13);
		CHACHA_QUARTERROUND( y3, y4, y9,y14);
	}

	le32enc(out + 0, x0 + y0);
	le32enc(out + 4, x1 + y1);
	le32enc(out + 8, x2 + y2);
	le32enc(out + 12, x3 + y3);
	le32enc(out + 16, x4 + y4);
	le32enc(out + 20, x5 + y5);
	le32enc(out + 24, x6 + y6);
	le32enc(out + 28, x7 + y7);
	le32enc(out + 32, x8 + y8);
	le32enc(out + 36, x9 + y9);
	le32enc(out + 40, x10 + y10);
	le32enc(out + 44, x11 + y11);
	le32enc(out + 48, x12 + y12);
	le32enc(out + 52, x13 + y13);
	le32enc(out + 56, x14 + y14);
	le32enc(out + 60, x15 + y15);
}

/* ChaCha stream cipher (IETF style, 96-bit nonce and 32-bit block counter) */
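
/*
 * chacha_stream_ref(s, nbytes, blkno, nonce, k, nr)
 *
 *	Generate nbytes bytes of ChaCha keystream at s for the 256-bit
 *	key k and 96-bit nonce, starting at the 64-byte block numbered
 *	blkno and using nr rounds.  Whole blocks are written straight
 *	into s; a trailing partial block, if any, is generated into a
 *	stack buffer and the remaining bytes are copied out.
 */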

static void
chacha_stream_ref(uint8_t *restrict s, size_t nbytes,
    uint32_t blkno,
    const uint8_t nonce[static 12],
    const uint8_t k[static 32],
    unsigned nr)
{
	const uint8_t *c = chacha_const32;
	uint32_t x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15;
	uint32_t y0,y1,y2,y3,y4,y5,y6,y7,y8,y9,y10,y11,y12,y13,y14,y15;
	unsigned i;

	x0 = le32dec(c + 0);
	x1 = le32dec(c + 4);
	x2 = le32dec(c + 8);
	x3 = le32dec(c + 12);
	x4 = le32dec(k + 0);
	x5 = le32dec(k + 4);
	x6 = le32dec(k + 8);
	x7 = le32dec(k + 12);
	x8 = le32dec(k + 16);
	x9 = le32dec(k + 20);
	x10 = le32dec(k + 24);
	x11 = le32dec(k + 28);
	/* x12 = blkno */
	x13 = le32dec(nonce + 0);
	x14 = le32dec(nonce + 4);
	x15 = le32dec(nonce + 8);

	for (; nbytes >= 64; nbytes -= 64, s += 64, blkno++) {
		y0 = x0;
		y1 = x1;
		y2 = x2;
		y3 = x3;
		y4 = x4;
		y5 = x5;
		y6 = x6;
		y7 = x7;
		y8 = x8;
		y9 = x9;
		y10 = x10;
		y11 = x11;
		y12 = x12 = blkno;
		y13 = x13;
		y14 = x14;
		y15 = x15;
		for (i = nr; i > 0; i -= 2) {
			CHACHA_QUARTERROUND( y0, y4, y8,y12);
			CHACHA_QUARTERROUND( y1, y5, y9,y13);
			CHACHA_QUARTERROUND( y2, y6,y10,y14);
			CHACHA_QUARTERROUND( y3, y7,y11,y15);
			CHACHA_QUARTERROUND( y0, y5,y10,y15);
			CHACHA_QUARTERROUND( y1, y6,y11,y12);
			CHACHA_QUARTERROUND( y2, y7, y8,y13);
			CHACHA_QUARTERROUND( y3, y4, y9,y14);
		}
		le32enc(s + 0, x0 + y0);
		le32enc(s + 4, x1 + y1);
		le32enc(s + 8, x2 + y2);
		le32enc(s + 12, x3 + y3);
		le32enc(s + 16, x4 + y4);
		le32enc(s + 20, x5 + y5);
		le32enc(s + 24, x6 + y6);
		le32enc(s + 28, x7 + y7);
		le32enc(s + 32, x8 + y8);
		le32enc(s + 36, x9 + y9);
		le32enc(s + 40, x10 + y10);
		le32enc(s + 44, x11 + y11);
		le32enc(s + 48, x12 + y12);
		le32enc(s + 52, x13 + y13);
		le32enc(s + 56, x14 + y14);
		le32enc(s + 60, x15 + y15);
	}

	if (nbytes) {
		uint8_t buf[64];

		y0 = x0;
		y1 = x1;
		y2 = x2;
		y3 = x3;
		y4 = x4;
		y5 = x5;
		y6 = x6;
		y7 = x7;
		y8 = x8;
		y9 = x9;
		y10 = x10;
		y11 = x11;
		y12 = x12 = blkno;
		y13 = x13;
		y14 = x14;
		y15 = x15;
		for (i = nr; i > 0; i -= 2) {
			CHACHA_QUARTERROUND( y0, y4, y8,y12);
			CHACHA_QUARTERROUND( y1, y5, y9,y13);
			CHACHA_QUARTERROUND( y2, y6,y10,y14);
			CHACHA_QUARTERROUND( y3, y7,y11,y15);
			CHACHA_QUARTERROUND( y0, y5,y10,y15);
			CHACHA_QUARTERROUND( y1, y6,y11,y12);
			CHACHA_QUARTERROUND( y2, y7, y8,y13);
			CHACHA_QUARTERROUND( y3, y4, y9,y14);
		}
		le32enc(buf + 0, x0 + y0);
		le32enc(buf + 4, x1 + y1);
		le32enc(buf + 8, x2 + y2);
		le32enc(buf + 12, x3 + y3);
		le32enc(buf + 16, x4 + y4);
		le32enc(buf + 20, x5 + y5);
		le32enc(buf + 24, x6 + y6);
		le32enc(buf + 28, x7 + y7);
		le32enc(buf + 32, x8 + y8);
		le32enc(buf + 36, x9 + y9);
		le32enc(buf + 40, x10 + y10);
		le32enc(buf + 44, x11 + y11);
		le32enc(buf + 48, x12 + y12);
		le32enc(buf + 52, x13 + y13);
		le32enc(buf + 56, x14 + y14);
		le32enc(buf + 60, x15 + y15);
		memcpy(s, buf, nbytes);
	}
}
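
/*
 * chacha_stream_xor_ref(s, p, nbytes, blkno, nonce, k, nr)
 *
 *	Same keystream as chacha_stream_ref, but XORed into the nbytes
 *	bytes of data at p and stored at s.  Because the cipher is a
 *	plain XOR with the keystream, the same call both encrypts and
 *	decrypts.
 */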

static void
chacha_stream_xor_ref(uint8_t *s, const uint8_t *p, size_t nbytes,
    uint32_t blkno,
    const uint8_t nonce[static 12],
    const uint8_t k[static 32],
    unsigned nr)
{
	const uint8_t *c = chacha_const32;
	uint32_t x0,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12,x13,x14,x15;
	uint32_t y0,y1,y2,y3,y4,y5,y6,y7,y8,y9,y10,y11,y12,y13,y14,y15;
	unsigned i;

	x0 = le32dec(c + 0);
	x1 = le32dec(c + 4);
	x2 = le32dec(c + 8);
	x3 = le32dec(c + 12);
	x4 = le32dec(k + 0);
	x5 = le32dec(k + 4);
	x6 = le32dec(k + 8);
	x7 = le32dec(k + 12);
	x8 = le32dec(k + 16);
	x9 = le32dec(k + 20);
	x10 = le32dec(k + 24);
	x11 = le32dec(k + 28);
	/* x12 = blkno */
	x13 = le32dec(nonce + 0);
	x14 = le32dec(nonce + 4);
	x15 = le32dec(nonce + 8);

	for (; nbytes >= 64; nbytes -= 64, s += 64, p += 64, blkno++) {
		y0 = x0;
		y1 = x1;
		y2 = x2;
		y3 = x3;
		y4 = x4;
		y5 = x5;
		y6 = x6;
		y7 = x7;
		y8 = x8;
		y9 = x9;
		y10 = x10;
		y11 = x11;
		y12 = x12 = blkno;
		y13 = x13;
		y14 = x14;
		y15 = x15;
		for (i = nr; i > 0; i -= 2) {
			CHACHA_QUARTERROUND( y0, y4, y8,y12);
			CHACHA_QUARTERROUND( y1, y5, y9,y13);
			CHACHA_QUARTERROUND( y2, y6,y10,y14);
			CHACHA_QUARTERROUND( y3, y7,y11,y15);
			CHACHA_QUARTERROUND( y0, y5,y10,y15);
			CHACHA_QUARTERROUND( y1, y6,y11,y12);
			CHACHA_QUARTERROUND( y2, y7, y8,y13);
			CHACHA_QUARTERROUND( y3, y4, y9,y14);
		}
		le32enc(s + 0, (x0 + y0) ^ le32dec(p + 0));
		le32enc(s + 4, (x1 + y1) ^ le32dec(p + 4));
		le32enc(s + 8, (x2 + y2) ^ le32dec(p + 8));
		le32enc(s + 12, (x3 + y3) ^ le32dec(p + 12));
		le32enc(s + 16, (x4 + y4) ^ le32dec(p + 16));
		le32enc(s + 20, (x5 + y5) ^ le32dec(p + 20));
		le32enc(s + 24, (x6 + y6) ^ le32dec(p + 24));
		le32enc(s + 28, (x7 + y7) ^ le32dec(p + 28));
		le32enc(s + 32, (x8 + y8) ^ le32dec(p + 32));
		le32enc(s + 36, (x9 + y9) ^ le32dec(p + 36));
		le32enc(s + 40, (x10 + y10) ^ le32dec(p + 40));
		le32enc(s + 44, (x11 + y11) ^ le32dec(p + 44));
		le32enc(s + 48, (x12 + y12) ^ le32dec(p + 48));
		le32enc(s + 52, (x13 + y13) ^ le32dec(p + 52));
		le32enc(s + 56, (x14 + y14) ^ le32dec(p + 56));
		le32enc(s + 60, (x15 + y15) ^ le32dec(p + 60));
	}

	if (nbytes) {
		uint8_t buf[64];

		y0 = x0;
		y1 = x1;
		y2 = x2;
		y3 = x3;
		y4 = x4;
		y5 = x5;
		y6 = x6;
		y7 = x7;
		y8 = x8;
		y9 = x9;
		y10 = x10;
		y11 = x11;
		y12 = x12 = blkno;
		y13 = x13;
		y14 = x14;
		y15 = x15;
		for (i = nr; i > 0; i -= 2) {
			CHACHA_QUARTERROUND( y0, y4, y8,y12);
			CHACHA_QUARTERROUND( y1, y5, y9,y13);
			CHACHA_QUARTERROUND( y2, y6,y10,y14);
			CHACHA_QUARTERROUND( y3, y7,y11,y15);
			CHACHA_QUARTERROUND( y0, y5,y10,y15);
			CHACHA_QUARTERROUND( y1, y6,y11,y12);
			CHACHA_QUARTERROUND( y2, y7, y8,y13);
			CHACHA_QUARTERROUND( y3, y4, y9,y14);
		}
		le32enc(buf + 0, x0 + y0);
		le32enc(buf + 4, x1 + y1);
		le32enc(buf + 8, x2 + y2);
		le32enc(buf + 12, x3 + y3);
		le32enc(buf + 16, x4 + y4);
		le32enc(buf + 20, x5 + y5);
		le32enc(buf + 24, x6 + y6);
		le32enc(buf + 28, x7 + y7);
		le32enc(buf + 32, x8 + y8);
		le32enc(buf + 36, x9 + y9);
		le32enc(buf + 40, x10 + y10);
		le32enc(buf + 44, x11 + y11);
		le32enc(buf + 48, x12 + y12);
		le32enc(buf + 52, x13 + y13);
		le32enc(buf + 56, x14 + y14);
		le32enc(buf + 60, x15 + y15);
		for (i = 0; i < nbytes - nbytes%4; i += 4)
			le32enc(s + i, le32dec(p + i) ^ le32dec(buf + i));
		for (; i < nbytes; i++)
			s[i] = p[i] ^ buf[i];
	}
}

/* HChaCha */

static void
hchacha_ref(uint8_t out[restrict static 32], const uint8_t in[static 16],
    const uint8_t k[static 32], const uint8_t c[static 16], unsigned nr)
{
	uint32_t y0,y1,y2,y3,y4,y5,y6,y7,y8,y9,y10,y11,y12,y13,y14,y15;

	y0 = le32dec(c + 0);
	y1 = le32dec(c + 4);
	y2 = le32dec(c + 8);
	y3 = le32dec(c + 12);
	y4 = le32dec(k + 0);
	y5 = le32dec(k + 4);
	y6 = le32dec(k + 8);
	y7 = le32dec(k + 12);
	y8 = le32dec(k + 16);
	y9 = le32dec(k + 20);
	y10 = le32dec(k + 24);
	y11 = le32dec(k + 28);
	y12 = le32dec(in + 0);
	y13 = le32dec(in + 4);
	y14 = le32dec(in + 8);
	y15 = le32dec(in + 12);

	for (; nr > 0; nr -= 2) {
		CHACHA_QUARTERROUND( y0, y4, y8,y12);
		CHACHA_QUARTERROUND( y1, y5, y9,y13);
		CHACHA_QUARTERROUND( y2, y6,y10,y14);
		CHACHA_QUARTERROUND( y3, y7,y11,y15);
		CHACHA_QUARTERROUND( y0, y5,y10,y15);
		CHACHA_QUARTERROUND( y1, y6,y11,y12);
		CHACHA_QUARTERROUND( y2, y7, y8,y13);
		CHACHA_QUARTERROUND( y3, y4, y9,y14);
	}

	le32enc(out + 0, y0);
	le32enc(out + 4, y1);
	le32enc(out + 8, y2);
	le32enc(out + 12, y3);
	le32enc(out + 16, y12);
	le32enc(out + 20, y13);
	le32enc(out + 24, y14);
	le32enc(out + 28, y15);
}

/* XChaCha stream cipher */

/* https://tools.ietf.org/html/draft-irtf-cfrg-xchacha-03 */
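
/*
 * XChaCha takes a 192-bit nonce: HChaCha is applied to the key and the
 * first 16 bytes of the nonce to derive a 256-bit subkey, the last
 * 8 bytes of the nonce become the last 8 bytes of a 96-bit subnonce
 * whose first 4 bytes are zero, and the ordinary ChaCha stream cipher
 * is then run under the subkey and subnonce.
 */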

static void
xchacha_stream_ref(uint8_t *restrict s, size_t nbytes, uint32_t blkno,
    const uint8_t nonce[static 24], const uint8_t k[static 32], unsigned nr)
{
	uint8_t subkey[32];
	uint8_t subnonce[12];

	hchacha_ref(subkey, nonce/*[0:16)*/, k, chacha_const32, nr);
	memset(subnonce, 0, 4);
	memcpy(subnonce + 4, nonce + 16, 8);
	chacha_stream_ref(s, nbytes, blkno, subnonce, subkey, nr);
}

static void
xchacha_stream_xor_ref(uint8_t *restrict c, const uint8_t *p, size_t nbytes,
    uint32_t blkno,
    const uint8_t nonce[static 24],
    const uint8_t k[static 32],
    unsigned nr)
{
	uint8_t subkey[32];
	uint8_t subnonce[12];

	hchacha_ref(subkey, nonce/*[0:16)*/, k, chacha_const32, nr);
	memset(subnonce, 0, 4);
	memcpy(subnonce + 4, nonce + 16, 8);
	chacha_stream_xor_ref(c, p, nbytes, blkno, subnonce, subkey, nr);
}

static int
chacha_probe_ref(void)
{

	/* The reference implementation is always available. */
	return 0;
}

const struct chacha_impl chacha_ref_impl = {
	.ci_name = "Portable C ChaCha",
	.ci_probe = chacha_probe_ref,
	.ci_chacha_core = chacha_core_ref,
	.ci_hchacha = hchacha_ref,
	.ci_chacha_stream = chacha_stream_ref,
	.ci_chacha_stream_xor = chacha_stream_xor_ref,
	.ci_xchacha_stream = xchacha_stream_ref,
	.ci_xchacha_stream_xor = xchacha_stream_xor_ref,
};
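
/*
 * Illustrative use, not part of this file: a consumer that selects
 * this implementation through the chacha_impl interface (assumed to be
 * declared in chacha_ref.h or a companion header) can encrypt a buffer
 * with ci_chacha_stream_xor; the key, nonce, and buffer names below
 * are hypothetical, and nr = 20 selects the usual ChaCha20.
 *
 *	uint8_t key[32], nonce[12], msg[100], ctxt[100];
 *	(fill key and nonce; never reuse a key/nonce pair)
 *	chacha_ref_impl.ci_chacha_stream_xor(ctxt, msg, sizeof(msg),
 *	    0, nonce, key, 20);
 *
 * Running the same call over ctxt recovers msg, since encryption and
 * decryption are the same XOR with the keystream.
 */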