1/* $NetBSD: aes_bear64.c,v 1.1 2025/11/23 22:44:13 riastradh Exp $ */ 2 3/*- 4 * Copyright (c) 2025 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 * POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29#include <sys/cdefs.h> 30__KERNEL_RCSID(1, "$NetBSD: aes_bear64.c,v 1.1 2025/11/23 22:44:13 riastradh Exp $"); 31 32#include <sys/types.h> 33#include <sys/endian.h> 34 35#ifdef _KERNEL 36#include <sys/systm.h> 37#else 38#include <assert.h> 39#include <err.h> 40#include <string.h> 41#define KASSERT assert 42#define panic(fmt, args...) err(1, fmt, args) 43#endif 44 45#include <crypto/aes/aes.h> 46#include <crypto/aes/aes_bear64.h> 47#include <crypto/aes/aes_impl.h> 48 49static void 50aesbear64_setkey(uint64_t rk[static 30], const void *key, uint32_t nrounds) 51{ 52 size_t key_len; 53 54 switch (nrounds) { 55 case 10: 56 key_len = 16; 57 break; 58 case 12: 59 key_len = 24; 60 break; 61 case 14: 62 key_len = 32; 63 break; 64 default: 65 panic("invalid AES nrounds: %u", nrounds); 66 } 67 68 br_aes_ct64_keysched(rk, key, key_len); 69} 70 71static void 72aesbear64_setenckey(struct aesenc *enc, const uint8_t *key, uint32_t nrounds) 73{ 74 75 aesbear64_setkey(enc->aese_aes.aes_rk64, key, nrounds); 76} 77 78static void 79aesbear64_setdeckey(struct aesdec *dec, const uint8_t *key, uint32_t nrounds) 80{ 81 82 /* 83 * BearSSL computes InvMixColumns on the fly -- no need for 84 * distinct decryption round keys. 85 */ 86 aesbear64_setkey(dec->aesd_aes.aes_rk64, key, nrounds); 87} 88 89static void 90aesbear64_enc(const struct aesenc *enc, const uint8_t in[static 16], 91 uint8_t out[static 16], uint32_t nrounds) 92{ 93 uint64_t sk_exp[120]; 94 uint32_t w[4]; 95 uint64_t q[8]; 96 97 /* Expand round keys for bitslicing. */ 98 br_aes_ct64_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk64); 99 100 /* Load input block interleaved with garbage blocks. */ 101 w[0] = le32dec(in + 4*0); 102 w[1] = le32dec(in + 4*1); 103 w[2] = le32dec(in + 4*2); 104 w[3] = le32dec(in + 4*3); 105 br_aes_ct64_interleave_in(&q[0], &q[4], w); 106 q[1] = q[2] = q[3] = 0; 107 q[5] = q[6] = q[7] = 0; 108 109 /* Transform to bitslice, encrypt, transform from bitslice. */ 110 br_aes_ct64_ortho(q); 111 br_aes_ct64_bitslice_encrypt(nrounds, sk_exp, q); 112 br_aes_ct64_ortho(q); 113 114 /* Store output block. */ 115 br_aes_ct64_interleave_out(w, q[0], q[4]); 116 le32enc(out + 4*0, w[0]); 117 le32enc(out + 4*1, w[1]); 118 le32enc(out + 4*2, w[2]); 119 le32enc(out + 4*3, w[3]); 120 121 /* Paranoia: Zero temporary buffers. */ 122 explicit_memset(sk_exp, 0, sizeof sk_exp); 123 explicit_memset(q, 0, sizeof q); 124} 125 126static void 127aesbear64_dec(const struct aesdec *dec, const uint8_t in[static 16], 128 uint8_t out[static 16], uint32_t nrounds) 129{ 130 uint64_t sk_exp[120]; 131 uint32_t w[4]; 132 uint64_t q[8]; 133 134 /* Expand round keys for bitslicing. */ 135 br_aes_ct64_skey_expand(sk_exp, nrounds, dec->aesd_aes.aes_rk64); 136 137 /* Load input block interleaved with garbage blocks. */ 138 w[0] = le32dec(in + 4*0); 139 w[1] = le32dec(in + 4*1); 140 w[2] = le32dec(in + 4*2); 141 w[3] = le32dec(in + 4*3); 142 br_aes_ct64_interleave_in(&q[0], &q[4], w); 143 q[1] = q[2] = q[3] = 0; 144 q[5] = q[6] = q[7] = 0; 145 146 /* Transform to bitslice, decrypt, transform from bitslice. */ 147 br_aes_ct64_ortho(q); 148 br_aes_ct64_bitslice_decrypt(nrounds, sk_exp, q); 149 br_aes_ct64_ortho(q); 150 151 /* Store output block. */ 152 br_aes_ct64_interleave_out(w, q[0], q[4]); 153 le32enc(out + 4*0, w[0]); 154 le32enc(out + 4*1, w[1]); 155 le32enc(out + 4*2, w[2]); 156 le32enc(out + 4*3, w[3]); 157 158 /* Paranoia: Zero temporary buffers. */ 159 explicit_memset(sk_exp, 0, sizeof sk_exp); 160 explicit_memset(q, 0, sizeof q); 161} 162 163static void 164aesbear64_cbc_enc(const struct aesenc *enc, const uint8_t in[static 16], 165 uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16], 166 uint32_t nrounds) 167{ 168 uint64_t sk_exp[120]; 169 uint32_t w[4]; 170 uint64_t q[8]; 171 uint32_t cv0, cv1, cv2, cv3; 172 173 KASSERT(nbytes % 16 == 0); 174 175 /* Skip if there's nothing to do. */ 176 if (nbytes == 0) 177 return; 178 179 /* Expand round keys for bitslicing. */ 180 br_aes_ct64_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk64); 181 182 /* Initialize garbage blocks. */ 183 q[1] = q[2] = q[3] = 0; 184 q[5] = q[6] = q[7] = 0; 185 186 /* Load IV. */ 187 cv0 = le32dec(iv + 4*0); 188 cv1 = le32dec(iv + 4*1); 189 cv2 = le32dec(iv + 4*2); 190 cv3 = le32dec(iv + 4*3); 191 192 for (; nbytes; nbytes -= 16, in += 16, out += 16) { 193 /* Load input block and apply CV. */ 194 w[0] = cv0 ^ le32dec(in + 4*0); 195 w[1] = cv1 ^ le32dec(in + 4*1); 196 w[2] = cv2 ^ le32dec(in + 4*2); 197 w[3] = cv3 ^ le32dec(in + 4*3); 198 br_aes_ct64_interleave_in(&q[0], &q[4], w); 199 200 /* Transform to bitslice, encrypt, transform from bitslice. */ 201 br_aes_ct64_ortho(q); 202 br_aes_ct64_bitslice_encrypt(nrounds, sk_exp, q); 203 br_aes_ct64_ortho(q); 204 205 /* Remember ciphertext as CV and store output block. */ 206 br_aes_ct64_interleave_out(w, q[0], q[4]); 207 cv0 = w[0]; 208 cv1 = w[1]; 209 cv2 = w[2]; 210 cv3 = w[3]; 211 le32enc(out + 4*0, cv0); 212 le32enc(out + 4*1, cv1); 213 le32enc(out + 4*2, cv2); 214 le32enc(out + 4*3, cv3); 215 } 216 217 /* Store updated IV. */ 218 le32enc(iv + 4*0, cv0); 219 le32enc(iv + 4*1, cv1); 220 le32enc(iv + 4*2, cv2); 221 le32enc(iv + 4*3, cv3); 222 223 /* Paranoia: Zero temporary buffers. */ 224 explicit_memset(sk_exp, 0, sizeof sk_exp); 225 explicit_memset(q, 0, sizeof q); 226} 227 228static void 229aesbear64_cbc_dec(const struct aesdec *dec, const uint8_t in[static 16], 230 uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16], 231 uint32_t nrounds) 232{ 233 uint64_t sk_exp[120]; 234 uint32_t w[4]; 235 uint64_t q[8]; 236 uint32_t cv0, cv1, cv2, cv3, iv0, iv1, iv2, iv3; 237 unsigned i; 238 239 KASSERT(nbytes % 16 == 0); 240 241 /* Skip if there's nothing to do. */ 242 if (nbytes == 0) 243 return; 244 245 /* Expand round keys for bitslicing. */ 246 br_aes_ct64_skey_expand(sk_exp, nrounds, dec->aesd_aes.aes_rk64); 247 248 /* Load the IV. */ 249 iv0 = le32dec(iv + 4*0); 250 iv1 = le32dec(iv + 4*1); 251 iv2 = le32dec(iv + 4*2); 252 iv3 = le32dec(iv + 4*3); 253 254 /* Load the last cipher block. */ 255 cv0 = le32dec(in + nbytes - 16 + 4*0); 256 cv1 = le32dec(in + nbytes - 16 + 4*1); 257 cv2 = le32dec(in + nbytes - 16 + 4*2); 258 cv3 = le32dec(in + nbytes - 16 + 4*3); 259 260 /* Store the updated IV. */ 261 le32enc(iv + 4*0, cv0); 262 le32enc(iv + 4*1, cv1); 263 le32enc(iv + 4*2, cv2); 264 le32enc(iv + 4*3, cv3); 265 266 /* Handle the last cipher block separately if odd number. */ 267 if (nbytes % 64) { 268 unsigned n = (nbytes % 64)/16; 269 270 KASSERT(n == 1 || n == 2 || n == 3); 271 272 for (i = 4; i --> n;) 273 q[i] = q[4 + i] = 0; 274 KASSERT(i == n - 1); 275 w[0] = cv0; /* le32dec(in + nbytes - 16*n + 16*i + 4*0) */ 276 w[1] = cv1; /* le32dec(in + nbytes - 16*n + 16*i + 4*1) */ 277 w[2] = cv2; /* le32dec(in + nbytes - 16*n + 16*i + 4*2) */ 278 w[3] = cv3; /* le32dec(in + nbytes - 16*n + 16*i + 4*3) */ 279 br_aes_ct64_interleave_in(&q[i], &q[4 + i], w); 280 while (i --> 0) { 281 w[0] = le32dec(in + nbytes - 16*n + 16*i + 4*0); 282 w[1] = le32dec(in + nbytes - 16*n + 16*i + 4*1); 283 w[2] = le32dec(in + nbytes - 16*n + 16*i + 4*2); 284 w[3] = le32dec(in + nbytes - 16*n + 16*i + 4*3); 285 br_aes_ct64_interleave_in(&q[i], &q[4 + i], w); 286 } 287 288 /* Decrypt. */ 289 br_aes_ct64_ortho(q); 290 br_aes_ct64_bitslice_decrypt(nrounds, sk_exp, q); 291 br_aes_ct64_ortho(q); 292 293 for (i = n; i --> 1;) { 294 br_aes_ct64_interleave_out(w, q[i], q[4 + i]); 295 cv0 = le32dec(in + nbytes - 16*n + 16*(i - 1) + 4*0); 296 cv1 = le32dec(in + nbytes - 16*n + 16*(i - 1) + 4*1); 297 cv2 = le32dec(in + nbytes - 16*n + 16*(i - 1) + 4*2); 298 cv3 = le32dec(in + nbytes - 16*n + 16*(i - 1) + 4*3); 299 le32enc(out + nbytes - 16*n + 16*i + 4*0, w[0] ^ cv0); 300 le32enc(out + nbytes - 16*n + 16*i + 4*1, w[1] ^ cv1); 301 le32enc(out + nbytes - 16*n + 16*i + 4*2, w[2] ^ cv2); 302 le32enc(out + nbytes - 16*n + 16*i + 4*3, w[3] ^ cv3); 303 } 304 br_aes_ct64_interleave_out(w, q[0], q[4]); 305 306 /* If this was the only cipher block, we're done. */ 307 nbytes -= nbytes % 64; 308 if (nbytes == 0) 309 goto out; 310 311 /* 312 * Otherwise, load up the previous cipher block, and 313 * store the output block. 314 */ 315 cv0 = le32dec(in + nbytes - 16 + 4*0); 316 cv1 = le32dec(in + nbytes - 16 + 4*1); 317 cv2 = le32dec(in + nbytes - 16 + 4*2); 318 cv3 = le32dec(in + nbytes - 16 + 4*3); 319 le32enc(out + nbytes + 4*0, cv0 ^ w[0]); 320 le32enc(out + nbytes + 4*1, cv1 ^ w[1]); 321 le32enc(out + nbytes + 4*2, cv2 ^ w[2]); 322 le32enc(out + nbytes + 4*3, cv3 ^ w[3]); 323 } 324 325 for (;;) { 326 KASSERT(nbytes >= 64); 327 328 /* Load the input blocks. */ 329 w[0] = cv0; /* le32dec(in + nbytes - 64 + 16*i + 4*0) */ 330 w[1] = cv1; /* le32dec(in + nbytes - 64 + 16*i + 4*1) */ 331 w[2] = cv2; /* le32dec(in + nbytes - 64 + 16*i + 4*2) */ 332 w[3] = cv3; /* le32dec(in + nbytes - 64 + 16*i + 4*3) */ 333 br_aes_ct64_interleave_in(&q[3], &q[7], w); 334 for (i = 3; i --> 0;) { 335 w[0] = le32dec(in + nbytes - 64 + 16*i + 4*0); 336 w[1] = le32dec(in + nbytes - 64 + 16*i + 4*1); 337 w[2] = le32dec(in + nbytes - 64 + 16*i + 4*2); 338 w[3] = le32dec(in + nbytes - 64 + 16*i + 4*3); 339 br_aes_ct64_interleave_in(&q[i], &q[4 + i], w); 340 } 341 342 /* Decrypt. */ 343 br_aes_ct64_ortho(q); 344 br_aes_ct64_bitslice_decrypt(nrounds, sk_exp, q); 345 br_aes_ct64_ortho(q); 346 347 /* Store the upper output blocks. */ 348 for (i = 4; i --> 1;) { 349 br_aes_ct64_interleave_out(w, q[i], q[4 + i]); 350 cv0 = le32dec(in + nbytes - 64 + 16*(i - 1) + 4*0); 351 cv1 = le32dec(in + nbytes - 64 + 16*(i - 1) + 4*1); 352 cv2 = le32dec(in + nbytes - 64 + 16*(i - 1) + 4*2); 353 cv3 = le32dec(in + nbytes - 64 + 16*(i - 1) + 4*3); 354 le32enc(out + nbytes - 64 + 16*i + 4*0, w[0] ^ cv0); 355 le32enc(out + nbytes - 64 + 16*i + 4*1, w[1] ^ cv1); 356 le32enc(out + nbytes - 64 + 16*i + 4*2, w[2] ^ cv2); 357 le32enc(out + nbytes - 64 + 16*i + 4*3, w[3] ^ cv3); 358 } 359 360 /* Prepare the first output block. */ 361 br_aes_ct64_interleave_out(w, q[0], q[4]); 362 363 /* Stop if we've reached the first output block. */ 364 nbytes -= 64; 365 if (nbytes == 0) 366 goto out; 367 368 /* 369 * Load the preceding cipher block, and apply it as the 370 * chaining value to this one. 371 */ 372 cv0 = le32dec(in + nbytes - 16 + 4*0); 373 cv1 = le32dec(in + nbytes - 16 + 4*1); 374 cv2 = le32dec(in + nbytes - 16 + 4*2); 375 cv3 = le32dec(in + nbytes - 16 + 4*3); 376 le32enc(out + nbytes + 4*0, w[0] ^ cv0); 377 le32enc(out + nbytes + 4*1, w[1] ^ cv1); 378 le32enc(out + nbytes + 4*2, w[2] ^ cv2); 379 le32enc(out + nbytes + 4*3, w[3] ^ cv3); 380 } 381 382out: /* Store the first output block. */ 383 le32enc(out + 4*0, w[0] ^ iv0); 384 le32enc(out + 4*1, w[1] ^ iv1); 385 le32enc(out + 4*2, w[2] ^ iv2); 386 le32enc(out + 4*3, w[3] ^ iv3); 387 388 /* Paranoia: Zero temporary buffers. */ 389 explicit_memset(sk_exp, 0, sizeof sk_exp); 390 explicit_memset(q, 0, sizeof q); 391} 392 393static inline void 394aesbear64_xts_update(uint32_t *t0, uint32_t *t1, uint32_t *t2, uint32_t *t3) 395{ 396 uint32_t s0, s1, s2, s3; 397 398 s0 = *t0 >> 31; 399 s1 = *t1 >> 31; 400 s2 = *t2 >> 31; 401 s3 = *t3 >> 31; 402 *t0 = (*t0 << 1) ^ (-s3 & 0x87); 403 *t1 = (*t1 << 1) ^ s0; 404 *t2 = (*t2 << 1) ^ s1; 405 *t3 = (*t3 << 1) ^ s2; 406} 407 408static int 409aesbear64_xts_update_selftest(void) 410{ 411 static const struct { 412 uint32_t in[4], out[4]; 413 } cases[] = { 414 { {1}, {2} }, 415 { {0x80000000U,0,0,0}, {0,1,0,0} }, 416 { {0,0x80000000U,0,0}, {0,0,1,0} }, 417 { {0,0,0x80000000U,0}, {0,0,0,1} }, 418 { {0,0,0,0x80000000U}, {0x87,0,0,0} }, 419 { {0,0x80000000U,0,0x80000000U}, {0x87,0,1,0} }, 420 }; 421 unsigned i; 422 uint32_t t0, t1, t2, t3; 423 424 for (i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) { 425 t0 = cases[i].in[0]; 426 t1 = cases[i].in[1]; 427 t2 = cases[i].in[2]; 428 t3 = cases[i].in[3]; 429 aesbear64_xts_update(&t0, &t1, &t2, &t3); 430 if (t0 != cases[i].out[0] || 431 t1 != cases[i].out[1] || 432 t2 != cases[i].out[2] || 433 t3 != cases[i].out[3]) 434 return -1; 435 } 436 437 /* Success! */ 438 return 0; 439} 440 441static void 442aesbear64_xts_enc(const struct aesenc *enc, const uint8_t in[static 16], 443 uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16], 444 uint32_t nrounds) 445{ 446 uint64_t sk_exp[120]; 447 uint32_t w[4]; 448 uint64_t q[8]; 449 uint32_t t0, t1, t2, t3, u0, u1, u2, u3; 450 unsigned i; 451 452 KASSERT(nbytes % 16 == 0); 453 454 /* Skip if there's nothing to do. */ 455 if (nbytes == 0) 456 return; 457 458 /* Expand round keys for bitslicing. */ 459 br_aes_ct64_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk64); 460 461 /* Load tweak. */ 462 t0 = le32dec(tweak + 4*0); 463 t1 = le32dec(tweak + 4*1); 464 t2 = le32dec(tweak + 4*2); 465 t3 = le32dec(tweak + 4*3); 466 467 /* Handle the first blocks separately if odd number. */ 468 if (nbytes % 64) { 469 unsigned n = (nbytes % 64)/16; 470 471 /* Load up the first blocks and garbage. */ 472 for (i = 0, u0 = t0, u1 = t1, u2 = t2, u3 = t3; i < n; i++) { 473 w[0] = le32dec(in + 16*i + 4*0) ^ u0; 474 w[1] = le32dec(in + 16*i + 4*1) ^ u1; 475 w[2] = le32dec(in + 16*i + 4*2) ^ u2; 476 w[3] = le32dec(in + 16*i + 4*3) ^ u3; 477 aesbear64_xts_update(&u0, &u1, &u2, &u3); 478 br_aes_ct64_interleave_in(&q[i], &q[4 + i], w); 479 } 480 for (; i < 4; i++) 481 q[i] = q[4 + i] = 0; 482 483 /* Encrypt up to three blocks. */ 484 br_aes_ct64_ortho(q); 485 br_aes_ct64_bitslice_encrypt(nrounds, sk_exp, q); 486 br_aes_ct64_ortho(q); 487 488 /* Store up to three blocks. */ 489 for (i = 0, u0 = t0, u1 = t1, u2 = t2, u3 = t3; i < n; i++) { 490 br_aes_ct64_interleave_out(w, q[i], q[4 + i]); 491 le32enc(out + 16*i + 4*0, w[0] ^ u0); 492 le32enc(out + 16*i + 4*1, w[1] ^ u1); 493 le32enc(out + 16*i + 4*2, w[2] ^ u2); 494 le32enc(out + 16*i + 4*3, w[3] ^ u3); 495 aesbear64_xts_update(&u0, &u1, &u2, &u3); 496 } 497 498 /* Advance to the next block. */ 499 t0 = u0, t1 = u1, t2 = u2, t3 = u3; 500 if ((nbytes -= 16*n) == 0) 501 goto out; 502 in += 16*n; 503 out += 16*n; 504 } 505 506 do { 507 KASSERT(nbytes >= 64); 508 509 /* Load four blocks. */ 510 for (i = 0, u0 = t0, u1 = t1, u2 = t2, u3 = t3; i < 4; i++) { 511 w[0] = le32dec(in + 16*i + 4*0) ^ u0; 512 w[1] = le32dec(in + 16*i + 4*1) ^ u1; 513 w[2] = le32dec(in + 16*i + 4*2) ^ u2; 514 w[3] = le32dec(in + 16*i + 4*3) ^ u3; 515 aesbear64_xts_update(&u0, &u1, &u2, &u3); 516 br_aes_ct64_interleave_in(&q[i], &q[4 + i], w); 517 } 518 519 /* Encrypt four blocks. */ 520 br_aes_ct64_ortho(q); 521 br_aes_ct64_bitslice_encrypt(nrounds, sk_exp, q); 522 br_aes_ct64_ortho(q); 523 524 /* Store four blocks. */ 525 for (i = 0, u0 = t0, u1 = t1, u2 = t2, u3 = t3; i < 4; i++) { 526 br_aes_ct64_interleave_out(w, q[i], q[4 + i]); 527 le32enc(out + 16*i + 4*0, w[0] ^ u0); 528 le32enc(out + 16*i + 4*1, w[1] ^ u1); 529 le32enc(out + 16*i + 4*2, w[2] ^ u2); 530 le32enc(out + 16*i + 4*3, w[3] ^ u3); 531 aesbear64_xts_update(&u0, &u1, &u2, &u3); 532 } 533 534 /* Advance to the next pair of blocks. */ 535 t0 = u0, t1 = u1, t2 = u2, t3 = u3; 536 in += 64; 537 out += 64; 538 } while (nbytes -= 64, nbytes); 539 540out: /* Store the updated tweak. */ 541 le32enc(tweak + 4*0, t0); 542 le32enc(tweak + 4*1, t1); 543 le32enc(tweak + 4*2, t2); 544 le32enc(tweak + 4*3, t3); 545 546 /* Paranoia: Zero temporary buffers. */ 547 explicit_memset(sk_exp, 0, sizeof sk_exp); 548 explicit_memset(q, 0, sizeof q); 549} 550 551static void 552aesbear64_xts_dec(const struct aesdec *dec, const uint8_t in[static 16], 553 uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16], 554 uint32_t nrounds) 555{ 556 uint64_t sk_exp[120]; 557 uint32_t w[4]; 558 uint64_t q[8]; 559 uint32_t t0, t1, t2, t3, u0, u1, u2, u3; 560 unsigned i; 561 562 KASSERT(nbytes % 16 == 0); 563 564 /* Skip if there's nothing to do. */ 565 if (nbytes == 0) 566 return; 567 568 /* Expand round keys for bitslicing. */ 569 br_aes_ct64_skey_expand(sk_exp, nrounds, dec->aesd_aes.aes_rk64); 570 571 /* Load tweak. */ 572 t0 = le32dec(tweak + 4*0); 573 t1 = le32dec(tweak + 4*1); 574 t2 = le32dec(tweak + 4*2); 575 t3 = le32dec(tweak + 4*3); 576 577 /* Handle the first blocks separately if odd number. */ 578 if (nbytes % 64) { 579 unsigned n = (nbytes % 64)/16; 580 581 /* Load up the first blocks and garbage. */ 582 for (i = 0, u0 = t0, u1 = t1, u2 = t2, u3 = t3; i < n; i++) { 583 w[0] = le32dec(in + 16*i + 4*0) ^ u0; 584 w[1] = le32dec(in + 16*i + 4*1) ^ u1; 585 w[2] = le32dec(in + 16*i + 4*2) ^ u2; 586 w[3] = le32dec(in + 16*i + 4*3) ^ u3; 587 aesbear64_xts_update(&u0, &u1, &u2, &u3); 588 br_aes_ct64_interleave_in(&q[i], &q[4 + i], w); 589 } 590 for (; i < 4; i++) 591 q[i] = q[4 + i] = 0; 592 593 /* Decrypt up to three blocks. */ 594 br_aes_ct64_ortho(q); 595 br_aes_ct64_bitslice_decrypt(nrounds, sk_exp, q); 596 br_aes_ct64_ortho(q); 597 598 /* Store up to three blocks. */ 599 for (i = 0, u0 = t0, u1 = t1, u2 = t2, u3 = t3; i < n; i++) { 600 br_aes_ct64_interleave_out(w, q[i], q[4 + i]); 601 le32enc(out + 16*i + 4*0, w[0] ^ u0); 602 le32enc(out + 16*i + 4*1, w[1] ^ u1); 603 le32enc(out + 16*i + 4*2, w[2] ^ u2); 604 le32enc(out + 16*i + 4*3, w[3] ^ u3); 605 aesbear64_xts_update(&u0, &u1, &u2, &u3); 606 } 607 608 /* Advance to the next block. */ 609 t0 = u0, t1 = u1, t2 = u2, t3 = u3; 610 if ((nbytes -= 16*n) == 0) 611 goto out; 612 in += 16*n; 613 out += 16*n; 614 } 615 616 do { 617 KASSERT(nbytes >= 64); 618 619 /* Load four blocks. */ 620 for (i = 0, u0 = t0, u1 = t1, u2 = t2, u3 = t3; i < 4; i++) { 621 w[0] = le32dec(in + 16*i + 4*0) ^ u0; 622 w[1] = le32dec(in + 16*i + 4*1) ^ u1; 623 w[2] = le32dec(in + 16*i + 4*2) ^ u2; 624 w[3] = le32dec(in + 16*i + 4*3) ^ u3; 625 aesbear64_xts_update(&u0, &u1, &u2, &u3); 626 br_aes_ct64_interleave_in(&q[i], &q[4 + i], w); 627 } 628 629 /* Decrypt four blocks. */ 630 br_aes_ct64_ortho(q); 631 br_aes_ct64_bitslice_decrypt(nrounds, sk_exp, q); 632 br_aes_ct64_ortho(q); 633 634 /* Store four blocks. */ 635 for (i = 0, u0 = t0, u1 = t1, u2 = t2, u3 = t3; i < 4; i++) { 636 br_aes_ct64_interleave_out(w, q[i], q[4 + i]); 637 le32enc(out + 16*i + 4*0, w[0] ^ u0); 638 le32enc(out + 16*i + 4*1, w[1] ^ u1); 639 le32enc(out + 16*i + 4*2, w[2] ^ u2); 640 le32enc(out + 16*i + 4*3, w[3] ^ u3); 641 aesbear64_xts_update(&u0, &u1, &u2, &u3); 642 } 643 644 /* Advance to the next pair of blocks. */ 645 t0 = u0, t1 = u1, t2 = u2, t3 = u3; 646 in += 64; 647 out += 64; 648 } while (nbytes -= 64, nbytes); 649 650out: /* Store the updated tweak. */ 651 le32enc(tweak + 4*0, t0); 652 le32enc(tweak + 4*1, t1); 653 le32enc(tweak + 4*2, t2); 654 le32enc(tweak + 4*3, t3); 655 656 /* Paranoia: Zero temporary buffers. */ 657 explicit_memset(sk_exp, 0, sizeof sk_exp); 658 explicit_memset(q, 0, sizeof q); 659} 660 661static void 662aesbear64_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16], 663 size_t nbytes, uint8_t auth[static 16], uint32_t nrounds) 664{ 665 uint64_t sk_exp[120]; 666 uint32_t w[4]; 667 uint64_t q[8]; 668 669 KASSERT(nbytes); 670 KASSERT(nbytes % 16 == 0); 671 672 /* Expand round keys for bitslicing. */ 673 br_aes_ct64_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk64); 674 675 /* Initialize garbage blocks. */ 676 q[1] = q[2] = q[3] = 0; 677 q[5] = q[6] = q[7] = 0; 678 679 /* Load initial authenticator. */ 680 w[0] = le32dec(auth + 4*0); 681 w[1] = le32dec(auth + 4*1); 682 w[2] = le32dec(auth + 4*2); 683 w[3] = le32dec(auth + 4*3); 684 685 for (; nbytes; nbytes -= 16, in += 16) { 686 /* Combine input block. */ 687 w[0] ^= le32dec(in + 4*0); 688 w[1] ^= le32dec(in + 4*1); 689 w[2] ^= le32dec(in + 4*2); 690 w[3] ^= le32dec(in + 4*3); 691 br_aes_ct64_interleave_in(&q[0], &q[4], w); 692 693 /* Transform to bitslice, encrypt, transform from bitslice. */ 694 br_aes_ct64_ortho(q); 695 br_aes_ct64_bitslice_encrypt(nrounds, sk_exp, q); 696 br_aes_ct64_ortho(q); 697 698 br_aes_ct64_interleave_out(w, q[0], q[4]); 699 } 700 701 /* Store updated authenticator. */ 702 le32enc(auth + 4*0, w[0]); 703 le32enc(auth + 4*1, w[1]); 704 le32enc(auth + 4*2, w[2]); 705 le32enc(auth + 4*3, w[3]); 706 707 /* Paranoia: Zero temporary buffers. */ 708 explicit_memset(sk_exp, 0, sizeof sk_exp); 709 explicit_memset(q, 0, sizeof q); 710} 711 712static void 713aesbear64_ccm_enc1(const struct aesenc *enc, const uint8_t *in, uint8_t *out, 714 size_t nbytes, uint8_t authctr[32], uint32_t nrounds) 715{ 716 uint64_t sk_exp[120]; 717 uint32_t w[4]; 718 uint64_t q[8]; 719 uint32_t c0, c1, c2, c3be; 720 721 KASSERT(nbytes); 722 KASSERT(nbytes % 16 == 0); 723 724 /* Expand round keys for bitslicing. */ 725 br_aes_ct64_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk64); 726 727 /* Initialize garbage blocks. */ 728 q[2] = q[3] = 0; 729 q[6] = q[7] = 0; 730 731 /* Set first block to authenticator. */ 732 w[0] = le32dec(authctr + 4*0); 733 w[1] = le32dec(authctr + 4*1); 734 w[2] = le32dec(authctr + 4*2); 735 w[3] = le32dec(authctr + 4*3); 736 737 /* Load initial counter block, big-endian so we can increment it. */ 738 c0 = le32dec(authctr + 16 + 4*0); 739 c1 = le32dec(authctr + 16 + 4*1); 740 c2 = le32dec(authctr + 16 + 4*2); 741 c3be = bswap32(le32dec(authctr + 16 + 4*3)); 742 743 for (; nbytes; nbytes -= 16, in += 16, out += 16) { 744 /* Update authenticator. */ 745 w[0] ^= le32dec(in + 4*0); 746 w[1] ^= le32dec(in + 4*1); 747 w[2] ^= le32dec(in + 4*2); 748 w[3] ^= le32dec(in + 4*3); 749 br_aes_ct64_interleave_in(&q[0], &q[4], w); 750 751 /* Increment 32-bit counter. */ 752 w[0] = c0; 753 w[1] = c1; 754 w[2] = c2; 755 w[3] = bswap32(++c3be); 756 br_aes_ct64_interleave_in(&q[1], &q[5], w); 757 758 /* Encrypt authenticator and counter. */ 759 br_aes_ct64_ortho(q); 760 br_aes_ct64_bitslice_encrypt(nrounds, sk_exp, q); 761 br_aes_ct64_ortho(q); 762 763 /* Encrypt with CTR output. */ 764 br_aes_ct64_interleave_out(w, q[1], q[5]); 765 le32enc(out + 4*0, le32dec(in + 4*0) ^ w[0]); 766 le32enc(out + 4*1, le32dec(in + 4*1) ^ w[1]); 767 le32enc(out + 4*2, le32dec(in + 4*2) ^ w[2]); 768 le32enc(out + 4*3, le32dec(in + 4*3) ^ w[3]); 769 770 /* Fish out the authenticator so far. */ 771 br_aes_ct64_interleave_out(w, q[0], q[4]); 772 } 773 774 /* Update authenticator. */ 775 le32enc(authctr + 4*0, w[0]); 776 le32enc(authctr + 4*1, w[1]); 777 le32enc(authctr + 4*2, w[2]); 778 le32enc(authctr + 4*3, w[3]); 779 780 /* Update counter. */ 781 le32enc(authctr + 16 + 4*3, bswap32(c3be)); 782 783 /* Paranoia: Zero temporary buffers. */ 784 explicit_memset(sk_exp, 0, sizeof sk_exp); 785 explicit_memset(q, 0, sizeof q); 786} 787 788static void 789aesbear64_ccm_dec1(const struct aesenc *enc, const uint8_t *in, uint8_t *out, 790 size_t nbytes, uint8_t authctr[32], uint32_t nrounds) 791{ 792 uint64_t sk_exp[120]; 793 uint32_t w[4]; 794 uint64_t q[8]; 795 uint32_t c0, c1, c2, c3be; 796 uint32_t b0, b1, b2, b3; 797 798 KASSERT(nbytes); 799 KASSERT(nbytes % 16 == 0); 800 801 /* Expand round keys for bitslicing. */ 802 br_aes_ct64_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk64); 803 804 /* Initialize garbage blocks. */ 805 q[2] = q[3] = 0; 806 q[6] = q[7] = 0; 807 808 /* Load initial counter block, big-endian so we can increment it. */ 809 c0 = le32dec(authctr + 16 + 4*0); 810 c1 = le32dec(authctr + 16 + 4*1); 811 c2 = le32dec(authctr + 16 + 4*2); 812 c3be = bswap32(le32dec(authctr + 16 + 4*3)); 813 814 /* Increment 32-bit counter. */ 815 w[0] = c0; 816 w[1] = c1; 817 w[2] = c2; 818 w[3] = bswap32(++c3be); 819 br_aes_ct64_interleave_in(&q[1], &q[5], w); 820 821 /* 822 * Set the other block to garbage -- we don't have any 823 * plaintext to authenticate yet. 824 */ 825 q[0] = q[4] = 0; 826 827 /* Encrypt first CTR. */ 828 br_aes_ct64_ortho(q); 829 br_aes_ct64_bitslice_encrypt(nrounds, sk_exp, q); 830 br_aes_ct64_ortho(q); 831 832 /* Load the initial authenticator. */ 833 w[0] = le32dec(authctr + 4*0); 834 w[1] = le32dec(authctr + 4*1); 835 w[2] = le32dec(authctr + 4*2); 836 w[3] = le32dec(authctr + 4*3); 837 br_aes_ct64_interleave_in(&q[0], &q[4], w); 838 839 for (;; in += 16, out += 16) { 840 /* Decrypt the block. */ 841 br_aes_ct64_interleave_out(w, q[1], q[5]); 842 b0 = le32dec(in + 4*0) ^ w[0]; 843 b1 = le32dec(in + 4*1) ^ w[1]; 844 b2 = le32dec(in + 4*2) ^ w[2]; 845 b3 = le32dec(in + 4*3) ^ w[3]; 846 847 /* Update authenticator. */ 848 br_aes_ct64_interleave_out(w, q[0], q[4]); 849 w[0] ^= b0; 850 w[1] ^= b1; 851 w[2] ^= b2; 852 w[3] ^= b3; 853 br_aes_ct64_interleave_in(&q[0], &q[4], w); 854 855 /* Store plaintext. */ 856 le32enc(out + 4*0, b0); 857 le32enc(out + 4*1, b1); 858 le32enc(out + 4*2, b2); 859 le32enc(out + 4*3, b3); 860 861 /* If this is the last block, stop. */ 862 if ((nbytes -= 16) == 0) 863 break; 864 865 /* Increment 32-bit counter. */ 866 w[0] = c0; 867 w[1] = c1; 868 w[2] = c2; 869 w[3] = bswap32(++c3be); 870 br_aes_ct64_interleave_in(&q[1], &q[5], w); 871 872 /* Authenticate previous plaintext, encrypt next CTR. */ 873 br_aes_ct64_ortho(q); 874 br_aes_ct64_bitslice_encrypt(nrounds, sk_exp, q); 875 br_aes_ct64_ortho(q); 876 } 877 878 /* 879 * Authenticate last plaintext. We're only doing this for the 880 * authenticator, not for the counter, so don't bother to 881 * initialize q[2*i]. (Even for the sake of sanitizers, 882 * they're already initialized to something by now.) 883 */ 884 br_aes_ct64_ortho(q); 885 br_aes_ct64_bitslice_encrypt(nrounds, sk_exp, q); 886 br_aes_ct64_ortho(q); 887 888 /* Update authenticator. */ 889 br_aes_ct64_interleave_out(w, q[0], q[4]); 890 le32enc(authctr + 4*0, w[0]); 891 le32enc(authctr + 4*1, w[1]); 892 le32enc(authctr + 4*2, w[2]); 893 le32enc(authctr + 4*3, w[3]); 894 895 /* Update counter. */ 896 le32enc(authctr + 16 + 4*3, bswap32(c3be)); 897 898 /* Paranoia: Zero temporary buffers. */ 899 explicit_memset(sk_exp, 0, sizeof sk_exp); 900 explicit_memset(q, 0, sizeof q); 901} 902 903static int 904aesbear64_probe(void) 905{ 906 907 if (aesbear64_xts_update_selftest()) 908 return -1; 909 910 /* XXX test br_aes_ct64_bitslice_decrypt */ 911 /* XXX test br_aes_ct64_bitslice_encrypt */ 912 /* XXX test br_aes_ct64_keysched */ 913 /* XXX test br_aes_ct64_ortho */ 914 /* XXX test br_aes_ct64_skey_expand */ 915 916 return 0; 917} 918 919struct aes_impl aes_bear64_impl = { 920 .ai_name = "BearSSL aes_ct64", 921 .ai_probe = aesbear64_probe, 922 .ai_setenckey = aesbear64_setenckey, 923 .ai_setdeckey = aesbear64_setdeckey, 924 .ai_enc = aesbear64_enc, 925 .ai_dec = aesbear64_dec, 926 .ai_cbc_enc = aesbear64_cbc_enc, 927 .ai_cbc_dec = aesbear64_cbc_dec, 928 .ai_xts_enc = aesbear64_xts_enc, 929 .ai_xts_dec = aesbear64_xts_dec, 930 .ai_cbcmac_update1 = aesbear64_cbcmac_update1, 931 .ai_ccm_enc1 = aesbear64_ccm_enc1, 932 .ai_ccm_dec1 = aesbear64_ccm_dec1, 933}; 934