/* Home | History | Annotate | Line # | Download | only in aes */
      1 /*	$NetBSD: aes_bear64.c,v 1.1 2025/11/23 22:44:13 riastradh Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2025 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     26  * POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 
     29 #include <sys/cdefs.h>
     30 __KERNEL_RCSID(1, "$NetBSD: aes_bear64.c,v 1.1 2025/11/23 22:44:13 riastradh Exp $");
     31 
     32 #include <sys/types.h>
     33 #include <sys/endian.h>
     34 
     35 #ifdef _KERNEL
     36 #include <sys/systm.h>
     37 #else
     38 #include <assert.h>
     39 #include <err.h>
     40 #include <string.h>
     41 #define	KASSERT			assert
     42 #define	panic(fmt, args...)	err(1, fmt, args)
     43 #endif
     44 
     45 #include <crypto/aes/aes.h>
     46 #include <crypto/aes/aes_bear64.h>
     47 #include <crypto/aes/aes_impl.h>
     48 
/*
 * aesbear64_setkey(rk, key, nrounds)
 *
 *	Expand an AES key into BearSSL's ct64 round-key format.  The
 *	round count selects the key size: 10/12/14 rounds correspond to
 *	AES-128/-192/-256 (16/24/32-byte keys).  Any other round count
 *	is a caller bug and panics.
 */
static void
aesbear64_setkey(uint64_t rk[static 30], const void *key, uint32_t nrounds)
{
	size_t key_len;

	/* Map the round count onto the key size in bytes.  */
	if (nrounds == 10)
		key_len = 16;
	else if (nrounds == 12)
		key_len = 24;
	else if (nrounds == 14)
		key_len = 32;
	else
		panic("invalid AES nrounds: %u", nrounds);

	/* Run BearSSL's constant-time key schedule.  */
	br_aes_ct64_keysched(rk, key, key_len);
}
     70 
/*
 * aesbear64_setenckey(enc, key, nrounds)
 *
 *	Set up encryption round keys by expanding key into the 64-bit
 *	round-key array of enc via aesbear64_setkey.
 */
static void
aesbear64_setenckey(struct aesenc *enc, const uint8_t *key, uint32_t nrounds)
{

	aesbear64_setkey(enc->aese_aes.aes_rk64, key, nrounds);
}
     77 
/*
 * aesbear64_setdeckey(dec, key, nrounds)
 *
 *	Set up decryption round keys.  The schedule is identical to the
 *	encryption schedule -- see the comment below.
 */
static void
aesbear64_setdeckey(struct aesdec *dec, const uint8_t *key, uint32_t nrounds)
{

	/*
	 * BearSSL computes InvMixColumns on the fly -- no need for
	 * distinct decryption round keys.
	 */
	aesbear64_setkey(dec->aesd_aes.aes_rk64, key, nrounds);
}
     88 
     89 static void
     90 aesbear64_enc(const struct aesenc *enc, const uint8_t in[static 16],
     91     uint8_t out[static 16], uint32_t nrounds)
     92 {
     93 	uint64_t sk_exp[120];
     94 	uint32_t w[4];
     95 	uint64_t q[8];
     96 
     97 	/* Expand round keys for bitslicing.  */
     98 	br_aes_ct64_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk64);
     99 
    100 	/* Load input block interleaved with garbage blocks.  */
    101 	w[0] = le32dec(in + 4*0);
    102 	w[1] = le32dec(in + 4*1);
    103 	w[2] = le32dec(in + 4*2);
    104 	w[3] = le32dec(in + 4*3);
    105 	br_aes_ct64_interleave_in(&q[0], &q[4], w);
    106 	q[1] = q[2] = q[3] = 0;
    107 	q[5] = q[6] = q[7] = 0;
    108 
    109 	/* Transform to bitslice, encrypt, transform from bitslice.  */
    110 	br_aes_ct64_ortho(q);
    111 	br_aes_ct64_bitslice_encrypt(nrounds, sk_exp, q);
    112 	br_aes_ct64_ortho(q);
    113 
    114 	/* Store output block.  */
    115 	br_aes_ct64_interleave_out(w, q[0], q[4]);
    116 	le32enc(out + 4*0, w[0]);
    117 	le32enc(out + 4*1, w[1]);
    118 	le32enc(out + 4*2, w[2]);
    119 	le32enc(out + 4*3, w[3]);
    120 
    121 	/* Paranoia: Zero temporary buffers.  */
    122 	explicit_memset(sk_exp, 0, sizeof sk_exp);
    123 	explicit_memset(q, 0, sizeof q);
    124 }
    125 
    126 static void
    127 aesbear64_dec(const struct aesdec *dec, const uint8_t in[static 16],
    128     uint8_t out[static 16], uint32_t nrounds)
    129 {
    130 	uint64_t sk_exp[120];
    131 	uint32_t w[4];
    132 	uint64_t q[8];
    133 
    134 	/* Expand round keys for bitslicing.  */
    135 	br_aes_ct64_skey_expand(sk_exp, nrounds, dec->aesd_aes.aes_rk64);
    136 
    137 	/* Load input block interleaved with garbage blocks.  */
    138 	w[0] = le32dec(in + 4*0);
    139 	w[1] = le32dec(in + 4*1);
    140 	w[2] = le32dec(in + 4*2);
    141 	w[3] = le32dec(in + 4*3);
    142 	br_aes_ct64_interleave_in(&q[0], &q[4], w);
    143 	q[1] = q[2] = q[3] = 0;
    144 	q[5] = q[6] = q[7] = 0;
    145 
    146 	/* Transform to bitslice, decrypt, transform from bitslice.  */
    147 	br_aes_ct64_ortho(q);
    148 	br_aes_ct64_bitslice_decrypt(nrounds, sk_exp, q);
    149 	br_aes_ct64_ortho(q);
    150 
    151 	/* Store output block.  */
    152 	br_aes_ct64_interleave_out(w, q[0], q[4]);
    153 	le32enc(out + 4*0, w[0]);
    154 	le32enc(out + 4*1, w[1]);
    155 	le32enc(out + 4*2, w[2]);
    156 	le32enc(out + 4*3, w[3]);
    157 
    158 	/* Paranoia: Zero temporary buffers.  */
    159 	explicit_memset(sk_exp, 0, sizeof sk_exp);
    160 	explicit_memset(q, 0, sizeof q);
    161 }
    162 
    163 static void
    164 aesbear64_cbc_enc(const struct aesenc *enc, const uint8_t in[static 16],
    165     uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    166     uint32_t nrounds)
    167 {
    168 	uint64_t sk_exp[120];
    169 	uint32_t w[4];
    170 	uint64_t q[8];
    171 	uint32_t cv0, cv1, cv2, cv3;
    172 
    173 	KASSERT(nbytes % 16 == 0);
    174 
    175 	/* Skip if there's nothing to do.  */
    176 	if (nbytes == 0)
    177 		return;
    178 
    179 	/* Expand round keys for bitslicing.  */
    180 	br_aes_ct64_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk64);
    181 
    182 	/* Initialize garbage blocks.  */
    183 	q[1] = q[2] = q[3] = 0;
    184 	q[5] = q[6] = q[7] = 0;
    185 
    186 	/* Load IV.  */
    187 	cv0 = le32dec(iv + 4*0);
    188 	cv1 = le32dec(iv + 4*1);
    189 	cv2 = le32dec(iv + 4*2);
    190 	cv3 = le32dec(iv + 4*3);
    191 
    192 	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
    193 		/* Load input block and apply CV.  */
    194 		w[0] = cv0 ^ le32dec(in + 4*0);
    195 		w[1] = cv1 ^ le32dec(in + 4*1);
    196 		w[2] = cv2 ^ le32dec(in + 4*2);
    197 		w[3] = cv3 ^ le32dec(in + 4*3);
    198 		br_aes_ct64_interleave_in(&q[0], &q[4], w);
    199 
    200 		/* Transform to bitslice, encrypt, transform from bitslice.  */
    201 		br_aes_ct64_ortho(q);
    202 		br_aes_ct64_bitslice_encrypt(nrounds, sk_exp, q);
    203 		br_aes_ct64_ortho(q);
    204 
    205 		/* Remember ciphertext as CV and store output block.  */
    206 		br_aes_ct64_interleave_out(w, q[0], q[4]);
    207 		cv0 = w[0];
    208 		cv1 = w[1];
    209 		cv2 = w[2];
    210 		cv3 = w[3];
    211 		le32enc(out + 4*0, cv0);
    212 		le32enc(out + 4*1, cv1);
    213 		le32enc(out + 4*2, cv2);
    214 		le32enc(out + 4*3, cv3);
    215 	}
    216 
    217 	/* Store updated IV.  */
    218 	le32enc(iv + 4*0, cv0);
    219 	le32enc(iv + 4*1, cv1);
    220 	le32enc(iv + 4*2, cv2);
    221 	le32enc(iv + 4*3, cv3);
    222 
    223 	/* Paranoia: Zero temporary buffers.  */
    224 	explicit_memset(sk_exp, 0, sizeof sk_exp);
    225 	explicit_memset(q, 0, sizeof q);
    226 }
    227 
/*
 * aesbear64_cbc_dec(dec, in, out, nbytes, iv, nrounds)
 *
 *	AES-CBC decryption of nbytes (a multiple of 16, possibly zero)
 *	from in to out.  Unlike CBC encryption, decryption can be
 *	parallelized, so this works backwards from the last cipher
 *	block in batches of up to four blocks through the bitsliced
 *	core; each decrypted block is XORed with the preceding cipher
 *	block (or, for the first block, the IV).  On return iv[] holds
 *	the last input cipher block, ready to chain a subsequent call.
 */
static void
aesbear64_cbc_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
	uint64_t sk_exp[120];
	uint32_t w[4];
	uint64_t q[8];
	uint32_t cv0, cv1, cv2, cv3, iv0, iv1, iv2, iv3;
	unsigned i;

	KASSERT(nbytes % 16 == 0);

	/* Skip if there's nothing to do.  */
	if (nbytes == 0)
		return;

	/* Expand round keys for bitslicing.  */
	br_aes_ct64_skey_expand(sk_exp, nrounds, dec->aesd_aes.aes_rk64);

	/* Load the IV.  */
	iv0 = le32dec(iv + 4*0);
	iv1 = le32dec(iv + 4*1);
	iv2 = le32dec(iv + 4*2);
	iv3 = le32dec(iv + 4*3);

	/* Load the last cipher block.  */
	cv0 = le32dec(in + nbytes - 16 + 4*0);
	cv1 = le32dec(in + nbytes - 16 + 4*1);
	cv2 = le32dec(in + nbytes - 16 + 4*2);
	cv3 = le32dec(in + nbytes - 16 + 4*3);

	/* Store the updated IV.  */
	le32enc(iv + 4*0, cv0);
	le32enc(iv + 4*1, cv1);
	le32enc(iv + 4*2, cv2);
	le32enc(iv + 4*3, cv3);

	/* Handle the last cipher block separately if odd number.  */
	if (nbytes % 64) {
		unsigned n = (nbytes % 64)/16;

		KASSERT(n == 1 || n == 2 || n == 3);

		/* Zero the unused slots [n..3]; only n blocks are real.  */
		for (i = 4; i --> n;)
			q[i] = q[4 + i] = 0;
		KASSERT(i == n - 1);
		/* cv* already hold the last cipher block (slot i = n-1).  */
		w[0] = cv0;	/* le32dec(in + nbytes - 16*n + 16*i + 4*0) */
		w[1] = cv1;	/* le32dec(in + nbytes - 16*n + 16*i + 4*1) */
		w[2] = cv2;	/* le32dec(in + nbytes - 16*n + 16*i + 4*2) */
		w[3] = cv3;	/* le32dec(in + nbytes - 16*n + 16*i + 4*3) */
		br_aes_ct64_interleave_in(&q[i], &q[4 + i], w);
		/* Load the earlier blocks of the partial batch.  */
		while (i --> 0) {
			w[0] = le32dec(in + nbytes - 16*n + 16*i + 4*0);
			w[1] = le32dec(in + nbytes - 16*n + 16*i + 4*1);
			w[2] = le32dec(in + nbytes - 16*n + 16*i + 4*2);
			w[3] = le32dec(in + nbytes - 16*n + 16*i + 4*3);
			br_aes_ct64_interleave_in(&q[i], &q[4 + i], w);
		}

		/* Decrypt.  */
		br_aes_ct64_ortho(q);
		br_aes_ct64_bitslice_decrypt(nrounds, sk_exp, q);
		br_aes_ct64_ortho(q);

		/*
		 * Blocks 1..n-1 of the batch: XOR with the preceding
		 * cipher block and store.  Block 0's chaining value is
		 * not yet known (it lies before this batch), so it is
		 * handled after the loop.
		 */
		for (i = n; i --> 1;) {
			br_aes_ct64_interleave_out(w, q[i], q[4 + i]);
			cv0 = le32dec(in + nbytes - 16*n + 16*(i - 1) + 4*0);
			cv1 = le32dec(in + nbytes - 16*n + 16*(i - 1) + 4*1);
			cv2 = le32dec(in + nbytes - 16*n + 16*(i - 1) + 4*2);
			cv3 = le32dec(in + nbytes - 16*n + 16*(i - 1) + 4*3);
			le32enc(out + nbytes - 16*n + 16*i + 4*0, w[0] ^ cv0);
			le32enc(out + nbytes - 16*n + 16*i + 4*1, w[1] ^ cv1);
			le32enc(out + nbytes - 16*n + 16*i + 4*2, w[2] ^ cv2);
			le32enc(out + nbytes - 16*n + 16*i + 4*3, w[3] ^ cv3);
		}
		br_aes_ct64_interleave_out(w, q[0], q[4]);

		/* If this was the only cipher block, we're done.  */
		nbytes -= nbytes % 64;
		if (nbytes == 0)
			goto out;

		/*
		 * Otherwise, load up the previous cipher block, and
		 * store the output block.
		 */
		cv0 = le32dec(in + nbytes - 16 + 4*0);
		cv1 = le32dec(in + nbytes - 16 + 4*1);
		cv2 = le32dec(in + nbytes - 16 + 4*2);
		cv3 = le32dec(in + nbytes - 16 + 4*3);
		le32enc(out + nbytes + 4*0, cv0 ^ w[0]);
		le32enc(out + nbytes + 4*1, cv1 ^ w[1]);
		le32enc(out + nbytes + 4*2, cv2 ^ w[2]);
		le32enc(out + nbytes + 4*3, cv3 ^ w[3]);
	}

	/* Main loop: full batches of four blocks, still back to front.  */
	for (;;) {
		KASSERT(nbytes >= 64);

		/* Load the input blocks.  */
		w[0] = cv0;	/* le32dec(in + nbytes - 64 + 16*i + 4*0) */
		w[1] = cv1;	/* le32dec(in + nbytes - 64 + 16*i + 4*1) */
		w[2] = cv2;	/* le32dec(in + nbytes - 64 + 16*i + 4*2) */
		w[3] = cv3;	/* le32dec(in + nbytes - 64 + 16*i + 4*3) */
		br_aes_ct64_interleave_in(&q[3], &q[7], w);
		for (i = 3; i --> 0;) {
			w[0] = le32dec(in + nbytes - 64 + 16*i + 4*0);
			w[1] = le32dec(in + nbytes - 64 + 16*i + 4*1);
			w[2] = le32dec(in + nbytes - 64 + 16*i + 4*2);
			w[3] = le32dec(in + nbytes - 64 + 16*i + 4*3);
			br_aes_ct64_interleave_in(&q[i], &q[4 + i], w);
		}

		/* Decrypt.  */
		br_aes_ct64_ortho(q);
		br_aes_ct64_bitslice_decrypt(nrounds, sk_exp, q);
		br_aes_ct64_ortho(q);

		/* Store the upper output blocks.  */
		for (i = 4; i --> 1;) {
			br_aes_ct64_interleave_out(w, q[i], q[4 + i]);
			cv0 = le32dec(in + nbytes - 64 + 16*(i - 1) + 4*0);
			cv1 = le32dec(in + nbytes - 64 + 16*(i - 1) + 4*1);
			cv2 = le32dec(in + nbytes - 64 + 16*(i - 1) + 4*2);
			cv3 = le32dec(in + nbytes - 64 + 16*(i - 1) + 4*3);
			le32enc(out + nbytes - 64 + 16*i + 4*0, w[0] ^ cv0);
			le32enc(out + nbytes - 64 + 16*i + 4*1, w[1] ^ cv1);
			le32enc(out + nbytes - 64 + 16*i + 4*2, w[2] ^ cv2);
			le32enc(out + nbytes - 64 + 16*i + 4*3, w[3] ^ cv3);
		}

		/* Prepare the first output block.  */
		br_aes_ct64_interleave_out(w, q[0], q[4]);

		/* Stop if we've reached the first output block.  */
		nbytes -= 64;
		if (nbytes == 0)
			goto out;

		/*
		 * Load the preceding cipher block, and apply it as the
		 * chaining value to this one.
		 */
		cv0 = le32dec(in + nbytes - 16 + 4*0);
		cv1 = le32dec(in + nbytes - 16 + 4*1);
		cv2 = le32dec(in + nbytes - 16 + 4*2);
		cv3 = le32dec(in + nbytes - 16 + 4*3);
		le32enc(out + nbytes + 4*0, w[0] ^ cv0);
		le32enc(out + nbytes + 4*1, w[1] ^ cv1);
		le32enc(out + nbytes + 4*2, w[2] ^ cv2);
		le32enc(out + nbytes + 4*3, w[3] ^ cv3);
	}

out:	/* Store the first output block, chained with the caller's IV.  */
	le32enc(out + 4*0, w[0] ^ iv0);
	le32enc(out + 4*1, w[1] ^ iv1);
	le32enc(out + 4*2, w[2] ^ iv2);
	le32enc(out + 4*3, w[3] ^ iv3);

	/* Paranoia: Zero temporary buffers.  */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}
    392 
/*
 * aesbear64_xts_update(t0, t1, t2, t3)
 *
 *	Advance the 128-bit XTS tweak, stored as four little-endian
 *	32-bit words, to the next block: shift the whole tweak left by
 *	one bit, and if a bit falls off the top, fold it back in as
 *	0x87 -- the low byte of the XTS reduction polynomial
 *	x^128 + x^7 + x^2 + x + 1.
 */
static inline void
aesbear64_xts_update(uint32_t *t0, uint32_t *t1, uint32_t *t2, uint32_t *t3)
{
	uint32_t u0, u1, u2, u3;

	/*
	 * Compute all four shifted words before storing, carrying each
	 * word's top bit into the next word up; the top bit of the
	 * whole tweak (*t3 >> 31) selects the 0x87 reduction.
	 */
	u0 = (*t0 << 1) ^ (0x87 & -(*t3 >> 31));
	u1 = (*t1 << 1) ^ (*t0 >> 31);
	u2 = (*t2 << 1) ^ (*t1 >> 31);
	u3 = (*t3 << 1) ^ (*t2 >> 31);

	*t0 = u0;
	*t1 = u1;
	*t2 = u2;
	*t3 = u3;
}
    407 
/*
 * aesbear64_xts_update_selftest()
 *
 *	Run aesbear64_xts_update over a table of known input/output
 *	tweak vectors.  Returns 0 if every case matches, -1 on the
 *	first mismatch.
 */
static int
aesbear64_xts_update_selftest(void)
{
	static const struct {
		uint32_t in[4], out[4];
	} cases[] = {
		{ {1}, {2} },
		{ {0x80000000U,0,0,0}, {0,1,0,0} },
		{ {0,0x80000000U,0,0}, {0,0,1,0} },
		{ {0,0,0x80000000U,0}, {0,0,0,1} },
		{ {0,0,0,0x80000000U}, {0x87,0,0,0} },
		{ {0,0x80000000U,0,0x80000000U}, {0x87,0,1,0} },
	};
	uint32_t t[4];
	unsigned i, j;

	for (i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
		/* Copy the input so the table stays const.  */
		for (j = 0; j < 4; j++)
			t[j] = cases[i].in[j];
		aesbear64_xts_update(&t[0], &t[1], &t[2], &t[3]);
		for (j = 0; j < 4; j++) {
			if (t[j] != cases[i].out[j])
				return -1;
		}
	}

	/* Success!  */
	return 0;
}
    440 
/*
 * aesbear64_xts_enc(enc, in, out, nbytes, tweak, nrounds)
 *
 *	AES-XTS encryption of nbytes (a multiple of 16, possibly zero)
 *	from in to out.  Each block is XORed with the current tweak
 *	before and after AES encryption, and the tweak is advanced
 *	between blocks with aesbear64_xts_update.  Up to four blocks go
 *	through the bitsliced core per batch; a leading partial batch
 *	handles block counts that are not multiples of four.  On
 *	return, tweak[] holds the tweak for the next block.
 */
static void
aesbear64_xts_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
	uint64_t sk_exp[120];
	uint32_t w[4];
	uint64_t q[8];
	uint32_t t0, t1, t2, t3, u0, u1, u2, u3;
	unsigned i;

	KASSERT(nbytes % 16 == 0);

	/* Skip if there's nothing to do.  */
	if (nbytes == 0)
		return;

	/* Expand round keys for bitslicing.  */
	br_aes_ct64_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk64);

	/* Load tweak.  */
	t0 = le32dec(tweak + 4*0);
	t1 = le32dec(tweak + 4*1);
	t2 = le32dec(tweak + 4*2);
	t3 = le32dec(tweak + 4*3);

	/* Handle the first blocks separately if odd number.  */
	if (nbytes % 64) {
		unsigned n = (nbytes % 64)/16;

		/* Load up the first blocks and garbage.  */
		for (i = 0, u0 = t0, u1 = t1, u2 = t2, u3 = t3; i < n; i++) {
			w[0] = le32dec(in + 16*i + 4*0) ^ u0;
			w[1] = le32dec(in + 16*i + 4*1) ^ u1;
			w[2] = le32dec(in + 16*i + 4*2) ^ u2;
			w[3] = le32dec(in + 16*i + 4*3) ^ u3;
			aesbear64_xts_update(&u0, &u1, &u2, &u3);
			br_aes_ct64_interleave_in(&q[i], &q[4 + i], w);
		}
		for (; i < 4; i++)
			q[i] = q[4 + i] = 0;

		/* Encrypt up to three blocks.  */
		br_aes_ct64_ortho(q);
		br_aes_ct64_bitslice_encrypt(nrounds, sk_exp, q);
		br_aes_ct64_ortho(q);

		/*
		 * Store up to three blocks.  The tweak sequence is
		 * replayed from t0..t3 (u0..u3 held the tweak for the
		 * block after the batch).
		 */
		for (i = 0, u0 = t0, u1 = t1, u2 = t2, u3 = t3; i < n; i++) {
			br_aes_ct64_interleave_out(w, q[i], q[4 + i]);
			le32enc(out + 16*i + 4*0, w[0] ^ u0);
			le32enc(out + 16*i + 4*1, w[1] ^ u1);
			le32enc(out + 16*i + 4*2, w[2] ^ u2);
			le32enc(out + 16*i + 4*3, w[3] ^ u3);
			aesbear64_xts_update(&u0, &u1, &u2, &u3);
		}

		/* Advance to the next block.  */
		t0 = u0, t1 = u1, t2 = u2, t3 = u3;
		if ((nbytes -= 16*n) == 0)
			goto out;
		in += 16*n;
		out += 16*n;
	}

	/* Remaining data is full batches of four blocks.  */
	do {
		KASSERT(nbytes >= 64);

		/* Load four blocks.  */
		for (i = 0, u0 = t0, u1 = t1, u2 = t2, u3 = t3; i < 4; i++) {
			w[0] = le32dec(in + 16*i + 4*0) ^ u0;
			w[1] = le32dec(in + 16*i + 4*1) ^ u1;
			w[2] = le32dec(in + 16*i + 4*2) ^ u2;
			w[3] = le32dec(in + 16*i + 4*3) ^ u3;
			aesbear64_xts_update(&u0, &u1, &u2, &u3);
			br_aes_ct64_interleave_in(&q[i], &q[4 + i], w);
		}

		/* Encrypt four blocks.  */
		br_aes_ct64_ortho(q);
		br_aes_ct64_bitslice_encrypt(nrounds, sk_exp, q);
		br_aes_ct64_ortho(q);

		/* Store four blocks.  */
		for (i = 0, u0 = t0, u1 = t1, u2 = t2, u3 = t3; i < 4; i++) {
			br_aes_ct64_interleave_out(w, q[i], q[4 + i]);
			le32enc(out + 16*i + 4*0, w[0] ^ u0);
			le32enc(out + 16*i + 4*1, w[1] ^ u1);
			le32enc(out + 16*i + 4*2, w[2] ^ u2);
			le32enc(out + 16*i + 4*3, w[3] ^ u3);
			aesbear64_xts_update(&u0, &u1, &u2, &u3);
		}

		/* Advance to the next pair of blocks.  */
		t0 = u0, t1 = u1, t2 = u2, t3 = u3;
		in += 64;
		out += 64;
	} while (nbytes -= 64, nbytes);

out:	/* Store the updated tweak.  */
	le32enc(tweak + 4*0, t0);
	le32enc(tweak + 4*1, t1);
	le32enc(tweak + 4*2, t2);
	le32enc(tweak + 4*3, t3);

	/* Paranoia: Zero temporary buffers.  */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}
    550 
/*
 * aesbear64_xts_dec(dec, in, out, nbytes, tweak, nrounds)
 *
 *	AES-XTS decryption of nbytes (a multiple of 16, possibly zero)
 *	from in to out.  Mirror image of aesbear64_xts_enc: each block
 *	is XORed with the current tweak before and after AES
 *	decryption, and the tweak is advanced between blocks with
 *	aesbear64_xts_update.  On return, tweak[] holds the tweak for
 *	the next block.
 */
static void
aesbear64_xts_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
	uint64_t sk_exp[120];
	uint32_t w[4];
	uint64_t q[8];
	uint32_t t0, t1, t2, t3, u0, u1, u2, u3;
	unsigned i;

	KASSERT(nbytes % 16 == 0);

	/* Skip if there's nothing to do.  */
	if (nbytes == 0)
		return;

	/* Expand round keys for bitslicing.  */
	br_aes_ct64_skey_expand(sk_exp, nrounds, dec->aesd_aes.aes_rk64);

	/* Load tweak.  */
	t0 = le32dec(tweak + 4*0);
	t1 = le32dec(tweak + 4*1);
	t2 = le32dec(tweak + 4*2);
	t3 = le32dec(tweak + 4*3);

	/* Handle the first blocks separately if odd number.  */
	if (nbytes % 64) {
		unsigned n = (nbytes % 64)/16;

		/* Load up the first blocks and garbage.  */
		for (i = 0, u0 = t0, u1 = t1, u2 = t2, u3 = t3; i < n; i++) {
			w[0] = le32dec(in + 16*i + 4*0) ^ u0;
			w[1] = le32dec(in + 16*i + 4*1) ^ u1;
			w[2] = le32dec(in + 16*i + 4*2) ^ u2;
			w[3] = le32dec(in + 16*i + 4*3) ^ u3;
			aesbear64_xts_update(&u0, &u1, &u2, &u3);
			br_aes_ct64_interleave_in(&q[i], &q[4 + i], w);
		}
		for (; i < 4; i++)
			q[i] = q[4 + i] = 0;

		/* Decrypt up to three blocks.  */
		br_aes_ct64_ortho(q);
		br_aes_ct64_bitslice_decrypt(nrounds, sk_exp, q);
		br_aes_ct64_ortho(q);

		/*
		 * Store up to three blocks, replaying the tweak
		 * sequence from t0..t3.
		 */
		for (i = 0, u0 = t0, u1 = t1, u2 = t2, u3 = t3; i < n; i++) {
			br_aes_ct64_interleave_out(w, q[i], q[4 + i]);
			le32enc(out + 16*i + 4*0, w[0] ^ u0);
			le32enc(out + 16*i + 4*1, w[1] ^ u1);
			le32enc(out + 16*i + 4*2, w[2] ^ u2);
			le32enc(out + 16*i + 4*3, w[3] ^ u3);
			aesbear64_xts_update(&u0, &u1, &u2, &u3);
		}

		/* Advance to the next block.  */
		t0 = u0, t1 = u1, t2 = u2, t3 = u3;
		if ((nbytes -= 16*n) == 0)
			goto out;
		in += 16*n;
		out += 16*n;
	}

	/* Remaining data is full batches of four blocks.  */
	do {
		KASSERT(nbytes >= 64);

		/* Load four blocks.  */
		for (i = 0, u0 = t0, u1 = t1, u2 = t2, u3 = t3; i < 4; i++) {
			w[0] = le32dec(in + 16*i + 4*0) ^ u0;
			w[1] = le32dec(in + 16*i + 4*1) ^ u1;
			w[2] = le32dec(in + 16*i + 4*2) ^ u2;
			w[3] = le32dec(in + 16*i + 4*3) ^ u3;
			aesbear64_xts_update(&u0, &u1, &u2, &u3);
			br_aes_ct64_interleave_in(&q[i], &q[4 + i], w);
		}

		/* Decrypt four blocks.  */
		br_aes_ct64_ortho(q);
		br_aes_ct64_bitslice_decrypt(nrounds, sk_exp, q);
		br_aes_ct64_ortho(q);

		/* Store four blocks.  */
		for (i = 0, u0 = t0, u1 = t1, u2 = t2, u3 = t3; i < 4; i++) {
			br_aes_ct64_interleave_out(w, q[i], q[4 + i]);
			le32enc(out + 16*i + 4*0, w[0] ^ u0);
			le32enc(out + 16*i + 4*1, w[1] ^ u1);
			le32enc(out + 16*i + 4*2, w[2] ^ u2);
			le32enc(out + 16*i + 4*3, w[3] ^ u3);
			aesbear64_xts_update(&u0, &u1, &u2, &u3);
		}

		/* Advance to the next pair of blocks.  */
		t0 = u0, t1 = u1, t2 = u2, t3 = u3;
		in += 64;
		out += 64;
	} while (nbytes -= 64, nbytes);

out:	/* Store the updated tweak.  */
	le32enc(tweak + 4*0, t0);
	le32enc(tweak + 4*1, t1);
	le32enc(tweak + 4*2, t2);
	le32enc(tweak + 4*3, t3);

	/* Paranoia: Zero temporary buffers.  */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}
    660 
    661 static void
    662 aesbear64_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16],
    663     size_t nbytes, uint8_t auth[static 16], uint32_t nrounds)
    664 {
    665 	uint64_t sk_exp[120];
    666 	uint32_t w[4];
    667 	uint64_t q[8];
    668 
    669 	KASSERT(nbytes);
    670 	KASSERT(nbytes % 16 == 0);
    671 
    672 	/* Expand round keys for bitslicing.  */
    673 	br_aes_ct64_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk64);
    674 
    675 	/* Initialize garbage blocks.  */
    676 	q[1] = q[2] = q[3] = 0;
    677 	q[5] = q[6] = q[7] = 0;
    678 
    679 	/* Load initial authenticator.  */
    680 	w[0] = le32dec(auth + 4*0);
    681 	w[1] = le32dec(auth + 4*1);
    682 	w[2] = le32dec(auth + 4*2);
    683 	w[3] = le32dec(auth + 4*3);
    684 
    685 	for (; nbytes; nbytes -= 16, in += 16) {
    686 		/* Combine input block.  */
    687 		w[0] ^= le32dec(in + 4*0);
    688 		w[1] ^= le32dec(in + 4*1);
    689 		w[2] ^= le32dec(in + 4*2);
    690 		w[3] ^= le32dec(in + 4*3);
    691 		br_aes_ct64_interleave_in(&q[0], &q[4], w);
    692 
    693 		/* Transform to bitslice, encrypt, transform from bitslice.  */
    694 		br_aes_ct64_ortho(q);
    695 		br_aes_ct64_bitslice_encrypt(nrounds, sk_exp, q);
    696 		br_aes_ct64_ortho(q);
    697 
    698 		br_aes_ct64_interleave_out(w, q[0], q[4]);
    699 	}
    700 
    701 	/* Store updated authenticator.  */
    702 	le32enc(auth + 4*0, w[0]);
    703 	le32enc(auth + 4*1, w[1]);
    704 	le32enc(auth + 4*2, w[2]);
    705 	le32enc(auth + 4*3, w[3]);
    706 
    707 	/* Paranoia: Zero temporary buffers.  */
    708 	explicit_memset(sk_exp, 0, sizeof sk_exp);
    709 	explicit_memset(q, 0, sizeof q);
    710 }
    711 
/*
 * aesbear64_ccm_enc1(enc, in, out, nbytes, authctr, nrounds)
 *
 *	CCM encryption: one pass that both updates the CBC-MAC
 *	authenticator over the plaintext and encrypts the plaintext in
 *	CTR mode.  authctr[0..15] is the authenticator and
 *	authctr[16..31] is the counter block, whose last 32-bit word is
 *	incremented big-endian per block.  The two AES invocations per
 *	block -- authenticator and counter -- share one bitsliced batch
 *	in slots 0 and 1.  nbytes must be a positive multiple of 16.
 */
static void
aesbear64_ccm_enc1(const struct aesenc *enc, const uint8_t *in, uint8_t *out,
    size_t nbytes, uint8_t authctr[32], uint32_t nrounds)
{
	uint64_t sk_exp[120];
	uint32_t w[4];
	uint64_t q[8];
	uint32_t c0, c1, c2, c3be;

	KASSERT(nbytes);
	KASSERT(nbytes % 16 == 0);

	/* Expand round keys for bitslicing.  */
	br_aes_ct64_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk64);

	/* Initialize garbage blocks (slots 2 and 3 are unused).  */
	q[2] = q[3] = 0;
	q[6] = q[7] = 0;

	/* Set first block to authenticator.  */
	w[0] = le32dec(authctr + 4*0);
	w[1] = le32dec(authctr + 4*1);
	w[2] = le32dec(authctr + 4*2);
	w[3] = le32dec(authctr + 4*3);

	/* Load initial counter block, big-endian so we can increment it.  */
	c0 = le32dec(authctr + 16 + 4*0);
	c1 = le32dec(authctr + 16 + 4*1);
	c2 = le32dec(authctr + 16 + 4*2);
	c3be = bswap32(le32dec(authctr + 16 + 4*3));

	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
		/* Update authenticator: slot 0 <- auth ^ plaintext.  */
		w[0] ^= le32dec(in + 4*0);
		w[1] ^= le32dec(in + 4*1);
		w[2] ^= le32dec(in + 4*2);
		w[3] ^= le32dec(in + 4*3);
		br_aes_ct64_interleave_in(&q[0], &q[4], w);

		/* Increment 32-bit counter; slot 1 <- counter block.  */
		w[0] = c0;
		w[1] = c1;
		w[2] = c2;
		w[3] = bswap32(++c3be);
		br_aes_ct64_interleave_in(&q[1], &q[5], w);

		/* Encrypt authenticator and counter in one batch.  */
		br_aes_ct64_ortho(q);
		br_aes_ct64_bitslice_encrypt(nrounds, sk_exp, q);
		br_aes_ct64_ortho(q);

		/* Encrypt with CTR output.  */
		br_aes_ct64_interleave_out(w, q[1], q[5]);
		le32enc(out + 4*0, le32dec(in + 4*0) ^ w[0]);
		le32enc(out + 4*1, le32dec(in + 4*1) ^ w[1]);
		le32enc(out + 4*2, le32dec(in + 4*2) ^ w[2]);
		le32enc(out + 4*3, le32dec(in + 4*3) ^ w[3]);

		/* Fish out the authenticator so far.  */
		br_aes_ct64_interleave_out(w, q[0], q[4]);
	}

	/* Update authenticator.  */
	le32enc(authctr + 4*0, w[0]);
	le32enc(authctr + 4*1, w[1]);
	le32enc(authctr + 4*2, w[2]);
	le32enc(authctr + 4*3, w[3]);

	/* Update counter (only the incremented last word changed).  */
	le32enc(authctr + 16 + 4*3, bswap32(c3be));

	/* Paranoia: Zero temporary buffers.  */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}
    787 
/*
 * aesbear64_ccm_dec1(enc, in, out, nbytes, authctr, nrounds)
 *
 *	CCM decryption: CTR-decrypt nbytes (a positive multiple of 16)
 *	from in to out while updating the CBC-MAC authenticator over
 *	the recovered plaintext.  authctr[0..15] is the authenticator
 *	and authctr[16..31] the counter block (last 32-bit word
 *	incremented big-endian per block).  The loop is software-
 *	pipelined: each batch encrypts the NEXT counter block (slot 1)
 *	together with the authenticator absorbed from the PREVIOUS
 *	plaintext block (slot 0), with a priming encryption before the
 *	loop and a draining one after it.
 */
static void
aesbear64_ccm_dec1(const struct aesenc *enc, const uint8_t *in, uint8_t *out,
    size_t nbytes, uint8_t authctr[32], uint32_t nrounds)
{
	uint64_t sk_exp[120];
	uint32_t w[4];
	uint64_t q[8];
	uint32_t c0, c1, c2, c3be;
	uint32_t b0, b1, b2, b3;

	KASSERT(nbytes);
	KASSERT(nbytes % 16 == 0);

	/* Expand round keys for bitslicing.  */
	br_aes_ct64_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk64);

	/* Initialize garbage blocks (slots 2 and 3 are unused).  */
	q[2] = q[3] = 0;
	q[6] = q[7] = 0;

	/* Load initial counter block, big-endian so we can increment it.  */
	c0 = le32dec(authctr + 16 + 4*0);
	c1 = le32dec(authctr + 16 + 4*1);
	c2 = le32dec(authctr + 16 + 4*2);
	c3be = bswap32(le32dec(authctr + 16 + 4*3));

	/* Increment 32-bit counter.  */
	w[0] = c0;
	w[1] = c1;
	w[2] = c2;
	w[3] = bswap32(++c3be);
	br_aes_ct64_interleave_in(&q[1], &q[5], w);

	/*
	 * Set the other block to garbage -- we don't have any
	 * plaintext to authenticate yet.
	 */
	q[0] = q[4] = 0;

	/* Encrypt first CTR.  */
	br_aes_ct64_ortho(q);
	br_aes_ct64_bitslice_encrypt(nrounds, sk_exp, q);
	br_aes_ct64_ortho(q);

	/* Load the initial authenticator.  */
	w[0] = le32dec(authctr + 4*0);
	w[1] = le32dec(authctr + 4*1);
	w[2] = le32dec(authctr + 4*2);
	w[3] = le32dec(authctr + 4*3);
	br_aes_ct64_interleave_in(&q[0], &q[4], w);

	for (;; in += 16, out += 16) {
		/* Decrypt the block with the pre-encrypted CTR pad.  */
		br_aes_ct64_interleave_out(w, q[1], q[5]);
		b0 = le32dec(in + 4*0) ^ w[0];
		b1 = le32dec(in + 4*1) ^ w[1];
		b2 = le32dec(in + 4*2) ^ w[2];
		b3 = le32dec(in + 4*3) ^ w[3];

		/* Update authenticator with the recovered plaintext.  */
		br_aes_ct64_interleave_out(w, q[0], q[4]);
		w[0] ^= b0;
		w[1] ^= b1;
		w[2] ^= b2;
		w[3] ^= b3;
		br_aes_ct64_interleave_in(&q[0], &q[4], w);

		/* Store plaintext.  */
		le32enc(out + 4*0, b0);
		le32enc(out + 4*1, b1);
		le32enc(out + 4*2, b2);
		le32enc(out + 4*3, b3);

		/* If this is the last block, stop.  */
		if ((nbytes -= 16) == 0)
			break;

		/* Increment 32-bit counter.  */
		w[0] = c0;
		w[1] = c1;
		w[2] = c2;
		w[3] = bswap32(++c3be);
		br_aes_ct64_interleave_in(&q[1], &q[5], w);

		/* Authenticate previous plaintext, encrypt next CTR.  */
		br_aes_ct64_ortho(q);
		br_aes_ct64_bitslice_encrypt(nrounds, sk_exp, q);
		br_aes_ct64_ortho(q);
	}

	/*
	 * Authenticate last plaintext.  We're only doing this for the
	 * authenticator in q[0]/q[4], not for the counter, so don't
	 * bother to reinitialize the counter slots q[1]/q[5].  (Even
	 * for the sake of sanitizers, they're already initialized to
	 * something by now.)
	 */
	br_aes_ct64_ortho(q);
	br_aes_ct64_bitslice_encrypt(nrounds, sk_exp, q);
	br_aes_ct64_ortho(q);

	/* Update authenticator.  */
	br_aes_ct64_interleave_out(w, q[0], q[4]);
	le32enc(authctr + 4*0, w[0]);
	le32enc(authctr + 4*1, w[1]);
	le32enc(authctr + 4*2, w[2]);
	le32enc(authctr + 4*3, w[3]);

	/* Update counter (only the incremented last word changed).  */
	le32enc(authctr + 16 + 4*3, bswap32(c3be));

	/* Paranoia: Zero temporary buffers.  */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}
    902 
/*
 * aesbear64_probe()
 *
 *	Report whether this implementation is usable: 0 on success,
 *	-1 on failure.  Currently the only wired-up self-test covers
 *	the XTS tweak update.
 */
static int
aesbear64_probe(void)
{

	/* XXX test br_aes_ct64_bitslice_decrypt */
	/* XXX test br_aes_ct64_bitslice_encrypt */
	/* XXX test br_aes_ct64_keysched */
	/* XXX test br_aes_ct64_ortho */
	/* XXX test br_aes_ct64_skey_expand */

	return aesbear64_xts_update_selftest() ? -1 : 0;
}
    918 
/*
 * Operations table exporting the BearSSL aes_ct64 (64-bit bitsliced,
 * constant-time) AES implementation to the aes_impl framework.
 */
struct aes_impl aes_bear64_impl = {
	.ai_name = "BearSSL aes_ct64",
	.ai_probe = aesbear64_probe,
	.ai_setenckey = aesbear64_setenckey,
	.ai_setdeckey = aesbear64_setdeckey,
	.ai_enc = aesbear64_enc,
	.ai_dec = aesbear64_dec,
	.ai_cbc_enc = aesbear64_cbc_enc,
	.ai_cbc_dec = aesbear64_cbc_dec,
	.ai_xts_enc = aesbear64_xts_enc,
	.ai_xts_dec = aesbear64_xts_dec,
	.ai_cbcmac_update1 = aesbear64_cbcmac_update1,
	.ai_ccm_enc1 = aesbear64_ccm_enc1,
	.ai_ccm_dec1 = aesbear64_ccm_dec1,
};
    934