Home | History | Annotate | Line # | Download | only in aes
      1 /*	$NetBSD: aes_bear.c,v 1.4 2020/07/25 22:28:27 riastradh Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2020 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     26  * POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 
     29 #include <sys/cdefs.h>
     30 __KERNEL_RCSID(1, "$NetBSD: aes_bear.c,v 1.4 2020/07/25 22:28:27 riastradh Exp $");
     31 
     32 #include <sys/types.h>
     33 #include <sys/endian.h>
     34 
     35 #ifdef _KERNEL
     36 #include <sys/systm.h>
     37 #else
     38 #include <assert.h>
     39 #include <err.h>
     40 #include <string.h>
     41 #define	KASSERT			assert
     42 #define	panic(fmt, args...)	err(1, fmt, args)
     43 #endif
     44 
     45 #include <crypto/aes/aes.h>
     46 #include <crypto/aes/aes_bear.h>
     47 #include <crypto/aes/aes_impl.h>
     48 
     49 static void
     50 aesbear_setkey(uint32_t rk[static 60], const void *key, uint32_t nrounds)
     51 {
     52 	size_t key_len;
     53 
     54 	switch (nrounds) {
     55 	case 10:
     56 		key_len = 16;
     57 		break;
     58 	case 12:
     59 		key_len = 24;
     60 		break;
     61 	case 14:
     62 		key_len = 32;
     63 		break;
     64 	default:
     65 		panic("invalid AES nrounds: %u", nrounds);
     66 	}
     67 
     68 	br_aes_ct_keysched(rk, key, key_len);
     69 }
     70 
     71 static void
     72 aesbear_setenckey(struct aesenc *enc, const uint8_t *key, uint32_t nrounds)
     73 {
     74 
     75 	aesbear_setkey(enc->aese_aes.aes_rk, key, nrounds);
     76 }
     77 
     78 static void
     79 aesbear_setdeckey(struct aesdec *dec, const uint8_t *key, uint32_t nrounds)
     80 {
     81 
     82 	/*
     83 	 * BearSSL computes InvMixColumns on the fly -- no need for
     84 	 * distinct decryption round keys.
     85 	 */
     86 	aesbear_setkey(dec->aesd_aes.aes_rk, key, nrounds);
     87 }
     88 
     89 static void
     90 aesbear_enc(const struct aesenc *enc, const uint8_t in[static 16],
     91     uint8_t out[static 16], uint32_t nrounds)
     92 {
     93 	uint32_t sk_exp[120];
     94 	uint32_t q[8];
     95 
     96 	/* Expand round keys for bitslicing.  */
     97 	br_aes_ct_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk);
     98 
     99 	/* Load input block interleaved with garbage block.  */
    100 	q[2*0] = le32dec(in + 4*0);
    101 	q[2*1] = le32dec(in + 4*1);
    102 	q[2*2] = le32dec(in + 4*2);
    103 	q[2*3] = le32dec(in + 4*3);
    104 	q[1] = q[3] = q[5] = q[7] = 0;
    105 
    106 	/* Transform to bitslice, decrypt, transform from bitslice.  */
    107 	br_aes_ct_ortho(q);
    108 	br_aes_ct_bitslice_encrypt(nrounds, sk_exp, q);
    109 	br_aes_ct_ortho(q);
    110 
    111 	/* Store output block.  */
    112 	le32enc(out + 4*0, q[2*0]);
    113 	le32enc(out + 4*1, q[2*1]);
    114 	le32enc(out + 4*2, q[2*2]);
    115 	le32enc(out + 4*3, q[2*3]);
    116 
    117 	/* Paranoia: Zero temporary buffers.  */
    118 	explicit_memset(sk_exp, 0, sizeof sk_exp);
    119 	explicit_memset(q, 0, sizeof q);
    120 }
    121 
    122 static void
    123 aesbear_dec(const struct aesdec *dec, const uint8_t in[static 16],
    124     uint8_t out[static 16], uint32_t nrounds)
    125 {
    126 	uint32_t sk_exp[120];
    127 	uint32_t q[8];
    128 
    129 	/* Expand round keys for bitslicing.  */
    130 	br_aes_ct_skey_expand(sk_exp, nrounds, dec->aesd_aes.aes_rk);
    131 
    132 	/* Load input block interleaved with garbage.  */
    133 	q[2*0] = le32dec(in + 4*0);
    134 	q[2*1] = le32dec(in + 4*1);
    135 	q[2*2] = le32dec(in + 4*2);
    136 	q[2*3] = le32dec(in + 4*3);
    137 	q[1] = q[3] = q[5] = q[7] = 0;
    138 
    139 	/* Transform to bitslice, decrypt, transform from bitslice.  */
    140 	br_aes_ct_ortho(q);
    141 	br_aes_ct_bitslice_decrypt(nrounds, sk_exp, q);
    142 	br_aes_ct_ortho(q);
    143 
    144 	/* Store output block.  */
    145 	le32enc(out + 4*0, q[2*0]);
    146 	le32enc(out + 4*1, q[2*1]);
    147 	le32enc(out + 4*2, q[2*2]);
    148 	le32enc(out + 4*3, q[2*3]);
    149 
    150 	/* Paranoia: Zero temporary buffers.  */
    151 	explicit_memset(sk_exp, 0, sizeof sk_exp);
    152 	explicit_memset(q, 0, sizeof q);
    153 }
    154 
    155 static void
    156 aesbear_cbc_enc(const struct aesenc *enc, const uint8_t in[static 16],
    157     uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    158     uint32_t nrounds)
    159 {
    160 	uint32_t sk_exp[120];
    161 	uint32_t q[8];
    162 	uint32_t cv0, cv1, cv2, cv3;
    163 
    164 	KASSERT(nbytes % 16 == 0);
    165 
    166 	/* Skip if there's nothing to do.  */
    167 	if (nbytes == 0)
    168 		return;
    169 
    170 	/* Expand round keys for bitslicing.  */
    171 	br_aes_ct_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk);
    172 
    173 	/* Initialize garbage block.  */
    174 	q[1] = q[3] = q[5] = q[7] = 0;
    175 
    176 	/* Load IV.  */
    177 	cv0 = le32dec(iv + 4*0);
    178 	cv1 = le32dec(iv + 4*1);
    179 	cv2 = le32dec(iv + 4*2);
    180 	cv3 = le32dec(iv + 4*3);
    181 
    182 	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
    183 		/* Load input block and apply CV.  */
    184 		q[2*0] = cv0 ^ le32dec(in + 4*0);
    185 		q[2*1] = cv1 ^ le32dec(in + 4*1);
    186 		q[2*2] = cv2 ^ le32dec(in + 4*2);
    187 		q[2*3] = cv3 ^ le32dec(in + 4*3);
    188 
    189 		/* Transform to bitslice, encrypt, transform from bitslice.  */
    190 		br_aes_ct_ortho(q);
    191 		br_aes_ct_bitslice_encrypt(nrounds, sk_exp, q);
    192 		br_aes_ct_ortho(q);
    193 
    194 		/* Remember ciphertext as CV and store output block.  */
    195 		cv0 = q[2*0];
    196 		cv1 = q[2*1];
    197 		cv2 = q[2*2];
    198 		cv3 = q[2*3];
    199 		le32enc(out + 4*0, cv0);
    200 		le32enc(out + 4*1, cv1);
    201 		le32enc(out + 4*2, cv2);
    202 		le32enc(out + 4*3, cv3);
    203 	}
    204 
    205 	/* Store updated IV.  */
    206 	le32enc(iv + 4*0, cv0);
    207 	le32enc(iv + 4*1, cv1);
    208 	le32enc(iv + 4*2, cv2);
    209 	le32enc(iv + 4*3, cv3);
    210 
    211 	/* Paranoia: Zero temporary buffers.  */
    212 	explicit_memset(sk_exp, 0, sizeof sk_exp);
    213 	explicit_memset(q, 0, sizeof q);
    214 }
    215 
/*
 * aesbear_cbc_dec(dec, in, out, nbytes, iv, nrounds)
 *
 *	AES-CBC decryption of nbytes (a multiple of 16) from in to
 *	out; iv is overwritten with the last input ciphertext block.
 *	The buffer is processed from the end toward the beginning so
 *	that two ciphertext blocks at a time can be fed through the
 *	bitsliced core while still supporting in == out: each
 *	ciphertext block is read before the plaintext overwrites it.
 */
static void
aesbear_cbc_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
	uint32_t sk_exp[120];
	uint32_t q[8];
	uint32_t cv0, cv1, cv2, cv3, iv0, iv1, iv2, iv3;

	KASSERT(nbytes % 16 == 0);

	/* Skip if there's nothing to do.  */
	if (nbytes == 0)
		return;

	/* Expand round keys for bitslicing.  */
	br_aes_ct_skey_expand(sk_exp, nrounds, dec->aesd_aes.aes_rk);

	/*
	 * Load the IV.  It is not needed until the very end, when it
	 * is XORed into the first plaintext block at the out: label.
	 */
	iv0 = le32dec(iv + 4*0);
	iv1 = le32dec(iv + 4*1);
	iv2 = le32dec(iv + 4*2);
	iv3 = le32dec(iv + 4*3);

	/* Load the last cipher block.  */
	cv0 = le32dec(in + nbytes - 16 + 4*0);
	cv1 = le32dec(in + nbytes - 16 + 4*1);
	cv2 = le32dec(in + nbytes - 16 + 4*2);
	cv3 = le32dec(in + nbytes - 16 + 4*3);

	/* Store the updated IV.  */
	le32enc(iv + 4*0, cv0);
	le32enc(iv + 4*1, cv1);
	le32enc(iv + 4*2, cv2);
	le32enc(iv + 4*3, cv3);

	/* Handle the last cipher block separately if odd number.  */
	if (nbytes % 32) {
		KASSERT(nbytes % 32 == 16);

		/* Set up the last cipher block and a garbage block.  */
		q[2*0] = cv0;
		q[2*1] = cv1;
		q[2*2] = cv2;
		q[2*3] = cv3;
		q[1] = q[3] = q[5] = q[7] = 0;

		/* Decrypt.  */
		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_decrypt(nrounds, sk_exp, q);
		br_aes_ct_ortho(q);

		/* If this was the only cipher block, we're done.  */
		nbytes -= 16;
		if (nbytes == 0)
			goto out;

		/*
		 * Otherwise, load up the penultimate cipher block, and
		 * store the output block.
		 */
		cv0 = le32dec(in + nbytes - 16 + 4*0);
		cv1 = le32dec(in + nbytes - 16 + 4*1);
		cv2 = le32dec(in + nbytes - 16 + 4*2);
		cv3 = le32dec(in + nbytes - 16 + 4*3);
		le32enc(out + nbytes + 4*0, cv0 ^ q[2*0]);
		le32enc(out + nbytes + 4*1, cv1 ^ q[2*1]);
		le32enc(out + nbytes + 4*2, cv2 ^ q[2*2]);
		le32enc(out + nbytes + 4*3, cv3 ^ q[2*3]);
	}

	/* Main loop: decrypt two cipher blocks per iteration.  */
	for (;;) {
		KASSERT(nbytes >= 32);

		/*
		 * 1. Set up upper cipher block from cvN.
		 * 2. Load lower cipher block into cvN and set it up.
		 * 3. Decrypt.
		 */
		q[2*0 + 1] = cv0;
		q[2*1 + 1] = cv1;
		q[2*2 + 1] = cv2;
		q[2*3 + 1] = cv3;
		cv0 = q[2*0] = le32dec(in + nbytes - 32 + 4*0);
		cv1 = q[2*1] = le32dec(in + nbytes - 32 + 4*1);
		cv2 = q[2*2] = le32dec(in + nbytes - 32 + 4*2);
		cv3 = q[2*3] = le32dec(in + nbytes - 32 + 4*3);

		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_decrypt(nrounds, sk_exp, q);
		br_aes_ct_ortho(q);

		/*
		 * Store the upper output block: the lower cipher block
		 * (still in cvN) is its CBC chaining value.
		 */
		le32enc(out + nbytes - 16 + 4*0, q[2*0 + 1] ^ cv0);
		le32enc(out + nbytes - 16 + 4*1, q[2*1 + 1] ^ cv1);
		le32enc(out + nbytes - 16 + 4*2, q[2*2 + 1] ^ cv2);
		le32enc(out + nbytes - 16 + 4*3, q[2*3 + 1] ^ cv3);

		/* Stop if we've reached the first output block.  */
		nbytes -= 32;
		if (nbytes == 0)
			goto out;

		/*
		 * Load the preceding cipher block, and apply it as the
		 * chaining value to this one.
		 */
		cv0 = le32dec(in + nbytes - 16 + 4*0);
		cv1 = le32dec(in + nbytes - 16 + 4*1);
		cv2 = le32dec(in + nbytes - 16 + 4*2);
		cv3 = le32dec(in + nbytes - 16 + 4*3);
		le32enc(out + nbytes + 4*0, q[2*0] ^ cv0);
		le32enc(out + nbytes + 4*1, q[2*1] ^ cv1);
		le32enc(out + nbytes + 4*2, q[2*2] ^ cv2);
		le32enc(out + nbytes + 4*3, q[2*3] ^ cv3);
	}

out:	/* Store the first output block, chained against the IV.  */
	le32enc(out + 4*0, q[2*0] ^ iv0);
	le32enc(out + 4*1, q[2*1] ^ iv1);
	le32enc(out + 4*2, q[2*2] ^ iv2);
	le32enc(out + 4*3, q[2*3] ^ iv3);

	/* Paranoia: Zero temporary buffers.  */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}
    343 
/*
 * aesbear_xts_update(t0, t1, t2, t3)
 *
 *	Update the 128-bit XTS tweak (*t0 least significant word,
 *	*t3 most significant): multiply by x in GF(2^128) modulo
 *	x^128 + x^7 + x^2 + x + 1.  The reduction constant 0x87 is
 *	applied under a mask derived from the carry bit, so there is
 *	no data-dependent branch.
 */
static inline void
aesbear_xts_update(uint32_t *t0, uint32_t *t1, uint32_t *t2, uint32_t *t3)
{
	uint32_t u0 = *t0, u1 = *t1, u2 = *t2, u3 = *t3;

	/* Shift left one bit, propagating each word's top bit into
	 * the next word; the top bit of the whole tweak wraps around
	 * as the reduction polynomial.  */
	*t0 = (u0 << 1) ^ (0x87 & -(u3 >> 31));
	*t1 = (u1 << 1) ^ (u0 >> 31);
	*t2 = (u2 << 1) ^ (u1 >> 31);
	*t3 = (u3 << 1) ^ (u2 >> 31);
}
    358 
/*
 * aesbear_xts_update_selftest()
 *
 *	Sanity-check aesbear_xts_update against known single-step
 *	vectors: each word's top bit must carry into the next word,
 *	and the top bit of the whole tweak must wrap to 0x87.
 *	Returns 0 on success, -1 on any mismatch.
 */
static int
aesbear_xts_update_selftest(void)
{
	static const struct {
		uint32_t in[4], out[4];
	} cases[] = {
		{ {1}, {2} },
		{ {0x80000000U,0,0,0}, {0,1,0,0} },
		{ {0,0x80000000U,0,0}, {0,0,1,0} },
		{ {0,0,0x80000000U,0}, {0,0,0,1} },
		{ {0,0,0,0x80000000U}, {0x87,0,0,0} },
		{ {0,0x80000000U,0,0x80000000U}, {0x87,0,1,0} },
	};
	size_t i, j;
	uint32_t t[4];

	for (i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
		for (j = 0; j < 4; j++)
			t[j] = cases[i].in[j];
		aesbear_xts_update(&t[0], &t[1], &t[2], &t[3]);
		for (j = 0; j < 4; j++) {
			if (t[j] != cases[i].out[j])
				return -1;
		}
	}

	/* Success!  */
	return 0;
}
    391 
/*
 * aesbear_xts_enc(enc, in, out, nbytes, tweak, nrounds)
 *
 *	AES-XTS encryption of nbytes (a multiple of 16) from in to
 *	out.  On entry, tweak holds the tweak for the first block; on
 *	exit it holds the tweak for the block after the last one, so
 *	consecutive calls can continue a longer stream.  Blocks are
 *	encrypted two at a time through the bitsliced core; a single
 *	leading odd block is handled first, paired with a garbage
 *	block.
 */
static void
aesbear_xts_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
	uint32_t sk_exp[120];
	uint32_t q[8];
	uint32_t t0, t1, t2, t3, u0, u1, u2, u3;

	KASSERT(nbytes % 16 == 0);

	/* Skip if there's nothing to do.  */
	if (nbytes == 0)
		return;

	/* Expand round keys for bitslicing.  */
	br_aes_ct_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk);

	/* Load tweak.  */
	t0 = le32dec(tweak + 4*0);
	t1 = le32dec(tweak + 4*1);
	t2 = le32dec(tweak + 4*2);
	t3 = le32dec(tweak + 4*3);

	/* Handle the first block separately if odd number.  */
	if (nbytes % 32) {
		KASSERT(nbytes % 32 == 16);

		/* Load up the first block and a garbage block.  */
		q[2*0] = le32dec(in + 4*0) ^ t0;
		q[2*1] = le32dec(in + 4*1) ^ t1;
		q[2*2] = le32dec(in + 4*2) ^ t2;
		q[2*3] = le32dec(in + 4*3) ^ t3;
		q[1] = q[3] = q[5] = q[7] = 0;

		/* Encrypt two blocks.  */
		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_encrypt(nrounds, sk_exp, q);
		br_aes_ct_ortho(q);

		/* Store the first cipher block (XEX: tweak on both sides).  */
		le32enc(out + 4*0, q[2*0] ^ t0);
		le32enc(out + 4*1, q[2*1] ^ t1);
		le32enc(out + 4*2, q[2*2] ^ t2);
		le32enc(out + 4*3, q[2*3] ^ t3);

		/* Advance to the next block.  */
		aesbear_xts_update(&t0, &t1, &t2, &t3);
		if ((nbytes -= 16) == 0)
			goto out;
		in += 16;
		out += 16;
	}

	/* Main loop: two blocks per iteration, tweaks t (lower) and u
	 * (upper).  */
	do {
		KASSERT(nbytes >= 32);

		/* Compute the upper tweak.  */
		u0 = t0; u1 = t1; u2 = t2; u3 = t3;
		aesbear_xts_update(&u0, &u1, &u2, &u3);

		/* Load lower and upper blocks.  */
		q[2*0] = le32dec(in + 4*0) ^ t0;
		q[2*1] = le32dec(in + 4*1) ^ t1;
		q[2*2] = le32dec(in + 4*2) ^ t2;
		q[2*3] = le32dec(in + 4*3) ^ t3;
		q[2*0 + 1] = le32dec(in + 16 + 4*0) ^ u0;
		q[2*1 + 1] = le32dec(in + 16 + 4*1) ^ u1;
		q[2*2 + 1] = le32dec(in + 16 + 4*2) ^ u2;
		q[2*3 + 1] = le32dec(in + 16 + 4*3) ^ u3;

		/* Encrypt two blocks.  */
		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_encrypt(nrounds, sk_exp, q);
		br_aes_ct_ortho(q);

		/* Store lower and upper blocks.  */
		le32enc(out + 4*0, q[2*0] ^ t0);
		le32enc(out + 4*1, q[2*1] ^ t1);
		le32enc(out + 4*2, q[2*2] ^ t2);
		le32enc(out + 4*3, q[2*3] ^ t3);
		le32enc(out + 16 + 4*0, q[2*0 + 1] ^ u0);
		le32enc(out + 16 + 4*1, q[2*1 + 1] ^ u1);
		le32enc(out + 16 + 4*2, q[2*2 + 1] ^ u2);
		le32enc(out + 16 + 4*3, q[2*3 + 1] ^ u3);

		/* Advance to the next pair of blocks.  */
		t0 = u0; t1 = u1; t2 = u2; t3 = u3;
		aesbear_xts_update(&t0, &t1, &t2, &t3);
		in += 32;
		out += 32;
	} while (nbytes -= 32, nbytes);

out:	/* Store the updated tweak.  */
	le32enc(tweak + 4*0, t0);
	le32enc(tweak + 4*1, t1);
	le32enc(tweak + 4*2, t2);
	le32enc(tweak + 4*3, t3);

	/* Paranoia: Zero temporary buffers.  */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}
    495 
/*
 * aesbear_xts_dec(dec, in, out, nbytes, tweak, nrounds)
 *
 *	AES-XTS decryption of nbytes (a multiple of 16) from in to
 *	out; mirrors aesbear_xts_enc.  On entry, tweak holds the
 *	tweak for the first block; on exit it holds the tweak for the
 *	block after the last one.
 */
static void
aesbear_xts_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
	uint32_t sk_exp[120];
	uint32_t q[8];
	uint32_t t0, t1, t2, t3, u0, u1, u2, u3;

	KASSERT(nbytes % 16 == 0);

	/* Skip if there's nothing to do.  */
	if (nbytes == 0)
		return;

	/* Expand round keys for bitslicing.  */
	br_aes_ct_skey_expand(sk_exp, nrounds, dec->aesd_aes.aes_rk);

	/* Load tweak.  */
	t0 = le32dec(tweak + 4*0);
	t1 = le32dec(tweak + 4*1);
	t2 = le32dec(tweak + 4*2);
	t3 = le32dec(tweak + 4*3);

	/* Handle the first block separately if odd number.  */
	if (nbytes % 32) {
		KASSERT(nbytes % 32 == 16);

		/* Load up the first block and a garbage block.  */
		q[2*0] = le32dec(in + 4*0) ^ t0;
		q[2*1] = le32dec(in + 4*1) ^ t1;
		q[2*2] = le32dec(in + 4*2) ^ t2;
		q[2*3] = le32dec(in + 4*3) ^ t3;
		q[1] = q[3] = q[5] = q[7] = 0;

		/* Decrypt two blocks.  */
		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_decrypt(nrounds, sk_exp, q);
		br_aes_ct_ortho(q);

		/* Store the first plaintext block (XEX: tweak on both
		 * sides).  */
		le32enc(out + 4*0, q[2*0] ^ t0);
		le32enc(out + 4*1, q[2*1] ^ t1);
		le32enc(out + 4*2, q[2*2] ^ t2);
		le32enc(out + 4*3, q[2*3] ^ t3);

		/* Advance to the next block.  */
		aesbear_xts_update(&t0, &t1, &t2, &t3);
		if ((nbytes -= 16) == 0)
			goto out;
		in += 16;
		out += 16;
	}

	/* Main loop: two blocks per iteration, tweaks t (lower) and u
	 * (upper).  */
	do {
		KASSERT(nbytes >= 32);

		/* Compute the upper tweak.  */
		u0 = t0; u1 = t1; u2 = t2; u3 = t3;
		aesbear_xts_update(&u0, &u1, &u2, &u3);

		/* Load lower and upper blocks.  */
		q[2*0] = le32dec(in + 4*0) ^ t0;
		q[2*1] = le32dec(in + 4*1) ^ t1;
		q[2*2] = le32dec(in + 4*2) ^ t2;
		q[2*3] = le32dec(in + 4*3) ^ t3;
		q[2*0 + 1] = le32dec(in + 16 + 4*0) ^ u0;
		q[2*1 + 1] = le32dec(in + 16 + 4*1) ^ u1;
		q[2*2 + 1] = le32dec(in + 16 + 4*2) ^ u2;
		q[2*3 + 1] = le32dec(in + 16 + 4*3) ^ u3;

		/* Decrypt two blocks.  */
		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_decrypt(nrounds, sk_exp, q);
		br_aes_ct_ortho(q);

		/* Store lower and upper blocks.  */
		le32enc(out + 4*0, q[2*0] ^ t0);
		le32enc(out + 4*1, q[2*1] ^ t1);
		le32enc(out + 4*2, q[2*2] ^ t2);
		le32enc(out + 4*3, q[2*3] ^ t3);
		le32enc(out + 16 + 4*0, q[2*0 + 1] ^ u0);
		le32enc(out + 16 + 4*1, q[2*1 + 1] ^ u1);
		le32enc(out + 16 + 4*2, q[2*2 + 1] ^ u2);
		le32enc(out + 16 + 4*3, q[2*3 + 1] ^ u3);

		/* Advance to the next pair of blocks.  */
		t0 = u0; t1 = u1; t2 = u2; t3 = u3;
		aesbear_xts_update(&t0, &t1, &t2, &t3);
		in += 32;
		out += 32;
	} while (nbytes -= 32, nbytes);

out:	/* Store the updated tweak.  */
	le32enc(tweak + 4*0, t0);
	le32enc(tweak + 4*1, t1);
	le32enc(tweak + 4*2, t2);
	le32enc(tweak + 4*3, t3);

	/* Paranoia: Zero temporary buffers.  */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}
    599 
    600 static void
    601 aesbear_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16],
    602     size_t nbytes, uint8_t auth[static 16], uint32_t nrounds)
    603 {
    604 	uint32_t sk_exp[120];
    605 	uint32_t q[8];
    606 
    607 	KASSERT(nbytes);
    608 	KASSERT(nbytes % 16 == 0);
    609 
    610 	/* Expand round keys for bitslicing.  */
    611 	br_aes_ct_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk);
    612 
    613 	/* Initialize garbage block.  */
    614 	q[1] = q[3] = q[5] = q[7] = 0;
    615 
    616 	/* Load initial authenticator.  */
    617 	q[2*0] = le32dec(auth + 4*0);
    618 	q[2*1] = le32dec(auth + 4*1);
    619 	q[2*2] = le32dec(auth + 4*2);
    620 	q[2*3] = le32dec(auth + 4*3);
    621 
    622 	for (; nbytes; nbytes -= 16, in += 16) {
    623 		/* Combine input block.  */
    624 		q[2*0] ^= le32dec(in + 4*0);
    625 		q[2*1] ^= le32dec(in + 4*1);
    626 		q[2*2] ^= le32dec(in + 4*2);
    627 		q[2*3] ^= le32dec(in + 4*3);
    628 
    629 		/* Transform to bitslice, encrypt, transform from bitslice.  */
    630 		br_aes_ct_ortho(q);
    631 		br_aes_ct_bitslice_encrypt(nrounds, sk_exp, q);
    632 		br_aes_ct_ortho(q);
    633 	}
    634 
    635 	/* Store updated authenticator.  */
    636 	le32enc(auth + 4*0, q[2*0]);
    637 	le32enc(auth + 4*1, q[2*1]);
    638 	le32enc(auth + 4*2, q[2*2]);
    639 	le32enc(auth + 4*3, q[2*3]);
    640 
    641 	/* Paranoia: Zero temporary buffers.  */
    642 	explicit_memset(sk_exp, 0, sizeof sk_exp);
    643 	explicit_memset(q, 0, sizeof q);
    644 }
    645 
/*
 * aesbear_ccm_enc1(enc, in, out, nbytes, authctr, nrounds)
 *
 *	One AES-CCM encryption pass over nbytes (a positive multiple
 *	of 16).  authctr[0..15] is the CBC-MAC authenticator, stored
 *	as four little-endian words; authctr[16..31] is the CTR-mode
 *	counter block, whose last four bytes are a big-endian 32-bit
 *	counter.  Each iteration folds a plaintext block into the
 *	authenticator and encrypts the next counter block in the same
 *	two-block bitsliced encryption, then XORs the counter output
 *	into the plaintext.
 */
static void
aesbear_ccm_enc1(const struct aesenc *enc, const uint8_t *in, uint8_t *out,
    size_t nbytes, uint8_t authctr[32], uint32_t nrounds)
{
	uint32_t sk_exp[120];
	uint32_t q[8];
	uint32_t c0, c1, c2, c3;

	KASSERT(nbytes);
	KASSERT(nbytes % 16 == 0);

	/* Expand round keys for bitslicing.  */
	br_aes_ct_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk);

	/* Set first block to authenticator.  */
	q[2*0] = le32dec(authctr + 4*0);
	q[2*1] = le32dec(authctr + 4*1);
	q[2*2] = le32dec(authctr + 4*2);
	q[2*3] = le32dec(authctr + 4*3);

	/*
	 * Load initial counter block.  Only the last word is the
	 * incrementing counter, stored big-endian per CCM; it is kept
	 * in native form in c3 so ++c3 works, and byte-swapped when
	 * placed in the little-endian-convention q[] slots.
	 */
	c0 = le32dec(authctr + 16 + 4*0);
	c1 = le32dec(authctr + 16 + 4*1);
	c2 = le32dec(authctr + 16 + 4*2);
	c3 = be32dec(authctr + 16 + 4*3);

	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
		/* Update authenticator.  */
		q[2*0] ^= le32dec(in + 4*0);
		q[2*1] ^= le32dec(in + 4*1);
		q[2*2] ^= le32dec(in + 4*2);
		q[2*3] ^= le32dec(in + 4*3);

		/* Increment 32-bit counter.  */
		q[2*0 + 1] = c0;
		q[2*1 + 1] = c1;
		q[2*2 + 1] = c2;
		q[2*3 + 1] = bswap32(++c3);

		/* Encrypt authenticator and counter.  */
		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_encrypt(nrounds, sk_exp, q);
		br_aes_ct_ortho(q);

		/* Encrypt with CTR output.  */
		le32enc(out + 4*0, le32dec(in + 4*0) ^ q[2*0 + 1]);
		le32enc(out + 4*1, le32dec(in + 4*1) ^ q[2*1 + 1]);
		le32enc(out + 4*2, le32dec(in + 4*2) ^ q[2*2 + 1]);
		le32enc(out + 4*3, le32dec(in + 4*3) ^ q[2*3 + 1]);
	}

	/* Update authenticator.  */
	le32enc(authctr + 4*0, q[2*0]);
	le32enc(authctr + 4*1, q[2*1]);
	le32enc(authctr + 4*2, q[2*2]);
	le32enc(authctr + 4*3, q[2*3]);

	/* Update counter (big-endian in memory).  */
	be32enc(authctr + 16 + 4*3, c3);

	/* Paranoia: Zero temporary buffers.  */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}
    710 
/*
 * aesbear_ccm_dec1(enc, in, out, nbytes, authctr, nrounds)
 *
 *	One AES-CCM decryption pass over nbytes (a positive multiple
 *	of 16); authctr has the same layout as in aesbear_ccm_enc1.
 *	The two-block pipeline is staggered: the CTR pad for block i
 *	is encrypted together with the authenticator update for block
 *	i-1 (the plaintext must be recovered before it can be
 *	authenticated), so the first pad is computed up front and one
 *	extra encryption at the end finishes the authenticator.
 */
static void
aesbear_ccm_dec1(const struct aesenc *enc, const uint8_t *in, uint8_t *out,
    size_t nbytes, uint8_t authctr[32], uint32_t nrounds)
{
	uint32_t sk_exp[120];
	uint32_t q[8];
	uint32_t c0, c1, c2, c3;
	uint32_t b0, b1, b2, b3;

	KASSERT(nbytes);
	KASSERT(nbytes % 16 == 0);

	/* Expand round keys for bitslicing.  */
	br_aes_ct_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk);

	/*
	 * Load initial counter block.  As in aesbear_ccm_enc1, only
	 * the last word is the counter, stored big-endian in memory
	 * and kept in native form in c3 for incrementing.
	 */
	c0 = le32dec(authctr + 16 + 4*0);
	c1 = le32dec(authctr + 16 + 4*1);
	c2 = le32dec(authctr + 16 + 4*2);
	c3 = be32dec(authctr + 16 + 4*3);

	/* Increment 32-bit counter.  */
	q[2*0] = c0;
	q[2*1] = c1;
	q[2*2] = c2;
	q[2*3] = bswap32(++c3);

	/*
	 * Set the second block to garbage -- we don't have any
	 * plaintext to authenticate yet.
	 */
	q[1] = q[3] = q[5] = q[7] = 0;

	/* Encrypt first CTR.  */
	br_aes_ct_ortho(q);
	br_aes_ct_bitslice_encrypt(nrounds, sk_exp, q);
	br_aes_ct_ortho(q);

	/* Load the initial authenticator.  */
	q[2*0 + 1] = le32dec(authctr + 4*0);
	q[2*1 + 1] = le32dec(authctr + 4*1);
	q[2*2 + 1] = le32dec(authctr + 4*2);
	q[2*3 + 1] = le32dec(authctr + 4*3);

	for (;; in += 16, out += 16) {
		/* Decrypt the block with the CTR pad in q[2*i].  */
		b0 = le32dec(in + 4*0) ^ q[2*0];
		b1 = le32dec(in + 4*1) ^ q[2*1];
		b2 = le32dec(in + 4*2) ^ q[2*2];
		b3 = le32dec(in + 4*3) ^ q[2*3];

		/* Update authenticator with the recovered plaintext.  */
		q[2*0 + 1] ^= b0;
		q[2*1 + 1] ^= b1;
		q[2*2 + 1] ^= b2;
		q[2*3 + 1] ^= b3;

		/* Store plaintext.  */
		le32enc(out + 4*0, b0);
		le32enc(out + 4*1, b1);
		le32enc(out + 4*2, b2);
		le32enc(out + 4*3, b3);

		/* If this is the last block, stop.  */
		if ((nbytes -= 16) == 0)
			break;

		/* Increment 32-bit counter.  */
		q[2*0] = c0;
		q[2*1] = c1;
		q[2*2] = c2;
		q[2*3] = bswap32(++c3);

		/* Authenticate previous plaintext, encrypt next CTR.  */
		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_encrypt(nrounds, sk_exp, q);
		br_aes_ct_ortho(q);
	}

	/*
	 * Authenticate last plaintext.  We're only doing this for the
	 * authenticator, not for the counter, so don't bother to
	 * initialize q[2*i].  (Even for the sake of sanitizers,
	 * they're already initialized to something by now.)
	 */
	br_aes_ct_ortho(q);
	br_aes_ct_bitslice_encrypt(nrounds, sk_exp, q);
	br_aes_ct_ortho(q);

	/* Update authenticator.  */
	le32enc(authctr + 4*0, q[2*0 + 1]);
	le32enc(authctr + 4*1, q[2*1 + 1]);
	le32enc(authctr + 4*2, q[2*2 + 1]);
	le32enc(authctr + 4*3, q[2*3 + 1]);

	/* Update counter (big-endian in memory).  */
	be32enc(authctr + 16 + 4*3, c3);

	/* Paranoia: Zero temporary buffers.  */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}
    813 
/*
 * aesbear_probe()
 *
 *	Decide whether this implementation is usable: it is pure
 *	software, so only the XTS tweak-update selftest gates it.
 *	Returns 0 if usable, -1 if not.
 */
static int
aesbear_probe(void)
{

	if (aesbear_xts_update_selftest() != 0)
		return -1;

	/* XXX test br_aes_ct_bitslice_decrypt */
	/* XXX test br_aes_ct_bitslice_encrypt */
	/* XXX test br_aes_ct_keysched */
	/* XXX test br_aes_ct_ortho */
	/* XXX test br_aes_ct_skey_expand */

	return 0;
}
    829 
/*
 * aes_bear_impl
 *
 *	AES implementation vector backed by BearSSL's bitsliced
 *	aes_ct code -- a portable software fallback that needs no
 *	special CPU instructions.
 */
struct aes_impl aes_bear_impl = {
	.ai_name = "BearSSL aes_ct",
	.ai_probe = aesbear_probe,
	.ai_setenckey = aesbear_setenckey,
	.ai_setdeckey = aesbear_setdeckey,
	.ai_enc = aesbear_enc,
	.ai_dec = aesbear_dec,
	.ai_cbc_enc = aesbear_cbc_enc,
	.ai_cbc_dec = aesbear_cbc_dec,
	.ai_xts_enc = aesbear_xts_enc,
	.ai_xts_dec = aesbear_xts_dec,
	.ai_cbcmac_update1 = aesbear_cbcmac_update1,
	.ai_ccm_enc1 = aesbear_ccm_enc1,
	.ai_ccm_dec1 = aesbear_ccm_dec1,
};
    845