/*	$NetBSD: aes_bear.c,v 1.2 2020/06/30 20:32:11 riastradh Exp $	*/

/*-
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(1, "$NetBSD: aes_bear.c,v 1.2 2020/06/30 20:32:11 riastradh Exp $");

#include <sys/types.h>
#include <sys/endian.h>

#ifdef _KERNEL
#include <sys/systm.h>
#else
#include <assert.h>
#include <err.h>
#include <string.h>
#define	KASSERT			assert
#define	panic(fmt, args...)	err(1, fmt, args)
#endif

#include <crypto/aes/aes.h>
#include <crypto/aes/aes_bear.h>
static void
aesbear_setkey(uint32_t rk[static 60], const void *key, uint32_t nrounds)
{
	size_t key_len;

	switch (nrounds) {
	case 10:
		key_len = 16;
		break;
	case 12:
		key_len = 24;
		break;
	case 14:
		key_len = 32;
		break;
	default:
		panic("invalid AES nrounds: %u", nrounds);
	}

	br_aes_ct_keysched(rk, key, key_len);
}
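
/*
 * The nrounds argument doubles as the key-size selector, following
 * FIPS-197: 10 rounds for a 16-byte key (AES-128), 12 for 24 bytes
 * (AES-192), 14 for 32 bytes (AES-256).  A minimal usage sketch,
 * assuming the caller already has a 16-byte key buffer:
 *
 *	uint32_t rk[60];
 *	uint8_t key[16];
 *
 *	aesbear_setkey(rk, key, 10);	// AES-128 key schedule
 */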

static void
aesbear_setenckey(struct aesenc *enc, const uint8_t *key, uint32_t nrounds)
{

	aesbear_setkey(enc->aese_aes.aes_rk, key, nrounds);
}

static void
aesbear_setdeckey(struct aesdec *dec, const uint8_t *key, uint32_t nrounds)
{

	/*
	 * BearSSL computes InvMixColumns on the fly -- no need for
	 * distinct decryption round keys.
	 */
	aesbear_setkey(dec->aesd_aes.aes_rk, key, nrounds);
}

static void
aesbear_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{
	uint32_t sk_exp[120];
	uint32_t q[8];

	/* Expand round keys for bitslicing.  */
	br_aes_ct_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk);

	/* Load input block interleaved with garbage block.  */
	q[2*0] = le32dec(in + 4*0);
	q[2*1] = le32dec(in + 4*1);
	q[2*2] = le32dec(in + 4*2);
	q[2*3] = le32dec(in + 4*3);
	q[1] = q[3] = q[5] = q[7] = 0;

	/* Transform to bitslice, encrypt, transform from bitslice.  */
	br_aes_ct_ortho(q);
	br_aes_ct_bitslice_encrypt(nrounds, sk_exp, q);
	br_aes_ct_ortho(q);

	/* Store output block.  */
	le32enc(out + 4*0, q[2*0]);
	le32enc(out + 4*1, q[2*1]);
	le32enc(out + 4*2, q[2*2]);
	le32enc(out + 4*3, q[2*3]);

	/* Paranoia: Zero temporary buffers.  */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}
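
/*
 * A single-block known-answer sketch against the FIPS-197 Appendix C.1
 * vector; the hex values are the published ones, the surrounding code
 * is illustrative only:
 *
 *	struct aesenc enc;
 *	static const uint8_t key[16] = {
 *		0x00,0x01,0x02,0x03, 0x04,0x05,0x06,0x07,
 *		0x08,0x09,0x0a,0x0b, 0x0c,0x0d,0x0e,0x0f,
 *	};
 *	static const uint8_t pt[16] = {
 *		0x00,0x11,0x22,0x33, 0x44,0x55,0x66,0x77,
 *		0x88,0x99,0xaa,0xbb, 0xcc,0xdd,0xee,0xff,
 *	};
 *	static const uint8_t expected[16] = {
 *		0x69,0xc4,0xe0,0xd8, 0x6a,0x7b,0x04,0x30,
 *		0xd8,0xcd,0xb7,0x80, 0x70,0xb4,0xc5,0x5a,
 *	};
 *	uint8_t ct[16];
 *
 *	aesbear_setenckey(&enc, key, 10);
 *	aesbear_enc(&enc, pt, ct, 10);
 *	KASSERT(memcmp(ct, expected, 16) == 0);
 */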

static void
aesbear_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{
	uint32_t sk_exp[120];
	uint32_t q[8];

	/* Expand round keys for bitslicing.  */
	br_aes_ct_skey_expand(sk_exp, nrounds, dec->aesd_aes.aes_rk);

	/* Load input block interleaved with garbage.  */
	q[2*0] = le32dec(in + 4*0);
	q[2*1] = le32dec(in + 4*1);
	q[2*2] = le32dec(in + 4*2);
	q[2*3] = le32dec(in + 4*3);
	q[1] = q[3] = q[5] = q[7] = 0;

	/* Transform to bitslice, decrypt, transform from bitslice.  */
	br_aes_ct_ortho(q);
	br_aes_ct_bitslice_decrypt(nrounds, sk_exp, q);
	br_aes_ct_ortho(q);

	/* Store output block.  */
	le32enc(out + 4*0, q[2*0]);
	le32enc(out + 4*1, q[2*1]);
	le32enc(out + 4*2, q[2*2]);
	le32enc(out + 4*3, q[2*3]);

	/* Paranoia: Zero temporary buffers.  */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}

static void
aesbear_cbc_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
	uint32_t sk_exp[120];
	uint32_t q[8];
	uint32_t cv0, cv1, cv2, cv3;

	KASSERT(nbytes % 16 == 0);

	/* Skip if there's nothing to do.  */
	if (nbytes == 0)
		return;

	/* Expand round keys for bitslicing.  */
	br_aes_ct_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk);

	/* Initialize garbage block.  */
	q[1] = q[3] = q[5] = q[7] = 0;

	/* Load IV.  */
	cv0 = le32dec(iv + 4*0);
	cv1 = le32dec(iv + 4*1);
	cv2 = le32dec(iv + 4*2);
	cv3 = le32dec(iv + 4*3);

	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
		/* Load input block and apply CV.  */
		q[2*0] = cv0 ^ le32dec(in + 4*0);
		q[2*1] = cv1 ^ le32dec(in + 4*1);
		q[2*2] = cv2 ^ le32dec(in + 4*2);
		q[2*3] = cv3 ^ le32dec(in + 4*3);

		/* Transform to bitslice, encrypt, transform from bitslice.  */
		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_encrypt(nrounds, sk_exp, q);
		br_aes_ct_ortho(q);

		/* Remember ciphertext as CV and store output block.  */
		cv0 = q[2*0];
		cv1 = q[2*1];
		cv2 = q[2*2];
		cv3 = q[2*3];
		le32enc(out + 4*0, cv0);
		le32enc(out + 4*1, cv1);
		le32enc(out + 4*2, cv2);
		le32enc(out + 4*3, cv3);
	}

	/* Store updated IV.  */
	le32enc(iv + 4*0, cv0);
	le32enc(iv + 4*1, cv1);
	le32enc(iv + 4*2, cv2);
	le32enc(iv + 4*3, cv3);

	/* Paranoia: Zero temporary buffers.  */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}
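
/*
 * The loop above implements the usual CBC recurrence, with the
 * caller's iv acting as c[-1] and receiving c[n-1] back for chaining
 * across calls:
 *
 *	c[0] = E_K(p[0] ^ iv)
 *	c[i] = E_K(p[i] ^ c[i-1]),  i = 1, ..., n-1
 *
 * CBC encryption is inherently serial -- each block's input depends
 * on the previous ciphertext -- so only one of the two bitslice slots
 * can do useful work per call here.
 */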

static void
aesbear_cbc_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
	uint32_t sk_exp[120];
	uint32_t q[8];
	uint32_t cv0, cv1, cv2, cv3, iv0, iv1, iv2, iv3;

	KASSERT(nbytes % 16 == 0);

	/* Skip if there's nothing to do.  */
	if (nbytes == 0)
		return;

	/* Expand round keys for bitslicing.  */
	br_aes_ct_skey_expand(sk_exp, nrounds, dec->aesd_aes.aes_rk);

	/* Load the IV.  */
	iv0 = le32dec(iv + 4*0);
	iv1 = le32dec(iv + 4*1);
	iv2 = le32dec(iv + 4*2);
	iv3 = le32dec(iv + 4*3);

	/* Load the last cipher block.  */
	cv0 = le32dec(in + nbytes - 16 + 4*0);
	cv1 = le32dec(in + nbytes - 16 + 4*1);
	cv2 = le32dec(in + nbytes - 16 + 4*2);
	cv3 = le32dec(in + nbytes - 16 + 4*3);

	/* Store the updated IV.  */
	le32enc(iv + 4*0, cv0);
	le32enc(iv + 4*1, cv1);
	le32enc(iv + 4*2, cv2);
	le32enc(iv + 4*3, cv3);

	/* Handle the last cipher block separately if the block count is odd.  */
	if (nbytes % 32) {
		KASSERT(nbytes % 32 == 16);

		/* Set up the last cipher block and a garbage block.  */
		q[2*0] = cv0;
		q[2*1] = cv1;
		q[2*2] = cv2;
		q[2*3] = cv3;
		q[1] = q[3] = q[5] = q[7] = 0;

		/* Decrypt.  */
		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_decrypt(nrounds, sk_exp, q);
		br_aes_ct_ortho(q);

		/* If this was the only cipher block, we're done.  */
		nbytes -= 16;
		if (nbytes == 0)
			goto out;

		/*
		 * Otherwise, load up the penultimate cipher block, and
		 * store the output block.
		 */
		cv0 = le32dec(in + nbytes - 16 + 4*0);
		cv1 = le32dec(in + nbytes - 16 + 4*1);
		cv2 = le32dec(in + nbytes - 16 + 4*2);
		cv3 = le32dec(in + nbytes - 16 + 4*3);
		le32enc(out + nbytes + 4*0, cv0 ^ q[2*0]);
		le32enc(out + nbytes + 4*1, cv1 ^ q[2*1]);
		le32enc(out + nbytes + 4*2, cv2 ^ q[2*2]);
		le32enc(out + nbytes + 4*3, cv3 ^ q[2*3]);
	}

	for (;;) {
		KASSERT(nbytes >= 32);

		/*
		 * 1. Set up upper cipher block from cvN.
		 * 2. Load lower cipher block into cvN and set it up.
		 * 3. Decrypt.
		 */
		q[2*0 + 1] = cv0;
		q[2*1 + 1] = cv1;
		q[2*2 + 1] = cv2;
		q[2*3 + 1] = cv3;
		cv0 = q[2*0] = le32dec(in + nbytes - 32 + 4*0);
		cv1 = q[2*1] = le32dec(in + nbytes - 32 + 4*1);
		cv2 = q[2*2] = le32dec(in + nbytes - 32 + 4*2);
		cv3 = q[2*3] = le32dec(in + nbytes - 32 + 4*3);

		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_decrypt(nrounds, sk_exp, q);
		br_aes_ct_ortho(q);

		/* Store the upper output block.  */
		le32enc(out + nbytes - 16 + 4*0, q[2*0 + 1] ^ cv0);
		le32enc(out + nbytes - 16 + 4*1, q[2*1 + 1] ^ cv1);
		le32enc(out + nbytes - 16 + 4*2, q[2*2 + 1] ^ cv2);
		le32enc(out + nbytes - 16 + 4*3, q[2*3 + 1] ^ cv3);

		/* Stop if we've reached the first output block.  */
		nbytes -= 32;
		if (nbytes == 0)
			goto out;

		/*
		 * Load the preceding cipher block, and apply it as the
		 * chaining value to this one.
		 */
		cv0 = le32dec(in + nbytes - 16 + 4*0);
		cv1 = le32dec(in + nbytes - 16 + 4*1);
		cv2 = le32dec(in + nbytes - 16 + 4*2);
		cv3 = le32dec(in + nbytes - 16 + 4*3);
		le32enc(out + nbytes + 4*0, q[2*0] ^ cv0);
		le32enc(out + nbytes + 4*1, q[2*1] ^ cv1);
		le32enc(out + nbytes + 4*2, q[2*2] ^ cv2);
		le32enc(out + nbytes + 4*3, q[2*3] ^ cv3);
	}

out:	/* Store the first output block.  */
	le32enc(out + 4*0, q[2*0] ^ iv0);
	le32enc(out + 4*1, q[2*1] ^ iv1);
	le32enc(out + 4*2, q[2*2] ^ iv2);
	le32enc(out + 4*3, q[2*3] ^ iv3);

	/* Paranoia: Zero temporary buffers.  */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}
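
/*
 * CBC decryption computes p[i] = D_K(c[i]) ^ c[i-1] (with c[-1] = iv)
 * and, unlike encryption, is parallelizable, so the pair loop above
 * feeds both bitslice slots.  Walking from the last block toward the
 * first keeps each ciphertext block in cv0..cv3 when it is next
 * needed as a chaining value, and writes land only at or above the
 * positions still to be read, so out may alias in.
 */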

static inline void
aesbear_xts_update(uint32_t *t0, uint32_t *t1, uint32_t *t2, uint32_t *t3)
{
	uint32_t s0, s1, s2, s3;

	s0 = *t0 >> 31;
	s1 = *t1 >> 31;
	s2 = *t2 >> 31;
	s3 = *t3 >> 31;
	*t0 = (*t0 << 1) ^ (-s3 & 0x87);
	*t1 = (*t1 << 1) ^ s0;
	*t2 = (*t2 << 1) ^ s1;
	*t3 = (*t3 << 1) ^ s2;
}
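
/*
 * aesbear_xts_update doubles the tweak in GF(2^128) with the standard
 * XTS reduction polynomial x^128 + x^7 + x^2 + x + 1: the four words
 * hold the 128-bit tweak in little-endian order, each word shifts
 * left by one with the carry moving into the next word, and a carry
 * out of the top word folds back in as 0x87.  Worked example (cf. the
 * selftest below): t = (0, 0, 0, 0x80000000) has only bit 127 set, so
 * doubling yields (0x87, 0, 0, 0).
 */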

static int
aesbear_xts_update_selftest(void)
{
	static const struct {
		uint32_t in[4], out[4];
	} cases[] = {
		{ {1}, {2} },
		{ {0x80000000U,0,0,0}, {0,1,0,0} },
		{ {0,0x80000000U,0,0}, {0,0,1,0} },
		{ {0,0,0x80000000U,0}, {0,0,0,1} },
		{ {0,0,0,0x80000000U}, {0x87,0,0,0} },
		{ {0,0x80000000U,0,0x80000000U}, {0x87,0,1,0} },
	};
	unsigned i;
	uint32_t t0, t1, t2, t3;

	for (i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
		t0 = cases[i].in[0];
		t1 = cases[i].in[1];
		t2 = cases[i].in[2];
		t3 = cases[i].in[3];
		aesbear_xts_update(&t0, &t1, &t2, &t3);
		if (t0 != cases[i].out[0] ||
		    t1 != cases[i].out[1] ||
		    t2 != cases[i].out[2] ||
		    t3 != cases[i].out[3])
			return -1;
	}

	/* Success!  */
	return 0;
}

static void
aesbear_xts_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
	uint32_t sk_exp[120];
	uint32_t q[8];
	uint32_t t0, t1, t2, t3, u0, u1, u2, u3;

	KASSERT(nbytes % 16 == 0);

	/* Skip if there's nothing to do.  */
	if (nbytes == 0)
		return;

	/* Expand round keys for bitslicing.  */
	br_aes_ct_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk);

	/* Load tweak.  */
	t0 = le32dec(tweak + 4*0);
	t1 = le32dec(tweak + 4*1);
	t2 = le32dec(tweak + 4*2);
	t3 = le32dec(tweak + 4*3);

	/* Handle the first block separately if the block count is odd.  */
	if (nbytes % 32) {
		KASSERT(nbytes % 32 == 16);

		/* Load up the first block and a garbage block.  */
		q[2*0] = le32dec(in + 4*0) ^ t0;
		q[2*1] = le32dec(in + 4*1) ^ t1;
		q[2*2] = le32dec(in + 4*2) ^ t2;
		q[2*3] = le32dec(in + 4*3) ^ t3;
		q[1] = q[3] = q[5] = q[7] = 0;

		/* Encrypt two blocks.  */
		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_encrypt(nrounds, sk_exp, q);
		br_aes_ct_ortho(q);

		/* Store the first cipher block.  */
		le32enc(out + 4*0, q[2*0] ^ t0);
		le32enc(out + 4*1, q[2*1] ^ t1);
		le32enc(out + 4*2, q[2*2] ^ t2);
		le32enc(out + 4*3, q[2*3] ^ t3);

		/* Advance to the next block.  */
		aesbear_xts_update(&t0, &t1, &t2, &t3);
		if ((nbytes -= 16) == 0)
			goto out;
		in += 16;
		out += 16;
	}

	do {
		KASSERT(nbytes >= 32);

		/* Compute the upper tweak.  */
		u0 = t0; u1 = t1; u2 = t2; u3 = t3;
		aesbear_xts_update(&u0, &u1, &u2, &u3);

		/* Load lower and upper blocks.  */
		q[2*0] = le32dec(in + 4*0) ^ t0;
		q[2*1] = le32dec(in + 4*1) ^ t1;
		q[2*2] = le32dec(in + 4*2) ^ t2;
		q[2*3] = le32dec(in + 4*3) ^ t3;
		q[2*0 + 1] = le32dec(in + 16 + 4*0) ^ u0;
		q[2*1 + 1] = le32dec(in + 16 + 4*1) ^ u1;
		q[2*2 + 1] = le32dec(in + 16 + 4*2) ^ u2;
		q[2*3 + 1] = le32dec(in + 16 + 4*3) ^ u3;

		/* Encrypt two blocks.  */
		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_encrypt(nrounds, sk_exp, q);
		br_aes_ct_ortho(q);

		/* Store lower and upper blocks.  */
		le32enc(out + 4*0, q[2*0] ^ t0);
		le32enc(out + 4*1, q[2*1] ^ t1);
		le32enc(out + 4*2, q[2*2] ^ t2);
		le32enc(out + 4*3, q[2*3] ^ t3);
		le32enc(out + 16 + 4*0, q[2*0 + 1] ^ u0);
		le32enc(out + 16 + 4*1, q[2*1 + 1] ^ u1);
		le32enc(out + 16 + 4*2, q[2*2 + 1] ^ u2);
		le32enc(out + 16 + 4*3, q[2*3 + 1] ^ u3);

		/* Advance to the next pair of blocks.  */
		t0 = u0; t1 = u1; t2 = u2; t3 = u3;
		aesbear_xts_update(&t0, &t1, &t2, &t3);
		in += 32;
		out += 32;
	} while (nbytes -= 32, nbytes);

out:	/* Store the updated tweak.  */
	le32enc(tweak + 4*0, t0);
	le32enc(tweak + 4*1, t1);
	le32enc(tweak + 4*2, t2);
	le32enc(tweak + 4*3, t3);

	/* Paranoia: Zero temporary buffers.  */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}
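
/*
 * In XTS terms the caller passes in the starting tweak for this run
 * of blocks -- conventionally the sector number encrypted under a
 * second key, t[0] = E_K2(sector) -- and each block j is handled as
 *
 *	c[j] = E_K1(p[j] ^ t[j]) ^ t[j],  t[j+1] = t[j] * x
 *
 * with the multiplication by x done by aesbear_xts_update above.
 * Because consecutive tweaks are independent, both bitslice slots
 * carry real blocks in the main loop.
 */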

static void
aesbear_xts_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
	uint32_t sk_exp[120];
	uint32_t q[8];
	uint32_t t0, t1, t2, t3, u0, u1, u2, u3;

	KASSERT(nbytes % 16 == 0);

	/* Skip if there's nothing to do.  */
	if (nbytes == 0)
		return;

	/* Expand round keys for bitslicing.  */
	br_aes_ct_skey_expand(sk_exp, nrounds, dec->aesd_aes.aes_rk);

	/* Load tweak.  */
	t0 = le32dec(tweak + 4*0);
	t1 = le32dec(tweak + 4*1);
	t2 = le32dec(tweak + 4*2);
	t3 = le32dec(tweak + 4*3);

	/* Handle the first block separately if the block count is odd.  */
	if (nbytes % 32) {
		KASSERT(nbytes % 32 == 16);

		/* Load up the first block and a garbage block.  */
		q[2*0] = le32dec(in + 4*0) ^ t0;
		q[2*1] = le32dec(in + 4*1) ^ t1;
		q[2*2] = le32dec(in + 4*2) ^ t2;
		q[2*3] = le32dec(in + 4*3) ^ t3;
		q[1] = q[3] = q[5] = q[7] = 0;

		/* Decrypt two blocks.  */
		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_decrypt(nrounds, sk_exp, q);
		br_aes_ct_ortho(q);

		/* Store the first output block.  */
		le32enc(out + 4*0, q[2*0] ^ t0);
		le32enc(out + 4*1, q[2*1] ^ t1);
		le32enc(out + 4*2, q[2*2] ^ t2);
		le32enc(out + 4*3, q[2*3] ^ t3);

		/* Advance to the next block.  */
		aesbear_xts_update(&t0, &t1, &t2, &t3);
		if ((nbytes -= 16) == 0)
			goto out;
		in += 16;
		out += 16;
	}

	do {
		KASSERT(nbytes >= 32);

		/* Compute the upper tweak.  */
		u0 = t0; u1 = t1; u2 = t2; u3 = t3;
		aesbear_xts_update(&u0, &u1, &u2, &u3);

		/* Load lower and upper blocks.  */
		q[2*0] = le32dec(in + 4*0) ^ t0;
		q[2*1] = le32dec(in + 4*1) ^ t1;
		q[2*2] = le32dec(in + 4*2) ^ t2;
		q[2*3] = le32dec(in + 4*3) ^ t3;
		q[2*0 + 1] = le32dec(in + 16 + 4*0) ^ u0;
		q[2*1 + 1] = le32dec(in + 16 + 4*1) ^ u1;
		q[2*2 + 1] = le32dec(in + 16 + 4*2) ^ u2;
		q[2*3 + 1] = le32dec(in + 16 + 4*3) ^ u3;

		/* Decrypt two blocks.  */
		br_aes_ct_ortho(q);
		br_aes_ct_bitslice_decrypt(nrounds, sk_exp, q);
		br_aes_ct_ortho(q);

		/* Store lower and upper blocks.  */
		le32enc(out + 4*0, q[2*0] ^ t0);
		le32enc(out + 4*1, q[2*1] ^ t1);
		le32enc(out + 4*2, q[2*2] ^ t2);
		le32enc(out + 4*3, q[2*3] ^ t3);
		le32enc(out + 16 + 4*0, q[2*0 + 1] ^ u0);
		le32enc(out + 16 + 4*1, q[2*1 + 1] ^ u1);
		le32enc(out + 16 + 4*2, q[2*2 + 1] ^ u2);
		le32enc(out + 16 + 4*3, q[2*3 + 1] ^ u3);

		/* Advance to the next pair of blocks.  */
		t0 = u0; t1 = u1; t2 = u2; t3 = u3;
		aesbear_xts_update(&t0, &t1, &t2, &t3);
		in += 32;
		out += 32;
	} while (nbytes -= 32, nbytes);

out:	/* Store the updated tweak.  */
	le32enc(tweak + 4*0, t0);
	le32enc(tweak + 4*1, t1);
	le32enc(tweak + 4*2, t2);
	le32enc(tweak + 4*3, t3);

	/* Paranoia: Zero temporary buffers.  */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}

static int
aesbear_probe(void)
{

	if (aesbear_xts_update_selftest())
		return -1;

	/* XXX test br_aes_ct_bitslice_decrypt */
	/* XXX test br_aes_ct_bitslice_encrypt */
	/* XXX test br_aes_ct_keysched */
	/* XXX test br_aes_ct_ortho */
	/* XXX test br_aes_ct_skey_expand */

	return 0;
}

struct aes_impl aes_bear_impl = {
	.ai_name = "BearSSL aes_ct",
	.ai_probe = aesbear_probe,
	.ai_setenckey = aesbear_setenckey,
	.ai_setdeckey = aesbear_setdeckey,
	.ai_enc = aesbear_enc,
	.ai_dec = aesbear_dec,
	.ai_cbc_enc = aesbear_cbc_enc,
	.ai_cbc_dec = aesbear_cbc_dec,
	.ai_xts_enc = aesbear_xts_enc,
	.ai_xts_dec = aesbear_xts_dec,
};
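
/*
 * Callers are expected to dispatch through the aes_impl entry points
 * rather than the static functions above.  A minimal sketch, assuming
 * key/pt/ct buffers as in the known-answer example earlier:
 *
 *	if (aes_bear_impl.ai_probe() == 0) {
 *		struct aesenc enc;
 *
 *		aes_bear_impl.ai_setenckey(&enc, key, 10);
 *		aes_bear_impl.ai_enc(&enc, pt, ct, 10);
 *	}
 */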