1/*	$NetBSD: aes_bear64.c,v 1.1 2025/11/23 22:44:13 riastradh Exp $	*/
2
3/*-
4 * Copyright (c) 2025 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 *    notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 *    notice, this list of conditions and the following disclaimer in the
14 *    documentation and/or other materials provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
17 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
18 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
19 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
20 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 * POSSIBILITY OF SUCH DAMAGE.
27 */
28
29#include <sys/cdefs.h>
30__KERNEL_RCSID(1, "$NetBSD: aes_bear64.c,v 1.1 2025/11/23 22:44:13 riastradh Exp $");
31
32#include <sys/types.h>
33#include <sys/endian.h>
34
35#ifdef _KERNEL
36#include <sys/systm.h>
37#else
38#include <assert.h>
39#include <err.h>
40#include <string.h>
41#define	KASSERT			assert
42#define	panic(fmt, args...)	err(1, fmt, args)
43#endif
44
45#include <crypto/aes/aes.h>
46#include <crypto/aes/aes_bear64.h>
47#include <crypto/aes/aes_impl.h>
48
/*
 * aesbear64_setkey(rk, key, nrounds)
 *
 *	Common key schedule: map the AES round count to the key length
 *	in bytes and hand the key off to the BearSSL ct64 key schedule,
 *	which writes the compressed round keys into rk.  Panics on an
 *	invalid round count.
 */
static void
aesbear64_setkey(uint64_t rk[static 30], const void *key, uint32_t nrounds)
{
	size_t nbytes;

	/* AES-128, AES-192, AES-256 use 10, 12, 14 rounds.  */
	if (nrounds == 10)
		nbytes = 16;
	else if (nrounds == 12)
		nbytes = 24;
	else if (nrounds == 14)
		nbytes = 32;
	else
		panic("invalid AES nrounds: %u", nrounds);

	br_aes_ct64_keysched(rk, key, nbytes);
}
70
71static void
72aesbear64_setenckey(struct aesenc *enc, const uint8_t *key, uint32_t nrounds)
73{
74
75	aesbear64_setkey(enc->aese_aes.aes_rk64, key, nrounds);
76}
77
78static void
79aesbear64_setdeckey(struct aesdec *dec, const uint8_t *key, uint32_t nrounds)
80{
81
82	/*
83	 * BearSSL computes InvMixColumns on the fly -- no need for
84	 * distinct decryption round keys.
85	 */
86	aesbear64_setkey(dec->aesd_aes.aes_rk64, key, nrounds);
87}
88
/*
 * aesbear64_enc(enc, in, out, nrounds)
 *
 *	Encrypt a single 16-byte block at in into out with the round
 *	keys in enc, using BearSSL's constant-time 64-bit bitsliced
 *	AES.  The bitsliced state holds four blocks; for a one-block
 *	operation the other three slots carry zero garbage.
 */
static void
aesbear64_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{
	uint64_t sk_exp[120];	/* expanded bitsliced round keys */
	uint32_t w[4];		/* one block as four 32-bit LE words */
	uint64_t q[8];		/* bitsliced state: four blocks */

	/* Expand round keys for bitslicing.  */
	br_aes_ct64_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk64);

	/* Load input block interleaved with garbage blocks.  */
	w[0] = le32dec(in + 4*0);
	w[1] = le32dec(in + 4*1);
	w[2] = le32dec(in + 4*2);
	w[3] = le32dec(in + 4*3);
	br_aes_ct64_interleave_in(&q[0], &q[4], w);
	q[1] = q[2] = q[3] = 0;
	q[5] = q[6] = q[7] = 0;

	/* Transform to bitslice, encrypt, transform from bitslice.  */
	br_aes_ct64_ortho(q);
	br_aes_ct64_bitslice_encrypt(nrounds, sk_exp, q);
	br_aes_ct64_ortho(q);

	/* Store output block.  */
	br_aes_ct64_interleave_out(w, q[0], q[4]);
	le32enc(out + 4*0, w[0]);
	le32enc(out + 4*1, w[1]);
	le32enc(out + 4*2, w[2]);
	le32enc(out + 4*3, w[3]);

	/* Paranoia: Zero temporary buffers.  */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}
125
/*
 * aesbear64_dec(dec, in, out, nrounds)
 *
 *	Decrypt a single 16-byte block at in into out with the round
 *	keys in dec, using BearSSL's constant-time 64-bit bitsliced
 *	AES.  The bitsliced state holds four blocks; for a one-block
 *	operation the other three slots carry zero garbage.
 */
static void
aesbear64_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{
	uint64_t sk_exp[120];	/* expanded bitsliced round keys */
	uint32_t w[4];		/* one block as four 32-bit LE words */
	uint64_t q[8];		/* bitsliced state: four blocks */

	/* Expand round keys for bitslicing.  */
	br_aes_ct64_skey_expand(sk_exp, nrounds, dec->aesd_aes.aes_rk64);

	/* Load input block interleaved with garbage blocks.  */
	w[0] = le32dec(in + 4*0);
	w[1] = le32dec(in + 4*1);
	w[2] = le32dec(in + 4*2);
	w[3] = le32dec(in + 4*3);
	br_aes_ct64_interleave_in(&q[0], &q[4], w);
	q[1] = q[2] = q[3] = 0;
	q[5] = q[6] = q[7] = 0;

	/* Transform to bitslice, decrypt, transform from bitslice.  */
	br_aes_ct64_ortho(q);
	br_aes_ct64_bitslice_decrypt(nrounds, sk_exp, q);
	br_aes_ct64_ortho(q);

	/* Store output block.  */
	br_aes_ct64_interleave_out(w, q[0], q[4]);
	le32enc(out + 4*0, w[0]);
	le32enc(out + 4*1, w[1]);
	le32enc(out + 4*2, w[2]);
	le32enc(out + 4*3, w[3]);

	/* Paranoia: Zero temporary buffers.  */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}
162
/*
 * aesbear64_cbc_enc(enc, in, out, nbytes, iv, nrounds)
 *
 *	Encrypt nbytes (a multiple of 16, possibly zero) at in with
 *	AES-CBC, writing ciphertext at out and updating iv in place
 *	with the last ciphertext block.  CBC encryption is inherently
 *	serial -- each block's input depends on the previous block's
 *	output -- so only one of the four bitsliced block slots can be
 *	used; the rest are zero garbage.
 */
static void
aesbear64_cbc_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
	uint64_t sk_exp[120];	/* expanded bitsliced round keys */
	uint32_t w[4];		/* one block as four 32-bit LE words */
	uint64_t q[8];		/* bitsliced state: four blocks */
	uint32_t cv0, cv1, cv2, cv3;	/* chaining value */

	KASSERT(nbytes % 16 == 0);

	/* Skip if there's nothing to do.  */
	if (nbytes == 0)
		return;

	/* Expand round keys for bitslicing.  */
	br_aes_ct64_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk64);

	/* Initialize garbage blocks.  */
	q[1] = q[2] = q[3] = 0;
	q[5] = q[6] = q[7] = 0;

	/* Load IV.  */
	cv0 = le32dec(iv + 4*0);
	cv1 = le32dec(iv + 4*1);
	cv2 = le32dec(iv + 4*2);
	cv3 = le32dec(iv + 4*3);

	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
		/* Load input block and apply CV.  */
		w[0] = cv0 ^ le32dec(in + 4*0);
		w[1] = cv1 ^ le32dec(in + 4*1);
		w[2] = cv2 ^ le32dec(in + 4*2);
		w[3] = cv3 ^ le32dec(in + 4*3);
		br_aes_ct64_interleave_in(&q[0], &q[4], w);

		/* Transform to bitslice, encrypt, transform from bitslice.  */
		br_aes_ct64_ortho(q);
		br_aes_ct64_bitslice_encrypt(nrounds, sk_exp, q);
		br_aes_ct64_ortho(q);

		/* Remember ciphertext as CV and store output block.  */
		br_aes_ct64_interleave_out(w, q[0], q[4]);
		cv0 = w[0];
		cv1 = w[1];
		cv2 = w[2];
		cv3 = w[3];
		le32enc(out + 4*0, cv0);
		le32enc(out + 4*1, cv1);
		le32enc(out + 4*2, cv2);
		le32enc(out + 4*3, cv3);
	}

	/* Store updated IV.  */
	le32enc(iv + 4*0, cv0);
	le32enc(iv + 4*1, cv1);
	le32enc(iv + 4*2, cv2);
	le32enc(iv + 4*3, cv3);

	/* Paranoia: Zero temporary buffers.  */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}
227
/*
 * aesbear64_cbc_dec(dec, in, out, nbytes, iv, nrounds)
 *
 *	Decrypt nbytes (a multiple of 16, possibly zero) of ciphertext
 *	at in with AES-CBC, writing plaintext at out and replacing iv
 *	with the last ciphertext block.  Unlike CBC encryption, CBC
 *	decryption parallelizes: each plaintext block is the block
 *	decryption of its ciphertext block XORed with the _previous_
 *	ciphertext block, all of which are available up front.  So we
 *	work backwards from the end in groups of four bitsliced
 *	blocks, with a leading group of 1-3 blocks handled first if
 *	nbytes is not a multiple of 64.
 */
static void
aesbear64_cbc_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
	uint64_t sk_exp[120];	/* expanded bitsliced round keys */
	uint32_t w[4];		/* one block as four 32-bit LE words */
	uint64_t q[8];		/* bitsliced state: four blocks */
	uint32_t cv0, cv1, cv2, cv3, iv0, iv1, iv2, iv3;
	unsigned i;

	KASSERT(nbytes % 16 == 0);

	/* Skip if there's nothing to do.  */
	if (nbytes == 0)
		return;

	/* Expand round keys for bitslicing.  */
	br_aes_ct64_skey_expand(sk_exp, nrounds, dec->aesd_aes.aes_rk64);

	/* Load the IV.  */
	iv0 = le32dec(iv + 4*0);
	iv1 = le32dec(iv + 4*1);
	iv2 = le32dec(iv + 4*2);
	iv3 = le32dec(iv + 4*3);

	/* Load the last cipher block.  */
	cv0 = le32dec(in + nbytes - 16 + 4*0);
	cv1 = le32dec(in + nbytes - 16 + 4*1);
	cv2 = le32dec(in + nbytes - 16 + 4*2);
	cv3 = le32dec(in + nbytes - 16 + 4*3);

	/* Store the updated IV.  */
	le32enc(iv + 4*0, cv0);
	le32enc(iv + 4*1, cv1);
	le32enc(iv + 4*2, cv2);
	le32enc(iv + 4*3, cv3);

	/* Handle the last cipher block separately if odd number.  */
	if (nbytes % 64) {
		unsigned n = (nbytes % 64)/16;

		KASSERT(n == 1 || n == 2 || n == 3);

		/* Zero the unused upper block slots n..3.  */
		for (i = 4; i --> n;)
			q[i] = q[4 + i] = 0;
		KASSERT(i == n - 1);
		/* Slot n-1 gets the last cipher block, already in cv.  */
		w[0] = cv0;	/* le32dec(in + nbytes - 16*n + 16*i + 4*0) */
		w[1] = cv1;	/* le32dec(in + nbytes - 16*n + 16*i + 4*1) */
		w[2] = cv2;	/* le32dec(in + nbytes - 16*n + 16*i + 4*2) */
		w[3] = cv3;	/* le32dec(in + nbytes - 16*n + 16*i + 4*3) */
		br_aes_ct64_interleave_in(&q[i], &q[4 + i], w);
		while (i --> 0) {
			w[0] = le32dec(in + nbytes - 16*n + 16*i + 4*0);
			w[1] = le32dec(in + nbytes - 16*n + 16*i + 4*1);
			w[2] = le32dec(in + nbytes - 16*n + 16*i + 4*2);
			w[3] = le32dec(in + nbytes - 16*n + 16*i + 4*3);
			br_aes_ct64_interleave_in(&q[i], &q[4 + i], w);
		}

		/* Decrypt.  */
		br_aes_ct64_ortho(q);
		br_aes_ct64_bitslice_decrypt(nrounds, sk_exp, q);
		br_aes_ct64_ortho(q);

		/* XOR each block with the preceding cipher block and store.  */
		for (i = n; i --> 1;) {
			br_aes_ct64_interleave_out(w, q[i], q[4 + i]);
			cv0 = le32dec(in + nbytes - 16*n + 16*(i - 1) + 4*0);
			cv1 = le32dec(in + nbytes - 16*n + 16*(i - 1) + 4*1);
			cv2 = le32dec(in + nbytes - 16*n + 16*(i - 1) + 4*2);
			cv3 = le32dec(in + nbytes - 16*n + 16*(i - 1) + 4*3);
			le32enc(out + nbytes - 16*n + 16*i + 4*0, w[0] ^ cv0);
			le32enc(out + nbytes - 16*n + 16*i + 4*1, w[1] ^ cv1);
			le32enc(out + nbytes - 16*n + 16*i + 4*2, w[2] ^ cv2);
			le32enc(out + nbytes - 16*n + 16*i + 4*3, w[3] ^ cv3);
		}
		br_aes_ct64_interleave_out(w, q[0], q[4]);

		/* If this was the only cipher block, we're done.  */
		nbytes -= nbytes % 64;
		if (nbytes == 0)
			goto out;

		/*
		 * Otherwise, load up the previous cipher block, and
		 * store the output block.
		 */
		cv0 = le32dec(in + nbytes - 16 + 4*0);
		cv1 = le32dec(in + nbytes - 16 + 4*1);
		cv2 = le32dec(in + nbytes - 16 + 4*2);
		cv3 = le32dec(in + nbytes - 16 + 4*3);
		le32enc(out + nbytes + 4*0, cv0 ^ w[0]);
		le32enc(out + nbytes + 4*1, cv1 ^ w[1]);
		le32enc(out + nbytes + 4*2, cv2 ^ w[2]);
		le32enc(out + nbytes + 4*3, cv3 ^ w[3]);
	}

	/* Decrypt whole groups of four blocks, still working backwards.  */
	for (;;) {
		KASSERT(nbytes >= 64);

		/* Load the input blocks.  */
		w[0] = cv0;	/* le32dec(in + nbytes - 64 + 16*i + 4*0) */
		w[1] = cv1;	/* le32dec(in + nbytes - 64 + 16*i + 4*1) */
		w[2] = cv2;	/* le32dec(in + nbytes - 64 + 16*i + 4*2) */
		w[3] = cv3;	/* le32dec(in + nbytes - 64 + 16*i + 4*3) */
		br_aes_ct64_interleave_in(&q[3], &q[7], w);
		for (i = 3; i --> 0;) {
			w[0] = le32dec(in + nbytes - 64 + 16*i + 4*0);
			w[1] = le32dec(in + nbytes - 64 + 16*i + 4*1);
			w[2] = le32dec(in + nbytes - 64 + 16*i + 4*2);
			w[3] = le32dec(in + nbytes - 64 + 16*i + 4*3);
			br_aes_ct64_interleave_in(&q[i], &q[4 + i], w);
		}

		/* Decrypt.  */
		br_aes_ct64_ortho(q);
		br_aes_ct64_bitslice_decrypt(nrounds, sk_exp, q);
		br_aes_ct64_ortho(q);

		/* Store the upper output blocks.  */
		for (i = 4; i --> 1;) {
			br_aes_ct64_interleave_out(w, q[i], q[4 + i]);
			cv0 = le32dec(in + nbytes - 64 + 16*(i - 1) + 4*0);
			cv1 = le32dec(in + nbytes - 64 + 16*(i - 1) + 4*1);
			cv2 = le32dec(in + nbytes - 64 + 16*(i - 1) + 4*2);
			cv3 = le32dec(in + nbytes - 64 + 16*(i - 1) + 4*3);
			le32enc(out + nbytes - 64 + 16*i + 4*0, w[0] ^ cv0);
			le32enc(out + nbytes - 64 + 16*i + 4*1, w[1] ^ cv1);
			le32enc(out + nbytes - 64 + 16*i + 4*2, w[2] ^ cv2);
			le32enc(out + nbytes - 64 + 16*i + 4*3, w[3] ^ cv3);
		}

		/* Prepare the first output block.  */
		br_aes_ct64_interleave_out(w, q[0], q[4]);

		/* Stop if we've reached the first output block.  */
		nbytes -= 64;
		if (nbytes == 0)
			goto out;

		/*
		 * Load the preceding cipher block, and apply it as the
		 * chaining value to this one.
		 */
		cv0 = le32dec(in + nbytes - 16 + 4*0);
		cv1 = le32dec(in + nbytes - 16 + 4*1);
		cv2 = le32dec(in + nbytes - 16 + 4*2);
		cv3 = le32dec(in + nbytes - 16 + 4*3);
		le32enc(out + nbytes + 4*0, w[0] ^ cv0);
		le32enc(out + nbytes + 4*1, w[1] ^ cv1);
		le32enc(out + nbytes + 4*2, w[2] ^ cv2);
		le32enc(out + nbytes + 4*3, w[3] ^ cv3);
	}

out:	/* Store the first output block, chained with the original IV.  */
	le32enc(out + 4*0, w[0] ^ iv0);
	le32enc(out + 4*1, w[1] ^ iv1);
	le32enc(out + 4*2, w[2] ^ iv2);
	le32enc(out + 4*3, w[3] ^ iv3);

	/* Paranoia: Zero temporary buffers.  */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}
392
/*
 * aesbear64_xts_update(t0, t1, t2, t3)
 *
 *	Multiply the 128-bit XTS tweak (*t0, *t1, *t2, *t3), stored as
 *	four 32-bit words from least to most significant, by x in
 *	GF(2^128) mod x^128 + x^7 + x^2 + x + 1, branchlessly.
 */
static inline void
aesbear64_xts_update(uint32_t *t0, uint32_t *t1, uint32_t *t2, uint32_t *t3)
{
	uint32_t carry0, carry1, carry2, carry3;

	/* Grab the top bit of each word before it is shifted out.  */
	carry0 = *t0 >> 31;
	carry1 = *t1 >> 31;
	carry2 = *t2 >> 31;
	carry3 = *t3 >> 31;

	/*
	 * Shift everything left one bit, propagating carries upward;
	 * the bit shifted out the top folds back into the bottom word
	 * as the reduction polynomial 0x87, selected by the all-ones
	 * or all-zeros mask -carry3 without branching.
	 */
	*t0 = (*t0 << 1) ^ (0x87 & -carry3);
	*t1 = (*t1 << 1) ^ carry0;
	*t2 = (*t2 << 1) ^ carry1;
	*t3 = (*t3 << 1) ^ carry2;
}
407
/*
 * aesbear64_xts_update_selftest()
 *
 *	Check aesbear64_xts_update against a table of known
 *	input/output pairs covering each carry bit and the reduction.
 *	Returns 0 on success, -1 on any mismatch.
 */
static int
aesbear64_xts_update_selftest(void)
{
	static const struct {
		uint32_t in[4], out[4];
	} cases[] = {
		{ {1}, {2} },
		{ {0x80000000U,0,0,0}, {0,1,0,0} },
		{ {0,0x80000000U,0,0}, {0,0,1,0} },
		{ {0,0,0x80000000U,0}, {0,0,0,1} },
		{ {0,0,0,0x80000000U}, {0x87,0,0,0} },
		{ {0,0x80000000U,0,0x80000000U}, {0x87,0,1,0} },
	};
	unsigned i;

	for (i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
		uint32_t t[4];

		t[0] = cases[i].in[0];
		t[1] = cases[i].in[1];
		t[2] = cases[i].in[2];
		t[3] = cases[i].in[3];
		aesbear64_xts_update(&t[0], &t[1], &t[2], &t[3]);
		if (memcmp(t, cases[i].out, sizeof t) != 0)
			return -1;
	}

	/* Success!  */
	return 0;
}
440
/*
 * aesbear64_xts_enc(enc, in, out, nbytes, tweak, nrounds)
 *
 *	Encrypt nbytes (a multiple of 16, possibly zero) at in with
 *	AES-XTS, writing ciphertext at out and updating tweak in place
 *	past the last block.  XTS blocks are independent once each has
 *	its own tweak, so four are processed at a time in the bitsliced
 *	state, with a leading group of 1-3 blocks handled first if
 *	nbytes is not a multiple of 64.
 */
static void
aesbear64_xts_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
	uint64_t sk_exp[120];	/* expanded bitsliced round keys */
	uint32_t w[4];		/* one block as four 32-bit LE words */
	uint64_t q[8];		/* bitsliced state: four blocks */
	uint32_t t0, t1, t2, t3, u0, u1, u2, u3;	/* t: group tweak, u: per-block */
	unsigned i;

	KASSERT(nbytes % 16 == 0);

	/* Skip if there's nothing to do.  */
	if (nbytes == 0)
		return;

	/* Expand round keys for bitslicing.  */
	br_aes_ct64_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk64);

	/* Load tweak.  */
	t0 = le32dec(tweak + 4*0);
	t1 = le32dec(tweak + 4*1);
	t2 = le32dec(tweak + 4*2);
	t3 = le32dec(tweak + 4*3);

	/* Handle the first blocks separately if odd number.  */
	if (nbytes % 64) {
		unsigned n = (nbytes % 64)/16;

		/* Load up the first blocks and garbage.  */
		for (i = 0, u0 = t0, u1 = t1, u2 = t2, u3 = t3; i < n; i++) {
			w[0] = le32dec(in + 16*i + 4*0) ^ u0;
			w[1] = le32dec(in + 16*i + 4*1) ^ u1;
			w[2] = le32dec(in + 16*i + 4*2) ^ u2;
			w[3] = le32dec(in + 16*i + 4*3) ^ u3;
			aesbear64_xts_update(&u0, &u1, &u2, &u3);
			br_aes_ct64_interleave_in(&q[i], &q[4 + i], w);
		}
		for (; i < 4; i++)
			q[i] = q[4 + i] = 0;

		/* Encrypt up to three blocks.  */
		br_aes_ct64_ortho(q);
		br_aes_ct64_bitslice_encrypt(nrounds, sk_exp, q);
		br_aes_ct64_ortho(q);

		/* Store up to three blocks, re-deriving each block's tweak.  */
		for (i = 0, u0 = t0, u1 = t1, u2 = t2, u3 = t3; i < n; i++) {
			br_aes_ct64_interleave_out(w, q[i], q[4 + i]);
			le32enc(out + 16*i + 4*0, w[0] ^ u0);
			le32enc(out + 16*i + 4*1, w[1] ^ u1);
			le32enc(out + 16*i + 4*2, w[2] ^ u2);
			le32enc(out + 16*i + 4*3, w[3] ^ u3);
			aesbear64_xts_update(&u0, &u1, &u2, &u3);
		}

		/* Advance to the next block.  */
		t0 = u0, t1 = u1, t2 = u2, t3 = u3;
		if ((nbytes -= 16*n) == 0)
			goto out;
		in += 16*n;
		out += 16*n;
	}

	do {
		KASSERT(nbytes >= 64);

		/* Load four blocks.  */
		for (i = 0, u0 = t0, u1 = t1, u2 = t2, u3 = t3; i < 4; i++) {
			w[0] = le32dec(in + 16*i + 4*0) ^ u0;
			w[1] = le32dec(in + 16*i + 4*1) ^ u1;
			w[2] = le32dec(in + 16*i + 4*2) ^ u2;
			w[3] = le32dec(in + 16*i + 4*3) ^ u3;
			aesbear64_xts_update(&u0, &u1, &u2, &u3);
			br_aes_ct64_interleave_in(&q[i], &q[4 + i], w);
		}

		/* Encrypt four blocks.  */
		br_aes_ct64_ortho(q);
		br_aes_ct64_bitslice_encrypt(nrounds, sk_exp, q);
		br_aes_ct64_ortho(q);

		/* Store four blocks.  */
		for (i = 0, u0 = t0, u1 = t1, u2 = t2, u3 = t3; i < 4; i++) {
			br_aes_ct64_interleave_out(w, q[i], q[4 + i]);
			le32enc(out + 16*i + 4*0, w[0] ^ u0);
			le32enc(out + 16*i + 4*1, w[1] ^ u1);
			le32enc(out + 16*i + 4*2, w[2] ^ u2);
			le32enc(out + 16*i + 4*3, w[3] ^ u3);
			aesbear64_xts_update(&u0, &u1, &u2, &u3);
		}

		/* Advance to the next pair of blocks.  */
		t0 = u0, t1 = u1, t2 = u2, t3 = u3;
		in += 64;
		out += 64;
	} while (nbytes -= 64, nbytes);

out:	/* Store the updated tweak.  */
	le32enc(tweak + 4*0, t0);
	le32enc(tweak + 4*1, t1);
	le32enc(tweak + 4*2, t2);
	le32enc(tweak + 4*3, t3);

	/* Paranoia: Zero temporary buffers.  */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}
550
/*
 * aesbear64_xts_dec(dec, in, out, nbytes, tweak, nrounds)
 *
 *	Decrypt nbytes (a multiple of 16, possibly zero) at in with
 *	AES-XTS, writing plaintext at out and updating tweak in place
 *	past the last block.  Mirrors aesbear64_xts_enc: four blocks at
 *	a time in the bitsliced state, with a leading group of 1-3
 *	blocks if nbytes is not a multiple of 64.
 */
static void
aesbear64_xts_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
	uint64_t sk_exp[120];	/* expanded bitsliced round keys */
	uint32_t w[4];		/* one block as four 32-bit LE words */
	uint64_t q[8];		/* bitsliced state: four blocks */
	uint32_t t0, t1, t2, t3, u0, u1, u2, u3;	/* t: group tweak, u: per-block */
	unsigned i;

	KASSERT(nbytes % 16 == 0);

	/* Skip if there's nothing to do.  */
	if (nbytes == 0)
		return;

	/* Expand round keys for bitslicing.  */
	br_aes_ct64_skey_expand(sk_exp, nrounds, dec->aesd_aes.aes_rk64);

	/* Load tweak.  */
	t0 = le32dec(tweak + 4*0);
	t1 = le32dec(tweak + 4*1);
	t2 = le32dec(tweak + 4*2);
	t3 = le32dec(tweak + 4*3);

	/* Handle the first blocks separately if odd number.  */
	if (nbytes % 64) {
		unsigned n = (nbytes % 64)/16;

		/* Load up the first blocks and garbage.  */
		for (i = 0, u0 = t0, u1 = t1, u2 = t2, u3 = t3; i < n; i++) {
			w[0] = le32dec(in + 16*i + 4*0) ^ u0;
			w[1] = le32dec(in + 16*i + 4*1) ^ u1;
			w[2] = le32dec(in + 16*i + 4*2) ^ u2;
			w[3] = le32dec(in + 16*i + 4*3) ^ u3;
			aesbear64_xts_update(&u0, &u1, &u2, &u3);
			br_aes_ct64_interleave_in(&q[i], &q[4 + i], w);
		}
		for (; i < 4; i++)
			q[i] = q[4 + i] = 0;

		/* Decrypt up to three blocks.  */
		br_aes_ct64_ortho(q);
		br_aes_ct64_bitslice_decrypt(nrounds, sk_exp, q);
		br_aes_ct64_ortho(q);

		/* Store up to three blocks, re-deriving each block's tweak.  */
		for (i = 0, u0 = t0, u1 = t1, u2 = t2, u3 = t3; i < n; i++) {
			br_aes_ct64_interleave_out(w, q[i], q[4 + i]);
			le32enc(out + 16*i + 4*0, w[0] ^ u0);
			le32enc(out + 16*i + 4*1, w[1] ^ u1);
			le32enc(out + 16*i + 4*2, w[2] ^ u2);
			le32enc(out + 16*i + 4*3, w[3] ^ u3);
			aesbear64_xts_update(&u0, &u1, &u2, &u3);
		}

		/* Advance to the next block.  */
		t0 = u0, t1 = u1, t2 = u2, t3 = u3;
		if ((nbytes -= 16*n) == 0)
			goto out;
		in += 16*n;
		out += 16*n;
	}

	do {
		KASSERT(nbytes >= 64);

		/* Load four blocks.  */
		for (i = 0, u0 = t0, u1 = t1, u2 = t2, u3 = t3; i < 4; i++) {
			w[0] = le32dec(in + 16*i + 4*0) ^ u0;
			w[1] = le32dec(in + 16*i + 4*1) ^ u1;
			w[2] = le32dec(in + 16*i + 4*2) ^ u2;
			w[3] = le32dec(in + 16*i + 4*3) ^ u3;
			aesbear64_xts_update(&u0, &u1, &u2, &u3);
			br_aes_ct64_interleave_in(&q[i], &q[4 + i], w);
		}

		/* Decrypt four blocks.  */
		br_aes_ct64_ortho(q);
		br_aes_ct64_bitslice_decrypt(nrounds, sk_exp, q);
		br_aes_ct64_ortho(q);

		/* Store four blocks.  */
		for (i = 0, u0 = t0, u1 = t1, u2 = t2, u3 = t3; i < 4; i++) {
			br_aes_ct64_interleave_out(w, q[i], q[4 + i]);
			le32enc(out + 16*i + 4*0, w[0] ^ u0);
			le32enc(out + 16*i + 4*1, w[1] ^ u1);
			le32enc(out + 16*i + 4*2, w[2] ^ u2);
			le32enc(out + 16*i + 4*3, w[3] ^ u3);
			aesbear64_xts_update(&u0, &u1, &u2, &u3);
		}

		/* Advance to the next pair of blocks.  */
		t0 = u0, t1 = u1, t2 = u2, t3 = u3;
		in += 64;
		out += 64;
	} while (nbytes -= 64, nbytes);

out:	/* Store the updated tweak.  */
	le32enc(tweak + 4*0, t0);
	le32enc(tweak + 4*1, t1);
	le32enc(tweak + 4*2, t2);
	le32enc(tweak + 4*3, t3);

	/* Paranoia: Zero temporary buffers.  */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}
660
/*
 * aesbear64_cbcmac_update1(enc, in, nbytes, auth, nrounds)
 *
 *	Update the 16-byte CBC-MAC authenticator auth with nbytes (a
 *	positive multiple of 16) of data at in.  Like CBC encryption,
 *	this is inherently serial, so only one of the four bitsliced
 *	block slots is used; the rest are zero garbage.
 */
static void
aesbear64_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16],
    size_t nbytes, uint8_t auth[static 16], uint32_t nrounds)
{
	uint64_t sk_exp[120];	/* expanded bitsliced round keys */
	uint32_t w[4];		/* authenticator as four 32-bit LE words */
	uint64_t q[8];		/* bitsliced state: four blocks */

	KASSERT(nbytes);
	KASSERT(nbytes % 16 == 0);

	/* Expand round keys for bitslicing.  */
	br_aes_ct64_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk64);

	/* Initialize garbage blocks.  */
	q[1] = q[2] = q[3] = 0;
	q[5] = q[6] = q[7] = 0;

	/* Load initial authenticator.  */
	w[0] = le32dec(auth + 4*0);
	w[1] = le32dec(auth + 4*1);
	w[2] = le32dec(auth + 4*2);
	w[3] = le32dec(auth + 4*3);

	for (; nbytes; nbytes -= 16, in += 16) {
		/* Combine input block.  */
		w[0] ^= le32dec(in + 4*0);
		w[1] ^= le32dec(in + 4*1);
		w[2] ^= le32dec(in + 4*2);
		w[3] ^= le32dec(in + 4*3);
		br_aes_ct64_interleave_in(&q[0], &q[4], w);

		/* Transform to bitslice, encrypt, transform from bitslice.  */
		br_aes_ct64_ortho(q);
		br_aes_ct64_bitslice_encrypt(nrounds, sk_exp, q);
		br_aes_ct64_ortho(q);

		br_aes_ct64_interleave_out(w, q[0], q[4]);
	}

	/* Store updated authenticator.  */
	le32enc(auth + 4*0, w[0]);
	le32enc(auth + 4*1, w[1]);
	le32enc(auth + 4*2, w[2]);
	le32enc(auth + 4*3, w[3]);

	/* Paranoia: Zero temporary buffers.  */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}
711
/*
 * aesbear64_ccm_enc1(enc, in, out, nbytes, authctr, nrounds)
 *
 *	Encrypt nbytes (a positive multiple of 16) at in with AES-CCM,
 *	writing ciphertext at out.  authctr[0..15] holds the running
 *	CBC-MAC authenticator and authctr[16..31] the CTR-mode counter
 *	block; both are updated in place.  The authenticator update and
 *	the counter encryption are independent, so the two AES
 *	invocations per block share one bitsliced pass (slots 0 and 1).
 */
static void
aesbear64_ccm_enc1(const struct aesenc *enc, const uint8_t *in, uint8_t *out,
    size_t nbytes, uint8_t authctr[32], uint32_t nrounds)
{
	uint64_t sk_exp[120];	/* expanded bitsliced round keys */
	uint32_t w[4];		/* one block as four 32-bit LE words */
	uint64_t q[8];		/* bitsliced state: four blocks */
	uint32_t c0, c1, c2, c3be;	/* counter block; last word kept BE */

	KASSERT(nbytes);
	KASSERT(nbytes % 16 == 0);

	/* Expand round keys for bitslicing.  */
	br_aes_ct64_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk64);

	/* Initialize garbage blocks.  */
	q[2] = q[3] = 0;
	q[6] = q[7] = 0;

	/* Set first block to authenticator.  */
	w[0] = le32dec(authctr + 4*0);
	w[1] = le32dec(authctr + 4*1);
	w[2] = le32dec(authctr + 4*2);
	w[3] = le32dec(authctr + 4*3);

	/* Load initial counter block, big-endian so we can increment it.  */
	c0 = le32dec(authctr + 16 + 4*0);
	c1 = le32dec(authctr + 16 + 4*1);
	c2 = le32dec(authctr + 16 + 4*2);
	c3be = bswap32(le32dec(authctr + 16 + 4*3));

	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
		/* Update authenticator.  */
		w[0] ^= le32dec(in + 4*0);
		w[1] ^= le32dec(in + 4*1);
		w[2] ^= le32dec(in + 4*2);
		w[3] ^= le32dec(in + 4*3);
		br_aes_ct64_interleave_in(&q[0], &q[4], w);

		/* Increment 32-bit counter.  */
		w[0] = c0;
		w[1] = c1;
		w[2] = c2;
		w[3] = bswap32(++c3be);
		br_aes_ct64_interleave_in(&q[1], &q[5], w);

		/* Encrypt authenticator and counter.  */
		br_aes_ct64_ortho(q);
		br_aes_ct64_bitslice_encrypt(nrounds, sk_exp, q);
		br_aes_ct64_ortho(q);

		/* Encrypt with CTR output.  */
		br_aes_ct64_interleave_out(w, q[1], q[5]);
		le32enc(out + 4*0, le32dec(in + 4*0) ^ w[0]);
		le32enc(out + 4*1, le32dec(in + 4*1) ^ w[1]);
		le32enc(out + 4*2, le32dec(in + 4*2) ^ w[2]);
		le32enc(out + 4*3, le32dec(in + 4*3) ^ w[3]);

		/* Fish out the authenticator so far.  */
		br_aes_ct64_interleave_out(w, q[0], q[4]);
	}

	/* Update authenticator.  */
	le32enc(authctr + 4*0, w[0]);
	le32enc(authctr + 4*1, w[1]);
	le32enc(authctr + 4*2, w[2]);
	le32enc(authctr + 4*3, w[3]);

	/* Update counter.  */
	le32enc(authctr + 16 + 4*3, bswap32(c3be));

	/* Paranoia: Zero temporary buffers.  */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}
787
/*
 * aesbear64_ccm_dec1(enc, in, out, nbytes, authctr, nrounds)
 *
 *	Decrypt nbytes (a positive multiple of 16) at in with AES-CCM,
 *	writing plaintext at out.  authctr[0..15] holds the running
 *	CBC-MAC authenticator and authctr[16..31] the CTR-mode counter
 *	block; both are updated in place.  The authenticator input is
 *	the plaintext, which is only known after the CTR decryption,
 *	so the loop is software-pipelined: each bitsliced AES pass
 *	encrypts the next counter block (slot 1) alongside the
 *	authenticator update for the previous plaintext block (slot 0).
 */
static void
aesbear64_ccm_dec1(const struct aesenc *enc, const uint8_t *in, uint8_t *out,
    size_t nbytes, uint8_t authctr[32], uint32_t nrounds)
{
	uint64_t sk_exp[120];	/* expanded bitsliced round keys */
	uint32_t w[4];		/* one block as four 32-bit LE words */
	uint64_t q[8];		/* bitsliced state: four blocks */
	uint32_t c0, c1, c2, c3be;	/* counter block; last word kept BE */
	uint32_t b0, b1, b2, b3;	/* current plaintext block */

	KASSERT(nbytes);
	KASSERT(nbytes % 16 == 0);

	/* Expand round keys for bitslicing.  */
	br_aes_ct64_skey_expand(sk_exp, nrounds, enc->aese_aes.aes_rk64);

	/* Initialize garbage blocks.  */
	q[2] = q[3] = 0;
	q[6] = q[7] = 0;

	/* Load initial counter block, big-endian so we can increment it.  */
	c0 = le32dec(authctr + 16 + 4*0);
	c1 = le32dec(authctr + 16 + 4*1);
	c2 = le32dec(authctr + 16 + 4*2);
	c3be = bswap32(le32dec(authctr + 16 + 4*3));

	/* Increment 32-bit counter.  */
	w[0] = c0;
	w[1] = c1;
	w[2] = c2;
	w[3] = bswap32(++c3be);
	br_aes_ct64_interleave_in(&q[1], &q[5], w);

	/*
	 * Set the other block to garbage -- we don't have any
	 * plaintext to authenticate yet.
	 */
	q[0] = q[4] = 0;

	/* Encrypt first CTR.  */
	br_aes_ct64_ortho(q);
	br_aes_ct64_bitslice_encrypt(nrounds, sk_exp, q);
	br_aes_ct64_ortho(q);

	/* Load the initial authenticator.  */
	w[0] = le32dec(authctr + 4*0);
	w[1] = le32dec(authctr + 4*1);
	w[2] = le32dec(authctr + 4*2);
	w[3] = le32dec(authctr + 4*3);
	br_aes_ct64_interleave_in(&q[0], &q[4], w);

	for (;; in += 16, out += 16) {
		/* Decrypt the block.  */
		br_aes_ct64_interleave_out(w, q[1], q[5]);
		b0 = le32dec(in + 4*0) ^ w[0];
		b1 = le32dec(in + 4*1) ^ w[1];
		b2 = le32dec(in + 4*2) ^ w[2];
		b3 = le32dec(in + 4*3) ^ w[3];

		/* Update authenticator.  */
		br_aes_ct64_interleave_out(w, q[0], q[4]);
		w[0] ^= b0;
		w[1] ^= b1;
		w[2] ^= b2;
		w[3] ^= b3;
		br_aes_ct64_interleave_in(&q[0], &q[4], w);

		/* Store plaintext.  */
		le32enc(out + 4*0, b0);
		le32enc(out + 4*1, b1);
		le32enc(out + 4*2, b2);
		le32enc(out + 4*3, b3);

		/* If this is the last block, stop.  */
		if ((nbytes -= 16) == 0)
			break;

		/* Increment 32-bit counter.  */
		w[0] = c0;
		w[1] = c1;
		w[2] = c2;
		w[3] = bswap32(++c3be);
		br_aes_ct64_interleave_in(&q[1], &q[5], w);

		/* Authenticate previous plaintext, encrypt next CTR.  */
		br_aes_ct64_ortho(q);
		br_aes_ct64_bitslice_encrypt(nrounds, sk_exp, q);
		br_aes_ct64_ortho(q);
	}

	/*
	 * Authenticate last plaintext.  We're only doing this for the
	 * authenticator, not for the counter, so don't bother to
	 * initialize q[2*i].  (Even for the sake of sanitizers,
	 * they're already initialized to something by now.)
	 */
	br_aes_ct64_ortho(q);
	br_aes_ct64_bitslice_encrypt(nrounds, sk_exp, q);
	br_aes_ct64_ortho(q);

	/* Update authenticator.  */
	br_aes_ct64_interleave_out(w, q[0], q[4]);
	le32enc(authctr + 4*0, w[0]);
	le32enc(authctr + 4*1, w[1]);
	le32enc(authctr + 4*2, w[2]);
	le32enc(authctr + 4*3, w[3]);

	/* Update counter.  */
	le32enc(authctr + 16 + 4*3, bswap32(c3be));

	/* Paranoia: Zero temporary buffers.  */
	explicit_memset(sk_exp, 0, sizeof sk_exp);
	explicit_memset(q, 0, sizeof q);
}
902
/*
 * aesbear64_probe()
 *
 *	Decide whether this implementation is usable: returns 0 if so,
 *	-1 if not.  Currently only the XTS tweak update is self-tested;
 *	the BearSSL ct64 primitives are not yet exercised here.
 */
static int
aesbear64_probe(void)
{

	/* Fail if the XTS tweak update self-test fails.  */
	if (aesbear64_xts_update_selftest() != 0)
		return -1;

	/* XXX test br_aes_ct64_bitslice_decrypt */
	/* XXX test br_aes_ct64_bitslice_encrypt */
	/* XXX test br_aes_ct64_keysched */
	/* XXX test br_aes_ct64_ortho */
	/* XXX test br_aes_ct64_skey_expand */

	return 0;
}
918
/*
 * aes_bear64_impl
 *
 *	AES implementation vector backed by BearSSL's 64-bit bitsliced
 *	constant-time AES code (aes_ct64).
 */
struct aes_impl aes_bear64_impl = {
	.ai_name = "BearSSL aes_ct64",
	.ai_probe = aesbear64_probe,
	.ai_setenckey = aesbear64_setenckey,
	.ai_setdeckey = aesbear64_setdeckey,
	.ai_enc = aesbear64_enc,
	.ai_dec = aesbear64_dec,
	.ai_cbc_enc = aesbear64_cbc_enc,
	.ai_cbc_dec = aesbear64_cbc_dec,
	.ai_xts_enc = aesbear64_xts_enc,
	.ai_xts_dec = aesbear64_xts_dec,
	.ai_cbcmac_update1 = aesbear64_cbcmac_update1,
	.ai_ccm_enc1 = aesbear64_ccm_enc1,
	.ai_ccm_dec1 = aesbear64_ccm_dec1,
};
934