/* $NetBSD: hwaes.c,v 1.1 2026/01/09 22:54:30 jmcneill Exp $ */

/*-
 * Copyright (c) 2025 Jared McNeill <jmcneill@invisible.ca>
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * A driver for the AES engine found in the Nintendo Wii (Hollywood) and
 * Wii U (Latte). The driver registers an AES implementation for kernel
 * use via aes_md_init(). AES-128 requests are accelerated by hardware
 * and all other requests are passed through to the default (BearSSL
 * aes_ct) implementation.
 */
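
/*
 * Illustrative sketch only (not part of the driver): once registered,
 * AES-128 requests from the usual aes(9) entry points are serviced by
 * the routines below, e.g. (prototypes per <crypto/aes/aes.h>):
 *
 *	struct aesenc enc;
 *	uint8_t key[16], iv[16], buf[512];
 *
 *	aes_setenckey128(&enc, key);
 *	aes_cbc_enc(&enc, buf, buf, sizeof(buf), iv, AES_128_NROUNDS);
 */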

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: hwaes.c,v 1.1 2026/01/09 22:54:30 jmcneill Exp $");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/device.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/buf.h>
#include <sys/cpu.h>

#include <machine/wii.h>
#include <machine/wiiu.h>
#include <machine/pio.h>
#include "ahb.h"

#include <crypto/aes/aes.h>
#include <crypto/aes/aes_bear.h>
#include <crypto/aes/aes_impl.h>

/* AES engine registers */
#define AES_CTRL		0x00
#define  AES_CTRL_EXEC		__BIT(31)
#define  AES_CTRL_IRQ		__BIT(30)
#define  AES_CTRL_ERR		__BIT(29)
#define  AES_CTRL_ENA		__BIT(28)
#define  AES_CTRL_DEC		__BIT(27)
#define  AES_CTRL_IV		__BIT(12)
#define  AES_CTRL_BLOCKS	__BITS(11, 0)
#define AES_SRC			0x04
#define AES_DEST		0x08
#define AES_KEY			0x0c
#define AES_IV			0x10

/* Register frame size */
#define AES_REG_SIZE		0x14

/* Device limits */
#define HWAES_BLOCK_LEN		16
#define HWAES_ALIGN		16
#define HWAES_MAX_BLOCKS	4096
#define HWAES_MAX_AES_LEN	(HWAES_BLOCK_LEN * HWAES_MAX_BLOCKS)

static int	hwaes_match(device_t, cfdata_t, void *);
static void	hwaes_attach(device_t, device_t, void *);

struct hwaes_softc;

struct hwaes_dma {
	bus_dmamap_t		dma_map;
	void			*dma_addr;
	size_t			dma_size;
	bus_dma_segment_t	dma_segs[1];
};

struct hwaes_softc {
	device_t		sc_dev;
	bus_space_tag_t		sc_bst;
	bus_space_handle_t	sc_bsh;
	bus_dma_tag_t		sc_dmat;
	struct hwaes_dma	sc_dma_bounce;
};

struct hwaes_softc *hwaes_sc;

#define WR4(sc, reg, val)	\
	bus_space_write_4((sc)->sc_bst, (sc)->sc_bsh, (reg), (val))
#define RD4(sc, reg)		\
	bus_space_read_4((sc)->sc_bst, (sc)->sc_bsh, (reg))

CFATTACH_DECL_NEW(hwaes, sizeof(struct hwaes_softc),
    hwaes_match, hwaes_attach, NULL, NULL);

static int	hwaes_dma_alloc(struct hwaes_softc *, struct hwaes_dma *,
				size_t, int);
static void	hwaes_register(void);

static int
hwaes_match(device_t parent, cfdata_t cf, void *aux)
{
	return 1;
}

static void
hwaes_attach(device_t parent, device_t self, void *aux)
{
	struct ahb_attach_args *aaa = aux;
	struct hwaes_softc *sc = device_private(self);
	bool enabled;
	int error;

	/*
	 * Since aes_md_init() expects per-CPU engines and we only have one,
	 * only enable AES offload in single CPU configurations.
	 */
	enabled = kcpuset_countset(kcpuset_attached) == 1;

	aprint_naive("\n");
	aprint_normal(": AES engine%s\n", enabled ? "" : " (disabled)");
	if (!enabled) {
		return;
	}

	sc->sc_dev = self;
	sc->sc_dmat = aaa->aaa_dmat;
	sc->sc_bst = aaa->aaa_bst;
	error = bus_space_map(sc->sc_bst, aaa->aaa_addr, AES_REG_SIZE,
	    0, &sc->sc_bsh);
	if (error != 0) {
		aprint_error_dev(self, "couldn't map registers (%d)\n", error);
		return;
	}

	ahb_claim_device(self, IOPAESEN);

	error = hwaes_dma_alloc(sc, &sc->sc_dma_bounce, HWAES_MAX_AES_LEN,
	    BUS_DMA_WAITOK);
	if (error != 0) {
		return;
	}

	WR4(sc, AES_CTRL, 0);
	for (;;) {
		if (RD4(sc, AES_CTRL) == 0) {
			break;
		}
	}

	hwaes_sc = sc;
	hwaes_register();
}

static int
hwaes_dma_alloc(struct hwaes_softc *sc, struct hwaes_dma *dma, size_t size,
    int flags)
{
	int error, nsegs;

	dma->dma_size = size;

	error = bus_dmamem_alloc(sc->sc_dmat, dma->dma_size, HWAES_ALIGN, 0,
	    dma->dma_segs, 1, &nsegs, flags);
	if (error != 0) {
		aprint_error_dev(sc->sc_dev,
		    "bus_dmamem_alloc failed: %d\n", error);
		goto alloc_failed;
	}
	error = bus_dmamem_map(sc->sc_dmat, dma->dma_segs, nsegs,
	    dma->dma_size, &dma->dma_addr, flags);
	if (error != 0) {
		aprint_error_dev(sc->sc_dev,
		    "bus_dmamem_map failed: %d\n", error);
		goto map_failed;
	}
	error = bus_dmamap_create(sc->sc_dmat, dma->dma_size, nsegs,
	    dma->dma_size, 0, flags, &dma->dma_map);
	if (error != 0) {
		aprint_error_dev(sc->sc_dev,
		    "bus_dmamap_create failed: %d\n", error);
		goto create_failed;
	}
	error = bus_dmamap_load(sc->sc_dmat, dma->dma_map, dma->dma_addr,
	    dma->dma_size, NULL, flags);
	if (error != 0) {
		aprint_error_dev(sc->sc_dev,
		    "bus_dmamap_load failed: %d\n", error);
		goto load_failed;
	}

	return 0;

load_failed:
	bus_dmamap_destroy(sc->sc_dmat, dma->dma_map);
create_failed:
	bus_dmamem_unmap(sc->sc_dmat, dma->dma_addr, dma->dma_size);
map_failed:
	bus_dmamem_free(sc->sc_dmat, dma->dma_segs, nsegs);
alloc_failed:
	return error;
}

static int
hwaes_probe(void)
{
	return 0;
}

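/*
 * For AES-128, only the four raw key words are stored, big-endian, in
 * the order the AES_KEY FIFO expects (hwaes_enc()/hwaes_dec() write
 * them with WR4()); the engine evidently performs its own key
 * expansion.  Other key sizes fall back to BearSSL.
 */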
static void
hwaes_setenckey(struct aesenc *enc, const uint8_t *key, uint32_t nrounds)
{
	if (nrounds == AES_128_NROUNDS) {
		enc->aese_aes.aes_rk[0] = be32dec(key + 4*0);
		enc->aese_aes.aes_rk[1] = be32dec(key + 4*1);
		enc->aese_aes.aes_rk[2] = be32dec(key + 4*2);
		enc->aese_aes.aes_rk[3] = be32dec(key + 4*3);
	} else {
		aes_bear_impl.ai_setenckey(enc, key, nrounds);
	}
}

static void
hwaes_setdeckey(struct aesdec *dec, const uint8_t *key, uint32_t nrounds)
{
	if (nrounds == AES_128_NROUNDS) {
		dec->aesd_aes.aes_rk[0] = be32dec(key + 4*0);
		dec->aesd_aes.aes_rk[1] = be32dec(key + 4*1);
		dec->aesd_aes.aes_rk[2] = be32dec(key + 4*2);
		dec->aesd_aes.aes_rk[3] = be32dec(key + 4*3);
	} else {
		aes_bear_impl.ai_setdeckey(dec, key, nrounds);
	}
}

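/*
 * Run the engine synchronously on the bounce buffer.  AES_SRC and
 * AES_DEST both point at the same DMA segment, the AES_CTRL_BLOCKS
 * field encodes the block count minus one, and we spin until the
 * hardware clears AES_CTRL_EXEC (or flags an error).
 */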
static void
hwaes_exec_sync(uint32_t flags, uint16_t blocks)
{
	struct hwaes_softc *sc = hwaes_sc;
	uint32_t ctrl;

	KASSERT(blocks > 0);
	KASSERT(blocks <= HWAES_MAX_BLOCKS);

	WR4(sc, AES_SRC, sc->sc_dma_bounce.dma_segs[0].ds_addr);
	WR4(sc, AES_DEST, sc->sc_dma_bounce.dma_segs[0].ds_addr);

	ctrl = AES_CTRL_EXEC | AES_CTRL_ENA | flags;
	ctrl |= __SHIFTIN(blocks - 1, AES_CTRL_BLOCKS);

	WR4(sc, AES_CTRL, ctrl);
	for (;;) {
		ctrl = RD4(sc, AES_CTRL);
		if ((ctrl & AES_CTRL_ERR) != 0) {
			printf("AES error, AES_CTRL = %#x\n", ctrl);
			break;
		}
		if ((ctrl & AES_CTRL_EXEC) == 0) {
			break;
		}
	}
}

static void
hwaes_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{
	struct hwaes_softc *sc = hwaes_sc;
	unsigned n;
	int s;

	if (nrounds != AES_128_NROUNDS) {
		aes_bear_impl.ai_enc(enc, in, out, nrounds);
		return;
	}

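	/*
	 * Block interrupts while we own the engine and bounce buffer.
	 * A zero IV is loaded and AES_CTRL_IV is left clear, so this
	 * single-block CBC operation is effectively ECB.
	 */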
	s = splvm();

	for (n = 0; n < 4; n++) {
		WR4(sc, AES_IV, 0);
	}
	for (n = 0; n < 4; n++) {
		WR4(sc, AES_KEY, enc->aese_aes.aes_rk[n]);
	}
	memcpy(sc->sc_dma_bounce.dma_addr, in, HWAES_BLOCK_LEN);
	bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map,
	    0, HWAES_BLOCK_LEN, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	hwaes_exec_sync(0, 1);
	bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map,
	    0, HWAES_BLOCK_LEN, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
	memcpy(out, sc->sc_dma_bounce.dma_addr, HWAES_BLOCK_LEN);

	splx(s);
}

static void
hwaes_encN(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks)
{
	for (size_t n = 0; n < nblocks; n++) {
		hwaes_enc(enc, &in[n * HWAES_BLOCK_LEN],
		    &out[n * HWAES_BLOCK_LEN], AES_128_NROUNDS);
	}
}

static void
hwaes_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{
	struct hwaes_softc *sc = hwaes_sc;
	unsigned n;
	int s;

	if (nrounds != AES_128_NROUNDS) {
		aes_bear_impl.ai_dec(dec, in, out, nrounds);
		return;
	}

	s = splvm();

	for (n = 0; n < 4; n++) {
		WR4(sc, AES_IV, 0);
	}
	for (n = 0; n < 4; n++) {
		WR4(sc, AES_KEY, dec->aesd_aes.aes_rk[n]);
	}
	memcpy(sc->sc_dma_bounce.dma_addr, in, HWAES_BLOCK_LEN);
	bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map,
	    0, HWAES_BLOCK_LEN, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	hwaes_exec_sync(AES_CTRL_DEC, 1);
	bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map,
	    0, HWAES_BLOCK_LEN, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
	memcpy(out, sc->sc_dma_bounce.dma_addr, HWAES_BLOCK_LEN);

	splx(s);
}

static void
hwaes_decN(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks)
{
	for (size_t n = 0; n < nblocks; n++) {
		hwaes_dec(dec, &in[n * HWAES_BLOCK_LEN],
		    &out[n * HWAES_BLOCK_LEN], AES_128_NROUNDS);
	}
}

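/*
 * CBC requests (hwaes_cbc_enc()/hwaes_cbc_dec()) are processed through
 * the bounce buffer in chunks of at most HWAES_MAX_BLOCKS blocks.  The
 * IV registers seed the first chunk; subsequent chunks set AES_CTRL_IV
 * so the engine chains from the last block of the previous run.
 */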
static void
hwaes_cbc_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
	struct hwaes_softc *sc = hwaes_sc;
	const uint8_t *inp = in;
	uint8_t *outp = out;
	uint32_t flags;
	unsigned n;
	int s;

	if (nrounds != AES_128_NROUNDS) {
		aes_bear_impl.ai_cbc_enc(enc, in, out, nbytes, iv, nrounds);
		return;
	}

	KASSERT(nbytes % HWAES_BLOCK_LEN == 0);
	if (nbytes == 0) {
		return;
	}

	s = splvm();

	for (n = 0; n < 4; n++) {
		WR4(sc, AES_IV, be32dec(&iv[n * 4]));
	}
	for (n = 0; n < 4; n++) {
		WR4(sc, AES_KEY, enc->aese_aes.aes_rk[n]);
	}
	flags = 0;
	while (nbytes > 0) {
		const size_t blocks = MIN(nbytes / HWAES_BLOCK_LEN,
					  HWAES_MAX_BLOCKS);

		memcpy(sc->sc_dma_bounce.dma_addr, inp,
		    blocks * HWAES_BLOCK_LEN);
		bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map,
		    0, blocks * HWAES_BLOCK_LEN,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
		hwaes_exec_sync(flags, blocks);
		bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map,
		    0, blocks * HWAES_BLOCK_LEN,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
		memcpy(outp, sc->sc_dma_bounce.dma_addr,
		    blocks * HWAES_BLOCK_LEN);

		nbytes -= blocks * HWAES_BLOCK_LEN;
		inp += blocks * HWAES_BLOCK_LEN;
		outp += blocks * HWAES_BLOCK_LEN;
		flags |= AES_CTRL_IV;
	}

	memcpy(iv, outp - HWAES_BLOCK_LEN, HWAES_BLOCK_LEN);

	splx(s);
}

static void
hwaes_cbc_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
	struct hwaes_softc *sc = hwaes_sc;
	const uint8_t *inp = in;
	uint8_t *outp = out;
	uint32_t flags;
	unsigned n;
	int s;

	if (nrounds != AES_128_NROUNDS) {
		aes_bear_impl.ai_cbc_dec(dec, in, out, nbytes, iv, nrounds);
		return;
	}

	KASSERT(nbytes % HWAES_BLOCK_LEN == 0);
	if (nbytes == 0) {
		return;
	}

	s = splvm();

	for (n = 0; n < 4; n++) {
		WR4(sc, AES_IV, be32dec(&iv[n * 4]));
	}

	memcpy(iv, inp + nbytes - HWAES_BLOCK_LEN, HWAES_BLOCK_LEN);

	for (n = 0; n < 4; n++) {
		WR4(sc, AES_KEY, dec->aesd_aes.aes_rk[n]);
	}
	flags = AES_CTRL_DEC;
	while (nbytes > 0) {
		const size_t blocks = MIN(nbytes / HWAES_BLOCK_LEN,
					  HWAES_MAX_BLOCKS);

		memcpy(sc->sc_dma_bounce.dma_addr, inp,
		    blocks * HWAES_BLOCK_LEN);
		bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map,
		    0, blocks * HWAES_BLOCK_LEN,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
		hwaes_exec_sync(flags, blocks);
		bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map,
		    0, blocks * HWAES_BLOCK_LEN,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
		memcpy(outp, sc->sc_dma_bounce.dma_addr,
		    blocks * HWAES_BLOCK_LEN);

		nbytes -= blocks * HWAES_BLOCK_LEN;
		inp += blocks * HWAES_BLOCK_LEN;
		outp += blocks * HWAES_BLOCK_LEN;
		flags |= AES_CTRL_IV;
	}

	splx(s);
}

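/*
 * Advance the 128-bit XTS tweak: multiply by x in GF(2^128) (the
 * standard XTS tweak update), folding the carry out of the top bit
 * back in with the 0x87 reduction polynomial.
 */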
static void
hwaes_xts_update(uint32_t *t0, uint32_t *t1, uint32_t *t2, uint32_t *t3)
{
	uint32_t s0, s1, s2, s3;

	s0 = *t0 >> 31;
	s1 = *t1 >> 31;
	s2 = *t2 >> 31;
	s3 = *t3 >> 31;
	*t0 = (*t0 << 1) ^ (-s3 & 0x87);
	*t1 = (*t1 << 1) ^ s0;
	*t2 = (*t2 << 1) ^ s1;
	*t3 = (*t3 << 1) ^ s2;
}

static void
hwaes_xts_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
	uint8_t block[16];
	uint8_t tle[16];
	uint32_t t[4];
	const uint8_t *inp = in;
	uint8_t *outp = out;

	if (nrounds != AES_128_NROUNDS) {
		aes_bear_impl.ai_xts_enc(enc, in, out, nbytes, tweak, nrounds);
		return;
	}

	KASSERT(nbytes % 16 == 0);

	t[0] = le32dec(tweak + 4*0);
	t[1] = le32dec(tweak + 4*1);
	t[2] = le32dec(tweak + 4*2);
	t[3] = le32dec(tweak + 4*3);

	while (nbytes > 0) {
		le32enc(tle + 4*0, t[0]);
		le32enc(tle + 4*1, t[1]);
		le32enc(tle + 4*2, t[2]);
		le32enc(tle + 4*3, t[3]);

		for (unsigned n = 0; n < 16; n++) {
			block[n] = inp[n] ^ tle[n];
		}

		hwaes_encN(enc, block, block, 1);

		for (unsigned n = 0; n < 16; n++) {
			outp[n] = block[n] ^ tle[n];
		}

		hwaes_xts_update(&t[0], &t[1], &t[2], &t[3]);

		nbytes -= HWAES_BLOCK_LEN;
		inp += HWAES_BLOCK_LEN;
		outp += HWAES_BLOCK_LEN;
	}

	le32enc(tweak + 4*0, t[0]);
	le32enc(tweak + 4*1, t[1]);
	le32enc(tweak + 4*2, t[2]);
	le32enc(tweak + 4*3, t[3]);

	explicit_memset(t, 0, sizeof(t));
	explicit_memset(block, 0, sizeof(block));
	explicit_memset(tle, 0, sizeof(tle));
}

static void
hwaes_xts_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
	uint8_t block[16];
	uint8_t tle[16];
	uint32_t t[4];
	const uint8_t *inp = in;
	uint8_t *outp = out;

	if (nrounds != AES_128_NROUNDS) {
		aes_bear_impl.ai_xts_dec(dec, in, out, nbytes, tweak, nrounds);
		return;
	}

	KASSERT(nbytes % 16 == 0);

	t[0] = le32dec(tweak + 4*0);
	t[1] = le32dec(tweak + 4*1);
	t[2] = le32dec(tweak + 4*2);
	t[3] = le32dec(tweak + 4*3);

	while (nbytes > 0) {
		le32enc(tle + 4*0, t[0]);
		le32enc(tle + 4*1, t[1]);
		le32enc(tle + 4*2, t[2]);
		le32enc(tle + 4*3, t[3]);

		for (unsigned n = 0; n < 16; n++) {
			block[n] = inp[n] ^ tle[n];
		}

		hwaes_decN(dec, block, block, 1);

		for (unsigned n = 0; n < 16; n++) {
			outp[n] = block[n] ^ tle[n];
		}

		hwaes_xts_update(&t[0], &t[1], &t[2], &t[3]);

		nbytes -= HWAES_BLOCK_LEN;
		inp += HWAES_BLOCK_LEN;
		outp += HWAES_BLOCK_LEN;
	}

	le32enc(tweak + 4*0, t[0]);
	le32enc(tweak + 4*1, t[1]);
	le32enc(tweak + 4*2, t[2]);
	le32enc(tweak + 4*3, t[3]);

	explicit_memset(t, 0, sizeof(t));
	explicit_memset(block, 0, sizeof(block));
	explicit_memset(tle, 0, sizeof(tle));
}

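/*
 * CBC-MAC update: XOR each input block into auth0 and encrypt in
 * place, keeping only the running MAC block.
 */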
static void
hwaes_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16],
    size_t nbytes, uint8_t auth0[static 16], uint32_t nrounds)
{
	const uint8_t *inp = in;

	if (nrounds != AES_128_NROUNDS) {
		aes_bear_impl.ai_cbcmac_update1(enc, in, nbytes, auth0,
		    nrounds);
		return;
	}

	KASSERT(nbytes % 16 == 0);

	while (nbytes > 0) {
		for (unsigned n = 0; n < 16; n++) {
			auth0[n] = auth0[n] ^ inp[n];
		}

		hwaes_encN(enc, auth0, auth0, 1);

		nbytes -= HWAES_BLOCK_LEN;
		inp += HWAES_BLOCK_LEN;
	}
}

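/*
 * CCM encryption.  authctr0 holds the CBC-MAC block followed by the
 * counter block; the last word of the counter block is kept as a
 * 32-bit big-endian counter in c[3].  Each iteration folds the
 * plaintext into the MAC, bumps the counter, and encrypts both blocks
 * in one two-block pass (MAC update plus CTR keystream).
 */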
static void
hwaes_ccm_enc1(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t authctr0[static 32],
    uint32_t nrounds)
{
	const uint8_t *inp = in;
	uint8_t *outp = out;
	uint32_t c[4];

	if (nrounds != AES_128_NROUNDS) {
		aes_bear_impl.ai_ccm_enc1(enc, in, out, nbytes, authctr0,
		    nrounds);
		return;
	}

	KASSERT(nbytes % 16 == 0);

	c[0] = le32dec(authctr0 + 16 + 4*0);
	c[1] = le32dec(authctr0 + 16 + 4*1);
	c[2] = le32dec(authctr0 + 16 + 4*2);
	c[3] = be32dec(authctr0 + 16 + 4*3);

	while (nbytes > 0) {
		for (unsigned n = 0; n < 16; n++) {
			authctr0[n] = authctr0[n] ^ inp[n];
		}

		le32enc(authctr0 + 16 + 4*0, c[0]);
		le32enc(authctr0 + 16 + 4*1, c[1]);
		le32enc(authctr0 + 16 + 4*2, c[2]);
		be32enc(authctr0 + 16 + 4*3, ++c[3]);

		hwaes_encN(enc, authctr0, authctr0, 2);

		for (unsigned n = 0; n < 16; n++) {
			outp[n] = inp[n] ^ authctr0[n + 16];
		}

		nbytes -= HWAES_BLOCK_LEN;
		inp += HWAES_BLOCK_LEN;
		outp += HWAES_BLOCK_LEN;
	}

	le32enc(authctr0 + 16 + 4*0, c[0]);
	le32enc(authctr0 + 16 + 4*1, c[1]);
	le32enc(authctr0 + 16 + 4*2, c[2]);
	be32enc(authctr0 + 16 + 4*3, c[3]);
}

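/*
 * CCM decryption.  The first keystream block is generated up front;
 * after each block is decrypted and folded into the MAC, the next MAC
 * update and the next keystream block are produced together with a
 * single two-block pass, and a final single-block pass finishes the
 * MAC.
 */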
static void
hwaes_ccm_dec1(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t authctr0[static 32],
    uint32_t nrounds)
{
	const uint8_t *inp = in;
	uint8_t *outp = out;
	uint32_t c[4];

	if (nrounds != AES_128_NROUNDS) {
		aes_bear_impl.ai_ccm_dec1(enc, in, out, nbytes, authctr0,
		    nrounds);
		return;
	}

	KASSERT(nbytes % 16 == 0);

	c[0] = le32dec(authctr0 + 16 + 4*0);
	c[1] = le32dec(authctr0 + 16 + 4*1);
	c[2] = le32dec(authctr0 + 16 + 4*2);
	c[3] = be32dec(authctr0 + 16 + 4*3);

	be32enc(authctr0 + 16 + 4*3, ++c[3]);
	hwaes_encN(enc, authctr0 + 16, authctr0 + 16, 1);

	while (nbytes > 0) {
		for (unsigned n = 0; n < 16; n++) {
			outp[n] = authctr0[n + 16] ^ inp[n];
			authctr0[n] = authctr0[n] ^ outp[n];
		}
		nbytes -= HWAES_BLOCK_LEN;
		if (nbytes == 0) {
			break;
		}

		inp += HWAES_BLOCK_LEN;
		outp += HWAES_BLOCK_LEN;

		le32enc(authctr0 + 16 + 4*0, c[0]);
		le32enc(authctr0 + 16 + 4*1, c[1]);
		le32enc(authctr0 + 16 + 4*2, c[2]);
		be32enc(authctr0 + 16 + 4*3, ++c[3]);
		hwaes_encN(enc, authctr0, authctr0, 2);
	}
	hwaes_encN(enc, authctr0, authctr0, 1);

	le32enc(authctr0 + 16 + 4*0, c[0]);
	le32enc(authctr0 + 16 + 4*1, c[1]);
	le32enc(authctr0 + 16 + 4*2, c[2]);
	be32enc(authctr0 + 16 + 4*3, c[3]);
}

static struct aes_impl aes_hwaes_impl = {
	.ai_name = NULL,	/* filled in by hwaes_register */
	.ai_probe = hwaes_probe,
	.ai_setenckey = hwaes_setenckey,
	.ai_setdeckey = hwaes_setdeckey,
	.ai_enc = hwaes_enc,
	.ai_dec = hwaes_dec,
	.ai_cbc_enc = hwaes_cbc_enc,
	.ai_cbc_dec = hwaes_cbc_dec,
	.ai_xts_enc = hwaes_xts_enc,
	.ai_xts_dec = hwaes_xts_dec,
	.ai_cbcmac_update1 = hwaes_cbcmac_update1,
	.ai_ccm_enc1 = hwaes_ccm_enc1,
	.ai_ccm_dec1 = hwaes_ccm_dec1,
};

static void
hwaes_register(void)
{
	if (wiiu_plat) {
		aes_hwaes_impl.ai_name = "Latte AES engine";
	} else {
		aes_hwaes_impl.ai_name = "Hollywood AES engine";
	}
	aes_md_init(&aes_hwaes_impl);
}