/* $NetBSD: hwaes.c,v 1.1 2025/11/11 21:34:48 jmcneill Exp $ */

/*-
 * Copyright (c) 2025 Jared McNeill <jmcneill@invisible.ca>
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * A driver for the Nintendo Wii's AES engine. The driver registers an AES
 * implementation for kernel use via aes_md_init(). AES-128 requests are
 * accelerated by hardware and all other requests are passed through to the
 * default (BearSSL aes_ct) implementation.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: hwaes.c,v 1.1 2025/11/11 21:34:48 jmcneill Exp $");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/device.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/buf.h>

#include <machine/wii.h>
#include <machine/pio.h>
#include "hollywood.h"

#include <crypto/aes/aes.h>
#include <crypto/aes/aes_bear.h>
#include <crypto/aes/aes_impl.h>

/* AES engine registers */
#define AES_CTRL		0x00
#define  AES_CTRL_EXEC		__BIT(31)
#define  AES_CTRL_IRQ		__BIT(30)
#define  AES_CTRL_ERR		__BIT(29)
#define  AES_CTRL_ENA		__BIT(28)
#define  AES_CTRL_DEC		__BIT(27)
#define  AES_CTRL_IV		__BIT(12)
#define  AES_CTRL_BLOCKS	__BITS(11, 0)
#define AES_SRC			0x04
#define AES_DEST		0x08
#define AES_KEY			0x0c
#define AES_IV			0x10
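
/*
 * Programming model, judging by the usage below: AES_SRC and AES_DEST
 * take 16-byte-aligned physical addresses, AES_KEY and AES_IV are each
 * loaded by four consecutive 32-bit writes, and AES_CTRL_BLOCKS holds
 * the block count minus one. Setting AES_CTRL_IV apparently makes the
 * engine chain from the previous operation instead of reloading the IV
 * registers.
 */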

/* Register frame size */
#define AES_REG_SIZE		0x14

/* Device limits */
#define HWAES_BLOCK_LEN		16
#define HWAES_ALIGN		16
#define HWAES_MAX_BLOCKS	4096
#define HWAES_MAX_AES_LEN	(HWAES_BLOCK_LEN * HWAES_MAX_BLOCKS)

static int	hwaes_match(device_t, cfdata_t, void *);
static void	hwaes_attach(device_t, device_t, void *);

struct hwaes_softc;

struct hwaes_dma {
	bus_dmamap_t		dma_map;
	void			*dma_addr;
	size_t			dma_size;
	bus_dma_segment_t	dma_segs[1];
};

struct hwaes_softc {
	device_t		sc_dev;
	bus_space_tag_t		sc_bst;
	bus_space_handle_t	sc_bsh;
	bus_dma_tag_t		sc_dmat;
	struct hwaes_dma	sc_dma_bounce;
};

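/*
 * The aes_impl entry points below are handed no driver handle, so the
 * single attached instance is recorded here at attach time.
 */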
struct hwaes_softc *hwaes_sc;

#define WR4(sc, reg, val)	\
	bus_space_write_4((sc)->sc_bst, (sc)->sc_bsh, (reg), (val))
#define RD4(sc, reg)		\
	bus_space_read_4((sc)->sc_bst, (sc)->sc_bsh, (reg))

CFATTACH_DECL_NEW(hwaes, sizeof(struct hwaes_softc),
    hwaes_match, hwaes_attach, NULL, NULL);

static int	hwaes_dma_alloc(struct hwaes_softc *, struct hwaes_dma *,
				size_t, int);
static void	hwaes_register(void);

static int
hwaes_match(device_t parent, cfdata_t cf, void *aux)
{
	return 1;
}

static void
hwaes_attach(device_t parent, device_t self, void *aux)
{
	struct hollywood_attach_args *haa = aux;
	struct hwaes_softc *sc = device_private(self);
	int error;

	sc->sc_dev = self;
	sc->sc_dmat = haa->haa_dmat;
	sc->sc_bst = haa->haa_bst;
	error = bus_space_map(sc->sc_bst, haa->haa_addr, AES_REG_SIZE,
	    0, &sc->sc_bsh);
	if (error != 0) {
		aprint_error(": couldn't map registers (%d)\n", error);
		return;
	}

	aprint_naive("\n");
	aprint_normal(": AES engine\n");

	hollywood_claim_device(self, IOPAESEN);

	error = hwaes_dma_alloc(sc, &sc->sc_dma_bounce, HWAES_MAX_AES_LEN,
	    BUS_DMA_WAITOK);
	if (error != 0) {
		return;
	}

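	/*
	 * Reset the engine and wait for the control register to read
	 * back as zero before publishing the implementation.
	 */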
	WR4(sc, AES_CTRL, 0);
	for (;;) {
		if (RD4(sc, AES_CTRL) == 0) {
			break;
		}
	}

	hwaes_sc = sc;
	hwaes_register();
}

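/*
 * Allocate a single physically contiguous, 16-byte-aligned DMA bounce
 * buffer, sized for the engine's 4096-block maximum, and leave it mapped
 * and loaded for the lifetime of the driver.
 */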
static int
hwaes_dma_alloc(struct hwaes_softc *sc, struct hwaes_dma *dma, size_t size,
    int flags)
{
	int error, nsegs;

	dma->dma_size = size;

	error = bus_dmamem_alloc(sc->sc_dmat, dma->dma_size, HWAES_ALIGN, 0,
	    dma->dma_segs, 1, &nsegs, flags);
	if (error != 0) {
		aprint_error_dev(sc->sc_dev,
		    "bus_dmamem_alloc failed: %d\n", error);
		goto alloc_failed;
	}
	error = bus_dmamem_map(sc->sc_dmat, dma->dma_segs, nsegs,
	    dma->dma_size, &dma->dma_addr, flags);
	if (error != 0) {
		aprint_error_dev(sc->sc_dev,
		    "bus_dmamem_map failed: %d\n", error);
		goto map_failed;
	}
	error = bus_dmamap_create(sc->sc_dmat, dma->dma_size, nsegs,
	    dma->dma_size, 0, flags, &dma->dma_map);
	if (error != 0) {
		aprint_error_dev(sc->sc_dev,
		    "bus_dmamap_create failed: %d\n", error);
		goto create_failed;
	}
	error = bus_dmamap_load(sc->sc_dmat, dma->dma_map, dma->dma_addr,
	    dma->dma_size, NULL, flags);
	if (error != 0) {
		aprint_error_dev(sc->sc_dev,
		    "bus_dmamap_load failed: %d\n", error);
		goto load_failed;
	}

	return 0;

load_failed:
	bus_dmamap_destroy(sc->sc_dmat, dma->dma_map);
create_failed:
	bus_dmamem_unmap(sc->sc_dmat, dma->dma_addr, dma->dma_size);
map_failed:
	bus_dmamem_free(sc->sc_dmat, dma->dma_segs, nsegs);
alloc_failed:
	return error;
}

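/*
 * ai_probe reports whether this implementation may be used (0 meaning
 * usable). Returning 0 unconditionally is safe here because
 * hwaes_register() only runs once the hardware has attached.
 */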
static int
hwaes_probe(void)
{
	return 0;
}

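/*
 * The engine performs its own key schedule, so for AES-128 the raw key
 * is simply stashed (as big-endian words) in the first four round-key
 * slots for later loading into AES_KEY. Other key sizes take the
 * BearSSL path.
 */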
static void
hwaes_setenckey(struct aesenc *enc, const uint8_t *key, uint32_t nrounds)
{
	if (nrounds == AES_128_NROUNDS) {
		enc->aese_aes.aes_rk[0] = be32dec(key + 4*0);
		enc->aese_aes.aes_rk[1] = be32dec(key + 4*1);
		enc->aese_aes.aes_rk[2] = be32dec(key + 4*2);
		enc->aese_aes.aes_rk[3] = be32dec(key + 4*3);
	} else {
		aes_bear_impl.ai_setenckey(enc, key, nrounds);
	}
}

static void
hwaes_setdeckey(struct aesdec *dec, const uint8_t *key, uint32_t nrounds)
{
	if (nrounds == AES_128_NROUNDS) {
		dec->aesd_aes.aes_rk[0] = be32dec(key + 4*0);
		dec->aesd_aes.aes_rk[1] = be32dec(key + 4*1);
		dec->aesd_aes.aes_rk[2] = be32dec(key + 4*2);
		dec->aesd_aes.aes_rk[3] = be32dec(key + 4*3);
	} else {
		aes_bear_impl.ai_setdeckey(dec, key, nrounds);
	}
}

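/*
 * Run one synchronous, in-place operation on the bounce buffer. The
 * caller has already loaded the key and IV; this loads the DMA address,
 * the block count (encoded as count minus one) and any extra flags,
 * then busy-waits for AES_CTRL_EXEC to clear, bailing out if the engine
 * reports an error.
 */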
static void
hwaes_exec_sync(uint32_t flags, uint16_t blocks)
{
	struct hwaes_softc *sc = hwaes_sc;
	uint32_t ctrl;

	KASSERT(blocks > 0);
	KASSERT(blocks <= HWAES_MAX_BLOCKS);

	WR4(sc, AES_SRC, sc->sc_dma_bounce.dma_segs[0].ds_addr);
	WR4(sc, AES_DEST, sc->sc_dma_bounce.dma_segs[0].ds_addr);

	ctrl = AES_CTRL_EXEC | AES_CTRL_ENA | flags;
	ctrl |= __SHIFTIN(blocks - 1, AES_CTRL_BLOCKS);

	WR4(sc, AES_CTRL, ctrl);
	for (;;) {
		ctrl = RD4(sc, AES_CTRL);
		if ((ctrl & AES_CTRL_ERR) != 0) {
			printf("AES error, AES_CTRL = %#x\n", ctrl);
			break;
		}
		if ((ctrl & AES_CTRL_EXEC) == 0) {
			break;
		}
	}
}

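/*
 * Single-block encryption. The engine appears to always apply CBC
 * chaining, so a zero IV is loaded to reduce it to the plain (ECB)
 * block transform; splvm() serializes access to the shared bounce
 * buffer and registers.
 */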
static void
hwaes_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{
	struct hwaes_softc *sc = hwaes_sc;
	unsigned n;
	int s;

	if (nrounds != AES_128_NROUNDS) {
		aes_bear_impl.ai_enc(enc, in, out, nrounds);
		return;
	}

	s = splvm();

	for (n = 0; n < 4; n++) {
		WR4(sc, AES_IV, 0);
	}
	for (n = 0; n < 4; n++) {
		WR4(sc, AES_KEY, enc->aese_aes.aes_rk[n]);
	}
	memcpy(sc->sc_dma_bounce.dma_addr, in, HWAES_BLOCK_LEN);
	bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map,
	    0, HWAES_BLOCK_LEN, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	hwaes_exec_sync(0, 1);
	bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map,
	    0, HWAES_BLOCK_LEN, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
	memcpy(out, sc->sc_dma_bounce.dma_addr, HWAES_BLOCK_LEN);

	splx(s);
}

static void
hwaes_encN(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks)
{
	for (size_t n = 0; n < nblocks; n++) {
		hwaes_enc(enc, &in[n * HWAES_BLOCK_LEN],
		    &out[n * HWAES_BLOCK_LEN], AES_128_NROUNDS);
	}
}

static void
hwaes_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{
	struct hwaes_softc *sc = hwaes_sc;
	unsigned n;
	int s;

	if (nrounds != AES_128_NROUNDS) {
		aes_bear_impl.ai_dec(dec, in, out, nrounds);
		return;
	}

	s = splvm();

	for (n = 0; n < 4; n++) {
		WR4(sc, AES_IV, 0);
	}
	for (n = 0; n < 4; n++) {
		WR4(sc, AES_KEY, dec->aesd_aes.aes_rk[n]);
	}
	memcpy(sc->sc_dma_bounce.dma_addr, in, HWAES_BLOCK_LEN);
	bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map,
	    0, HWAES_BLOCK_LEN, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	hwaes_exec_sync(AES_CTRL_DEC, 1);
	bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map,
	    0, HWAES_BLOCK_LEN, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
	memcpy(out, sc->sc_dma_bounce.dma_addr, HWAES_BLOCK_LEN);

	splx(s);
}

static void
hwaes_decN(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks)
{
	for (size_t n = 0; n < nblocks; n++) {
		hwaes_dec(dec, &in[n * HWAES_BLOCK_LEN],
		    &out[n * HWAES_BLOCK_LEN], AES_128_NROUNDS);
	}
}

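/*
 * CBC is what the engine computes natively. Data is pushed through the
 * bounce buffer in chunks of at most HWAES_MAX_BLOCKS blocks; the first
 * chunk loads the caller's IV into the IV registers and later chunks
 * set AES_CTRL_IV to keep chaining from the previous chunk.
 */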
static void
hwaes_cbc_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
	struct hwaes_softc *sc = hwaes_sc;
	const uint8_t *inp = in;
	uint8_t *outp = out;
	uint32_t flags;
	unsigned n;
	int s;

	if (nrounds != AES_128_NROUNDS) {
		aes_bear_impl.ai_cbc_enc(enc, in, out, nbytes, iv, nrounds);
		return;
	}

	KASSERT(nbytes % HWAES_BLOCK_LEN == 0);
	if (nbytes == 0) {
		return;
	}

	s = splvm();

	for (n = 0; n < 4; n++) {
		WR4(sc, AES_IV, be32dec(&iv[n * 4]));
	}
	for (n = 0; n < 4; n++) {
		WR4(sc, AES_KEY, enc->aese_aes.aes_rk[n]);
	}
	flags = 0;
	while (nbytes > 0) {
		const size_t blocks = MIN(nbytes / HWAES_BLOCK_LEN,
					  HWAES_MAX_BLOCKS);

		memcpy(sc->sc_dma_bounce.dma_addr, inp,
		    blocks * HWAES_BLOCK_LEN);
		bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map,
		    0, blocks * HWAES_BLOCK_LEN,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
		hwaes_exec_sync(flags, blocks);
		bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map,
		    0, blocks * HWAES_BLOCK_LEN,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
		memcpy(outp, sc->sc_dma_bounce.dma_addr,
		    blocks * HWAES_BLOCK_LEN);

		nbytes -= blocks * HWAES_BLOCK_LEN;
		inp += blocks * HWAES_BLOCK_LEN;
		outp += blocks * HWAES_BLOCK_LEN;
		flags |= AES_CTRL_IV;
	}

	memcpy(iv, outp - HWAES_BLOCK_LEN, HWAES_BLOCK_LEN);

	splx(s);
}

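/*
 * CBC decryption. The next IV (the final ciphertext block) is saved up
 * front because in and out may alias, in which case that block would be
 * overwritten before the loop finishes.
 */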
static void
hwaes_cbc_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
	struct hwaes_softc *sc = hwaes_sc;
	const uint8_t *inp = in;
	uint8_t *outp = out;
	uint32_t flags;
	unsigned n;
	int s;

	if (nrounds != AES_128_NROUNDS) {
		aes_bear_impl.ai_cbc_dec(dec, in, out, nbytes, iv, nrounds);
		return;
	}

	KASSERT(nbytes % HWAES_BLOCK_LEN == 0);
	if (nbytes == 0) {
		return;
	}

	s = splvm();

	for (n = 0; n < 4; n++) {
		WR4(sc, AES_IV, be32dec(&iv[n * 4]));
	}

	memcpy(iv, inp + nbytes - HWAES_BLOCK_LEN, HWAES_BLOCK_LEN);

	for (n = 0; n < 4; n++) {
		WR4(sc, AES_KEY, dec->aesd_aes.aes_rk[n]);
	}
	flags = AES_CTRL_DEC;
	while (nbytes > 0) {
		const size_t blocks = MIN(nbytes / HWAES_BLOCK_LEN,
					  HWAES_MAX_BLOCKS);

		memcpy(sc->sc_dma_bounce.dma_addr, inp,
		    blocks * HWAES_BLOCK_LEN);
		bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map,
		    0, blocks * HWAES_BLOCK_LEN,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
		hwaes_exec_sync(flags, blocks);
		bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map,
		    0, blocks * HWAES_BLOCK_LEN,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
		memcpy(outp, sc->sc_dma_bounce.dma_addr,
		    blocks * HWAES_BLOCK_LEN);

		nbytes -= blocks * HWAES_BLOCK_LEN;
		inp += blocks * HWAES_BLOCK_LEN;
		outp += blocks * HWAES_BLOCK_LEN;
		flags |= AES_CTRL_IV;
	}

	splx(s);
}

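/*
 * Multiply the XTS tweak by x in GF(2^128), kept as four little-endian
 * 32-bit words: shift the 128-bit value left one bit and fold the
 * carried-out bit back in with the reduction polynomial 0x87. This
 * mirrors the tweak-update helper in the BearSSL-based software
 * implementation.
 */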
static void
hwaes_xts_update(uint32_t *t0, uint32_t *t1, uint32_t *t2, uint32_t *t3)
{
	uint32_t s0, s1, s2, s3;

	s0 = *t0 >> 31;
	s1 = *t1 >> 31;
	s2 = *t2 >> 31;
	s3 = *t3 >> 31;
	*t0 = (*t0 << 1) ^ (-s3 & 0x87);
	*t1 = (*t1 << 1) ^ s0;
	*t2 = (*t2 << 1) ^ s1;
	*t3 = (*t3 << 1) ^ s2;
}

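/*
 * XTS assembled in software around the hardware block cipher:
 * out = E(in ^ T) ^ T for each block, with the tweak T advanced by
 * hwaes_xts_update(). Each block takes a full trip through the engine
 * via hwaes_encN(), so this path is not batched.
 */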
static void
hwaes_xts_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
	uint8_t block[16];
	uint8_t tle[16];
	uint32_t t[4];
	const uint8_t *inp = in;
	uint8_t *outp = out;

	if (nrounds != AES_128_NROUNDS) {
		aes_bear_impl.ai_xts_enc(enc, in, out, nbytes, tweak, nrounds);
		return;
	}

	KASSERT(nbytes % 16 == 0);

	t[0] = le32dec(tweak + 4*0);
	t[1] = le32dec(tweak + 4*1);
	t[2] = le32dec(tweak + 4*2);
	t[3] = le32dec(tweak + 4*3);

	while (nbytes > 0) {
		le32enc(tle + 4*0, t[0]);
		le32enc(tle + 4*1, t[1]);
		le32enc(tle + 4*2, t[2]);
		le32enc(tle + 4*3, t[3]);

		for (unsigned n = 0; n < 16; n++) {
			block[n] = inp[n] ^ tle[n];
		}

		hwaes_encN(enc, block, block, 1);

		for (unsigned n = 0; n < 16; n++) {
			outp[n] = block[n] ^ tle[n];
		}

		hwaes_xts_update(&t[0], &t[1], &t[2], &t[3]);

		nbytes -= HWAES_BLOCK_LEN;
		inp += HWAES_BLOCK_LEN;
		outp += HWAES_BLOCK_LEN;
	}

	le32enc(tweak + 4*0, t[0]);
	le32enc(tweak + 4*1, t[1]);
	le32enc(tweak + 4*2, t[2]);
	le32enc(tweak + 4*3, t[3]);

	explicit_memset(t, 0, sizeof(t));
	explicit_memset(block, 0, sizeof(block));
	explicit_memset(tle, 0, sizeof(tle));
}

static void
hwaes_xts_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
	uint8_t block[16];
	uint8_t tle[16];
	uint32_t t[4];
	const uint8_t *inp = in;
	uint8_t *outp = out;

	if (nrounds != AES_128_NROUNDS) {
		aes_bear_impl.ai_xts_dec(dec, in, out, nbytes, tweak, nrounds);
		return;
	}

	KASSERT(nbytes % 16 == 0);

	t[0] = le32dec(tweak + 4*0);
	t[1] = le32dec(tweak + 4*1);
	t[2] = le32dec(tweak + 4*2);
	t[3] = le32dec(tweak + 4*3);

	while (nbytes > 0) {
		le32enc(tle + 4*0, t[0]);
		le32enc(tle + 4*1, t[1]);
		le32enc(tle + 4*2, t[2]);
		le32enc(tle + 4*3, t[3]);

		for (unsigned n = 0; n < 16; n++) {
			block[n] = inp[n] ^ tle[n];
		}

		hwaes_decN(dec, block, block, 1);

		for (unsigned n = 0; n < 16; n++) {
			outp[n] = block[n] ^ tle[n];
		}

		hwaes_xts_update(&t[0], &t[1], &t[2], &t[3]);

		nbytes -= HWAES_BLOCK_LEN;
		inp += HWAES_BLOCK_LEN;
		outp += HWAES_BLOCK_LEN;
	}

	le32enc(tweak + 4*0, t[0]);
	le32enc(tweak + 4*1, t[1]);
	le32enc(tweak + 4*2, t[2]);
	le32enc(tweak + 4*3, t[3]);

	explicit_memset(t, 0, sizeof(t));
	explicit_memset(block, 0, sizeof(block));
	explicit_memset(tle, 0, sizeof(tle));
}

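/*
 * CBC-MAC step for CCM: auth0 <- E(K, auth0 ^ m) for each 16-byte
 * block m of the input, one hardware block operation at a time.
 */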
static void
hwaes_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16],
    size_t nbytes, uint8_t auth0[static 16], uint32_t nrounds)
{
	const uint8_t *inp = in;

	if (nrounds != AES_128_NROUNDS) {
		aes_bear_impl.ai_cbcmac_update1(enc, in, nbytes, auth0, nrounds);
		return;
	}

	KASSERT(nbytes % 16 == 0);

	while (nbytes > 0) {
		for (unsigned n = 0; n < 16; n++) {
			auth0[n] = auth0[n] ^ inp[n];
		}

		hwaes_encN(enc, auth0, auth0, 1);

		nbytes -= HWAES_BLOCK_LEN;
		inp += HWAES_BLOCK_LEN;
	}
}

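/*
 * CCM encryption step. authctr0 holds the 16-byte CBC-MAC state
 * followed by the 16-byte counter block; the counter's low word is a
 * big-endian 32-bit count (hence the be32dec/be32enc on the final word,
 * with the other words round-tripped unchanged). Each iteration folds
 * the plaintext into the MAC state, bumps the counter, encrypts both
 * blocks, and XORs the counter keystream into the output.
 */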
static void
hwaes_ccm_enc1(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t authctr0[static 32],
    uint32_t nrounds)
{
	const uint8_t *inp = in;
	uint8_t *outp = out;
	uint32_t c[4];

	if (nrounds != AES_128_NROUNDS) {
		aes_bear_impl.ai_ccm_enc1(enc, in, out, nbytes, authctr0, nrounds);
		return;
	}

	KASSERT(nbytes % 16 == 0);

	c[0] = le32dec(authctr0 + 16 + 4*0);
	c[1] = le32dec(authctr0 + 16 + 4*1);
	c[2] = le32dec(authctr0 + 16 + 4*2);
	c[3] = be32dec(authctr0 + 16 + 4*3);

	while (nbytes > 0) {
		for (unsigned n = 0; n < 16; n++) {
			authctr0[n] = authctr0[n] ^ inp[n];
		}

		le32enc(authctr0 + 16 + 4*0, c[0]);
		le32enc(authctr0 + 16 + 4*1, c[1]);
		le32enc(authctr0 + 16 + 4*2, c[2]);
		be32enc(authctr0 + 16 + 4*3, ++c[3]);

		hwaes_encN(enc, authctr0, authctr0, 2);

		for (unsigned n = 0; n < 16; n++) {
			outp[n] = inp[n] ^ authctr0[n + 16];
		}

		nbytes -= HWAES_BLOCK_LEN;
		inp += HWAES_BLOCK_LEN;
		outp += HWAES_BLOCK_LEN;
	}

	le32enc(authctr0 + 16 + 4*0, c[0]);
	le32enc(authctr0 + 16 + 4*1, c[1]);
	le32enc(authctr0 + 16 + 4*2, c[2]);
	be32enc(authctr0 + 16 + 4*3, c[3]);
}

static void
hwaes_ccm_dec1(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t authctr0[static 32],
    uint32_t nrounds)
{
	const uint8_t *inp = in;
	uint8_t *outp = out;
	uint32_t c[4];

	if (nrounds != AES_128_NROUNDS) {
		aes_bear_impl.ai_ccm_dec1(enc, in, out, nbytes, authctr0, nrounds);
		return;
	}

	KASSERT(nbytes % 16 == 0);

	c[0] = le32dec(authctr0 + 16 + 4*0);
	c[1] = le32dec(authctr0 + 16 + 4*1);
	c[2] = le32dec(authctr0 + 16 + 4*2);
	c[3] = be32dec(authctr0 + 16 + 4*3);

	be32enc(authctr0 + 16 + 4*3, ++c[3]);
	hwaes_encN(enc, authctr0 + 16, authctr0 + 16, 1);

	while (nbytes > 0) {
		for (unsigned n = 0; n < 16; n++) {
			outp[n] = authctr0[n + 16] ^ inp[n];
			authctr0[n] = authctr0[n] ^ outp[n];
		}
		nbytes -= HWAES_BLOCK_LEN;
		if (nbytes == 0) {
			break;
		}

		inp += HWAES_BLOCK_LEN;
		outp += HWAES_BLOCK_LEN;

		le32enc(authctr0 + 16 + 4*0, c[0]);
		le32enc(authctr0 + 16 + 4*1, c[1]);
		le32enc(authctr0 + 16 + 4*2, c[2]);
		be32enc(authctr0 + 16 + 4*3, ++c[3]);
		hwaes_encN(enc, authctr0, authctr0, 2);
	}
	hwaes_encN(enc, authctr0, authctr0, 1);

	le32enc(authctr0 + 16 + 4*0, c[0]);
	le32enc(authctr0 + 16 + 4*1, c[1]);
	le32enc(authctr0 + 16 + 4*2, c[2]);
	be32enc(authctr0 + 16 + 4*3, c[3]);
}

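/*
 * Glue for crypto/aes. Every entry point falls back to the BearSSL
 * implementation for key sizes other than AES-128, so registering this
 * implementation never loses functionality.
 */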
static struct aes_impl aes_hwaes_impl = {
	.ai_name = "Hollywood AES engine",
	.ai_probe = hwaes_probe,
	.ai_setenckey = hwaes_setenckey,
	.ai_setdeckey = hwaes_setdeckey,
	.ai_enc = hwaes_enc,
	.ai_dec = hwaes_dec,
	.ai_cbc_enc = hwaes_cbc_enc,
	.ai_cbc_dec = hwaes_cbc_dec,
	.ai_xts_enc = hwaes_xts_enc,
	.ai_xts_dec = hwaes_xts_dec,
	.ai_cbcmac_update1 = hwaes_cbcmac_update1,
	.ai_ccm_enc1 = hwaes_ccm_enc1,
	.ai_ccm_dec1 = hwaes_ccm_dec1,
};

static void
hwaes_register(void)
{
	aes_md_init(&aes_hwaes_impl);
}