11.1Sjmcneill/* $NetBSD: hwaes.c,v 1.1 2025/11/11 21:34:48 jmcneill Exp $ */
21.1Sjmcneill
31.1Sjmcneill/*-
41.1Sjmcneill * Copyright (c) 2025 Jared McNeill <jmcneill@invisible.ca>
51.1Sjmcneill * Copyright (c) 2020 The NetBSD Foundation, Inc.
61.1Sjmcneill * All rights reserved.
71.1Sjmcneill *
81.1Sjmcneill * Redistribution and use in source and binary forms, with or without
91.1Sjmcneill * modification, are permitted provided that the following conditions
101.1Sjmcneill * are met:
111.1Sjmcneill * 1. Redistributions of source code must retain the above copyright
121.1Sjmcneill *    notice, this list of conditions and the following disclaimer.
131.1Sjmcneill * 2. Redistributions in binary form must reproduce the above copyright
141.1Sjmcneill *    notice, this list of conditions and the following disclaimer in the
151.1Sjmcneill *    documentation and/or other materials provided with the distribution.
161.1Sjmcneill *
171.1Sjmcneill * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
181.1Sjmcneill * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
191.1Sjmcneill * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
201.1Sjmcneill * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
211.1Sjmcneill * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
221.1Sjmcneill * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
231.1Sjmcneill * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
241.1Sjmcneill * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
251.1Sjmcneill * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
261.1Sjmcneill * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
271.1Sjmcneill * POSSIBILITY OF SUCH DAMAGE.
281.1Sjmcneill */
291.1Sjmcneill
/*
 * A driver for the Nintendo Wii's AES engine. The driver registers an AES
 * implementation for kernel use via aes_md_init(). AES-128 requests are
 * accelerated by hardware and all other requests are passed through to the
 * default (BearSSL aes_ct) implementation.
 */
361.1Sjmcneill
371.1Sjmcneill#include <sys/cdefs.h>
381.1Sjmcneill__KERNEL_RCSID(0, "$NetBSD: hwaes.c,v 1.1 2025/11/11 21:34:48 jmcneill Exp $");
391.1Sjmcneill
401.1Sjmcneill#include <sys/param.h>
411.1Sjmcneill#include <sys/bus.h>
421.1Sjmcneill#include <sys/device.h>
431.1Sjmcneill#include <sys/systm.h>
441.1Sjmcneill#include <sys/callout.h>
451.1Sjmcneill#include <sys/buf.h>
461.1Sjmcneill
471.1Sjmcneill#include <machine/wii.h>
481.1Sjmcneill#include <machine/pio.h>
491.1Sjmcneill#include "hollywood.h"
501.1Sjmcneill
511.1Sjmcneill#include <crypto/aes/aes.h>
521.1Sjmcneill#include <crypto/aes/aes_bear.h>
531.1Sjmcneill#include <crypto/aes/aes_impl.h>
541.1Sjmcneill
/*
 * AES engine registers (byte offsets into the mapped register frame).
 */

/* Control/status register. */
#define AES_CTRL		0x00
/* Written to start an operation; polled until it reads back as clear. */
#define  AES_CTRL_EXEC		__BIT(31)
/* Not referenced elsewhere in this file. */
#define  AES_CTRL_IRQ		__BIT(30)
/* Checked after starting an operation; reported as an AES error. */
#define  AES_CTRL_ERR		__BIT(29)
/* Always set together with AES_CTRL_EXEC by this driver. */
#define  AES_CTRL_ENA		__BIT(28)
/* Set for decryption requests, clear for encryption requests. */
#define  AES_CTRL_DEC		__BIT(27)
/*
 * Set on every chunk after the first one of a multi-chunk CBC request
 * (presumably: keep chaining from the engine's current IV -- confirm
 * against hardware documentation).
 */
#define  AES_CTRL_IV		__BIT(12)
/* Number of 16-byte blocks to process, minus one. */
#define  AES_CTRL_BLOCKS	__BITS(11, 0)

/* DMA source and destination addresses. */
#define AES_SRC			0x04
#define AES_DEST		0x08

/* Key and IV ports; each is loaded with four consecutive 32-bit writes. */
#define AES_KEY			0x0c
#define AES_IV			0x10

/* Size in bytes of the register frame handed to bus_space_map(). */
#define AES_REG_SIZE		0x14

/*
 * Device limits.
 */
#define HWAES_BLOCK_LEN		16	/* bytes per AES block */
#define HWAES_ALIGN		16	/* alignment of the DMA buffer */
#define HWAES_MAX_BLOCKS	4096	/* blocks per engine invocation */
#define HWAES_MAX_AES_LEN	(HWAES_MAX_BLOCKS * HWAES_BLOCK_LEN)
771.1Sjmcneill
781.1Sjmcneillstatic int	hwaes_match(device_t, cfdata_t, void *);
791.1Sjmcneillstatic void	hwaes_attach(device_t, device_t, void *);
801.1Sjmcneill
811.1Sjmcneillstruct hwaes_softc;
821.1Sjmcneill
831.1Sjmcneillstruct hwaes_dma {
841.1Sjmcneill	bus_dmamap_t		dma_map;
851.1Sjmcneill	void			*dma_addr;
861.1Sjmcneill	size_t			dma_size;
871.1Sjmcneill	bus_dma_segment_t	dma_segs[1];
881.1Sjmcneill};
891.1Sjmcneill
901.1Sjmcneillstruct hwaes_softc {
911.1Sjmcneill	device_t		sc_dev;
921.1Sjmcneill	bus_space_tag_t		sc_bst;
931.1Sjmcneill	bus_space_handle_t	sc_bsh;
941.1Sjmcneill	bus_dma_tag_t		sc_dmat;
951.1Sjmcneill	struct hwaes_dma	sc_dma_bounce;
961.1Sjmcneill};
971.1Sjmcneill
/*
 * The attached AES engine instance.  Set at the end of hwaes_attach(), just
 * before the implementation is registered, and used by all of the AES
 * callbacks, which receive no device argument.
 */
struct hwaes_softc *hwaes_sc;
991.1Sjmcneill
/* 32-bit register accessors; `reg' is one of the AES_* offsets above. */
#define WR4(sc, reg, val)						\
	bus_space_write_4((sc)->sc_bst, (sc)->sc_bsh, (reg), (val))
#define RD4(sc, reg)							\
	bus_space_read_4((sc)->sc_bst, (sc)->sc_bsh, (reg))
1041.1Sjmcneill
1051.1SjmcneillCFATTACH_DECL_NEW(hwaes, sizeof(struct hwaes_softc),
1061.1Sjmcneill    hwaes_match, hwaes_attach, NULL, NULL);
1071.1Sjmcneill
1081.1Sjmcneillstatic int	hwaes_dma_alloc(struct hwaes_softc *, struct hwaes_dma *,
1091.1Sjmcneill				size_t, int);
1101.1Sjmcneillstatic void	hwaes_register(void);
1111.1Sjmcneill
1121.1Sjmcneillstatic int
1131.1Sjmcneillhwaes_match(device_t parent, cfdata_t cf, void *aux)
1141.1Sjmcneill{
1151.1Sjmcneill	return 1;
1161.1Sjmcneill}
1171.1Sjmcneill
1181.1Sjmcneillstatic void
1191.1Sjmcneillhwaes_attach(device_t parent, device_t self, void *aux)
1201.1Sjmcneill{
1211.1Sjmcneill	struct hollywood_attach_args *haa = aux;
1221.1Sjmcneill	struct hwaes_softc *sc = device_private(self);
1231.1Sjmcneill	int error;
1241.1Sjmcneill
1251.1Sjmcneill	sc->sc_dev = self;
1261.1Sjmcneill	sc->sc_dmat = haa->haa_dmat;
1271.1Sjmcneill	sc->sc_bst = haa->haa_bst;
1281.1Sjmcneill	error = bus_space_map(sc->sc_bst, haa->haa_addr, AES_REG_SIZE,
1291.1Sjmcneill	    0, &sc->sc_bsh);
1301.1Sjmcneill	if (error != 0) {
1311.1Sjmcneill		aprint_error(": couldn't map registers (%d)\n", error);
1321.1Sjmcneill		return;
1331.1Sjmcneill	}
1341.1Sjmcneill
1351.1Sjmcneill	aprint_naive("\n");
1361.1Sjmcneill	aprint_normal(": AES engine\n");
1371.1Sjmcneill
1381.1Sjmcneill	hollywood_claim_device(self, IOPAESEN);
1391.1Sjmcneill
1401.1Sjmcneill	error = hwaes_dma_alloc(sc, &sc->sc_dma_bounce, HWAES_MAX_AES_LEN,
1411.1Sjmcneill	    BUS_DMA_WAITOK);
1421.1Sjmcneill	if (error != 0) {
1431.1Sjmcneill		return;
1441.1Sjmcneill	}
1451.1Sjmcneill
1461.1Sjmcneill	WR4(sc, AES_CTRL, 0);
1471.1Sjmcneill	for (;;) {
1481.1Sjmcneill		if (RD4(sc, AES_CTRL) == 0) {
1491.1Sjmcneill			break;
1501.1Sjmcneill		}
1511.1Sjmcneill	}
1521.1Sjmcneill
1531.1Sjmcneill	hwaes_sc = sc;
1541.1Sjmcneill	hwaes_register();
1551.1Sjmcneill}
1561.1Sjmcneill
1571.1Sjmcneillstatic int
1581.1Sjmcneillhwaes_dma_alloc(struct hwaes_softc *sc, struct hwaes_dma *dma, size_t size,
1591.1Sjmcneill    int flags)
1601.1Sjmcneill{
1611.1Sjmcneill	int error, nsegs;
1621.1Sjmcneill
1631.1Sjmcneill	dma->dma_size = size;
1641.1Sjmcneill
1651.1Sjmcneill	error = bus_dmamem_alloc(sc->sc_dmat, dma->dma_size, HWAES_ALIGN, 0,
1661.1Sjmcneill	    dma->dma_segs, 1, &nsegs, flags);
1671.1Sjmcneill	if (error != 0) {
1681.1Sjmcneill		aprint_error_dev(sc->sc_dev,
1691.1Sjmcneill		    "bus_dmamem_alloc failed: %d\n", error);
1701.1Sjmcneill		goto alloc_failed;
1711.1Sjmcneill	}
1721.1Sjmcneill	error = bus_dmamem_map(sc->sc_dmat, dma->dma_segs, nsegs,
1731.1Sjmcneill	    dma->dma_size, &dma->dma_addr, flags);
1741.1Sjmcneill	if (error != 0) {
1751.1Sjmcneill		aprint_error_dev(sc->sc_dev,
1761.1Sjmcneill		    "bus_dmamem_map failed: %d\n", error);
1771.1Sjmcneill		goto map_failed;
1781.1Sjmcneill	}
1791.1Sjmcneill	error = bus_dmamap_create(sc->sc_dmat, dma->dma_size, nsegs,
1801.1Sjmcneill	    dma->dma_size, 0, flags, &dma->dma_map);
1811.1Sjmcneill	if (error != 0) {
1821.1Sjmcneill		aprint_error_dev(sc->sc_dev,
1831.1Sjmcneill		    "bus_dmamap_create failed: %d\n", error);
1841.1Sjmcneill		goto create_failed;
1851.1Sjmcneill	}
1861.1Sjmcneill	error = bus_dmamap_load(sc->sc_dmat, dma->dma_map, dma->dma_addr,
1871.1Sjmcneill	    dma->dma_size, NULL, flags);
1881.1Sjmcneill	if (error != 0) {
1891.1Sjmcneill		aprint_error_dev(sc->sc_dev,
1901.1Sjmcneill		    "bus_dmamap_load failed: %d\n", error);
1911.1Sjmcneill		goto load_failed;
1921.1Sjmcneill	}
1931.1Sjmcneill
1941.1Sjmcneill	return 0;
1951.1Sjmcneill
1961.1Sjmcneillload_failed:
1971.1Sjmcneill	bus_dmamap_destroy(sc->sc_dmat, dma->dma_map);
1981.1Sjmcneillcreate_failed:
1991.1Sjmcneill	bus_dmamem_unmap(sc->sc_dmat, dma->dma_addr, dma->dma_size);
2001.1Sjmcneillmap_failed:
2011.1Sjmcneill	bus_dmamem_free(sc->sc_dmat, dma->dma_segs, nsegs);
2021.1Sjmcneillalloc_failed:
2031.1Sjmcneill	return error;
2041.1Sjmcneill}
2051.1Sjmcneill
/*
 * hwaes_probe:
 *
 *	aes_impl probe callback.  Performs no checks and always returns 0.
 */
static int
hwaes_probe(void)
{
	const int status = 0;

	return status;
}
2111.1Sjmcneill
2121.1Sjmcneillstatic void
2131.1Sjmcneillhwaes_setenckey(struct aesenc *enc, const uint8_t *key, uint32_t nrounds)
2141.1Sjmcneill{
2151.1Sjmcneill	if (nrounds == AES_128_NROUNDS) {
2161.1Sjmcneill		enc->aese_aes.aes_rk[0] = be32dec(key + 4*0);
2171.1Sjmcneill		enc->aese_aes.aes_rk[1] = be32dec(key + 4*1);
2181.1Sjmcneill		enc->aese_aes.aes_rk[2] = be32dec(key + 4*2);
2191.1Sjmcneill		enc->aese_aes.aes_rk[3] = be32dec(key + 4*3);
2201.1Sjmcneill	} else {
2211.1Sjmcneill		aes_bear_impl.ai_setenckey(enc, key, nrounds);
2221.1Sjmcneill	}
2231.1Sjmcneill}
2241.1Sjmcneill
2251.1Sjmcneillstatic void
2261.1Sjmcneillhwaes_setdeckey(struct aesdec *dec, const uint8_t *key, uint32_t nrounds)
2271.1Sjmcneill{
2281.1Sjmcneill	if (nrounds == AES_128_NROUNDS) {
2291.1Sjmcneill		dec->aesd_aes.aes_rk[0] = be32dec(key + 4*0);
2301.1Sjmcneill		dec->aesd_aes.aes_rk[1] = be32dec(key + 4*1);
2311.1Sjmcneill		dec->aesd_aes.aes_rk[2] = be32dec(key + 4*2);
2321.1Sjmcneill		dec->aesd_aes.aes_rk[3] = be32dec(key + 4*3);
2331.1Sjmcneill	} else {
2341.1Sjmcneill		aes_bear_impl.ai_setdeckey(dec, key, nrounds);
2351.1Sjmcneill	}
2361.1Sjmcneill}
2371.1Sjmcneill
2381.1Sjmcneillstatic void
2391.1Sjmcneillhwaes_exec_sync(uint32_t flags, uint16_t blocks)
2401.1Sjmcneill{
2411.1Sjmcneill	struct hwaes_softc *sc = hwaes_sc;
2421.1Sjmcneill	uint32_t ctrl;
2431.1Sjmcneill
2441.1Sjmcneill	KASSERT(blocks > 0);
2451.1Sjmcneill	KASSERT(blocks <= HWAES_MAX_BLOCKS);
2461.1Sjmcneill
2471.1Sjmcneill	WR4(sc, AES_SRC, sc->sc_dma_bounce.dma_segs[0].ds_addr);
2481.1Sjmcneill	WR4(sc, AES_DEST, sc->sc_dma_bounce.dma_segs[0].ds_addr);
2491.1Sjmcneill
2501.1Sjmcneill	ctrl = AES_CTRL_EXEC | AES_CTRL_ENA | flags;
2511.1Sjmcneill	ctrl |= __SHIFTIN(blocks - 1, AES_CTRL_BLOCKS);
2521.1Sjmcneill
2531.1Sjmcneill	WR4(sc, AES_CTRL, ctrl);
2541.1Sjmcneill	for (;;) {
2551.1Sjmcneill		ctrl = RD4(sc, AES_CTRL);
2561.1Sjmcneill		if ((ctrl & AES_CTRL_ERR) != 0) {
2571.1Sjmcneill			printf("AES error, AES_CTRL = %#x\n", ctrl);
2581.1Sjmcneill			break;
2591.1Sjmcneill		}
2601.1Sjmcneill		if ((ctrl & AES_CTRL_EXEC) == 0) {
2611.1Sjmcneill			break;
2621.1Sjmcneill		}
2631.1Sjmcneill	}
2641.1Sjmcneill}
2651.1Sjmcneill
2661.1Sjmcneillstatic void
2671.1Sjmcneillhwaes_enc(const struct aesenc *enc, const uint8_t in[static 16],
2681.1Sjmcneill    uint8_t out[static 16], uint32_t nrounds)
2691.1Sjmcneill{
2701.1Sjmcneill	struct hwaes_softc *sc = hwaes_sc;
2711.1Sjmcneill	unsigned n;
2721.1Sjmcneill	int s;
2731.1Sjmcneill
2741.1Sjmcneill	if (nrounds != AES_128_NROUNDS) {
2751.1Sjmcneill		aes_bear_impl.ai_enc(enc, in, out, nrounds);
2761.1Sjmcneill		return;
2771.1Sjmcneill	}
2781.1Sjmcneill
2791.1Sjmcneill	s = splvm();
2801.1Sjmcneill
2811.1Sjmcneill	for (n = 0; n < 4; n++) {
2821.1Sjmcneill		WR4(sc, AES_IV, 0);
2831.1Sjmcneill	}
2841.1Sjmcneill	for (n = 0; n < 4; n++) {
2851.1Sjmcneill		WR4(sc, AES_KEY, enc->aese_aes.aes_rk[n]);
2861.1Sjmcneill	}
2871.1Sjmcneill	memcpy(sc->sc_dma_bounce.dma_addr, in, HWAES_BLOCK_LEN);
2881.1Sjmcneill	bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map,
2891.1Sjmcneill	    0, HWAES_BLOCK_LEN, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
2901.1Sjmcneill	hwaes_exec_sync(0, 1);
2911.1Sjmcneill	bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map,
2921.1Sjmcneill	    0, HWAES_BLOCK_LEN, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
2931.1Sjmcneill	memcpy(out, sc->sc_dma_bounce.dma_addr, HWAES_BLOCK_LEN);
2941.1Sjmcneill
2951.1Sjmcneill	splx(s);
2961.1Sjmcneill}
2971.1Sjmcneill
2981.1Sjmcneillstatic void
2991.1Sjmcneillhwaes_encN(const struct aesenc *enc, const uint8_t in[static 16],
3001.1Sjmcneill    uint8_t out[static 16], size_t nblocks)
3011.1Sjmcneill{
3021.1Sjmcneill	for (size_t n = 0; n < nblocks; n++) {
3031.1Sjmcneill		hwaes_enc(enc, &in[n * HWAES_BLOCK_LEN],
3041.1Sjmcneill		    &out[n * HWAES_BLOCK_LEN], AES_128_NROUNDS);
3051.1Sjmcneill	}
3061.1Sjmcneill}
3071.1Sjmcneill
3081.1Sjmcneillstatic void
3091.1Sjmcneillhwaes_dec(const struct aesdec *dec, const uint8_t in[static 16],
3101.1Sjmcneill    uint8_t out[static 16], uint32_t nrounds)
3111.1Sjmcneill{
3121.1Sjmcneill	struct hwaes_softc *sc = hwaes_sc;
3131.1Sjmcneill	unsigned n;
3141.1Sjmcneill	int s;
3151.1Sjmcneill
3161.1Sjmcneill	if (nrounds != AES_128_NROUNDS) {
3171.1Sjmcneill		aes_bear_impl.ai_dec(dec, in, out, nrounds);
3181.1Sjmcneill		return;
3191.1Sjmcneill	}
3201.1Sjmcneill
3211.1Sjmcneill	s = splvm();
3221.1Sjmcneill
3231.1Sjmcneill	for (n = 0; n < 4; n++) {
3241.1Sjmcneill		WR4(sc, AES_IV, 0);
3251.1Sjmcneill	}
3261.1Sjmcneill	for (n = 0; n < 4; n++) {
3271.1Sjmcneill		WR4(sc, AES_KEY, dec->aesd_aes.aes_rk[n]);
3281.1Sjmcneill	}
3291.1Sjmcneill	memcpy(sc->sc_dma_bounce.dma_addr, in, HWAES_BLOCK_LEN);
3301.1Sjmcneill	bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map,
3311.1Sjmcneill	    0, HWAES_BLOCK_LEN, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3321.1Sjmcneill	hwaes_exec_sync(AES_CTRL_DEC, 1);
3331.1Sjmcneill	bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map,
3341.1Sjmcneill	    0, HWAES_BLOCK_LEN, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3351.1Sjmcneill	memcpy(out, sc->sc_dma_bounce.dma_addr, HWAES_BLOCK_LEN);
3361.1Sjmcneill
3371.1Sjmcneill	splx(s);
3381.1Sjmcneill}
3391.1Sjmcneill
3401.1Sjmcneillstatic void
3411.1Sjmcneillhwaes_decN(const struct aesdec *dec, const uint8_t in[static 16],
3421.1Sjmcneill    uint8_t out[static 16], size_t nblocks)
3431.1Sjmcneill{
3441.1Sjmcneill	for (size_t n = 0; n < nblocks; n++) {
3451.1Sjmcneill		hwaes_dec(dec, &in[n * HWAES_BLOCK_LEN],
3461.1Sjmcneill		    &out[n * HWAES_BLOCK_LEN], AES_128_NROUNDS);
3471.1Sjmcneill	}
3481.1Sjmcneill}
3491.1Sjmcneill
3501.1Sjmcneillstatic void
3511.1Sjmcneillhwaes_cbc_enc(const struct aesenc *enc, const uint8_t in[static 16],
3521.1Sjmcneill    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
3531.1Sjmcneill    uint32_t nrounds)
3541.1Sjmcneill{
3551.1Sjmcneill	struct hwaes_softc *sc = hwaes_sc;
3561.1Sjmcneill	const uint8_t *inp = in;
3571.1Sjmcneill	uint8_t *outp = out;
3581.1Sjmcneill	uint32_t flags;
3591.1Sjmcneill	unsigned n;
3601.1Sjmcneill	int s;
3611.1Sjmcneill
3621.1Sjmcneill	if (nrounds != AES_128_NROUNDS) {
3631.1Sjmcneill		aes_bear_impl.ai_cbc_enc(enc, in, out, nbytes, iv, nrounds);
3641.1Sjmcneill		return;
3651.1Sjmcneill	}
3661.1Sjmcneill
3671.1Sjmcneill	KASSERT(nbytes % HWAES_BLOCK_LEN == 0);
3681.1Sjmcneill	if (nbytes == 0) {
3691.1Sjmcneill		return;
3701.1Sjmcneill	}
3711.1Sjmcneill
3721.1Sjmcneill	s = splvm();
3731.1Sjmcneill
3741.1Sjmcneill	for (n = 0; n < 4; n++) {
3751.1Sjmcneill		WR4(sc, AES_IV, be32dec(&iv[n * 4]));
3761.1Sjmcneill	}
3771.1Sjmcneill	for (n = 0; n < 4; n++) {
3781.1Sjmcneill		WR4(sc, AES_KEY, enc->aese_aes.aes_rk[n]);
3791.1Sjmcneill	}
3801.1Sjmcneill	flags = 0;
3811.1Sjmcneill	while (nbytes > 0) {
3821.1Sjmcneill		const size_t blocks = MIN(nbytes / HWAES_BLOCK_LEN,
3831.1Sjmcneill					  HWAES_MAX_BLOCKS);
3841.1Sjmcneill
3851.1Sjmcneill		memcpy(sc->sc_dma_bounce.dma_addr, inp,
3861.1Sjmcneill		    blocks * HWAES_BLOCK_LEN);
3871.1Sjmcneill		bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map,
3881.1Sjmcneill		    0, blocks * HWAES_BLOCK_LEN,
3891.1Sjmcneill		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
3901.1Sjmcneill		hwaes_exec_sync(flags, blocks);
3911.1Sjmcneill		bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map,
3921.1Sjmcneill		    0, blocks * HWAES_BLOCK_LEN,
3931.1Sjmcneill		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
3941.1Sjmcneill		memcpy(outp, sc->sc_dma_bounce.dma_addr,
3951.1Sjmcneill		    blocks * HWAES_BLOCK_LEN);
3961.1Sjmcneill
3971.1Sjmcneill		nbytes -= blocks * HWAES_BLOCK_LEN;
3981.1Sjmcneill		inp += blocks * HWAES_BLOCK_LEN;
3991.1Sjmcneill		outp += blocks * HWAES_BLOCK_LEN;
4001.1Sjmcneill		flags |= AES_CTRL_IV;
4011.1Sjmcneill	}
4021.1Sjmcneill
4031.1Sjmcneill	memcpy(iv, outp - HWAES_BLOCK_LEN, HWAES_BLOCK_LEN);
4041.1Sjmcneill
4051.1Sjmcneill	splx(s);
4061.1Sjmcneill}
4071.1Sjmcneill
4081.1Sjmcneillstatic void
4091.1Sjmcneillhwaes_cbc_dec(const struct aesdec *dec, const uint8_t in[static 16],
4101.1Sjmcneill    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
4111.1Sjmcneill    uint32_t nrounds)
4121.1Sjmcneill{
4131.1Sjmcneill	struct hwaes_softc *sc = hwaes_sc;
4141.1Sjmcneill	const uint8_t *inp = in;
4151.1Sjmcneill	uint8_t *outp = out;
4161.1Sjmcneill	uint32_t flags;
4171.1Sjmcneill	unsigned n;
4181.1Sjmcneill	int s;
4191.1Sjmcneill
4201.1Sjmcneill	if (nrounds != AES_128_NROUNDS) {
4211.1Sjmcneill		aes_bear_impl.ai_cbc_dec(dec, in, out, nbytes, iv, nrounds);
4221.1Sjmcneill		return;
4231.1Sjmcneill	}
4241.1Sjmcneill
4251.1Sjmcneill	KASSERT(nbytes % HWAES_BLOCK_LEN == 0);
4261.1Sjmcneill	if (nbytes == 0) {
4271.1Sjmcneill		return;
4281.1Sjmcneill	}
4291.1Sjmcneill
4301.1Sjmcneill	s = splvm();
4311.1Sjmcneill
4321.1Sjmcneill	for (n = 0; n < 4; n++) {
4331.1Sjmcneill		WR4(sc, AES_IV, be32dec(&iv[n * 4]));
4341.1Sjmcneill	}
4351.1Sjmcneill
4361.1Sjmcneill	memcpy(iv, inp + nbytes - HWAES_BLOCK_LEN, HWAES_BLOCK_LEN);
4371.1Sjmcneill
4381.1Sjmcneill	for (n = 0; n < 4; n++) {
4391.1Sjmcneill		WR4(sc, AES_KEY, dec->aesd_aes.aes_rk[n]);
4401.1Sjmcneill	}
4411.1Sjmcneill	flags = AES_CTRL_DEC;
4421.1Sjmcneill	while (nbytes > 0) {
4431.1Sjmcneill		const size_t blocks = MIN(nbytes / HWAES_BLOCK_LEN,
4441.1Sjmcneill					  HWAES_MAX_BLOCKS);
4451.1Sjmcneill
4461.1Sjmcneill		memcpy(sc->sc_dma_bounce.dma_addr, inp,
4471.1Sjmcneill		    blocks * HWAES_BLOCK_LEN);
4481.1Sjmcneill		bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map,
4491.1Sjmcneill		    0, blocks * HWAES_BLOCK_LEN,
4501.1Sjmcneill		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
4511.1Sjmcneill		hwaes_exec_sync(flags, blocks);
4521.1Sjmcneill		bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map,
4531.1Sjmcneill		    0, blocks * HWAES_BLOCK_LEN,
4541.1Sjmcneill		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
4551.1Sjmcneill		memcpy(outp, sc->sc_dma_bounce.dma_addr,
4561.1Sjmcneill		    blocks * HWAES_BLOCK_LEN);
4571.1Sjmcneill
4581.1Sjmcneill		nbytes -= blocks * HWAES_BLOCK_LEN;
4591.1Sjmcneill		inp += blocks * HWAES_BLOCK_LEN;
4601.1Sjmcneill		outp += blocks * HWAES_BLOCK_LEN;
4611.1Sjmcneill		flags |= AES_CTRL_IV;
4621.1Sjmcneill	}
4631.1Sjmcneill
4641.1Sjmcneill	splx(s);
4651.1Sjmcneill}
4661.1Sjmcneill
/*
 * hwaes_xts_update:
 *
 *	Advance the XTS tweak to the next block.  The tweak is held as four
 *	32-bit words, t0 least significant.  The 128-bit value is shifted
 *	left by one bit, and if the bit shifted out of t3 was set, 0x87 is
 *	XORed into t0.
 *
 *	The four pointers must refer to distinct words.
 */
static void
hwaes_xts_update(uint32_t *t0, uint32_t *t1, uint32_t *t2, uint32_t *t3)
{
	uint32_t * const word[4] = { t0, t1, t2, t3 };
	/* What enters the bottom word: 0x87 if the top bit is set, else 0. */
	uint32_t carry = (0 - (*t3 >> 31)) & 0x87;

	for (unsigned i = 0; i < 4; i++) {
		const uint32_t msb = *word[i] >> 31;

		*word[i] = (*word[i] << 1) ^ carry;
		carry = msb;
	}
}
4811.1Sjmcneill
4821.1Sjmcneillstatic void
4831.1Sjmcneillhwaes_xts_enc(const struct aesenc *enc, const uint8_t in[static 16],
4841.1Sjmcneill    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
4851.1Sjmcneill    uint32_t nrounds)
4861.1Sjmcneill{
4871.1Sjmcneill	uint8_t block[16];
4881.1Sjmcneill	uint8_t tle[16];
4891.1Sjmcneill	uint32_t t[4];
4901.1Sjmcneill	const uint8_t *inp = in;
4911.1Sjmcneill	uint8_t *outp = out;
4921.1Sjmcneill
4931.1Sjmcneill	if (nrounds != AES_128_NROUNDS) {
4941.1Sjmcneill		aes_bear_impl.ai_xts_enc(enc, in, out, nbytes, tweak, nrounds);
4951.1Sjmcneill		return;
4961.1Sjmcneill	}
4971.1Sjmcneill
4981.1Sjmcneill	KASSERT(nbytes % 16 == 0);
4991.1Sjmcneill
5001.1Sjmcneill	t[0] = le32dec(tweak + 4*0);
5011.1Sjmcneill	t[1] = le32dec(tweak + 4*1);
5021.1Sjmcneill	t[2] = le32dec(tweak + 4*2);
5031.1Sjmcneill	t[3] = le32dec(tweak + 4*3);
5041.1Sjmcneill
5051.1Sjmcneill	while (nbytes > 0) {
5061.1Sjmcneill		le32enc(tle + 4*0, t[0]);
5071.1Sjmcneill		le32enc(tle + 4*1, t[1]);
5081.1Sjmcneill		le32enc(tle + 4*2, t[2]);
5091.1Sjmcneill		le32enc(tle + 4*3, t[3]);
5101.1Sjmcneill
5111.1Sjmcneill		for (unsigned n = 0; n < 16; n++) {
5121.1Sjmcneill			block[n] = inp[n] ^ tle[n];
5131.1Sjmcneill		}
5141.1Sjmcneill
5151.1Sjmcneill		hwaes_encN(enc, block, block, 1);
5161.1Sjmcneill
5171.1Sjmcneill		for (unsigned n = 0; n < 16; n++) {
5181.1Sjmcneill			outp[n] = block[n] ^ tle[n];
5191.1Sjmcneill		}
5201.1Sjmcneill
5211.1Sjmcneill		hwaes_xts_update(&t[0], &t[1], &t[2], &t[3]);
5221.1Sjmcneill
5231.1Sjmcneill		nbytes -= HWAES_BLOCK_LEN;
5241.1Sjmcneill		inp += HWAES_BLOCK_LEN;
5251.1Sjmcneill		outp += HWAES_BLOCK_LEN;
5261.1Sjmcneill	}
5271.1Sjmcneill
5281.1Sjmcneill	le32enc(tweak + 4*0, t[0]);
5291.1Sjmcneill	le32enc(tweak + 4*1, t[1]);
5301.1Sjmcneill	le32enc(tweak + 4*2, t[2]);
5311.1Sjmcneill	le32enc(tweak + 4*3, t[3]);
5321.1Sjmcneill
5331.1Sjmcneill	explicit_memset(t, 0, sizeof(t));
5341.1Sjmcneill	explicit_memset(block, 0, sizeof(block));
5351.1Sjmcneill	explicit_memset(tle, 0, sizeof(tle));
5361.1Sjmcneill}
5371.1Sjmcneill
5381.1Sjmcneillstatic void
5391.1Sjmcneillhwaes_xts_dec(const struct aesdec *dec, const uint8_t in[static 16],
5401.1Sjmcneill    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
5411.1Sjmcneill    uint32_t nrounds)
5421.1Sjmcneill{
5431.1Sjmcneill	uint8_t block[16];
5441.1Sjmcneill	uint8_t tle[16];
5451.1Sjmcneill	uint32_t t[4];
5461.1Sjmcneill	const uint8_t *inp = in;
5471.1Sjmcneill	uint8_t *outp = out;
5481.1Sjmcneill
5491.1Sjmcneill	if (nrounds != AES_128_NROUNDS) {
5501.1Sjmcneill		aes_bear_impl.ai_xts_dec(dec, in, out, nbytes, tweak, nrounds);
5511.1Sjmcneill		return;
5521.1Sjmcneill	}
5531.1Sjmcneill
5541.1Sjmcneill	KASSERT(nbytes % 16 == 0);
5551.1Sjmcneill
5561.1Sjmcneill	t[0] = le32dec(tweak + 4*0);
5571.1Sjmcneill	t[1] = le32dec(tweak + 4*1);
5581.1Sjmcneill	t[2] = le32dec(tweak + 4*2);
5591.1Sjmcneill	t[3] = le32dec(tweak + 4*3);
5601.1Sjmcneill
5611.1Sjmcneill	while (nbytes > 0) {
5621.1Sjmcneill		le32enc(tle + 4*0, t[0]);
5631.1Sjmcneill		le32enc(tle + 4*1, t[1]);
5641.1Sjmcneill		le32enc(tle + 4*2, t[2]);
5651.1Sjmcneill		le32enc(tle + 4*3, t[3]);
5661.1Sjmcneill
5671.1Sjmcneill		for (unsigned n = 0; n < 16; n++) {
5681.1Sjmcneill			block[n] = inp[n] ^ tle[n];
5691.1Sjmcneill		}
5701.1Sjmcneill
5711.1Sjmcneill		hwaes_decN(dec, block, block, 1);
5721.1Sjmcneill
5731.1Sjmcneill		for (unsigned n = 0; n < 16; n++) {
5741.1Sjmcneill			outp[n] = block[n] ^ tle[n];
5751.1Sjmcneill		}
5761.1Sjmcneill
5771.1Sjmcneill		hwaes_xts_update(&t[0], &t[1], &t[2], &t[3]);
5781.1Sjmcneill
5791.1Sjmcneill		nbytes -= HWAES_BLOCK_LEN;
5801.1Sjmcneill		inp += HWAES_BLOCK_LEN;
5811.1Sjmcneill		outp += HWAES_BLOCK_LEN;
5821.1Sjmcneill	}
5831.1Sjmcneill
5841.1Sjmcneill	le32enc(tweak + 4*0, t[0]);
5851.1Sjmcneill	le32enc(tweak + 4*1, t[1]);
5861.1Sjmcneill	le32enc(tweak + 4*2, t[2]);
5871.1Sjmcneill	le32enc(tweak + 4*3, t[3]);
5881.1Sjmcneill
5891.1Sjmcneill	explicit_memset(t, 0, sizeof(t));
5901.1Sjmcneill	explicit_memset(block, 0, sizeof(block));
5911.1Sjmcneill	explicit_memset(tle, 0, sizeof(tle));
5921.1Sjmcneill}
5931.1Sjmcneill
5941.1Sjmcneillstatic void
5951.1Sjmcneillhwaes_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16],
5961.1Sjmcneill    size_t nbytes, uint8_t auth0[static 16], uint32_t nrounds)
5971.1Sjmcneill{
5981.1Sjmcneill	const uint8_t *inp = in;
5991.1Sjmcneill
6001.1Sjmcneill	if (nrounds != AES_128_NROUNDS) {
6011.1Sjmcneill		aes_bear_impl.ai_cbcmac_update1(enc, in, nbytes, auth0, nrounds);
6021.1Sjmcneill		return;
6031.1Sjmcneill	}
6041.1Sjmcneill
6051.1Sjmcneill	KASSERT(nbytes % 16 == 0);
6061.1Sjmcneill
6071.1Sjmcneill	while (nbytes > 0) {
6081.1Sjmcneill		for (unsigned n = 0; n < 16; n++) {
6091.1Sjmcneill			auth0[n] = auth0[n] ^ inp[n];
6101.1Sjmcneill		}
6111.1Sjmcneill
6121.1Sjmcneill		hwaes_encN(enc, auth0, auth0, 1);
6131.1Sjmcneill
6141.1Sjmcneill		nbytes -= HWAES_BLOCK_LEN;
6151.1Sjmcneill		inp += HWAES_BLOCK_LEN;
6161.1Sjmcneill	}
6171.1Sjmcneill}
6181.1Sjmcneill
6191.1Sjmcneillstatic void
6201.1Sjmcneillhwaes_ccm_enc1(const struct aesenc *enc, const uint8_t in[static 16],
6211.1Sjmcneill    uint8_t out[static 16], size_t nbytes, uint8_t authctr0[static 32],
6221.1Sjmcneill    uint32_t nrounds)
6231.1Sjmcneill{
6241.1Sjmcneill	const uint8_t *inp = in;
6251.1Sjmcneill	uint8_t *outp = out;
6261.1Sjmcneill	uint32_t c[4];
6271.1Sjmcneill
6281.1Sjmcneill	if (nrounds != AES_128_NROUNDS) {
6291.1Sjmcneill		aes_bear_impl.ai_ccm_enc1(enc, in, out, nbytes, authctr0, nrounds);
6301.1Sjmcneill		return;
6311.1Sjmcneill	}
6321.1Sjmcneill
6331.1Sjmcneill	KASSERT(nbytes % 16 == 0);
6341.1Sjmcneill
6351.1Sjmcneill	c[0] = le32dec(authctr0 + 16 + 4*0);
6361.1Sjmcneill	c[1] = le32dec(authctr0 + 16 + 4*1);
6371.1Sjmcneill	c[2] = le32dec(authctr0 + 16 + 4*2);
6381.1Sjmcneill	c[3] = be32dec(authctr0 + 16 + 4*3);
6391.1Sjmcneill
6401.1Sjmcneill	while (nbytes > 0) {
6411.1Sjmcneill		for (unsigned n = 0; n < 16; n++) {
6421.1Sjmcneill			authctr0[n] = authctr0[n] ^ inp[n];
6431.1Sjmcneill		}
6441.1Sjmcneill
6451.1Sjmcneill		le32enc(authctr0 + 16 + 4*0, c[0]);
6461.1Sjmcneill		le32enc(authctr0 + 16 + 4*1, c[1]);
6471.1Sjmcneill		le32enc(authctr0 + 16 + 4*2, c[2]);
6481.1Sjmcneill		be32enc(authctr0 + 16 + 4*3, ++c[3]);
6491.1Sjmcneill
6501.1Sjmcneill		hwaes_encN(enc, authctr0, authctr0, 2);
6511.1Sjmcneill
6521.1Sjmcneill		for (unsigned n = 0; n < 16; n++) {
6531.1Sjmcneill			outp[n] = inp[n] ^ authctr0[n + 16];
6541.1Sjmcneill		}
6551.1Sjmcneill
6561.1Sjmcneill		nbytes -= HWAES_BLOCK_LEN;
6571.1Sjmcneill		inp += HWAES_BLOCK_LEN;
6581.1Sjmcneill		outp += HWAES_BLOCK_LEN;
6591.1Sjmcneill	}
6601.1Sjmcneill
6611.1Sjmcneill	le32enc(authctr0 + 16 + 4*0, c[0]);
6621.1Sjmcneill	le32enc(authctr0 + 16 + 4*1, c[1]);
6631.1Sjmcneill	le32enc(authctr0 + 16 + 4*2, c[2]);
6641.1Sjmcneill	be32enc(authctr0 + 16 + 4*3, c[3]);
6651.1Sjmcneill}
6661.1Sjmcneill
6671.1Sjmcneillstatic void
6681.1Sjmcneillhwaes_ccm_dec1(const struct aesenc *enc, const uint8_t in[static 16],
6691.1Sjmcneill    uint8_t out[static 16], size_t nbytes, uint8_t authctr0[static 32],
6701.1Sjmcneill    uint32_t nrounds)
6711.1Sjmcneill{
6721.1Sjmcneill	const uint8_t *inp = in;
6731.1Sjmcneill	uint8_t *outp = out;
6741.1Sjmcneill	uint32_t c[4];
6751.1Sjmcneill
6761.1Sjmcneill	if (nrounds != AES_128_NROUNDS) {
6771.1Sjmcneill		aes_bear_impl.ai_ccm_dec1(enc, in, out, nbytes, authctr0, nrounds);
6781.1Sjmcneill		return;
6791.1Sjmcneill	}
6801.1Sjmcneill
6811.1Sjmcneill	KASSERT(nbytes % 16 == 0);
6821.1Sjmcneill
6831.1Sjmcneill	c[0] = le32dec(authctr0 + 16 + 4*0);
6841.1Sjmcneill	c[1] = le32dec(authctr0 + 16 + 4*1);
6851.1Sjmcneill	c[2] = le32dec(authctr0 + 16 + 4*2);
6861.1Sjmcneill	c[3] = be32dec(authctr0 + 16 + 4*3);
6871.1Sjmcneill
6881.1Sjmcneill	be32enc(authctr0 + 16 + 4*3, ++c[3]);
6891.1Sjmcneill	hwaes_encN(enc, authctr0 + 16, authctr0 + 16, 1);
6901.1Sjmcneill
6911.1Sjmcneill	while (nbytes > 0) {
6921.1Sjmcneill		for (unsigned n = 0; n < 16; n++) {
6931.1Sjmcneill			outp[n] = authctr0[n + 16] ^ inp[n];
6941.1Sjmcneill			authctr0[n] = authctr0[n] ^ outp[n];
6951.1Sjmcneill		}
6961.1Sjmcneill		nbytes -= HWAES_BLOCK_LEN;
6971.1Sjmcneill		if (nbytes == 0) {
6981.1Sjmcneill			break;
6991.1Sjmcneill		}
7001.1Sjmcneill
7011.1Sjmcneill		inp += HWAES_BLOCK_LEN;
7021.1Sjmcneill		outp += HWAES_BLOCK_LEN;
7031.1Sjmcneill
7041.1Sjmcneill		le32enc(authctr0 + 16 + 4*0, c[0]);
7051.1Sjmcneill		le32enc(authctr0 + 16 + 4*1, c[1]);
7061.1Sjmcneill		le32enc(authctr0 + 16 + 4*2, c[2]);
7071.1Sjmcneill		be32enc(authctr0 + 16 + 4*3, ++c[3]);
7081.1Sjmcneill		hwaes_encN(enc, authctr0, authctr0, 2);
7091.1Sjmcneill	}
7101.1Sjmcneill	hwaes_encN(enc, authctr0, authctr0, 1);
7111.1Sjmcneill
7121.1Sjmcneill	le32enc(authctr0 + 16 + 4*0, c[0]);
7131.1Sjmcneill	le32enc(authctr0 + 16 + 4*1, c[1]);
7141.1Sjmcneill	le32enc(authctr0 + 16 + 4*2, c[2]);
7151.1Sjmcneill	be32enc(authctr0 + 16 + 4*3, c[3]);
7161.1Sjmcneill
7171.1Sjmcneill}
7181.1Sjmcneill
7191.1Sjmcneillstatic struct aes_impl aes_hwaes_impl = {
7201.1Sjmcneill	.ai_name = "Hollywood AES engine",
7211.1Sjmcneill	.ai_probe = hwaes_probe,
7221.1Sjmcneill	.ai_setenckey = hwaes_setenckey,
7231.1Sjmcneill	.ai_setdeckey = hwaes_setdeckey,
7241.1Sjmcneill	.ai_enc = hwaes_enc,
7251.1Sjmcneill	.ai_dec = hwaes_dec,
7261.1Sjmcneill	.ai_cbc_enc = hwaes_cbc_enc,
7271.1Sjmcneill	.ai_cbc_dec = hwaes_cbc_dec,
7281.1Sjmcneill	.ai_xts_enc = hwaes_xts_enc,
7291.1Sjmcneill	.ai_xts_dec = hwaes_xts_dec,
7301.1Sjmcneill	.ai_cbcmac_update1 = hwaes_cbcmac_update1,
7311.1Sjmcneill	.ai_ccm_enc1 = hwaes_ccm_enc1,
7321.1Sjmcneill	.ai_ccm_dec1 = hwaes_ccm_dec1,
7331.1Sjmcneill};
7341.1Sjmcneill
7351.1Sjmcneillstatic void
7361.1Sjmcneillhwaes_register(void)
7371.1Sjmcneill{
7381.1Sjmcneill	aes_md_init(&aes_hwaes_impl);
7391.1Sjmcneill}
740