/* $NetBSD: hwaes.c,v 1.1 2025/11/11 21:34:48 jmcneill Exp $ */

/*-
 * Copyright (c) 2025 Jared McNeill <jmcneill@invisible.ca>
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * A driver for the Nintendo Wii's AES engine. The driver registers an AES
 * implementation for kernel use via aes_md_init(). AES-128 requests are
 * accelerated by hardware and all other requests are passed through to the
 * default (BearSSL aes_ct) implementation.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: hwaes.c,v 1.1 2025/11/11 21:34:48 jmcneill Exp $");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/device.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/buf.h>

#include <machine/wii.h>
#include <machine/pio.h>
#include "hollywood.h"

#include <crypto/aes/aes.h>
#include <crypto/aes/aes_bear.h>
#include <crypto/aes/aes_impl.h>

/*
 * AES engine registers (byte offsets from the mapped register base).
 *
 * AES_CTRL is written with EXEC|ENA plus mode flags to start an operation
 * and is then polled until EXEC clears or ERR is set.  AES_KEY and AES_IV
 * are each written four times in a row, one 32-bit word per write, to load
 * a 128-bit value.  AES_SRC and AES_DEST take DMA bus addresses.
 */
#define AES_CTRL		0x00
#define AES_CTRL_EXEC		__BIT(31)	/* start; reads back set while busy */
#define AES_CTRL_IRQ		__BIT(30)	/* not used by this driver */
#define AES_CTRL_ERR		__BIT(29)	/* checked after starting an operation */
#define AES_CTRL_ENA		__BIT(28)	/* always set together with EXEC */
#define AES_CTRL_DEC		__BIT(27)	/* set for decryption requests */
#define AES_CTRL_IV		__BIT(12)	/* set on 2nd and later chunks of a CBC
						 * request; presumably continues the
						 * chain from the previous chunk --
						 * confirm against hardware docs */
#define AES_CTRL_BLOCKS		__BITS(11, 0)	/* number of 16-byte blocks, minus one */
#define AES_SRC			0x04
#define AES_DEST		0x08
#define AES_KEY			0x0c
#define AES_IV			0x10

/* Register frame size: five 32-bit registers, offsets 0x00 through 0x10. */
#define AES_REG_SIZE		0x14

/*
 * Device limits.  AES_CTRL_BLOCKS is a 12-bit field holding (blocks - 1),
 * so one operation covers at most 4096 blocks; the bounce buffer is sized
 * to hold exactly that much data.
 */
#define HWAES_BLOCK_LEN		16
#define HWAES_ALIGN		16	/* alignment requested for the DMA buffer */
#define HWAES_MAX_BLOCKS	4096
#define HWAES_MAX_AES_LEN	(HWAES_BLOCK_LEN * HWAES_MAX_BLOCKS)

/* autoconf(9) entry points, referenced by CFATTACH_DECL_NEW below. */
static int	hwaes_match(device_t, cfdata_t, void *);
static void	hwaes_attach(device_t, device_t, void *);
801.1Sjmcneill 811.1Sjmcneillstruct hwaes_softc; 821.1Sjmcneill 831.1Sjmcneillstruct hwaes_dma { 841.1Sjmcneill bus_dmamap_t dma_map; 851.1Sjmcneill void *dma_addr; 861.1Sjmcneill size_t dma_size; 871.1Sjmcneill bus_dma_segment_t dma_segs[1]; 881.1Sjmcneill}; 891.1Sjmcneill 901.1Sjmcneillstruct hwaes_softc { 911.1Sjmcneill device_t sc_dev; 921.1Sjmcneill bus_space_tag_t sc_bst; 931.1Sjmcneill bus_space_handle_t sc_bsh; 941.1Sjmcneill bus_dma_tag_t sc_dmat; 951.1Sjmcneill struct hwaes_dma sc_dma_bounce; 961.1Sjmcneill}; 971.1Sjmcneill 981.1Sjmcneillstruct hwaes_softc *hwaes_sc; 991.1Sjmcneill 1001.1Sjmcneill#define WR4(sc, reg, val) \ 1011.1Sjmcneill bus_space_write_4((sc)->sc_bst, (sc)->sc_bsh, (reg), (val)) 1021.1Sjmcneill#define RD4(sc, reg) \ 1031.1Sjmcneill bus_space_read_4((sc)->sc_bst, (sc)->sc_bsh, (reg)) 1041.1Sjmcneill 1051.1SjmcneillCFATTACH_DECL_NEW(hwaes, sizeof(struct hwaes_softc), 1061.1Sjmcneill hwaes_match, hwaes_attach, NULL, NULL); 1071.1Sjmcneill 1081.1Sjmcneillstatic int hwaes_dma_alloc(struct hwaes_softc *, struct hwaes_dma *, 1091.1Sjmcneill size_t, int); 1101.1Sjmcneillstatic void hwaes_register(void); 1111.1Sjmcneill 1121.1Sjmcneillstatic int 1131.1Sjmcneillhwaes_match(device_t parent, cfdata_t cf, void *aux) 1141.1Sjmcneill{ 1151.1Sjmcneill return 1; 1161.1Sjmcneill} 1171.1Sjmcneill 1181.1Sjmcneillstatic void 1191.1Sjmcneillhwaes_attach(device_t parent, device_t self, void *aux) 1201.1Sjmcneill{ 1211.1Sjmcneill struct hollywood_attach_args *haa = aux; 1221.1Sjmcneill struct hwaes_softc *sc = device_private(self); 1231.1Sjmcneill int error; 1241.1Sjmcneill 1251.1Sjmcneill sc->sc_dev = self; 1261.1Sjmcneill sc->sc_dmat = haa->haa_dmat; 1271.1Sjmcneill sc->sc_bst = haa->haa_bst; 1281.1Sjmcneill error = bus_space_map(sc->sc_bst, haa->haa_addr, AES_REG_SIZE, 1291.1Sjmcneill 0, &sc->sc_bsh); 1301.1Sjmcneill if (error != 0) { 1311.1Sjmcneill aprint_error(": couldn't map registers (%d)\n", error); 1321.1Sjmcneill return; 1331.1Sjmcneill } 
1341.1Sjmcneill 1351.1Sjmcneill aprint_naive("\n"); 1361.1Sjmcneill aprint_normal(": AES engine\n"); 1371.1Sjmcneill 1381.1Sjmcneill hollywood_claim_device(self, IOPAESEN); 1391.1Sjmcneill 1401.1Sjmcneill error = hwaes_dma_alloc(sc, &sc->sc_dma_bounce, HWAES_MAX_AES_LEN, 1411.1Sjmcneill BUS_DMA_WAITOK); 1421.1Sjmcneill if (error != 0) { 1431.1Sjmcneill return; 1441.1Sjmcneill } 1451.1Sjmcneill 1461.1Sjmcneill WR4(sc, AES_CTRL, 0); 1471.1Sjmcneill for (;;) { 1481.1Sjmcneill if (RD4(sc, AES_CTRL) == 0) { 1491.1Sjmcneill break; 1501.1Sjmcneill } 1511.1Sjmcneill } 1521.1Sjmcneill 1531.1Sjmcneill hwaes_sc = sc; 1541.1Sjmcneill hwaes_register(); 1551.1Sjmcneill} 1561.1Sjmcneill 1571.1Sjmcneillstatic int 1581.1Sjmcneillhwaes_dma_alloc(struct hwaes_softc *sc, struct hwaes_dma *dma, size_t size, 1591.1Sjmcneill int flags) 1601.1Sjmcneill{ 1611.1Sjmcneill int error, nsegs; 1621.1Sjmcneill 1631.1Sjmcneill dma->dma_size = size; 1641.1Sjmcneill 1651.1Sjmcneill error = bus_dmamem_alloc(sc->sc_dmat, dma->dma_size, HWAES_ALIGN, 0, 1661.1Sjmcneill dma->dma_segs, 1, &nsegs, flags); 1671.1Sjmcneill if (error != 0) { 1681.1Sjmcneill aprint_error_dev(sc->sc_dev, 1691.1Sjmcneill "bus_dmamem_alloc failed: %d\n", error); 1701.1Sjmcneill goto alloc_failed; 1711.1Sjmcneill } 1721.1Sjmcneill error = bus_dmamem_map(sc->sc_dmat, dma->dma_segs, nsegs, 1731.1Sjmcneill dma->dma_size, &dma->dma_addr, flags); 1741.1Sjmcneill if (error != 0) { 1751.1Sjmcneill aprint_error_dev(sc->sc_dev, 1761.1Sjmcneill "bus_dmamem_map failed: %d\n", error); 1771.1Sjmcneill goto map_failed; 1781.1Sjmcneill } 1791.1Sjmcneill error = bus_dmamap_create(sc->sc_dmat, dma->dma_size, nsegs, 1801.1Sjmcneill dma->dma_size, 0, flags, &dma->dma_map); 1811.1Sjmcneill if (error != 0) { 1821.1Sjmcneill aprint_error_dev(sc->sc_dev, 1831.1Sjmcneill "bus_dmamap_create failed: %d\n", error); 1841.1Sjmcneill goto create_failed; 1851.1Sjmcneill } 1861.1Sjmcneill error = bus_dmamap_load(sc->sc_dmat, dma->dma_map, dma->dma_addr, 
1871.1Sjmcneill dma->dma_size, NULL, flags); 1881.1Sjmcneill if (error != 0) { 1891.1Sjmcneill aprint_error_dev(sc->sc_dev, 1901.1Sjmcneill "bus_dmamap_load failed: %d\n", error); 1911.1Sjmcneill goto load_failed; 1921.1Sjmcneill } 1931.1Sjmcneill 1941.1Sjmcneill return 0; 1951.1Sjmcneill 1961.1Sjmcneillload_failed: 1971.1Sjmcneill bus_dmamap_destroy(sc->sc_dmat, dma->dma_map); 1981.1Sjmcneillcreate_failed: 1991.1Sjmcneill bus_dmamem_unmap(sc->sc_dmat, dma->dma_addr, dma->dma_size); 2001.1Sjmcneillmap_failed: 2011.1Sjmcneill bus_dmamem_free(sc->sc_dmat, dma->dma_segs, nsegs); 2021.1Sjmcneillalloc_failed: 2031.1Sjmcneill return error; 2041.1Sjmcneill} 2051.1Sjmcneill 2061.1Sjmcneillstatic int 2071.1Sjmcneillhwaes_probe(void) 2081.1Sjmcneill{ 2091.1Sjmcneill return 0; 2101.1Sjmcneill} 2111.1Sjmcneill 2121.1Sjmcneillstatic void 2131.1Sjmcneillhwaes_setenckey(struct aesenc *enc, const uint8_t *key, uint32_t nrounds) 2141.1Sjmcneill{ 2151.1Sjmcneill if (nrounds == AES_128_NROUNDS) { 2161.1Sjmcneill enc->aese_aes.aes_rk[0] = be32dec(key + 4*0); 2171.1Sjmcneill enc->aese_aes.aes_rk[1] = be32dec(key + 4*1); 2181.1Sjmcneill enc->aese_aes.aes_rk[2] = be32dec(key + 4*2); 2191.1Sjmcneill enc->aese_aes.aes_rk[3] = be32dec(key + 4*3); 2201.1Sjmcneill } else { 2211.1Sjmcneill aes_bear_impl.ai_setenckey(enc, key, nrounds); 2221.1Sjmcneill } 2231.1Sjmcneill} 2241.1Sjmcneill 2251.1Sjmcneillstatic void 2261.1Sjmcneillhwaes_setdeckey(struct aesdec *dec, const uint8_t *key, uint32_t nrounds) 2271.1Sjmcneill{ 2281.1Sjmcneill if (nrounds == AES_128_NROUNDS) { 2291.1Sjmcneill dec->aesd_aes.aes_rk[0] = be32dec(key + 4*0); 2301.1Sjmcneill dec->aesd_aes.aes_rk[1] = be32dec(key + 4*1); 2311.1Sjmcneill dec->aesd_aes.aes_rk[2] = be32dec(key + 4*2); 2321.1Sjmcneill dec->aesd_aes.aes_rk[3] = be32dec(key + 4*3); 2331.1Sjmcneill } else { 2341.1Sjmcneill aes_bear_impl.ai_setdeckey(dec, key, nrounds); 2351.1Sjmcneill } 2361.1Sjmcneill} 2371.1Sjmcneill 2381.1Sjmcneillstatic void 
2391.1Sjmcneillhwaes_exec_sync(uint32_t flags, uint16_t blocks) 2401.1Sjmcneill{ 2411.1Sjmcneill struct hwaes_softc *sc = hwaes_sc; 2421.1Sjmcneill uint32_t ctrl; 2431.1Sjmcneill 2441.1Sjmcneill KASSERT(blocks > 0); 2451.1Sjmcneill KASSERT(blocks <= HWAES_MAX_BLOCKS); 2461.1Sjmcneill 2471.1Sjmcneill WR4(sc, AES_SRC, sc->sc_dma_bounce.dma_segs[0].ds_addr); 2481.1Sjmcneill WR4(sc, AES_DEST, sc->sc_dma_bounce.dma_segs[0].ds_addr); 2491.1Sjmcneill 2501.1Sjmcneill ctrl = AES_CTRL_EXEC | AES_CTRL_ENA | flags; 2511.1Sjmcneill ctrl |= __SHIFTIN(blocks - 1, AES_CTRL_BLOCKS); 2521.1Sjmcneill 2531.1Sjmcneill WR4(sc, AES_CTRL, ctrl); 2541.1Sjmcneill for (;;) { 2551.1Sjmcneill ctrl = RD4(sc, AES_CTRL); 2561.1Sjmcneill if ((ctrl & AES_CTRL_ERR) != 0) { 2571.1Sjmcneill printf("AES error, AES_CTRL = %#x\n", ctrl); 2581.1Sjmcneill break; 2591.1Sjmcneill } 2601.1Sjmcneill if ((ctrl & AES_CTRL_EXEC) == 0) { 2611.1Sjmcneill break; 2621.1Sjmcneill } 2631.1Sjmcneill } 2641.1Sjmcneill} 2651.1Sjmcneill 2661.1Sjmcneillstatic void 2671.1Sjmcneillhwaes_enc(const struct aesenc *enc, const uint8_t in[static 16], 2681.1Sjmcneill uint8_t out[static 16], uint32_t nrounds) 2691.1Sjmcneill{ 2701.1Sjmcneill struct hwaes_softc *sc = hwaes_sc; 2711.1Sjmcneill unsigned n; 2721.1Sjmcneill int s; 2731.1Sjmcneill 2741.1Sjmcneill if (nrounds != AES_128_NROUNDS) { 2751.1Sjmcneill aes_bear_impl.ai_enc(enc, in, out, nrounds); 2761.1Sjmcneill return; 2771.1Sjmcneill } 2781.1Sjmcneill 2791.1Sjmcneill s = splvm(); 2801.1Sjmcneill 2811.1Sjmcneill for (n = 0; n < 4; n++) { 2821.1Sjmcneill WR4(sc, AES_IV, 0); 2831.1Sjmcneill } 2841.1Sjmcneill for (n = 0; n < 4; n++) { 2851.1Sjmcneill WR4(sc, AES_KEY, enc->aese_aes.aes_rk[n]); 2861.1Sjmcneill } 2871.1Sjmcneill memcpy(sc->sc_dma_bounce.dma_addr, in, HWAES_BLOCK_LEN); 2881.1Sjmcneill bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map, 2891.1Sjmcneill 0, HWAES_BLOCK_LEN, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 2901.1Sjmcneill hwaes_exec_sync(0, 1); 
2911.1Sjmcneill bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map, 2921.1Sjmcneill 0, HWAES_BLOCK_LEN, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 2931.1Sjmcneill memcpy(out, sc->sc_dma_bounce.dma_addr, HWAES_BLOCK_LEN); 2941.1Sjmcneill 2951.1Sjmcneill splx(s); 2961.1Sjmcneill} 2971.1Sjmcneill 2981.1Sjmcneillstatic void 2991.1Sjmcneillhwaes_encN(const struct aesenc *enc, const uint8_t in[static 16], 3001.1Sjmcneill uint8_t out[static 16], size_t nblocks) 3011.1Sjmcneill{ 3021.1Sjmcneill for (size_t n = 0; n < nblocks; n++) { 3031.1Sjmcneill hwaes_enc(enc, &in[n * HWAES_BLOCK_LEN], 3041.1Sjmcneill &out[n * HWAES_BLOCK_LEN], AES_128_NROUNDS); 3051.1Sjmcneill } 3061.1Sjmcneill} 3071.1Sjmcneill 3081.1Sjmcneillstatic void 3091.1Sjmcneillhwaes_dec(const struct aesdec *dec, const uint8_t in[static 16], 3101.1Sjmcneill uint8_t out[static 16], uint32_t nrounds) 3111.1Sjmcneill{ 3121.1Sjmcneill struct hwaes_softc *sc = hwaes_sc; 3131.1Sjmcneill unsigned n; 3141.1Sjmcneill int s; 3151.1Sjmcneill 3161.1Sjmcneill if (nrounds != AES_128_NROUNDS) { 3171.1Sjmcneill aes_bear_impl.ai_dec(dec, in, out, nrounds); 3181.1Sjmcneill return; 3191.1Sjmcneill } 3201.1Sjmcneill 3211.1Sjmcneill s = splvm(); 3221.1Sjmcneill 3231.1Sjmcneill for (n = 0; n < 4; n++) { 3241.1Sjmcneill WR4(sc, AES_IV, 0); 3251.1Sjmcneill } 3261.1Sjmcneill for (n = 0; n < 4; n++) { 3271.1Sjmcneill WR4(sc, AES_KEY, dec->aesd_aes.aes_rk[n]); 3281.1Sjmcneill } 3291.1Sjmcneill memcpy(sc->sc_dma_bounce.dma_addr, in, HWAES_BLOCK_LEN); 3301.1Sjmcneill bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map, 3311.1Sjmcneill 0, HWAES_BLOCK_LEN, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 3321.1Sjmcneill hwaes_exec_sync(AES_CTRL_DEC, 1); 3331.1Sjmcneill bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map, 3341.1Sjmcneill 0, HWAES_BLOCK_LEN, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 3351.1Sjmcneill memcpy(out, sc->sc_dma_bounce.dma_addr, HWAES_BLOCK_LEN); 3361.1Sjmcneill 3371.1Sjmcneill splx(s); 3381.1Sjmcneill} 
3391.1Sjmcneill 3401.1Sjmcneillstatic void 3411.1Sjmcneillhwaes_decN(const struct aesdec *dec, const uint8_t in[static 16], 3421.1Sjmcneill uint8_t out[static 16], size_t nblocks) 3431.1Sjmcneill{ 3441.1Sjmcneill for (size_t n = 0; n < nblocks; n++) { 3451.1Sjmcneill hwaes_dec(dec, &in[n * HWAES_BLOCK_LEN], 3461.1Sjmcneill &out[n * HWAES_BLOCK_LEN], AES_128_NROUNDS); 3471.1Sjmcneill } 3481.1Sjmcneill} 3491.1Sjmcneill 3501.1Sjmcneillstatic void 3511.1Sjmcneillhwaes_cbc_enc(const struct aesenc *enc, const uint8_t in[static 16], 3521.1Sjmcneill uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16], 3531.1Sjmcneill uint32_t nrounds) 3541.1Sjmcneill{ 3551.1Sjmcneill struct hwaes_softc *sc = hwaes_sc; 3561.1Sjmcneill const uint8_t *inp = in; 3571.1Sjmcneill uint8_t *outp = out; 3581.1Sjmcneill uint32_t flags; 3591.1Sjmcneill unsigned n; 3601.1Sjmcneill int s; 3611.1Sjmcneill 3621.1Sjmcneill if (nrounds != AES_128_NROUNDS) { 3631.1Sjmcneill aes_bear_impl.ai_cbc_enc(enc, in, out, nbytes, iv, nrounds); 3641.1Sjmcneill return; 3651.1Sjmcneill } 3661.1Sjmcneill 3671.1Sjmcneill KASSERT(nbytes % HWAES_BLOCK_LEN == 0); 3681.1Sjmcneill if (nbytes == 0) { 3691.1Sjmcneill return; 3701.1Sjmcneill } 3711.1Sjmcneill 3721.1Sjmcneill s = splvm(); 3731.1Sjmcneill 3741.1Sjmcneill for (n = 0; n < 4; n++) { 3751.1Sjmcneill WR4(sc, AES_IV, be32dec(&iv[n * 4])); 3761.1Sjmcneill } 3771.1Sjmcneill for (n = 0; n < 4; n++) { 3781.1Sjmcneill WR4(sc, AES_KEY, enc->aese_aes.aes_rk[n]); 3791.1Sjmcneill } 3801.1Sjmcneill flags = 0; 3811.1Sjmcneill while (nbytes > 0) { 3821.1Sjmcneill const size_t blocks = MIN(nbytes / HWAES_BLOCK_LEN, 3831.1Sjmcneill HWAES_MAX_BLOCKS); 3841.1Sjmcneill 3851.1Sjmcneill memcpy(sc->sc_dma_bounce.dma_addr, inp, 3861.1Sjmcneill blocks * HWAES_BLOCK_LEN); 3871.1Sjmcneill bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map, 3881.1Sjmcneill 0, blocks * HWAES_BLOCK_LEN, 3891.1Sjmcneill BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 3901.1Sjmcneill hwaes_exec_sync(flags, 
blocks); 3911.1Sjmcneill bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map, 3921.1Sjmcneill 0, blocks * HWAES_BLOCK_LEN, 3931.1Sjmcneill BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 3941.1Sjmcneill memcpy(outp, sc->sc_dma_bounce.dma_addr, 3951.1Sjmcneill blocks * HWAES_BLOCK_LEN); 3961.1Sjmcneill 3971.1Sjmcneill nbytes -= blocks * HWAES_BLOCK_LEN; 3981.1Sjmcneill inp += blocks * HWAES_BLOCK_LEN; 3991.1Sjmcneill outp += blocks * HWAES_BLOCK_LEN; 4001.1Sjmcneill flags |= AES_CTRL_IV; 4011.1Sjmcneill } 4021.1Sjmcneill 4031.1Sjmcneill memcpy(iv, outp - HWAES_BLOCK_LEN, HWAES_BLOCK_LEN); 4041.1Sjmcneill 4051.1Sjmcneill splx(s); 4061.1Sjmcneill} 4071.1Sjmcneill 4081.1Sjmcneillstatic void 4091.1Sjmcneillhwaes_cbc_dec(const struct aesdec *dec, const uint8_t in[static 16], 4101.1Sjmcneill uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16], 4111.1Sjmcneill uint32_t nrounds) 4121.1Sjmcneill{ 4131.1Sjmcneill struct hwaes_softc *sc = hwaes_sc; 4141.1Sjmcneill const uint8_t *inp = in; 4151.1Sjmcneill uint8_t *outp = out; 4161.1Sjmcneill uint32_t flags; 4171.1Sjmcneill unsigned n; 4181.1Sjmcneill int s; 4191.1Sjmcneill 4201.1Sjmcneill if (nrounds != AES_128_NROUNDS) { 4211.1Sjmcneill aes_bear_impl.ai_cbc_dec(dec, in, out, nbytes, iv, nrounds); 4221.1Sjmcneill return; 4231.1Sjmcneill } 4241.1Sjmcneill 4251.1Sjmcneill KASSERT(nbytes % HWAES_BLOCK_LEN == 0); 4261.1Sjmcneill if (nbytes == 0) { 4271.1Sjmcneill return; 4281.1Sjmcneill } 4291.1Sjmcneill 4301.1Sjmcneill s = splvm(); 4311.1Sjmcneill 4321.1Sjmcneill for (n = 0; n < 4; n++) { 4331.1Sjmcneill WR4(sc, AES_IV, be32dec(&iv[n * 4])); 4341.1Sjmcneill } 4351.1Sjmcneill 4361.1Sjmcneill memcpy(iv, inp + nbytes - HWAES_BLOCK_LEN, HWAES_BLOCK_LEN); 4371.1Sjmcneill 4381.1Sjmcneill for (n = 0; n < 4; n++) { 4391.1Sjmcneill WR4(sc, AES_KEY, dec->aesd_aes.aes_rk[n]); 4401.1Sjmcneill } 4411.1Sjmcneill flags = AES_CTRL_DEC; 4421.1Sjmcneill while (nbytes > 0) { 4431.1Sjmcneill const size_t blocks = MIN(nbytes / 
HWAES_BLOCK_LEN, 4441.1Sjmcneill HWAES_MAX_BLOCKS); 4451.1Sjmcneill 4461.1Sjmcneill memcpy(sc->sc_dma_bounce.dma_addr, inp, 4471.1Sjmcneill blocks * HWAES_BLOCK_LEN); 4481.1Sjmcneill bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map, 4491.1Sjmcneill 0, blocks * HWAES_BLOCK_LEN, 4501.1Sjmcneill BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 4511.1Sjmcneill hwaes_exec_sync(flags, blocks); 4521.1Sjmcneill bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map, 4531.1Sjmcneill 0, blocks * HWAES_BLOCK_LEN, 4541.1Sjmcneill BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 4551.1Sjmcneill memcpy(outp, sc->sc_dma_bounce.dma_addr, 4561.1Sjmcneill blocks * HWAES_BLOCK_LEN); 4571.1Sjmcneill 4581.1Sjmcneill nbytes -= blocks * HWAES_BLOCK_LEN; 4591.1Sjmcneill inp += blocks * HWAES_BLOCK_LEN; 4601.1Sjmcneill outp += blocks * HWAES_BLOCK_LEN; 4611.1Sjmcneill flags |= AES_CTRL_IV; 4621.1Sjmcneill } 4631.1Sjmcneill 4641.1Sjmcneill splx(s); 4651.1Sjmcneill} 4661.1Sjmcneill 4671.1Sjmcneillstatic void 4681.1Sjmcneillhwaes_xts_update(uint32_t *t0, uint32_t *t1, uint32_t *t2, uint32_t *t3) 4691.1Sjmcneill{ 4701.1Sjmcneill uint32_t s0, s1, s2, s3; 4711.1Sjmcneill 4721.1Sjmcneill s0 = *t0 >> 31; 4731.1Sjmcneill s1 = *t1 >> 31; 4741.1Sjmcneill s2 = *t2 >> 31; 4751.1Sjmcneill s3 = *t3 >> 31; 4761.1Sjmcneill *t0 = (*t0 << 1) ^ (-s3 & 0x87); 4771.1Sjmcneill *t1 = (*t1 << 1) ^ s0; 4781.1Sjmcneill *t2 = (*t2 << 1) ^ s1; 4791.1Sjmcneill *t3 = (*t3 << 1) ^ s2; 4801.1Sjmcneill} 4811.1Sjmcneill 4821.1Sjmcneillstatic void 4831.1Sjmcneillhwaes_xts_enc(const struct aesenc *enc, const uint8_t in[static 16], 4841.1Sjmcneill uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16], 4851.1Sjmcneill uint32_t nrounds) 4861.1Sjmcneill{ 4871.1Sjmcneill uint8_t block[16]; 4881.1Sjmcneill uint8_t tle[16]; 4891.1Sjmcneill uint32_t t[4]; 4901.1Sjmcneill const uint8_t *inp = in; 4911.1Sjmcneill uint8_t *outp = out; 4921.1Sjmcneill 4931.1Sjmcneill if (nrounds != AES_128_NROUNDS) { 4941.1Sjmcneill 
aes_bear_impl.ai_xts_enc(enc, in, out, nbytes, tweak, nrounds); 4951.1Sjmcneill return; 4961.1Sjmcneill } 4971.1Sjmcneill 4981.1Sjmcneill KASSERT(nbytes % 16 == 0); 4991.1Sjmcneill 5001.1Sjmcneill t[0] = le32dec(tweak + 4*0); 5011.1Sjmcneill t[1] = le32dec(tweak + 4*1); 5021.1Sjmcneill t[2] = le32dec(tweak + 4*2); 5031.1Sjmcneill t[3] = le32dec(tweak + 4*3); 5041.1Sjmcneill 5051.1Sjmcneill while (nbytes > 0) { 5061.1Sjmcneill le32enc(tle + 4*0, t[0]); 5071.1Sjmcneill le32enc(tle + 4*1, t[1]); 5081.1Sjmcneill le32enc(tle + 4*2, t[2]); 5091.1Sjmcneill le32enc(tle + 4*3, t[3]); 5101.1Sjmcneill 5111.1Sjmcneill for (unsigned n = 0; n < 16; n++) { 5121.1Sjmcneill block[n] = inp[n] ^ tle[n]; 5131.1Sjmcneill } 5141.1Sjmcneill 5151.1Sjmcneill hwaes_encN(enc, block, block, 1); 5161.1Sjmcneill 5171.1Sjmcneill for (unsigned n = 0; n < 16; n++) { 5181.1Sjmcneill outp[n] = block[n] ^ tle[n]; 5191.1Sjmcneill } 5201.1Sjmcneill 5211.1Sjmcneill hwaes_xts_update(&t[0], &t[1], &t[2], &t[3]); 5221.1Sjmcneill 5231.1Sjmcneill nbytes -= HWAES_BLOCK_LEN; 5241.1Sjmcneill inp += HWAES_BLOCK_LEN; 5251.1Sjmcneill outp += HWAES_BLOCK_LEN; 5261.1Sjmcneill } 5271.1Sjmcneill 5281.1Sjmcneill le32enc(tweak + 4*0, t[0]); 5291.1Sjmcneill le32enc(tweak + 4*1, t[1]); 5301.1Sjmcneill le32enc(tweak + 4*2, t[2]); 5311.1Sjmcneill le32enc(tweak + 4*3, t[3]); 5321.1Sjmcneill 5331.1Sjmcneill explicit_memset(t, 0, sizeof(t)); 5341.1Sjmcneill explicit_memset(block, 0, sizeof(block)); 5351.1Sjmcneill explicit_memset(tle, 0, sizeof(tle)); 5361.1Sjmcneill} 5371.1Sjmcneill 5381.1Sjmcneillstatic void 5391.1Sjmcneillhwaes_xts_dec(const struct aesdec *dec, const uint8_t in[static 16], 5401.1Sjmcneill uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16], 5411.1Sjmcneill uint32_t nrounds) 5421.1Sjmcneill{ 5431.1Sjmcneill uint8_t block[16]; 5441.1Sjmcneill uint8_t tle[16]; 5451.1Sjmcneill uint32_t t[4]; 5461.1Sjmcneill const uint8_t *inp = in; 5471.1Sjmcneill uint8_t *outp = out; 5481.1Sjmcneill 5491.1Sjmcneill 
if (nrounds != AES_128_NROUNDS) { 5501.1Sjmcneill aes_bear_impl.ai_xts_dec(dec, in, out, nbytes, tweak, nrounds); 5511.1Sjmcneill return; 5521.1Sjmcneill } 5531.1Sjmcneill 5541.1Sjmcneill KASSERT(nbytes % 16 == 0); 5551.1Sjmcneill 5561.1Sjmcneill t[0] = le32dec(tweak + 4*0); 5571.1Sjmcneill t[1] = le32dec(tweak + 4*1); 5581.1Sjmcneill t[2] = le32dec(tweak + 4*2); 5591.1Sjmcneill t[3] = le32dec(tweak + 4*3); 5601.1Sjmcneill 5611.1Sjmcneill while (nbytes > 0) { 5621.1Sjmcneill le32enc(tle + 4*0, t[0]); 5631.1Sjmcneill le32enc(tle + 4*1, t[1]); 5641.1Sjmcneill le32enc(tle + 4*2, t[2]); 5651.1Sjmcneill le32enc(tle + 4*3, t[3]); 5661.1Sjmcneill 5671.1Sjmcneill for (unsigned n = 0; n < 16; n++) { 5681.1Sjmcneill block[n] = inp[n] ^ tle[n]; 5691.1Sjmcneill } 5701.1Sjmcneill 5711.1Sjmcneill hwaes_decN(dec, block, block, 1); 5721.1Sjmcneill 5731.1Sjmcneill for (unsigned n = 0; n < 16; n++) { 5741.1Sjmcneill outp[n] = block[n] ^ tle[n]; 5751.1Sjmcneill } 5761.1Sjmcneill 5771.1Sjmcneill hwaes_xts_update(&t[0], &t[1], &t[2], &t[3]); 5781.1Sjmcneill 5791.1Sjmcneill nbytes -= HWAES_BLOCK_LEN; 5801.1Sjmcneill inp += HWAES_BLOCK_LEN; 5811.1Sjmcneill outp += HWAES_BLOCK_LEN; 5821.1Sjmcneill } 5831.1Sjmcneill 5841.1Sjmcneill le32enc(tweak + 4*0, t[0]); 5851.1Sjmcneill le32enc(tweak + 4*1, t[1]); 5861.1Sjmcneill le32enc(tweak + 4*2, t[2]); 5871.1Sjmcneill le32enc(tweak + 4*3, t[3]); 5881.1Sjmcneill 5891.1Sjmcneill explicit_memset(t, 0, sizeof(t)); 5901.1Sjmcneill explicit_memset(block, 0, sizeof(block)); 5911.1Sjmcneill explicit_memset(tle, 0, sizeof(tle)); 5921.1Sjmcneill} 5931.1Sjmcneill 5941.1Sjmcneillstatic void 5951.1Sjmcneillhwaes_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16], 5961.1Sjmcneill size_t nbytes, uint8_t auth0[static 16], uint32_t nrounds) 5971.1Sjmcneill{ 5981.1Sjmcneill const uint8_t *inp = in; 5991.1Sjmcneill 6001.1Sjmcneill if (nrounds != AES_128_NROUNDS) { 6011.1Sjmcneill aes_bear_impl.ai_cbcmac_update1(enc, in, nbytes, auth0, nrounds); 
6021.1Sjmcneill return; 6031.1Sjmcneill } 6041.1Sjmcneill 6051.1Sjmcneill KASSERT(nbytes % 16 == 0); 6061.1Sjmcneill 6071.1Sjmcneill while (nbytes > 0) { 6081.1Sjmcneill for (unsigned n = 0; n < 16; n++) { 6091.1Sjmcneill auth0[n] = auth0[n] ^ inp[n]; 6101.1Sjmcneill } 6111.1Sjmcneill 6121.1Sjmcneill hwaes_encN(enc, auth0, auth0, 1); 6131.1Sjmcneill 6141.1Sjmcneill nbytes -= HWAES_BLOCK_LEN; 6151.1Sjmcneill inp += HWAES_BLOCK_LEN; 6161.1Sjmcneill } 6171.1Sjmcneill} 6181.1Sjmcneill 6191.1Sjmcneillstatic void 6201.1Sjmcneillhwaes_ccm_enc1(const struct aesenc *enc, const uint8_t in[static 16], 6211.1Sjmcneill uint8_t out[static 16], size_t nbytes, uint8_t authctr0[static 32], 6221.1Sjmcneill uint32_t nrounds) 6231.1Sjmcneill{ 6241.1Sjmcneill const uint8_t *inp = in; 6251.1Sjmcneill uint8_t *outp = out; 6261.1Sjmcneill uint32_t c[4]; 6271.1Sjmcneill 6281.1Sjmcneill if (nrounds != AES_128_NROUNDS) { 6291.1Sjmcneill aes_bear_impl.ai_ccm_enc1(enc, in, out, nbytes, authctr0, nrounds); 6301.1Sjmcneill return; 6311.1Sjmcneill } 6321.1Sjmcneill 6331.1Sjmcneill KASSERT(nbytes % 16 == 0); 6341.1Sjmcneill 6351.1Sjmcneill c[0] = le32dec(authctr0 + 16 + 4*0); 6361.1Sjmcneill c[1] = le32dec(authctr0 + 16 + 4*1); 6371.1Sjmcneill c[2] = le32dec(authctr0 + 16 + 4*2); 6381.1Sjmcneill c[3] = be32dec(authctr0 + 16 + 4*3); 6391.1Sjmcneill 6401.1Sjmcneill while (nbytes > 0) { 6411.1Sjmcneill for (unsigned n = 0; n < 16; n++) { 6421.1Sjmcneill authctr0[n] = authctr0[n] ^ inp[n]; 6431.1Sjmcneill } 6441.1Sjmcneill 6451.1Sjmcneill le32enc(authctr0 + 16 + 4*0, c[0]); 6461.1Sjmcneill le32enc(authctr0 + 16 + 4*1, c[1]); 6471.1Sjmcneill le32enc(authctr0 + 16 + 4*2, c[2]); 6481.1Sjmcneill be32enc(authctr0 + 16 + 4*3, ++c[3]); 6491.1Sjmcneill 6501.1Sjmcneill hwaes_encN(enc, authctr0, authctr0, 2); 6511.1Sjmcneill 6521.1Sjmcneill for (unsigned n = 0; n < 16; n++) { 6531.1Sjmcneill outp[n] = inp[n] ^ authctr0[n + 16]; 6541.1Sjmcneill } 6551.1Sjmcneill 6561.1Sjmcneill nbytes -= HWAES_BLOCK_LEN; 
6571.1Sjmcneill inp += HWAES_BLOCK_LEN; 6581.1Sjmcneill outp += HWAES_BLOCK_LEN; 6591.1Sjmcneill } 6601.1Sjmcneill 6611.1Sjmcneill le32enc(authctr0 + 16 + 4*0, c[0]); 6621.1Sjmcneill le32enc(authctr0 + 16 + 4*1, c[1]); 6631.1Sjmcneill le32enc(authctr0 + 16 + 4*2, c[2]); 6641.1Sjmcneill be32enc(authctr0 + 16 + 4*3, c[3]); 6651.1Sjmcneill} 6661.1Sjmcneill 6671.1Sjmcneillstatic void 6681.1Sjmcneillhwaes_ccm_dec1(const struct aesenc *enc, const uint8_t in[static 16], 6691.1Sjmcneill uint8_t out[static 16], size_t nbytes, uint8_t authctr0[static 32], 6701.1Sjmcneill uint32_t nrounds) 6711.1Sjmcneill{ 6721.1Sjmcneill const uint8_t *inp = in; 6731.1Sjmcneill uint8_t *outp = out; 6741.1Sjmcneill uint32_t c[4]; 6751.1Sjmcneill 6761.1Sjmcneill if (nrounds != AES_128_NROUNDS) { 6771.1Sjmcneill aes_bear_impl.ai_ccm_dec1(enc, in, out, nbytes, authctr0, nrounds); 6781.1Sjmcneill return; 6791.1Sjmcneill } 6801.1Sjmcneill 6811.1Sjmcneill KASSERT(nbytes % 16 == 0); 6821.1Sjmcneill 6831.1Sjmcneill c[0] = le32dec(authctr0 + 16 + 4*0); 6841.1Sjmcneill c[1] = le32dec(authctr0 + 16 + 4*1); 6851.1Sjmcneill c[2] = le32dec(authctr0 + 16 + 4*2); 6861.1Sjmcneill c[3] = be32dec(authctr0 + 16 + 4*3); 6871.1Sjmcneill 6881.1Sjmcneill be32enc(authctr0 + 16 + 4*3, ++c[3]); 6891.1Sjmcneill hwaes_encN(enc, authctr0 + 16, authctr0 + 16, 1); 6901.1Sjmcneill 6911.1Sjmcneill while (nbytes > 0) { 6921.1Sjmcneill for (unsigned n = 0; n < 16; n++) { 6931.1Sjmcneill outp[n] = authctr0[n + 16] ^ inp[n]; 6941.1Sjmcneill authctr0[n] = authctr0[n] ^ outp[n]; 6951.1Sjmcneill } 6961.1Sjmcneill nbytes -= HWAES_BLOCK_LEN; 6971.1Sjmcneill if (nbytes == 0) { 6981.1Sjmcneill break; 6991.1Sjmcneill } 7001.1Sjmcneill 7011.1Sjmcneill inp += HWAES_BLOCK_LEN; 7021.1Sjmcneill outp += HWAES_BLOCK_LEN; 7031.1Sjmcneill 7041.1Sjmcneill le32enc(authctr0 + 16 + 4*0, c[0]); 7051.1Sjmcneill le32enc(authctr0 + 16 + 4*1, c[1]); 7061.1Sjmcneill le32enc(authctr0 + 16 + 4*2, c[2]); 7071.1Sjmcneill be32enc(authctr0 + 16 + 4*3, ++c[3]); 
7081.1Sjmcneill hwaes_encN(enc, authctr0, authctr0, 2); 7091.1Sjmcneill } 7101.1Sjmcneill hwaes_encN(enc, authctr0, authctr0, 1); 7111.1Sjmcneill 7121.1Sjmcneill le32enc(authctr0 + 16 + 4*0, c[0]); 7131.1Sjmcneill le32enc(authctr0 + 16 + 4*1, c[1]); 7141.1Sjmcneill le32enc(authctr0 + 16 + 4*2, c[2]); 7151.1Sjmcneill be32enc(authctr0 + 16 + 4*3, c[3]); 7161.1Sjmcneill 7171.1Sjmcneill} 7181.1Sjmcneill 7191.1Sjmcneillstatic struct aes_impl aes_hwaes_impl = { 7201.1Sjmcneill .ai_name = "Hollywood AES engine", 7211.1Sjmcneill .ai_probe = hwaes_probe, 7221.1Sjmcneill .ai_setenckey = hwaes_setenckey, 7231.1Sjmcneill .ai_setdeckey = hwaes_setdeckey, 7241.1Sjmcneill .ai_enc = hwaes_enc, 7251.1Sjmcneill .ai_dec = hwaes_dec, 7261.1Sjmcneill .ai_cbc_enc = hwaes_cbc_enc, 7271.1Sjmcneill .ai_cbc_dec = hwaes_cbc_dec, 7281.1Sjmcneill .ai_xts_enc = hwaes_xts_enc, 7291.1Sjmcneill .ai_xts_dec = hwaes_xts_dec, 7301.1Sjmcneill .ai_cbcmac_update1 = hwaes_cbcmac_update1, 7311.1Sjmcneill .ai_ccm_enc1 = hwaes_ccm_enc1, 7321.1Sjmcneill .ai_ccm_dec1 = hwaes_ccm_dec1, 7331.1Sjmcneill}; 7341.1Sjmcneill 7351.1Sjmcneillstatic void 7361.1Sjmcneillhwaes_register(void) 7371.1Sjmcneill{ 7381.1Sjmcneill aes_md_init(&aes_hwaes_impl); 7391.1Sjmcneill} 740