hwaes.c revision 1.1
11.1Sjmcneill/* $NetBSD: hwaes.c,v 1.1 2026/01/09 22:54:30 jmcneill Exp $ */ 21.1Sjmcneill 31.1Sjmcneill/*- 41.1Sjmcneill * Copyright (c) 2025 Jared McNeill <jmcneill@invisible.ca> 51.1Sjmcneill * Copyright (c) 2020 The NetBSD Foundation, Inc. 61.1Sjmcneill * All rights reserved. 71.1Sjmcneill * 81.1Sjmcneill * Redistribution and use in source and binary forms, with or without 91.1Sjmcneill * modification, are permitted provided that the following conditions 101.1Sjmcneill * are met: 111.1Sjmcneill * 1. Redistributions of source code must retain the above copyright 121.1Sjmcneill * notice, this list of conditions and the following disclaimer. 131.1Sjmcneill * 2. Redistributions in binary form must reproduce the above copyright 141.1Sjmcneill * notice, this list of conditions and the following disclaimer in the 151.1Sjmcneill * documentation and/or other materials provided with the distribution. 161.1Sjmcneill * 171.1Sjmcneill * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 181.1Sjmcneill * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 191.1Sjmcneill * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 201.1Sjmcneill * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 211.1Sjmcneill * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 221.1Sjmcneill * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 231.1Sjmcneill * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 241.1Sjmcneill * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 251.1Sjmcneill * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 261.1Sjmcneill * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 271.1Sjmcneill * POSSIBILITY OF SUCH DAMAGE. 281.1Sjmcneill */ 291.1Sjmcneill 301.1Sjmcneill/* 311.1Sjmcneill * A driver for the Nintendo Wii's AES engine. The driver registers an AES 321.1Sjmcneill * implementation for kernel use via aes_md_init(). AES-128 requests are 331.1Sjmcneill * accelerated by hardware and all other requests are passed through to the 341.1Sjmcneill * default (BearSSL aes_ct) implementation. 351.1Sjmcneill */ 361.1Sjmcneill 371.1Sjmcneill#include <sys/cdefs.h> 381.1Sjmcneill__KERNEL_RCSID(0, "$NetBSD: hwaes.c,v 1.1 2026/01/09 22:54:30 jmcneill Exp $"); 391.1Sjmcneill 401.1Sjmcneill#include <sys/param.h> 411.1Sjmcneill#include <sys/bus.h> 421.1Sjmcneill#include <sys/device.h> 431.1Sjmcneill#include <sys/systm.h> 441.1Sjmcneill#include <sys/callout.h> 451.1Sjmcneill#include <sys/buf.h> 461.1Sjmcneill#include <sys/cpu.h> 471.1Sjmcneill 481.1Sjmcneill#include <machine/wii.h> 491.1Sjmcneill#include <machine/wiiu.h> 501.1Sjmcneill#include <machine/pio.h> 511.1Sjmcneill#include "ahb.h" 521.1Sjmcneill 531.1Sjmcneill#include <crypto/aes/aes.h> 541.1Sjmcneill#include <crypto/aes/aes_bear.h> 551.1Sjmcneill#include <crypto/aes/aes_impl.h> 561.1Sjmcneill 571.1Sjmcneill/* AES engine registers */ 581.1Sjmcneill#define AES_CTRL 0x00 591.1Sjmcneill#define AES_CTRL_EXEC __BIT(31) 601.1Sjmcneill#define AES_CTRL_IRQ __BIT(30) 611.1Sjmcneill#define AES_CTRL_ERR __BIT(29) 621.1Sjmcneill#define AES_CTRL_ENA __BIT(28) 631.1Sjmcneill#define AES_CTRL_DEC __BIT(27) 641.1Sjmcneill#define AES_CTRL_IV __BIT(12) 651.1Sjmcneill#define AES_CTRL_BLOCKS __BITS(11, 0) 661.1Sjmcneill#define AES_SRC 0x04 671.1Sjmcneill#define AES_DEST 0x08 681.1Sjmcneill#define AES_KEY 0x0c 691.1Sjmcneill#define AES_IV 0x10 701.1Sjmcneill 711.1Sjmcneill/* Register frame size */ 721.1Sjmcneill#define AES_REG_SIZE 0x14 731.1Sjmcneill 741.1Sjmcneill/* Device limits */ 751.1Sjmcneill#define HWAES_BLOCK_LEN 16 761.1Sjmcneill#define HWAES_ALIGN 16 771.1Sjmcneill#define HWAES_MAX_BLOCKS 4096 781.1Sjmcneill#define HWAES_MAX_AES_LEN (HWAES_BLOCK_LEN * HWAES_MAX_BLOCKS) 791.1Sjmcneill 801.1Sjmcneillstatic int hwaes_match(device_t, cfdata_t, void *); 811.1Sjmcneillstatic void hwaes_attach(device_t, device_t, void *); 821.1Sjmcneill 831.1Sjmcneillstruct hwaes_softc; 841.1Sjmcneill 851.1Sjmcneillstruct hwaes_dma { 861.1Sjmcneill bus_dmamap_t dma_map; 871.1Sjmcneill void *dma_addr; 881.1Sjmcneill size_t dma_size; 891.1Sjmcneill bus_dma_segment_t dma_segs[1]; 901.1Sjmcneill}; 911.1Sjmcneill 921.1Sjmcneillstruct hwaes_softc { 931.1Sjmcneill device_t sc_dev; 941.1Sjmcneill bus_space_tag_t sc_bst; 951.1Sjmcneill bus_space_handle_t sc_bsh; 961.1Sjmcneill bus_dma_tag_t sc_dmat; 971.1Sjmcneill struct hwaes_dma sc_dma_bounce; 981.1Sjmcneill}; 991.1Sjmcneill 1001.1Sjmcneillstruct hwaes_softc *hwaes_sc; 1011.1Sjmcneill 1021.1Sjmcneill#define WR4(sc, reg, val) \ 1031.1Sjmcneill bus_space_write_4((sc)->sc_bst, (sc)->sc_bsh, (reg), (val)) 1041.1Sjmcneill#define RD4(sc, reg) \ 1051.1Sjmcneill bus_space_read_4((sc)->sc_bst, (sc)->sc_bsh, (reg)) 1061.1Sjmcneill 1071.1SjmcneillCFATTACH_DECL_NEW(hwaes, sizeof(struct hwaes_softc), 1081.1Sjmcneill hwaes_match, hwaes_attach, NULL, NULL); 1091.1Sjmcneill 1101.1Sjmcneillstatic int hwaes_dma_alloc(struct hwaes_softc *, struct hwaes_dma *, 1111.1Sjmcneill size_t, int); 1121.1Sjmcneillstatic void hwaes_register(void); 1131.1Sjmcneill 1141.1Sjmcneillstatic int 1151.1Sjmcneillhwaes_match(device_t parent, cfdata_t cf, void *aux) 1161.1Sjmcneill{ 1171.1Sjmcneill return 1; 1181.1Sjmcneill} 1191.1Sjmcneill 1201.1Sjmcneillstatic void 1211.1Sjmcneillhwaes_attach(device_t parent, device_t self, void *aux) 1221.1Sjmcneill{ 1231.1Sjmcneill struct ahb_attach_args *aaa = aux; 1241.1Sjmcneill struct hwaes_softc *sc = device_private(self); 1251.1Sjmcneill bool enabled; 1261.1Sjmcneill int error; 1271.1Sjmcneill 1281.1Sjmcneill /* 1291.1Sjmcneill * Since aes_md_init() expects per-CPU engines and we only have one, 1301.1Sjmcneill * only enable AES offload in single CPU configurations. 1311.1Sjmcneill */ 1321.1Sjmcneill enabled = kcpuset_countset(kcpuset_attached) == 1; 1331.1Sjmcneill 1341.1Sjmcneill aprint_naive("\n"); 1351.1Sjmcneill aprint_normal(": AES engine%s\n", enabled ? "" : " (disabled)"); 1361.1Sjmcneill if (!enabled) { 1371.1Sjmcneill return; 1381.1Sjmcneill } 1391.1Sjmcneill 1401.1Sjmcneill sc->sc_dev = self; 1411.1Sjmcneill sc->sc_dmat = aaa->aaa_dmat; 1421.1Sjmcneill sc->sc_bst = aaa->aaa_bst; 1431.1Sjmcneill error = bus_space_map(sc->sc_bst, aaa->aaa_addr, AES_REG_SIZE, 1441.1Sjmcneill 0, &sc->sc_bsh); 1451.1Sjmcneill if (error != 0) { 1461.1Sjmcneill aprint_error_dev(self, "couldn't map registers (%d)\n", error); 1471.1Sjmcneill return; 1481.1Sjmcneill } 1491.1Sjmcneill 1501.1Sjmcneill ahb_claim_device(self, IOPAESEN); 1511.1Sjmcneill 1521.1Sjmcneill error = hwaes_dma_alloc(sc, &sc->sc_dma_bounce, HWAES_MAX_AES_LEN, 1531.1Sjmcneill BUS_DMA_WAITOK); 1541.1Sjmcneill if (error != 0) { 1551.1Sjmcneill return; 1561.1Sjmcneill } 1571.1Sjmcneill 1581.1Sjmcneill WR4(sc, AES_CTRL, 0); 1591.1Sjmcneill for (;;) { 1601.1Sjmcneill if (RD4(sc, AES_CTRL) == 0) { 1611.1Sjmcneill break; 1621.1Sjmcneill } 1631.1Sjmcneill } 1641.1Sjmcneill 1651.1Sjmcneill hwaes_sc = sc; 1661.1Sjmcneill hwaes_register(); 1671.1Sjmcneill} 1681.1Sjmcneill 1691.1Sjmcneillstatic int 1701.1Sjmcneillhwaes_dma_alloc(struct hwaes_softc *sc, struct hwaes_dma *dma, size_t size, 1711.1Sjmcneill int flags) 1721.1Sjmcneill{ 1731.1Sjmcneill int error, nsegs; 1741.1Sjmcneill 1751.1Sjmcneill dma->dma_size = size; 1761.1Sjmcneill 1771.1Sjmcneill error = bus_dmamem_alloc(sc->sc_dmat, dma->dma_size, HWAES_ALIGN, 0, 1781.1Sjmcneill dma->dma_segs, 1, &nsegs, flags); 1791.1Sjmcneill if (error != 0) { 1801.1Sjmcneill aprint_error_dev(sc->sc_dev, 1811.1Sjmcneill "bus_dmamem_alloc failed: %d\n", error); 1821.1Sjmcneill goto alloc_failed; 1831.1Sjmcneill } 1841.1Sjmcneill error = bus_dmamem_map(sc->sc_dmat, dma->dma_segs, nsegs, 1851.1Sjmcneill dma->dma_size, &dma->dma_addr, flags); 1861.1Sjmcneill if (error != 0) { 1871.1Sjmcneill aprint_error_dev(sc->sc_dev, 1881.1Sjmcneill "bus_dmamem_map failed: %d\n", error); 1891.1Sjmcneill goto map_failed; 1901.1Sjmcneill } 1911.1Sjmcneill error = bus_dmamap_create(sc->sc_dmat, dma->dma_size, nsegs, 1921.1Sjmcneill dma->dma_size, 0, flags, &dma->dma_map); 1931.1Sjmcneill if (error != 0) { 1941.1Sjmcneill aprint_error_dev(sc->sc_dev, 1951.1Sjmcneill "bus_dmamap_create failed: %d\n", error); 1961.1Sjmcneill goto create_failed; 1971.1Sjmcneill } 1981.1Sjmcneill error = bus_dmamap_load(sc->sc_dmat, dma->dma_map, dma->dma_addr, 1991.1Sjmcneill dma->dma_size, NULL, flags); 2001.1Sjmcneill if (error != 0) { 2011.1Sjmcneill aprint_error_dev(sc->sc_dev, 2021.1Sjmcneill "bus_dmamap_load failed: %d\n", error); 2031.1Sjmcneill goto load_failed; 2041.1Sjmcneill } 2051.1Sjmcneill 2061.1Sjmcneill return 0; 2071.1Sjmcneill 2081.1Sjmcneillload_failed: 2091.1Sjmcneill bus_dmamap_destroy(sc->sc_dmat, dma->dma_map); 2101.1Sjmcneillcreate_failed: 2111.1Sjmcneill bus_dmamem_unmap(sc->sc_dmat, dma->dma_addr, dma->dma_size); 2121.1Sjmcneillmap_failed: 2131.1Sjmcneill bus_dmamem_free(sc->sc_dmat, dma->dma_segs, nsegs); 2141.1Sjmcneillalloc_failed: 2151.1Sjmcneill return error; 2161.1Sjmcneill} 2171.1Sjmcneill 2181.1Sjmcneillstatic int 2191.1Sjmcneillhwaes_probe(void) 2201.1Sjmcneill{ 2211.1Sjmcneill return 0; 2221.1Sjmcneill} 2231.1Sjmcneill 2241.1Sjmcneillstatic void 2251.1Sjmcneillhwaes_setenckey(struct aesenc *enc, const uint8_t *key, uint32_t nrounds) 2261.1Sjmcneill{ 2271.1Sjmcneill if (nrounds == AES_128_NROUNDS) { 2281.1Sjmcneill enc->aese_aes.aes_rk[0] = be32dec(key + 4*0); 2291.1Sjmcneill enc->aese_aes.aes_rk[1] = be32dec(key + 4*1); 2301.1Sjmcneill enc->aese_aes.aes_rk[2] = be32dec(key + 4*2); 2311.1Sjmcneill enc->aese_aes.aes_rk[3] = be32dec(key + 4*3); 2321.1Sjmcneill } else { 2331.1Sjmcneill aes_bear_impl.ai_setenckey(enc, key, nrounds); 2341.1Sjmcneill } 2351.1Sjmcneill} 2361.1Sjmcneill 2371.1Sjmcneillstatic void 2381.1Sjmcneillhwaes_setdeckey(struct aesdec *dec, const uint8_t *key, uint32_t nrounds) 2391.1Sjmcneill{ 2401.1Sjmcneill if (nrounds == AES_128_NROUNDS) { 2411.1Sjmcneill dec->aesd_aes.aes_rk[0] = be32dec(key + 4*0); 2421.1Sjmcneill dec->aesd_aes.aes_rk[1] = be32dec(key + 4*1); 2431.1Sjmcneill dec->aesd_aes.aes_rk[2] = be32dec(key + 4*2); 2441.1Sjmcneill dec->aesd_aes.aes_rk[3] = be32dec(key + 4*3); 2451.1Sjmcneill } else { 2461.1Sjmcneill aes_bear_impl.ai_setdeckey(dec, key, nrounds); 2471.1Sjmcneill } 2481.1Sjmcneill} 2491.1Sjmcneill 2501.1Sjmcneillstatic void 2511.1Sjmcneillhwaes_exec_sync(uint32_t flags, uint16_t blocks) 2521.1Sjmcneill{ 2531.1Sjmcneill struct hwaes_softc *sc = hwaes_sc; 2541.1Sjmcneill uint32_t ctrl; 2551.1Sjmcneill 2561.1Sjmcneill KASSERT(blocks > 0); 2571.1Sjmcneill KASSERT(blocks <= HWAES_MAX_BLOCKS); 2581.1Sjmcneill 2591.1Sjmcneill WR4(sc, AES_SRC, sc->sc_dma_bounce.dma_segs[0].ds_addr); 2601.1Sjmcneill WR4(sc, AES_DEST, sc->sc_dma_bounce.dma_segs[0].ds_addr); 2611.1Sjmcneill 2621.1Sjmcneill ctrl = AES_CTRL_EXEC | AES_CTRL_ENA | flags; 2631.1Sjmcneill ctrl |= __SHIFTIN(blocks - 1, AES_CTRL_BLOCKS); 2641.1Sjmcneill 2651.1Sjmcneill WR4(sc, AES_CTRL, ctrl); 2661.1Sjmcneill for (;;) { 2671.1Sjmcneill ctrl = RD4(sc, AES_CTRL); 2681.1Sjmcneill if ((ctrl & AES_CTRL_ERR) != 0) { 2691.1Sjmcneill printf("AES error, AES_CTRL = %#x\n", ctrl); 2701.1Sjmcneill break; 2711.1Sjmcneill } 2721.1Sjmcneill if ((ctrl & AES_CTRL_EXEC) == 0) { 2731.1Sjmcneill break; 2741.1Sjmcneill } 2751.1Sjmcneill } 2761.1Sjmcneill} 2771.1Sjmcneill 2781.1Sjmcneillstatic void 2791.1Sjmcneillhwaes_enc(const struct aesenc *enc, const uint8_t in[static 16], 2801.1Sjmcneill uint8_t out[static 16], uint32_t nrounds) 2811.1Sjmcneill{ 2821.1Sjmcneill struct hwaes_softc *sc = hwaes_sc; 2831.1Sjmcneill unsigned n; 2841.1Sjmcneill int s; 2851.1Sjmcneill 2861.1Sjmcneill if (nrounds != AES_128_NROUNDS) { 2871.1Sjmcneill aes_bear_impl.ai_enc(enc, in, out, nrounds); 2881.1Sjmcneill return; 2891.1Sjmcneill } 2901.1Sjmcneill 2911.1Sjmcneill s = splvm(); 2921.1Sjmcneill 2931.1Sjmcneill for (n = 0; n < 4; n++) { 2941.1Sjmcneill WR4(sc, AES_IV, 0); 2951.1Sjmcneill } 2961.1Sjmcneill for (n = 0; n < 4; n++) { 2971.1Sjmcneill WR4(sc, AES_KEY, enc->aese_aes.aes_rk[n]); 2981.1Sjmcneill } 2991.1Sjmcneill memcpy(sc->sc_dma_bounce.dma_addr, in, HWAES_BLOCK_LEN); 3001.1Sjmcneill bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map, 3011.1Sjmcneill 0, HWAES_BLOCK_LEN, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 3021.1Sjmcneill hwaes_exec_sync(0, 1); 3031.1Sjmcneill bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map, 3041.1Sjmcneill 0, HWAES_BLOCK_LEN, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 3051.1Sjmcneill memcpy(out, sc->sc_dma_bounce.dma_addr, HWAES_BLOCK_LEN); 3061.1Sjmcneill 3071.1Sjmcneill splx(s); 3081.1Sjmcneill} 3091.1Sjmcneill 3101.1Sjmcneillstatic void 3111.1Sjmcneillhwaes_encN(const struct aesenc *enc, const uint8_t in[static 16], 3121.1Sjmcneill uint8_t out[static 16], size_t nblocks) 3131.1Sjmcneill{ 3141.1Sjmcneill for (size_t n = 0; n < nblocks; n++) { 3151.1Sjmcneill hwaes_enc(enc, &in[n * HWAES_BLOCK_LEN], 3161.1Sjmcneill &out[n * HWAES_BLOCK_LEN], AES_128_NROUNDS); 3171.1Sjmcneill } 3181.1Sjmcneill} 3191.1Sjmcneill 3201.1Sjmcneillstatic void 3211.1Sjmcneillhwaes_dec(const struct aesdec *dec, const uint8_t in[static 16], 3221.1Sjmcneill uint8_t out[static 16], uint32_t nrounds) 3231.1Sjmcneill{ 3241.1Sjmcneill struct hwaes_softc *sc = hwaes_sc; 3251.1Sjmcneill unsigned n; 3261.1Sjmcneill int s; 3271.1Sjmcneill 3281.1Sjmcneill if (nrounds != AES_128_NROUNDS) { 3291.1Sjmcneill aes_bear_impl.ai_dec(dec, in, out, nrounds); 3301.1Sjmcneill return; 3311.1Sjmcneill } 3321.1Sjmcneill 3331.1Sjmcneill s = splvm(); 3341.1Sjmcneill 3351.1Sjmcneill for (n = 0; n < 4; n++) { 3361.1Sjmcneill WR4(sc, AES_IV, 0); 3371.1Sjmcneill } 3381.1Sjmcneill for (n = 0; n < 4; n++) { 3391.1Sjmcneill WR4(sc, AES_KEY, dec->aesd_aes.aes_rk[n]); 3401.1Sjmcneill } 3411.1Sjmcneill memcpy(sc->sc_dma_bounce.dma_addr, in, HWAES_BLOCK_LEN); 3421.1Sjmcneill bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map, 3431.1Sjmcneill 0, HWAES_BLOCK_LEN, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 3441.1Sjmcneill hwaes_exec_sync(AES_CTRL_DEC, 1); 3451.1Sjmcneill bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map, 3461.1Sjmcneill 0, HWAES_BLOCK_LEN, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 3471.1Sjmcneill memcpy(out, sc->sc_dma_bounce.dma_addr, HWAES_BLOCK_LEN); 3481.1Sjmcneill 3491.1Sjmcneill splx(s); 3501.1Sjmcneill} 3511.1Sjmcneill 3521.1Sjmcneillstatic void 3531.1Sjmcneillhwaes_decN(const struct aesdec *dec, const uint8_t in[static 16], 3541.1Sjmcneill uint8_t out[static 16], size_t nblocks) 3551.1Sjmcneill{ 3561.1Sjmcneill for (size_t n = 0; n < nblocks; n++) { 3571.1Sjmcneill hwaes_dec(dec, &in[n * HWAES_BLOCK_LEN], 3581.1Sjmcneill &out[n * HWAES_BLOCK_LEN], AES_128_NROUNDS); 3591.1Sjmcneill } 3601.1Sjmcneill} 3611.1Sjmcneill 3621.1Sjmcneillstatic void 3631.1Sjmcneillhwaes_cbc_enc(const struct aesenc *enc, const uint8_t in[static 16], 3641.1Sjmcneill uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16], 3651.1Sjmcneill uint32_t nrounds) 3661.1Sjmcneill{ 3671.1Sjmcneill struct hwaes_softc *sc = hwaes_sc; 3681.1Sjmcneill const uint8_t *inp = in; 3691.1Sjmcneill uint8_t *outp = out; 3701.1Sjmcneill uint32_t flags; 3711.1Sjmcneill unsigned n; 3721.1Sjmcneill int s; 3731.1Sjmcneill 3741.1Sjmcneill if (nrounds != AES_128_NROUNDS) { 3751.1Sjmcneill aes_bear_impl.ai_cbc_enc(enc, in, out, nbytes, iv, nrounds); 3761.1Sjmcneill return; 3771.1Sjmcneill } 3781.1Sjmcneill 3791.1Sjmcneill KASSERT(nbytes % HWAES_BLOCK_LEN == 0); 3801.1Sjmcneill if (nbytes == 0) { 3811.1Sjmcneill return; 3821.1Sjmcneill } 3831.1Sjmcneill 3841.1Sjmcneill s = splvm(); 3851.1Sjmcneill 3861.1Sjmcneill for (n = 0; n < 4; n++) { 3871.1Sjmcneill WR4(sc, AES_IV, be32dec(&iv[n * 4])); 3881.1Sjmcneill } 3891.1Sjmcneill for (n = 0; n < 4; n++) { 3901.1Sjmcneill WR4(sc, AES_KEY, enc->aese_aes.aes_rk[n]); 3911.1Sjmcneill } 3921.1Sjmcneill flags = 0; 3931.1Sjmcneill while (nbytes > 0) { 3941.1Sjmcneill const size_t blocks = MIN(nbytes / HWAES_BLOCK_LEN, 3951.1Sjmcneill HWAES_MAX_BLOCKS); 3961.1Sjmcneill 3971.1Sjmcneill memcpy(sc->sc_dma_bounce.dma_addr, inp, 3981.1Sjmcneill blocks * HWAES_BLOCK_LEN); 3991.1Sjmcneill bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map, 4001.1Sjmcneill 0, blocks * HWAES_BLOCK_LEN, 4011.1Sjmcneill BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 4021.1Sjmcneill hwaes_exec_sync(flags, blocks); 4031.1Sjmcneill bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map, 4041.1Sjmcneill 0, blocks * HWAES_BLOCK_LEN, 4051.1Sjmcneill BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 4061.1Sjmcneill memcpy(outp, sc->sc_dma_bounce.dma_addr, 4071.1Sjmcneill blocks * HWAES_BLOCK_LEN); 4081.1Sjmcneill 4091.1Sjmcneill nbytes -= blocks * HWAES_BLOCK_LEN; 4101.1Sjmcneill inp += blocks * HWAES_BLOCK_LEN; 4111.1Sjmcneill outp += blocks * HWAES_BLOCK_LEN; 4121.1Sjmcneill flags |= AES_CTRL_IV; 4131.1Sjmcneill } 4141.1Sjmcneill 4151.1Sjmcneill memcpy(iv, outp - HWAES_BLOCK_LEN, HWAES_BLOCK_LEN); 4161.1Sjmcneill 4171.1Sjmcneill splx(s); 4181.1Sjmcneill} 4191.1Sjmcneill 4201.1Sjmcneillstatic void 4211.1Sjmcneillhwaes_cbc_dec(const struct aesdec *dec, const uint8_t in[static 16], 4221.1Sjmcneill uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16], 4231.1Sjmcneill uint32_t nrounds) 4241.1Sjmcneill{ 4251.1Sjmcneill struct hwaes_softc *sc = hwaes_sc; 4261.1Sjmcneill const uint8_t *inp = in; 4271.1Sjmcneill uint8_t *outp = out; 4281.1Sjmcneill uint32_t flags; 4291.1Sjmcneill unsigned n; 4301.1Sjmcneill int s; 4311.1Sjmcneill 4321.1Sjmcneill if (nrounds != AES_128_NROUNDS) { 4331.1Sjmcneill aes_bear_impl.ai_cbc_dec(dec, in, out, nbytes, iv, nrounds); 4341.1Sjmcneill return; 4351.1Sjmcneill } 4361.1Sjmcneill 4371.1Sjmcneill KASSERT(nbytes % HWAES_BLOCK_LEN == 0); 4381.1Sjmcneill if (nbytes == 0) { 4391.1Sjmcneill return; 4401.1Sjmcneill } 4411.1Sjmcneill 4421.1Sjmcneill s = splvm(); 4431.1Sjmcneill 4441.1Sjmcneill for (n = 0; n < 4; n++) { 4451.1Sjmcneill WR4(sc, AES_IV, be32dec(&iv[n * 4])); 4461.1Sjmcneill } 4471.1Sjmcneill 4481.1Sjmcneill memcpy(iv, inp + nbytes - HWAES_BLOCK_LEN, HWAES_BLOCK_LEN); 4491.1Sjmcneill 4501.1Sjmcneill for (n = 0; n < 4; n++) { 4511.1Sjmcneill WR4(sc, AES_KEY, dec->aesd_aes.aes_rk[n]); 4521.1Sjmcneill } 4531.1Sjmcneill flags = AES_CTRL_DEC; 4541.1Sjmcneill while (nbytes > 0) { 4551.1Sjmcneill const size_t blocks = MIN(nbytes / HWAES_BLOCK_LEN, 4561.1Sjmcneill HWAES_MAX_BLOCKS); 4571.1Sjmcneill 4581.1Sjmcneill memcpy(sc->sc_dma_bounce.dma_addr, inp, 4591.1Sjmcneill blocks * HWAES_BLOCK_LEN); 4601.1Sjmcneill bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map, 4611.1Sjmcneill 0, blocks * HWAES_BLOCK_LEN, 4621.1Sjmcneill BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 4631.1Sjmcneill hwaes_exec_sync(flags, blocks); 4641.1Sjmcneill bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map, 4651.1Sjmcneill 0, blocks * HWAES_BLOCK_LEN, 4661.1Sjmcneill BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 4671.1Sjmcneill memcpy(outp, sc->sc_dma_bounce.dma_addr, 4681.1Sjmcneill blocks * HWAES_BLOCK_LEN); 4691.1Sjmcneill 4701.1Sjmcneill nbytes -= blocks * HWAES_BLOCK_LEN; 4711.1Sjmcneill inp += blocks * HWAES_BLOCK_LEN; 4721.1Sjmcneill outp += blocks * HWAES_BLOCK_LEN; 4731.1Sjmcneill flags |= AES_CTRL_IV; 4741.1Sjmcneill } 4751.1Sjmcneill 4761.1Sjmcneill splx(s); 4771.1Sjmcneill} 4781.1Sjmcneill 4791.1Sjmcneillstatic void 4801.1Sjmcneillhwaes_xts_update(uint32_t *t0, uint32_t *t1, uint32_t *t2, uint32_t *t3) 4811.1Sjmcneill{ 4821.1Sjmcneill uint32_t s0, s1, s2, s3; 4831.1Sjmcneill 4841.1Sjmcneill s0 = *t0 >> 31; 4851.1Sjmcneill s1 = *t1 >> 31; 4861.1Sjmcneill s2 = *t2 >> 31; 4871.1Sjmcneill s3 = *t3 >> 31; 4881.1Sjmcneill *t0 = (*t0 << 1) ^ (-s3 & 0x87); 4891.1Sjmcneill *t1 = (*t1 << 1) ^ s0; 4901.1Sjmcneill *t2 = (*t2 << 1) ^ s1; 4911.1Sjmcneill *t3 = (*t3 << 1) ^ s2; 4921.1Sjmcneill} 4931.1Sjmcneill 4941.1Sjmcneillstatic void 4951.1Sjmcneillhwaes_xts_enc(const struct aesenc *enc, const uint8_t in[static 16], 4961.1Sjmcneill uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16], 4971.1Sjmcneill uint32_t nrounds) 4981.1Sjmcneill{ 4991.1Sjmcneill uint8_t block[16]; 5001.1Sjmcneill uint8_t tle[16]; 5011.1Sjmcneill uint32_t t[4]; 5021.1Sjmcneill const uint8_t *inp = in; 5031.1Sjmcneill uint8_t *outp = out; 5041.1Sjmcneill 5051.1Sjmcneill if (nrounds != AES_128_NROUNDS) { 5061.1Sjmcneill aes_bear_impl.ai_xts_enc(enc, in, out, nbytes, tweak, nrounds); 5071.1Sjmcneill return; 5081.1Sjmcneill } 5091.1Sjmcneill 5101.1Sjmcneill KASSERT(nbytes % 16 == 0); 5111.1Sjmcneill 5121.1Sjmcneill t[0] = le32dec(tweak + 4*0); 5131.1Sjmcneill t[1] = le32dec(tweak + 4*1); 5141.1Sjmcneill t[2] = le32dec(tweak + 4*2); 5151.1Sjmcneill t[3] = le32dec(tweak + 4*3); 5161.1Sjmcneill 5171.1Sjmcneill while (nbytes > 0) { 5181.1Sjmcneill le32enc(tle + 4*0, t[0]); 5191.1Sjmcneill le32enc(tle + 4*1, t[1]); 5201.1Sjmcneill le32enc(tle + 4*2, t[2]); 5211.1Sjmcneill le32enc(tle + 4*3, t[3]); 5221.1Sjmcneill 5231.1Sjmcneill for (unsigned n = 0; n < 16; n++) { 5241.1Sjmcneill block[n] = inp[n] ^ tle[n]; 5251.1Sjmcneill } 5261.1Sjmcneill 5271.1Sjmcneill hwaes_encN(enc, block, block, 1); 5281.1Sjmcneill 5291.1Sjmcneill for (unsigned n = 0; n < 16; n++) { 5301.1Sjmcneill outp[n] = block[n] ^ tle[n]; 5311.1Sjmcneill } 5321.1Sjmcneill 5331.1Sjmcneill hwaes_xts_update(&t[0], &t[1], &t[2], &t[3]); 5341.1Sjmcneill 5351.1Sjmcneill nbytes -= HWAES_BLOCK_LEN; 5361.1Sjmcneill inp += HWAES_BLOCK_LEN; 5371.1Sjmcneill outp += HWAES_BLOCK_LEN; 5381.1Sjmcneill } 5391.1Sjmcneill 5401.1Sjmcneill le32enc(tweak + 4*0, t[0]); 5411.1Sjmcneill le32enc(tweak + 4*1, t[1]); 5421.1Sjmcneill le32enc(tweak + 4*2, t[2]); 5431.1Sjmcneill le32enc(tweak + 4*3, t[3]); 5441.1Sjmcneill 5451.1Sjmcneill explicit_memset(t, 0, sizeof(t)); 5461.1Sjmcneill explicit_memset(block, 0, sizeof(block)); 5471.1Sjmcneill explicit_memset(tle, 0, sizeof(tle)); 5481.1Sjmcneill} 5491.1Sjmcneill 5501.1Sjmcneillstatic void 5511.1Sjmcneillhwaes_xts_dec(const struct aesdec *dec, const uint8_t in[static 16], 5521.1Sjmcneill uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16], 5531.1Sjmcneill uint32_t nrounds) 5541.1Sjmcneill{ 5551.1Sjmcneill uint8_t block[16]; 5561.1Sjmcneill uint8_t tle[16]; 5571.1Sjmcneill uint32_t t[4]; 5581.1Sjmcneill const uint8_t *inp = in; 5591.1Sjmcneill uint8_t *outp = out; 5601.1Sjmcneill 5611.1Sjmcneill if (nrounds != AES_128_NROUNDS) { 5621.1Sjmcneill aes_bear_impl.ai_xts_dec(dec, in, out, nbytes, tweak, nrounds); 5631.1Sjmcneill return; 5641.1Sjmcneill } 5651.1Sjmcneill 5661.1Sjmcneill KASSERT(nbytes % 16 == 0); 5671.1Sjmcneill 5681.1Sjmcneill t[0] = le32dec(tweak + 4*0); 5691.1Sjmcneill t[1] = le32dec(tweak + 4*1); 5701.1Sjmcneill t[2] = le32dec(tweak + 4*2); 5711.1Sjmcneill t[3] = le32dec(tweak + 4*3); 5721.1Sjmcneill 5731.1Sjmcneill while (nbytes > 0) { 5741.1Sjmcneill le32enc(tle + 4*0, t[0]); 5751.1Sjmcneill le32enc(tle + 4*1, t[1]); 5761.1Sjmcneill le32enc(tle + 4*2, t[2]); 5771.1Sjmcneill le32enc(tle + 4*3, t[3]); 5781.1Sjmcneill 5791.1Sjmcneill for (unsigned n = 0; n < 16; n++) { 5801.1Sjmcneill block[n] = inp[n] ^ tle[n]; 5811.1Sjmcneill } 5821.1Sjmcneill 5831.1Sjmcneill hwaes_decN(dec, block, block, 1); 5841.1Sjmcneill 5851.1Sjmcneill for (unsigned n = 0; n < 16; n++) { 5861.1Sjmcneill outp[n] = block[n] ^ tle[n]; 5871.1Sjmcneill } 5881.1Sjmcneill 5891.1Sjmcneill hwaes_xts_update(&t[0], &t[1], &t[2], &t[3]); 5901.1Sjmcneill 5911.1Sjmcneill nbytes -= HWAES_BLOCK_LEN; 5921.1Sjmcneill inp += HWAES_BLOCK_LEN; 5931.1Sjmcneill outp += HWAES_BLOCK_LEN; 5941.1Sjmcneill } 5951.1Sjmcneill 5961.1Sjmcneill le32enc(tweak + 4*0, t[0]); 5971.1Sjmcneill le32enc(tweak + 4*1, t[1]); 5981.1Sjmcneill le32enc(tweak + 4*2, t[2]); 5991.1Sjmcneill le32enc(tweak + 4*3, t[3]); 6001.1Sjmcneill 6011.1Sjmcneill explicit_memset(t, 0, sizeof(t)); 6021.1Sjmcneill explicit_memset(block, 0, sizeof(block)); 6031.1Sjmcneill explicit_memset(tle, 0, sizeof(tle)); 6041.1Sjmcneill} 6051.1Sjmcneill 6061.1Sjmcneillstatic void 6071.1Sjmcneillhwaes_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16], 6081.1Sjmcneill size_t nbytes, uint8_t auth0[static 16], uint32_t nrounds) 6091.1Sjmcneill{ 6101.1Sjmcneill const uint8_t *inp = in; 6111.1Sjmcneill 6121.1Sjmcneill if (nrounds != AES_128_NROUNDS) { 6131.1Sjmcneill aes_bear_impl.ai_cbcmac_update1(enc, in, nbytes, auth0, nrounds); 6141.1Sjmcneill return; 6151.1Sjmcneill } 6161.1Sjmcneill 6171.1Sjmcneill KASSERT(nbytes % 16 == 0); 6181.1Sjmcneill 6191.1Sjmcneill while (nbytes > 0) { 6201.1Sjmcneill for (unsigned n = 0; n < 16; n++) { 6211.1Sjmcneill auth0[n] = auth0[n] ^ inp[n]; 6221.1Sjmcneill } 6231.1Sjmcneill 6241.1Sjmcneill hwaes_encN(enc, auth0, auth0, 1); 6251.1Sjmcneill 6261.1Sjmcneill nbytes -= HWAES_BLOCK_LEN; 6271.1Sjmcneill inp += HWAES_BLOCK_LEN; 6281.1Sjmcneill } 6291.1Sjmcneill} 6301.1Sjmcneill 6311.1Sjmcneillstatic void 6321.1Sjmcneillhwaes_ccm_enc1(const struct aesenc *enc, const uint8_t in[static 16], 6331.1Sjmcneill uint8_t out[static 16], size_t nbytes, uint8_t authctr0[static 32], 6341.1Sjmcneill uint32_t nrounds) 6351.1Sjmcneill{ 6361.1Sjmcneill const uint8_t *inp = in; 6371.1Sjmcneill uint8_t *outp = out; 6381.1Sjmcneill uint32_t c[4]; 6391.1Sjmcneill 6401.1Sjmcneill if (nrounds != AES_128_NROUNDS) { 6411.1Sjmcneill aes_bear_impl.ai_ccm_enc1(enc, in, out, nbytes, authctr0, nrounds); 6421.1Sjmcneill return; 6431.1Sjmcneill } 6441.1Sjmcneill 6451.1Sjmcneill KASSERT(nbytes % 16 == 0); 6461.1Sjmcneill 6471.1Sjmcneill c[0] = le32dec(authctr0 + 16 + 4*0); 6481.1Sjmcneill c[1] = le32dec(authctr0 + 16 + 4*1); 6491.1Sjmcneill c[2] = le32dec(authctr0 + 16 + 4*2); 6501.1Sjmcneill c[3] = be32dec(authctr0 + 16 + 4*3); 6511.1Sjmcneill 6521.1Sjmcneill while (nbytes > 0) { 6531.1Sjmcneill for (unsigned n = 0; n < 16; n++) { 6541.1Sjmcneill authctr0[n] = authctr0[n] ^ inp[n]; 6551.1Sjmcneill } 6561.1Sjmcneill 6571.1Sjmcneill le32enc(authctr0 + 16 + 4*0, c[0]); 6581.1Sjmcneill le32enc(authctr0 + 16 + 4*1, c[1]); 6591.1Sjmcneill le32enc(authctr0 + 16 + 4*2, c[2]); 6601.1Sjmcneill be32enc(authctr0 + 16 + 4*3, ++c[3]); 6611.1Sjmcneill 6621.1Sjmcneill hwaes_encN(enc, authctr0, authctr0, 2); 6631.1Sjmcneill 6641.1Sjmcneill for (unsigned n = 0; n < 16; n++) { 6651.1Sjmcneill outp[n] = inp[n] ^ authctr0[n + 16]; 6661.1Sjmcneill } 6671.1Sjmcneill 6681.1Sjmcneill nbytes -= HWAES_BLOCK_LEN; 6691.1Sjmcneill inp += HWAES_BLOCK_LEN; 6701.1Sjmcneill outp += HWAES_BLOCK_LEN; 6711.1Sjmcneill } 6721.1Sjmcneill 6731.1Sjmcneill le32enc(authctr0 + 16 + 4*0, c[0]); 6741.1Sjmcneill le32enc(authctr0 + 16 + 4*1, c[1]); 6751.1Sjmcneill le32enc(authctr0 + 16 + 4*2, c[2]); 6761.1Sjmcneill be32enc(authctr0 + 16 + 4*3, c[3]); 6771.1Sjmcneill} 6781.1Sjmcneill 6791.1Sjmcneillstatic void 6801.1Sjmcneillhwaes_ccm_dec1(const struct aesenc *enc, const uint8_t in[static 16], 6811.1Sjmcneill uint8_t out[static 16], size_t nbytes, uint8_t authctr0[static 32], 6821.1Sjmcneill uint32_t nrounds) 6831.1Sjmcneill{ 6841.1Sjmcneill const uint8_t *inp = in; 6851.1Sjmcneill uint8_t *outp = out; 6861.1Sjmcneill uint32_t c[4]; 6871.1Sjmcneill 6881.1Sjmcneill if (nrounds != AES_128_NROUNDS) { 6891.1Sjmcneill aes_bear_impl.ai_ccm_dec1(enc, in, out, nbytes, authctr0, nrounds); 6901.1Sjmcneill return; 6911.1Sjmcneill } 6921.1Sjmcneill 6931.1Sjmcneill KASSERT(nbytes % 16 == 0); 6941.1Sjmcneill 6951.1Sjmcneill c[0] = le32dec(authctr0 + 16 + 4*0); 6961.1Sjmcneill c[1] = le32dec(authctr0 + 16 + 4*1); 6971.1Sjmcneill c[2] = le32dec(authctr0 + 16 + 4*2); 6981.1Sjmcneill c[3] = be32dec(authctr0 + 16 + 4*3); 6991.1Sjmcneill 7001.1Sjmcneill be32enc(authctr0 + 16 + 4*3, ++c[3]); 7011.1Sjmcneill hwaes_encN(enc, authctr0 + 16, authctr0 + 16, 1); 7021.1Sjmcneill 7031.1Sjmcneill while (nbytes > 0) { 7041.1Sjmcneill for (unsigned n = 0; n < 16; n++) { 7051.1Sjmcneill outp[n] = authctr0[n + 16] ^ inp[n]; 7061.1Sjmcneill authctr0[n] = authctr0[n] ^ outp[n]; 7071.1Sjmcneill } 7081.1Sjmcneill nbytes -= HWAES_BLOCK_LEN; 7091.1Sjmcneill if (nbytes == 0) { 7101.1Sjmcneill break; 7111.1Sjmcneill } 7121.1Sjmcneill 7131.1Sjmcneill inp += HWAES_BLOCK_LEN; 7141.1Sjmcneill outp += HWAES_BLOCK_LEN; 7151.1Sjmcneill 7161.1Sjmcneill le32enc(authctr0 + 16 + 4*0, c[0]); 7171.1Sjmcneill le32enc(authctr0 + 16 + 4*1, c[1]); 7181.1Sjmcneill le32enc(authctr0 + 16 + 4*2, c[2]); 7191.1Sjmcneill be32enc(authctr0 + 16 + 4*3, ++c[3]); 7201.1Sjmcneill hwaes_encN(enc, authctr0, authctr0, 2); 7211.1Sjmcneill } 7221.1Sjmcneill hwaes_encN(enc, authctr0, authctr0, 1); 7231.1Sjmcneill 7241.1Sjmcneill le32enc(authctr0 + 16 + 4*0, c[0]); 7251.1Sjmcneill le32enc(authctr0 + 16 + 4*1, c[1]); 7261.1Sjmcneill le32enc(authctr0 + 16 + 4*2, c[2]); 7271.1Sjmcneill be32enc(authctr0 + 16 + 4*3, c[3]); 7281.1Sjmcneill 7291.1Sjmcneill} 7301.1Sjmcneill 7311.1Sjmcneillstatic struct aes_impl aes_hwaes_impl = { 7321.1Sjmcneill .ai_name = NULL, /* filled in by hwaes_register */ 7331.1Sjmcneill .ai_probe = hwaes_probe, 7341.1Sjmcneill .ai_setenckey = hwaes_setenckey, 7351.1Sjmcneill .ai_setdeckey = hwaes_setdeckey, 7361.1Sjmcneill .ai_enc = hwaes_enc, 7371.1Sjmcneill .ai_dec = hwaes_dec, 7381.1Sjmcneill .ai_cbc_enc = hwaes_cbc_enc, 7391.1Sjmcneill .ai_cbc_dec = hwaes_cbc_dec, 7401.1Sjmcneill .ai_xts_enc = hwaes_xts_enc, 7411.1Sjmcneill .ai_xts_dec = hwaes_xts_dec, 7421.1Sjmcneill .ai_cbcmac_update1 = hwaes_cbcmac_update1, 7431.1Sjmcneill .ai_ccm_enc1 = hwaes_ccm_enc1, 7441.1Sjmcneill .ai_ccm_dec1 = hwaes_ccm_dec1, 7451.1Sjmcneill}; 7461.1Sjmcneill 7471.1Sjmcneillstatic void 7481.1Sjmcneillhwaes_register(void) 7491.1Sjmcneill{ 7501.1Sjmcneill if (wiiu_plat) { 7511.1Sjmcneill aes_hwaes_impl.ai_name = "Latte AES engine"; 7521.1Sjmcneill } else { 7531.1Sjmcneill aes_hwaes_impl.ai_name = "Hollywood AES engine"; 7541.1Sjmcneill } 7551.1Sjmcneill aes_md_init(&aes_hwaes_impl); 7561.1Sjmcneill} 757