hwaes.c revision 1.1
/* $NetBSD: hwaes.c,v 1.1 2026/01/09 22:54:30 jmcneill Exp $ */

/*-
 * Copyright (c) 2025 Jared McNeill <jmcneill@invisible.ca>
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * A driver for the Nintendo Wii's AES engine. The driver registers an AES
 * implementation for kernel use via aes_md_init(). AES-128 requests are
 * accelerated by hardware and all other requests are passed through to the
 * default (BearSSL aes_ct) implementation.
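 *
 * The engine is programmed through a small register window: the 128-bit
 * key and IV are loaded a word at a time, data is moved by the engine's
 * own DMA between physical source and destination addresses, and a single
 * control register starts an operation on up to 4096 16-byte blocks.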
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: hwaes.c,v 1.1 2026/01/09 22:54:30 jmcneill Exp $");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/device.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/buf.h>
#include <sys/cpu.h>

#include <machine/wii.h>
#include <machine/wiiu.h>
#include <machine/pio.h>
#include "ahb.h"

#include <crypto/aes/aes.h>
#include <crypto/aes/aes_bear.h>
#include <crypto/aes/aes_impl.h>

/* AES engine registers */
#define AES_CTRL                0x00
#define AES_CTRL_EXEC           __BIT(31)
#define AES_CTRL_IRQ            __BIT(30)
#define AES_CTRL_ERR            __BIT(29)
#define AES_CTRL_ENA            __BIT(28)
#define AES_CTRL_DEC            __BIT(27)
#define AES_CTRL_IV             __BIT(12)
#define AES_CTRL_BLOCKS         __BITS(11, 0)
#define AES_SRC                 0x04
#define AES_DEST                0x08
#define AES_KEY                 0x0c
#define AES_IV                  0x10

/* Register frame size */
#define AES_REG_SIZE            0x14

/* Device limits */
#define HWAES_BLOCK_LEN         16
#define HWAES_ALIGN             16
#define HWAES_MAX_BLOCKS        4096
#define HWAES_MAX_AES_LEN       (HWAES_BLOCK_LEN * HWAES_MAX_BLOCKS)

static int hwaes_match(device_t, cfdata_t, void *);
static void hwaes_attach(device_t, device_t, void *);

struct hwaes_softc;

struct hwaes_dma {
        bus_dmamap_t            dma_map;
        void                    *dma_addr;
        size_t                  dma_size;
        bus_dma_segment_t       dma_segs[1];
};

struct hwaes_softc {
        device_t                sc_dev;
        bus_space_tag_t         sc_bst;
        bus_space_handle_t      sc_bsh;
        bus_dma_tag_t           sc_dmat;
        struct hwaes_dma        sc_dma_bounce;
};

struct hwaes_softc *hwaes_sc;

#define WR4(sc, reg, val) \
        bus_space_write_4((sc)->sc_bst, (sc)->sc_bsh, (reg), (val))
#define RD4(sc, reg) \
        bus_space_read_4((sc)->sc_bst, (sc)->sc_bsh, (reg))

CFATTACH_DECL_NEW(hwaes, sizeof(struct hwaes_softc),
    hwaes_match, hwaes_attach, NULL, NULL);

static int hwaes_dma_alloc(struct hwaes_softc *, struct hwaes_dma *,
    size_t, int);
static void hwaes_register(void);

static int
hwaes_match(device_t parent, cfdata_t cf, void *aux)
{
        return 1;
}

static void
hwaes_attach(device_t parent, device_t self, void *aux)
{
        struct ahb_attach_args *aaa = aux;
        struct hwaes_softc *sc = device_private(self);
        bool enabled;
        int error;

        /*
         * Since aes_md_init() expects per-CPU engines and we only have one,
         * only enable AES offload in single CPU configurations.
         */
        enabled = kcpuset_countset(kcpuset_attached) == 1;

        aprint_naive("\n");
        aprint_normal(": AES engine%s\n", enabled ?
"" : " (disabled)"); 136 if (!enabled) { 137 return; 138 } 139 140 sc->sc_dev = self; 141 sc->sc_dmat = aaa->aaa_dmat; 142 sc->sc_bst = aaa->aaa_bst; 143 error = bus_space_map(sc->sc_bst, aaa->aaa_addr, AES_REG_SIZE, 144 0, &sc->sc_bsh); 145 if (error != 0) { 146 aprint_error_dev(self, "couldn't map registers (%d)\n", error); 147 return; 148 } 149 150 ahb_claim_device(self, IOPAESEN); 151 152 error = hwaes_dma_alloc(sc, &sc->sc_dma_bounce, HWAES_MAX_AES_LEN, 153 BUS_DMA_WAITOK); 154 if (error != 0) { 155 return; 156 } 157 158 WR4(sc, AES_CTRL, 0); 159 for (;;) { 160 if (RD4(sc, AES_CTRL) == 0) { 161 break; 162 } 163 } 164 165 hwaes_sc = sc; 166 hwaes_register(); 167} 168 169static int 170hwaes_dma_alloc(struct hwaes_softc *sc, struct hwaes_dma *dma, size_t size, 171 int flags) 172{ 173 int error, nsegs; 174 175 dma->dma_size = size; 176 177 error = bus_dmamem_alloc(sc->sc_dmat, dma->dma_size, HWAES_ALIGN, 0, 178 dma->dma_segs, 1, &nsegs, flags); 179 if (error != 0) { 180 aprint_error_dev(sc->sc_dev, 181 "bus_dmamem_alloc failed: %d\n", error); 182 goto alloc_failed; 183 } 184 error = bus_dmamem_map(sc->sc_dmat, dma->dma_segs, nsegs, 185 dma->dma_size, &dma->dma_addr, flags); 186 if (error != 0) { 187 aprint_error_dev(sc->sc_dev, 188 "bus_dmamem_map failed: %d\n", error); 189 goto map_failed; 190 } 191 error = bus_dmamap_create(sc->sc_dmat, dma->dma_size, nsegs, 192 dma->dma_size, 0, flags, &dma->dma_map); 193 if (error != 0) { 194 aprint_error_dev(sc->sc_dev, 195 "bus_dmamap_create failed: %d\n", error); 196 goto create_failed; 197 } 198 error = bus_dmamap_load(sc->sc_dmat, dma->dma_map, dma->dma_addr, 199 dma->dma_size, NULL, flags); 200 if (error != 0) { 201 aprint_error_dev(sc->sc_dev, 202 "bus_dmamap_load failed: %d\n", error); 203 goto load_failed; 204 } 205 206 return 0; 207 208load_failed: 209 bus_dmamap_destroy(sc->sc_dmat, dma->dma_map); 210create_failed: 211 bus_dmamem_unmap(sc->sc_dmat, dma->dma_addr, dma->dma_size); 212map_failed: 213 bus_dmamem_free(sc->sc_dmat, dma->dma_segs, nsegs); 214alloc_failed: 215 return error; 216} 217 218static int 219hwaes_probe(void) 220{ 221 return 0; 222} 223 224static void 225hwaes_setenckey(struct aesenc *enc, const uint8_t *key, uint32_t nrounds) 226{ 227 if (nrounds == AES_128_NROUNDS) { 228 enc->aese_aes.aes_rk[0] = be32dec(key + 4*0); 229 enc->aese_aes.aes_rk[1] = be32dec(key + 4*1); 230 enc->aese_aes.aes_rk[2] = be32dec(key + 4*2); 231 enc->aese_aes.aes_rk[3] = be32dec(key + 4*3); 232 } else { 233 aes_bear_impl.ai_setenckey(enc, key, nrounds); 234 } 235} 236 237static void 238hwaes_setdeckey(struct aesdec *dec, const uint8_t *key, uint32_t nrounds) 239{ 240 if (nrounds == AES_128_NROUNDS) { 241 dec->aesd_aes.aes_rk[0] = be32dec(key + 4*0); 242 dec->aesd_aes.aes_rk[1] = be32dec(key + 4*1); 243 dec->aesd_aes.aes_rk[2] = be32dec(key + 4*2); 244 dec->aesd_aes.aes_rk[3] = be32dec(key + 4*3); 245 } else { 246 aes_bear_impl.ai_setdeckey(dec, key, nrounds); 247 } 248} 249 250static void 251hwaes_exec_sync(uint32_t flags, uint16_t blocks) 252{ 253 struct hwaes_softc *sc = hwaes_sc; 254 uint32_t ctrl; 255 256 KASSERT(blocks > 0); 257 KASSERT(blocks <= HWAES_MAX_BLOCKS); 258 259 WR4(sc, AES_SRC, sc->sc_dma_bounce.dma_segs[0].ds_addr); 260 WR4(sc, AES_DEST, sc->sc_dma_bounce.dma_segs[0].ds_addr); 261 262 ctrl = AES_CTRL_EXEC | AES_CTRL_ENA | flags; 263 ctrl |= __SHIFTIN(blocks - 1, AES_CTRL_BLOCKS); 264 265 WR4(sc, AES_CTRL, ctrl); 266 for (;;) { 267 ctrl = RD4(sc, AES_CTRL); 268 if ((ctrl & AES_CTRL_ERR) != 0) { 269 printf("AES error, AES_CTRL = 
%#x\n", ctrl); 270 break; 271 } 272 if ((ctrl & AES_CTRL_EXEC) == 0) { 273 break; 274 } 275 } 276} 277 278static void 279hwaes_enc(const struct aesenc *enc, const uint8_t in[static 16], 280 uint8_t out[static 16], uint32_t nrounds) 281{ 282 struct hwaes_softc *sc = hwaes_sc; 283 unsigned n; 284 int s; 285 286 if (nrounds != AES_128_NROUNDS) { 287 aes_bear_impl.ai_enc(enc, in, out, nrounds); 288 return; 289 } 290 291 s = splvm(); 292 293 for (n = 0; n < 4; n++) { 294 WR4(sc, AES_IV, 0); 295 } 296 for (n = 0; n < 4; n++) { 297 WR4(sc, AES_KEY, enc->aese_aes.aes_rk[n]); 298 } 299 memcpy(sc->sc_dma_bounce.dma_addr, in, HWAES_BLOCK_LEN); 300 bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map, 301 0, HWAES_BLOCK_LEN, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 302 hwaes_exec_sync(0, 1); 303 bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map, 304 0, HWAES_BLOCK_LEN, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 305 memcpy(out, sc->sc_dma_bounce.dma_addr, HWAES_BLOCK_LEN); 306 307 splx(s); 308} 309 310static void 311hwaes_encN(const struct aesenc *enc, const uint8_t in[static 16], 312 uint8_t out[static 16], size_t nblocks) 313{ 314 for (size_t n = 0; n < nblocks; n++) { 315 hwaes_enc(enc, &in[n * HWAES_BLOCK_LEN], 316 &out[n * HWAES_BLOCK_LEN], AES_128_NROUNDS); 317 } 318} 319 320static void 321hwaes_dec(const struct aesdec *dec, const uint8_t in[static 16], 322 uint8_t out[static 16], uint32_t nrounds) 323{ 324 struct hwaes_softc *sc = hwaes_sc; 325 unsigned n; 326 int s; 327 328 if (nrounds != AES_128_NROUNDS) { 329 aes_bear_impl.ai_dec(dec, in, out, nrounds); 330 return; 331 } 332 333 s = splvm(); 334 335 for (n = 0; n < 4; n++) { 336 WR4(sc, AES_IV, 0); 337 } 338 for (n = 0; n < 4; n++) { 339 WR4(sc, AES_KEY, dec->aesd_aes.aes_rk[n]); 340 } 341 memcpy(sc->sc_dma_bounce.dma_addr, in, HWAES_BLOCK_LEN); 342 bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map, 343 0, HWAES_BLOCK_LEN, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE); 344 hwaes_exec_sync(AES_CTRL_DEC, 1); 345 bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map, 346 0, HWAES_BLOCK_LEN, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE); 347 memcpy(out, sc->sc_dma_bounce.dma_addr, HWAES_BLOCK_LEN); 348 349 splx(s); 350} 351 352static void 353hwaes_decN(const struct aesdec *dec, const uint8_t in[static 16], 354 uint8_t out[static 16], size_t nblocks) 355{ 356 for (size_t n = 0; n < nblocks; n++) { 357 hwaes_dec(dec, &in[n * HWAES_BLOCK_LEN], 358 &out[n * HWAES_BLOCK_LEN], AES_128_NROUNDS); 359 } 360} 361 362static void 363hwaes_cbc_enc(const struct aesenc *enc, const uint8_t in[static 16], 364 uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16], 365 uint32_t nrounds) 366{ 367 struct hwaes_softc *sc = hwaes_sc; 368 const uint8_t *inp = in; 369 uint8_t *outp = out; 370 uint32_t flags; 371 unsigned n; 372 int s; 373 374 if (nrounds != AES_128_NROUNDS) { 375 aes_bear_impl.ai_cbc_enc(enc, in, out, nbytes, iv, nrounds); 376 return; 377 } 378 379 KASSERT(nbytes % HWAES_BLOCK_LEN == 0); 380 if (nbytes == 0) { 381 return; 382 } 383 384 s = splvm(); 385 386 for (n = 0; n < 4; n++) { 387 WR4(sc, AES_IV, be32dec(&iv[n * 4])); 388 } 389 for (n = 0; n < 4; n++) { 390 WR4(sc, AES_KEY, enc->aese_aes.aes_rk[n]); 391 } 392 flags = 0; 393 while (nbytes > 0) { 394 const size_t blocks = MIN(nbytes / HWAES_BLOCK_LEN, 395 HWAES_MAX_BLOCKS); 396 397 memcpy(sc->sc_dma_bounce.dma_addr, inp, 398 blocks * HWAES_BLOCK_LEN); 399 bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map, 400 0, blocks * HWAES_BLOCK_LEN, 401 BUS_DMASYNC_PREREAD | 
        while (nbytes > 0) {
                const size_t blocks = MIN(nbytes / HWAES_BLOCK_LEN,
                    HWAES_MAX_BLOCKS);

                memcpy(sc->sc_dma_bounce.dma_addr, inp,
                    blocks * HWAES_BLOCK_LEN);
                bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map,
                    0, blocks * HWAES_BLOCK_LEN,
                    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
                hwaes_exec_sync(flags, blocks);
                bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map,
                    0, blocks * HWAES_BLOCK_LEN,
                    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
                memcpy(outp, sc->sc_dma_bounce.dma_addr,
                    blocks * HWAES_BLOCK_LEN);

                nbytes -= blocks * HWAES_BLOCK_LEN;
                inp += blocks * HWAES_BLOCK_LEN;
                outp += blocks * HWAES_BLOCK_LEN;
                flags |= AES_CTRL_IV;
        }

        memcpy(iv, outp - HWAES_BLOCK_LEN, HWAES_BLOCK_LEN);

        splx(s);
}

static void
hwaes_cbc_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
        struct hwaes_softc *sc = hwaes_sc;
        const uint8_t *inp = in;
        uint8_t *outp = out;
        uint32_t flags;
        unsigned n;
        int s;

        if (nrounds != AES_128_NROUNDS) {
                aes_bear_impl.ai_cbc_dec(dec, in, out, nbytes, iv, nrounds);
                return;
        }

        KASSERT(nbytes % HWAES_BLOCK_LEN == 0);
        if (nbytes == 0) {
                return;
        }

        s = splvm();

        for (n = 0; n < 4; n++) {
                WR4(sc, AES_IV, be32dec(&iv[n * 4]));
        }

        memcpy(iv, inp + nbytes - HWAES_BLOCK_LEN, HWAES_BLOCK_LEN);

        for (n = 0; n < 4; n++) {
                WR4(sc, AES_KEY, dec->aesd_aes.aes_rk[n]);
        }
        flags = AES_CTRL_DEC;
        while (nbytes > 0) {
                const size_t blocks = MIN(nbytes / HWAES_BLOCK_LEN,
                    HWAES_MAX_BLOCKS);

                memcpy(sc->sc_dma_bounce.dma_addr, inp,
                    blocks * HWAES_BLOCK_LEN);
                bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map,
                    0, blocks * HWAES_BLOCK_LEN,
                    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
                hwaes_exec_sync(flags, blocks);
                bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map,
                    0, blocks * HWAES_BLOCK_LEN,
                    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
                memcpy(outp, sc->sc_dma_bounce.dma_addr,
                    blocks * HWAES_BLOCK_LEN);

                nbytes -= blocks * HWAES_BLOCK_LEN;
                inp += blocks * HWAES_BLOCK_LEN;
                outp += blocks * HWAES_BLOCK_LEN;
                flags |= AES_CTRL_IV;
        }

        splx(s);
}

/*
 * Multiply the 128-bit tweak by x in GF(2^128), little-endian
 * convention, with reduction polynomial x^128 + x^7 + x^2 + x + 1
 * (hence the 0x87 constant).
 */
static void
hwaes_xts_update(uint32_t *t0, uint32_t *t1, uint32_t *t2, uint32_t *t3)
{
        uint32_t s0, s1, s2, s3;

        s0 = *t0 >> 31;
        s1 = *t1 >> 31;
        s2 = *t2 >> 31;
        s3 = *t3 >> 31;
        *t0 = (*t0 << 1) ^ (-s3 & 0x87);
        *t1 = (*t1 << 1) ^ s0;
        *t2 = (*t2 << 1) ^ s1;
        *t3 = (*t3 << 1) ^ s2;
}

static void
hwaes_xts_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
        uint8_t block[16];
        uint8_t tle[16];
        uint32_t t[4];
        const uint8_t *inp = in;
        uint8_t *outp = out;

        if (nrounds != AES_128_NROUNDS) {
                aes_bear_impl.ai_xts_enc(enc, in, out, nbytes, tweak, nrounds);
                return;
        }

        KASSERT(nbytes % 16 == 0);

        t[0] = le32dec(tweak + 4*0);
        t[1] = le32dec(tweak + 4*1);
        t[2] = le32dec(tweak + 4*2);
        t[3] = le32dec(tweak + 4*3);

        while (nbytes > 0) {
                le32enc(tle + 4*0, t[0]);
                le32enc(tle + 4*1, t[1]);
                le32enc(tle + 4*2, t[2]);
                le32enc(tle + 4*3, t[3]);

                for (unsigned n = 0; n < 16; n++) {
                        block[n] = inp[n] ^ tle[n];
                }

                hwaes_encN(enc, block, block, 1);

                for (unsigned n = 0; n < 16; n++) {
                        outp[n] = block[n] ^ tle[n];
                }

                hwaes_xts_update(&t[0], &t[1], &t[2], &t[3]);

                nbytes -= HWAES_BLOCK_LEN;
                inp += HWAES_BLOCK_LEN;
                outp += HWAES_BLOCK_LEN;
        }

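        /* Return the updated tweak so the caller can continue later. */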
        le32enc(tweak + 4*0, t[0]);
        le32enc(tweak + 4*1, t[1]);
        le32enc(tweak + 4*2, t[2]);
        le32enc(tweak + 4*3, t[3]);

        explicit_memset(t, 0, sizeof(t));
        explicit_memset(block, 0, sizeof(block));
        explicit_memset(tle, 0, sizeof(tle));
}

static void
hwaes_xts_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
        uint8_t block[16];
        uint8_t tle[16];
        uint32_t t[4];
        const uint8_t *inp = in;
        uint8_t *outp = out;

        if (nrounds != AES_128_NROUNDS) {
                aes_bear_impl.ai_xts_dec(dec, in, out, nbytes, tweak, nrounds);
                return;
        }

        KASSERT(nbytes % 16 == 0);

        t[0] = le32dec(tweak + 4*0);
        t[1] = le32dec(tweak + 4*1);
        t[2] = le32dec(tweak + 4*2);
        t[3] = le32dec(tweak + 4*3);

        while (nbytes > 0) {
                le32enc(tle + 4*0, t[0]);
                le32enc(tle + 4*1, t[1]);
                le32enc(tle + 4*2, t[2]);
                le32enc(tle + 4*3, t[3]);

                for (unsigned n = 0; n < 16; n++) {
                        block[n] = inp[n] ^ tle[n];
                }

                hwaes_decN(dec, block, block, 1);

                for (unsigned n = 0; n < 16; n++) {
                        outp[n] = block[n] ^ tle[n];
                }

                hwaes_xts_update(&t[0], &t[1], &t[2], &t[3]);

                nbytes -= HWAES_BLOCK_LEN;
                inp += HWAES_BLOCK_LEN;
                outp += HWAES_BLOCK_LEN;
        }

        le32enc(tweak + 4*0, t[0]);
        le32enc(tweak + 4*1, t[1]);
        le32enc(tweak + 4*2, t[2]);
        le32enc(tweak + 4*3, t[3]);

        explicit_memset(t, 0, sizeof(t));
        explicit_memset(block, 0, sizeof(block));
        explicit_memset(tle, 0, sizeof(tle));
}

static void
hwaes_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16],
    size_t nbytes, uint8_t auth0[static 16], uint32_t nrounds)
{
        const uint8_t *inp = in;

        if (nrounds != AES_128_NROUNDS) {
                aes_bear_impl.ai_cbcmac_update1(enc, in, nbytes, auth0,
                    nrounds);
                return;
        }

        KASSERT(nbytes % 16 == 0);

        while (nbytes > 0) {
                for (unsigned n = 0; n < 16; n++) {
                        auth0[n] = auth0[n] ^ inp[n];
                }

                hwaes_encN(enc, auth0, auth0, 1);

                nbytes -= HWAES_BLOCK_LEN;
                inp += HWAES_BLOCK_LEN;
        }
}

static void
hwaes_ccm_enc1(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t authctr0[static 32],
    uint32_t nrounds)
{
        const uint8_t *inp = in;
        uint8_t *outp = out;
        uint32_t c[4];

        if (nrounds != AES_128_NROUNDS) {
                aes_bear_impl.ai_ccm_enc1(enc, in, out, nbytes, authctr0,
                    nrounds);
                return;
        }

        KASSERT(nbytes % 16 == 0);

        c[0] = le32dec(authctr0 + 16 + 4*0);
        c[1] = le32dec(authctr0 + 16 + 4*1);
        c[2] = le32dec(authctr0 + 16 + 4*2);
        c[3] = be32dec(authctr0 + 16 + 4*3);

        while (nbytes > 0) {
                for (unsigned n = 0; n < 16; n++) {
                        authctr0[n] = authctr0[n] ^ inp[n];
                }

                le32enc(authctr0 + 16 + 4*0, c[0]);
                le32enc(authctr0 + 16 + 4*1, c[1]);
                le32enc(authctr0 + 16 + 4*2, c[2]);
                be32enc(authctr0 + 16 + 4*3, ++c[3]);

                hwaes_encN(enc, authctr0, authctr0, 2);

                for (unsigned n = 0; n < 16; n++) {
                        outp[n] = inp[n] ^ authctr0[n + 16];
                }

                nbytes -= HWAES_BLOCK_LEN;
                inp += HWAES_BLOCK_LEN;
                outp += HWAES_BLOCK_LEN;
        }

        le32enc(authctr0 + 16 + 4*0, c[0]);
        le32enc(authctr0 + 16 + 4*1, c[1]);
        le32enc(authctr0 + 16 + 4*2, c[2]);
        be32enc(authctr0 + 16 + 4*3, c[3]);
}

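/*
 * CCM decryption: the plaintext must exist before it can be folded into
 * the CBC-MAC, so the first keystream block is computed up front; each
 * later iteration then encrypts the pending MAC block and the next
 * counter block together as a single two-block pass.
 */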
static void
hwaes_ccm_dec1(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t authctr0[static 32],
    uint32_t nrounds)
{
        const uint8_t *inp = in;
        uint8_t *outp = out;
        uint32_t c[4];

        if (nrounds != AES_128_NROUNDS) {
                aes_bear_impl.ai_ccm_dec1(enc, in, out, nbytes, authctr0,
                    nrounds);
                return;
        }

        KASSERT(nbytes % 16 == 0);

        c[0] = le32dec(authctr0 + 16 + 4*0);
        c[1] = le32dec(authctr0 + 16 + 4*1);
        c[2] = le32dec(authctr0 + 16 + 4*2);
        c[3] = be32dec(authctr0 + 16 + 4*3);

        be32enc(authctr0 + 16 + 4*3, ++c[3]);
        hwaes_encN(enc, authctr0 + 16, authctr0 + 16, 1);

        while (nbytes > 0) {
                for (unsigned n = 0; n < 16; n++) {
                        outp[n] = authctr0[n + 16] ^ inp[n];
                        authctr0[n] = authctr0[n] ^ outp[n];
                }
                nbytes -= HWAES_BLOCK_LEN;
                if (nbytes == 0) {
                        break;
                }

                inp += HWAES_BLOCK_LEN;
                outp += HWAES_BLOCK_LEN;

                le32enc(authctr0 + 16 + 4*0, c[0]);
                le32enc(authctr0 + 16 + 4*1, c[1]);
                le32enc(authctr0 + 16 + 4*2, c[2]);
                be32enc(authctr0 + 16 + 4*3, ++c[3]);
                hwaes_encN(enc, authctr0, authctr0, 2);
        }
        hwaes_encN(enc, authctr0, authctr0, 1);

        le32enc(authctr0 + 16 + 4*0, c[0]);
        le32enc(authctr0 + 16 + 4*1, c[1]);
        le32enc(authctr0 + 16 + 4*2, c[2]);
        be32enc(authctr0 + 16 + 4*3, c[3]);
}

static struct aes_impl aes_hwaes_impl = {
        .ai_name = NULL,        /* filled in by hwaes_register */
        .ai_probe = hwaes_probe,
        .ai_setenckey = hwaes_setenckey,
        .ai_setdeckey = hwaes_setdeckey,
        .ai_enc = hwaes_enc,
        .ai_dec = hwaes_dec,
        .ai_cbc_enc = hwaes_cbc_enc,
        .ai_cbc_dec = hwaes_cbc_dec,
        .ai_xts_enc = hwaes_xts_enc,
        .ai_xts_dec = hwaes_xts_dec,
        .ai_cbcmac_update1 = hwaes_cbcmac_update1,
        .ai_ccm_enc1 = hwaes_ccm_enc1,
        .ai_ccm_dec1 = hwaes_ccm_dec1,
};

static void
hwaes_register(void)
{
        if (wiiu_plat) {
                aes_hwaes_impl.ai_name = "Latte AES engine";
        } else {
                aes_hwaes_impl.ai_name = "Hollywood AES engine";
        }
        aes_md_init(&aes_hwaes_impl);
}