/* $NetBSD: hwaes.c,v 1.1 2025/11/11 21:34:48 jmcneill Exp $ */

/*-
 * Copyright (c) 2025 Jared McNeill <jmcneill@invisible.ca>
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * A driver for the Nintendo Wii's AES engine. The driver registers an AES
 * implementation for kernel use via aes_md_init(). AES-128 requests are
 * accelerated by hardware and all other requests are passed through to the
 * default (BearSSL aes_ct) implementation.
 */
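/*
 * Illustrative usage, not part of this driver: once aes_md_init()
 * has selected this implementation, AES-128 consumers of the generic
 * <crypto/aes/aes.h> entry points are dispatched here transparently,
 * along the lines of:
 *
 *	struct aesenc enc;
 *	aes_setenckey128(&enc, key);
 *	aes_enc(&enc, in, out, AES_128_NROUNDS);
 */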
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: hwaes.c,v 1.1 2025/11/11 21:34:48 jmcneill Exp $");

#include <sys/param.h>
#include <sys/bus.h>
#include <sys/device.h>
#include <sys/systm.h>
#include <sys/callout.h>
#include <sys/buf.h>

#include <machine/wii.h>
#include <machine/pio.h>
#include "hollywood.h"

#include <crypto/aes/aes.h>
#include <crypto/aes/aes_bear.h>
#include <crypto/aes/aes_impl.h>

/* AES engine registers */
#define AES_CTRL		0x00
#define AES_CTRL_EXEC		__BIT(31)
#define AES_CTRL_IRQ		__BIT(30)
#define AES_CTRL_ERR		__BIT(29)
#define AES_CTRL_ENA		__BIT(28)
#define AES_CTRL_DEC		__BIT(27)
#define AES_CTRL_IV		__BIT(12)
#define AES_CTRL_BLOCKS		__BITS(11, 0)
#define AES_SRC			0x04
#define AES_DEST		0x08
#define AES_KEY			0x0c
#define AES_IV			0x10

/* Register frame size */
#define AES_REG_SIZE		0x14

/* Device limits */
#define HWAES_BLOCK_LEN		16
#define HWAES_ALIGN		16
#define HWAES_MAX_BLOCKS	4096
#define HWAES_MAX_AES_LEN	(HWAES_BLOCK_LEN * HWAES_MAX_BLOCKS)

static int	hwaes_match(device_t, cfdata_t, void *);
static void	hwaes_attach(device_t, device_t, void *);

struct hwaes_softc;

struct hwaes_dma {
	bus_dmamap_t		dma_map;
	void			*dma_addr;
	size_t			dma_size;
	bus_dma_segment_t	dma_segs[1];
};

struct hwaes_softc {
	device_t		sc_dev;
	bus_space_tag_t		sc_bst;
	bus_space_handle_t	sc_bsh;
	bus_dma_tag_t		sc_dmat;
	struct hwaes_dma	sc_dma_bounce;
};

struct hwaes_softc *hwaes_sc;

#define WR4(sc, reg, val)	\
	bus_space_write_4((sc)->sc_bst, (sc)->sc_bsh, (reg), (val))
#define RD4(sc, reg)		\
	bus_space_read_4((sc)->sc_bst, (sc)->sc_bsh, (reg))

CFATTACH_DECL_NEW(hwaes, sizeof(struct hwaes_softc),
    hwaes_match, hwaes_attach, NULL, NULL);

static int	hwaes_dma_alloc(struct hwaes_softc *, struct hwaes_dma *,
		    size_t, int);
static void	hwaes_register(void);

static int
hwaes_match(device_t parent, cfdata_t cf, void *aux)
{
	return 1;
}

static void
hwaes_attach(device_t parent, device_t self, void *aux)
{
	struct hollywood_attach_args *haa = aux;
	struct hwaes_softc *sc = device_private(self);
	int error;

	sc->sc_dev = self;
	sc->sc_dmat = haa->haa_dmat;
	sc->sc_bst = haa->haa_bst;
	error = bus_space_map(sc->sc_bst, haa->haa_addr, AES_REG_SIZE,
	    0, &sc->sc_bsh);
	if (error != 0) {
		aprint_error(": couldn't map registers (%d)\n", error);
		return;
	}

	aprint_naive("\n");
	aprint_normal(": AES engine\n");

	hollywood_claim_device(self, IOPAESEN);

	error = hwaes_dma_alloc(sc, &sc->sc_dma_bounce, HWAES_MAX_AES_LEN,
	    BUS_DMA_WAITOK);
	if (error != 0) {
		return;
	}

	/* Disable the engine and wait for the control register to clear. */
	WR4(sc, AES_CTRL, 0);
	for (;;) {
		if (RD4(sc, AES_CTRL) == 0) {
			break;
		}
	}

	hwaes_sc = sc;
	hwaes_register();
}
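/*
 * The engine takes a single physical source and destination address
 * (AES_SRC/AES_DEST) and has no scatter/gather support, so every
 * request is staged through one physically contiguous bounce buffer.
 */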
static int
hwaes_dma_alloc(struct hwaes_softc *sc, struct hwaes_dma *dma, size_t size,
    int flags)
{
	int error, nsegs;

	dma->dma_size = size;

	error = bus_dmamem_alloc(sc->sc_dmat, dma->dma_size, HWAES_ALIGN, 0,
	    dma->dma_segs, 1, &nsegs, flags);
	if (error != 0) {
		aprint_error_dev(sc->sc_dev,
		    "bus_dmamem_alloc failed: %d\n", error);
		goto alloc_failed;
	}
	error = bus_dmamem_map(sc->sc_dmat, dma->dma_segs, nsegs,
	    dma->dma_size, &dma->dma_addr, flags);
	if (error != 0) {
		aprint_error_dev(sc->sc_dev,
		    "bus_dmamem_map failed: %d\n", error);
		goto map_failed;
	}
	error = bus_dmamap_create(sc->sc_dmat, dma->dma_size, nsegs,
	    dma->dma_size, 0, flags, &dma->dma_map);
	if (error != 0) {
		aprint_error_dev(sc->sc_dev,
		    "bus_dmamap_create failed: %d\n", error);
		goto create_failed;
	}
	error = bus_dmamap_load(sc->sc_dmat, dma->dma_map, dma->dma_addr,
	    dma->dma_size, NULL, flags);
	if (error != 0) {
		aprint_error_dev(sc->sc_dev,
		    "bus_dmamap_load failed: %d\n", error);
		goto load_failed;
	}

	return 0;

load_failed:
	bus_dmamap_destroy(sc->sc_dmat, dma->dma_map);
create_failed:
	bus_dmamem_unmap(sc->sc_dmat, dma->dma_addr, dma->dma_size);
map_failed:
	bus_dmamem_free(sc->sc_dmat, dma->dma_segs, nsegs);
alloc_failed:
	return error;
}

static int
hwaes_probe(void)
{
	/* The hardware was already found at attach time; report success. */
	return 0;
}

static void
hwaes_setenckey(struct aesenc *enc, const uint8_t *key, uint32_t nrounds)
{
	if (nrounds == AES_128_NROUNDS) {
		/*
		 * The engine performs its own key schedule, so only the
		 * four raw key words need to be kept.
		 */
		enc->aese_aes.aes_rk[0] = be32dec(key + 4*0);
		enc->aese_aes.aes_rk[1] = be32dec(key + 4*1);
		enc->aese_aes.aes_rk[2] = be32dec(key + 4*2);
		enc->aese_aes.aes_rk[3] = be32dec(key + 4*3);
	} else {
		aes_bear_impl.ai_setenckey(enc, key, nrounds);
	}
}

static void
hwaes_setdeckey(struct aesdec *dec, const uint8_t *key, uint32_t nrounds)
{
	if (nrounds == AES_128_NROUNDS) {
		dec->aesd_aes.aes_rk[0] = be32dec(key + 4*0);
		dec->aesd_aes.aes_rk[1] = be32dec(key + 4*1);
		dec->aesd_aes.aes_rk[2] = be32dec(key + 4*2);
		dec->aesd_aes.aes_rk[3] = be32dec(key + 4*3);
	} else {
		aes_bear_impl.ai_setdeckey(dec, key, nrounds);
	}
}
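/*
 * Kick off a command on the staged bounce buffer and spin until the
 * engine clears AES_CTRL_EXEC. A single command processes up to
 * HWAES_MAX_BLOCKS blocks; AES_CTRL_BLOCKS holds the count minus one.
 */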
static void
hwaes_exec_sync(uint32_t flags, uint16_t blocks)
{
	struct hwaes_softc *sc = hwaes_sc;
	uint32_t ctrl;

	KASSERT(blocks > 0);
	KASSERT(blocks <= HWAES_MAX_BLOCKS);

	WR4(sc, AES_SRC, sc->sc_dma_bounce.dma_segs[0].ds_addr);
	WR4(sc, AES_DEST, sc->sc_dma_bounce.dma_segs[0].ds_addr);

	ctrl = AES_CTRL_EXEC | AES_CTRL_ENA | flags;
	ctrl |= __SHIFTIN(blocks - 1, AES_CTRL_BLOCKS);

	WR4(sc, AES_CTRL, ctrl);
	for (;;) {
		ctrl = RD4(sc, AES_CTRL);
		if ((ctrl & AES_CTRL_ERR) != 0) {
			printf("AES error, AES_CTRL = %#x\n", ctrl);
			break;
		}
		if ((ctrl & AES_CTRL_EXEC) == 0) {
			break;
		}
	}
}

static void
hwaes_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{
	struct hwaes_softc *sc = hwaes_sc;
	unsigned n;
	int s;

	if (nrounds != AES_128_NROUNDS) {
		aes_bear_impl.ai_enc(enc, in, out, nrounds);
		return;
	}

	s = splvm();

	for (n = 0; n < 4; n++) {
		WR4(sc, AES_IV, 0);
	}
	for (n = 0; n < 4; n++) {
		WR4(sc, AES_KEY, enc->aese_aes.aes_rk[n]);
	}
	memcpy(sc->sc_dma_bounce.dma_addr, in, HWAES_BLOCK_LEN);
	bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map,
	    0, HWAES_BLOCK_LEN, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	hwaes_exec_sync(0, 1);
	bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map,
	    0, HWAES_BLOCK_LEN, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
	memcpy(out, sc->sc_dma_bounce.dma_addr, HWAES_BLOCK_LEN);

	splx(s);
}

static void
hwaes_encN(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks)
{
	for (size_t n = 0; n < nblocks; n++) {
		hwaes_enc(enc, &in[n * HWAES_BLOCK_LEN],
		    &out[n * HWAES_BLOCK_LEN], AES_128_NROUNDS);
	}
}

static void
hwaes_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{
	struct hwaes_softc *sc = hwaes_sc;
	unsigned n;
	int s;

	if (nrounds != AES_128_NROUNDS) {
		aes_bear_impl.ai_dec(dec, in, out, nrounds);
		return;
	}

	s = splvm();

	for (n = 0; n < 4; n++) {
		WR4(sc, AES_IV, 0);
	}
	for (n = 0; n < 4; n++) {
		WR4(sc, AES_KEY, dec->aesd_aes.aes_rk[n]);
	}
	memcpy(sc->sc_dma_bounce.dma_addr, in, HWAES_BLOCK_LEN);
	bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map,
	    0, HWAES_BLOCK_LEN, BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
	hwaes_exec_sync(AES_CTRL_DEC, 1);
	bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map,
	    0, HWAES_BLOCK_LEN, BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
	memcpy(out, sc->sc_dma_bounce.dma_addr, HWAES_BLOCK_LEN);

	splx(s);
}

static void
hwaes_decN(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks)
{
	for (size_t n = 0; n < nblocks; n++) {
		hwaes_dec(dec, &in[n * HWAES_BLOCK_LEN],
		    &out[n * HWAES_BLOCK_LEN], AES_128_NROUNDS);
	}
}
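/*
 * CBC helpers. The IV registers are loaded once per request; when a
 * request is larger than the bounce buffer, AES_CTRL_IV is set on the
 * follow-up commands so the engine chains from the last block of the
 * previous command instead of reloading the IV registers.
 */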
static void
hwaes_cbc_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
	struct hwaes_softc *sc = hwaes_sc;
	const uint8_t *inp = in;
	uint8_t *outp = out;
	uint32_t flags;
	unsigned n;
	int s;

	if (nrounds != AES_128_NROUNDS) {
		aes_bear_impl.ai_cbc_enc(enc, in, out, nbytes, iv, nrounds);
		return;
	}

	KASSERT(nbytes % HWAES_BLOCK_LEN == 0);
	if (nbytes == 0) {
		return;
	}

	s = splvm();

	for (n = 0; n < 4; n++) {
		WR4(sc, AES_IV, be32dec(&iv[n * 4]));
	}
	for (n = 0; n < 4; n++) {
		WR4(sc, AES_KEY, enc->aese_aes.aes_rk[n]);
	}
	flags = 0;
	while (nbytes > 0) {
		const size_t blocks = MIN(nbytes / HWAES_BLOCK_LEN,
		    HWAES_MAX_BLOCKS);

		memcpy(sc->sc_dma_bounce.dma_addr, inp,
		    blocks * HWAES_BLOCK_LEN);
		bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map,
		    0, blocks * HWAES_BLOCK_LEN,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
		hwaes_exec_sync(flags, blocks);
		bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map,
		    0, blocks * HWAES_BLOCK_LEN,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
		memcpy(outp, sc->sc_dma_bounce.dma_addr,
		    blocks * HWAES_BLOCK_LEN);

		nbytes -= blocks * HWAES_BLOCK_LEN;
		inp += blocks * HWAES_BLOCK_LEN;
		outp += blocks * HWAES_BLOCK_LEN;
		flags |= AES_CTRL_IV;
	}

	/* The last ciphertext block becomes the IV for the next call. */
	memcpy(iv, outp - HWAES_BLOCK_LEN, HWAES_BLOCK_LEN);

	splx(s);
}

static void
hwaes_cbc_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
	struct hwaes_softc *sc = hwaes_sc;
	const uint8_t *inp = in;
	uint8_t *outp = out;
	uint32_t flags;
	unsigned n;
	int s;

	if (nrounds != AES_128_NROUNDS) {
		aes_bear_impl.ai_cbc_dec(dec, in, out, nbytes, iv, nrounds);
		return;
	}

	KASSERT(nbytes % HWAES_BLOCK_LEN == 0);
	if (nbytes == 0) {
		return;
	}

	s = splvm();

	for (n = 0; n < 4; n++) {
		WR4(sc, AES_IV, be32dec(&iv[n * 4]));
	}

	/*
	 * Save the last ciphertext block as the next IV now, before
	 * decryption overwrites it in case out overlaps in.
	 */
	memcpy(iv, inp + nbytes - HWAES_BLOCK_LEN, HWAES_BLOCK_LEN);

	for (n = 0; n < 4; n++) {
		WR4(sc, AES_KEY, dec->aesd_aes.aes_rk[n]);
	}
	flags = AES_CTRL_DEC;
	while (nbytes > 0) {
		const size_t blocks = MIN(nbytes / HWAES_BLOCK_LEN,
		    HWAES_MAX_BLOCKS);

		memcpy(sc->sc_dma_bounce.dma_addr, inp,
		    blocks * HWAES_BLOCK_LEN);
		bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map,
		    0, blocks * HWAES_BLOCK_LEN,
		    BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
		hwaes_exec_sync(flags, blocks);
		bus_dmamap_sync(sc->sc_dmat, sc->sc_dma_bounce.dma_map,
		    0, blocks * HWAES_BLOCK_LEN,
		    BUS_DMASYNC_POSTREAD | BUS_DMASYNC_POSTWRITE);
		memcpy(outp, sc->sc_dma_bounce.dma_addr,
		    blocks * HWAES_BLOCK_LEN);

		nbytes -= blocks * HWAES_BLOCK_LEN;
		inp += blocks * HWAES_BLOCK_LEN;
		outp += blocks * HWAES_BLOCK_LEN;
		flags |= AES_CTRL_IV;
	}

	splx(s);
}
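/*
 * XTS. The engine has no XTS support, so the tweak is maintained in
 * software (hwaes_xts_update() is the usual GF(2^128) doubling over
 * little-endian words) and only the raw block cipher runs in hardware.
 */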
static void
hwaes_xts_update(uint32_t *t0, uint32_t *t1, uint32_t *t2, uint32_t *t3)
{
	uint32_t s0, s1, s2, s3;

	s0 = *t0 >> 31;
	s1 = *t1 >> 31;
	s2 = *t2 >> 31;
	s3 = *t3 >> 31;
	*t0 = (*t0 << 1) ^ (-s3 & 0x87);
	*t1 = (*t1 << 1) ^ s0;
	*t2 = (*t2 << 1) ^ s1;
	*t3 = (*t3 << 1) ^ s2;
}

static void
hwaes_xts_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
	uint8_t block[16];
	uint8_t tle[16];
	uint32_t t[4];
	const uint8_t *inp = in;
	uint8_t *outp = out;

	if (nrounds != AES_128_NROUNDS) {
		aes_bear_impl.ai_xts_enc(enc, in, out, nbytes, tweak, nrounds);
		return;
	}

	KASSERT(nbytes % 16 == 0);

	t[0] = le32dec(tweak + 4*0);
	t[1] = le32dec(tweak + 4*1);
	t[2] = le32dec(tweak + 4*2);
	t[3] = le32dec(tweak + 4*3);

	while (nbytes > 0) {
		le32enc(tle + 4*0, t[0]);
		le32enc(tle + 4*1, t[1]);
		le32enc(tle + 4*2, t[2]);
		le32enc(tle + 4*3, t[3]);

		for (unsigned n = 0; n < 16; n++) {
			block[n] = inp[n] ^ tle[n];
		}

		hwaes_encN(enc, block, block, 1);

		for (unsigned n = 0; n < 16; n++) {
			outp[n] = block[n] ^ tle[n];
		}

		hwaes_xts_update(&t[0], &t[1], &t[2], &t[3]);

		nbytes -= HWAES_BLOCK_LEN;
		inp += HWAES_BLOCK_LEN;
		outp += HWAES_BLOCK_LEN;
	}

	le32enc(tweak + 4*0, t[0]);
	le32enc(tweak + 4*1, t[1]);
	le32enc(tweak + 4*2, t[2]);
	le32enc(tweak + 4*3, t[3]);

	explicit_memset(t, 0, sizeof(t));
	explicit_memset(block, 0, sizeof(block));
	explicit_memset(tle, 0, sizeof(tle));
}

static void
hwaes_xts_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
	uint8_t block[16];
	uint8_t tle[16];
	uint32_t t[4];
	const uint8_t *inp = in;
	uint8_t *outp = out;

	if (nrounds != AES_128_NROUNDS) {
		aes_bear_impl.ai_xts_dec(dec, in, out, nbytes, tweak, nrounds);
		return;
	}

	KASSERT(nbytes % 16 == 0);

	t[0] = le32dec(tweak + 4*0);
	t[1] = le32dec(tweak + 4*1);
	t[2] = le32dec(tweak + 4*2);
	t[3] = le32dec(tweak + 4*3);

	while (nbytes > 0) {
		le32enc(tle + 4*0, t[0]);
		le32enc(tle + 4*1, t[1]);
		le32enc(tle + 4*2, t[2]);
		le32enc(tle + 4*3, t[3]);

		for (unsigned n = 0; n < 16; n++) {
			block[n] = inp[n] ^ tle[n];
		}

		hwaes_decN(dec, block, block, 1);

		for (unsigned n = 0; n < 16; n++) {
			outp[n] = block[n] ^ tle[n];
		}

		hwaes_xts_update(&t[0], &t[1], &t[2], &t[3]);

		nbytes -= HWAES_BLOCK_LEN;
		inp += HWAES_BLOCK_LEN;
		outp += HWAES_BLOCK_LEN;
	}

	le32enc(tweak + 4*0, t[0]);
	le32enc(tweak + 4*1, t[1]);
	le32enc(tweak + 4*2, t[2]);
	le32enc(tweak + 4*3, t[3]);

	explicit_memset(t, 0, sizeof(t));
	explicit_memset(block, 0, sizeof(block));
	explicit_memset(tle, 0, sizeof(tle));
}

static void
hwaes_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16],
    size_t nbytes, uint8_t auth0[static 16], uint32_t nrounds)
{
	const uint8_t *inp = in;

	if (nrounds != AES_128_NROUNDS) {
		aes_bear_impl.ai_cbcmac_update1(enc, in, nbytes, auth0,
		    nrounds);
		return;
	}

	KASSERT(nbytes % 16 == 0);

	while (nbytes > 0) {
		for (unsigned n = 0; n < 16; n++) {
			auth0[n] = auth0[n] ^ inp[n];
		}

		hwaes_encN(enc, auth0, auth0, 1);

		nbytes -= HWAES_BLOCK_LEN;
		inp += HWAES_BLOCK_LEN;
	}
}
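/*
 * CCM. The 32-byte authctr0 buffer carries the CBC-MAC state in its
 * first 16 bytes and the counter block in the last 16. Only the final
 * 32-bit word of the counter is incremented, big-endian; the other
 * words pass through unchanged.
 */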
static void
hwaes_ccm_enc1(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t authctr0[static 32],
    uint32_t nrounds)
{
	const uint8_t *inp = in;
	uint8_t *outp = out;
	uint32_t c[4];

	if (nrounds != AES_128_NROUNDS) {
		aes_bear_impl.ai_ccm_enc1(enc, in, out, nbytes, authctr0,
		    nrounds);
		return;
	}

	KASSERT(nbytes % 16 == 0);

	c[0] = le32dec(authctr0 + 16 + 4*0);
	c[1] = le32dec(authctr0 + 16 + 4*1);
	c[2] = le32dec(authctr0 + 16 + 4*2);
	c[3] = be32dec(authctr0 + 16 + 4*3);

	while (nbytes > 0) {
		/* Absorb the plaintext block into the CBC-MAC state. */
		for (unsigned n = 0; n < 16; n++) {
			authctr0[n] = authctr0[n] ^ inp[n];
		}

		le32enc(authctr0 + 16 + 4*0, c[0]);
		le32enc(authctr0 + 16 + 4*1, c[1]);
		le32enc(authctr0 + 16 + 4*2, c[2]);
		be32enc(authctr0 + 16 + 4*3, ++c[3]);

		/* Encrypt the MAC state and the counter block in one pass. */
		hwaes_encN(enc, authctr0, authctr0, 2);

		for (unsigned n = 0; n < 16; n++) {
			outp[n] = inp[n] ^ authctr0[n + 16];
		}

		nbytes -= HWAES_BLOCK_LEN;
		inp += HWAES_BLOCK_LEN;
		outp += HWAES_BLOCK_LEN;
	}

	le32enc(authctr0 + 16 + 4*0, c[0]);
	le32enc(authctr0 + 16 + 4*1, c[1]);
	le32enc(authctr0 + 16 + 4*2, c[2]);
	be32enc(authctr0 + 16 + 4*3, c[3]);
}

static void
hwaes_ccm_dec1(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t authctr0[static 32],
    uint32_t nrounds)
{
	const uint8_t *inp = in;
	uint8_t *outp = out;
	uint32_t c[4];

	if (nrounds != AES_128_NROUNDS) {
		aes_bear_impl.ai_ccm_dec1(enc, in, out, nbytes, authctr0,
		    nrounds);
		return;
	}

	KASSERT(nbytes % 16 == 0);

	c[0] = le32dec(authctr0 + 16 + 4*0);
	c[1] = le32dec(authctr0 + 16 + 4*1);
	c[2] = le32dec(authctr0 + 16 + 4*2);
	c[3] = be32dec(authctr0 + 16 + 4*3);

	/* Generate the first block of keystream. */
	be32enc(authctr0 + 16 + 4*3, ++c[3]);
	hwaes_encN(enc, authctr0 + 16, authctr0 + 16, 1);

	while (nbytes > 0) {
		/* Decrypt, then absorb the plaintext into the CBC-MAC. */
		for (unsigned n = 0; n < 16; n++) {
			outp[n] = authctr0[n + 16] ^ inp[n];
			authctr0[n] = authctr0[n] ^ outp[n];
		}
		nbytes -= HWAES_BLOCK_LEN;
		if (nbytes == 0) {
			break;
		}

		inp += HWAES_BLOCK_LEN;
		outp += HWAES_BLOCK_LEN;

		le32enc(authctr0 + 16 + 4*0, c[0]);
		le32enc(authctr0 + 16 + 4*1, c[1]);
		le32enc(authctr0 + 16 + 4*2, c[2]);
		be32enc(authctr0 + 16 + 4*3, ++c[3]);
		hwaes_encN(enc, authctr0, authctr0, 2);
	}
	/* Final CBC-MAC update over the last plaintext block. */
	hwaes_encN(enc, authctr0, authctr0, 1);

	le32enc(authctr0 + 16 + 4*0, c[0]);
	le32enc(authctr0 + 16 + 4*1, c[1]);
	le32enc(authctr0 + 16 + 4*2, c[2]);
	be32enc(authctr0 + 16 + 4*3, c[3]);
}

static struct aes_impl aes_hwaes_impl = {
	.ai_name = "Hollywood AES engine",
	.ai_probe = hwaes_probe,
	.ai_setenckey = hwaes_setenckey,
	.ai_setdeckey = hwaes_setdeckey,
	.ai_enc = hwaes_enc,
	.ai_dec = hwaes_dec,
	.ai_cbc_enc = hwaes_cbc_enc,
	.ai_cbc_dec = hwaes_cbc_dec,
	.ai_xts_enc = hwaes_xts_enc,
	.ai_xts_dec = hwaes_xts_dec,
	.ai_cbcmac_update1 = hwaes_cbcmac_update1,
	.ai_ccm_enc1 = hwaes_ccm_enc1,
	.ai_ccm_dec1 = hwaes_ccm_dec1,
};

static void
hwaes_register(void)
{
	aes_md_init(&aes_hwaes_impl);
}