/*	$NetBSD: aes_via.c,v 1.11 2026/01/08 11:25:59 nia Exp $	*/

/*-
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: aes_via.c,v 1.11 2026/01/08 11:25:59 nia Exp $");

#ifdef _KERNEL
#include <sys/types.h>
#include <sys/endian.h>
#include <sys/evcnt.h>
#include <sys/systm.h>
#else
#include <assert.h>
#include <err.h>
#include <stdint.h>
#include <string.h>
#include <endian.h>
#define	KASSERT			assert
#define	panic(fmt, args...)	err(1, fmt, args)
struct evcnt { uint64_t ev_count; };
#define	EVCNT_INITIALIZER(a,b,c,d)	{0}
#define	EVCNT_ATTACH_STATIC(name)	static char name##_attach __unused = 0
#endif

#include <crypto/aes/aes.h>
#include <crypto/aes/aes_impl.h>
#include <crypto/aes/aes_keysched.h>

#ifdef _KERNEL
#include <x86/cpufunc.h>
#include <x86/cpuvar.h>
#include <x86/fpu.h>
#include <x86/specialreg.h>
#include <x86/via_padlock.h>
#else
#include <cpuid.h>
#define	fpu_kern_enter()	((void)0)
#define	fpu_kern_leave()	((void)0)
#define	C3_CRYPT_CWLO_ROUND_M		0x0000000f
#define	C3_CRYPT_CWLO_ALG_M		0x00000070
#define	C3_CRYPT_CWLO_ALG_AES		0x00000000
#define	C3_CRYPT_CWLO_KEYGEN_M		0x00000080
#define	C3_CRYPT_CWLO_KEYGEN_HW		0x00000000
#define	C3_CRYPT_CWLO_KEYGEN_SW		0x00000080
#define	C3_CRYPT_CWLO_NORMAL		0x00000000
#define	C3_CRYPT_CWLO_INTERMEDIATE	0x00000100
#define	C3_CRYPT_CWLO_ENCRYPT		0x00000000
#define	C3_CRYPT_CWLO_DECRYPT		0x00000200
#define	C3_CRYPT_CWLO_KEY128		0x0000000a	/* 128bit, 10 rds */
#define	C3_CRYPT_CWLO_KEY192		0x0000040c	/* 192bit, 12 rds */
#define	C3_CRYPT_CWLO_KEY256		0x0000080e	/* 256bit, 14 rds */
#endif

static void
aesvia_reload_keys(void)
{

	asm volatile("pushf; popf");
}

static uint32_t
aesvia_keylen_cw0(unsigned nrounds)
{

	/*
	 * Determine the control word bits for the key size / number of
	 * rounds.  For AES-128, the hardware can do key expansion on
	 * the fly; for AES-192 and AES-256, software must do it.
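	 *
	 * With the control word bits defined above, this works out to
	 * 0x00a for AES-128 (hardware key expansion), and to 0x48c and
	 * 0x88e for AES-192 and AES-256 respectively, i.e.
	 * C3_CRYPT_CWLO_KEY192/KEY256 with C3_CRYPT_CWLO_KEYGEN_SW
	 * or'd in and the round count in the low nibble.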
	 */
	switch (nrounds) {
	case AES_128_NROUNDS:
		return C3_CRYPT_CWLO_KEY128;
	case AES_192_NROUNDS:
		return C3_CRYPT_CWLO_KEY192 | C3_CRYPT_CWLO_KEYGEN_SW;
	case AES_256_NROUNDS:
		return C3_CRYPT_CWLO_KEY256 | C3_CRYPT_CWLO_KEYGEN_SW;
	default:
		panic("invalid AES nrounds: %u", nrounds);
	}
}

static void
aesvia_setenckey(struct aesenc *enc, const uint8_t *key, uint32_t nrounds)
{
	size_t key_len;

	/*
	 * For AES-128, VIA PadLock only needs the original key itself.
	 *
	 * For AES-192 and AES-256, VIA PadLock needs software to
	 * compute the standard AES key schedule.
	 */
	switch (nrounds) {
	case AES_128_NROUNDS:
		enc->aese_aes.aes_rk[0] = le32dec(key + 4*0);
		enc->aese_aes.aes_rk[1] = le32dec(key + 4*1);
		enc->aese_aes.aes_rk[2] = le32dec(key + 4*2);
		enc->aese_aes.aes_rk[3] = le32dec(key + 4*3);
		return;
	case AES_192_NROUNDS:
		key_len = 24;
		break;
	case AES_256_NROUNDS:
		key_len = 32;
		break;
	default:
		panic("invalid AES nrounds: %u", nrounds);
	}
	aes_keysched_enc(enc->aese_aes.aes_rk, key, key_len);
}

static void
aesvia_setdeckey(struct aesdec *dec, const uint8_t *key, uint32_t nrounds)
{
	size_t key_len;

	switch (nrounds) {
	case AES_128_NROUNDS:
		dec->aesd_aes.aes_rk[0] = le32dec(key + 4*0);
		dec->aesd_aes.aes_rk[1] = le32dec(key + 4*1);
		dec->aesd_aes.aes_rk[2] = le32dec(key + 4*2);
		dec->aesd_aes.aes_rk[3] = le32dec(key + 4*3);
		return;
	case AES_192_NROUNDS:
		key_len = 24;
		break;
	case AES_256_NROUNDS:
		key_len = 32;
		break;
	default:
		panic("invalid AES nrounds: %u", nrounds);
	}
	aes_keysched_dec(dec->aesd_aes.aes_rk, key, key_len);
}

static inline void
aesvia_encN(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint32_t cw0)
{
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_ENCRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)enc & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);

	asm volatile("rep xcryptecb"
	    : "+c"(nblocks), "+S"(in), "+D"(out)
	    : "b"(enc), "d"(cw)
	    : "memory", "cc");
}

static inline void
aesvia_decN(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint32_t cw0)
{
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_DECRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)dec & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);

	asm volatile("rep xcryptecb"
	    : "+c"(nblocks), "+S"(in), "+D"(out)
	    : "b"(dec), "d"(cw)
	    : "memory", "cc");
}

static struct evcnt enc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "enc aligned");
EVCNT_ATTACH_STATIC(enc_aligned_evcnt);
static struct evcnt enc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "enc unaligned");
EVCNT_ATTACH_STATIC(enc_unaligned_evcnt);

static void
aesvia_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0 &&
	    ((uintptr_t)in & 0xff0) != 0xff0) {
		enc_aligned_evcnt.ev_count++;
		aesvia_encN(enc, in, out, 1, cw0);
	} else {
		enc_unaligned_evcnt.ev_count++;
		/*
		 * VIA requires 16-byte/128-bit alignment, and
		 * xcrypt-ecb reads one block past the one we're
		 * working on -- which may go past the end of the page
		 * into unmapped territory.  Use a bounce buffer if
		 * either constraint is violated.
		 */
		uint8_t inbuf[16] __aligned(16);
		uint8_t outbuf[16] __aligned(16);

		memcpy(inbuf, in, 16);
		aesvia_encN(enc, inbuf, outbuf, 1, cw0);
		memcpy(out, outbuf, 16);

		explicit_memset(inbuf, 0, sizeof inbuf);
		explicit_memset(outbuf, 0, sizeof outbuf);
	}
	fpu_kern_leave();
}

static struct evcnt dec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "dec aligned");
EVCNT_ATTACH_STATIC(dec_aligned_evcnt);
static struct evcnt dec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "dec unaligned");
EVCNT_ATTACH_STATIC(dec_unaligned_evcnt);

static void
aesvia_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0 &&
	    ((uintptr_t)in & 0xff0) != 0xff0) {
		dec_aligned_evcnt.ev_count++;
		aesvia_decN(dec, in, out, 1, cw0);
	} else {
		dec_unaligned_evcnt.ev_count++;
		/*
		 * VIA requires 16-byte/128-bit alignment, and
		 * xcrypt-ecb reads one block past the one we're
		 * working on -- which may go past the end of the page
		 * into unmapped territory.  Use a bounce buffer if
		 * either constraint is violated.
		 */
		uint8_t inbuf[16] __aligned(16);
		uint8_t outbuf[16] __aligned(16);

		memcpy(inbuf, in, 16);
		aesvia_decN(dec, inbuf, outbuf, 1, cw0);
		memcpy(out, outbuf, 16);

		explicit_memset(inbuf, 0, sizeof inbuf);
		explicit_memset(outbuf, 0, sizeof outbuf);
	}
	fpu_kern_leave();
}

static inline void
aesvia_cbc_encN(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint8_t **ivp, uint32_t cw0)
{
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_ENCRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)enc & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);
	KASSERT(((uintptr_t)*ivp & 0xf) == 0);

	/*
	 * Register effects:
	 * - Counts nblocks down to zero.
	 * - Advances in by nblocks (units of blocks).
	 * - Advances out by nblocks (units of blocks).
	 * - Updates *ivp to point at the last block of out.
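	 *
	 * (The caller copies that last ciphertext block back into its
	 * iv buffer, so a subsequent call chains from the right IV.)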
	 */
	asm volatile("rep xcryptcbc"
	    : "+c"(nblocks), "+S"(in), "+D"(out), "+a"(*ivp)
	    : "b"(enc), "d"(cw)
	    : "memory", "cc");
}

static inline void
aesvia_cbc_decN(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint8_t iv[static 16],
    uint32_t cw0)
{
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_DECRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)dec & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);
	KASSERT(((uintptr_t)iv & 0xf) == 0);

	/*
	 * Register effects:
	 * - Counts nblocks down to zero.
	 * - Advances in by nblocks (units of blocks).
	 * - Advances out by nblocks (units of blocks).
	 * Memory side effects:
	 * - Writes what was the last block of in at the address iv.
	 */
	asm volatile("rep xcryptcbc"
	    : "+c"(nblocks), "+S"(in), "+D"(out)
	    : "a"(iv), "b"(dec), "d"(cw)
	    : "memory", "cc");
}

static inline void
xor128(void *x, const void *a, const void *b)
{
	uint32_t *x32 = x;
	const uint32_t *a32 = a;
	const uint32_t *b32 = b;

	x32[0] = a32[0] ^ b32[0];
	x32[1] = a32[1] ^ b32[1];
	x32[2] = a32[2] ^ b32[2];
	x32[3] = a32[3] ^ b32[3];
}

static struct evcnt cbcenc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcenc aligned");
EVCNT_ATTACH_STATIC(cbcenc_aligned_evcnt);
static struct evcnt cbcenc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcenc unaligned");
EVCNT_ATTACH_STATIC(cbcenc_unaligned_evcnt);

static void
aesvia_cbc_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

	KASSERT(nbytes % 16 == 0);
	if (nbytes == 0)
		return;

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out | (uintptr_t)iv) & 0xf) == 0) {
		cbcenc_aligned_evcnt.ev_count++;
		uint8_t *ivp = iv;
		aesvia_cbc_encN(enc, in, out, nbytes/16, &ivp, cw0);
		memcpy(iv, ivp, 16);
	} else {
		cbcenc_unaligned_evcnt.ev_count++;
		uint8_t cv[16] __aligned(16);
		uint8_t tmp[16] __aligned(16);

		memcpy(cv, iv, 16);
		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
			memcpy(tmp, in, 16);
			xor128(tmp, tmp, cv);
			aesvia_encN(enc, tmp, cv, 1, cw0);
			memcpy(out, cv, 16);
		}
		memcpy(iv, cv, 16);
	}
	fpu_kern_leave();
}

static struct evcnt cbcdec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcdec aligned");
EVCNT_ATTACH_STATIC(cbcdec_aligned_evcnt);
static struct evcnt cbcdec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcdec unaligned");
EVCNT_ATTACH_STATIC(cbcdec_unaligned_evcnt);

static void
aesvia_cbc_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

	KASSERT(nbytes % 16 == 0);
	if (nbytes == 0)
		return;

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out | (uintptr_t)iv) & 0xf) == 0) {
		cbcdec_aligned_evcnt.ev_count++;
		aesvia_cbc_decN(dec, in, out, nbytes/16, iv, cw0);
	} else {
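		/*
		 * Work from the last block back to the first: the
		 * next-call IV (the last ciphertext block) is saved
		 * up front, and each step reads the previous
		 * ciphertext block from `in' before the corresponding
		 * output block is written, so this also works when
		 * out aliases in.
		 */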
		cbcdec_unaligned_evcnt.ev_count++;
		uint8_t iv0[16] __aligned(16);
		uint8_t cv[16] __aligned(16);
		uint8_t tmp[16] __aligned(16);

		memcpy(iv0, iv, 16);
		memcpy(cv, in + nbytes - 16, 16);
		memcpy(iv, cv, 16);

		for (;;) {
			aesvia_decN(dec, cv, tmp, 1, cw0);
			if ((nbytes -= 16) == 0)
				break;
			memcpy(cv, in + nbytes - 16, 16);
			xor128(tmp, tmp, cv);
			memcpy(out + nbytes, tmp, 16);
		}

		xor128(tmp, tmp, iv0);
		memcpy(out, tmp, 16);
		explicit_memset(tmp, 0, sizeof tmp);
	}
	fpu_kern_leave();
}

static inline void
aesvia_xts_update(uint32_t *t0, uint32_t *t1, uint32_t *t2, uint32_t *t3)
{
	uint32_t s0, s1, s2, s3;

	s0 = *t0 >> 31;
	s1 = *t1 >> 31;
	s2 = *t2 >> 31;
	s3 = *t3 >> 31;
	*t0 = (*t0 << 1) ^ (-s3 & 0x87);
	*t1 = (*t1 << 1) ^ s0;
	*t2 = (*t2 << 1) ^ s1;
	*t3 = (*t3 << 1) ^ s2;
}

static int
aesvia_xts_update_selftest(void)
{
	static const struct {
		uint32_t in[4], out[4];
	} cases[] = {
		{ {1}, {2} },
		{ {0x80000000U,0,0,0}, {0,1,0,0} },
		{ {0,0x80000000U,0,0}, {0,0,1,0} },
		{ {0,0,0x80000000U,0}, {0,0,0,1} },
		{ {0,0,0,0x80000000U}, {0x87,0,0,0} },
		{ {0,0x80000000U,0,0x80000000U}, {0x87,0,1,0} },
	};
	unsigned i;
	uint32_t t0, t1, t2, t3;

	for (i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
		t0 = cases[i].in[0];
		t1 = cases[i].in[1];
		t2 = cases[i].in[2];
		t3 = cases[i].in[3];
		aesvia_xts_update(&t0, &t1, &t2, &t3);
		if (t0 != cases[i].out[0] ||
		    t1 != cases[i].out[1] ||
		    t2 != cases[i].out[2] ||
		    t3 != cases[i].out[3])
			return -1;
	}

	/* Success! */
	return 0;
}

static struct evcnt xtsenc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsenc aligned");
EVCNT_ATTACH_STATIC(xtsenc_aligned_evcnt);
static struct evcnt xtsenc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsenc unaligned");
EVCNT_ATTACH_STATIC(xtsenc_unaligned_evcnt);

static void
aesvia_xts_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
	uint32_t t[4];

	KASSERT(nbytes % 16 == 0);

	memcpy(t, tweak, 16);

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0) {
		xtsenc_aligned_evcnt.ev_count++;
		unsigned lastblock = 0;
		uint32_t buf[8*4] __aligned(16);

		/*
		 * Make sure the last block is not the last block of a
		 * page.  (Note that we store the AES input in `out' as
		 * a temporary buffer, rather than reading it directly
		 * from `in', since we have to combine the tweak
		 * first.)
		 */
		lastblock = 16*(((uintptr_t)(out + nbytes) & 0xfff) == 0);
		nbytes -= lastblock;

		/*
		 * Handle an odd number of initial blocks so we can
		 * process the rest in eight-block (128-byte) chunks.
		 */
		if (nbytes % 128) {
			unsigned nbytes128 = nbytes % 128;

			nbytes -= nbytes128;
			for (; nbytes128; nbytes128 -= 16, in += 16, out += 16)
			{
				xor128(out, in, t);
				aesvia_encN(enc, out, out, 1, cw0);
				xor128(out, out, t);
				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
			}
		}

		/*
		 * Process eight blocks at a time.
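		 *
		 * Stash the eight tweaks in buf and form the eight AES
		 * inputs in out, so that a single REP XCRYPTECB can
		 * encrypt all eight blocks; then fold the stashed
		 * tweaks back into the result.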
		 */
		for (; nbytes; nbytes -= 128, in += 128, out += 128) {
			unsigned i;
			for (i = 0; i < 8; i++) {
				memcpy(buf + 4*i, t, 16);
				xor128(out + 16*i, in + 16*i, t);
				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
			}
			aesvia_encN(enc, out, out, 8, cw0);
			for (i = 0; i < 8; i++)
				xor128(out + 16*i, out + 16*i, buf + 4*i);
		}

		/* Handle the last block of a page, if necessary.  */
		if (lastblock) {
			xor128(buf, in, t);
			aesvia_encN(enc, (const void *)buf, out, 1, cw0);
			xor128(out, out, t);
			aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
		}

		explicit_memset(buf, 0, sizeof buf);
	} else {
		xtsenc_unaligned_evcnt.ev_count++;
		uint8_t buf[16] __aligned(16);

		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
			memcpy(buf, in, 16);
			xor128(buf, buf, t);
			aesvia_encN(enc, buf, buf, 1, cw0);
			xor128(buf, buf, t);
			memcpy(out, buf, 16);
			aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
		}

		explicit_memset(buf, 0, sizeof buf);
	}
	fpu_kern_leave();

	memcpy(tweak, t, 16);
	explicit_memset(t, 0, sizeof t);
}

static struct evcnt xtsdec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsdec aligned");
EVCNT_ATTACH_STATIC(xtsdec_aligned_evcnt);
static struct evcnt xtsdec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsdec unaligned");
EVCNT_ATTACH_STATIC(xtsdec_unaligned_evcnt);

static void
aesvia_xts_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
	uint32_t t[4];

	KASSERT(nbytes % 16 == 0);

	memcpy(t, tweak, 16);

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0) {
		xtsdec_aligned_evcnt.ev_count++;
		unsigned lastblock = 0;
		uint32_t buf[8*4] __aligned(16);

		/*
		 * Make sure the last block is not the last block of a
		 * page.  (Note that we store the AES input in `out' as
		 * a temporary buffer, rather than reading it directly
		 * from `in', since we have to combine the tweak
		 * first.)
		 */
		lastblock = 16*(((uintptr_t)(out + nbytes) & 0xfff) == 0);
		nbytes -= lastblock;

		/*
		 * Handle an odd number of initial blocks so we can
		 * process the rest in eight-block (128-byte) chunks.
		 */
		if (nbytes % 128) {
			unsigned nbytes128 = nbytes % 128;

			nbytes -= nbytes128;
			for (; nbytes128; nbytes128 -= 16, in += 16, out += 16)
			{
				xor128(out, in, t);
				aesvia_decN(dec, out, out, 1, cw0);
				xor128(out, out, t);
				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
			}
		}

		/* Process eight blocks at a time.  */
		for (; nbytes; nbytes -= 128, in += 128, out += 128) {
			unsigned i;
			for (i = 0; i < 8; i++) {
				memcpy(buf + 4*i, t, 16);
				xor128(out + 16*i, in + 16*i, t);
				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
			}
			aesvia_decN(dec, out, out, 8, cw0);
			for (i = 0; i < 8; i++)
				xor128(out + 16*i, out + 16*i, buf + 4*i);
		}

		/*
		 * Handle the last block of a page, if necessary.
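		 *
		 * (Feed the hardware from the aligned stack buffer
		 * rather than from the last block of `out', so its
		 * one-block read-ahead cannot run off the end of the
		 * page that `out' ends on.)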
		 */
		if (lastblock) {
			xor128(buf, in, t);
			aesvia_decN(dec, (const void *)buf, out, 1, cw0);
			xor128(out, out, t);
			aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
		}

		explicit_memset(buf, 0, sizeof buf);
	} else {
		xtsdec_unaligned_evcnt.ev_count++;
		uint8_t buf[16] __aligned(16);

		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
			memcpy(buf, in, 16);
			xor128(buf, buf, t);
			aesvia_decN(dec, buf, buf, 1, cw0);
			xor128(buf, buf, t);
			memcpy(out, buf, 16);
			aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
		}

		explicit_memset(buf, 0, sizeof buf);
	}
	fpu_kern_leave();

	memcpy(tweak, t, 16);
	explicit_memset(t, 0, sizeof t);
}

static struct evcnt cbcmac_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcmac aligned");
EVCNT_ATTACH_STATIC(cbcmac_aligned_evcnt);
static struct evcnt cbcmac_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcmac unaligned");
EVCNT_ATTACH_STATIC(cbcmac_unaligned_evcnt);

static void
aesvia_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16],
    size_t nbytes, uint8_t auth0[static 16], uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
	uint8_t authbuf[16] __aligned(16);
	uint8_t *auth = auth0;

	KASSERT(nbytes);
	KASSERT(nbytes % 16 == 0);

	if ((uintptr_t)auth0 & 0xf) {
		memcpy(authbuf, auth0, 16);
		auth = authbuf;
		cbcmac_unaligned_evcnt.ev_count++;
	} else {
		cbcmac_aligned_evcnt.ev_count++;
	}

	fpu_kern_enter();
	aesvia_reload_keys();
	for (; nbytes; nbytes -= 16, in += 16) {
		xor128(auth, auth, in);
		aesvia_encN(enc, auth, auth, 1, cw0);
	}
	fpu_kern_leave();

	if ((uintptr_t)auth0 & 0xf) {
		memcpy(auth0, authbuf, 16);
		explicit_memset(authbuf, 0, sizeof authbuf);
	}
}

static struct evcnt ccmenc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "ccmenc aligned");
EVCNT_ATTACH_STATIC(ccmenc_aligned_evcnt);
static struct evcnt ccmenc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "ccmenc unaligned");
EVCNT_ATTACH_STATIC(ccmenc_unaligned_evcnt);

static void
aesvia_ccm_enc1(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t authctr0[static 32],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
	uint8_t authctrbuf[32] __aligned(16);
	uint8_t *authctr;
	uint32_t c0, c1, c2, c3;

	KASSERT(nbytes);
	KASSERT(nbytes % 16 == 0);

	if ((uintptr_t)authctr0 & 0xf) {
		memcpy(authctrbuf, authctr0, 16);
		authctr = authctrbuf;
		ccmenc_unaligned_evcnt.ev_count++;
	} else {
		authctr = authctr0;
		ccmenc_aligned_evcnt.ev_count++;
	}
	c0 = le32dec(authctr0 + 16 + 4*0);
	c1 = le32dec(authctr0 + 16 + 4*1);
	c2 = le32dec(authctr0 + 16 + 4*2);
	c3 = be32dec(authctr0 + 16 + 4*3);

	/*
	 * In principle we could use REP XCRYPTCTR here, but that
	 * doesn't help to compute the CBC-MAC step, and certain VIA
	 * CPUs have some weird errata with REP XCRYPTCTR that make it
	 * kind of a pain to use.  So let's just use REP XCRYPTECB to
	 * simultaneously compute the CBC-MAC step and the CTR step.
	 * (Maybe some VIA CPUs will compute REP XCRYPTECB in parallel,
	 * who knows...)
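	 *
	 * The layout makes this work: authctr[0..15] holds the
	 * CBC-MAC state and authctr[16..31] holds the next counter
	 * block, so one REP XCRYPTECB over the 32-byte buffer with a
	 * block count of 2 advances both at once.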
	 */
	fpu_kern_enter();
	aesvia_reload_keys();
	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
		xor128(authctr, authctr, in);
		le32enc(authctr + 16 + 4*0, c0);
		le32enc(authctr + 16 + 4*1, c1);
		le32enc(authctr + 16 + 4*2, c2);
		be32enc(authctr + 16 + 4*3, ++c3);
		aesvia_encN(enc, authctr, authctr, 2, cw0);
		xor128(out, in, authctr + 16);
	}
	fpu_kern_leave();

	if ((uintptr_t)authctr0 & 0xf) {
		memcpy(authctr0, authctrbuf, 16);
		explicit_memset(authctrbuf, 0, sizeof authctrbuf);
	}

	le32enc(authctr0 + 16 + 4*0, c0);
	le32enc(authctr0 + 16 + 4*1, c1);
	le32enc(authctr0 + 16 + 4*2, c2);
	be32enc(authctr0 + 16 + 4*3, c3);
}

static struct evcnt ccmdec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "ccmdec aligned");
EVCNT_ATTACH_STATIC(ccmdec_aligned_evcnt);
static struct evcnt ccmdec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "ccmdec unaligned");
EVCNT_ATTACH_STATIC(ccmdec_unaligned_evcnt);

static void
aesvia_ccm_dec1(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t authctr0[static 32],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
	uint8_t authctrbuf[32] __aligned(16);
	uint8_t *authctr;
	uint32_t c0, c1, c2, c3;

	KASSERT(nbytes);
	KASSERT(nbytes % 16 == 0);

	c0 = le32dec(authctr0 + 16 + 4*0);
	c1 = le32dec(authctr0 + 16 + 4*1);
	c2 = le32dec(authctr0 + 16 + 4*2);
	c3 = be32dec(authctr0 + 16 + 4*3);

	if ((uintptr_t)authctr0 & 0xf) {
		memcpy(authctrbuf, authctr0, 16);
		authctr = authctrbuf;
		le32enc(authctr + 16 + 4*0, c0);
		le32enc(authctr + 16 + 4*1, c1);
		le32enc(authctr + 16 + 4*2, c2);
		ccmdec_unaligned_evcnt.ev_count++;
	} else {
		authctr = authctr0;
		ccmdec_aligned_evcnt.ev_count++;
	}

	fpu_kern_enter();
	aesvia_reload_keys();
	be32enc(authctr + 16 + 4*3, ++c3);
	aesvia_encN(enc, authctr + 16, authctr + 16, 1, cw0);
	for (;; in += 16, out += 16) {
		xor128(out, authctr + 16, in);
		xor128(authctr, authctr, out);
		if ((nbytes -= 16) == 0)
			break;
		le32enc(authctr + 16 + 4*0, c0);
		le32enc(authctr + 16 + 4*1, c1);
		le32enc(authctr + 16 + 4*2, c2);
		be32enc(authctr + 16 + 4*3, ++c3);
		aesvia_encN(enc, authctr, authctr, 2, cw0);
	}
	aesvia_encN(enc, authctr, authctr, 1, cw0);
	fpu_kern_leave();

	if ((uintptr_t)authctr0 & 0xf) {
		memcpy(authctr0, authctrbuf, 16);
		explicit_memset(authctrbuf, 0, sizeof authctrbuf);
	}

	le32enc(authctr0 + 16 + 4*0, c0);
	le32enc(authctr0 + 16 + 4*1, c1);
	le32enc(authctr0 + 16 + 4*2, c2);
	be32enc(authctr0 + 16 + 4*3, c3);
}

static int
aesvia_probe(void)
{

	/*
	 * Verify that the CPU advertises VIA ACE support.
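	 *
	 * (In the Centaur feature leaf 0xc0000001, EDX bits 6/7 report
	 * ACE present/enabled and bits 8/9 report ACE2
	 * present/enabled; the userland probe below requires all four,
	 * hence the 0xc0 and 0x300 masks.)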
	 */
#ifdef _KERNEL
	if ((cpu_feature[4] & CPUID_VIA_HAS_ACE) == 0)
		return -1;
#else
	/*
	 * From the VIA PadLock Programming Guide:
	 * https://web.archive.org/web/20220104214041/http://linux.via.com.tw/support/beginDownload.action?eleid=181&fid=261
	 */
	unsigned eax, ebx, ecx, edx;
	if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx))
		return -1;
	if (ebx != signature_CENTAUR_ebx ||
	    ecx != signature_CENTAUR_ecx ||
	    edx != signature_CENTAUR_edx)
		return -1;
	/*
	 * The PadLock feature leaves live in the Centaur-specific
	 * range at 0xc0000000, which __get_cpuid's maximum-level check
	 * does not know about, so query them directly.
	 */
	__cpuid(0xc0000000, eax, ebx, ecx, edx);
	if (eax < 0xc0000001)
		return -1;
	__cpuid(0xc0000001, eax, ebx, ecx, edx);
	/* Check that ACE and ACE2 are both supported and enabled.  */
	if ((edx & 0x000000c0) != 0x000000c0 ||
	    (edx & 0x00000300) != 0x00000300)
		return -1;
#endif

	/* Verify that our XTS tweak update logic works.  */
	if (aesvia_xts_update_selftest())
		return -1;

	/* Success!  */
	return 0;
}

struct aes_impl aes_via_impl = {
	.ai_name = "VIA ACE",
	.ai_probe = aesvia_probe,
	.ai_setenckey = aesvia_setenckey,
	.ai_setdeckey = aesvia_setdeckey,
	.ai_enc = aesvia_enc,
	.ai_dec = aesvia_dec,
	.ai_cbc_enc = aesvia_cbc_enc,
	.ai_cbc_dec = aesvia_cbc_dec,
	.ai_xts_enc = aesvia_xts_enc,
	.ai_xts_dec = aesvia_xts_dec,
	.ai_cbcmac_update1 = aesvia_cbcmac_update1,
	.ai_ccm_enc1 = aesvia_ccm_enc1,
	.ai_ccm_dec1 = aesvia_ccm_dec1,
};