/*	$NetBSD: aes_via.c,v 1.7 2024/06/16 13:03:48 christos Exp $	*/

/*-
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(1, "$NetBSD: aes_via.c,v 1.7 2024/06/16 13:03:48 christos Exp $");

#ifdef _KERNEL
#include <sys/types.h>
#include <sys/evcnt.h>
#include <sys/systm.h>
#else
#include <assert.h>
#include <err.h>
#include <stdint.h>
#include <string.h>
#define	KASSERT	assert
#define	panic(fmt, args...)	err(1, fmt, args)
struct evcnt { uint64_t ev_count; };
#define	EVCNT_INITIALIZER(a,b,c,d)	{0}
#define	EVCNT_ATTACH_STATIC(name)	static char name##_attach __unused = 0
#endif

#include <crypto/aes/aes.h>
#include <crypto/aes/aes_bear.h>
#include <crypto/aes/aes_impl.h>

#ifdef _KERNEL
#include <x86/cpufunc.h>
#include <x86/cpuvar.h>
#include <x86/fpu.h>
#include <x86/specialreg.h>
#include <x86/via_padlock.h>
#else
#include <cpuid.h>
#define	fpu_kern_enter()	((void)0)
#define	fpu_kern_leave()	((void)0)
#define	C3_CRYPT_CWLO_ROUND_M	0x0000000f
#define	C3_CRYPT_CWLO_ALG_M	0x00000070
#define	C3_CRYPT_CWLO_ALG_AES	0x00000000
#define	C3_CRYPT_CWLO_KEYGEN_M	0x00000080
#define	C3_CRYPT_CWLO_KEYGEN_HW	0x00000000
#define	C3_CRYPT_CWLO_KEYGEN_SW	0x00000080
#define	C3_CRYPT_CWLO_NORMAL	0x00000000
#define	C3_CRYPT_CWLO_INTERMEDIATE	0x00000100
#define	C3_CRYPT_CWLO_ENCRYPT	0x00000000
#define	C3_CRYPT_CWLO_DECRYPT	0x00000200
#define	C3_CRYPT_CWLO_KEY128	0x0000000a	/* 128bit, 10 rds */
#define	C3_CRYPT_CWLO_KEY192	0x0000040c	/* 192bit, 12 rds */
#define	C3_CRYPT_CWLO_KEY256	0x0000080e	/* 256bit, 14 rds */
#endif

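/*
 * The ACE unit caches the key schedule loaded through %ebx.  Per VIA's
 * PadLock documentation, touching EFLAGS (here with a pushf/popf pair)
 * marks that cached key state stale so the next xcrypt operation
 * reloads the key material from memory.
 */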
static void
aesvia_reload_keys(void)
{

	asm volatile("pushf; popf");
}

static uint32_t
aesvia_keylen_cw0(unsigned nrounds)
{

	/*
	 * Determine the control word bits for the key size / number of
	 * rounds.  For AES-128, the hardware can do key expansion on
	 * the fly; for AES-192 and AES-256, software must do it.
	 */
	switch (nrounds) {
	case AES_128_NROUNDS:
		return C3_CRYPT_CWLO_KEY128;
	case AES_192_NROUNDS:
		return C3_CRYPT_CWLO_KEY192 | C3_CRYPT_CWLO_KEYGEN_SW;
	case AES_256_NROUNDS:
		return C3_CRYPT_CWLO_KEY256 | C3_CRYPT_CWLO_KEYGEN_SW;
	default:
		panic("invalid AES nrounds: %u", nrounds);
	}
}

static void
aesvia_setenckey(struct aesenc *enc, const uint8_t *key, uint32_t nrounds)
{
	size_t key_len;

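	/*
	 * For AES-128 the hardware expands the key itself, so only the
	 * four raw key words need to be stashed (little-endian); for
	 * AES-192 and AES-256 we fall back to BearSSL's constant-time
	 * software key schedule.
	 */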
	switch (nrounds) {
	case AES_128_NROUNDS:
		enc->aese_aes.aes_rk[0] = le32dec(key + 4*0);
		enc->aese_aes.aes_rk[1] = le32dec(key + 4*1);
		enc->aese_aes.aes_rk[2] = le32dec(key + 4*2);
		enc->aese_aes.aes_rk[3] = le32dec(key + 4*3);
		return;
	case AES_192_NROUNDS:
		key_len = 24;
		break;
	case AES_256_NROUNDS:
		key_len = 32;
		break;
	default:
		panic("invalid AES nrounds: %u", nrounds);
	}
	br_aes_ct_keysched_stdenc(enc->aese_aes.aes_rk, key, key_len);
}

static void
aesvia_setdeckey(struct aesdec *dec, const uint8_t *key, uint32_t nrounds)
{
	size_t key_len;

	switch (nrounds) {
	case AES_128_NROUNDS:
		dec->aesd_aes.aes_rk[0] = le32dec(key + 4*0);
		dec->aesd_aes.aes_rk[1] = le32dec(key + 4*1);
		dec->aesd_aes.aes_rk[2] = le32dec(key + 4*2);
		dec->aesd_aes.aes_rk[3] = le32dec(key + 4*3);
		return;
	case AES_192_NROUNDS:
		key_len = 24;
		break;
	case AES_256_NROUNDS:
		key_len = 32;
		break;
	default:
		panic("invalid AES nrounds: %u", nrounds);
	}
	br_aes_ct_keysched_stddec(dec->aesd_aes.aes_rk, key, key_len);
}

static inline void
aesvia_encN(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint32_t cw0)
{
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_ENCRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)enc & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);

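	/*
	 * rep xcryptecb register contract: %ecx = block count, %esi =
	 * source, %edi = destination, %ebx = key material, %edx =
	 * control word; %ecx, %esi, and %edi are advanced as it runs.
	 */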
	asm volatile("rep xcryptecb"
	    : "+c"(nblocks), "+S"(in), "+D"(out)
	    : "b"(enc), "d"(cw)
	    : "memory", "cc");
}

static inline void
aesvia_decN(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint32_t cw0)
{
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_DECRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)dec & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);

	asm volatile("rep xcryptecb"
	    : "+c"(nblocks), "+S"(in), "+D"(out)
	    : "b"(dec), "d"(cw)
	    : "memory", "cc");
}

static struct evcnt enc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "enc aligned");
EVCNT_ATTACH_STATIC(enc_aligned_evcnt);
static struct evcnt enc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "enc unaligned");
EVCNT_ATTACH_STATIC(enc_unaligned_evcnt);

static void
aesvia_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

	fpu_kern_enter();
	aesvia_reload_keys();
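	/*
	 * Fast path only if in and out are 16-byte aligned and the
	 * input block is not the last block of a page, since xcrypt-ecb
	 * reads one block past the one it is working on.
	 */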
	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0 &&
	    ((uintptr_t)in & 0xff0) != 0xff0) {
		enc_aligned_evcnt.ev_count++;
		aesvia_encN(enc, in, out, 1, cw0);
	} else {
		enc_unaligned_evcnt.ev_count++;
		/*
		 * VIA requires 16-byte/128-bit alignment, and
		 * xcrypt-ecb reads one block past the one we're
		 * working on -- which may go past the end of the page
		 * into unmapped territory.  Use a bounce buffer if
		 * either constraint is violated.
		 */
		uint8_t inbuf[16] __aligned(16);
		uint8_t outbuf[16] __aligned(16);

		memcpy(inbuf, in, 16);
		aesvia_encN(enc, inbuf, outbuf, 1, cw0);
		memcpy(out, outbuf, 16);

		explicit_memset(inbuf, 0, sizeof inbuf);
		explicit_memset(outbuf, 0, sizeof outbuf);
	}
	fpu_kern_leave();
}

static struct evcnt dec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "dec aligned");
EVCNT_ATTACH_STATIC(dec_aligned_evcnt);
static struct evcnt dec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "dec unaligned");
EVCNT_ATTACH_STATIC(dec_unaligned_evcnt);

static void
aesvia_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0 &&
	    ((uintptr_t)in & 0xff0) != 0xff0) {
		dec_aligned_evcnt.ev_count++;
		aesvia_decN(dec, in, out, 1, cw0);
	} else {
		dec_unaligned_evcnt.ev_count++;
		/*
		 * VIA requires 16-byte/128-bit alignment, and
		 * xcrypt-ecb reads one block past the one we're
		 * working on -- which may go past the end of the page
		 * into unmapped territory.  Use a bounce buffer if
		 * either constraint is violated.
		 */
		uint8_t inbuf[16] __aligned(16);
		uint8_t outbuf[16] __aligned(16);

		memcpy(inbuf, in, 16);
		aesvia_decN(dec, inbuf, outbuf, 1, cw0);
		memcpy(out, outbuf, 16);

		explicit_memset(inbuf, 0, sizeof inbuf);
		explicit_memset(outbuf, 0, sizeof outbuf);
	}
	fpu_kern_leave();
}

static inline void
aesvia_cbc_encN(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint8_t **ivp, uint32_t cw0)
{
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_ENCRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)enc & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);
	KASSERT(((uintptr_t)*ivp & 0xf) == 0);

	/*
	 * Register effects:
	 * - Counts nblocks down to zero.
	 * - Advances in by nblocks (units of blocks).
	 * - Advances out by nblocks (units of blocks).
	 * - Updates *ivp to point at the last block of out.
	 */
	asm volatile("rep xcryptcbc"
	    : "+c"(nblocks), "+S"(in), "+D"(out), "+a"(*ivp)
	    : "b"(enc), "d"(cw)
	    : "memory", "cc");
}

static inline void
aesvia_cbc_decN(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint8_t iv[static 16],
    uint32_t cw0)
{
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_DECRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)dec & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);
	KASSERT(((uintptr_t)iv & 0xf) == 0);

	/*
	 * Register effects:
	 * - Counts nblocks down to zero.
	 * - Advances in by nblocks (units of blocks).
	 * - Advances out by nblocks (units of blocks).
	 * Memory side effects:
	 * - Writes what was the last block of in at the address iv.
	 */
	asm volatile("rep xcryptcbc"
	    : "+c"(nblocks), "+S"(in), "+D"(out)
	    : "a"(iv), "b"(dec), "d"(cw)
	    : "memory", "cc");
}

static inline void
xor128(void *x, const void *a, const void *b)
{
	uint32_t *x32 = x;
	const uint32_t *a32 = a;
	const uint32_t *b32 = b;

	x32[0] = a32[0] ^ b32[0];
	x32[1] = a32[1] ^ b32[1];
	x32[2] = a32[2] ^ b32[2];
	x32[3] = a32[3] ^ b32[3];
}

static struct evcnt cbcenc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcenc aligned");
EVCNT_ATTACH_STATIC(cbcenc_aligned_evcnt);
static struct evcnt cbcenc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcenc unaligned");
EVCNT_ATTACH_STATIC(cbcenc_unaligned_evcnt);

static void
aesvia_cbc_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

	KASSERT(nbytes % 16 == 0);
	if (nbytes == 0)
		return;

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out | (uintptr_t)iv) & 0xf) == 0) {
		cbcenc_aligned_evcnt.ev_count++;
		uint8_t *ivp = iv;
		aesvia_cbc_encN(enc, in, out, nbytes/16, &ivp, cw0);
		memcpy(iv, ivp, 16);
	} else {
		cbcenc_unaligned_evcnt.ev_count++;
		uint8_t cv[16] __aligned(16);
		uint8_t tmp[16] __aligned(16);

		memcpy(cv, iv, 16);
		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
			memcpy(tmp, in, 16);
			xor128(tmp, tmp, cv);
			aesvia_encN(enc, tmp, cv, 1, cw0);
			memcpy(out, cv, 16);
		}
		memcpy(iv, cv, 16);
	}
	fpu_kern_leave();
}

static struct evcnt cbcdec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcdec aligned");
EVCNT_ATTACH_STATIC(cbcdec_aligned_evcnt);
static struct evcnt cbcdec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcdec unaligned");
EVCNT_ATTACH_STATIC(cbcdec_unaligned_evcnt);

static void
aesvia_cbc_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);

	KASSERT(nbytes % 16 == 0);
	if (nbytes == 0)
		return;

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out | (uintptr_t)iv) & 0xf) == 0) {
		cbcdec_aligned_evcnt.ev_count++;
		aesvia_cbc_decN(dec, in, out, nbytes/16, iv, cw0);
	} else {
		cbcdec_unaligned_evcnt.ev_count++;
		uint8_t iv0[16] __aligned(16);
		uint8_t cv[16] __aligned(16);
		uint8_t tmp[16] __aligned(16);

		memcpy(iv0, iv, 16);
		memcpy(cv, in + nbytes - 16, 16);
		memcpy(iv, cv, 16);

		for (;;) {
			aesvia_decN(dec, cv, tmp, 1, cw0);
			if ((nbytes -= 16) == 0)
				break;
			memcpy(cv, in + nbytes - 16, 16);
			xor128(tmp, tmp, cv);
			memcpy(out + nbytes, tmp, 16);
		}

		xor128(tmp, tmp, iv0);
		memcpy(out, tmp, 16);
		explicit_memset(tmp, 0, sizeof tmp);
	}
	fpu_kern_leave();
}

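/*
 * Multiply the 128-bit XTS tweak, kept as four little-endian 32-bit
 * words t0..t3 (t0 least significant), by x in GF(2^128) modulo
 * x^128 + x^7 + x^2 + x + 1: shift left one bit and, if a bit fell off
 * the top, fold it back in as 0x87.
 */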
static inline void
aesvia_xts_update(uint32_t *t0, uint32_t *t1, uint32_t *t2, uint32_t *t3)
{
	uint32_t s0, s1, s2, s3;

	s0 = *t0 >> 31;
	s1 = *t1 >> 31;
	s2 = *t2 >> 31;
	s3 = *t3 >> 31;
	*t0 = (*t0 << 1) ^ (-s3 & 0x87);
	*t1 = (*t1 << 1) ^ s0;
	*t2 = (*t2 << 1) ^ s1;
	*t3 = (*t3 << 1) ^ s2;
}

static int
aesvia_xts_update_selftest(void)
{
	static const struct {
		uint32_t in[4], out[4];
	} cases[] = {
		{ {1}, {2} },
		{ {0x80000000U,0,0,0}, {0,1,0,0} },
		{ {0,0x80000000U,0,0}, {0,0,1,0} },
		{ {0,0,0x80000000U,0}, {0,0,0,1} },
		{ {0,0,0,0x80000000U}, {0x87,0,0,0} },
		{ {0,0x80000000U,0,0x80000000U}, {0x87,0,1,0} },
	};
	unsigned i;
	uint32_t t0, t1, t2, t3;

	for (i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
		t0 = cases[i].in[0];
		t1 = cases[i].in[1];
		t2 = cases[i].in[2];
		t3 = cases[i].in[3];
		aesvia_xts_update(&t0, &t1, &t2, &t3);
		if (t0 != cases[i].out[0] ||
		    t1 != cases[i].out[1] ||
		    t2 != cases[i].out[2] ||
		    t3 != cases[i].out[3])
			return -1;
	}

	/* Success! */
	return 0;
}

static struct evcnt xtsenc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsenc aligned");
EVCNT_ATTACH_STATIC(xtsenc_aligned_evcnt);
static struct evcnt xtsenc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsenc unaligned");
EVCNT_ATTACH_STATIC(xtsenc_unaligned_evcnt);

static void
aesvia_xts_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
	uint32_t t[4];

	KASSERT(nbytes % 16 == 0);

	memcpy(t, tweak, 16);

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0) {
		xtsenc_aligned_evcnt.ev_count++;
		unsigned lastblock = 0;
		uint32_t buf[8*4] __aligned(16);

		/*
		 * Make sure the last block is not the last block of a
		 * page.  (Note that we store the AES input in `out' as
		 * a temporary buffer, rather than reading it directly
		 * from `in', since we have to combine the tweak
		 * first.)
		 */
		lastblock = 16*(((uintptr_t)(out + nbytes) & 0xfff) == 0);
		nbytes -= lastblock;

		/*
		 * Handle an odd number of initial blocks so we can
		 * process the rest in eight-block (128-byte) chunks.
		 */
		if (nbytes % 128) {
			unsigned nbytes128 = nbytes % 128;

			nbytes -= nbytes128;
			for (; nbytes128; nbytes128 -= 16, in += 16, out += 16)
			{
				xor128(out, in, t);
				aesvia_encN(enc, out, out, 1, cw0);
				xor128(out, out, t);
				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
			}
		}

		/* Process eight blocks at a time. */
		for (; nbytes; nbytes -= 128, in += 128, out += 128) {
			unsigned i;
			for (i = 0; i < 8; i++) {
				memcpy(buf + 4*i, t, 16);
				xor128(out + 16*i, in + 16*i, t);
				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
			}
			aesvia_encN(enc, out, out, 8, cw0);
			for (i = 0; i < 8; i++)
				xor128(out + 16*i, out + 16*i, buf + 4*i);
		}

		/* Handle the last block of a page, if necessary. */
		if (lastblock) {
			xor128(buf, in, t);
			aesvia_encN(enc, (const void *)buf, out, 1, cw0);
			xor128(out, out, t);
			aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
		}

		explicit_memset(buf, 0, sizeof buf);
	} else {
		xtsenc_unaligned_evcnt.ev_count++;
		uint8_t buf[16] __aligned(16);

		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
			memcpy(buf, in, 16);
			xor128(buf, buf, t);
			aesvia_encN(enc, buf, buf, 1, cw0);
			xor128(buf, buf, t);
			memcpy(out, buf, 16);
			aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
		}

		explicit_memset(buf, 0, sizeof buf);
	}
	fpu_kern_leave();

	memcpy(tweak, t, 16);
	explicit_memset(t, 0, sizeof t);
}

static struct evcnt xtsdec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsdec aligned");
EVCNT_ATTACH_STATIC(xtsdec_aligned_evcnt);
static struct evcnt xtsdec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsdec unaligned");
EVCNT_ATTACH_STATIC(xtsdec_unaligned_evcnt);

static void
aesvia_xts_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
	uint32_t t[4];

	KASSERT(nbytes % 16 == 0);

	memcpy(t, tweak, 16);

	fpu_kern_enter();
	aesvia_reload_keys();
	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0) {
		xtsdec_aligned_evcnt.ev_count++;
		unsigned lastblock = 0;
		uint32_t buf[8*4] __aligned(16);

		/*
		 * Make sure the last block is not the last block of a
		 * page.  (Note that we store the AES input in `out' as
		 * a temporary buffer, rather than reading it directly
		 * from `in', since we have to combine the tweak
		 * first.)
		 */
		lastblock = 16*(((uintptr_t)(out + nbytes) & 0xfff) == 0);
		nbytes -= lastblock;

		/*
		 * Handle an odd number of initial blocks so we can
		 * process the rest in eight-block (128-byte) chunks.
		 */
		if (nbytes % 128) {
			unsigned nbytes128 = nbytes % 128;

			nbytes -= nbytes128;
			for (; nbytes128; nbytes128 -= 16, in += 16, out += 16)
			{
				xor128(out, in, t);
				aesvia_decN(dec, out, out, 1, cw0);
				xor128(out, out, t);
				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
			}
		}

		/* Process eight blocks at a time. */
		for (; nbytes; nbytes -= 128, in += 128, out += 128) {
			unsigned i;
			for (i = 0; i < 8; i++) {
				memcpy(buf + 4*i, t, 16);
				xor128(out + 16*i, in + 16*i, t);
				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
			}
			aesvia_decN(dec, out, out, 8, cw0);
			for (i = 0; i < 8; i++)
				xor128(out + 16*i, out + 16*i, buf + 4*i);
		}

		/* Handle the last block of a page, if necessary. */
		if (lastblock) {
			xor128(buf, in, t);
			aesvia_decN(dec, (const void *)buf, out, 1, cw0);
			xor128(out, out, t);
			aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
		}

		explicit_memset(buf, 0, sizeof buf);
	} else {
		xtsdec_unaligned_evcnt.ev_count++;
		uint8_t buf[16] __aligned(16);

		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
			memcpy(buf, in, 16);
			xor128(buf, buf, t);
			aesvia_decN(dec, buf, buf, 1, cw0);
			xor128(buf, buf, t);
			memcpy(out, buf, 16);
			aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
		}

		explicit_memset(buf, 0, sizeof buf);
	}
	fpu_kern_leave();

	memcpy(tweak, t, 16);
	explicit_memset(t, 0, sizeof t);
}

static struct evcnt cbcmac_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcmac aligned");
EVCNT_ATTACH_STATIC(cbcmac_aligned_evcnt);
static struct evcnt cbcmac_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcmac unaligned");
EVCNT_ATTACH_STATIC(cbcmac_unaligned_evcnt);

static void
aesvia_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16],
    size_t nbytes, uint8_t auth0[static 16], uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
	uint8_t authbuf[16] __aligned(16);
	uint8_t *auth = auth0;

	KASSERT(nbytes);
	KASSERT(nbytes % 16 == 0);

	if ((uintptr_t)auth0 & 0xf) {
		memcpy(authbuf, auth0, 16);
		auth = authbuf;
		cbcmac_unaligned_evcnt.ev_count++;
	} else {
		cbcmac_aligned_evcnt.ev_count++;
	}

	fpu_kern_enter();
	aesvia_reload_keys();
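	/* CBC-MAC: auth <- E_k(auth ^ m[i]) for each 16-byte block m[i]. */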
	for (; nbytes; nbytes -= 16, in += 16) {
		xor128(auth, auth, in);
		aesvia_encN(enc, auth, auth, 1, cw0);
	}
	fpu_kern_leave();

	if ((uintptr_t)auth0 & 0xf) {
		memcpy(auth0, authbuf, 16);
		explicit_memset(authbuf, 0, sizeof authbuf);
	}
}

static struct evcnt ccmenc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "ccmenc aligned");
EVCNT_ATTACH_STATIC(ccmenc_aligned_evcnt);
static struct evcnt ccmenc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "ccmenc unaligned");
EVCNT_ATTACH_STATIC(ccmenc_unaligned_evcnt);

static void
aesvia_ccm_enc1(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t authctr0[static 32],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
	uint8_t authctrbuf[32] __aligned(16);
	uint8_t *authctr;
	uint32_t c0, c1, c2, c3;

	KASSERT(nbytes);
	KASSERT(nbytes % 16 == 0);

	if ((uintptr_t)authctr0 & 0xf) {
		memcpy(authctrbuf, authctr0, 16);
		authctr = authctrbuf;
		ccmenc_unaligned_evcnt.ev_count++;
	} else {
		authctr = authctr0;
		ccmenc_aligned_evcnt.ev_count++;
	}
	c0 = le32dec(authctr0 + 16 + 4*0);
	c1 = le32dec(authctr0 + 16 + 4*1);
	c2 = le32dec(authctr0 + 16 + 4*2);
	c3 = be32dec(authctr0 + 16 + 4*3);

	/*
	 * In principle we could use REP XCRYPTCTR here, but that
	 * doesn't help to compute the CBC-MAC step, and certain VIA
	 * CPUs have some weird errata with REP XCRYPTCTR that make it
	 * kind of a pain to use.  So let's just use REP XCRYPTECB to
	 * simultaneously compute the CBC-MAC step and the CTR step.
	 * (Maybe some VIA CPUs will compute REP XCRYPTECB in parallel,
	 * who knows...)
	 */
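	/*
	 * authctr layout: bytes 0-15 hold the CBC-MAC state and bytes
	 * 16-31 hold the CTR block, whose first 12 bytes pass through
	 * c0..c2 unchanged while c3 is a 32-bit big-endian counter.
	 * Each two-block xcryptecb call below advances both at once.
	 */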
	fpu_kern_enter();
	aesvia_reload_keys();
	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
		xor128(authctr, authctr, in);
		le32enc(authctr + 16 + 4*0, c0);
		le32enc(authctr + 16 + 4*1, c1);
		le32enc(authctr + 16 + 4*2, c2);
		be32enc(authctr + 16 + 4*3, ++c3);
		aesvia_encN(enc, authctr, authctr, 2, cw0);
		xor128(out, in, authctr + 16);
	}
	fpu_kern_leave();

	if ((uintptr_t)authctr0 & 0xf) {
		memcpy(authctr0, authctrbuf, 16);
		explicit_memset(authctrbuf, 0, sizeof authctrbuf);
	}

	le32enc(authctr0 + 16 + 4*0, c0);
	le32enc(authctr0 + 16 + 4*1, c1);
	le32enc(authctr0 + 16 + 4*2, c2);
	be32enc(authctr0 + 16 + 4*3, c3);
}

static struct evcnt ccmdec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "ccmdec aligned");
EVCNT_ATTACH_STATIC(ccmdec_aligned_evcnt);
static struct evcnt ccmdec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "ccmdec unaligned");
EVCNT_ATTACH_STATIC(ccmdec_unaligned_evcnt);

static void
aesvia_ccm_dec1(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nbytes, uint8_t authctr0[static 32],
    uint32_t nrounds)
{
	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
	uint8_t authctrbuf[32] __aligned(16);
	uint8_t *authctr;
	uint32_t c0, c1, c2, c3;

	KASSERT(nbytes);
	KASSERT(nbytes % 16 == 0);

	c0 = le32dec(authctr0 + 16 + 4*0);
	c1 = le32dec(authctr0 + 16 + 4*1);
	c2 = le32dec(authctr0 + 16 + 4*2);
	c3 = be32dec(authctr0 + 16 + 4*3);

	if ((uintptr_t)authctr0 & 0xf) {
		memcpy(authctrbuf, authctr0, 16);
		authctr = authctrbuf;
		le32enc(authctr + 16 + 4*0, c0);
		le32enc(authctr + 16 + 4*1, c1);
		le32enc(authctr + 16 + 4*2, c2);
		ccmdec_unaligned_evcnt.ev_count++;
	} else {
		authctr = authctr0;
		ccmdec_aligned_evcnt.ev_count++;
	}

	fpu_kern_enter();
	aesvia_reload_keys();
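	/*
	 * The CTR keystream block must exist before the CBC-MAC can
	 * absorb the plaintext it reveals, so generate the first
	 * keystream block up front; each loop iteration then encrypts
	 * the MAC state and the next counter block together.
	 */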
	be32enc(authctr + 16 + 4*3, ++c3);
	aesvia_encN(enc, authctr + 16, authctr + 16, 1, cw0);
	for (;; in += 16, out += 16) {
		xor128(out, authctr + 16, in);
		xor128(authctr, authctr, out);
		if ((nbytes -= 16) == 0)
			break;
		le32enc(authctr + 16 + 4*0, c0);
		le32enc(authctr + 16 + 4*1, c1);
		le32enc(authctr + 16 + 4*2, c2);
		be32enc(authctr + 16 + 4*3, ++c3);
		aesvia_encN(enc, authctr, authctr, 2, cw0);
	}
	aesvia_encN(enc, authctr, authctr, 1, cw0);
	fpu_kern_leave();

	if ((uintptr_t)authctr0 & 0xf) {
		memcpy(authctr0, authctrbuf, 16);
		explicit_memset(authctrbuf, 0, sizeof authctrbuf);
	}

	le32enc(authctr0 + 16 + 4*0, c0);
	le32enc(authctr0 + 16 + 4*1, c1);
	le32enc(authctr0 + 16 + 4*2, c2);
	be32enc(authctr0 + 16 + 4*3, c3);
}

static int
aesvia_probe(void)
{

	/* Verify that the CPU advertises VIA ACE support. */
#ifdef _KERNEL
	if ((cpu_feature[4] & CPUID_VIA_HAS_ACE) == 0)
		return -1;
#else
	/*
	 * From the VIA PadLock Programming Guide:
	 * http://linux.via.com.tw/support/beginDownload.action?eleid=181&fid=261
	 */
	unsigned eax, ebx, ecx, edx;
	if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx))
		return -1;
	if (ebx != signature_CENTAUR_ebx ||
	    ecx != signature_CENTAUR_ecx ||
	    edx != signature_CENTAUR_edx)
		return -1;
	if (eax < 0xc0000000)
		return -1;
	if (!__get_cpuid(0xc0000000, &eax, &ebx, &ecx, &edx))
		return -1;
	if (eax < 0xc0000001)
		return -1;
	if (!__get_cpuid(0xc0000001, &eax, &ebx, &ecx, &edx))
		return -1;
	/* Check that ACE and ACE2 are both supported and enabled. */
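	/*
	 * In leaf 0xc0000001, %edx bit 6 advertises ACE and bit 7
	 * reports it enabled; bits 8 and 9 do the same for ACE2.
	 */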
	if ((edx & 0x000000c0) != 0x000000c0 ||
	    (edx & 0x00000300) != 0x00000300)
		return -1;
#endif

	/* Verify that our XTS tweak update logic works. */
	if (aesvia_xts_update_selftest())
		return -1;

	/* Success! */
	return 0;
}

struct aes_impl aes_via_impl = {
	.ai_name = "VIA ACE",
	.ai_probe = aesvia_probe,
	.ai_setenckey = aesvia_setenckey,
	.ai_setdeckey = aesvia_setdeckey,
	.ai_enc = aesvia_enc,
	.ai_dec = aesvia_dec,
	.ai_cbc_enc = aesvia_cbc_enc,
	.ai_cbc_dec = aesvia_cbc_dec,
	.ai_xts_enc = aesvia_xts_enc,
	.ai_xts_dec = aesvia_xts_dec,
	.ai_cbcmac_update1 = aesvia_cbcmac_update1,
	.ai_ccm_enc1 = aesvia_ccm_enc1,
	.ai_ccm_dec1 = aesvia_ccm_dec1,
};
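
/*
 * The machine-dependent startup code is expected to hand this
 * descriptor to aes_md_init(), which runs aesvia_probe() and, on
 * success, selects these routines for the kernel's AES entry points.
 */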