Home | History | Annotate | Line # | Download | only in x86
      1  1.9    rillig /*	$NetBSD: aes_via.c,v 1.9 2024/06/16 16:30:52 rillig Exp $	*/
      2  1.1  riastrad 
      3  1.1  riastrad /*-
      4  1.1  riastrad  * Copyright (c) 2020 The NetBSD Foundation, Inc.
      5  1.1  riastrad  * All rights reserved.
      6  1.1  riastrad  *
      7  1.1  riastrad  * Redistribution and use in source and binary forms, with or without
      8  1.1  riastrad  * modification, are permitted provided that the following conditions
      9  1.1  riastrad  * are met:
     10  1.1  riastrad  * 1. Redistributions of source code must retain the above copyright
     11  1.1  riastrad  *    notice, this list of conditions and the following disclaimer.
     12  1.1  riastrad  * 2. Redistributions in binary form must reproduce the above copyright
     13  1.1  riastrad  *    notice, this list of conditions and the following disclaimer in the
     14  1.1  riastrad  *    documentation and/or other materials provided with the distribution.
     15  1.1  riastrad  *
     16  1.1  riastrad  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     17  1.1  riastrad  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     18  1.1  riastrad  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     19  1.1  riastrad  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     20  1.1  riastrad  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     21  1.1  riastrad  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     22  1.1  riastrad  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     23  1.1  riastrad  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     24  1.1  riastrad  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     25  1.1  riastrad  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     26  1.1  riastrad  * POSSIBILITY OF SUCH DAMAGE.
     27  1.1  riastrad  */
     28  1.1  riastrad 
     29  1.1  riastrad #include <sys/cdefs.h>
     30  1.9    rillig __KERNEL_RCSID(1, "$NetBSD: aes_via.c,v 1.9 2024/06/16 16:30:52 rillig Exp $");
     31  1.1  riastrad 
     32  1.3  riastrad #ifdef _KERNEL
     33  1.1  riastrad #include <sys/types.h>
     34  1.1  riastrad #include <sys/evcnt.h>
     35  1.1  riastrad #include <sys/systm.h>
     36  1.3  riastrad #else
     37  1.3  riastrad #include <assert.h>
     38  1.3  riastrad #include <err.h>
     39  1.3  riastrad #include <stdint.h>
     40  1.3  riastrad #include <string.h>
/*
 * Userland shims so this file can be compiled outside the kernel:
 * the kernel assertion/panic primitives are mapped onto <assert.h>
 * and <err.h>, and the event-counter machinery is stubbed out.
 */
#define	KASSERT			assert
/* err(3) prints the formatted message and exits with status 1. */
#define	panic(fmt, args...)	err(1, fmt, args)
/* Minimal stand-in: only the counter field this file increments. */
struct evcnt { uint64_t ev_count; };
/* All four initializer arguments are ignored; the count starts at 0. */
#define	EVCNT_INITIALIZER(a,b,c,d) {0}
/* Nothing to attach to here; just emit an unused dummy object. */
#define	EVCNT_ATTACH_STATIC(name)	static char name##_attach __unused = 0
     46  1.3  riastrad #endif
     47  1.1  riastrad 
     48  1.1  riastrad #include <crypto/aes/aes.h>
     49  1.1  riastrad #include <crypto/aes/aes_bear.h>
     50  1.4  riastrad #include <crypto/aes/aes_impl.h>
     51  1.1  riastrad 
     52  1.3  riastrad #ifdef _KERNEL
     53  1.1  riastrad #include <x86/cpufunc.h>
     54  1.1  riastrad #include <x86/cpuvar.h>
     55  1.1  riastrad #include <x86/fpu.h>
     56  1.1  riastrad #include <x86/specialreg.h>
     57  1.1  riastrad #include <x86/via_padlock.h>
     58  1.3  riastrad #else
     59  1.3  riastrad #include <cpuid.h>
/* Userland build: the FPU enter/leave hooks expand to no-ops. */
#define	fpu_kern_enter()	((void)0)
#define	fpu_kern_leave()	((void)0)
/*
 * Bit layout of the low control word passed to the xcrypt
 * instructions.  Defined here only for the userland build;
 * presumably the kernel build gets them from <x86/via_padlock.h>.
 */
#define C3_CRYPT_CWLO_ROUND_M		0x0000000f
#define C3_CRYPT_CWLO_ALG_M		0x00000070
#define C3_CRYPT_CWLO_ALG_AES		0x00000000
#define C3_CRYPT_CWLO_KEYGEN_M		0x00000080
#define C3_CRYPT_CWLO_KEYGEN_HW		0x00000000
#define C3_CRYPT_CWLO_KEYGEN_SW		0x00000080
#define C3_CRYPT_CWLO_NORMAL		0x00000000
#define C3_CRYPT_CWLO_INTERMEDIATE	0x00000100
#define C3_CRYPT_CWLO_ENCRYPT		0x00000000
#define C3_CRYPT_CWLO_DECRYPT		0x00000200
/* The low nibble (C3_CRYPT_CWLO_ROUND_M) holds the round count. */
#define C3_CRYPT_CWLO_KEY128		0x0000000a      /* 128bit, 10 rds */
#define C3_CRYPT_CWLO_KEY192		0x0000040c      /* 192bit, 12 rds */
#define C3_CRYPT_CWLO_KEY256		0x0000080e      /* 256bit, 14 rds */
     75  1.3  riastrad #endif
     76  1.1  riastrad 
/*
 * aesvia_reload_keys()
 *
 *	Execute a pushf/popf pair, i.e. rewrite the flags register
 *	with its own current value.  The callers in this file invoke
 *	it right after fpu_kern_enter() and before issuing any xcrypt
 *	instruction.
 *
 *	NOTE(review): presumably rewriting the flags is what makes
 *	the PadLock unit reload key material from memory on the next
 *	xcrypt instead of reusing a cached key -- confirm against the
 *	VIA PadLock programming guide.
 */
static void
aesvia_reload_keys(void)
{

	asm volatile("pushf; popf");
}
     83  1.1  riastrad 
     84  1.1  riastrad static uint32_t
     85  1.1  riastrad aesvia_keylen_cw0(unsigned nrounds)
     86  1.1  riastrad {
     87  1.1  riastrad 
     88  1.1  riastrad 	/*
     89  1.1  riastrad 	 * Determine the control word bits for the key size / number of
     90  1.1  riastrad 	 * rounds.  For AES-128, the hardware can do key expansion on
     91  1.1  riastrad 	 * the fly; for AES-192 and AES-256, software must do it.
     92  1.1  riastrad 	 */
     93  1.1  riastrad 	switch (nrounds) {
     94  1.1  riastrad 	case AES_128_NROUNDS:
     95  1.1  riastrad 		return C3_CRYPT_CWLO_KEY128;
     96  1.1  riastrad 	case AES_192_NROUNDS:
     97  1.1  riastrad 		return C3_CRYPT_CWLO_KEY192 | C3_CRYPT_CWLO_KEYGEN_SW;
     98  1.1  riastrad 	case AES_256_NROUNDS:
     99  1.1  riastrad 		return C3_CRYPT_CWLO_KEY256 | C3_CRYPT_CWLO_KEYGEN_SW;
    100  1.1  riastrad 	default:
    101  1.1  riastrad 		panic("invalid AES nrounds: %u", nrounds);
    102  1.1  riastrad 	}
    103  1.1  riastrad }
    104  1.1  riastrad 
    105  1.1  riastrad static void
    106  1.1  riastrad aesvia_setenckey(struct aesenc *enc, const uint8_t *key, uint32_t nrounds)
    107  1.1  riastrad {
    108  1.1  riastrad 	size_t key_len;
    109  1.1  riastrad 
    110  1.1  riastrad 	switch (nrounds) {
    111  1.1  riastrad 	case AES_128_NROUNDS:
    112  1.1  riastrad 		enc->aese_aes.aes_rk[0] = le32dec(key + 4*0);
    113  1.1  riastrad 		enc->aese_aes.aes_rk[1] = le32dec(key + 4*1);
    114  1.1  riastrad 		enc->aese_aes.aes_rk[2] = le32dec(key + 4*2);
    115  1.1  riastrad 		enc->aese_aes.aes_rk[3] = le32dec(key + 4*3);
    116  1.1  riastrad 		return;
    117  1.1  riastrad 	case AES_192_NROUNDS:
    118  1.1  riastrad 		key_len = 24;
    119  1.1  riastrad 		break;
    120  1.1  riastrad 	case AES_256_NROUNDS:
    121  1.1  riastrad 		key_len = 32;
    122  1.1  riastrad 		break;
    123  1.1  riastrad 	default:
    124  1.1  riastrad 		panic("invalid AES nrounds: %u", nrounds);
    125  1.1  riastrad 	}
    126  1.1  riastrad 	br_aes_ct_keysched_stdenc(enc->aese_aes.aes_rk, key, key_len);
    127  1.1  riastrad }
    128  1.1  riastrad 
    129  1.1  riastrad static void
    130  1.1  riastrad aesvia_setdeckey(struct aesdec *dec, const uint8_t *key, uint32_t nrounds)
    131  1.1  riastrad {
    132  1.1  riastrad 	size_t key_len;
    133  1.1  riastrad 
    134  1.1  riastrad 	switch (nrounds) {
    135  1.1  riastrad 	case AES_128_NROUNDS:
    136  1.1  riastrad 		dec->aesd_aes.aes_rk[0] = le32dec(key + 4*0);
    137  1.1  riastrad 		dec->aesd_aes.aes_rk[1] = le32dec(key + 4*1);
    138  1.1  riastrad 		dec->aesd_aes.aes_rk[2] = le32dec(key + 4*2);
    139  1.1  riastrad 		dec->aesd_aes.aes_rk[3] = le32dec(key + 4*3);
    140  1.1  riastrad 		return;
    141  1.1  riastrad 	case AES_192_NROUNDS:
    142  1.1  riastrad 		key_len = 24;
    143  1.1  riastrad 		break;
    144  1.1  riastrad 	case AES_256_NROUNDS:
    145  1.1  riastrad 		key_len = 32;
    146  1.1  riastrad 		break;
    147  1.1  riastrad 	default:
    148  1.1  riastrad 		panic("invalid AES nrounds: %u", nrounds);
    149  1.1  riastrad 	}
    150  1.1  riastrad 	br_aes_ct_keysched_stddec(dec->aesd_aes.aes_rk, key, key_len);
    151  1.1  riastrad }
    152  1.1  riastrad 
/*
 * aesvia_encN(enc, in, out, nblocks, cw0)
 *
 *	Run `rep xcryptecb' in encrypt mode over nblocks 16-byte
 *	blocks, reading from in and writing to out.  cw0 supplies the
 *	key-size bits of the control word (see aesvia_keylen_cw0);
 *	the algorithm, direction, and mode flags are OR'd in here.
 *	enc, in, and out must all be 16-byte aligned (asserted).
 *
 *	The callers in this file do fpu_kern_enter() and
 *	aesvia_reload_keys() before calling this.
 */
static inline void
aesvia_encN(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint32_t cw0)
{
	/* Only word 0 of the control block is set; the rest stay zero.  */
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_ENCRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)enc & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);

	/*
	 * Operands are pinned to registers by constraint letter:
	 * block count in c, source in S, destination in D, key in b,
	 * control word in d.  The first three are read-write ("+"),
	 * so the compiler assumes the instruction changes them.
	 */
	asm volatile("rep xcryptecb"
	    : "+c"(nblocks), "+S"(in), "+D"(out)
	    : "b"(enc), "d"(cw)
	    : "memory", "cc");
}
    173  1.1  riastrad 
/*
 * aesvia_decN(dec, in, out, nblocks, cw0)
 *
 *	Run `rep xcryptecb' in decrypt mode over nblocks 16-byte
 *	blocks, reading from in and writing to out.  cw0 supplies the
 *	key-size bits of the control word (see aesvia_keylen_cw0);
 *	the algorithm, direction, and mode flags are OR'd in here.
 *	dec, in, and out must all be 16-byte aligned (asserted).
 *
 *	The callers in this file do fpu_kern_enter() and
 *	aesvia_reload_keys() before calling this.
 */
static inline void
aesvia_decN(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint32_t cw0)
{
	/* Only word 0 of the control block is set; the rest stay zero.  */
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_DECRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)dec & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);

	/*
	 * Operands are pinned to registers by constraint letter:
	 * block count in c, source in S, destination in D, key in b,
	 * control word in d.  The first three are read-write ("+"),
	 * so the compiler assumes the instruction changes them.
	 */
	asm volatile("rep xcryptecb"
	    : "+c"(nblocks), "+S"(in), "+D"(out)
	    : "b"(dec), "d"(cw)
	    : "memory", "cc");
}
    194  1.1  riastrad 
    195  1.1  riastrad static struct evcnt enc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    196  1.1  riastrad     NULL, "aesvia", "enc aligned");
    197  1.1  riastrad EVCNT_ATTACH_STATIC(enc_aligned_evcnt);
    198  1.1  riastrad static struct evcnt enc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    199  1.1  riastrad     NULL, "aesvia", "dec unaligned");
    200  1.1  riastrad EVCNT_ATTACH_STATIC(enc_unaligned_evcnt);
    201  1.1  riastrad 
    202  1.1  riastrad static void
    203  1.1  riastrad aesvia_enc(const struct aesenc *enc, const uint8_t in[static 16],
    204  1.1  riastrad     uint8_t out[static 16], uint32_t nrounds)
    205  1.1  riastrad {
    206  1.1  riastrad 	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
    207  1.1  riastrad 
    208  1.1  riastrad 	fpu_kern_enter();
    209  1.1  riastrad 	aesvia_reload_keys();
    210  1.1  riastrad 	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0 &&
    211  1.1  riastrad 	    ((uintptr_t)in & 0xff0) != 0xff0) {
    212  1.1  riastrad 		enc_aligned_evcnt.ev_count++;
    213  1.2  riastrad 		aesvia_encN(enc, in, out, 1, cw0);
    214  1.1  riastrad 	} else {
    215  1.1  riastrad 		enc_unaligned_evcnt.ev_count++;
    216  1.1  riastrad 		/*
    217  1.1  riastrad 		 * VIA requires 16-byte/128-bit alignment, and
    218  1.1  riastrad 		 * xcrypt-ecb reads one block past the one we're
    219  1.1  riastrad 		 * working on -- which may go past the end of the page
    220  1.1  riastrad 		 * into unmapped territory.  Use a bounce buffer if
    221  1.1  riastrad 		 * either constraint is violated.
    222  1.1  riastrad 		 */
    223  1.1  riastrad 		uint8_t inbuf[16] __aligned(16);
    224  1.1  riastrad 		uint8_t outbuf[16] __aligned(16);
    225  1.1  riastrad 
    226  1.1  riastrad 		memcpy(inbuf, in, 16);
    227  1.2  riastrad 		aesvia_encN(enc, inbuf, outbuf, 1, cw0);
    228  1.1  riastrad 		memcpy(out, outbuf, 16);
    229  1.1  riastrad 
    230  1.1  riastrad 		explicit_memset(inbuf, 0, sizeof inbuf);
    231  1.1  riastrad 		explicit_memset(outbuf, 0, sizeof outbuf);
    232  1.1  riastrad 	}
    233  1.1  riastrad 	fpu_kern_leave();
    234  1.1  riastrad }
    235  1.1  riastrad 
/*
 * Event counters for aesvia_dec: one tally for calls that run
 * directly on the caller's buffers, and one for calls that go
 * through the bounce buffers.
 */
static struct evcnt dec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "dec aligned");
EVCNT_ATTACH_STATIC(dec_aligned_evcnt);
static struct evcnt dec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "dec unaligned");
EVCNT_ATTACH_STATIC(dec_unaligned_evcnt);
    242  1.1  riastrad 
    243  1.1  riastrad static void
    244  1.1  riastrad aesvia_dec(const struct aesdec *dec, const uint8_t in[static 16],
    245  1.1  riastrad     uint8_t out[static 16], uint32_t nrounds)
    246  1.1  riastrad {
    247  1.1  riastrad 	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
    248  1.1  riastrad 
    249  1.1  riastrad 	fpu_kern_enter();
    250  1.1  riastrad 	aesvia_reload_keys();
    251  1.1  riastrad 	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0 &&
    252  1.1  riastrad 	    ((uintptr_t)in & 0xff0) != 0xff0) {
    253  1.1  riastrad 		dec_aligned_evcnt.ev_count++;
    254  1.2  riastrad 		aesvia_decN(dec, in, out, 1, cw0);
    255  1.1  riastrad 	} else {
    256  1.1  riastrad 		dec_unaligned_evcnt.ev_count++;
    257  1.1  riastrad 		/*
    258  1.1  riastrad 		 * VIA requires 16-byte/128-bit alignment, and
    259  1.1  riastrad 		 * xcrypt-ecb reads one block past the one we're
    260  1.1  riastrad 		 * working on -- which may go past the end of the page
    261  1.1  riastrad 		 * into unmapped territory.  Use a bounce buffer if
    262  1.1  riastrad 		 * either constraint is violated.
    263  1.1  riastrad 		 */
    264  1.1  riastrad 		uint8_t inbuf[16] __aligned(16);
    265  1.1  riastrad 		uint8_t outbuf[16] __aligned(16);
    266  1.1  riastrad 
    267  1.1  riastrad 		memcpy(inbuf, in, 16);
    268  1.2  riastrad 		aesvia_decN(dec, inbuf, outbuf, 1, cw0);
    269  1.1  riastrad 		memcpy(out, outbuf, 16);
    270  1.1  riastrad 
    271  1.1  riastrad 		explicit_memset(inbuf, 0, sizeof inbuf);
    272  1.1  riastrad 		explicit_memset(outbuf, 0, sizeof outbuf);
    273  1.1  riastrad 	}
    274  1.1  riastrad 	fpu_kern_leave();
    275  1.1  riastrad }
    276  1.1  riastrad 
/*
 * aesvia_cbc_encN(enc, in, out, nblocks, ivp, cw0)
 *
 *	Run `rep xcryptcbc' in encrypt mode over nblocks 16-byte
 *	blocks from in to out.  On entry *ivp points at the
 *	initialization vector; on return it has been updated by the
 *	instruction (see the register effects below).  cw0 supplies
 *	the key-size bits of the control word.  enc, in, out, and
 *	*ivp must all be 16-byte aligned (asserted).
 */
static inline void
aesvia_cbc_encN(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint8_t **ivp, uint32_t cw0)
{
	/* Only word 0 of the control block is set; the rest stay zero.  */
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_ENCRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)enc & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);
	KASSERT(((uintptr_t)*ivp & 0xf) == 0);

	/*
	 * Register effects:
	 * - Counts nblocks down to zero.
	 * - Advances in by nblocks (units of blocks).
	 * - Advances out by nblocks (units of blocks).
	 * - Updates *ivp to point at the last block of out.
	 */
	asm volatile("rep xcryptcbc"
	    : "+c"(nblocks), "+S"(in), "+D"(out), "+a"(*ivp)
	    : "b"(enc), "d"(cw)
	    : "memory", "cc");
}
    305  1.1  riastrad 
/*
 * aesvia_cbc_decN(dec, in, out, nblocks, iv, cw0)
 *
 *	Run `rep xcryptcbc' in decrypt mode over nblocks 16-byte
 *	blocks from in to out, chaining from the initialization
 *	vector at iv.  The buffer at iv is overwritten as a side
 *	effect (see below).  cw0 supplies the key-size bits of the
 *	control word.  dec, in, out, and iv must all be 16-byte
 *	aligned (asserted).
 */
static inline void
aesvia_cbc_decN(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], size_t nblocks, uint8_t iv[static 16],
    uint32_t cw0)
{
	/* Only word 0 of the control block is set; the rest stay zero.  */
	const uint32_t cw[4] __aligned(16) = {
		[0] = (cw0
		    | C3_CRYPT_CWLO_ALG_AES
		    | C3_CRYPT_CWLO_DECRYPT
		    | C3_CRYPT_CWLO_NORMAL),
	};

	KASSERT(((uintptr_t)dec & 0xf) == 0);
	KASSERT(((uintptr_t)in & 0xf) == 0);
	KASSERT(((uintptr_t)out & 0xf) == 0);
	KASSERT(((uintptr_t)iv & 0xf) == 0);

	/*
	 * Register effects:
	 * - Counts nblocks down to zero.
	 * - Advances in by nblocks (units of blocks).
	 * - Advances out by nblocks (units of blocks).
	 * Memory side effects:
	 * - Writes what was the last block of in at the address iv.
	 */
	asm volatile("rep xcryptcbc"
	    : "+c"(nblocks), "+S"(in), "+D"(out)
	    : "a"(iv), "b"(dec), "d"(cw)
	    : "memory", "cc");
}
    336  1.1  riastrad 
/*
 * xor128(x, a, b)
 *
 *	Store the 128-bit value a ^ b at x, working one 32-bit word
 *	at a time from the lowest address up.  x may be the same
 *	buffer as a or b.  All three pointers are dereferenced as
 *	uint32_t.
 */
static inline void
xor128(void *x, const void *a, const void *b)
{
	uint32_t *dst = x;
	const uint32_t *lhs = a;
	const uint32_t *rhs = b;
	unsigned w;

	for (w = 0; w < 4; w++)
		dst[w] = lhs[w] ^ rhs[w];
}
    349  1.1  riastrad 
/*
 * Event counters for aesvia_cbc_enc: one tally for calls where in,
 * out, and iv are all 16-byte aligned, and one for calls that take
 * the block-at-a-time fallback.
 */
static struct evcnt cbcenc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcenc aligned");
EVCNT_ATTACH_STATIC(cbcenc_aligned_evcnt);
static struct evcnt cbcenc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcenc unaligned");
EVCNT_ATTACH_STATIC(cbcenc_unaligned_evcnt);
    356  1.1  riastrad 
    357  1.1  riastrad static void
    358  1.1  riastrad aesvia_cbc_enc(const struct aesenc *enc, const uint8_t in[static 16],
    359  1.1  riastrad     uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    360  1.1  riastrad     uint32_t nrounds)
    361  1.1  riastrad {
    362  1.1  riastrad 	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
    363  1.1  riastrad 
    364  1.1  riastrad 	KASSERT(nbytes % 16 == 0);
    365  1.1  riastrad 	if (nbytes == 0)
    366  1.1  riastrad 		return;
    367  1.1  riastrad 
    368  1.1  riastrad 	fpu_kern_enter();
    369  1.1  riastrad 	aesvia_reload_keys();
    370  1.1  riastrad 	if ((((uintptr_t)in | (uintptr_t)out | (uintptr_t)iv) & 0xf) == 0) {
    371  1.1  riastrad 		cbcenc_aligned_evcnt.ev_count++;
    372  1.1  riastrad 		uint8_t *ivp = iv;
    373  1.2  riastrad 		aesvia_cbc_encN(enc, in, out, nbytes/16, &ivp, cw0);
    374  1.1  riastrad 		memcpy(iv, ivp, 16);
    375  1.1  riastrad 	} else {
    376  1.1  riastrad 		cbcenc_unaligned_evcnt.ev_count++;
    377  1.1  riastrad 		uint8_t cv[16] __aligned(16);
    378  1.1  riastrad 		uint8_t tmp[16] __aligned(16);
    379  1.1  riastrad 
    380  1.1  riastrad 		memcpy(cv, iv, 16);
    381  1.1  riastrad 		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
    382  1.1  riastrad 			memcpy(tmp, in, 16);
    383  1.1  riastrad 			xor128(tmp, tmp, cv);
    384  1.2  riastrad 			aesvia_encN(enc, tmp, cv, 1, cw0);
    385  1.1  riastrad 			memcpy(out, cv, 16);
    386  1.1  riastrad 		}
    387  1.1  riastrad 		memcpy(iv, cv, 16);
    388  1.1  riastrad 	}
    389  1.1  riastrad 	fpu_kern_leave();
    390  1.1  riastrad }
    391  1.1  riastrad 
/*
 * Event counters for aesvia_cbc_dec: one tally for calls where in,
 * out, and iv are all 16-byte aligned, and one for calls that take
 * the block-at-a-time fallback.
 */
static struct evcnt cbcdec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcdec aligned");
EVCNT_ATTACH_STATIC(cbcdec_aligned_evcnt);
static struct evcnt cbcdec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "cbcdec unaligned");
EVCNT_ATTACH_STATIC(cbcdec_unaligned_evcnt);
    398  1.1  riastrad 
    399  1.1  riastrad static void
    400  1.1  riastrad aesvia_cbc_dec(const struct aesdec *dec, const uint8_t in[static 16],
    401  1.1  riastrad     uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
    402  1.1  riastrad     uint32_t nrounds)
    403  1.1  riastrad {
    404  1.1  riastrad 	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
    405  1.1  riastrad 
    406  1.1  riastrad 	KASSERT(nbytes % 16 == 0);
    407  1.1  riastrad 	if (nbytes == 0)
    408  1.1  riastrad 		return;
    409  1.1  riastrad 
    410  1.1  riastrad 	fpu_kern_enter();
    411  1.1  riastrad 	aesvia_reload_keys();
    412  1.1  riastrad 	if ((((uintptr_t)in | (uintptr_t)out | (uintptr_t)iv) & 0xf) == 0) {
    413  1.1  riastrad 		cbcdec_aligned_evcnt.ev_count++;
    414  1.2  riastrad 		aesvia_cbc_decN(dec, in, out, nbytes/16, iv, cw0);
    415  1.1  riastrad 	} else {
    416  1.1  riastrad 		cbcdec_unaligned_evcnt.ev_count++;
    417  1.1  riastrad 		uint8_t iv0[16] __aligned(16);
    418  1.1  riastrad 		uint8_t cv[16] __aligned(16);
    419  1.1  riastrad 		uint8_t tmp[16] __aligned(16);
    420  1.1  riastrad 
    421  1.1  riastrad 		memcpy(iv0, iv, 16);
    422  1.1  riastrad 		memcpy(cv, in + nbytes - 16, 16);
    423  1.1  riastrad 		memcpy(iv, cv, 16);
    424  1.1  riastrad 
    425  1.1  riastrad 		for (;;) {
    426  1.2  riastrad 			aesvia_decN(dec, cv, tmp, 1, cw0);
    427  1.1  riastrad 			if ((nbytes -= 16) == 0)
    428  1.1  riastrad 				break;
    429  1.1  riastrad 			memcpy(cv, in + nbytes - 16, 16);
    430  1.1  riastrad 			xor128(tmp, tmp, cv);
    431  1.8  christos 			memcpy(out + nbytes, tmp, 16);
    432  1.1  riastrad 		}
    433  1.1  riastrad 
    434  1.1  riastrad 		xor128(tmp, tmp, iv0);
    435  1.1  riastrad 		memcpy(out, tmp, 16);
    436  1.1  riastrad 		explicit_memset(tmp, 0, sizeof tmp);
    437  1.1  riastrad 	}
    438  1.1  riastrad 	fpu_kern_leave();
    439  1.1  riastrad }
    440  1.1  riastrad 
/*
 * aesvia_xts_update(t0, t1, t2, t3)
 *
 *	Advance the XTS tweak held in four 32-bit words, t0 least
 *	significant: shift the 128-bit value left by one bit, and if
 *	a bit falls off the top of t3, fold it back into t0 as 0x87.
 */
static inline void
aesvia_xts_update(uint32_t *t0, uint32_t *t1, uint32_t *t2, uint32_t *t3)
{
	/* Capture every word's top bit before any word is modified.  */
	const uint32_t carry0 = *t0 >> 31;
	const uint32_t carry1 = *t1 >> 31;
	const uint32_t carry2 = *t2 >> 31;
	const uint32_t carry3 = *t3 >> 31;
	const uint32_t fold = carry3 ? 0x87 : 0;

	*t0 = (*t0 << 1) ^ fold;
	*t1 = (*t1 << 1) ^ carry0;
	*t2 = (*t2 << 1) ^ carry1;
	*t3 = (*t3 << 1) ^ carry2;
}
    455  1.1  riastrad 
/*
 * aesvia_xts_update_selftest()
 *
 *	Run aesvia_xts_update over a small table of known
 *	input/output pairs covering the carry out of each word and
 *	the 0x87 fold.  Returns 0 if every vector matches, -1 on the
 *	first mismatch.
 */
static int
aesvia_xts_update_selftest(void)
{
	static const struct {
		uint32_t in[4], out[4];
	} vectors[] = {
		{ {1}, {2} },
		{ {0x80000000U,0,0,0}, {0,1,0,0} },
		{ {0,0x80000000U,0,0}, {0,0,1,0} },
		{ {0,0,0x80000000U,0}, {0,0,0,1} },
		{ {0,0,0,0x80000000U}, {0x87,0,0,0} },
		{ {0,0x80000000U,0,0x80000000U}, {0x87,0,1,0} },
	};
	const size_t nvectors = sizeof(vectors)/sizeof(vectors[0]);
	size_t k;

	for (k = 0; k < nvectors; k++) {
		uint32_t t[4];

		memcpy(t, vectors[k].in, sizeof t);
		aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
		if (memcmp(t, vectors[k].out, sizeof t) != 0)
			return -1;
	}

	/* Every vector matched.  */
	return 0;
}
    488  1.1  riastrad 
/*
 * Event counters for aesvia_xts_enc: one tally for calls where in
 * and out are both 16-byte aligned, and one for calls that bounce
 * every block through a stack buffer.
 */
static struct evcnt xtsenc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsenc aligned");
EVCNT_ATTACH_STATIC(xtsenc_aligned_evcnt);
static struct evcnt xtsenc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsenc unaligned");
EVCNT_ATTACH_STATIC(xtsenc_unaligned_evcnt);
    495  1.1  riastrad 
    496  1.1  riastrad static void
    497  1.1  riastrad aesvia_xts_enc(const struct aesenc *enc, const uint8_t in[static 16],
    498  1.1  riastrad     uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    499  1.1  riastrad     uint32_t nrounds)
    500  1.1  riastrad {
    501  1.1  riastrad 	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
    502  1.1  riastrad 	uint32_t t[4];
    503  1.1  riastrad 
    504  1.1  riastrad 	KASSERT(nbytes % 16 == 0);
    505  1.1  riastrad 
    506  1.1  riastrad 	memcpy(t, tweak, 16);
    507  1.1  riastrad 
    508  1.1  riastrad 	fpu_kern_enter();
    509  1.1  riastrad 	aesvia_reload_keys();
    510  1.1  riastrad 	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0) {
    511  1.1  riastrad 		xtsenc_aligned_evcnt.ev_count++;
    512  1.1  riastrad 		unsigned lastblock = 0;
    513  1.2  riastrad 		uint32_t buf[8*4] __aligned(16);
    514  1.1  riastrad 
    515  1.1  riastrad 		/*
    516  1.1  riastrad 		 * Make sure the last block is not the last block of a
    517  1.1  riastrad 		 * page.  (Note that we store the AES input in `out' as
    518  1.1  riastrad 		 * a temporary buffer, rather than reading it directly
    519  1.1  riastrad 		 * from `in', since we have to combine the tweak
    520  1.1  riastrad 		 * first.)
    521  1.1  riastrad 		 */
    522  1.1  riastrad 		lastblock = 16*(((uintptr_t)(out + nbytes) & 0xfff) == 0);
    523  1.1  riastrad 		nbytes -= lastblock;
    524  1.1  riastrad 
    525  1.2  riastrad 		/*
    526  1.2  riastrad 		 * Handle an odd number of initial blocks so we can
    527  1.2  riastrad 		 * process the rest in eight-block (128-byte) chunks.
    528  1.2  riastrad 		 */
    529  1.2  riastrad 		if (nbytes % 128) {
    530  1.2  riastrad 			unsigned nbytes128 = nbytes % 128;
    531  1.2  riastrad 
    532  1.2  riastrad 			nbytes -= nbytes128;
    533  1.2  riastrad 			for (; nbytes128; nbytes128 -= 16, in += 16, out += 16)
    534  1.2  riastrad 			{
    535  1.2  riastrad 				xor128(out, in, t);
    536  1.2  riastrad 				aesvia_encN(enc, out, out, 1, cw0);
    537  1.2  riastrad 				xor128(out, out, t);
    538  1.2  riastrad 				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
    539  1.2  riastrad 			}
    540  1.2  riastrad 		}
    541  1.2  riastrad 
    542  1.2  riastrad 		/* Process eight blocks at a time.  */
    543  1.2  riastrad 		for (; nbytes; nbytes -= 128, in += 128, out += 128) {
    544  1.2  riastrad 			unsigned i;
    545  1.2  riastrad 			for (i = 0; i < 8; i++) {
    546  1.2  riastrad 				memcpy(buf + 4*i, t, 16);
    547  1.2  riastrad 				xor128(out + 4*i, in + 4*i, t);
    548  1.2  riastrad 				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
    549  1.2  riastrad 			}
    550  1.2  riastrad 			aesvia_encN(enc, out, out, 8, cw0);
    551  1.2  riastrad 			for (i = 0; i < 8; i++)
    552  1.2  riastrad 				xor128(out + 4*i, in + 4*i, buf + 4*i);
    553  1.1  riastrad 		}
    554  1.1  riastrad 
    555  1.1  riastrad 		/* Handle the last block of a page, if necessary.  */
    556  1.1  riastrad 		if (lastblock) {
    557  1.1  riastrad 			xor128(buf, in, t);
    558  1.2  riastrad 			aesvia_encN(enc, (const void *)buf, out, 1, cw0);
    559  1.1  riastrad 		}
    560  1.2  riastrad 
    561  1.2  riastrad 		explicit_memset(buf, 0, sizeof buf);
    562  1.1  riastrad 	} else {
    563  1.1  riastrad 		xtsenc_unaligned_evcnt.ev_count++;
    564  1.1  riastrad 		uint8_t buf[16] __aligned(16);
    565  1.1  riastrad 
    566  1.1  riastrad 		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
    567  1.1  riastrad 			memcpy(buf, in, 16);
    568  1.1  riastrad 			xor128(buf, buf, t);
    569  1.2  riastrad 			aesvia_encN(enc, buf, buf, 1, cw0);
    570  1.1  riastrad 			xor128(buf, buf, t);
    571  1.1  riastrad 			memcpy(out, buf, 16);
    572  1.1  riastrad 			aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
    573  1.1  riastrad 		}
    574  1.1  riastrad 
    575  1.1  riastrad 		explicit_memset(buf, 0, sizeof buf);
    576  1.1  riastrad 	}
    577  1.1  riastrad 	fpu_kern_leave();
    578  1.1  riastrad 
    579  1.1  riastrad 	memcpy(tweak, t, 16);
    580  1.1  riastrad 	explicit_memset(t, 0, sizeof t);
    581  1.1  riastrad }
    582  1.1  riastrad 
/*
 * Event counters for aesvia_xts_dec: one tally for calls where in
 * and out are both 16-byte aligned, and one for the remaining
 * calls.
 */
static struct evcnt xtsdec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsdec aligned");
EVCNT_ATTACH_STATIC(xtsdec_aligned_evcnt);
static struct evcnt xtsdec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    NULL, "aesvia", "xtsdec unaligned");
EVCNT_ATTACH_STATIC(xtsdec_unaligned_evcnt);
    589  1.1  riastrad 
    590  1.1  riastrad static void
    591  1.1  riastrad aesvia_xts_dec(const struct aesdec *dec, const uint8_t in[static 16],
    592  1.1  riastrad     uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    593  1.1  riastrad     uint32_t nrounds)
    594  1.1  riastrad {
    595  1.1  riastrad 	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
    596  1.1  riastrad 	uint32_t t[4];
    597  1.1  riastrad 
    598  1.1  riastrad 	KASSERT(nbytes % 16 == 0);
    599  1.1  riastrad 
    600  1.1  riastrad 	memcpy(t, tweak, 16);
    601  1.1  riastrad 
    602  1.1  riastrad 	fpu_kern_enter();
    603  1.1  riastrad 	aesvia_reload_keys();
    604  1.1  riastrad 	if ((((uintptr_t)in | (uintptr_t)out) & 0xf) == 0) {
    605  1.1  riastrad 		xtsdec_aligned_evcnt.ev_count++;
    606  1.1  riastrad 		unsigned lastblock = 0;
    607  1.2  riastrad 		uint32_t buf[8*4] __aligned(16);
    608  1.1  riastrad 
    609  1.1  riastrad 		/*
    610  1.1  riastrad 		 * Make sure the last block is not the last block of a
    611  1.1  riastrad 		 * page.  (Note that we store the AES input in `out' as
    612  1.1  riastrad 		 * a temporary buffer, rather than reading it directly
    613  1.1  riastrad 		 * from `in', since we have to combine the tweak
    614  1.1  riastrad 		 * first.)
    615  1.1  riastrad 		 */
    616  1.1  riastrad 		lastblock = 16*(((uintptr_t)(out + nbytes) & 0xfff) == 0);
    617  1.1  riastrad 		nbytes -= lastblock;
    618  1.1  riastrad 
    619  1.2  riastrad 		/*
    620  1.2  riastrad 		 * Handle an odd number of initial blocks so we can
    621  1.2  riastrad 		 * process the rest in eight-block (128-byte) chunks.
    622  1.2  riastrad 		 */
    623  1.2  riastrad 		if (nbytes % 128) {
    624  1.2  riastrad 			unsigned nbytes128 = nbytes % 128;
    625  1.2  riastrad 
    626  1.2  riastrad 			nbytes -= nbytes128;
    627  1.2  riastrad 			for (; nbytes128; nbytes128 -= 16, in += 16, out += 16)
    628  1.2  riastrad 			{
    629  1.2  riastrad 				xor128(out, in, t);
    630  1.2  riastrad 				aesvia_decN(dec, out, out, 1, cw0);
    631  1.2  riastrad 				xor128(out, out, t);
    632  1.2  riastrad 				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
    633  1.2  riastrad 			}
    634  1.2  riastrad 		}
    635  1.2  riastrad 
    636  1.2  riastrad 		/* Process eight blocks at a time.  */
    637  1.2  riastrad 		for (; nbytes; nbytes -= 128, in += 128, out += 128) {
    638  1.2  riastrad 			unsigned i;
    639  1.2  riastrad 			for (i = 0; i < 8; i++) {
    640  1.2  riastrad 				memcpy(buf + 4*i, t, 16);
    641  1.2  riastrad 				xor128(out + 4*i, in + 4*i, t);
    642  1.2  riastrad 				aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
    643  1.2  riastrad 			}
    644  1.2  riastrad 			aesvia_decN(dec, out, out, 8, cw0);
    645  1.2  riastrad 			for (i = 0; i < 8; i++)
    646  1.2  riastrad 				xor128(out + 4*i, in + 4*i, buf + 4*i);
    647  1.1  riastrad 		}
    648  1.1  riastrad 
    649  1.1  riastrad 		/* Handle the last block of a page, if necessary.  */
    650  1.1  riastrad 		if (lastblock) {
    651  1.1  riastrad 			xor128(buf, in, t);
    652  1.2  riastrad 			aesvia_decN(dec, (const void *)buf, out, 1, cw0);
    653  1.1  riastrad 		}
    654  1.2  riastrad 
    655  1.2  riastrad 		explicit_memset(buf, 0, sizeof buf);
    656  1.1  riastrad 	} else {
    657  1.1  riastrad 		xtsdec_unaligned_evcnt.ev_count++;
    658  1.1  riastrad 		uint8_t buf[16] __aligned(16);
    659  1.1  riastrad 
    660  1.1  riastrad 		for (; nbytes; nbytes -= 16, in += 16, out += 16) {
    661  1.1  riastrad 			memcpy(buf, in, 16);
    662  1.1  riastrad 			xor128(buf, buf, t);
    663  1.2  riastrad 			aesvia_decN(dec, buf, buf, 1, cw0);
    664  1.1  riastrad 			xor128(buf, buf, t);
    665  1.1  riastrad 			memcpy(out, buf, 16);
    666  1.1  riastrad 			aesvia_xts_update(&t[0], &t[1], &t[2], &t[3]);
    667  1.1  riastrad 		}
    668  1.1  riastrad 
    669  1.1  riastrad 		explicit_memset(buf, 0, sizeof buf);
    670  1.1  riastrad 	}
    671  1.1  riastrad 	fpu_kern_leave();
    672  1.1  riastrad 
    673  1.1  riastrad 	memcpy(tweak, t, 16);
    674  1.1  riastrad 	explicit_memset(t, 0, sizeof t);
    675  1.1  riastrad }
    676  1.1  riastrad 
    677  1.5  riastrad static struct evcnt cbcmac_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    678  1.5  riastrad     NULL, "aesvia", "cbcmac aligned");
    679  1.5  riastrad EVCNT_ATTACH_STATIC(cbcmac_aligned_evcnt);
    680  1.5  riastrad static struct evcnt cbcmac_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    681  1.5  riastrad     NULL, "aesvia", "cbcmac unaligned");
    682  1.5  riastrad EVCNT_ATTACH_STATIC(cbcmac_unaligned_evcnt);
    683  1.5  riastrad 
    684  1.5  riastrad static void
    685  1.5  riastrad aesvia_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16],
    686  1.5  riastrad     size_t nbytes, uint8_t auth0[static 16], uint32_t nrounds)
    687  1.5  riastrad {
    688  1.5  riastrad 	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
    689  1.5  riastrad 	uint8_t authbuf[16] __aligned(16);
    690  1.5  riastrad 	uint8_t *auth = auth0;
    691  1.5  riastrad 
    692  1.5  riastrad 	KASSERT(nbytes);
    693  1.5  riastrad 	KASSERT(nbytes % 16 == 0);
    694  1.5  riastrad 
    695  1.5  riastrad 	if ((uintptr_t)auth0 & 0xf) {
    696  1.5  riastrad 		memcpy(authbuf, auth0, 16);
    697  1.5  riastrad 		auth = authbuf;
    698  1.5  riastrad 		cbcmac_unaligned_evcnt.ev_count++;
    699  1.5  riastrad 	} else {
    700  1.5  riastrad 		cbcmac_aligned_evcnt.ev_count++;
    701  1.5  riastrad 	}
    702  1.5  riastrad 
    703  1.5  riastrad 	fpu_kern_enter();
    704  1.5  riastrad 	aesvia_reload_keys();
    705  1.5  riastrad 	for (; nbytes; nbytes -= 16, in += 16) {
    706  1.5  riastrad 		xor128(auth, auth, in);
    707  1.5  riastrad 		aesvia_encN(enc, auth, auth, 1, cw0);
    708  1.5  riastrad 	}
    709  1.5  riastrad 	fpu_kern_leave();
    710  1.5  riastrad 
    711  1.5  riastrad 	if ((uintptr_t)auth0 & 0xf) {
    712  1.5  riastrad 		memcpy(auth0, authbuf, 16);
    713  1.5  riastrad 		explicit_memset(authbuf, 0, sizeof authbuf);
    714  1.5  riastrad 	}
    715  1.5  riastrad }
    716  1.5  riastrad 
    717  1.5  riastrad static struct evcnt ccmenc_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    718  1.5  riastrad     NULL, "aesvia", "ccmenc aligned");
    719  1.5  riastrad EVCNT_ATTACH_STATIC(ccmenc_aligned_evcnt);
    720  1.5  riastrad static struct evcnt ccmenc_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    721  1.5  riastrad     NULL, "aesvia", "ccmenc unaligned");
    722  1.5  riastrad EVCNT_ATTACH_STATIC(ccmenc_unaligned_evcnt);
    723  1.5  riastrad 
    724  1.5  riastrad static void
    725  1.5  riastrad aesvia_ccm_enc1(const struct aesenc *enc, const uint8_t in[static 16],
    726  1.5  riastrad     uint8_t out[static 16], size_t nbytes, uint8_t authctr0[static 32],
    727  1.5  riastrad     uint32_t nrounds)
    728  1.5  riastrad {
    729  1.5  riastrad 	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
    730  1.5  riastrad 	uint8_t authctrbuf[32] __aligned(16);
    731  1.5  riastrad 	uint8_t *authctr;
    732  1.5  riastrad 	uint32_t c0, c1, c2, c3;
    733  1.5  riastrad 
    734  1.5  riastrad 	KASSERT(nbytes);
    735  1.5  riastrad 	KASSERT(nbytes % 16 == 0);
    736  1.5  riastrad 
    737  1.5  riastrad 	if ((uintptr_t)authctr0 & 0xf) {
    738  1.5  riastrad 		memcpy(authctrbuf, authctr0, 16);
    739  1.5  riastrad 		authctr = authctrbuf;
    740  1.5  riastrad 		ccmenc_unaligned_evcnt.ev_count++;
    741  1.5  riastrad 	} else {
    742  1.6  riastrad 		authctr = authctr0;
    743  1.5  riastrad 		ccmenc_aligned_evcnt.ev_count++;
    744  1.5  riastrad 	}
    745  1.5  riastrad 	c0 = le32dec(authctr0 + 16 + 4*0);
    746  1.5  riastrad 	c1 = le32dec(authctr0 + 16 + 4*1);
    747  1.5  riastrad 	c2 = le32dec(authctr0 + 16 + 4*2);
    748  1.5  riastrad 	c3 = be32dec(authctr0 + 16 + 4*3);
    749  1.5  riastrad 
    750  1.5  riastrad 	/*
    751  1.5  riastrad 	 * In principle we could use REP XCRYPTCTR here, but that
    752  1.5  riastrad 	 * doesn't help to compute the CBC-MAC step, and certain VIA
    753  1.5  riastrad 	 * CPUs have some weird errata with REP XCRYPTCTR that make it
    754  1.5  riastrad 	 * kind of a pain to use.  So let's just use REP XCRYPTECB to
    755  1.5  riastrad 	 * simultaneously compute the CBC-MAC step and the CTR step.
    756  1.5  riastrad 	 * (Maybe some VIA CPUs will compute REP XCRYPTECB in parallel,
    757  1.5  riastrad 	 * who knows...)
    758  1.5  riastrad 	 */
    759  1.5  riastrad 	fpu_kern_enter();
    760  1.5  riastrad 	aesvia_reload_keys();
    761  1.5  riastrad 	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
    762  1.5  riastrad 		xor128(authctr, authctr, in);
    763  1.5  riastrad 		le32enc(authctr + 16 + 4*0, c0);
    764  1.5  riastrad 		le32enc(authctr + 16 + 4*1, c1);
    765  1.5  riastrad 		le32enc(authctr + 16 + 4*2, c2);
    766  1.5  riastrad 		be32enc(authctr + 16 + 4*3, ++c3);
    767  1.5  riastrad 		aesvia_encN(enc, authctr, authctr, 2, cw0);
    768  1.5  riastrad 		xor128(out, in, authctr + 16);
    769  1.5  riastrad 	}
    770  1.5  riastrad 	fpu_kern_leave();
    771  1.5  riastrad 
    772  1.5  riastrad 	if ((uintptr_t)authctr0 & 0xf) {
    773  1.5  riastrad 		memcpy(authctr0, authctrbuf, 16);
    774  1.5  riastrad 		explicit_memset(authctrbuf, 0, sizeof authctrbuf);
    775  1.5  riastrad 	}
    776  1.5  riastrad 
    777  1.5  riastrad 	le32enc(authctr0 + 16 + 4*0, c0);
    778  1.5  riastrad 	le32enc(authctr0 + 16 + 4*1, c1);
    779  1.5  riastrad 	le32enc(authctr0 + 16 + 4*2, c2);
    780  1.5  riastrad 	be32enc(authctr0 + 16 + 4*3, c3);
    781  1.5  riastrad }
    782  1.5  riastrad 
    783  1.5  riastrad static struct evcnt ccmdec_aligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    784  1.5  riastrad     NULL, "aesvia", "ccmdec aligned");
    785  1.5  riastrad EVCNT_ATTACH_STATIC(ccmdec_aligned_evcnt);
    786  1.5  riastrad static struct evcnt ccmdec_unaligned_evcnt = EVCNT_INITIALIZER(EVCNT_TYPE_MISC,
    787  1.5  riastrad     NULL, "aesvia", "ccmdec unaligned");
    788  1.5  riastrad EVCNT_ATTACH_STATIC(ccmdec_unaligned_evcnt);
    789  1.5  riastrad 
    790  1.5  riastrad static void
    791  1.5  riastrad aesvia_ccm_dec1(const struct aesenc *enc, const uint8_t in[static 16],
    792  1.5  riastrad     uint8_t out[static 16], size_t nbytes, uint8_t authctr0[static 32],
    793  1.5  riastrad     uint32_t nrounds)
    794  1.5  riastrad {
    795  1.5  riastrad 	const uint32_t cw0 = aesvia_keylen_cw0(nrounds);
    796  1.5  riastrad 	uint8_t authctrbuf[32] __aligned(16);
    797  1.5  riastrad 	uint8_t *authctr;
    798  1.5  riastrad 	uint32_t c0, c1, c2, c3;
    799  1.5  riastrad 
    800  1.5  riastrad 	KASSERT(nbytes);
    801  1.5  riastrad 	KASSERT(nbytes % 16 == 0);
    802  1.5  riastrad 
    803  1.5  riastrad 	c0 = le32dec(authctr0 + 16 + 4*0);
    804  1.5  riastrad 	c1 = le32dec(authctr0 + 16 + 4*1);
    805  1.5  riastrad 	c2 = le32dec(authctr0 + 16 + 4*2);
    806  1.5  riastrad 	c3 = be32dec(authctr0 + 16 + 4*3);
    807  1.5  riastrad 
    808  1.5  riastrad 	if ((uintptr_t)authctr0 & 0xf) {
    809  1.5  riastrad 		memcpy(authctrbuf, authctr0, 16);
    810  1.5  riastrad 		authctr = authctrbuf;
    811  1.5  riastrad 		le32enc(authctr + 16 + 4*0, c0);
    812  1.5  riastrad 		le32enc(authctr + 16 + 4*1, c1);
    813  1.5  riastrad 		le32enc(authctr + 16 + 4*2, c2);
    814  1.5  riastrad 		ccmdec_unaligned_evcnt.ev_count++;
    815  1.5  riastrad 	} else {
    816  1.6  riastrad 		authctr = authctr0;
    817  1.5  riastrad 		ccmdec_aligned_evcnt.ev_count++;
    818  1.5  riastrad 	}
    819  1.5  riastrad 
    820  1.5  riastrad 	fpu_kern_enter();
    821  1.5  riastrad 	aesvia_reload_keys();
    822  1.5  riastrad 	be32enc(authctr + 16 + 4*3, ++c3);
    823  1.5  riastrad 	aesvia_encN(enc, authctr + 16, authctr + 16, 1, cw0);
    824  1.5  riastrad 	for (;; in += 16, out += 16) {
    825  1.5  riastrad 		xor128(out, authctr + 16, in);
    826  1.5  riastrad 		xor128(authctr, authctr, out);
    827  1.5  riastrad 		if ((nbytes -= 16) == 0)
    828  1.5  riastrad 			break;
    829  1.5  riastrad 		le32enc(authctr + 16 + 4*0, c0);
    830  1.5  riastrad 		le32enc(authctr + 16 + 4*1, c1);
    831  1.5  riastrad 		le32enc(authctr + 16 + 4*2, c2);
    832  1.5  riastrad 		be32enc(authctr + 16 + 4*3, ++c3);
    833  1.5  riastrad 		aesvia_encN(enc, authctr, authctr, 2, cw0);
    834  1.5  riastrad 	}
    835  1.5  riastrad 	aesvia_encN(enc, authctr, authctr, 1, cw0);
    836  1.5  riastrad 	fpu_kern_leave();
    837  1.5  riastrad 
    838  1.5  riastrad 	if ((uintptr_t)authctr0 & 0xf) {
    839  1.5  riastrad 		memcpy(authctr0, authctrbuf, 16);
    840  1.5  riastrad 		explicit_memset(authctrbuf, 0, sizeof authctrbuf);
    841  1.5  riastrad 	}
    842  1.5  riastrad 
    843  1.5  riastrad 	le32enc(authctr0 + 16 + 4*0, c0);
    844  1.5  riastrad 	le32enc(authctr0 + 16 + 4*1, c1);
    845  1.5  riastrad 	le32enc(authctr0 + 16 + 4*2, c2);
    846  1.5  riastrad 	be32enc(authctr0 + 16 + 4*3, c3);
    847  1.5  riastrad }
    848  1.5  riastrad 
/*
 * aesvia_probe()
 *
 *	Return 0 if the CPU advertises VIA ACE and the XTS tweak update
 *	self-test passes, -1 otherwise.
 *
 *	In the kernel the decision is taken from cpu_feature[4].
 *	Outside the kernel, CPUID is queried directly: the vendor must
 *	be Centaur, the Centaur leaf range must reach 0xc0000001, and
 *	that leaf's EDX must report ACE or ACE2 as supported and
 *	enabled.
 */
static int
aesvia_probe(void)
{

	/* Verify that the CPU advertises VIA ACE support.  */
#ifdef _KERNEL
	if ((cpu_feature[4] & CPUID_VIA_HAS_ACE) == 0)
		return -1;
#else
	/*
	 * From the VIA PadLock Programming Guide:
	 * https://web.archive.org/web/20220104214041/http://linux.via.com.tw/support/beginDownload.action?eleid=181&fid=261
	 */
	unsigned eax, ebx, ecx, edx;

	/* Leaf 0: only the vendor signature matters here.  */
	if (!__get_cpuid(0, &eax, &ebx, &ecx, &edx))
		return -1;
	if (ebx != signature_CENTAUR_ebx ||
	    ecx != signature_CENTAUR_ecx ||
	    edx != signature_CENTAUR_edx)
		return -1;

	/*
	 * Leaf 0xc0000000 reports the highest Centaur leaf in EAX.
	 * (EAX from leaf 0 is the highest *basic* leaf and says nothing
	 * about the 0xc000xxxx range, so it must not be compared
	 * against 0xc0000000.)  Use the raw __cpuid macro:
	 * __get_cpuid() checks the leaf against the basic/extended
	 * maximum and would refuse the Centaur leaves.
	 */
	__cpuid(0xc0000000, eax, ebx, ecx, edx);
	if (eax < 0xc0000001)
		return -1;
	__cpuid(0xc0000001, eax, ebx, ecx, edx);

	/*
	 * Check whether ACE or ACE2 is both supported and enabled:
	 * give up only if neither of them is.
	 */
	if ((edx & 0x000000c0) != 0x000000c0 &&
	    (edx & 0x00000300) != 0x00000300)
		return -1;
#endif

	/* Verify that our XTS tweak update logic works.  */
	if (aesvia_xts_update_selftest())
		return -1;

	/* Success!  */
	return 0;
}
    890  1.1  riastrad 
    891  1.1  riastrad struct aes_impl aes_via_impl = {
    892  1.1  riastrad 	.ai_name = "VIA ACE",
    893  1.1  riastrad 	.ai_probe = aesvia_probe,
    894  1.1  riastrad 	.ai_setenckey = aesvia_setenckey,
    895  1.1  riastrad 	.ai_setdeckey = aesvia_setdeckey,
    896  1.1  riastrad 	.ai_enc = aesvia_enc,
    897  1.1  riastrad 	.ai_dec = aesvia_dec,
    898  1.1  riastrad 	.ai_cbc_enc = aesvia_cbc_enc,
    899  1.1  riastrad 	.ai_cbc_dec = aesvia_cbc_dec,
    900  1.1  riastrad 	.ai_xts_enc = aesvia_xts_enc,
    901  1.1  riastrad 	.ai_xts_dec = aesvia_xts_dec,
    902  1.5  riastrad 	.ai_cbcmac_update1 = aesvia_cbcmac_update1,
    903  1.5  riastrad 	.ai_ccm_enc1 = aesvia_ccm_enc1,
    904  1.5  riastrad 	.ai_ccm_dec1 = aesvia_ccm_dec1,
    905  1.1  riastrad };
    906