Home | History | Annotate | Line # | Download | only in x86
      1 /*	$NetBSD: aes_ssse3_subr.c,v 1.3 2020/07/25 22:31:04 riastradh Exp $	*/
      2 
      3 /*-
      4  * Copyright (c) 2020 The NetBSD Foundation, Inc.
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  *
     16  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     17  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     18  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     19  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     20  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     21  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     22  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     23  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     24  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     25  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     26  * POSSIBILITY OF SUCH DAMAGE.
     27  */
     28 
     29 #include <sys/cdefs.h>
     30 __KERNEL_RCSID(1, "$NetBSD: aes_ssse3_subr.c,v 1.3 2020/07/25 22:31:04 riastradh Exp $");
     31 
     32 #ifdef _KERNEL
     33 #include <sys/systm.h>
     34 #include <lib/libkern/libkern.h>
     35 #else
     36 #include <assert.h>
     37 #include <inttypes.h>
     38 #include <stdio.h>
     39 #define	KASSERT			assert
     40 #endif
     41 
     42 #include "aes_ssse3_impl.h"
     43 
     44 static inline __m128i
     45 loadblock(const void *in)
     46 {
     47 	return _mm_loadu_epi8(in);
     48 }
     49 
     50 static inline void
     51 storeblock(void *out, __m128i block)
     52 {
     53 	_mm_storeu_epi8(out, block);
     54 }
     55 
/*
 * aes_ssse3_enc(enc, in, out, nrounds)
 *
 *	Encrypt the single 16-byte block at in and write the result to
 *	out.  The key schedule enc and the round count nrounds are
 *	passed through unchanged to aes_ssse3_enc1.  The input is fully
 *	loaded before anything is stored, so in and out may be the same
 *	buffer.
 */
void
aes_ssse3_enc(const struct aesenc *enc, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{

	storeblock(out, aes_ssse3_enc1(enc, loadblock(in), nrounds));
}
     66 
/*
 * aes_ssse3_dec(dec, in, out, nrounds)
 *
 *	Decrypt the single 16-byte block at in and write the result to
 *	out.  The key schedule dec and the round count nrounds are
 *	passed through unchanged to aes_ssse3_dec1.  The input is fully
 *	loaded before anything is stored, so in and out may be the same
 *	buffer.
 */
void
aes_ssse3_dec(const struct aesdec *dec, const uint8_t in[static 16],
    uint8_t out[static 16], uint32_t nrounds)
{

	storeblock(out, aes_ssse3_dec1(dec, loadblock(in), nrounds));
}
     77 
     78 void
     79 aes_ssse3_cbc_enc(const struct aesenc *enc, const uint8_t in[static 16],
     80     uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
     81     uint32_t nrounds)
     82 {
     83 	__m128i cv;
     84 
     85 	KASSERT(nbytes);
     86 
     87 	cv = loadblock(iv);
     88 	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
     89 		cv ^= loadblock(in);
     90 		cv = aes_ssse3_enc1(enc, cv, nrounds);
     91 		storeblock(out, cv);
     92 	}
     93 	storeblock(iv, cv);
     94 }
     95 
     96 void
     97 aes_ssse3_cbc_dec(const struct aesdec *dec, const uint8_t in[static 16],
     98     uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16],
     99     uint32_t nrounds)
    100 {
    101 	__m128i iv0, cv, b;
    102 
    103 	KASSERT(nbytes);
    104 	KASSERT(nbytes % 16 == 0);
    105 
    106 	iv0 = loadblock(iv);
    107 	cv = loadblock(in + nbytes - 16);
    108 	storeblock(iv, cv);
    109 
    110 	for (;;) {
    111 		b = aes_ssse3_dec1(dec, cv, nrounds);
    112 		if ((nbytes -= 16) == 0)
    113 			break;
    114 		cv = loadblock(in + nbytes - 16);
    115 		storeblock(out + nbytes, b ^ cv);
    116 	}
    117 	storeblock(out, b ^ iv0);
    118 }
    119 
/*
 * aes_ssse3_xts_update(t)
 *
 *	Advance the XTS tweak t by one block and return the new tweak.
 *	Viewing t as a 128-bit little-endian integer made of two 64-bit
 *	halves, this shifts it left by one bit: the bit shifted out of
 *	the low half is carried into bit 0 of the high half, and if the
 *	bit shifted out of the high half was set, 0x87 is xored into
 *	the low half.
 */
static inline __m128i
aes_ssse3_xts_update(__m128i t)
{
	/* Per-half carry-in: 0x87 for the low half, 1 for the high half. */
	const __m128i carry = _mm_set_epi64x(1, 0x87);
	const __m128i ones = _mm_set_epi64x(1, 1);
	__m128i topbit, nocarry, shifted;

	/* 1 in each half whose top bit is set, else 0. */
	topbit = _mm_srli_epi64(t, 63);

	/* All-zeros where the top bit was set, all-ones where it was not. */
	nocarry = _mm_sub_epi64(topbit, ones);

	/* Each half's carry-out feeds the other half, so swap them. */
	nocarry = _mm_shuffle_epi32(nocarry, 0x4e);

	shifted = _mm_slli_epi64(t, 1);

	return shifted ^ (carry & ~nocarry);
}
    133 
/*
 * aes_ssse3_xts_update_selftest()
 *
 *	Run aes_ssse3_xts_update over a fixed table of tweaks, each
 *	written as four 32-bit words, and compare against the expected
 *	results.  The cases cover a plain shift, a carry across each
 *	32-bit word boundary, and the wraparound out of the top bit.
 *	Every mismatching case is reported with printf.
 *
 *	Returns 0 if all cases match, -1 if any case failed.
 */
static int
aes_ssse3_xts_update_selftest(void)
{
	static const struct {
		uint32_t in[4], out[4];
	} cases[] = {
		[0] = { {1}, {2} },
		[1] = { {0x80000000U,0,0,0}, {0,1,0,0} },
		[2] = { {0,0x80000000U,0,0}, {0,0,1,0} },
		[3] = { {0,0,0x80000000U,0}, {0,0,0,1} },
		[4] = { {0,0,0,0x80000000U}, {0x87,0,0,0} },
		[5] = { {0,0x80000000U,0,0x80000000U}, {0x87,0,1,0} },
	};
	const unsigned ncases = sizeof(cases)/sizeof(cases[0]);
	uint32_t w[4];
	unsigned i, j;
	int differs;
	int result = 0;

	for (i = 0; i < ncases; i++) {
		/* Work on a scratch copy; the table itself is const. */
		for (j = 0; j < 4; j++)
			w[j] = cases[i].in[j];

		storeblock(w, aes_ssse3_xts_update(loadblock(w)));

		differs = 0;
		for (j = 0; j < 4; j++)
			differs |= (w[j] != cases[i].out[j]);
		if (!differs)
			continue;

		/* Keep going so that every failing case gets reported. */
		printf("%s %u:"
		    " %"PRIx32" %"PRIx32" %"PRIx32" %"PRIx32"\n",
		    __func__, i, w[0], w[1], w[2], w[3]);
		result = -1;
	}

	return result;
}
    170 
    171 void
    172 aes_ssse3_xts_enc(const struct aesenc *enc, const uint8_t in[static 16],
    173     uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    174     uint32_t nrounds)
    175 {
    176 	__m128i t, b;
    177 
    178 	KASSERT(nbytes);
    179 	KASSERT(nbytes % 16 == 0);
    180 
    181 	t = loadblock(tweak);
    182 	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
    183 		b = t ^ loadblock(in);
    184 		b = aes_ssse3_enc1(enc, b, nrounds);
    185 		storeblock(out, t ^ b);
    186 		t = aes_ssse3_xts_update(t);
    187 	}
    188 	storeblock(tweak, t);
    189 }
    190 
    191 void
    192 aes_ssse3_xts_dec(const struct aesdec *dec, const uint8_t in[static 16],
    193     uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16],
    194     uint32_t nrounds)
    195 {
    196 	__m128i t, b;
    197 
    198 	KASSERT(nbytes);
    199 	KASSERT(nbytes % 16 == 0);
    200 
    201 	t = loadblock(tweak);
    202 	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
    203 		b = t ^ loadblock(in);
    204 		b = aes_ssse3_dec1(dec, b, nrounds);
    205 		storeblock(out, t ^ b);
    206 		t = aes_ssse3_xts_update(t);
    207 	}
    208 	storeblock(tweak, t);
    209 }
    210 
    211 void
    212 aes_ssse3_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16],
    213     size_t nbytes, uint8_t auth0[static 16], uint32_t nrounds)
    214 {
    215 	__m128i auth;
    216 
    217 	KASSERT(nbytes);
    218 	KASSERT(nbytes % 16 == 0);
    219 
    220 	auth = loadblock(auth0);
    221 	for (; nbytes; nbytes -= 16, in += 16)
    222 		auth = aes_ssse3_enc1(enc, auth ^ loadblock(in), nrounds);
    223 	storeblock(auth0, auth);
    224 }
    225 
    226 void
    227 aes_ssse3_ccm_enc1(const struct aesenc *enc, const uint8_t in[static 16],
    228     uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32],
    229     uint32_t nrounds)
    230 {
    231 	const __m128i ctr32_inc = _mm_set_epi32(1, 0, 0, 0);
    232 	const __m128i bs32 =
    233 	    _mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203);
    234 	__m128i auth, ctr_be, ctr, ptxt;
    235 
    236 	KASSERT(nbytes);
    237 	KASSERT(nbytes % 16 == 0);
    238 
    239 	auth = loadblock(authctr);
    240 	ctr_be = loadblock(authctr + 16);
    241 	ctr = _mm_shuffle_epi8(ctr_be, bs32);
    242 	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
    243 		ptxt = loadblock(in);
    244 		auth = aes_ssse3_enc1(enc, auth ^ ptxt, nrounds);
    245 		ctr = _mm_add_epi32(ctr, ctr32_inc);
    246 		ctr_be = _mm_shuffle_epi8(ctr, bs32);
    247 		storeblock(out, ptxt ^ aes_ssse3_enc1(enc, ctr_be, nrounds));
    248 	}
    249 	storeblock(authctr, auth);
    250 	storeblock(authctr + 16, ctr_be);
    251 }
    252 
    253 void
    254 aes_ssse3_ccm_dec1(const struct aesenc *enc, const uint8_t in[static 16],
    255     uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32],
    256     uint32_t nrounds)
    257 {
    258 	const __m128i ctr32_inc = _mm_set_epi32(1, 0, 0, 0);
    259 	const __m128i bs32 =
    260 	    _mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203);
    261 	__m128i auth, ctr_be, ctr, ptxt;
    262 
    263 	KASSERT(nbytes);
    264 	KASSERT(nbytes % 16 == 0);
    265 
    266 	auth = loadblock(authctr);
    267 	ctr_be = loadblock(authctr + 16);
    268 	ctr = _mm_shuffle_epi8(ctr_be, bs32);
    269 	for (; nbytes; nbytes -= 16, in += 16, out += 16) {
    270 		ctr = _mm_add_epi32(ctr, ctr32_inc);
    271 		ctr_be = _mm_shuffle_epi8(ctr, bs32);
    272 		ptxt = loadblock(in) ^ aes_ssse3_enc1(enc, ctr_be, nrounds);
    273 		storeblock(out, ptxt);
    274 		auth = aes_ssse3_enc1(enc, auth ^ ptxt, nrounds);
    275 	}
    276 	storeblock(authctr, auth);
    277 	storeblock(authctr + 16, ctr_be);
    278 }
    279 
/*
 * aes_ssse3_selftest()
 *
 *	Run the self-tests defined in this file; currently that is only
 *	the XTS tweak-update test.  Returns 0 on success, -1 on failure.
 */
int
aes_ssse3_selftest(void)
{

	return aes_ssse3_xts_update_selftest() ? -1 : 0;
}
    289