1 1.3 riastrad /* $NetBSD: aes_ssse3_subr.c,v 1.3 2020/07/25 22:31:04 riastradh Exp $ */ 2 1.1 riastrad 3 1.1 riastrad /*- 4 1.1 riastrad * Copyright (c) 2020 The NetBSD Foundation, Inc. 5 1.1 riastrad * All rights reserved. 6 1.1 riastrad * 7 1.1 riastrad * Redistribution and use in source and binary forms, with or without 8 1.1 riastrad * modification, are permitted provided that the following conditions 9 1.1 riastrad * are met: 10 1.1 riastrad * 1. Redistributions of source code must retain the above copyright 11 1.1 riastrad * notice, this list of conditions and the following disclaimer. 12 1.1 riastrad * 2. Redistributions in binary form must reproduce the above copyright 13 1.1 riastrad * notice, this list of conditions and the following disclaimer in the 14 1.1 riastrad * documentation and/or other materials provided with the distribution. 15 1.1 riastrad * 16 1.1 riastrad * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 17 1.1 riastrad * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 18 1.1 riastrad * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 19 1.1 riastrad * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 20 1.1 riastrad * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 21 1.1 riastrad * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 22 1.1 riastrad * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 23 1.1 riastrad * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 24 1.1 riastrad * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 25 1.1 riastrad * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 26 1.1 riastrad * POSSIBILITY OF SUCH DAMAGE. 27 1.1 riastrad */ 28 1.1 riastrad 29 1.1 riastrad #include <sys/cdefs.h> 30 1.3 riastrad __KERNEL_RCSID(1, "$NetBSD: aes_ssse3_subr.c,v 1.3 2020/07/25 22:31:04 riastradh Exp $"); 31 1.1 riastrad 32 1.2 riastrad #ifdef _KERNEL 33 1.1 riastrad #include <sys/systm.h> 34 1.1 riastrad #include <lib/libkern/libkern.h> 35 1.2 riastrad #else 36 1.2 riastrad #include <assert.h> 37 1.2 riastrad #include <inttypes.h> 38 1.2 riastrad #include <stdio.h> 39 1.2 riastrad #define KASSERT assert 40 1.2 riastrad #endif 41 1.1 riastrad 42 1.1 riastrad #include "aes_ssse3_impl.h" 43 1.1 riastrad 44 1.1 riastrad static inline __m128i 45 1.1 riastrad loadblock(const void *in) 46 1.1 riastrad { 47 1.1 riastrad return _mm_loadu_epi8(in); 48 1.1 riastrad } 49 1.1 riastrad 50 1.1 riastrad static inline void 51 1.1 riastrad storeblock(void *out, __m128i block) 52 1.1 riastrad { 53 1.1 riastrad _mm_storeu_epi8(out, block); 54 1.1 riastrad } 55 1.1 riastrad 56 1.1 riastrad void 57 1.1 riastrad aes_ssse3_enc(const struct aesenc *enc, const uint8_t in[static 16], 58 1.1 riastrad uint8_t out[static 16], uint32_t nrounds) 59 1.1 riastrad { 60 1.1 riastrad __m128i block; 61 1.1 riastrad 62 1.1 riastrad block = loadblock(in); 63 1.1 riastrad block = aes_ssse3_enc1(enc, block, nrounds); 64 1.1 riastrad storeblock(out, block); 65 1.1 riastrad } 66 1.1 riastrad 67 1.1 riastrad void 68 1.1 riastrad aes_ssse3_dec(const struct aesdec *dec, const uint8_t in[static 16], 69 1.1 riastrad uint8_t out[static 16], uint32_t nrounds) 70 1.1 riastrad { 71 1.1 riastrad __m128i block; 72 1.1 riastrad 73 1.1 riastrad block = loadblock(in); 74 1.1 riastrad block = aes_ssse3_dec1(dec, block, nrounds); 75 1.1 riastrad storeblock(out, block); 76 1.1 riastrad } 77 1.1 riastrad 78 1.1 riastrad void 79 1.1 riastrad aes_ssse3_cbc_enc(const struct aesenc *enc, const uint8_t in[static 16], 80 1.1 riastrad uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16], 81 1.1 riastrad uint32_t nrounds) 82 1.1 riastrad { 83 1.1 riastrad __m128i cv; 84 1.1 riastrad 85 1.1 riastrad KASSERT(nbytes); 86 1.1 riastrad 87 1.1 riastrad cv = loadblock(iv); 88 1.1 riastrad for (; nbytes; nbytes -= 16, in += 16, out += 16) { 89 1.1 riastrad cv ^= loadblock(in); 90 1.1 riastrad cv = aes_ssse3_enc1(enc, cv, nrounds); 91 1.1 riastrad storeblock(out, cv); 92 1.1 riastrad } 93 1.1 riastrad storeblock(iv, cv); 94 1.1 riastrad } 95 1.1 riastrad 96 1.1 riastrad void 97 1.1 riastrad aes_ssse3_cbc_dec(const struct aesdec *dec, const uint8_t in[static 16], 98 1.1 riastrad uint8_t out[static 16], size_t nbytes, uint8_t iv[static 16], 99 1.1 riastrad uint32_t nrounds) 100 1.1 riastrad { 101 1.1 riastrad __m128i iv0, cv, b; 102 1.1 riastrad 103 1.1 riastrad KASSERT(nbytes); 104 1.1 riastrad KASSERT(nbytes % 16 == 0); 105 1.1 riastrad 106 1.1 riastrad iv0 = loadblock(iv); 107 1.1 riastrad cv = loadblock(in + nbytes - 16); 108 1.1 riastrad storeblock(iv, cv); 109 1.1 riastrad 110 1.1 riastrad for (;;) { 111 1.1 riastrad b = aes_ssse3_dec1(dec, cv, nrounds); 112 1.1 riastrad if ((nbytes -= 16) == 0) 113 1.1 riastrad break; 114 1.1 riastrad cv = loadblock(in + nbytes - 16); 115 1.1 riastrad storeblock(out + nbytes, b ^ cv); 116 1.1 riastrad } 117 1.1 riastrad storeblock(out, b ^ iv0); 118 1.1 riastrad } 119 1.1 riastrad 120 1.1 riastrad static inline __m128i 121 1.1 riastrad aes_ssse3_xts_update(__m128i t) 122 1.1 riastrad { 123 1.1 riastrad const __m128i one = _mm_set_epi64x(1, 1); 124 1.1 riastrad __m128i s, m, c; 125 1.1 riastrad 126 1.1 riastrad s = _mm_srli_epi64(t, 63); /* 1 if high bit set else 0 */ 127 1.1 riastrad m = _mm_sub_epi64(s, one); /* 0 if high bit set else -1 */ 128 1.1 riastrad m = _mm_shuffle_epi32(m, 0x4e); /* swap halves */ 129 1.1 riastrad c = _mm_set_epi64x(1, 0x87); /* carry */ 130 1.1 riastrad 131 1.1 riastrad return _mm_slli_epi64(t, 1) ^ (c & ~m); 132 1.1 riastrad } 133 1.1 riastrad 134 1.1 riastrad static int 135 1.1 riastrad aes_ssse3_xts_update_selftest(void) 136 1.1 riastrad { 137 1.1 riastrad static const struct { 138 1.1 riastrad uint32_t in[4], out[4]; 139 1.1 riastrad } cases[] = { 140 1.1 riastrad [0] = { {1}, {2} }, 141 1.1 riastrad [1] = { {0x80000000U,0,0,0}, {0,1,0,0} }, 142 1.1 riastrad [2] = { {0,0x80000000U,0,0}, {0,0,1,0} }, 143 1.1 riastrad [3] = { {0,0,0x80000000U,0}, {0,0,0,1} }, 144 1.1 riastrad [4] = { {0,0,0,0x80000000U}, {0x87,0,0,0} }, 145 1.1 riastrad [5] = { {0,0x80000000U,0,0x80000000U}, {0x87,0,1,0} }, 146 1.1 riastrad }; 147 1.1 riastrad unsigned i; 148 1.1 riastrad uint32_t t[4]; 149 1.1 riastrad int result = 0; 150 1.1 riastrad 151 1.1 riastrad for (i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) { 152 1.1 riastrad t[0] = cases[i].in[0]; 153 1.1 riastrad t[1] = cases[i].in[1]; 154 1.1 riastrad t[2] = cases[i].in[2]; 155 1.1 riastrad t[3] = cases[i].in[3]; 156 1.1 riastrad storeblock(t, aes_ssse3_xts_update(loadblock(t))); 157 1.1 riastrad if (t[0] != cases[i].out[0] || 158 1.1 riastrad t[1] != cases[i].out[1] || 159 1.1 riastrad t[2] != cases[i].out[2] || 160 1.1 riastrad t[3] != cases[i].out[3]) { 161 1.1 riastrad printf("%s %u:" 162 1.1 riastrad " %"PRIx32" %"PRIx32" %"PRIx32" %"PRIx32"\n", 163 1.1 riastrad __func__, i, t[0], t[1], t[2], t[3]); 164 1.1 riastrad result = -1; 165 1.1 riastrad } 166 1.1 riastrad } 167 1.1 riastrad 168 1.1 riastrad return result; 169 1.1 riastrad } 170 1.1 riastrad 171 1.1 riastrad void 172 1.1 riastrad aes_ssse3_xts_enc(const struct aesenc *enc, const uint8_t in[static 16], 173 1.1 riastrad uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16], 174 1.1 riastrad uint32_t nrounds) 175 1.1 riastrad { 176 1.1 riastrad __m128i t, b; 177 1.1 riastrad 178 1.1 riastrad KASSERT(nbytes); 179 1.1 riastrad KASSERT(nbytes % 16 == 0); 180 1.1 riastrad 181 1.1 riastrad t = loadblock(tweak); 182 1.1 riastrad for (; nbytes; nbytes -= 16, in += 16, out += 16) { 183 1.1 riastrad b = t ^ loadblock(in); 184 1.1 riastrad b = aes_ssse3_enc1(enc, b, nrounds); 185 1.1 riastrad storeblock(out, t ^ b); 186 1.1 riastrad t = aes_ssse3_xts_update(t); 187 1.1 riastrad } 188 1.1 riastrad storeblock(tweak, t); 189 1.1 riastrad } 190 1.1 riastrad 191 1.1 riastrad void 192 1.1 riastrad aes_ssse3_xts_dec(const struct aesdec *dec, const uint8_t in[static 16], 193 1.1 riastrad uint8_t out[static 16], size_t nbytes, uint8_t tweak[static 16], 194 1.1 riastrad uint32_t nrounds) 195 1.1 riastrad { 196 1.1 riastrad __m128i t, b; 197 1.1 riastrad 198 1.1 riastrad KASSERT(nbytes); 199 1.1 riastrad KASSERT(nbytes % 16 == 0); 200 1.1 riastrad 201 1.1 riastrad t = loadblock(tweak); 202 1.1 riastrad for (; nbytes; nbytes -= 16, in += 16, out += 16) { 203 1.1 riastrad b = t ^ loadblock(in); 204 1.1 riastrad b = aes_ssse3_dec1(dec, b, nrounds); 205 1.1 riastrad storeblock(out, t ^ b); 206 1.1 riastrad t = aes_ssse3_xts_update(t); 207 1.1 riastrad } 208 1.1 riastrad storeblock(tweak, t); 209 1.1 riastrad } 210 1.1 riastrad 211 1.3 riastrad void 212 1.3 riastrad aes_ssse3_cbcmac_update1(const struct aesenc *enc, const uint8_t in[static 16], 213 1.3 riastrad size_t nbytes, uint8_t auth0[static 16], uint32_t nrounds) 214 1.3 riastrad { 215 1.3 riastrad __m128i auth; 216 1.3 riastrad 217 1.3 riastrad KASSERT(nbytes); 218 1.3 riastrad KASSERT(nbytes % 16 == 0); 219 1.3 riastrad 220 1.3 riastrad auth = loadblock(auth0); 221 1.3 riastrad for (; nbytes; nbytes -= 16, in += 16) 222 1.3 riastrad auth = aes_ssse3_enc1(enc, auth ^ loadblock(in), nrounds); 223 1.3 riastrad storeblock(auth0, auth); 224 1.3 riastrad } 225 1.3 riastrad 226 1.3 riastrad void 227 1.3 riastrad aes_ssse3_ccm_enc1(const struct aesenc *enc, const uint8_t in[static 16], 228 1.3 riastrad uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32], 229 1.3 riastrad uint32_t nrounds) 230 1.3 riastrad { 231 1.3 riastrad const __m128i ctr32_inc = _mm_set_epi32(1, 0, 0, 0); 232 1.3 riastrad const __m128i bs32 = 233 1.3 riastrad _mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203); 234 1.3 riastrad __m128i auth, ctr_be, ctr, ptxt; 235 1.3 riastrad 236 1.3 riastrad KASSERT(nbytes); 237 1.3 riastrad KASSERT(nbytes % 16 == 0); 238 1.3 riastrad 239 1.3 riastrad auth = loadblock(authctr); 240 1.3 riastrad ctr_be = loadblock(authctr + 16); 241 1.3 riastrad ctr = _mm_shuffle_epi8(ctr_be, bs32); 242 1.3 riastrad for (; nbytes; nbytes -= 16, in += 16, out += 16) { 243 1.3 riastrad ptxt = loadblock(in); 244 1.3 riastrad auth = aes_ssse3_enc1(enc, auth ^ ptxt, nrounds); 245 1.3 riastrad ctr = _mm_add_epi32(ctr, ctr32_inc); 246 1.3 riastrad ctr_be = _mm_shuffle_epi8(ctr, bs32); 247 1.3 riastrad storeblock(out, ptxt ^ aes_ssse3_enc1(enc, ctr_be, nrounds)); 248 1.3 riastrad } 249 1.3 riastrad storeblock(authctr, auth); 250 1.3 riastrad storeblock(authctr + 16, ctr_be); 251 1.3 riastrad } 252 1.3 riastrad 253 1.3 riastrad void 254 1.3 riastrad aes_ssse3_ccm_dec1(const struct aesenc *enc, const uint8_t in[static 16], 255 1.3 riastrad uint8_t out[static 16], size_t nbytes, uint8_t authctr[static 32], 256 1.3 riastrad uint32_t nrounds) 257 1.3 riastrad { 258 1.3 riastrad const __m128i ctr32_inc = _mm_set_epi32(1, 0, 0, 0); 259 1.3 riastrad const __m128i bs32 = 260 1.3 riastrad _mm_set_epi32(0x0c0d0e0f, 0x08090a0b, 0x04050607, 0x00010203); 261 1.3 riastrad __m128i auth, ctr_be, ctr, ptxt; 262 1.3 riastrad 263 1.3 riastrad KASSERT(nbytes); 264 1.3 riastrad KASSERT(nbytes % 16 == 0); 265 1.3 riastrad 266 1.3 riastrad auth = loadblock(authctr); 267 1.3 riastrad ctr_be = loadblock(authctr + 16); 268 1.3 riastrad ctr = _mm_shuffle_epi8(ctr_be, bs32); 269 1.3 riastrad for (; nbytes; nbytes -= 16, in += 16, out += 16) { 270 1.3 riastrad ctr = _mm_add_epi32(ctr, ctr32_inc); 271 1.3 riastrad ctr_be = _mm_shuffle_epi8(ctr, bs32); 272 1.3 riastrad ptxt = loadblock(in) ^ aes_ssse3_enc1(enc, ctr_be, nrounds); 273 1.3 riastrad storeblock(out, ptxt); 274 1.3 riastrad auth = aes_ssse3_enc1(enc, auth ^ ptxt, nrounds); 275 1.3 riastrad } 276 1.3 riastrad storeblock(authctr, auth); 277 1.3 riastrad storeblock(authctr + 16, ctr_be); 278 1.3 riastrad } 279 1.3 riastrad 280 1.1 riastrad int 281 1.1 riastrad aes_ssse3_selftest(void) 282 1.1 riastrad { 283 1.1 riastrad 284 1.1 riastrad if (aes_ssse3_xts_update_selftest()) 285 1.1 riastrad return -1; 286 1.1 riastrad 287 1.1 riastrad return 0; 288 1.1 riastrad } 289