/*	$NetBSD: aes_ni_64.S,v 1.6 2020/07/27 20:57:23 riastradh Exp $	*/

/*-
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

RCSID("$NetBSD: aes_ni_64.S,v 1.6 2020/07/27 20:57:23 riastradh Exp $")

/*
 * MOVDQA/MOVDQU are Move Double Quadword (Aligned/Unaligned), defined
 * to operate on integers; MOVAPS/MOVUPS are Move (Aligned/Unaligned)
 * Packed Single, defined to operate on binary32 floats.  They have
 * exactly the same architectural effects (move a 128-bit quantity from
 * memory into an xmm register).
 *
 * In principle, they might have different microarchitectural effects
 * so that MOVAPS/MOVUPS might incur a penalty when the register is
 * later used for integer paths, but in practice they don't.  So we use
 * the one whose instruction encoding is shorter -- MOVAPS/MOVUPS.
 */
#define	movdqa	movaps
#define	movdqu	movups
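/*
 * Illustrative only (not part of the original file): a minimal C
 * sketch of how a caller might drive the public entry points below,
 * with prototypes transcribed from their doc comments.  The layout of
 * struct aesenc is not shown; this is a hypothetical usage example,
 * not NetBSD's actual API surface.
 *
 *	struct aesenc;
 *	void aesni_setenckey128(struct aesenc *, const uint8_t[16]);
 *	void aesni_enc(const struct aesenc *, const uint8_t[16],
 *	    uint8_t[16], uint32_t);
 *
 *	void
 *	encrypt_one_block(struct aesenc *K, const uint8_t key[16],
 *	    const uint8_t in[16], uint8_t out[16])
 *	{
 *		aesni_setenckey128(K, key);	// expand 10 round keys
 *		aesni_enc(K, in, out, 10);	// AES-128 uses 10 rounds
 *	}
 */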
/*
 * aesni_setenckey128(struct aesenc *enckey@rdi, const uint8_t key[16] @rsi)
 *
 *	Expand a 16-byte AES-128 key into 10 round keys.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesni_setenckey128)
	movdqu	(%rsi),%xmm0	/* load master key into %xmm0 */
	movdqa	%xmm0,(%rdi)	/* store master key as the first round key */
	lea	0x10(%rdi),%rdi	/* advance %rdi to next round key */
	aeskeygenassist $0x1,%xmm0,%xmm2
	call	aesni_expand128
	aeskeygenassist $0x2,%xmm0,%xmm2
	call	aesni_expand128
	aeskeygenassist $0x4,%xmm0,%xmm2
	call	aesni_expand128
	aeskeygenassist $0x8,%xmm0,%xmm2
	call	aesni_expand128
	aeskeygenassist $0x10,%xmm0,%xmm2
	call	aesni_expand128
	aeskeygenassist $0x20,%xmm0,%xmm2
	call	aesni_expand128
	aeskeygenassist $0x40,%xmm0,%xmm2
	call	aesni_expand128
	aeskeygenassist $0x80,%xmm0,%xmm2
	call	aesni_expand128
	aeskeygenassist $0x1b,%xmm0,%xmm2
	call	aesni_expand128
	aeskeygenassist $0x36,%xmm0,%xmm2
	call	aesni_expand128
	ret
END(aesni_setenckey128)

/*
 * aesni_setenckey192(struct aesenc *enckey@rdi, const uint8_t key[24] @rsi)
 *
 *	Expand a 24-byte AES-192 key into 12 round keys.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesni_setenckey192)
	movdqu	(%rsi),%xmm0	/* load master key [0:128) into %xmm0 */
	movq	0x10(%rsi),%xmm1 /* load master key [128:192) into %xmm1 */
	movdqa	%xmm0,(%rdi)	/* store master key [0:128) as round key */
	lea	0x10(%rdi),%rdi	/* advance %rdi to next round key */
	aeskeygenassist $0x1,%xmm1,%xmm2
	call	aesni_expand192a
	aeskeygenassist $0x2,%xmm0,%xmm2
	call	aesni_expand192b
	aeskeygenassist $0x4,%xmm1,%xmm2
	call	aesni_expand192a
	aeskeygenassist $0x8,%xmm0,%xmm2
	call	aesni_expand192b
	aeskeygenassist $0x10,%xmm1,%xmm2
	call	aesni_expand192a
	aeskeygenassist $0x20,%xmm0,%xmm2
	call	aesni_expand192b
	aeskeygenassist $0x40,%xmm1,%xmm2
	call	aesni_expand192a
	aeskeygenassist $0x80,%xmm0,%xmm2
	call	aesni_expand192b
	ret
END(aesni_setenckey192)
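/*
 * Note on the aeskeygenassist immediates above (not in the original
 * file): the round constants 0x01,0x02,...,0x80,0x1b,0x36 are
 * successive doublings in GF(2^8) mod x^8 + x^4 + x^3 + x + 1, so
 * doubling 0x80 wraps around to 0x1b.  A C sketch of the progression:
 *
 *	#include <stdint.h>
 *
 *	static uint8_t
 *	rcon_next(uint8_t rcon)
 *	{
 *		// multiply by x in GF(2^8); 0x1b is the reduction poly
 *		return (rcon << 1) ^ ((rcon & 0x80) ? 0x1b : 0);
 *	}
 */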
/*
 * aesni_setenckey256(struct aesenc *enckey@rdi, const uint8_t key[32] @rsi)
 *
 *	Expand a 32-byte AES-256 key into 14 round keys.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesni_setenckey256)
	movdqu	(%rsi),%xmm0	/* load master key [0:128) into %xmm0 */
	movdqu	0x10(%rsi),%xmm1 /* load master key [128:256) into %xmm1 */
	movdqa	%xmm0,(%rdi)	/* store master key [0:128) as round key */
	movdqa	%xmm1,0x10(%rdi) /* store master key [128:256) as round key */
	lea	0x20(%rdi),%rdi	/* advance %rdi to next round key */
	aeskeygenassist $0x1,%xmm1,%xmm2
	call	aesni_expand256a
	aeskeygenassist $0x1,%xmm0,%xmm2
	call	aesni_expand256b
	aeskeygenassist $0x2,%xmm1,%xmm2
	call	aesni_expand256a
	aeskeygenassist $0x2,%xmm0,%xmm2
	call	aesni_expand256b
	aeskeygenassist $0x4,%xmm1,%xmm2
	call	aesni_expand256a
	aeskeygenassist $0x4,%xmm0,%xmm2
	call	aesni_expand256b
	aeskeygenassist $0x8,%xmm1,%xmm2
	call	aesni_expand256a
	aeskeygenassist $0x8,%xmm0,%xmm2
	call	aesni_expand256b
	aeskeygenassist $0x10,%xmm1,%xmm2
	call	aesni_expand256a
	aeskeygenassist $0x10,%xmm0,%xmm2
	call	aesni_expand256b
	aeskeygenassist $0x20,%xmm1,%xmm2
	call	aesni_expand256a
	aeskeygenassist $0x20,%xmm0,%xmm2
	call	aesni_expand256b
	aeskeygenassist $0x40,%xmm1,%xmm2
	call	aesni_expand256a
	ret
END(aesni_setenckey256)
/*
 * aesni_expand128(uint128_t *rkp@rdi, uint128_t prk@xmm0,
 *     uint128_t keygenassist@xmm2)
 *
 *	1. Compute the AES-128 round key using the previous round key.
 *	2. Store it at *rkp.
 *	3. Set %xmm0 to it.
 *	4. Advance %rdi to point at the next round key.
 *
 *	Internal ABI.  On entry:
 *
 *		%rdi = rkp, pointer to round key to compute
 *		%xmm0 = (prk[0], prk[1], prk[2], prk[3])
 *		%xmm2 = (xxx, xxx, xxx, t = Rot(SubWord(prk[3])) ^ RCON)
 *
 *	On exit:
 *
 *		%rdi = &rkp[1], rkp advanced by one round key
 *		%xmm0 = rk, the round key we just computed
 *		%xmm2 = garbage
 *		%xmm4 = garbage
 *		%xmm5 = garbage
 *		%xmm6 = garbage
 *
 *	Note: %xmm1 is preserved (as are %xmm3 and %xmm7 through %xmm15,
 *	and all other registers).
 */
	.text
	_ALIGN_TEXT
	.type	aesni_expand128,@function
aesni_expand128:
	/*
	 * %xmm2 := (%xmm2[3], %xmm2[3], %xmm2[3], %xmm2[3]),
	 * i.e., set each word of %xmm2 to t := Rot(SubWord(prk[3])) ^ RCON.
	 */
	pshufd	$0b11111111,%xmm2,%xmm2

	/*
	 * %xmm4 := (0, prk[0], prk[1], prk[2])
	 * %xmm5 := (0, 0, prk[0], prk[1])
	 * %xmm6 := (0, 0, 0, prk[0])
	 */
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm5
	movdqa	%xmm0,%xmm6
	pslldq	$4,%xmm4
	pslldq	$8,%xmm5
	pslldq	$12,%xmm6

	/*
	 * %xmm0 := (rk[0] = t ^ prk[0],
	 *     rk[1] = t ^ prk[0] ^ prk[1],
	 *     rk[2] = t ^ prk[0] ^ prk[1] ^ prk[2],
	 *     rk[3] = t ^ prk[0] ^ prk[1] ^ prk[2] ^ prk[3])
	 */
	pxor	%xmm2,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm5,%xmm0
	pxor	%xmm6,%xmm0

	movdqa	%xmm0,(%rdi)	/* store round key */
	lea	0x10(%rdi),%rdi	/* advance to next round key address */
	ret
END(aesni_expand128)
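/*
 * Illustrative only (not in the original file): a C model of what
 * aesni_expand128 computes.  The byte shifts above evaluate the
 * prefix-XOR rk[i] = t ^ prk[0] ^ ... ^ prk[i] with vector operations
 * instead of a serial chain; names here are made up.
 *
 *	#include <stdint.h>
 *
 *	static void
 *	expand128_step(uint32_t rk[4], const uint32_t prk[4], uint32_t t)
 *	{
 *		// t = Rot(SubWord(prk[3])) ^ RCON, from aeskeygenassist
 *		rk[0] = t ^ prk[0];
 *		rk[1] = rk[0] ^ prk[1];
 *		rk[2] = rk[1] ^ prk[2];
 *		rk[3] = rk[2] ^ prk[3];
 *	}
 */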
/*
 * aesni_expand192a(uint128_t *rkp@rdi, uint128_t prk@xmm0,
 *     uint64_t rklo@xmm1, uint128_t keygenassist@xmm2)
 *
 *	Set even-numbered AES-192 round key.
 *
 *	Internal ABI.  On entry:
 *
 *		%rdi = rkp, pointer to two round keys to compute
 *		%xmm0 = (prk[0], prk[1], prk[2], prk[3])
 *		%xmm1 = (rklo[0], rklo[1], xxx, xxx)
 *		%xmm2 = (xxx, t = Rot(SubWord(rklo[1])) ^ RCON, xxx, xxx)
 *
 *	On exit:
 *
 *		%rdi = &rkp[2], rkp advanced by two round keys
 *		%xmm0 = nrk, second round key we just computed
 *		%xmm1 = rk, first round key we just computed
 *		%xmm2 = garbage
 *		%xmm4 = garbage
 *		%xmm5 = garbage
 *		%xmm6 = garbage
 *		%xmm7 = garbage
 */
	.text
	_ALIGN_TEXT
	.type	aesni_expand192a,@function
aesni_expand192a:
	/*
	 * %xmm2 := (%xmm2[1], %xmm2[1], %xmm2[1], %xmm2[1]),
	 * i.e., set each word of %xmm2 to t := Rot(SubWord(rklo[1])) ^ RCON.
	 */
	pshufd	$0b01010101,%xmm2,%xmm2

	/*
	 * We need to compute:
	 *
	 * rk[0] := rklo[0]
	 * rk[1] := rklo[1]
	 * rk[2] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0]
	 * rk[3] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ prk[1]
	 * nrk[0] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ prk[1] ^ prk[2]
	 * nrk[1] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ ... ^ prk[3]
	 * nrk[2] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ ... ^ prk[3] ^ rklo[0]
	 * nrk[3] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ ... ^ prk[3] ^ rklo[0]
	 *     ^ rklo[1]
	 */

	/*
	 * %xmm4 := (prk[0], prk[1], prk[2], prk[3])
	 * %xmm5 := (0, prk[0], prk[1], prk[2])
	 * %xmm6 := (0, 0, prk[0], prk[1])
	 * %xmm7 := (0, 0, 0, prk[0])
	 */
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm5
	movdqa	%xmm0,%xmm6
	movdqa	%xmm0,%xmm7
	pslldq	$4,%xmm5
	pslldq	$8,%xmm6
	pslldq	$12,%xmm7

	/* %xmm4 := (rk[2], rk[3], nrk[0], nrk[1]) */
	pxor	%xmm2,%xmm4
	pxor	%xmm5,%xmm4
	pxor	%xmm6,%xmm4
	pxor	%xmm7,%xmm4

	/*
	 * At this point, rk is split across %xmm1 (rk[0],rk[1],...) and
	 * %xmm4 (rk[2],rk[3],...); nrk is in %xmm4 (...,nrk[0],nrk[1]);
	 * and we have yet to compute nrk[2] or nrk[3], which requires
	 * rklo[0] and rklo[1] in %xmm1 (rklo[0], rklo[1], ...).  We need
	 * nrk to end up in %xmm0 at the end, so gather rk into %xmm1 and
	 * nrk into %xmm0.
	 */

	/* %xmm0 := (nrk[0], nrk[1], nrk[1], nrk[1]) */
	pshufd	$0b11111110,%xmm4,%xmm0

	/*
	 * %xmm6 := (0, 0, rklo[0], rklo[1])
	 * %xmm7 := (0, 0, 0, rklo[0])
	 */
	movdqa	%xmm1,%xmm6
	movdqa	%xmm1,%xmm7
	pslldq	$8,%xmm6
	pslldq	$12,%xmm7

	/*
	 * %xmm0 := (nrk[0],
	 *     nrk[1],
	 *     nrk[2] = nrk[1] ^ rklo[0],
	 *     nrk[3] = nrk[1] ^ rklo[0] ^ rklo[1])
	 */
	pxor	%xmm6,%xmm0
	pxor	%xmm7,%xmm0

	/* %xmm1 := (rk[0], rk[1], rk[2], rk[3]) */
	shufps	$0b01000100,%xmm4,%xmm1

	movdqa	%xmm1,(%rdi)	/* store round key */
	movdqa	%xmm0,0x10(%rdi) /* store next round key */
	lea	0x20(%rdi),%rdi	/* advance two round keys */
	ret
END(aesni_expand192a)
/*
 * aesni_expand192b(uint128_t *roundkey@rdi, uint128_t prk@xmm0,
 *     uint128_t keygenassist@xmm2)
 *
 *	Set odd-numbered AES-192 round key.
 *
 *	Internal ABI.  On entry:
 *
 *		%rdi = rkp, pointer to round key to compute
 *		%xmm0 = (prk[0], prk[1], prk[2], prk[3])
 *		%xmm1 = (xxx, xxx, pprk[2], pprk[3])
 *		%xmm2 = (xxx, xxx, xxx, t = Rot(Sub(prk[3])) ^ RCON)
 *
 *	On exit:
 *
 *		%rdi = &rkp[1], rkp advanced by one round key
 *		%xmm0 = rk, the round key we just computed
 *		%xmm1 = (nrk[0], nrk[1], xxx, xxx), half of next round key
 *		%xmm2 = garbage
 *		%xmm4 = garbage
 *		%xmm5 = garbage
 *		%xmm6 = garbage
 *		%xmm7 = garbage
 */
	.text
	_ALIGN_TEXT
	.type	aesni_expand192b,@function
aesni_expand192b:
	/*
	 * %xmm2 := (%xmm2[3], %xmm2[3], %xmm2[3], %xmm2[3]),
	 * i.e., set each word of %xmm2 to t := Rot(Sub(prk[3])) ^ RCON.
	 */
	pshufd	$0b11111111,%xmm2,%xmm2

	/*
	 * We need to compute:
	 *
	 * rk[0] := Rot(Sub(prk[3])) ^ RCON ^ pprk[2]
	 * rk[1] := Rot(Sub(prk[3])) ^ RCON ^ pprk[2] ^ pprk[3]
	 * rk[2] := Rot(Sub(prk[3])) ^ RCON ^ pprk[2] ^ pprk[3] ^ prk[0]
	 * rk[3] := Rot(Sub(prk[3])) ^ RCON ^ pprk[2] ^ pprk[3] ^ prk[0]
	 *     ^ prk[1]
	 * nrk[0] := Rot(Sub(prk[3])) ^ RCON ^ pprk[2] ^ pprk[3] ^ prk[0]
	 *     ^ prk[1] ^ prk[2]
	 * nrk[1] := Rot(Sub(prk[3])) ^ RCON ^ pprk[2] ^ pprk[3] ^ prk[0]
	 *     ^ prk[1] ^ prk[2] ^ prk[3]
	 */

	/* %xmm1 := (pprk[2], pprk[3], prk[0], prk[1]) */
	shufps	$0b01001110,%xmm0,%xmm1

	/*
	 * %xmm5 := (0, pprk[2], pprk[3], prk[0])
	 * %xmm6 := (0, 0, pprk[2], pprk[3])
	 * %xmm7 := (0, 0, 0, pprk[2])
	 */
	movdqa	%xmm1,%xmm5
	movdqa	%xmm1,%xmm6
	movdqa	%xmm1,%xmm7
	pslldq	$4,%xmm5
	pslldq	$8,%xmm6
	pslldq	$12,%xmm7

	/* %xmm1 := (rk[0], rk[1], rk[2], rk[3]) */
	pxor	%xmm2,%xmm1
	pxor	%xmm5,%xmm1
	pxor	%xmm6,%xmm1
	pxor	%xmm7,%xmm1

	/* %xmm4 := (prk[2], prk[3], xxx, xxx) */
	pshufd	$0b00001110,%xmm0,%xmm4

	/* %xmm5 := (0, prk[2], xxx, xxx) */
	movdqa	%xmm4,%xmm5
	pslldq	$4,%xmm5

	/* %xmm0 := (rk[0], rk[1], rk[2], rk[3]) */
	movdqa	%xmm1,%xmm0

	/* %xmm1 := (rk[3], rk[3], xxx, xxx) */
	shufps	$0b00001111,%xmm1,%xmm1

	/*
	 * %xmm1 := (nrk[0] = rk[3] ^ prk[2],
	 *     nrk[1] = rk[3] ^ prk[2] ^ prk[3],
	 *     xxx,
	 *     xxx)
	 */
	pxor	%xmm4,%xmm1
	pxor	%xmm5,%xmm1

	movdqa	%xmm0,(%rdi)	/* store round key */
	lea	0x10(%rdi),%rdi	/* advance to next round key address */
	ret
END(aesni_expand192b)
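/*
 * Illustrative only (not in the original file): AES-192 consumes key
 * material six words at a time while round keys are stored four words
 * at a time, which is why the two helpers above alternate, each
 * finishing one stored round key and starting the next.  A C model of
 * the underlying word recurrence, little-endian word convention as on
 * x86 (names are made up):
 *
 *	#include <stdint.h>
 *
 *	// w[] holds 4*13 = 52 32-bit words of AES-192 round keys;
 *	// w[0..5] is the master key; subword applies the S-box to
 *	// each byte, as in FIPS 197.
 *	static void
 *	expand192_words(uint32_t w[52], uint32_t (*subword)(uint32_t))
 *	{
 *		uint32_t rcon = 0x01;
 *		for (unsigned i = 6; i < 52; i++) {
 *			uint32_t t = w[i - 1];
 *			if (i % 6 == 0) {
 *				// RotWord, little-endian: rotate right 8
 *				t = subword((t << 24) | (t >> 8)) ^ rcon;
 *				rcon = (rcon << 1) ^
 *				    ((rcon & 0x80) ? 0x1b : 0);
 *			}
 *			w[i] = w[i - 6] ^ t;
 *		}
 *	}
 */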
/*
 * aesni_expand256a(uint128_t *rkp@rdi, uint128_t pprk@xmm0,
 *     uint128_t prk@xmm1, uint128_t keygenassist@xmm2)
 *
 *	Set even-numbered AES-256 round key.
 *
 *	Internal ABI.  On entry:
 *
 *		%rdi = rkp, pointer to round key to compute
 *		%xmm0 = (pprk[0], pprk[1], pprk[2], pprk[3])
 *		%xmm1 = (prk[0], prk[1], prk[2], prk[3])
 *		%xmm2 = (xxx, xxx, xxx, t = Rot(SubWord(prk[3])) ^ RCON)
 *
 *	On exit:
 *
 *		%rdi = &rkp[1], rkp advanced by one round key
 *		%xmm0 = rk, the round key we just computed
 *		%xmm1 = prk, previous round key, preserved from entry
 *		%xmm2 = garbage
 *		%xmm4 = garbage
 *		%xmm5 = garbage
 *		%xmm6 = garbage
 *
 *	The computation turns out to be the same as for AES-128; the
 *	previous round key does not figure into it, only the
 *	previous-previous round key.
 */
aesni_expand256a = aesni_expand128

/*
 * aesni_expand256b(uint128_t *rkp@rdi, uint128_t prk@xmm0,
 *     uint128_t pprk@xmm1, uint128_t keygenassist@xmm2)
 *
 *	Set odd-numbered AES-256 round key.
 *
 *	Internal ABI.  On entry:
 *
 *		%rdi = rkp, pointer to round key to compute
 *		%xmm0 = (prk[0], prk[1], prk[2], prk[3])
 *		%xmm1 = (pprk[0], pprk[1], pprk[2], pprk[3])
 *		%xmm2 = (xxx, xxx, t = Sub(prk[3]), xxx)
 *
 *	On exit:
 *
 *		%rdi = &rkp[1], rkp advanced by one round key
 *		%xmm0 = prk, previous round key, preserved from entry
 *		%xmm1 = rk, the round key we just computed
 *		%xmm2 = garbage
 *		%xmm4 = garbage
 *		%xmm5 = garbage
 *		%xmm6 = garbage
 */
	.text
	_ALIGN_TEXT
	.type	aesni_expand256b,@function
aesni_expand256b:
	/*
	 * %xmm2 := (%xmm2[2], %xmm2[2], %xmm2[2], %xmm2[2]),
	 * i.e., set each word of %xmm2 to t := Sub(prk[3]).
	 */
	pshufd	$0b10101010,%xmm2,%xmm2

	/*
	 * %xmm4 := (0, pprk[0], pprk[1], pprk[2])
	 * %xmm5 := (0, 0, pprk[0], pprk[1])
	 * %xmm6 := (0, 0, 0, pprk[0])
	 */
	movdqa	%xmm1,%xmm4
	movdqa	%xmm1,%xmm5
	movdqa	%xmm1,%xmm6
	pslldq	$4,%xmm4
	pslldq	$8,%xmm5
	pslldq	$12,%xmm6

	/*
	 * %xmm1 := (rk[0] = t ^ pprk[0],
	 *     rk[1] = t ^ pprk[0] ^ pprk[1],
	 *     rk[2] = t ^ pprk[0] ^ pprk[1] ^ pprk[2],
	 *     rk[3] = t ^ pprk[0] ^ pprk[1] ^ pprk[2] ^ pprk[3])
	 */
	pxor	%xmm2,%xmm1
	pxor	%xmm4,%xmm1
	pxor	%xmm5,%xmm1
	pxor	%xmm6,%xmm1

	movdqa	%xmm1,(%rdi)	/* store round key */
	lea	0x10(%rdi),%rdi	/* advance to next round key address */
	ret
END(aesni_expand256b)

/*
 * aesni_enctodec(const struct aesenc *enckey@rdi, struct aesdec *deckey@rsi,
 *     uint32_t nrounds@rdx)
 *
 *	Convert AES encryption round keys to AES decryption round keys.
 *	`nrounds' must be between 10 and 14.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesni_enctodec)
	shl	$4,%edx		/* rdx := byte offset of last round key */
	movdqa	(%rdi,%rdx),%xmm0 /* load last round key */
	movdqa	%xmm0,(%rsi)	/* store last round key verbatim */
	jmp	2f
	_ALIGN_TEXT
1:	movdqa	(%rdi,%rdx),%xmm0 /* load round key */
	aesimc	%xmm0,%xmm0	/* convert encryption to decryption */
	movdqa	%xmm0,(%rsi)	/* store round key */
2:	sub	$0x10,%rdx	/* advance to next round key */
	lea	0x10(%rsi),%rsi
	jnz	1b		/* repeat if more rounds */
	movdqa	(%rdi),%xmm0	/* load first round key */
	movdqa	%xmm0,(%rsi)	/* store first round key verbatim */
	ret
END(aesni_enctodec)
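/*
 * Illustrative only (not in the original file): aesni_enctodec
 * implements the equivalent inverse cipher of FIPS 197: the
 * decryption schedule is the encryption schedule reversed, with
 * InvMixColumns (AESIMC) applied to every round key except the first
 * and last.  A C model, with a hypothetical invmixcolumns():
 *
 *	#include <stdint.h>
 *	#include <string.h>
 *
 *	static void
 *	enctodec(const uint8_t enc[][16], uint8_t dec[][16],
 *	    unsigned nrounds, void (*invmixcolumns)(uint8_t[16]))
 *	{
 *		for (unsigned i = 0; i <= nrounds; i++) {
 *			memcpy(dec[i], enc[nrounds - i], 16);
 *			if (i != 0 && i != nrounds)
 *				invmixcolumns(dec[i]);
 *		}
 *	}
 */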
/*
 * aesni_enc(const struct aesenc *enckey@rdi, const uint8_t in[16] @rsi,
 *     uint8_t out[16] @rdx, uint32_t nrounds@ecx)
 *
 *	Encrypt a single block.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesni_enc)
	movdqu	(%rsi),%xmm0
	call	aesni_enc1
	movdqu	%xmm0,(%rdx)
	ret
END(aesni_enc)

/*
 * aesni_dec(const struct aesdec *deckey@rdi, const uint8_t in[16] @rsi,
 *     uint8_t out[16] @rdx, uint32_t nrounds@ecx)
 *
 *	Decrypt a single block.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesni_dec)
	movdqu	(%rsi),%xmm0
	call	aesni_dec1
	movdqu	%xmm0,(%rdx)
	ret
END(aesni_dec)

/*
 * aesni_cbc_enc(const struct aesenc *enckey@rdi, const uint8_t *in@rsi,
 *     uint8_t *out@rdx, size_t nbytes@rcx, uint8_t iv[16] @r8,
 *     uint32_t nrounds@r9d)
 *
 *	Encrypt a contiguous sequence of blocks with AES-CBC.
 *
 *	nbytes must be an integral multiple of 16.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesni_cbc_enc)
	cmp	$0,%rcx
	jz	2f
	mov	%rcx,%r10	/* r10 := nbytes */
	movdqu	(%r8),%xmm0	/* xmm0 := chaining value */
	_ALIGN_TEXT
1:	movdqu	(%rsi),%xmm1	/* xmm1 := plaintext block */
	lea	0x10(%rsi),%rsi
	pxor	%xmm1,%xmm0	/* xmm0 := cv ^ ptxt */
	mov	%r9d,%ecx	/* ecx := nrounds */
	call	aesni_enc1	/* xmm0 := ciphertext block */
	movdqu	%xmm0,(%rdx)
	lea	0x10(%rdx),%rdx
	sub	$0x10,%r10
	jnz	1b		/* repeat if r10 is nonzero */
	movdqu	%xmm0,(%r8)	/* store chaining value */
2:	ret
END(aesni_cbc_enc)
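/*
 * Illustrative only (not in the original file): CBC encryption is
 * inherently serial -- each block's input depends on the previous
 * block's ciphertext -- which is why there is no 8-way encryption
 * variant to match aesni_cbc_dec8 below.  A C model of the loop above
 * (aes_enc_block is a made-up name for one-block AES):
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *	#include <string.h>
 *
 *	static void
 *	cbc_enc(const uint8_t *in, uint8_t *out, size_t nbytes,
 *	    uint8_t cv[16], void (*aes_enc_block)(uint8_t[16]))
 *	{
 *		for (size_t off = 0; off < nbytes; off += 16) {
 *			for (unsigned i = 0; i < 16; i++)
 *				cv[i] ^= in[off + i];	// cv ^= ptxt
 *			aes_enc_block(cv);	// cv := E(cv ^ ptxt)
 *			memcpy(out + off, cv, 16);
 *		}
 *	}
 */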
/*
 * aesni_cbc_dec1(const struct aesdec *deckey@rdi, const uint8_t *in@rsi,
 *     uint8_t *out@rdx, size_t nbytes@rcx, const uint8_t iv[16] @r8,
 *     uint32_t nrounds@r9)
 *
 *	Decrypt a contiguous sequence of blocks with AES-CBC.
 *
 *	nbytes must be a positive integral multiple of 16.  This routine
 *	is not vectorized; use aesni_cbc_dec8 for >=8 blocks at once.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesni_cbc_dec1)
	push	%rbp		/* create stack frame uint128[1] */
	mov	%rsp,%rbp
	sub	$0x10,%rsp
	movdqu	(%r8),%xmm8	/* xmm8 := iv */
	movdqa	%xmm8,(%rsp)	/* save iv */
	mov	%rcx,%r10	/* r10 := nbytes */
	movdqu	-0x10(%rsi,%r10),%xmm0 /* xmm0 := last ciphertext block */
	movdqu	%xmm0,(%r8)	/* update iv */
	jmp	2f
	_ALIGN_TEXT
1:	movdqu	-0x10(%rsi,%r10),%xmm8 /* xmm8 := chaining value */
	pxor	%xmm8,%xmm0	/* xmm0 := ptxt */
	movdqu	%xmm0,(%rdx,%r10) /* store plaintext block */
	movdqa	%xmm8,%xmm0	/* move cv = ciphertext block */
2:	mov	%r9d,%ecx	/* ecx := nrounds */
	call	aesni_dec1	/* xmm0 := cv ^ ptxt */
	sub	$0x10,%r10
	jnz	1b		/* repeat if more blocks */
	pxor	(%rsp),%xmm0	/* xmm0 := ptxt */
	movdqu	%xmm0,(%rdx)	/* store first plaintext block */
	leave
	ret
END(aesni_cbc_dec1)
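/*
 * Illustrative only (not in the original file): unlike encryption,
 * CBC decryption parallelizes, since D(ct[i]) for every block can be
 * computed independently and then xored with ct[i-1]; aesni_cbc_dec8
 * below exploits this eight blocks at a time.  Both decryption
 * routines also walk backwards, so each chaining value is read before
 * its slot is overwritten and in-place operation (out == in) stays
 * safe.  A serial C model (aes_dec_block is a made-up name):
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *	#include <string.h>
 *
 *	static void
 *	cbc_dec(const uint8_t *in, uint8_t *out, size_t nbytes,
 *	    const uint8_t iv[16], void (*aes_dec_block)(uint8_t[16]))
 *	{
 *		for (size_t off = nbytes; off > 0; off -= 16) {
 *			uint8_t b[16], cv[16];
 *			// chaining value: previous ctxt block, or iv
 *			if (off == 16)
 *				memcpy(cv, iv, 16);
 *			else
 *				memcpy(cv, in + off - 32, 16);
 *			memcpy(b, in + off - 16, 16);
 *			aes_dec_block(b);	// b := D(ctxt)
 *			for (unsigned i = 0; i < 16; i++)
 *				out[off - 16 + i] = b[i] ^ cv[i];
 *		}
 *	}
 */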
/*
 * aesni_cbc_dec8(const struct aesdec *deckey@rdi, const uint8_t *in@rsi,
 *     uint8_t *out@rdx, size_t nbytes@rcx, const uint8_t iv[16] @r8,
 *     uint32_t nrounds@r9)
 *
 *	Decrypt a contiguous sequence of 8-block units with AES-CBC.
 *
 *	nbytes must be a positive integral multiple of 128.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesni_cbc_dec8)
	push	%rbp		/* create stack frame uint128[1] */
	mov	%rsp,%rbp
	sub	$0x10,%rsp
	movdqu	(%r8),%xmm8	/* xmm8 := iv */
	movdqa	%xmm8,(%rsp)	/* save iv */
	mov	%rcx,%r10	/* r10 := nbytes */
	movdqu	-0x10(%rsi,%r10),%xmm7 /* xmm7 := ciphertext block[n-1] */
	movdqu	%xmm7,(%r8)	/* update iv */
	jmp	2f
	_ALIGN_TEXT
1:	movdqu	-0x10(%rsi,%r10),%xmm7 /* xmm7 := cv[0] */
	pxor	%xmm7,%xmm0	/* xmm0 := ptxt[0] */
	movdqu	%xmm0,(%rdx,%r10) /* store plaintext block */
2:	movdqu	-0x20(%rsi,%r10),%xmm6 /* xmm6 := ciphertext block[n-2] */
	movdqu	-0x30(%rsi,%r10),%xmm5 /* xmm5 := ciphertext block[n-3] */
	movdqu	-0x40(%rsi,%r10),%xmm4 /* xmm4 := ciphertext block[n-4] */
	movdqu	-0x50(%rsi,%r10),%xmm3 /* xmm3 := ciphertext block[n-5] */
	movdqu	-0x60(%rsi,%r10),%xmm2 /* xmm2 := ciphertext block[n-6] */
	movdqu	-0x70(%rsi,%r10),%xmm1 /* xmm1 := ciphertext block[n-7] */
	movdqu	-0x80(%rsi,%r10),%xmm0 /* xmm0 := ciphertext block[n-8] */
	movdqa	%xmm6,%xmm15	/* xmm[8+i] := cv[i], 0<i<8 */
	movdqa	%xmm5,%xmm14
	movdqa	%xmm4,%xmm13
	movdqa	%xmm3,%xmm12
	movdqa	%xmm2,%xmm11
	movdqa	%xmm1,%xmm10
	movdqa	%xmm0,%xmm9
	mov	%r9d,%ecx	/* ecx := nrounds */
	call	aesni_dec8	/* xmm[i] := cv[i] ^ ptxt[i], 0<=i<8 */
	pxor	%xmm15,%xmm7	/* xmm[i] := ptxt[i], 0<i<8 */
	pxor	%xmm14,%xmm6
	pxor	%xmm13,%xmm5
	pxor	%xmm12,%xmm4
	pxor	%xmm11,%xmm3
	pxor	%xmm10,%xmm2
	pxor	%xmm9,%xmm1
	movdqu	%xmm7,-0x10(%rdx,%r10) /* store plaintext blocks */
	movdqu	%xmm6,-0x20(%rdx,%r10)
	movdqu	%xmm5,-0x30(%rdx,%r10)
	movdqu	%xmm4,-0x40(%rdx,%r10)
	movdqu	%xmm3,-0x50(%rdx,%r10)
	movdqu	%xmm2,-0x60(%rdx,%r10)
	movdqu	%xmm1,-0x70(%rdx,%r10)
	sub	$0x80,%r10
	jnz	1b		/* repeat if more blocks */
	pxor	(%rsp),%xmm0	/* xmm0 := ptxt[0] */
	movdqu	%xmm0,(%rdx)	/* store first plaintext block */
	leave
	ret
END(aesni_cbc_dec8)

/*
 * aesni_xts_enc1(const struct aesenc *enckey@rdi, const uint8_t *in@rsi,
 *     uint8_t *out@rdx, size_t nbytes@rcx, uint8_t tweak[16] @r8,
 *     uint32_t nrounds@r9d)
 *
 *	Encrypt a contiguous sequence of blocks with AES-XTS.
 *
 *	nbytes must be a positive integral multiple of 16.  This routine
 *	is not vectorized; use aesni_xts_enc8 for >=8 blocks at once.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesni_xts_enc1)
	mov	%rcx,%r10	/* r10 := nbytes */
	movdqu	(%r8),%xmm15	/* xmm15 := tweak */
	_ALIGN_TEXT
1:	movdqu	(%rsi),%xmm0	/* xmm0 := ptxt */
	lea	0x10(%rsi),%rsi	/* advance rsi to next block */
	pxor	%xmm15,%xmm0	/* xmm0 := ptxt ^ tweak */
	mov	%r9d,%ecx	/* ecx := nrounds */
	call	aesni_enc1	/* xmm0 := AES(ptxt ^ tweak) */
	pxor	%xmm15,%xmm0	/* xmm0 := AES(ptxt ^ tweak) ^ tweak */
	movdqu	%xmm0,(%rdx)	/* store ciphertext block */
	lea	0x10(%rdx),%rdx	/* advance rdx to next block */
	call	aesni_xts_mulx	/* xmm15 *= x; trash xmm0 */
	sub	$0x10,%r10
	jnz	1b		/* repeat if more blocks */
	movdqu	%xmm15,(%r8)	/* update tweak */
	ret
END(aesni_xts_enc1)
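/*
 * Illustrative only (not in the original file): one XTS block is
 * ct = T ^ E(pt ^ T), after which the tweak T is multiplied by x in
 * GF(2^128) (see aesni_xts_mulx below).  A C model of the loop in
 * aesni_xts_enc1 (aes_enc_block and xts_mulx are made-up names):
 *
 *	#include <stddef.h>
 *	#include <stdint.h>
 *
 *	static void
 *	xts_enc(const uint8_t *in, uint8_t *out, size_t nbytes,
 *	    uint8_t tweak[16], void (*aes_enc_block)(uint8_t[16]),
 *	    void (*xts_mulx)(uint8_t[16]))
 *	{
 *		for (size_t off = 0; off < nbytes; off += 16) {
 *			uint8_t b[16];
 *			for (unsigned i = 0; i < 16; i++)
 *				b[i] = in[off + i] ^ tweak[i];
 *			aes_enc_block(b);	// b := E(pt ^ T)
 *			for (unsigned i = 0; i < 16; i++)
 *				out[off + i] = b[i] ^ tweak[i];
 *			xts_mulx(tweak);	// T := T * x
 *		}
 *	}
 */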
/*
 * aesni_xts_enc8(const struct aesenc *enckey@rdi, const uint8_t *in@rsi,
 *     uint8_t *out@rdx, size_t nbytes@rcx, uint8_t tweak[16] @r8,
 *     uint32_t nrounds@r9d)
 *
 *	Encrypt a contiguous sequence of blocks with AES-XTS.
 *
 *	nbytes must be a positive integral multiple of 128.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesni_xts_enc8)
	push	%rbp		/* create stack frame uint128[1] */
	mov	%rsp,%rbp
	sub	$0x10,%rsp
	mov	%rcx,%r10	/* r10 := nbytes */
	movdqu	(%r8),%xmm15	/* xmm15 := tweak[0] */
	_ALIGN_TEXT
1:	movdqa	%xmm15,%xmm8	/* xmm8 := tweak[0] */
	call	aesni_xts_mulx	/* xmm15 := tweak[1] */
	movdqa	%xmm15,%xmm9	/* xmm9 := tweak[1] */
	call	aesni_xts_mulx	/* xmm15 := tweak[2] */
	movdqa	%xmm15,%xmm10	/* xmm10 := tweak[2] */
	call	aesni_xts_mulx	/* xmm15 := tweak[3] */
	movdqa	%xmm15,%xmm11	/* xmm11 := tweak[3] */
	call	aesni_xts_mulx	/* xmm15 := tweak[4] */
	movdqa	%xmm15,%xmm12	/* xmm12 := tweak[4] */
	call	aesni_xts_mulx	/* xmm15 := tweak[5] */
	movdqa	%xmm15,%xmm13	/* xmm13 := tweak[5] */
	call	aesni_xts_mulx	/* xmm15 := tweak[6] */
	movdqa	%xmm15,%xmm14	/* xmm14 := tweak[6] */
	call	aesni_xts_mulx	/* xmm15 := tweak[7] */
	movdqu	(%rsi),%xmm0	/* xmm[i] := ptxt[i] */
	movdqu	0x10(%rsi),%xmm1
	movdqu	0x20(%rsi),%xmm2
	movdqu	0x30(%rsi),%xmm3
	movdqu	0x40(%rsi),%xmm4
	movdqu	0x50(%rsi),%xmm5
	movdqu	0x60(%rsi),%xmm6
	movdqu	0x70(%rsi),%xmm7
	lea	0x80(%rsi),%rsi	/* advance rsi to next block group */
	movdqa	%xmm8,(%rsp)	/* save tweak[0] */
	pxor	%xmm8,%xmm0	/* xmm[i] := ptxt[i] ^ tweak[i] */
	pxor	%xmm9,%xmm1
	pxor	%xmm10,%xmm2
	pxor	%xmm11,%xmm3
	pxor	%xmm12,%xmm4
	pxor	%xmm13,%xmm5
	pxor	%xmm14,%xmm6
	pxor	%xmm15,%xmm7
	mov	%r9d,%ecx	/* ecx := nrounds */
	call	aesni_enc8	/* xmm[i] := AES(ptxt[i] ^ tweak[i]) */
	pxor	(%rsp),%xmm0	/* xmm[i] := AES(...) ^ tweak[i] */
	pxor	%xmm9,%xmm1
	pxor	%xmm10,%xmm2
	pxor	%xmm11,%xmm3
	pxor	%xmm12,%xmm4
	pxor	%xmm13,%xmm5
	pxor	%xmm14,%xmm6
	pxor	%xmm15,%xmm7
	movdqu	%xmm0,(%rdx)	/* store ciphertext blocks */
	movdqu	%xmm1,0x10(%rdx)
	movdqu	%xmm2,0x20(%rdx)
	movdqu	%xmm3,0x30(%rdx)
	movdqu	%xmm4,0x40(%rdx)
	movdqu	%xmm5,0x50(%rdx)
	movdqu	%xmm6,0x60(%rdx)
	movdqu	%xmm7,0x70(%rdx)
	lea	0x80(%rdx),%rdx	/* advance rdx to next block group */
	call	aesni_xts_mulx	/* xmm15 := tweak[8] */
	sub	$0x80,%r10
	jnz	1b		/* repeat if more block groups */
	movdqu	%xmm15,(%r8)	/* update tweak */
	leave
	ret
END(aesni_xts_enc8)
/*
 * aesni_xts_dec1(const struct aesdec *deckey@rdi, const uint8_t *in@rsi,
 *     uint8_t *out@rdx, size_t nbytes@rcx, uint8_t tweak[16] @r8,
 *     uint32_t nrounds@r9d)
 *
 *	Decrypt a contiguous sequence of blocks with AES-XTS.
 *
 *	nbytes must be a positive integral multiple of 16.  This routine
 *	is not vectorized; use aesni_xts_dec8 for >=8 blocks at once.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesni_xts_dec1)
	mov	%rcx,%r10	/* r10 := nbytes */
	movdqu	(%r8),%xmm15	/* xmm15 := tweak */
	_ALIGN_TEXT
1:	movdqu	(%rsi),%xmm0	/* xmm0 := ctxt */
	lea	0x10(%rsi),%rsi	/* advance rsi to next block */
	pxor	%xmm15,%xmm0	/* xmm0 := ctxt ^ tweak */
	mov	%r9d,%ecx	/* ecx := nrounds */
	call	aesni_dec1	/* xmm0 := AES(ctxt ^ tweak) */
	pxor	%xmm15,%xmm0	/* xmm0 := AES(ctxt ^ tweak) ^ tweak */
	movdqu	%xmm0,(%rdx)	/* store plaintext block */
	lea	0x10(%rdx),%rdx	/* advance rdx to next block */
	call	aesni_xts_mulx	/* xmm15 *= x; trash xmm0 */
	sub	$0x10,%r10
	jnz	1b		/* repeat if more blocks */
	movdqu	%xmm15,(%r8)	/* update tweak */
	ret
END(aesni_xts_dec1)

/*
 * aesni_xts_dec8(const struct aesdec *deckey@rdi, const uint8_t *in@rsi,
 *     uint8_t *out@rdx, size_t nbytes@rcx, uint8_t tweak[16] @r8,
 *     uint32_t nrounds@r9d)
 *
 *	Decrypt a contiguous sequence of blocks with AES-XTS.
 *
 *	nbytes must be a positive integral multiple of 128.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesni_xts_dec8)
	push	%rbp		/* create stack frame uint128[1] */
	mov	%rsp,%rbp
	sub	$0x10,%rsp
	mov	%rcx,%r10	/* r10 := nbytes */
	movdqu	(%r8),%xmm15	/* xmm15 := tweak[0] */
	_ALIGN_TEXT
1:	movdqa	%xmm15,%xmm8	/* xmm8 := tweak[0] */
	call	aesni_xts_mulx	/* xmm15 := tweak[1] */
	movdqa	%xmm15,%xmm9	/* xmm9 := tweak[1] */
	call	aesni_xts_mulx	/* xmm15 := tweak[2] */
	movdqa	%xmm15,%xmm10	/* xmm10 := tweak[2] */
	call	aesni_xts_mulx	/* xmm15 := tweak[3] */
	movdqa	%xmm15,%xmm11	/* xmm11 := tweak[3] */
	call	aesni_xts_mulx	/* xmm15 := tweak[4] */
	movdqa	%xmm15,%xmm12	/* xmm12 := tweak[4] */
	call	aesni_xts_mulx	/* xmm15 := tweak[5] */
	movdqa	%xmm15,%xmm13	/* xmm13 := tweak[5] */
	call	aesni_xts_mulx	/* xmm15 := tweak[6] */
	movdqa	%xmm15,%xmm14	/* xmm14 := tweak[6] */
	call	aesni_xts_mulx	/* xmm15 := tweak[7] */
	movdqu	(%rsi),%xmm0	/* xmm[i] := ctxt[i] */
	movdqu	0x10(%rsi),%xmm1
	movdqu	0x20(%rsi),%xmm2
	movdqu	0x30(%rsi),%xmm3
	movdqu	0x40(%rsi),%xmm4
	movdqu	0x50(%rsi),%xmm5
	movdqu	0x60(%rsi),%xmm6
	movdqu	0x70(%rsi),%xmm7
	lea	0x80(%rsi),%rsi	/* advance rsi to next block group */
	movdqa	%xmm8,(%rsp)	/* save tweak[0] */
	pxor	%xmm8,%xmm0	/* xmm[i] := ctxt[i] ^ tweak[i] */
	pxor	%xmm9,%xmm1
	pxor	%xmm10,%xmm2
	pxor	%xmm11,%xmm3
	pxor	%xmm12,%xmm4
	pxor	%xmm13,%xmm5
	pxor	%xmm14,%xmm6
	pxor	%xmm15,%xmm7
	mov	%r9d,%ecx	/* ecx := nrounds */
	call	aesni_dec8	/* xmm[i] := AES(ctxt[i] ^ tweak[i]) */
	pxor	(%rsp),%xmm0	/* xmm[i] := AES(...) ^ tweak[i] */
	pxor	%xmm9,%xmm1
	pxor	%xmm10,%xmm2
	pxor	%xmm11,%xmm3
	pxor	%xmm12,%xmm4
	pxor	%xmm13,%xmm5
	pxor	%xmm14,%xmm6
	pxor	%xmm15,%xmm7
	movdqu	%xmm0,(%rdx)	/* store plaintext blocks */
	movdqu	%xmm1,0x10(%rdx)
	movdqu	%xmm2,0x20(%rdx)
	movdqu	%xmm3,0x30(%rdx)
	movdqu	%xmm4,0x40(%rdx)
	movdqu	%xmm5,0x50(%rdx)
	movdqu	%xmm6,0x60(%rdx)
	movdqu	%xmm7,0x70(%rdx)
	lea	0x80(%rdx),%rdx	/* advance rdx to next block group */
	call	aesni_xts_mulx	/* xmm15 := tweak[8] */
	sub	$0x80,%r10
	jnz	1b		/* repeat if more block groups */
	movdqu	%xmm15,(%r8)	/* update tweak */
	leave
	ret
END(aesni_xts_dec8)

/*
 * aesni_xts_mulx(tweak@xmm15)
 *
 *	Multiply xmm15 by x, modulo x^128 + x^7 + x^2 + x + 1, in place.
 *	Uses %xmm0 as temporary.
 */
	.text
	_ALIGN_TEXT
	.type	aesni_xts_mulx,@function
aesni_xts_mulx:
	/*
	 * Simultaneously determine
	 * (a) whether the high bit of the low quadword must be
	 *     shifted into the low bit of the high quadword, and
	 * (b) whether the high bit of the high quadword must be
	 *     carried into x^128 = x^7 + x^2 + x + 1.
	 */
	pxor	%xmm0,%xmm0	/* xmm0 := 0 */
	pcmpgtq	%xmm15,%xmm0	/* xmm0[i] := -1 if 0 > xmm15[i] else 0 */
	pshufd	$0b01001110,%xmm0,%xmm0	/* swap halves of xmm0 */
	pand	xtscarry(%rip),%xmm0 /* copy xtscarry according to mask */
	psllq	$1,%xmm15	/* shift */
	pxor	%xmm0,%xmm15	/* incorporate (a) and (b) */
	ret
END(aesni_xts_mulx)

	.section .rodata
	.p2align 4
	.type	xtscarry,@object
xtscarry:
	.byte	0x87,0,0,0, 0,0,0,0, 1,0,0,0, 0,0,0,0
END(xtscarry)

/*
 * aesni_xts_update(const uint8_t in[16] @rdi, uint8_t out[16] @rsi)
 *
 *	Update an AES-XTS tweak.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesni_xts_update)
	movdqu	(%rdi),%xmm15
	call	aesni_xts_mulx
	movdqu	%xmm15,(%rsi)
	ret
END(aesni_xts_update)
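/*
 * Illustrative only (not in the original file): a C model of
 * aesni_xts_mulx.  The tweak is a 128-bit polynomial over GF(2),
 * little-endian; multiplying by x shifts each quadword left by one
 * bit, the bit shifted out of the low quadword carries into the high
 * quadword, and the bit shifted out of the top is reduced back in as
 * x^7 + x^2 + x + 1 (the 0x87 in xtscarry):
 *
 *	#include <stdint.h>
 *
 *	static void
 *	xts_mulx(uint64_t t[2])	// t[0] = low quadword, t[1] = high
 *	{
 *		uint64_t carry_lo = t[0] >> 63;	// into high quadword
 *		uint64_t carry_hi = t[1] >> 63;	// into the reduction
 *		t[1] = (t[1] << 1) | carry_lo;
 *		t[0] = (t[0] << 1) ^ (carry_hi ? 0x87 : 0);
 *	}
 */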
/*
 * aesni_cbcmac_update1(const struct aesenc *enckey@rdi, const uint8_t *in@rsi,
 *     size_t nbytes@rdx, uint8_t auth[16] @rcx, uint32_t nrounds@r8d)
 *
 *	Update CBC-MAC.
 *
 *	nbytes must be a positive integral multiple of 16.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesni_cbcmac_update1)
	movdqu	(%rcx),%xmm0	/* xmm0 := auth */
	mov	%rdx,%r10	/* r10 := nbytes */
	mov	%rcx,%rdx	/* rdx := &auth */
	_ALIGN_TEXT
1:	pxor	(%rsi),%xmm0	/* xmm0 ^= plaintext block */
	lea	0x10(%rsi),%rsi
	mov	%r8d,%ecx	/* ecx := nrounds */
	call	aesni_enc1	/* xmm0 := auth'; trash rax,rcx,xmm8 */
	sub	$0x10,%r10
	jnz	1b
	movdqu	%xmm0,(%rdx)	/* store auth' */
	ret
END(aesni_cbcmac_update1)

/*
 * aesni_ccm_enc1(const struct aesenc *enckey@rdi, const uint8_t *in@rsi,
 *     uint8_t *out@rdx, size_t nbytes@rcx,
 *     uint8_t authctr[32] @r8, uint32_t nrounds@r9d)
 *
 *	Update CCM encryption.
 *
 *	nbytes must be a positive integral multiple of 16.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesni_ccm_enc1)
	mov	%rcx,%r10	/* r10 := nbytes */
	movdqu	0x10(%r8),%xmm2	/* xmm2 := ctr (be) */
	movdqa	bswap32(%rip),%xmm4 /* xmm4 := bswap32 table */
	movdqa	ctr32_inc(%rip),%xmm5 /* xmm5 := (0,0,0,1) (le) */
	movdqu	(%r8),%xmm0	/* xmm0 := auth */
	pshufb	%xmm4,%xmm2	/* xmm2 := ctr (le) */
	_ALIGN_TEXT
1:	movdqu	(%rsi),%xmm3	/* xmm3 := plaintext block */
	paddd	%xmm5,%xmm2	/* increment ctr (32-bit) */
	lea	0x10(%rsi),%rsi
	movdqa	%xmm2,%xmm1	/* xmm1 := ctr (le) */
	mov	%r9d,%ecx	/* ecx := nrounds */
	pshufb	%xmm4,%xmm1	/* xmm1 := ctr (be) */
	pxor	%xmm3,%xmm0	/* xmm0 := auth ^ ptxt */
	call	aesni_enc2	/* trash rax/rcx/xmm8 */
	pxor	%xmm1,%xmm3	/* xmm3 := ciphertext block */
	sub	$0x10,%r10	/* count down bytes */
	movdqu	%xmm3,(%rdx)	/* store ciphertext block */
	lea	0x10(%rdx),%rdx
	jnz	1b		/* repeat if more blocks */
	pshufb	%xmm4,%xmm2	/* xmm2 := ctr (be) */
	movdqu	%xmm0,(%r8)	/* store updated auth */
	movdqu	%xmm2,0x10(%r8)	/* store updated ctr */
	ret
END(aesni_ccm_enc1)
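/*
 * Illustrative only (not in the original file): CCM combines CBC-MAC
 * over the plaintext with CTR-mode encryption; aesni_ccm_enc1 feeds
 * both AES invocations to aesni_enc2 so they run in parallel.  A C
 * model of one step (aes_enc_block is a made-up name):
 *
 *	#include <stdint.h>
 *
 *	static void
 *	ccm_enc_step(uint8_t auth[16], uint8_t ctrblk[16],
 *	    const uint8_t pt[16], uint8_t ct[16],
 *	    void (*aes_enc_block)(uint8_t[16]))
 *	{
 *		uint8_t pad[16];
 *		for (unsigned i = 0; i < 16; i++) {
 *			auth[i] ^= pt[i];	// CBC-MAC absorb
 *			pad[i] = ctrblk[i];	// ctrblk already incremented
 *		}
 *		aes_enc_block(auth);		// auth := E(auth ^ pt)
 *		aes_enc_block(pad);		// pad := E(ctr)
 *		for (unsigned i = 0; i < 16; i++)
 *			ct[i] = pt[i] ^ pad[i];	// CTR encrypt
 *	}
 */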
/*
 * aesni_ccm_dec1(const struct aesenc *enckey@rdi, const uint8_t *in@rsi,
 *     uint8_t *out@rdx, size_t nbytes@rcx,
 *     uint8_t authctr[32] @r8, uint32_t nrounds@r9d)
 *
 *	Update CCM decryption.
 *
 *	nbytes must be a positive integral multiple of 16.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesni_ccm_dec1)
	movdqu	0x10(%r8),%xmm2	/* xmm2 := ctr (be) */
	movdqa	bswap32(%rip),%xmm4 /* xmm4 := bswap32 table */
	movdqa	ctr32_inc(%rip),%xmm5 /* xmm5 := (0,0,0,1) (le) */
	movdqu	(%r8),%xmm1	/* xmm1 := auth */
	pshufb	%xmm4,%xmm2	/* xmm2 := ctr (le) */
	mov	%rcx,%r10	/* r10 := nbytes */

	/* Decrypt the first block. */
	paddd	%xmm5,%xmm2	/* increment ctr (32-bit) */
	mov	%r9d,%ecx	/* ecx := nrounds */
	movdqa	%xmm2,%xmm0	/* xmm0 := ctr (le) */
	movdqu	(%rsi),%xmm3	/* xmm3 := ctxt */
	pshufb	%xmm4,%xmm0	/* xmm0 := ctr (be) */
	lea	0x10(%rsi),%rsi
	call	aesni_enc1	/* xmm0 := pad; trash rax/rcx/xmm8 */
	jmp	2f

	_ALIGN_TEXT
1:	/*
	 * Authenticate the last block and decrypt the next block
	 * simultaneously.
	 *
	 *	xmm1 = auth ^ ptxt[-1]
	 *	xmm2 = ctr[-1] (le)
	 */
	paddd	%xmm5,%xmm2	/* increment ctr (32-bit) */
	mov	%r9d,%ecx	/* ecx := nrounds */
	movdqa	%xmm2,%xmm0	/* xmm0 := ctr (le) */
	movdqu	(%rsi),%xmm3	/* xmm3 := ctxt */
	pshufb	%xmm4,%xmm0	/* xmm0 := ctr (be) */
	lea	0x10(%rsi),%rsi
	call	aesni_enc2	/* xmm0 := pad, xmm1 := auth';
				 * trash rax/rcx/xmm8 */
2:	pxor	%xmm0,%xmm3	/* xmm3 := ptxt */
	sub	$0x10,%r10
	movdqu	%xmm3,(%rdx)	/* store plaintext */
	lea	0x10(%rdx),%rdx
	pxor	%xmm3,%xmm1	/* xmm1 := auth ^ ptxt */
	jnz	1b

	/* Authenticate the last block. */
	movdqa	%xmm1,%xmm0	/* xmm0 := auth ^ ptxt */
	mov	%r9d,%ecx	/* ecx := nrounds */
	call	aesni_enc1	/* xmm0 := auth' */
	pshufb	%xmm4,%xmm2	/* xmm2 := ctr (be) */
	movdqu	%xmm0,(%r8)	/* store updated auth */
	movdqu	%xmm2,0x10(%r8)	/* store updated ctr */
	ret
END(aesni_ccm_dec1)

	.section .rodata
	.p2align 4
	.type	bswap32,@object
bswap32:
	.byte	3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12
END(bswap32)

	.section .rodata
	.p2align 4
	.type	ctr32_inc,@object
ctr32_inc:
	.byte	0,0,0,0, 0,0,0,0, 0,0,0,0, 1,0,0,0
END(ctr32_inc)
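/*
 * Illustrative only (not in the original file): CCM's counter block
 * is stored big-endian, but PADDD adds little-endian 32-bit lanes, so
 * the CCM routines above byte-swap each lane (bswap32), add 1 to the
 * last lane (ctr32_inc), and swap back before encrypting.  A C model
 * of the net effect on the stored counter block:
 *
 *	#include <stdint.h>
 *
 *	static void
 *	ctr32_increment(uint8_t ctrblk[16])
 *	{
 *		// bytes 12..15 are the big-endian 32-bit block counter
 *		uint32_t c = ((uint32_t)ctrblk[12] << 24) |
 *		    ((uint32_t)ctrblk[13] << 16) |
 *		    ((uint32_t)ctrblk[14] << 8) | ctrblk[15];
 *		c++;
 *		ctrblk[12] = c >> 24;
 *		ctrblk[13] = c >> 16;
 *		ctrblk[14] = c >> 8;
 *		ctrblk[15] = c;
 *	}
 */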
/*
 * aesni_enc1(const struct aesenc *enckey@rdi, uint128_t block@xmm0,
 *     uint32_t nrounds@ecx)
 *
 *	Encrypt a single AES block in %xmm0.
 *
 *	Internal ABI.  Uses %rax and %xmm8 as temporaries.  Destroys %ecx.
 */
	.text
	_ALIGN_TEXT
	.type	aesni_enc1,@function
aesni_enc1:
	pxor	(%rdi),%xmm0	/* xor in first round key */
	shl	$4,%ecx		/* ecx := total byte size of round keys */
	lea	0x10(%rdi,%rcx),%rax /* rax := end of round key array */
	neg	%rcx		/* rcx := byte offset of round key from end */
	jmp	2f
	_ALIGN_TEXT
1:	aesenc	%xmm8,%xmm0
2:	movdqa	(%rax,%rcx),%xmm8 /* load round key */
	add	$0x10,%rcx
	jnz	1b		/* repeat if more rounds */
	aesenclast %xmm8,%xmm0
	ret
END(aesni_enc1)

/*
 * aesni_enc2(const struct aesenc *enckey@rdi, uint128_t block0@xmm0,
 *     uint128_t block1@xmm1, uint32_t nrounds@ecx)
 *
 *	Encrypt two AES blocks in %xmm0 and %xmm1.
 *
 *	Internal ABI.  Uses %rax and %xmm8 as temporaries.  Destroys %ecx.
 */
	.text
	_ALIGN_TEXT
	.type	aesni_enc2,@function
aesni_enc2:
	movdqa	(%rdi),%xmm8	/* xmm8 := first round key */
	shl	$4,%ecx		/* ecx := total byte size of round keys */
	lea	0x10(%rdi,%rcx),%rax /* rax := end of round key array */
	neg	%rcx		/* rcx := byte offset of round key from end */
	pxor	%xmm8,%xmm0	/* xor in first round key */
	pxor	%xmm8,%xmm1
	jmp	2f
	_ALIGN_TEXT
1:	aesenc	%xmm8,%xmm0
	aesenc	%xmm8,%xmm1
2:	movdqa	(%rax,%rcx),%xmm8 /* load round key */
	add	$0x10,%rcx
	jnz	1b		/* repeat if there's more */
	aesenclast %xmm8,%xmm0
	aesenclast %xmm8,%xmm1
	ret
END(aesni_enc2)
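/*
 * Illustrative only (not in the original file): the loops above index
 * round keys by a negative byte offset from one past the end of the
 * array, so a single ADD both advances the offset and, on reaching
 * zero, terminates the loop.  A C model of aesni_enc1's round
 * structure (aesenc and aesenclast stand in for the instructions of
 * the same name):
 *
 *	#include <stdint.h>
 *
 *	typedef struct { uint8_t b[16]; } block_t;
 *
 *	static block_t
 *	enc1(const block_t *rk, block_t m, uint32_t nrounds,
 *	    block_t (*aesenc)(block_t, block_t),
 *	    block_t (*aesenclast)(block_t, block_t))
 *	{
 *		for (unsigned i = 0; i < 16; i++)
 *			m.b[i] ^= rk[0].b[i];	// whiten with round key 0
 *		for (uint32_t r = 1; r < nrounds; r++)
 *			m = aesenc(m, rk[r]);
 *		return aesenclast(m, rk[nrounds]);
 *	}
 */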
/*
 * aesni_enc8(const struct aesenc *enckey@rdi, uint128_t block0@xmm0, ...,
 *     block7@xmm7, uint32_t nrounds@ecx)
 *
 *	Encrypt eight AES blocks in %xmm0 through %xmm7 in parallel.
 *
 *	Internal ABI.  Uses %rax and %xmm8 as temporaries.  Destroys %ecx.
 */
	.text
	_ALIGN_TEXT
	.type	aesni_enc8,@function
aesni_enc8:
	movdqa	(%rdi),%xmm8	/* xor in first round key */
	pxor	%xmm8,%xmm0
	pxor	%xmm8,%xmm1
	pxor	%xmm8,%xmm2
	pxor	%xmm8,%xmm3
	pxor	%xmm8,%xmm4
	pxor	%xmm8,%xmm5
	pxor	%xmm8,%xmm6
	pxor	%xmm8,%xmm7
	shl	$4,%ecx		/* ecx := total byte size of round keys */
	lea	0x10(%rdi,%rcx),%rax /* rax := end of round key array */
	neg	%rcx		/* rcx := byte offset of round key from end */
	jmp	2f
	_ALIGN_TEXT
1:	aesenc	%xmm8,%xmm0
	aesenc	%xmm8,%xmm1
	aesenc	%xmm8,%xmm2
	aesenc	%xmm8,%xmm3
	aesenc	%xmm8,%xmm4
	aesenc	%xmm8,%xmm5
	aesenc	%xmm8,%xmm6
	aesenc	%xmm8,%xmm7
2:	movdqa	(%rax,%rcx),%xmm8 /* load round key */
	add	$0x10,%rcx
	jnz	1b		/* repeat if more rounds */
	aesenclast %xmm8,%xmm0
	aesenclast %xmm8,%xmm1
	aesenclast %xmm8,%xmm2
	aesenclast %xmm8,%xmm3
	aesenclast %xmm8,%xmm4
	aesenclast %xmm8,%xmm5
	aesenclast %xmm8,%xmm6
	aesenclast %xmm8,%xmm7
	ret
END(aesni_enc8)

/*
 * aesni_dec1(const struct aesdec *deckey@rdi, uint128_t block@xmm0,
 *     uint32_t nrounds@ecx)
 *
 *	Decrypt a single AES block in %xmm0.
 *
 *	Internal ABI.  Uses %rax and %xmm8 as temporaries.  Destroys %ecx.
 */
	.text
	_ALIGN_TEXT
	.type	aesni_dec1,@function
aesni_dec1:
	pxor	(%rdi),%xmm0	/* xor in first round key */
	shl	$4,%ecx		/* ecx := total byte size of round keys */
	lea	0x10(%rdi,%rcx),%rax /* rax := end of round key array */
	neg	%rcx		/* rcx := byte offset of round key from end */
	jmp	2f
	_ALIGN_TEXT
1:	aesdec	%xmm8,%xmm0
2:	movdqa	(%rax,%rcx),%xmm8 /* load round key */
	add	$0x10,%rcx
	jnz	1b		/* repeat if more rounds */
	aesdeclast %xmm8,%xmm0
	ret
END(aesni_dec1)
/*
 * aesni_dec8(const struct aesdec *deckey@rdi, uint128_t block0@xmm0, ...,
 *     block7@xmm7, uint32_t nrounds@ecx)
 *
 *	Decrypt eight AES blocks in %xmm0 through %xmm7 in parallel.
 *
 *	Internal ABI.  Uses %rax and %xmm8 as temporaries.  Destroys %ecx.
 */
	.text
	_ALIGN_TEXT
	.type	aesni_dec8,@function
aesni_dec8:
	movdqa	(%rdi),%xmm8	/* xor in first round key */
	pxor	%xmm8,%xmm0
	pxor	%xmm8,%xmm1
	pxor	%xmm8,%xmm2
	pxor	%xmm8,%xmm3
	pxor	%xmm8,%xmm4
	pxor	%xmm8,%xmm5
	pxor	%xmm8,%xmm6
	pxor	%xmm8,%xmm7
	shl	$4,%ecx		/* ecx := total byte size of round keys */
	lea	0x10(%rdi,%rcx),%rax /* rax := end of round key array */
	neg	%rcx		/* rcx := byte offset of round key from end */
	jmp	2f
	_ALIGN_TEXT
1:	aesdec	%xmm8,%xmm0
	aesdec	%xmm8,%xmm1
	aesdec	%xmm8,%xmm2
	aesdec	%xmm8,%xmm3
	aesdec	%xmm8,%xmm4
	aesdec	%xmm8,%xmm5
	aesdec	%xmm8,%xmm6
	aesdec	%xmm8,%xmm7
2:	movdqa	(%rax,%rcx),%xmm8 /* load round key */
	add	$0x10,%rcx
	jnz	1b		/* repeat if more rounds */
	aesdeclast %xmm8,%xmm0
	aesdeclast %xmm8,%xmm1
	aesdeclast %xmm8,%xmm2
	aesdeclast %xmm8,%xmm3
	aesdeclast %xmm8,%xmm4
	aesdeclast %xmm8,%xmm5
	aesdeclast %xmm8,%xmm6
	aesdeclast %xmm8,%xmm7
	ret
END(aesni_dec8)