/*	$NetBSD: aes_ni_64.S,v 1.6 2020/07/27 20:57:23 riastradh Exp $	*/

/*-
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

RCSID("$NetBSD: aes_ni_64.S,v 1.6 2020/07/27 20:57:23 riastradh Exp $")

/*
 * MOVDQA/MOVDQU are Move Double Quadword (Aligned/Unaligned), defined
 * to operate on integers; MOVAPS/MOVUPS are Move (Aligned/Unaligned)
 * Packed Single, defined to operate on binary32 floats.  They have
 * exactly the same architectural effects (move a 128-bit quantity from
 * memory into an xmm register).
 *
 * In principle, they might have different microarchitectural effects
 * so that MOVAPS/MOVUPS might incur a penalty when the register is
 * later used for integer paths, but in practice they don't.  So we use
 * the one whose instruction encoding is shorter -- MOVAPS/MOVUPS.
 */
#define	movdqa	movaps
#define	movdqu	movups

/*
 * aesni_setenckey128(struct aesenc *enckey@rdi, const uint8_t key[16] @rsi)
 *
 * Expand a 16-byte AES-128 key into 10 round keys.
 *
 * Standard ABI calling convention.
 */
ENTRY(aesni_setenckey128)
	movdqu	(%rsi),%xmm0	/* load master key into %xmm0 */
	movdqa	%xmm0,(%rdi)	/* store master key as the first round key */
	lea	0x10(%rdi),%rdi	/* advance %rdi to next round key */
	aeskeygenassist $0x1,%xmm0,%xmm2
	call	aesni_expand128
	aeskeygenassist $0x2,%xmm0,%xmm2
	call	aesni_expand128
	aeskeygenassist $0x4,%xmm0,%xmm2
	call	aesni_expand128
	aeskeygenassist $0x8,%xmm0,%xmm2
	call	aesni_expand128
	aeskeygenassist $0x10,%xmm0,%xmm2
	call	aesni_expand128
	aeskeygenassist $0x20,%xmm0,%xmm2
	call	aesni_expand128
	aeskeygenassist $0x40,%xmm0,%xmm2
	call	aesni_expand128
	aeskeygenassist $0x80,%xmm0,%xmm2
	call	aesni_expand128
	aeskeygenassist $0x1b,%xmm0,%xmm2
	call	aesni_expand128
	aeskeygenassist $0x36,%xmm0,%xmm2
	call	aesni_expand128
	ret
END(aesni_setenckey128)

/*
 * aesni_setenckey192(struct aesenc *enckey@rdi, const uint8_t key[24] @rsi)
 *
 * Expand a 24-byte AES-192 key into 12 round keys.
 *
 * Standard ABI calling convention.
 */
ENTRY(aesni_setenckey192)
	movdqu	(%rsi),%xmm0	/* load master key [0:128) into %xmm0 */
	movq	0x10(%rsi),%xmm1	/* load master key [128:192) into %xmm1 */
	movdqa	%xmm0,(%rdi)	/* store master key [0:128) as round key */
	lea	0x10(%rdi),%rdi	/* advance %rdi to next round key */
	aeskeygenassist $0x1,%xmm1,%xmm2
	call	aesni_expand192a
	aeskeygenassist $0x2,%xmm0,%xmm2
	call	aesni_expand192b
	aeskeygenassist $0x4,%xmm1,%xmm2
	call	aesni_expand192a
	aeskeygenassist $0x8,%xmm0,%xmm2
	call	aesni_expand192b
	aeskeygenassist $0x10,%xmm1,%xmm2
	call	aesni_expand192a
	aeskeygenassist $0x20,%xmm0,%xmm2
	call	aesni_expand192b
	aeskeygenassist $0x40,%xmm1,%xmm2
	call	aesni_expand192a
	aeskeygenassist $0x80,%xmm0,%xmm2
	call	aesni_expand192b
	ret
END(aesni_setenckey192)

/*
 * aesni_setenckey256(struct aesenc *enckey@rdi, const uint8_t key[32] @rsi)
 *
 * Expand a 32-byte AES-256 key into 14 round keys.
 *
 * Standard ABI calling convention.
 */
ENTRY(aesni_setenckey256)
	movdqu	(%rsi),%xmm0	/* load master key [0:128) into %xmm0 */
	movdqu	0x10(%rsi),%xmm1	/* load master key [128:256) into %xmm1 */
	movdqa	%xmm0,(%rdi)	/* store master key [0:128) as round key */
	movdqa	%xmm1,0x10(%rdi)	/* store master key [128:256) as round key */
	lea	0x20(%rdi),%rdi	/* advance %rdi to next round key */
	aeskeygenassist $0x1,%xmm1,%xmm2
	call	aesni_expand256a
	aeskeygenassist $0x1,%xmm0,%xmm2
	call	aesni_expand256b
	aeskeygenassist $0x2,%xmm1,%xmm2
	call	aesni_expand256a
	aeskeygenassist $0x2,%xmm0,%xmm2
	call	aesni_expand256b
	aeskeygenassist $0x4,%xmm1,%xmm2
	call	aesni_expand256a
	aeskeygenassist $0x4,%xmm0,%xmm2
	call	aesni_expand256b
	aeskeygenassist $0x8,%xmm1,%xmm2
	call	aesni_expand256a
	aeskeygenassist $0x8,%xmm0,%xmm2
	call	aesni_expand256b
	aeskeygenassist $0x10,%xmm1,%xmm2
	call	aesni_expand256a
	aeskeygenassist $0x10,%xmm0,%xmm2
	call	aesni_expand256b
	aeskeygenassist $0x20,%xmm1,%xmm2
	call	aesni_expand256a
	aeskeygenassist $0x20,%xmm0,%xmm2
	call	aesni_expand256b
	aeskeygenassist $0x40,%xmm1,%xmm2
	call	aesni_expand256a
	ret
END(aesni_setenckey256)

/*
 * aesni_expand128(uint128_t *rkp@rdi, uint128_t prk@xmm0,
 *     uint128_t keygenassist@xmm2)
 *
 * 1. Compute the AES-128 round key using the previous round key.
 * 2. Store it at *rkp.
 * 3. Set %xmm0 to it.
 * 4. Advance %rdi to point at the next round key.
 *
 * Internal ABI.  On entry:
 *
 *	%rdi = rkp, pointer to round key to compute
 *	%xmm0 = (prk[0], prk[1], prk[2], prk[3])
 *	%xmm2 = (xxx, xxx, xxx, t = Rot(SubWord(prk[3])) ^ RCON)
 *
 * On exit:
 *
 *	%rdi = &rkp[1], rkp advanced by one round key
 *	%xmm0 = rk, the round key we just computed
 *	%xmm2 = garbage
 *	%xmm4 = garbage
 *	%xmm5 = garbage
 *	%xmm6 = garbage
 *
 * Note: %xmm1 is preserved (as are %xmm3 and %xmm7 through %xmm15,
 * and all other registers).
 */
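
/*
 * Illustrative C sketch only -- not part of this file; names are
 * hypothetical.  The round key follows from the previous round key
 * by the FIPS 197 recurrence, with t taken from AESKEYGENASSIST:
 *
 *	uint32_t t = RotSubRcon(prk[3]);
 *	rk[0] = t ^ prk[0];
 *	rk[1] = rk[0] ^ prk[1];
 *	rk[2] = rk[1] ^ prk[2];
 *	rk[3] = rk[2] ^ prk[3];
 *
 * Expanding the recurrence gives rk[i] = t ^ prk[0] ^ ... ^ prk[i],
 * which is what the shifted-copy/XOR sequence below computes without
 * a loop-carried dependency between the four words.
 */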
	.text
	_ALIGN_TEXT
	.type	aesni_expand128,@function
aesni_expand128:
	/*
	 * %xmm2 := (%xmm2[3], %xmm2[3], %xmm2[3], %xmm2[3]),
	 * i.e., set each word of %xmm2 to t := Rot(SubWord(prk[3])) ^ RCON.
	 */
	pshufd	$0b11111111,%xmm2,%xmm2

	/*
	 * %xmm4 := (0, prk[0], prk[1], prk[2])
	 * %xmm5 := (0, 0, prk[0], prk[1])
	 * %xmm6 := (0, 0, 0, prk[0])
	 */
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm5
	movdqa	%xmm0,%xmm6
	pslldq	$4,%xmm4
	pslldq	$8,%xmm5
	pslldq	$12,%xmm6

	/*
	 * %xmm0 := (rk[0] = t ^ prk[0],
	 *     rk[1] = t ^ prk[0] ^ prk[1],
	 *     rk[2] = t ^ prk[0] ^ prk[1] ^ prk[2],
	 *     rk[3] = t ^ prk[0] ^ prk[1] ^ prk[2] ^ prk[3])
	 */
	pxor	%xmm2,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm5,%xmm0
	pxor	%xmm6,%xmm0

	movdqa	%xmm0,(%rdi)	/* store round key */
	lea	0x10(%rdi),%rdi	/* advance to next round key address */
	ret
END(aesni_expand128)

/*
 * aesni_expand192a(uint128_t *rkp@rdi, uint128_t prk@xmm0,
 *     uint64_t rklo@xmm1, uint128_t keygenassist@xmm2)
 *
 * Set even-numbered AES-192 round key.
 *
 * Internal ABI.  On entry:
 *
 *	%rdi = rkp, pointer to two round keys to compute
 *	%xmm0 = (prk[0], prk[1], prk[2], prk[3])
 *	%xmm1 = (rklo[0], rklo[1], xxx, xxx)
 *	%xmm2 = (xxx, t = Rot(SubWord(rklo[1])) ^ RCON, xxx, xxx)
 *
 * On exit:
 *
 *	%rdi = &rkp[2], rkp advanced by two round keys
 *	%xmm0 = nrk, second round key we just computed
 *	%xmm1 = rk, first round key we just computed
 *	%xmm2 = garbage
 *	%xmm4 = garbage
 *	%xmm5 = garbage
 *	%xmm6 = garbage
 *	%xmm7 = garbage
 */
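
/*
 * Illustrative sketch only (not part of this file): the AES-192 key
 * schedule is a sequence of 4-byte words w[0..51] with
 *
 *	w[i] = w[i-6] ^ (i % 6 == 0 ? RotSubRcon(w[i-1]) : w[i-1]),
 *
 * so each application of the nonlinear step yields six new words,
 * i.e. one and a half 16-byte round keys.  aesni_expand192a and
 * aesni_expand192b therefore alternate: each consumes one
 * AESKEYGENASSIST result, and the odd half round key is carried
 * between them in %xmm1.
 */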
	.text
	_ALIGN_TEXT
	.type	aesni_expand192a,@function
aesni_expand192a:
	/*
	 * %xmm2 := (%xmm2[1], %xmm2[1], %xmm2[1], %xmm2[1]),
	 * i.e., set each word of %xmm2 to t := Rot(SubWord(rklo[1])) ^ RCON.
	 */
	pshufd	$0b01010101,%xmm2,%xmm2

	/*
	 * We need to compute:
	 *
	 * rk[0] := rklo[0]
	 * rk[1] := rklo[1]
	 * rk[2] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0]
	 * rk[3] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ prk[1]
	 * nrk[0] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ prk[1] ^ prk[2]
	 * nrk[1] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ ... ^ prk[3]
	 * nrk[2] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ ... ^ prk[3] ^ rklo[0]
	 * nrk[3] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ ... ^ prk[3] ^ rklo[0]
	 *     ^ rklo[1]
	 */

	/*
	 * %xmm4 := (prk[0], prk[1], prk[2], prk[3])
	 * %xmm5 := (0, prk[0], prk[1], prk[2])
	 * %xmm6 := (0, 0, prk[0], prk[1])
	 * %xmm7 := (0, 0, 0, prk[0])
	 */
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm5
	movdqa	%xmm0,%xmm6
	movdqa	%xmm0,%xmm7
	pslldq	$4,%xmm5
	pslldq	$8,%xmm6
	pslldq	$12,%xmm7

	/* %xmm4 := (rk[2], rk[3], nrk[0], nrk[1]) */
	pxor	%xmm2,%xmm4
	pxor	%xmm5,%xmm4
	pxor	%xmm6,%xmm4
	pxor	%xmm7,%xmm4

	/*
	 * At this point, rk is split across %xmm1 (rk[0],rk[1],...) and
	 * %xmm4 (rk[2],rk[3],...); nrk is in %xmm4 (...,nrk[0],nrk[1]);
	 * and we have yet to compute nrk[2] or nrk[3], which requires
	 * rklo[0] and rklo[1] in %xmm1 (rklo[0], rklo[1], ...).  We need
	 * nrk to end up in %xmm0 at the end, so gather rk into %xmm1 and
	 * nrk into %xmm0.
	 */

	/* %xmm0 := (nrk[0], nrk[1], nrk[1], nrk[1]) */
	pshufd	$0b11111110,%xmm4,%xmm0

	/*
	 * %xmm6 := (0, 0, rklo[0], rklo[1])
	 * %xmm7 := (0, 0, 0, rklo[0])
	 */
	movdqa	%xmm1,%xmm6
	movdqa	%xmm1,%xmm7

	pslldq	$8,%xmm6
	pslldq	$12,%xmm7

	/*
	 * %xmm0 := (nrk[0],
	 *     nrk[1],
	 *     nrk[2] = nrk[1] ^ rklo[0],
	 *     nrk[3] = nrk[1] ^ rklo[0] ^ rklo[1])
	 */
	pxor	%xmm6,%xmm0
	pxor	%xmm7,%xmm0

	/* %xmm1 := (rk[0], rk[1], rk[2], rk[3]) */
	shufps	$0b01000100,%xmm4,%xmm1

	movdqa	%xmm1,(%rdi)	/* store round key */
	movdqa	%xmm0,0x10(%rdi)	/* store next round key */
	lea	0x20(%rdi),%rdi	/* advance two round keys */
	ret
END(aesni_expand192a)

/*
 * aesni_expand192b(uint128_t *roundkey@rdi, uint128_t prk@xmm0,
 *     uint128_t keygenassist@xmm2)
 *
 * Set odd-numbered AES-192 round key.
 *
 * Internal ABI.  On entry:
 *
 *	%rdi = rkp, pointer to round key to compute
 *	%xmm0 = (prk[0], prk[1], prk[2], prk[3])
 *	%xmm1 = (xxx, xxx, pprk[2], pprk[3])
 *	%xmm2 = (xxx, xxx, xxx, t = Rot(Sub(prk[3])) ^ RCON)
 *
 * On exit:
 *
 *	%rdi = &rkp[1], rkp advanced by one round key
 *	%xmm0 = rk, the round key we just computed
 *	%xmm1 = (nrk[0], nrk[1], xxx, xxx), half of next round key
 *	%xmm2 = garbage
 *	%xmm4 = garbage
 *	%xmm5 = garbage
 *	%xmm6 = garbage
 *	%xmm7 = garbage
 */
	.text
	_ALIGN_TEXT
	.type	aesni_expand192b,@function
aesni_expand192b:
	/*
	 * %xmm2 := (%xmm2[3], %xmm2[3], %xmm2[3], %xmm2[3]),
	 * i.e., set each word of %xmm2 to t := Rot(Sub(prk[3])) ^ RCON.
	 */
	pshufd	$0b11111111,%xmm2,%xmm2

	/*
	 * We need to compute:
	 *
	 * rk[0] := Rot(Sub(prk[3])) ^ RCON ^ pprk[2]
	 * rk[1] := Rot(Sub(prk[3])) ^ RCON ^ pprk[2] ^ pprk[3]
	 * rk[2] := Rot(Sub(prk[3])) ^ RCON ^ pprk[2] ^ pprk[3] ^ prk[0]
	 * rk[3] := Rot(Sub(prk[3])) ^ RCON ^ pprk[2] ^ pprk[3] ^ prk[0]
	 *     ^ prk[1]
	 * nrk[0] := Rot(Sub(prk[3])) ^ RCON ^ pprk[2] ^ pprk[3] ^ prk[0]
	 *     ^ prk[1] ^ prk[2]
	 * nrk[1] := Rot(Sub(prk[3])) ^ RCON ^ pprk[2] ^ pprk[3] ^ prk[0]
	 *     ^ prk[1] ^ prk[2] ^ prk[3]
	 */

	/* %xmm1 := (pprk[2], pprk[3], prk[0], prk[1]) */
	shufps	$0b01001110,%xmm0,%xmm1

	/*
	 * %xmm5 := (0, pprk[2], pprk[3], prk[0])
	 * %xmm6 := (0, 0, pprk[2], pprk[3])
	 * %xmm7 := (0, 0, 0, pprk[2])
	 */
	movdqa	%xmm1,%xmm5
	movdqa	%xmm1,%xmm6
	movdqa	%xmm1,%xmm7
	pslldq	$4,%xmm5
	pslldq	$8,%xmm6
	pslldq	$12,%xmm7

	/* %xmm1 := (rk[0], rk[1], rk[2], rk[3]) */
	pxor	%xmm2,%xmm1
	pxor	%xmm5,%xmm1
	pxor	%xmm6,%xmm1
	pxor	%xmm7,%xmm1

	/* %xmm4 := (prk[2], prk[3], xxx, xxx) */
	pshufd	$0b00001110,%xmm0,%xmm4

	/* %xmm5 := (0, prk[2], xxx, xxx) */
	movdqa	%xmm4,%xmm5
	pslldq	$4,%xmm5

	/* %xmm0 := (rk[0], rk[1], rk[2], rk[3]) */
	movdqa	%xmm1,%xmm0

	/* %xmm1 := (rk[3], rk[3], xxx, xxx) */
	shufps	$0b00001111,%xmm1,%xmm1

	/*
	 * %xmm1 := (nrk[0] = rk[3] ^ prk[2],
	 *     nrk[1] = rk[3] ^ prk[2] ^ prk[3],
	 *     xxx,
	 *     xxx)
	 */
	pxor	%xmm4,%xmm1
	pxor	%xmm5,%xmm1

	movdqa	%xmm0,(%rdi)	/* store round key */
	lea	0x10(%rdi),%rdi	/* advance to next round key address */
	ret
END(aesni_expand192b)

/*
 * aesni_expand256a(uint128_t *rkp@rdi, uint128_t pprk@xmm0,
 *     uint128_t prk@xmm1, uint128_t keygenassist@xmm2)
 *
 * Set even-numbered AES-256 round key.
 *
 * Internal ABI.  On entry:
 *
 *	%rdi = rkp, pointer to round key to compute
 *	%xmm0 = (pprk[0], pprk[1], pprk[2], pprk[3])
 *	%xmm1 = (prk[0], prk[1], prk[2], prk[3])
 *	%xmm2 = (xxx, xxx, xxx, t = Rot(SubWord(prk[3])) ^ RCON)
 *
 * On exit:
 *
 *	%rdi = &rkp[1], rkp advanced by one round key
 *	%xmm0 = rk, the round key we just computed
 *	%xmm1 = prk, previous round key, preserved from entry
 *	%xmm2 = garbage
 *	%xmm4 = garbage
 *	%xmm5 = garbage
 *	%xmm6 = garbage
 *
 * The computation turns out to be the same as for AES-128; the
 * previous round key does not figure into it, only the
 * previous-previous round key.
 */
aesni_expand256a = aesni_expand128

/*
 * aesni_expand256b(uint128_t *rkp@rdi, uint128_t prk@xmm0,
 *     uint128_t pprk@xmm1, uint128_t keygenassist@xmm2)
 *
 * Set odd-numbered AES-256 round key.
 *
 * Internal ABI.  On entry:
 *
 *	%rdi = rkp, pointer to round key to compute
 *	%xmm0 = (prk[0], prk[1], prk[2], prk[3])
 *	%xmm1 = (pprk[0], pprk[1], pprk[2], pprk[3])
 *	%xmm2 = (xxx, xxx, t = Sub(prk[3]), xxx)
 *
 * On exit:
 *
 *	%rdi = &rkp[1], rkp advanced by one round key
 *	%xmm0 = prk, previous round key, preserved from entry
 *	%xmm1 = rk, the round key we just computed
 *	%xmm2 = garbage
 *	%xmm4 = garbage
 *	%xmm5 = garbage
 *	%xmm6 = garbage
 */
	.text
	_ALIGN_TEXT
	.type	aesni_expand256b,@function
aesni_expand256b:
	/*
	 * %xmm2 := (%xmm2[2], %xmm2[2], %xmm2[2], %xmm2[2]),
	 * i.e., set each word of %xmm2 to t := Sub(prk[3]).
	 */
	pshufd	$0b10101010,%xmm2,%xmm2

	/*
	 * %xmm4 := (0, pprk[0], pprk[1], pprk[2])
	 * %xmm5 := (0, 0, pprk[0], pprk[1])
	 * %xmm6 := (0, 0, 0, pprk[0])
	 */
	movdqa	%xmm1,%xmm4
	movdqa	%xmm1,%xmm5
	movdqa	%xmm1,%xmm6
	pslldq	$4,%xmm4
	pslldq	$8,%xmm5
	pslldq	$12,%xmm6

	/*
	 * %xmm1 := (rk[0] = t ^ pprk[0],
	 *     rk[1] = t ^ pprk[0] ^ pprk[1],
	 *     rk[2] = t ^ pprk[0] ^ pprk[1] ^ pprk[2],
	 *     rk[3] = t ^ pprk[0] ^ pprk[1] ^ pprk[2] ^ pprk[3])
	 */
	pxor	%xmm2,%xmm1
	pxor	%xmm4,%xmm1
	pxor	%xmm5,%xmm1
	pxor	%xmm6,%xmm1

	movdqa	%xmm1,(%rdi)	/* store round key */
	lea	0x10(%rdi),%rdi	/* advance to next round key address */
	ret
END(aesni_expand256b)

/*
 * aesni_enctodec(const struct aesenc *enckey@rdi, struct aesdec *deckey@rsi,
 *     uint32_t nrounds@rdx)
 *
 * Convert AES encryption round keys to AES decryption round keys.
 * nrounds must be between 10 and 14.
 *
 * Standard ABI calling convention.
 */
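
/*
 * Illustrative C sketch only (hypothetical names, not part of this
 * file's API).  The equivalent inverse cipher uses the encryption
 * round keys in reverse order, with InvMixColumns (AESIMC) applied
 * to all but the outermost two:
 *
 *	dec[0] = enc[nrounds];
 *	for (i = 1; i < nrounds; i++)
 *		dec[i] = InvMixColumns(enc[nrounds - i]);
 *	dec[nrounds] = enc[0];
 */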
ENTRY(aesni_enctodec)
	shl	$4,%edx		/* rdx := byte offset of last round key */
	movdqa	(%rdi,%rdx),%xmm0	/* load last round key */
	movdqa	%xmm0,(%rsi)	/* store last round key verbatim */
	jmp	2f
	_ALIGN_TEXT
1:	movdqa	(%rdi,%rdx),%xmm0	/* load round key */
	aesimc	%xmm0,%xmm0	/* convert encryption to decryption */
	movdqa	%xmm0,(%rsi)	/* store round key */
2:	sub	$0x10,%rdx	/* advance to next round key */
	lea	0x10(%rsi),%rsi
	jnz	1b		/* repeat if more rounds */
	movdqa	(%rdi),%xmm0	/* load first round key */
	movdqa	%xmm0,(%rsi)	/* store first round key verbatim */
	ret
END(aesni_enctodec)

/*
 * aesni_enc(const struct aesenc *enckey@rdi, const uint8_t in[16] @rsi,
 *     uint8_t out[16] @rdx, uint32_t nrounds@ecx)
 *
 * Encrypt a single block.
 *
 * Standard ABI calling convention.
 */
ENTRY(aesni_enc)
	movdqu	(%rsi),%xmm0
	call	aesni_enc1
	movdqu	%xmm0,(%rdx)
	ret
END(aesni_enc)

/*
 * aesni_dec(const struct aesdec *deckey@rdi, const uint8_t in[16] @rsi,
 *     uint8_t out[16] @rdx, uint32_t nrounds@ecx)
 *
 * Decrypt a single block.
 *
 * Standard ABI calling convention.
 */
ENTRY(aesni_dec)
	movdqu	(%rsi),%xmm0
	call	aesni_dec1
	movdqu	%xmm0,(%rdx)
	ret
END(aesni_dec)

/*
 * aesni_cbc_enc(const struct aesenc *enckey@rdi, const uint8_t *in@rsi,
 *     uint8_t *out@rdx, size_t nbytes@rcx, uint8_t iv[16] @r8,
 *     uint32_t nrounds@r9d)
 *
 * Encrypt a contiguous sequence of blocks with AES-CBC.
 *
 * nbytes must be an integral multiple of 16.
 *
 * Standard ABI calling convention.
 */
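
/*
 * Illustrative C sketch only (hypothetical names):
 *
 *	cv = iv;
 *	for (i = 0; i < nbytes/16; i++)
 *		cv = c[i] = AES(enckey, cv ^ p[i]);
 *	iv = cv;
 *
 * Each block depends on the previous ciphertext block, so the loop
 * is inherently serial; that is why there is no 8-block variant of
 * CBC encryption here.
 */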
ENTRY(aesni_cbc_enc)
	cmp	$0,%rcx
	jz	2f
	mov	%rcx,%r10	/* r10 := nbytes */
	movdqu	(%r8),%xmm0	/* xmm0 := chaining value */
	_ALIGN_TEXT
1:	movdqu	(%rsi),%xmm1	/* xmm1 := plaintext block */
	lea	0x10(%rsi),%rsi
	pxor	%xmm1,%xmm0	/* xmm0 := cv ^ ptxt */
	mov	%r9d,%ecx	/* ecx := nrounds */
	call	aesni_enc1	/* xmm0 := ciphertext block */
	movdqu	%xmm0,(%rdx)
	lea	0x10(%rdx),%rdx
	sub	$0x10,%r10
	jnz	1b		/* repeat if r10 is nonzero */
	movdqu	%xmm0,(%r8)	/* store chaining value */
2:	ret
END(aesni_cbc_enc)

/*
 * aesni_cbc_dec1(const struct aesdec *deckey@rdi, const uint8_t *in@rsi,
 *     uint8_t *out@rdx, size_t nbytes@rcx, const uint8_t iv[16] @r8,
 *     uint32_t nrounds@r9)
 *
 * Decrypt a contiguous sequence of blocks with AES-CBC.
 *
 * nbytes must be a positive integral multiple of 16.  This routine
 * is not vectorized; use aesni_cbc_dec8 for >=8 blocks at once.
 *
 * Standard ABI calling convention.
 */
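
/*
 * Illustrative C sketch only (hypothetical names).  CBC decryption
 * has no chaining dependency, so the loop below runs from the last
 * block to the first; this lets it update the iv up front and reuse
 * each loaded ciphertext block as the next chaining value:
 *
 *	for (i = nbytes/16 - 1; i >= 0; i--)
 *		p[i] = AES_inv(deckey, c[i]) ^ (i ? c[i-1] : iv);
 *	iv = c[nbytes/16 - 1];
 */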
ENTRY(aesni_cbc_dec1)
	push	%rbp		/* create stack frame uint128[1] */
	mov	%rsp,%rbp
	sub	$0x10,%rsp
	movdqu	(%r8),%xmm8	/* xmm8 := iv */
	movdqa	%xmm8,(%rsp)	/* save iv */
	mov	%rcx,%r10	/* r10 := nbytes */
	movdqu	-0x10(%rsi,%r10),%xmm0	/* xmm0 := last ciphertext block */
	movdqu	%xmm0,(%r8)	/* update iv */
	jmp	2f
	_ALIGN_TEXT
1:	movdqu	-0x10(%rsi,%r10),%xmm8	/* xmm8 := chaining value */
	pxor	%xmm8,%xmm0	/* xmm0 := ptxt */
	movdqu	%xmm0,(%rdx,%r10)	/* store plaintext block */
	movdqa	%xmm8,%xmm0	/* xmm0 := cv = ciphertext block */
2:	mov	%r9d,%ecx	/* ecx := nrounds */
	call	aesni_dec1	/* xmm0 := cv ^ ptxt */
	sub	$0x10,%r10
	jnz	1b		/* repeat if more blocks */
	pxor	(%rsp),%xmm0	/* xmm0 := ptxt */
	movdqu	%xmm0,(%rdx)	/* store first plaintext block */
	leave
	ret
END(aesni_cbc_dec1)

/*
 * aesni_cbc_dec8(const struct aesdec *deckey@rdi, const uint8_t *in@rsi,
 *     uint8_t *out@rdx, size_t nbytes@rcx, const uint8_t iv[16] @r8,
 *     uint32_t nrounds@r9)
 *
 * Decrypt a contiguous sequence of 8-block units with AES-CBC.
 *
 * nbytes must be a positive integral multiple of 128.
 *
 * Standard ABI calling convention.
 */
ENTRY(aesni_cbc_dec8)
	push	%rbp		/* create stack frame uint128[1] */
	mov	%rsp,%rbp
	sub	$0x10,%rsp
	movdqu	(%r8),%xmm8	/* xmm8 := iv */
	movdqa	%xmm8,(%rsp)	/* save iv */
	mov	%rcx,%r10	/* r10 := nbytes */
	movdqu	-0x10(%rsi,%r10),%xmm7	/* xmm7 := ciphertext block[n-1] */
	movdqu	%xmm7,(%r8)	/* update iv */
	jmp	2f
	_ALIGN_TEXT
1:	movdqu	-0x10(%rsi,%r10),%xmm7	/* xmm7 := cv[0] */
	pxor	%xmm7,%xmm0	/* xmm0 := ptxt[0] */
	movdqu	%xmm0,(%rdx,%r10)	/* store plaintext block */
2:	movdqu	-0x20(%rsi,%r10),%xmm6	/* xmm6 := ciphertext block[n-2] */
	movdqu	-0x30(%rsi,%r10),%xmm5	/* xmm5 := ciphertext block[n-3] */
	movdqu	-0x40(%rsi,%r10),%xmm4	/* xmm4 := ciphertext block[n-4] */
	movdqu	-0x50(%rsi,%r10),%xmm3	/* xmm3 := ciphertext block[n-5] */
	movdqu	-0x60(%rsi,%r10),%xmm2	/* xmm2 := ciphertext block[n-6] */
	movdqu	-0x70(%rsi,%r10),%xmm1	/* xmm1 := ciphertext block[n-7] */
	movdqu	-0x80(%rsi,%r10),%xmm0	/* xmm0 := ciphertext block[n-8] */
	movdqa	%xmm6,%xmm15	/* xmm[8+i] := cv[i], 0<i<8 */
	movdqa	%xmm5,%xmm14
	movdqa	%xmm4,%xmm13
	movdqa	%xmm3,%xmm12
	movdqa	%xmm2,%xmm11
	movdqa	%xmm1,%xmm10
	movdqa	%xmm0,%xmm9
	mov	%r9d,%ecx	/* ecx := nrounds */
	call	aesni_dec8	/* xmm[i] := cv[i] ^ ptxt[i], 0<=i<8 */
	pxor	%xmm15,%xmm7	/* xmm[i] := ptxt[i], 0<i<8 */
	pxor	%xmm14,%xmm6
	pxor	%xmm13,%xmm5
	pxor	%xmm12,%xmm4
	pxor	%xmm11,%xmm3
	pxor	%xmm10,%xmm2
	pxor	%xmm9,%xmm1
	movdqu	%xmm7,-0x10(%rdx,%r10)	/* store plaintext blocks */
	movdqu	%xmm6,-0x20(%rdx,%r10)
	movdqu	%xmm5,-0x30(%rdx,%r10)
	movdqu	%xmm4,-0x40(%rdx,%r10)
	movdqu	%xmm3,-0x50(%rdx,%r10)
	movdqu	%xmm2,-0x60(%rdx,%r10)
	movdqu	%xmm1,-0x70(%rdx,%r10)
	sub	$0x80,%r10
	jnz	1b		/* repeat if more blocks */
	pxor	(%rsp),%xmm0	/* xmm0 := ptxt[0] */
	movdqu	%xmm0,(%rdx)	/* store first plaintext block */
	leave
	ret
END(aesni_cbc_dec8)

/*
 * aesni_xts_enc1(const struct aesenc *enckey@rdi, const uint8_t *in@rsi,
 *     uint8_t *out@rdx, size_t nbytes@rcx, uint8_t tweak[16] @r8,
 *     uint32_t nrounds@r9d)
 *
 * Encrypt a contiguous sequence of blocks with AES-XTS.
 *
 * nbytes must be a positive integral multiple of 16.  This routine
 * is not vectorized; use aesni_xts_enc8 for >=8 blocks at once.
 *
 * Standard ABI calling convention.
 */
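
/*
 * Illustrative C sketch only (hypothetical names), where t is the
 * tweak, multiplied by x in GF(2^128) after each block (see
 * aesni_xts_mulx below):
 *
 *	for (i = 0; i < nbytes/16; i++) {
 *		c[i] = AES(enckey, p[i] ^ t) ^ t;
 *		t = xts_mulx(t);
 *	}
 */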
ENTRY(aesni_xts_enc1)
	mov	%rcx,%r10	/* r10 := nbytes */
	movdqu	(%r8),%xmm15	/* xmm15 := tweak */
	_ALIGN_TEXT
1:	movdqu	(%rsi),%xmm0	/* xmm0 := ptxt */
	lea	0x10(%rsi),%rsi	/* advance rsi to next block */
	pxor	%xmm15,%xmm0	/* xmm0 := ptxt ^ tweak */
	mov	%r9d,%ecx	/* ecx := nrounds */
	call	aesni_enc1	/* xmm0 := AES(ptxt ^ tweak) */
	pxor	%xmm15,%xmm0	/* xmm0 := AES(ptxt ^ tweak) ^ tweak */
	movdqu	%xmm0,(%rdx)	/* store ciphertext block */
	lea	0x10(%rdx),%rdx	/* advance rdx to next block */
	call	aesni_xts_mulx	/* xmm15 *= x; trash xmm0 */
	sub	$0x10,%r10
	jnz	1b		/* repeat if more blocks */
	movdqu	%xmm15,(%r8)	/* update tweak */
	ret
END(aesni_xts_enc1)

/*
 * aesni_xts_enc8(const struct aesenc *enckey@rdi, const uint8_t *in@rsi,
 *     uint8_t *out@rdx, size_t nbytes@rcx, uint8_t tweak[16] @r8,
 *     uint32_t nrounds@r9d)
 *
 * Encrypt a contiguous sequence of blocks with AES-XTS.
 *
 * nbytes must be a positive integral multiple of 128.
 *
 * Standard ABI calling convention.
 */
ENTRY(aesni_xts_enc8)
	push	%rbp		/* create stack frame uint128[1] */
	mov	%rsp,%rbp
	sub	$0x10,%rsp
	mov	%rcx,%r10	/* r10 := nbytes */
	movdqu	(%r8),%xmm15	/* xmm15 := tweak[0] */
	_ALIGN_TEXT
1:	movdqa	%xmm15,%xmm8	/* xmm8 := tweak[0] */
	call	aesni_xts_mulx	/* xmm15 := tweak[1] */
	movdqa	%xmm15,%xmm9	/* xmm9 := tweak[1] */
	call	aesni_xts_mulx	/* xmm15 := tweak[2] */
	movdqa	%xmm15,%xmm10	/* xmm10 := tweak[2] */
	call	aesni_xts_mulx	/* xmm15 := tweak[3] */
	movdqa	%xmm15,%xmm11	/* xmm11 := tweak[3] */
	call	aesni_xts_mulx	/* xmm15 := tweak[4] */
	movdqa	%xmm15,%xmm12	/* xmm12 := tweak[4] */
	call	aesni_xts_mulx	/* xmm15 := tweak[5] */
	movdqa	%xmm15,%xmm13	/* xmm13 := tweak[5] */
	call	aesni_xts_mulx	/* xmm15 := tweak[6] */
	movdqa	%xmm15,%xmm14	/* xmm14 := tweak[6] */
	call	aesni_xts_mulx	/* xmm15 := tweak[7] */
	movdqu	(%rsi),%xmm0	/* xmm[i] := ptxt[i] */
	movdqu	0x10(%rsi),%xmm1
	movdqu	0x20(%rsi),%xmm2
	movdqu	0x30(%rsi),%xmm3
	movdqu	0x40(%rsi),%xmm4
	movdqu	0x50(%rsi),%xmm5
	movdqu	0x60(%rsi),%xmm6
	movdqu	0x70(%rsi),%xmm7
	lea	0x80(%rsi),%rsi	/* advance rsi to next block group */
	movdqa	%xmm8,(%rsp)	/* save tweak[0] */
	pxor	%xmm8,%xmm0	/* xmm[i] := ptxt[i] ^ tweak[i] */
	pxor	%xmm9,%xmm1
	pxor	%xmm10,%xmm2
	pxor	%xmm11,%xmm3
	pxor	%xmm12,%xmm4
	pxor	%xmm13,%xmm5
	pxor	%xmm14,%xmm6
	pxor	%xmm15,%xmm7
	mov	%r9d,%ecx	/* ecx := nrounds */
	call	aesni_enc8	/* xmm[i] := AES(ptxt[i] ^ tweak[i]) */
	pxor	(%rsp),%xmm0	/* xmm[i] := AES(...) ^ tweak[i] */
	pxor	%xmm9,%xmm1
	pxor	%xmm10,%xmm2
	pxor	%xmm11,%xmm3
	pxor	%xmm12,%xmm4
	pxor	%xmm13,%xmm5
	pxor	%xmm14,%xmm6
	pxor	%xmm15,%xmm7
	movdqu	%xmm0,(%rdx)	/* store ciphertext blocks */
	movdqu	%xmm1,0x10(%rdx)
	movdqu	%xmm2,0x20(%rdx)
	movdqu	%xmm3,0x30(%rdx)
	movdqu	%xmm4,0x40(%rdx)
	movdqu	%xmm5,0x50(%rdx)
	movdqu	%xmm6,0x60(%rdx)
	movdqu	%xmm7,0x70(%rdx)
	lea	0x80(%rdx),%rdx	/* advance rdx to next block group */
	call	aesni_xts_mulx	/* xmm15 := tweak[8] */
	sub	$0x80,%r10
	jnz	1b		/* repeat if more block groups */
	movdqu	%xmm15,(%r8)	/* update tweak */
	leave
	ret
END(aesni_xts_enc8)

/*
 * aesni_xts_dec1(const struct aesdec *deckey@rdi, const uint8_t *in@rsi,
 *     uint8_t *out@rdx, size_t nbytes@rcx, uint8_t tweak[16] @r8,
 *     uint32_t nrounds@r9d)
 *
 * Decrypt a contiguous sequence of blocks with AES-XTS.
 *
 * nbytes must be a positive integral multiple of 16.  This routine
 * is not vectorized; use aesni_xts_dec8 for >=8 blocks at once.
 *
 * Standard ABI calling convention.
 */
ENTRY(aesni_xts_dec1)
	mov	%rcx,%r10	/* r10 := nbytes */
	movdqu	(%r8),%xmm15	/* xmm15 := tweak */
	_ALIGN_TEXT
1:	movdqu	(%rsi),%xmm0	/* xmm0 := ctxt */
	lea	0x10(%rsi),%rsi	/* advance rsi to next block */
	pxor	%xmm15,%xmm0	/* xmm0 := ctxt ^ tweak */
	mov	%r9d,%ecx	/* ecx := nrounds */
	call	aesni_dec1	/* xmm0 := AES^-1(ctxt ^ tweak) */
	pxor	%xmm15,%xmm0	/* xmm0 := AES^-1(ctxt ^ tweak) ^ tweak */
	movdqu	%xmm0,(%rdx)	/* store plaintext block */
	lea	0x10(%rdx),%rdx	/* advance rdx to next block */
	call	aesni_xts_mulx	/* xmm15 *= x; trash xmm0 */
	sub	$0x10,%r10
	jnz	1b		/* repeat if more blocks */
	movdqu	%xmm15,(%r8)	/* update tweak */
	ret
END(aesni_xts_dec1)

/*
 * aesni_xts_dec8(const struct aesdec *deckey@rdi, const uint8_t *in@rsi,
 *     uint8_t *out@rdx, size_t nbytes@rcx, uint8_t tweak[16] @r8,
 *     uint32_t nrounds@r9d)
 *
 * Decrypt a contiguous sequence of blocks with AES-XTS.
 *
 * nbytes must be a positive integral multiple of 128.
 *
 * Standard ABI calling convention.
 */
ENTRY(aesni_xts_dec8)
	push	%rbp		/* create stack frame uint128[1] */
	mov	%rsp,%rbp
	sub	$0x10,%rsp
	mov	%rcx,%r10	/* r10 := nbytes */
	movdqu	(%r8),%xmm15	/* xmm15 := tweak[0] */
	_ALIGN_TEXT
1:	movdqa	%xmm15,%xmm8	/* xmm8 := tweak[0] */
	call	aesni_xts_mulx	/* xmm15 := tweak[1] */
	movdqa	%xmm15,%xmm9	/* xmm9 := tweak[1] */
	call	aesni_xts_mulx	/* xmm15 := tweak[2] */
	movdqa	%xmm15,%xmm10	/* xmm10 := tweak[2] */
	call	aesni_xts_mulx	/* xmm15 := tweak[3] */
	movdqa	%xmm15,%xmm11	/* xmm11 := tweak[3] */
	call	aesni_xts_mulx	/* xmm15 := tweak[4] */
	movdqa	%xmm15,%xmm12	/* xmm12 := tweak[4] */
	call	aesni_xts_mulx	/* xmm15 := tweak[5] */
	movdqa	%xmm15,%xmm13	/* xmm13 := tweak[5] */
	call	aesni_xts_mulx	/* xmm15 := tweak[6] */
	movdqa	%xmm15,%xmm14	/* xmm14 := tweak[6] */
	call	aesni_xts_mulx	/* xmm15 := tweak[7] */
	movdqu	(%rsi),%xmm0	/* xmm[i] := ctxt[i] */
	movdqu	0x10(%rsi),%xmm1
	movdqu	0x20(%rsi),%xmm2
	movdqu	0x30(%rsi),%xmm3
	movdqu	0x40(%rsi),%xmm4
	movdqu	0x50(%rsi),%xmm5
	movdqu	0x60(%rsi),%xmm6
	movdqu	0x70(%rsi),%xmm7
	lea	0x80(%rsi),%rsi	/* advance rsi to next block group */
	movdqa	%xmm8,(%rsp)	/* save tweak[0] */
	pxor	%xmm8,%xmm0	/* xmm[i] := ctxt[i] ^ tweak[i] */
	pxor	%xmm9,%xmm1
	pxor	%xmm10,%xmm2
	pxor	%xmm11,%xmm3
	pxor	%xmm12,%xmm4
	pxor	%xmm13,%xmm5
	pxor	%xmm14,%xmm6
	pxor	%xmm15,%xmm7
	mov	%r9d,%ecx	/* ecx := nrounds */
	call	aesni_dec8	/* xmm[i] := AES^-1(ctxt[i] ^ tweak[i]) */
	pxor	(%rsp),%xmm0	/* xmm[i] := AES^-1(...) ^ tweak[i] */
	pxor	%xmm9,%xmm1
	pxor	%xmm10,%xmm2
	pxor	%xmm11,%xmm3
	pxor	%xmm12,%xmm4
	pxor	%xmm13,%xmm5
	pxor	%xmm14,%xmm6
	pxor	%xmm15,%xmm7
	movdqu	%xmm0,(%rdx)	/* store plaintext blocks */
	movdqu	%xmm1,0x10(%rdx)
	movdqu	%xmm2,0x20(%rdx)
	movdqu	%xmm3,0x30(%rdx)
	movdqu	%xmm4,0x40(%rdx)
	movdqu	%xmm5,0x50(%rdx)
	movdqu	%xmm6,0x60(%rdx)
	movdqu	%xmm7,0x70(%rdx)
	lea	0x80(%rdx),%rdx	/* advance rdx to next block group */
	call	aesni_xts_mulx	/* xmm15 := tweak[8] */
	sub	$0x80,%r10
	jnz	1b		/* repeat if more block groups */
	movdqu	%xmm15,(%r8)	/* update tweak */
	leave
	ret
END(aesni_xts_dec8)

/*
 * aesni_xts_mulx(tweak@xmm15)
 *
 * Multiply xmm15 by x, modulo x^128 + x^7 + x^2 + x + 1, in place.
 * Uses %xmm0 as temporary.
 */
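
/*
 * Illustrative C sketch only, on a two-limb little-endian
 * representation (hypothetical names):
 *
 *	carry = hi >> 63;
 *	hi = (hi << 1) | (lo >> 63);
 *	lo = (lo << 1) ^ (carry ? 0x87 : 0);
 *
 * There is no 128-bit vector shift, so the code below shifts the two
 * quadwords independently with PSLLQ and patches in both
 * cross-quadword bits at once: PCMPGTQ computes a mask from the two
 * sign bits, PSHUFD swaps the halves of the mask, and PAND selects
 * 0x87 and 1 from xtscarry accordingly.
 */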
	.text
	_ALIGN_TEXT
	.type	aesni_xts_mulx,@function
aesni_xts_mulx:
	/*
	 * Simultaneously determine
	 * (a) whether the high bit of the low quadword must be
	 *     shifted into the low bit of the high quadword, and
	 * (b) whether the high bit of the high quadword must be
	 *     carried into x^128 = x^7 + x^2 + x + 1.
	 */
	pxor	%xmm0,%xmm0	/* xmm0 := 0 */
	pcmpgtq	%xmm15,%xmm0	/* xmm0[i] := -1 if 0 > xmm15[i] else 0 */
	pshufd	$0b01001110,%xmm0,%xmm0	/* swap halves of xmm0 */
	pand	xtscarry(%rip),%xmm0	/* copy xtscarry according to mask */
	psllq	$1,%xmm15	/* shift */
	pxor	%xmm0,%xmm15	/* incorporate (a) and (b) */
	ret
END(aesni_xts_mulx)

	.section .rodata
	.p2align 4
	.type	xtscarry,@object
xtscarry:
	.byte	0x87,0,0,0, 0,0,0,0, 1,0,0,0, 0,0,0,0
END(xtscarry)

/*
 * aesni_xts_update(const uint8_t in[16] @rdi, uint8_t out[16] @rsi)
 *
 * Update an AES-XTS tweak.
 *
 * Standard ABI calling convention.
 */
ENTRY(aesni_xts_update)
	movdqu	(%rdi),%xmm15
	call	aesni_xts_mulx
	movdqu	%xmm15,(%rsi)
	ret
END(aesni_xts_update)

/*
 * aesni_cbcmac_update1(const struct aesenc *enckey@rdi, const uint8_t *in@rsi,
 *     size_t nbytes@rdx, uint8_t auth[16] @rcx, uint32_t nrounds@r8d)
 *
 * Update CBC-MAC.
 *
 * nbytes must be a positive integral multiple of 16.
 *
 * Standard ABI calling convention.
 */
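
/*
 * Illustrative C sketch only (hypothetical names): CBC-MAC is CBC
 * encryption that discards the ciphertext and keeps only the final
 * chaining value as the authenticator:
 *
 *	for (i = 0; i < nbytes/16; i++)
 *		auth = AES(enckey, auth ^ p[i]);
 */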
ENTRY(aesni_cbcmac_update1)
	movdqu	(%rcx),%xmm0	/* xmm0 := auth */
	mov	%rdx,%r10	/* r10 := nbytes */
	mov	%rcx,%rdx	/* rdx := &auth */
	_ALIGN_TEXT
1:	pxor	(%rsi),%xmm0	/* xmm0 ^= plaintext block */
	lea	0x10(%rsi),%rsi
	mov	%r8d,%ecx	/* ecx := nrounds */
	call	aesni_enc1	/* xmm0 := auth'; trash rax,rcx,xmm8 */
	sub	$0x10,%r10
	jnz	1b
	movdqu	%xmm0,(%rdx)	/* store auth' */
	ret
END(aesni_cbcmac_update1)

/*
 * aesni_ccm_enc1(const struct aesenc *enckey@rdi, const uint8_t *in@rsi,
 *     uint8_t *out@rdx, size_t nbytes@rcx,
 *     uint8_t authctr[32] @r8, uint32_t nrounds@r9d)
 *
 * Update CCM encryption.
 *
 * nbytes must be a positive integral multiple of 16.
 *
 * Standard ABI calling convention.
 */
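
/*
 * Illustrative C sketch of one iteration (hypothetical names).  CCM
 * interleaves CBC-MAC with CTR encryption; the two AES calls per
 * block are independent, so the loop below issues them as a pair
 * through aesni_enc2:
 *
 *	ctr++;
 *	auth = AES(enckey, auth ^ p[i]);
 *	c[i] = p[i] ^ AES(enckey, ctr);
 */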
ENTRY(aesni_ccm_enc1)
	mov	%rcx,%r10	/* r10 := nbytes */
	movdqu	0x10(%r8),%xmm2	/* xmm2 := ctr (be) */
	movdqa	bswap32(%rip),%xmm4	/* xmm4 := bswap32 table */
	movdqa	ctr32_inc(%rip),%xmm5	/* xmm5 := (0,0,0,1) (le) */
	movdqu	(%r8),%xmm0	/* xmm0 := auth */
	pshufb	%xmm4,%xmm2	/* xmm2 := ctr (le) */
	_ALIGN_TEXT
1:	movdqu	(%rsi),%xmm3	/* xmm3 := plaintext block */
	paddd	%xmm5,%xmm2	/* increment ctr (32-bit) */
	lea	0x10(%rsi),%rsi
	movdqa	%xmm2,%xmm1	/* xmm1 := ctr (le) */
	mov	%r9d,%ecx	/* ecx := nrounds */
	pshufb	%xmm4,%xmm1	/* xmm1 := ctr (be) */
	pxor	%xmm3,%xmm0	/* xmm0 := auth ^ ptxt */
	call	aesni_enc2	/* trash rax/rcx/xmm8 */
	pxor	%xmm1,%xmm3	/* xmm3 := ciphertext block */
	sub	$0x10,%r10	/* count down bytes */
	movdqu	%xmm3,(%rdx)	/* store ciphertext block */
	lea	0x10(%rdx),%rdx
	jnz	1b		/* repeat if more blocks */
	pshufb	%xmm4,%xmm2	/* xmm2 := ctr (be) */
	movdqu	%xmm0,(%r8)	/* store updated auth */
	movdqu	%xmm2,0x10(%r8)	/* store updated ctr */
	ret
END(aesni_ccm_enc1)

/*
 * aesni_ccm_dec1(const struct aesenc *enckey@rdi, const uint8_t *in@rsi,
 *     uint8_t *out@rdx, size_t nbytes@rcx,
 *     uint8_t authctr[32] @r8, uint32_t nrounds@r9d)
 *
 * Update CCM decryption.
 *
 * nbytes must be a positive integral multiple of 16.
 *
 * Standard ABI calling convention.
 */
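
/*
 * Illustrative C sketch only (hypothetical names).  For decryption
 * the CBC-MAC input is the plaintext, which is not available until
 * the CTR pad has been computed, so the two AES calls per block
 *
 *	p[i] = c[i] ^ AES(enckey, ctr + 1 + i);
 *	auth = AES(enckey, auth ^ p[i]);
 *
 * are dependent.  The loop below software-pipelines them instead,
 * authenticating block i-1 while computing the pad for block i,
 * with a prologue for the first pad and an epilogue for the last
 * authentication.
 */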
ENTRY(aesni_ccm_dec1)
	movdqu	0x10(%r8),%xmm2	/* xmm2 := ctr (be) */
	movdqa	bswap32(%rip),%xmm4	/* xmm4 := bswap32 table */
	movdqa	ctr32_inc(%rip),%xmm5	/* xmm5 := (0,0,0,1) (le) */
	movdqu	(%r8),%xmm1	/* xmm1 := auth */
	pshufb	%xmm4,%xmm2	/* xmm2 := ctr (le) */
	mov	%rcx,%r10	/* r10 := nbytes */

	/* Decrypt the first block. */
	paddd	%xmm5,%xmm2	/* increment ctr (32-bit) */
	mov	%r9d,%ecx	/* ecx := nrounds */
	movdqa	%xmm2,%xmm0	/* xmm0 := ctr (le) */
	movdqu	(%rsi),%xmm3	/* xmm3 := ctxt */
	pshufb	%xmm4,%xmm0	/* xmm0 := ctr (be) */
	lea	0x10(%rsi),%rsi
	call	aesni_enc1	/* xmm0 := pad; trash rax/rcx/xmm8 */
	jmp	2f

	_ALIGN_TEXT
1:	/*
	 * Authenticate the last block and decrypt the next block
	 * simultaneously.
	 *
	 *	xmm1 = auth ^ ptxt[-1]
	 *	xmm2 = ctr[-1] (le)
	 */
	paddd	%xmm5,%xmm2	/* increment ctr (32-bit) */
	mov	%r9d,%ecx	/* ecx := nrounds */
	movdqa	%xmm2,%xmm0	/* xmm0 := ctr (le) */
	movdqu	(%rsi),%xmm3	/* xmm3 := ctxt */
	pshufb	%xmm4,%xmm0	/* xmm0 := ctr (be) */
	lea	0x10(%rsi),%rsi
	call	aesni_enc2	/* xmm0 := pad, xmm1 := auth';
				 * trash rax/rcx/xmm8 */
2:	pxor	%xmm0,%xmm3	/* xmm3 := ptxt */
	sub	$0x10,%r10
	movdqu	%xmm3,(%rdx)	/* store plaintext */
	lea	0x10(%rdx),%rdx
	pxor	%xmm3,%xmm1	/* xmm1 := auth ^ ptxt */
	jnz	1b

	/* Authenticate the last block. */
	movdqa	%xmm1,%xmm0	/* xmm0 := auth ^ ptxt */
	mov	%r9d,%ecx	/* ecx := nrounds */
	call	aesni_enc1	/* xmm0 := auth' */
	pshufb	%xmm4,%xmm2	/* xmm2 := ctr (be) */
	movdqu	%xmm0,(%r8)	/* store updated auth */
	movdqu	%xmm2,0x10(%r8)	/* store updated ctr */
	ret
END(aesni_ccm_dec1)

	.section .rodata
	.p2align 4
	.type	bswap32,@object
bswap32:
	.byte	3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12
END(bswap32)

	.section .rodata
	.p2align 4
	.type	ctr32_inc,@object
ctr32_inc:
	.byte	0,0,0,0, 0,0,0,0, 0,0,0,0, 1,0,0,0
END(ctr32_inc)

/*
 * aesni_enc1(const struct aesenc *enckey@rdi, uint128_t block@xmm0,
 *     uint32_t nrounds@ecx)
 *
 * Encrypt a single AES block in %xmm0.
 *
 * Internal ABI.  Uses %rax and %xmm8 as temporaries.  Destroys %ecx.
 */
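
/*
 * Illustrative C sketch of the round loop (hypothetical names):
 *
 *	x ^= rk[0];
 *	for (i = 1; i < nrounds; i++)
 *		x = AESENC(x, rk[i]);
 *	x = AESENCLAST(x, rk[nrounds]);
 *
 * The code below indexes the round keys by a negative byte offset
 * from the end of the array so that the loop counter doubles as the
 * termination test.
 */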
	.text
	_ALIGN_TEXT
	.type	aesni_enc1,@function
aesni_enc1:
	pxor	(%rdi),%xmm0	/* xor in first round key */
	shl	$4,%ecx		/* ecx := total byte size of round keys */
	lea	0x10(%rdi,%rcx),%rax	/* rax := end of round key array */
	neg	%rcx		/* rcx := byte offset of round key from end */
	jmp	2f
	_ALIGN_TEXT
1:	aesenc	%xmm8,%xmm0
2:	movdqa	(%rax,%rcx),%xmm8	/* load round key */
	add	$0x10,%rcx
	jnz	1b		/* repeat if more rounds */
	aesenclast	%xmm8,%xmm0
	ret
END(aesni_enc1)

/*
 * aesni_enc2(const struct aesenc *enckey@rdi, uint128_t block0@xmm0,
 *     uint128_t block1@xmm1, uint32_t nrounds@ecx)
 *
 * Encrypt two AES blocks in %xmm0 and %xmm1.
 *
 * Internal ABI.  Uses %rax and %xmm8 as temporaries.  Destroys %ecx.
 */
	.text
	_ALIGN_TEXT
	.type	aesni_enc2,@function
aesni_enc2:
	movdqa	(%rdi),%xmm8	/* xmm8 := first round key */
	shl	$4,%ecx		/* ecx := total byte size of round keys */
	lea	0x10(%rdi,%rcx),%rax	/* rax := end of round key array */
	neg	%rcx		/* rcx := byte offset of round key from end */
	pxor	%xmm8,%xmm0	/* xor in first round key */
	pxor	%xmm8,%xmm1
	jmp	2f
	_ALIGN_TEXT
1:	aesenc	%xmm8,%xmm0
	aesenc	%xmm8,%xmm1
2:	movdqa	(%rax,%rcx),%xmm8	/* load round key */
	add	$0x10,%rcx
	jnz	1b		/* repeat if there's more */
	aesenclast	%xmm8,%xmm0
	aesenclast	%xmm8,%xmm1
	ret
END(aesni_enc2)

/*
 * aesni_enc8(const struct aesenc *enckey@rdi, uint128_t block0@xmm0, ...,
 *     block7@xmm7, uint32_t nrounds@ecx)
 *
 * Encrypt eight AES blocks in %xmm0 through %xmm7 in parallel.
 *
 * Internal ABI.  Uses %rax and %xmm8 as temporaries.  Destroys %ecx.
 */
	.text
	_ALIGN_TEXT
	.type	aesni_enc8,@function
aesni_enc8:
	movdqa	(%rdi),%xmm8	/* xor in first round key */
	pxor	%xmm8,%xmm0
	pxor	%xmm8,%xmm1
	pxor	%xmm8,%xmm2
	pxor	%xmm8,%xmm3
	pxor	%xmm8,%xmm4
	pxor	%xmm8,%xmm5
	pxor	%xmm8,%xmm6
	pxor	%xmm8,%xmm7
	shl	$4,%ecx		/* ecx := total byte size of round keys */
	lea	0x10(%rdi,%rcx),%rax	/* rax := end of round key array */
	neg	%rcx		/* rcx := byte offset of round key from end */
	jmp	2f
	_ALIGN_TEXT
1:	aesenc	%xmm8,%xmm0
	aesenc	%xmm8,%xmm1
	aesenc	%xmm8,%xmm2
	aesenc	%xmm8,%xmm3
	aesenc	%xmm8,%xmm4
	aesenc	%xmm8,%xmm5
	aesenc	%xmm8,%xmm6
	aesenc	%xmm8,%xmm7
2:	movdqa	(%rax,%rcx),%xmm8	/* load round key */
	add	$0x10,%rcx
	jnz	1b		/* repeat if more rounds */
	aesenclast	%xmm8,%xmm0
	aesenclast	%xmm8,%xmm1
	aesenclast	%xmm8,%xmm2
	aesenclast	%xmm8,%xmm3
	aesenclast	%xmm8,%xmm4
	aesenclast	%xmm8,%xmm5
	aesenclast	%xmm8,%xmm6
	aesenclast	%xmm8,%xmm7
	ret
END(aesni_enc8)

/*
 * aesni_dec1(const struct aesdec *deckey@rdi, uint128_t block@xmm0,
 *     uint32_t nrounds@ecx)
 *
 * Decrypt a single AES block in %xmm0.
 *
 * Internal ABI.  Uses %rax and %xmm8 as temporaries.  Destroys %ecx.
 */
	.text
	_ALIGN_TEXT
	.type	aesni_dec1,@function
aesni_dec1:
	pxor	(%rdi),%xmm0	/* xor in first round key */
	shl	$4,%ecx		/* ecx := byte offset of round key */
	lea	0x10(%rdi,%rcx),%rax	/* rax := pointer to round key */
	neg	%rcx		/* rcx := byte offset of round key from end */
	jmp	2f
	_ALIGN_TEXT
1:	aesdec	%xmm8,%xmm0
2:	movdqa	(%rax,%rcx),%xmm8	/* load round key */
	add	$0x10,%rcx
	jnz	1b		/* repeat if more rounds */
	aesdeclast	%xmm8,%xmm0
	ret
END(aesni_dec1)

/*
 * aesni_dec8(const struct aesdec *deckey@rdi, uint128_t block0@xmm0, ...,
 *     block7@xmm7, uint32_t nrounds@ecx)
 *
 * Decrypt eight AES blocks in %xmm0 through %xmm7 in parallel.
 *
 * Internal ABI.  Uses %rax and %xmm8 as temporaries.  Destroys %ecx.
 */
	.text
	_ALIGN_TEXT
	.type	aesni_dec8,@function
aesni_dec8:
	movdqa	(%rdi),%xmm8	/* xor in first round key */
	pxor	%xmm8,%xmm0
	pxor	%xmm8,%xmm1
	pxor	%xmm8,%xmm2
	pxor	%xmm8,%xmm3
	pxor	%xmm8,%xmm4
	pxor	%xmm8,%xmm5
	pxor	%xmm8,%xmm6
	pxor	%xmm8,%xmm7
	shl	$4,%ecx		/* ecx := byte offset of round key */
	lea	0x10(%rdi,%rcx),%rax	/* rax := pointer to round key */
	neg	%rcx		/* rcx := byte offset of round key from end */
	jmp	2f
	_ALIGN_TEXT
1:	aesdec	%xmm8,%xmm0
	aesdec	%xmm8,%xmm1
	aesdec	%xmm8,%xmm2
	aesdec	%xmm8,%xmm3
	aesdec	%xmm8,%xmm4
	aesdec	%xmm8,%xmm5
	aesdec	%xmm8,%xmm6
	aesdec	%xmm8,%xmm7
2:	movdqa	(%rax,%rcx),%xmm8	/* load round key */
	add	$0x10,%rcx
	jnz	1b		/* repeat if more rounds */
	aesdeclast	%xmm8,%xmm0
	aesdeclast	%xmm8,%xmm1
	aesdeclast	%xmm8,%xmm2
	aesdeclast	%xmm8,%xmm3
	aesdeclast	%xmm8,%xmm4
	aesdeclast	%xmm8,%xmm5
	aesdeclast	%xmm8,%xmm6
	aesdeclast	%xmm8,%xmm7
	ret
END(aesni_dec8)