/*	$NetBSD: aes_ni_64.S,v 1.4 2020/07/25 22:29:06 riastradh Exp $	*/

/*-
 * Copyright (c) 2020 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

/*
 * MOVDQA/MOVDQU are Move Double Quadword (Aligned/Unaligned), defined
 * to operate on integers; MOVAPS/MOVUPS are Move (Aligned/Unaligned)
 * Packed Single, defined to operate on binary32 floats.  They have
 * exactly the same architectural effects (move a 128-bit quantity from
 * memory into an xmm register).
 *
 * In principle, they might have different microarchitectural effects
 * so that MOVAPS/MOVUPS might incur a penalty when the register is
 * later used for integer paths, but in practice they don't.  So we use
 * the one whose instruction encoding is shorter -- MOVAPS/MOVUPS.
 */
#define	movdqa	movaps
#define	movdqu	movups

/*
 * aesni_setenckey128(struct aesenc *enckey@rdi, const uint8_t key[16] @rsi)
 *
 *	Expand a 16-byte AES-128 key into 10 round keys.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesni_setenckey128)
	movdqu	(%rsi),%xmm0	/* load master key into %xmm0 */
	movdqa	%xmm0,(%rdi)	/* store master key as the first round key */
	lea	0x10(%rdi),%rdi	/* advance %rdi to next round key */
	aeskeygenassist $0x1,%xmm0,%xmm2
	call	aesni_expand128
	aeskeygenassist $0x2,%xmm0,%xmm2
	call	aesni_expand128
	aeskeygenassist $0x4,%xmm0,%xmm2
	call	aesni_expand128
	aeskeygenassist $0x8,%xmm0,%xmm2
	call	aesni_expand128
	aeskeygenassist $0x10,%xmm0,%xmm2
	call	aesni_expand128
	aeskeygenassist $0x20,%xmm0,%xmm2
	call	aesni_expand128
	aeskeygenassist $0x40,%xmm0,%xmm2
	call	aesni_expand128
	aeskeygenassist $0x80,%xmm0,%xmm2
	call	aesni_expand128
	aeskeygenassist $0x1b,%xmm0,%xmm2
	call	aesni_expand128
	aeskeygenassist $0x36,%xmm0,%xmm2
	call	aesni_expand128
	ret
END(aesni_setenckey128)

/*
 * aesni_setenckey192(struct aesenc *enckey@rdi, const uint8_t key[24] @rsi)
 *
 *	Expand a 24-byte AES-192 key into 12 round keys.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesni_setenckey192)
	movdqu	(%rsi),%xmm0	/* load master key [0:128) into %xmm0 */
	movq	0x10(%rsi),%xmm1	/* load master key [128:192) into %xmm1 */
	movdqa	%xmm0,(%rdi)	/* store master key [0:128) as round key */
	lea	0x10(%rdi),%rdi	/* advance %rdi to next round key */
	aeskeygenassist $0x1,%xmm1,%xmm2
	call	aesni_expand192a
	aeskeygenassist $0x2,%xmm0,%xmm2
	call	aesni_expand192b
	aeskeygenassist $0x4,%xmm1,%xmm2
	call	aesni_expand192a
	aeskeygenassist $0x8,%xmm0,%xmm2
	call	aesni_expand192b
	aeskeygenassist $0x10,%xmm1,%xmm2
	call	aesni_expand192a
	aeskeygenassist $0x20,%xmm0,%xmm2
	call	aesni_expand192b
	aeskeygenassist $0x40,%xmm1,%xmm2
	call	aesni_expand192a
	aeskeygenassist $0x80,%xmm0,%xmm2
	call	aesni_expand192b
	ret
END(aesni_setenckey192)

/*
 * aesni_setenckey256(struct aesenc *enckey@rdi, const uint8_t key[32] @rsi)
 *
 *	Expand a 32-byte AES-256 key into 14 round keys.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesni_setenckey256)
	movdqu	(%rsi),%xmm0	/* load master key [0:128) into %xmm0 */
	movdqu	0x10(%rsi),%xmm1	/* load master key [128:256) into %xmm1 */
	movdqa	%xmm0,(%rdi)	/* store master key [0:128) as round key */
	movdqa	%xmm1,0x10(%rdi)	/* store master key [128:256) as round key */
	lea	0x20(%rdi),%rdi	/* advance %rdi to next round key */
	aeskeygenassist $0x1,%xmm1,%xmm2
	call	aesni_expand256a
	aeskeygenassist $0x1,%xmm0,%xmm2
	call	aesni_expand256b
	aeskeygenassist $0x2,%xmm1,%xmm2
	call	aesni_expand256a
	aeskeygenassist $0x2,%xmm0,%xmm2
	call	aesni_expand256b
	aeskeygenassist $0x4,%xmm1,%xmm2
	call	aesni_expand256a
	aeskeygenassist $0x4,%xmm0,%xmm2
	call	aesni_expand256b
	aeskeygenassist $0x8,%xmm1,%xmm2
	call	aesni_expand256a
	aeskeygenassist $0x8,%xmm0,%xmm2
	call	aesni_expand256b
	aeskeygenassist $0x10,%xmm1,%xmm2
	call	aesni_expand256a
	aeskeygenassist $0x10,%xmm0,%xmm2
	call	aesni_expand256b
	aeskeygenassist $0x20,%xmm1,%xmm2
	call	aesni_expand256a
	aeskeygenassist $0x20,%xmm0,%xmm2
	call	aesni_expand256b
	aeskeygenassist $0x40,%xmm1,%xmm2
	call	aesni_expand256a
	ret
END(aesni_setenckey256)

/*
 * aesni_expand128(uint128_t *rkp@rdi, uint128_t prk@xmm0,
 *	uint128_t keygenassist@xmm2)
 *
 *	1. Compute the AES-128 round key using the previous round key.
 *	2. Store it at *rkp.
 *	3. Set %xmm0 to it.
 *	4. Advance %rdi to point at the next round key.
 *
 *	Internal ABI.  On entry:
 *
 *		%rdi = rkp, pointer to round key to compute
 *		%xmm0 = (prk[0], prk[1], prk[2], prk[3])
 *		%xmm2 = (xxx, xxx, xxx, t = Rot(SubWord(prk[3])) ^ RCON)
 *
 *	On exit:
 *
 *		%rdi = &rkp[1], rkp advanced by one round key
 *		%xmm0 = rk, the round key we just computed
 *		%xmm2 = garbage
 *		%xmm4 = garbage
 *		%xmm5 = garbage
 *		%xmm6 = garbage
 *
 *	Note: %xmm1 is preserved (as are %xmm3 and %xmm7 through %xmm15,
 *	and all other registers).
 */
	.text
	_ALIGN_TEXT
	.type	aesni_expand128,@function
aesni_expand128:
	/*
	 * %xmm2 := (%xmm2[3], %xmm2[3], %xmm2[3], %xmm2[3]),
	 * i.e., set each word of %xmm2 to t := Rot(SubWord(prk[3])) ^ RCON.
	 */
	pshufd	$0b11111111,%xmm2,%xmm2

	/*
	 * %xmm4 := (0, prk[0], prk[1], prk[2])
	 * %xmm5 := (0, 0, prk[0], prk[1])
	 * %xmm6 := (0, 0, 0, prk[0])
	 */
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm5
	movdqa	%xmm0,%xmm6
	pslldq	$4,%xmm4
	pslldq	$8,%xmm5
	pslldq	$12,%xmm6

	/*
	 * %xmm0 := (rk[0] = t ^ prk[0],
	 *	rk[1] = t ^ prk[0] ^ prk[1],
	 *	rk[2] = t ^ prk[0] ^ prk[1] ^ prk[2],
	 *	rk[3] = t ^ prk[0] ^ prk[1] ^ prk[2] ^ prk[3])
	 */
	pxor	%xmm2,%xmm0
	pxor	%xmm4,%xmm0
	pxor	%xmm5,%xmm0
	pxor	%xmm6,%xmm0

	movdqa	%xmm0,(%rdi)	/* store round key */
	lea	0x10(%rdi),%rdi	/* advance to next round key address */
	ret
END(aesni_expand128)
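
/*
 * For reference, a C sketch of the same expansion step (illustrative
 * only, not part of the build; aes_sub_rot is a hypothetical helper
 * standing in for the word AESKEYGENASSIST delivers in %xmm2):
 *
 *	uint32_t t = aes_sub_rot(prk[3]) ^ rcon;
 *	rk[0] = t ^ prk[0];
 *	rk[1] = rk[0] ^ prk[1];
 *	rk[2] = rk[1] ^ prk[2];
 *	rk[3] = rk[2] ^ prk[3];
 *
 * The vector code computes the same prefix XORs with three shifted
 * copies of prk instead of a serial chain.
 */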

/*
 * aesni_expand192a(uint128_t *rkp@rdi, uint128_t prk@xmm0,
 *	uint64_t rklo@xmm1, uint128_t keygenassist@xmm2)
 *
 *	Set even-numbered AES-192 round key.
 *
 *	Internal ABI.  On entry:
 *
 *		%rdi = rkp, pointer to two round keys to compute
 *		%xmm0 = (prk[0], prk[1], prk[2], prk[3])
 *		%xmm1 = (rklo[0], rklo[1], xxx, xxx)
 *		%xmm2 = (xxx, t = Rot(SubWord(rklo[1])) ^ RCON, xxx, xxx)
 *
 *	On exit:
 *
 *		%rdi = &rkp[2], rkp advanced by two round keys
 *		%xmm0 = nrk, second round key we just computed
 *		%xmm1 = rk, first round key we just computed
 *		%xmm2 = garbage
 *		%xmm4 = garbage
 *		%xmm5 = garbage
 *		%xmm6 = garbage
 *		%xmm7 = garbage
 */
	.text
	_ALIGN_TEXT
	.type	aesni_expand192a,@function
aesni_expand192a:
	/*
	 * %xmm2 := (%xmm2[1], %xmm2[1], %xmm2[1], %xmm2[1]),
	 * i.e., set each word of %xmm2 to t := Rot(SubWord(rklo[1])) ^ RCON.
	 */
	pshufd	$0b01010101,%xmm2,%xmm2

	/*
	 * We need to compute:
	 *
	 * rk[0] := rklo[0]
	 * rk[1] := rklo[1]
	 * rk[2] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0]
	 * rk[3] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ prk[1]
	 * nrk[0] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ prk[1] ^ prk[2]
	 * nrk[1] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ ... ^ prk[3]
	 * nrk[2] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ ... ^ prk[3] ^ rklo[0]
	 * nrk[3] := Rot(Sub(rklo[1])) ^ RCON ^ prk[0] ^ ... ^ prk[3] ^ rklo[0]
	 *	^ rklo[1]
	 */

	/*
	 * %xmm4 := (prk[0], prk[1], prk[2], prk[3])
	 * %xmm5 := (0, prk[0], prk[1], prk[2])
	 * %xmm6 := (0, 0, prk[0], prk[1])
	 * %xmm7 := (0, 0, 0, prk[0])
	 */
	movdqa	%xmm0,%xmm4
	movdqa	%xmm0,%xmm5
	movdqa	%xmm0,%xmm6
	movdqa	%xmm0,%xmm7
	pslldq	$4,%xmm5
	pslldq	$8,%xmm6
	pslldq	$12,%xmm7

	/* %xmm4 := (rk[2], rk[3], nrk[0], nrk[1]) */
	pxor	%xmm2,%xmm4
	pxor	%xmm5,%xmm4
	pxor	%xmm6,%xmm4
	pxor	%xmm7,%xmm4

	/*
	 * At this point, rk is split across %xmm1 (rk[0],rk[1],...) and
	 * %xmm4 (rk[2],rk[3],...); nrk is in %xmm4 (...,nrk[0],nrk[1]);
	 * and we have yet to compute nrk[2] or nrk[3], which requires
	 * rklo[0] and rklo[1] in %xmm1 (rklo[0], rklo[1], ...).  We need
	 * nrk to end up in %xmm0 at the end, so gather rk into %xmm1 and
	 * nrk into %xmm0.
	 */

	/* %xmm0 := (nrk[0], nrk[1], nrk[1], nrk[1]) */
	pshufd	$0b11111110,%xmm4,%xmm0

	/*
	 * %xmm6 := (0, 0, rklo[0], rklo[1])
	 * %xmm7 := (0, 0, 0, rklo[0])
	 */
	movdqa	%xmm1,%xmm6
	movdqa	%xmm1,%xmm7

	pslldq	$8,%xmm6
	pslldq	$12,%xmm7

	/*
	 * %xmm0 := (nrk[0],
	 *	nrk[1],
	 *	nrk[2] = nrk[1] ^ rklo[0],
	 *	nrk[3] = nrk[1] ^ rklo[0] ^ rklo[1])
	 */
	pxor	%xmm6,%xmm0
	pxor	%xmm7,%xmm0

	/* %xmm1 := (rk[0], rk[1], rk[2], rk[3]) */
	shufps	$0b01000100,%xmm4,%xmm1

	movdqa	%xmm1,(%rdi)	/* store round key */
	movdqa	%xmm0,0x10(%rdi)	/* store next round key */
	lea	0x20(%rdi),%rdi	/* advance two round keys */
	ret
END(aesni_expand192a)
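
/*
 * For reference, a C sketch of the even-step formulas above
 * (illustrative only; aes_sub_rot is a hypothetical helper for the
 * AESKEYGENASSIST word):
 *
 *	uint32_t t = aes_sub_rot(rklo[1]) ^ rcon;
 *	rk[0] = rklo[0];
 *	rk[1] = rklo[1];
 *	rk[2] = t ^ prk[0];
 *	rk[3] = rk[2] ^ prk[1];
 *	nrk[0] = rk[3] ^ prk[2];
 *	nrk[1] = nrk[0] ^ prk[3];
 *	nrk[2] = nrk[1] ^ rklo[0];
 *	nrk[3] = nrk[2] ^ rklo[1];
 */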

/*
 * aesni_expand192b(uint128_t *roundkey@rdi, uint128_t prk@xmm0,
 *	uint128_t keygenassist@xmm2)
 *
 *	Set odd-numbered AES-192 round key.
 *
 *	Internal ABI.  On entry:
 *
 *		%rdi = rkp, pointer to round key to compute
 *		%xmm0 = (prk[0], prk[1], prk[2], prk[3])
 *		%xmm1 = (xxx, xxx, pprk[2], pprk[3])
 *		%xmm2 = (xxx, xxx, xxx, t = Rot(Sub(prk[3])) ^ RCON)
 *
 *	On exit:
 *
 *		%rdi = &rkp[1], rkp advanced by one round key
 *		%xmm0 = rk, the round key we just computed
 *		%xmm1 = (nrk[0], nrk[1], xxx, xxx), half of next round key
 *		%xmm2 = garbage
 *		%xmm4 = garbage
 *		%xmm5 = garbage
 *		%xmm6 = garbage
 *		%xmm7 = garbage
 */
	.text
	_ALIGN_TEXT
	.type	aesni_expand192b,@function
aesni_expand192b:
	/*
	 * %xmm2 := (%xmm2[3], %xmm2[3], %xmm2[3], %xmm2[3]),
	 * i.e., set each word of %xmm2 to t := Rot(Sub(prk[3])) ^ RCON.
	 */
	pshufd	$0b11111111,%xmm2,%xmm2

	/*
	 * We need to compute:
	 *
	 * rk[0] := Rot(Sub(prk[3])) ^ RCON ^ pprk[2]
	 * rk[1] := Rot(Sub(prk[3])) ^ RCON ^ pprk[2] ^ pprk[3]
	 * rk[2] := Rot(Sub(prk[3])) ^ RCON ^ pprk[2] ^ pprk[3] ^ prk[0]
	 * rk[3] := Rot(Sub(prk[3])) ^ RCON ^ pprk[2] ^ pprk[3] ^ prk[0]
	 *	^ prk[1]
	 * nrk[0] := Rot(Sub(prk[3])) ^ RCON ^ pprk[2] ^ pprk[3] ^ prk[0]
	 *	^ prk[1] ^ prk[2]
	 * nrk[1] := Rot(Sub(prk[3])) ^ RCON ^ pprk[2] ^ pprk[3] ^ prk[0]
	 *	^ prk[1] ^ prk[2] ^ prk[3]
	 */

	/* %xmm1 := (pprk[2], pprk[3], prk[0], prk[1]) */
	shufps	$0b01001110,%xmm0,%xmm1

	/*
	 * %xmm5 := (0, pprk[2], pprk[3], prk[0])
	 * %xmm6 := (0, 0, pprk[2], pprk[3])
	 * %xmm7 := (0, 0, 0, pprk[2])
	 */
	movdqa	%xmm1,%xmm5
	movdqa	%xmm1,%xmm6
	movdqa	%xmm1,%xmm7
	pslldq	$4,%xmm5
	pslldq	$8,%xmm6
	pslldq	$12,%xmm7

	/* %xmm1 := (rk[0], rk[1], rk[2], rk[3]) */
	pxor	%xmm2,%xmm1
	pxor	%xmm5,%xmm1
	pxor	%xmm6,%xmm1
	pxor	%xmm7,%xmm1

	/* %xmm4 := (prk[2], prk[3], xxx, xxx) */
	pshufd	$0b00001110,%xmm0,%xmm4

	/* %xmm5 := (0, prk[2], xxx, xxx) */
	movdqa	%xmm4,%xmm5
	pslldq	$4,%xmm5

	/* %xmm0 := (rk[0], rk[1], rk[2], rk[3]) */
	movdqa	%xmm1,%xmm0

	/* %xmm1 := (rk[3], rk[3], xxx, xxx) */
	shufps	$0b00001111,%xmm1,%xmm1

	/*
	 * %xmm1 := (nrk[0] = rk[3] ^ prk[2],
	 *	nrk[1] = rk[3] ^ prk[2] ^ prk[3],
	 *	xxx,
	 *	xxx)
	 */
	pxor	%xmm4,%xmm1
	pxor	%xmm5,%xmm1

	movdqa	%xmm0,(%rdi)	/* store round key */
	lea	0x10(%rdi),%rdi	/* advance to next round key address */
	ret
END(aesni_expand192b)

/*
 * aesni_expand256a(uint128_t *rkp@rdi, uint128_t pprk@xmm0,
 *	uint128_t prk@xmm1, uint128_t keygenassist@xmm2)
 *
 *	Set even-numbered AES-256 round key.
 *
 *	Internal ABI.  On entry:
 *
 *		%rdi = rkp, pointer to round key to compute
 *		%xmm0 = (pprk[0], pprk[1], pprk[2], pprk[3])
 *		%xmm1 = (prk[0], prk[1], prk[2], prk[3])
 *		%xmm2 = (xxx, xxx, xxx, t = Rot(SubWord(prk[3])) ^ RCON)
 *
 *	On exit:
 *
 *		%rdi = &rkp[1], rkp advanced by one round key
 *		%xmm0 = rk, the round key we just computed
 *		%xmm1 = prk, previous round key, preserved from entry
 *		%xmm2 = garbage
 *		%xmm4 = garbage
 *		%xmm5 = garbage
 *		%xmm6 = garbage
 *
 *	The computation turns out to be the same as for AES-128; the
 *	previous round key does not figure into it, only the
 *	previous-previous round key.
 */
aesni_expand256a = aesni_expand128

/*
 * aesni_expand256b(uint128_t *rkp@rdi, uint128_t prk@xmm0,
 *	uint128_t pprk@xmm1, uint128_t keygenassist@xmm2)
 *
 *	Set odd-numbered AES-256 round key.
 *
 *	Internal ABI.  On entry:
 *
 *		%rdi = rkp, pointer to round key to compute
 *		%xmm0 = (prk[0], prk[1], prk[2], prk[3])
 *		%xmm1 = (pprk[0], pprk[1], pprk[2], pprk[3])
 *		%xmm2 = (xxx, xxx, t = Sub(prk[3]), xxx)
 *
 *	On exit:
 *
 *		%rdi = &rkp[1], rkp advanced by one round key
 *		%xmm0 = prk, previous round key, preserved from entry
 *		%xmm1 = rk, the round key we just computed
 *		%xmm2 = garbage
 *		%xmm4 = garbage
 *		%xmm5 = garbage
 *		%xmm6 = garbage
 */
	.text
	_ALIGN_TEXT
	.type	aesni_expand256b,@function
aesni_expand256b:
	/*
	 * %xmm2 := (%xmm2[2], %xmm2[2], %xmm2[2], %xmm2[2]),
	 * i.e., set each word of %xmm2 to t := Sub(prk[3]).
	 */
	pshufd	$0b10101010,%xmm2,%xmm2

	/*
	 * %xmm4 := (0, pprk[0], pprk[1], pprk[2])
	 * %xmm5 := (0, 0, pprk[0], pprk[1])
	 * %xmm6 := (0, 0, 0, pprk[0])
	 */
	movdqa	%xmm1,%xmm4
	movdqa	%xmm1,%xmm5
	movdqa	%xmm1,%xmm6
	pslldq	$4,%xmm4
	pslldq	$8,%xmm5
	pslldq	$12,%xmm6

	/*
	 * %xmm1 := (rk[0] = t ^ pprk[0],
	 *	rk[1] = t ^ pprk[0] ^ pprk[1],
	 *	rk[2] = t ^ pprk[0] ^ pprk[1] ^ pprk[2],
	 *	rk[3] = t ^ pprk[0] ^ pprk[1] ^ pprk[2] ^ pprk[3])
	 */
	pxor	%xmm2,%xmm1
	pxor	%xmm4,%xmm1
	pxor	%xmm5,%xmm1
	pxor	%xmm6,%xmm1

	movdqa	%xmm1,(%rdi)	/* store round key */
	lea	0x10(%rdi),%rdi	/* advance to next round key address */
	ret
END(aesni_expand256b)
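
/*
 * For reference, a C sketch of both AES-256 steps (illustrative only;
 * aes_sub_rot/aes_sub are hypothetical helpers for the two
 * AESKEYGENASSIST words).  The even step is the AES-128 step applied
 * to pprk; the odd step omits the rotation and round constant:
 *
 *	uint32_t t = even ? aes_sub_rot(prk[3]) ^ rcon : aes_sub(prk[3]);
 *	rk[0] = t ^ pprk[0];
 *	rk[1] = rk[0] ^ pprk[1];
 *	rk[2] = rk[1] ^ pprk[2];
 *	rk[3] = rk[2] ^ pprk[3];
 */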

/*
 * aesni_enctodec(const struct aesenc *enckey@rdi, struct aesdec *deckey@rsi,
 *	uint32_t nrounds@rdx)
 *
 *	Convert AES encryption round keys to AES decryption round keys.
 *	`nrounds' must be between 10 and 14.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesni_enctodec)
	shl	$4,%edx		/* rdx := byte offset of last round key */
	movdqa	(%rdi,%rdx),%xmm0	/* load last round key */
	movdqa	%xmm0,(%rsi)	/* store last round key verbatim */
	jmp	2f
1:	movdqa	(%rdi,%rdx),%xmm0	/* load round key */
	aesimc	%xmm0,%xmm0	/* convert encryption to decryption */
	movdqa	%xmm0,(%rsi)	/* store round key */
2:	sub	$0x10,%rdx	/* advance to next round key */
	lea	0x10(%rsi),%rsi
	jnz	1b		/* repeat if more rounds */
	movdqa	(%rdi),%xmm0	/* load first round key */
	movdqa	%xmm0,(%rsi)	/* store first round key verbatim */
	ret
END(aesni_enctodec)
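
/*
 * This is the standard `equivalent inverse cipher' conversion: reverse
 * the order of the round keys and apply InvMixColumns (AESIMC) to all
 * but the first and last.  A C sketch, with a hypothetical
 * aes_inv_mix_columns standing in for AESIMC:
 *
 *	dec[0] = enc[nrounds];
 *	for (i = 1; i < nrounds; i++)
 *		dec[i] = aes_inv_mix_columns(enc[nrounds - i]);
 *	dec[nrounds] = enc[0];
 */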

/*
 * aesni_enc(const struct aesenc *enckey@rdi, const uint8_t in[16] @rsi,
 *	uint8_t out[16] @rdx, uint32_t nrounds@ecx)
 *
 *	Encrypt a single block.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesni_enc)
	movdqu	(%rsi),%xmm0
	call	aesni_enc1
	movdqu	%xmm0,(%rdx)
	ret
END(aesni_enc)

/*
 * aesni_dec(const struct aesdec *deckey@rdi, const uint8_t in[16] @rsi,
 *	uint8_t out[16] @rdx, uint32_t nrounds@ecx)
 *
 *	Decrypt a single block.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesni_dec)
	movdqu	(%rsi),%xmm0
	call	aesni_dec1
	movdqu	%xmm0,(%rdx)
	ret
END(aesni_dec)

/*
 * aesni_cbc_enc(const struct aesenc *enckey@rdi, const uint8_t *in@rsi,
 *	uint8_t *out@rdx, size_t nbytes@rcx, uint8_t iv[16] @r8,
 *	uint32_t nrounds@r9d)
 *
 *	Encrypt a contiguous sequence of blocks with AES-CBC.
 *
 *	nbytes must be an integral multiple of 16.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesni_cbc_enc)
	cmp	$0,%rcx
	jz	2f
	mov	%rcx,%r10	/* r10 := nbytes */
	movdqu	(%r8),%xmm0	/* xmm0 := chaining value */
1:	movdqu	(%rsi),%xmm1	/* xmm1 := plaintext block */
	lea	0x10(%rsi),%rsi
	pxor	%xmm1,%xmm0	/* xmm0 := cv ^ ptxt */
	mov	%r9d,%ecx	/* ecx := nrounds */
	call	aesni_enc1	/* xmm0 := ciphertext block */
	movdqu	%xmm0,(%rdx)
	lea	0x10(%rdx),%rdx
	sub	$0x10,%r10
	jnz	1b		/* repeat if r10 is nonzero */
	movdqu	%xmm0,(%r8)	/* store chaining value */
2:	ret
END(aesni_cbc_enc)
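
/*
 * A C sketch of the chaining structure above (illustrative only;
 * aes_enc is a hypothetical one-block helper like aesni_enc1):
 *
 *	cv = iv;
 *	for (i = 0; i < nbytes/16; i++) {
 *		cv = aes_enc(enckey, cv ^ in[i], nrounds);
 *		out[i] = cv;
 *	}
 *	iv = cv;
 *
 * Each ciphertext block feeds into the next, so CBC encryption is
 * inherently serial; that is why there is no eight-block variant of
 * this routine.
 */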

/*
 * aesni_cbc_dec1(const struct aesdec *deckey@rdi, const uint8_t *in@rsi,
 *	uint8_t *out@rdx, size_t nbytes@rcx, const uint8_t iv[16] @r8,
 *	uint32_t nrounds@r9d)
 *
 *	Decrypt a contiguous sequence of blocks with AES-CBC.
 *
 *	nbytes must be a positive integral multiple of 16.  This routine
 *	is not vectorized; use aesni_cbc_dec8 for >=8 blocks at once.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesni_cbc_dec1)
	push	%rbp		/* create stack frame uint128[1] */
	mov	%rsp,%rbp
	sub	$0x10,%rsp
	movdqu	(%r8),%xmm8	/* xmm8 := iv */
	movdqa	%xmm8,(%rsp)	/* save iv */
	mov	%rcx,%r10	/* r10 := nbytes */
	movdqu	-0x10(%rsi,%r10),%xmm0	/* xmm0 := last ciphertext block */
	movdqu	%xmm0,(%r8)	/* update iv */
	jmp	2f
1:	movdqu	-0x10(%rsi,%r10),%xmm8	/* xmm8 := chaining value */
	pxor	%xmm8,%xmm0	/* xmm0 := ptxt */
	movdqu	%xmm0,(%rdx,%r10)	/* store plaintext block */
	movdqa	%xmm8,%xmm0	/* move cv = ciphertext block */
2:	mov	%r9d,%ecx	/* ecx := nrounds */
	call	aesni_dec1	/* xmm0 := cv ^ ptxt */
	sub	$0x10,%r10
	jnz	1b		/* repeat if more blocks */
	pxor	(%rsp),%xmm0	/* xmm0 := ptxt */
	movdqu	%xmm0,(%rdx)	/* store first plaintext block */
	leave
	ret
END(aesni_cbc_dec1)
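
/*
 * The loop runs from the last block to the first, so each ciphertext
 * block is consumed as a chaining value before the corresponding
 * plaintext is written, which also keeps an in-place call (in == out)
 * safe.  A C sketch (illustrative only; aes_dec is a hypothetical
 * one-block helper like aesni_dec1):
 *
 *	iv0 = iv;
 *	iv = in[n-1];		// iv for the next call
 *	for (i = n - 1; i > 0; i--)
 *		out[i] = aes_dec(deckey, in[i], nrounds) ^ in[i-1];
 *	out[0] = aes_dec(deckey, in[0], nrounds) ^ iv0;
 */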

/*
 * aesni_cbc_dec8(const struct aesdec *deckey@rdi, const uint8_t *in@rsi,
 *	uint8_t *out@rdx, size_t nbytes@rcx, const uint8_t iv[16] @r8,
 *	uint32_t nrounds@r9d)
 *
 *	Decrypt a contiguous sequence of 8-block units with AES-CBC.
 *
 *	nbytes must be a positive integral multiple of 128.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesni_cbc_dec8)
	push	%rbp		/* create stack frame uint128[1] */
	mov	%rsp,%rbp
	sub	$0x10,%rsp
	movdqu	(%r8),%xmm8	/* xmm8 := iv */
	movdqa	%xmm8,(%rsp)	/* save iv */
	mov	%rcx,%r10	/* r10 := nbytes */
	movdqu	-0x10(%rsi,%r10),%xmm7	/* xmm7 := ciphertext block[n-1] */
	movdqu	%xmm7,(%r8)	/* update iv */
	jmp	2f
1:	movdqu	-0x10(%rsi,%r10),%xmm7	/* xmm7 := cv[0] */
	pxor	%xmm7,%xmm0	/* xmm0 := ptxt[0] */
	movdqu	%xmm0,(%rdx,%r10)	/* store plaintext block */
2:	movdqu	-0x20(%rsi,%r10),%xmm6	/* xmm6 := ciphertext block[n-2] */
	movdqu	-0x30(%rsi,%r10),%xmm5	/* xmm5 := ciphertext block[n-3] */
	movdqu	-0x40(%rsi,%r10),%xmm4	/* xmm4 := ciphertext block[n-4] */
	movdqu	-0x50(%rsi,%r10),%xmm3	/* xmm3 := ciphertext block[n-5] */
	movdqu	-0x60(%rsi,%r10),%xmm2	/* xmm2 := ciphertext block[n-6] */
	movdqu	-0x70(%rsi,%r10),%xmm1	/* xmm1 := ciphertext block[n-7] */
	movdqu	-0x80(%rsi,%r10),%xmm0	/* xmm0 := ciphertext block[n-8] */
	movdqa	%xmm6,%xmm15	/* xmm[8+i] := cv[i], 0<i<8 */
	movdqa	%xmm5,%xmm14
	movdqa	%xmm4,%xmm13
	movdqa	%xmm3,%xmm12
	movdqa	%xmm2,%xmm11
	movdqa	%xmm1,%xmm10
	movdqa	%xmm0,%xmm9
	mov	%r9d,%ecx	/* ecx := nrounds */
	call	aesni_dec8	/* xmm[i] := cv[i] ^ ptxt[i], 0<=i<8 */
	pxor	%xmm15,%xmm7	/* xmm[i] := ptxt[i], 0<i<8 */
	pxor	%xmm14,%xmm6
	pxor	%xmm13,%xmm5
	pxor	%xmm12,%xmm4
	pxor	%xmm11,%xmm3
	pxor	%xmm10,%xmm2
	pxor	%xmm9,%xmm1
	movdqu	%xmm7,-0x10(%rdx,%r10)	/* store plaintext blocks */
	movdqu	%xmm6,-0x20(%rdx,%r10)
	movdqu	%xmm5,-0x30(%rdx,%r10)
	movdqu	%xmm4,-0x40(%rdx,%r10)
	movdqu	%xmm3,-0x50(%rdx,%r10)
	movdqu	%xmm2,-0x60(%rdx,%r10)
	movdqu	%xmm1,-0x70(%rdx,%r10)
	sub	$0x80,%r10
	jnz	1b		/* repeat if more blocks */
	pxor	(%rsp),%xmm0	/* xmm0 := ptxt[0] */
	movdqu	%xmm0,(%rdx)	/* store first plaintext block */
	leave
	ret
END(aesni_cbc_dec8)

/*
 * aesni_xts_enc1(const struct aesenc *enckey@rdi, const uint8_t *in@rsi,
 *	uint8_t *out@rdx, size_t nbytes@rcx, uint8_t tweak[16] @r8,
 *	uint32_t nrounds@r9d)
 *
 *	Encrypt a contiguous sequence of blocks with AES-XTS.
 *
 *	nbytes must be a positive integral multiple of 16.  This routine
 *	is not vectorized; use aesni_xts_enc8 for >=8 blocks at once.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesni_xts_enc1)
	mov	%rcx,%r10	/* r10 := nbytes */
	movdqu	(%r8),%xmm15	/* xmm15 := tweak */
1:	movdqu	(%rsi),%xmm0	/* xmm0 := ptxt */
	lea	0x10(%rsi),%rsi	/* advance rsi to next block */
	pxor	%xmm15,%xmm0	/* xmm0 := ptxt ^ tweak */
	mov	%r9d,%ecx	/* ecx := nrounds */
	call	aesni_enc1	/* xmm0 := AES(ptxt ^ tweak) */
	pxor	%xmm15,%xmm0	/* xmm0 := AES(ptxt ^ tweak) ^ tweak */
	movdqu	%xmm0,(%rdx)	/* store ciphertext block */
	lea	0x10(%rdx),%rdx	/* advance rdx to next block */
	call	aesni_xts_mulx	/* xmm15 *= x; trash xmm0 */
	sub	$0x10,%r10
	jnz	1b		/* repeat if more blocks */
	movdqu	%xmm15,(%r8)	/* update tweak */
	ret
END(aesni_xts_enc1)

/*
 * aesni_xts_enc8(const struct aesenc *enckey@rdi, const uint8_t *in@rsi,
 *	uint8_t *out@rdx, size_t nbytes@rcx, uint8_t tweak[16] @r8,
 *	uint32_t nrounds@r9d)
 *
 *	Encrypt a contiguous sequence of blocks with AES-XTS.
 *
 *	nbytes must be a positive integral multiple of 128.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesni_xts_enc8)
	push	%rbp		/* create stack frame uint128[1] */
	mov	%rsp,%rbp
	sub	$0x10,%rsp
	mov	%rcx,%r10	/* r10 := nbytes */
	movdqu	(%r8),%xmm15	/* xmm15 := tweak[0] */
1:	movdqa	%xmm15,%xmm8	/* xmm8 := tweak[0] */
	call	aesni_xts_mulx	/* xmm15 := tweak[1] */
	movdqa	%xmm15,%xmm9	/* xmm9 := tweak[1] */
	call	aesni_xts_mulx	/* xmm15 := tweak[2] */
	movdqa	%xmm15,%xmm10	/* xmm10 := tweak[2] */
	call	aesni_xts_mulx	/* xmm15 := tweak[3] */
	movdqa	%xmm15,%xmm11	/* xmm11 := tweak[3] */
	call	aesni_xts_mulx	/* xmm15 := tweak[4] */
	movdqa	%xmm15,%xmm12	/* xmm12 := tweak[4] */
	call	aesni_xts_mulx	/* xmm15 := tweak[5] */
	movdqa	%xmm15,%xmm13	/* xmm13 := tweak[5] */
	call	aesni_xts_mulx	/* xmm15 := tweak[6] */
	movdqa	%xmm15,%xmm14	/* xmm14 := tweak[6] */
	call	aesni_xts_mulx	/* xmm15 := tweak[7] */
	movdqu	(%rsi),%xmm0	/* xmm[i] := ptxt[i] */
	movdqu	0x10(%rsi),%xmm1
	movdqu	0x20(%rsi),%xmm2
	movdqu	0x30(%rsi),%xmm3
	movdqu	0x40(%rsi),%xmm4
	movdqu	0x50(%rsi),%xmm5
	movdqu	0x60(%rsi),%xmm6
	movdqu	0x70(%rsi),%xmm7
	lea	0x80(%rsi),%rsi	/* advance rsi to next block group */
	movdqa	%xmm8,(%rsp)	/* save tweak[0] */
	pxor	%xmm8,%xmm0	/* xmm[i] := ptxt[i] ^ tweak[i] */
	pxor	%xmm9,%xmm1
	pxor	%xmm10,%xmm2
	pxor	%xmm11,%xmm3
	pxor	%xmm12,%xmm4
	pxor	%xmm13,%xmm5
	pxor	%xmm14,%xmm6
	pxor	%xmm15,%xmm7
	mov	%r9d,%ecx	/* ecx := nrounds */
	call	aesni_enc8	/* xmm[i] := AES(ptxt[i] ^ tweak[i]) */
	pxor	(%rsp),%xmm0	/* xmm[i] := AES(...) ^ tweak[i] */
	pxor	%xmm9,%xmm1
	pxor	%xmm10,%xmm2
	pxor	%xmm11,%xmm3
	pxor	%xmm12,%xmm4
	pxor	%xmm13,%xmm5
	pxor	%xmm14,%xmm6
	pxor	%xmm15,%xmm7
	movdqu	%xmm0,(%rdx)	/* store ciphertext blocks */
	movdqu	%xmm1,0x10(%rdx)
	movdqu	%xmm2,0x20(%rdx)
	movdqu	%xmm3,0x30(%rdx)
	movdqu	%xmm4,0x40(%rdx)
	movdqu	%xmm5,0x50(%rdx)
	movdqu	%xmm6,0x60(%rdx)
	movdqu	%xmm7,0x70(%rdx)
	lea	0x80(%rdx),%rdx	/* advance rdx to next block group */
	call	aesni_xts_mulx	/* xmm15 := tweak[8] */
	sub	$0x80,%r10
	jnz	1b		/* repeat if more block groups */
	movdqu	%xmm15,(%r8)	/* update tweak */
	leave
	ret
END(aesni_xts_enc8)

/*
 * aesni_xts_dec1(const struct aesdec *deckey@rdi, const uint8_t *in@rsi,
 *	uint8_t *out@rdx, size_t nbytes@rcx, uint8_t tweak[16] @r8,
 *	uint32_t nrounds@r9d)
 *
 *	Decrypt a contiguous sequence of blocks with AES-XTS.
 *
 *	nbytes must be a positive integral multiple of 16.  This routine
 *	is not vectorized; use aesni_xts_dec8 for >=8 blocks at once.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesni_xts_dec1)
	mov	%rcx,%r10	/* r10 := nbytes */
	movdqu	(%r8),%xmm15	/* xmm15 := tweak */
1:	movdqu	(%rsi),%xmm0	/* xmm0 := ctxt */
	lea	0x10(%rsi),%rsi	/* advance rsi to next block */
	pxor	%xmm15,%xmm0	/* xmm0 := ctxt ^ tweak */
	mov	%r9d,%ecx	/* ecx := nrounds */
	call	aesni_dec1	/* xmm0 := AES^-1(ctxt ^ tweak) */
	pxor	%xmm15,%xmm0	/* xmm0 := AES^-1(ctxt ^ tweak) ^ tweak */
	movdqu	%xmm0,(%rdx)	/* store plaintext block */
	lea	0x10(%rdx),%rdx	/* advance rdx to next block */
	call	aesni_xts_mulx	/* xmm15 *= x; trash xmm0 */
	sub	$0x10,%r10
	jnz	1b		/* repeat if more blocks */
	movdqu	%xmm15,(%r8)	/* update tweak */
	ret
END(aesni_xts_dec1)

/*
 * aesni_xts_dec8(const struct aesdec *deckey@rdi, const uint8_t *in@rsi,
 *	uint8_t *out@rdx, size_t nbytes@rcx, uint8_t tweak[16] @r8,
 *	uint32_t nrounds@r9d)
 *
 *	Decrypt a contiguous sequence of blocks with AES-XTS.
 *
 *	nbytes must be a positive integral multiple of 128.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesni_xts_dec8)
	push	%rbp		/* create stack frame uint128[1] */
	mov	%rsp,%rbp
	sub	$0x10,%rsp
	mov	%rcx,%r10	/* r10 := nbytes */
	movdqu	(%r8),%xmm15	/* xmm15 := tweak[0] */
1:	movdqa	%xmm15,%xmm8	/* xmm8 := tweak[0] */
	call	aesni_xts_mulx	/* xmm15 := tweak[1] */
	movdqa	%xmm15,%xmm9	/* xmm9 := tweak[1] */
	call	aesni_xts_mulx	/* xmm15 := tweak[2] */
	movdqa	%xmm15,%xmm10	/* xmm10 := tweak[2] */
	call	aesni_xts_mulx	/* xmm15 := tweak[3] */
	movdqa	%xmm15,%xmm11	/* xmm11 := tweak[3] */
	call	aesni_xts_mulx	/* xmm15 := tweak[4] */
	movdqa	%xmm15,%xmm12	/* xmm12 := tweak[4] */
	call	aesni_xts_mulx	/* xmm15 := tweak[5] */
	movdqa	%xmm15,%xmm13	/* xmm13 := tweak[5] */
	call	aesni_xts_mulx	/* xmm15 := tweak[6] */
	movdqa	%xmm15,%xmm14	/* xmm14 := tweak[6] */
	call	aesni_xts_mulx	/* xmm15 := tweak[7] */
	movdqu	(%rsi),%xmm0	/* xmm[i] := ctxt[i] */
	movdqu	0x10(%rsi),%xmm1
	movdqu	0x20(%rsi),%xmm2
	movdqu	0x30(%rsi),%xmm3
	movdqu	0x40(%rsi),%xmm4
	movdqu	0x50(%rsi),%xmm5
	movdqu	0x60(%rsi),%xmm6
	movdqu	0x70(%rsi),%xmm7
	lea	0x80(%rsi),%rsi	/* advance rsi to next block group */
	movdqa	%xmm8,(%rsp)	/* save tweak[0] */
	pxor	%xmm8,%xmm0	/* xmm[i] := ctxt[i] ^ tweak[i] */
	pxor	%xmm9,%xmm1
	pxor	%xmm10,%xmm2
	pxor	%xmm11,%xmm3
	pxor	%xmm12,%xmm4
	pxor	%xmm13,%xmm5
	pxor	%xmm14,%xmm6
	pxor	%xmm15,%xmm7
	mov	%r9d,%ecx	/* ecx := nrounds */
	call	aesni_dec8	/* xmm[i] := AES^-1(ctxt[i] ^ tweak[i]) */
	pxor	(%rsp),%xmm0	/* xmm[i] := AES^-1(...) ^ tweak[i] */
	pxor	%xmm9,%xmm1
	pxor	%xmm10,%xmm2
	pxor	%xmm11,%xmm3
	pxor	%xmm12,%xmm4
	pxor	%xmm13,%xmm5
	pxor	%xmm14,%xmm6
	pxor	%xmm15,%xmm7
	movdqu	%xmm0,(%rdx)	/* store plaintext blocks */
	movdqu	%xmm1,0x10(%rdx)
	movdqu	%xmm2,0x20(%rdx)
	movdqu	%xmm3,0x30(%rdx)
	movdqu	%xmm4,0x40(%rdx)
	movdqu	%xmm5,0x50(%rdx)
	movdqu	%xmm6,0x60(%rdx)
	movdqu	%xmm7,0x70(%rdx)
	lea	0x80(%rdx),%rdx	/* advance rdx to next block group */
	call	aesni_xts_mulx	/* xmm15 := tweak[8] */
	sub	$0x80,%r10
	jnz	1b		/* repeat if more block groups */
	movdqu	%xmm15,(%r8)	/* update tweak */
	leave
	ret
END(aesni_xts_dec8)

/*
 * aesni_xts_mulx(tweak@xmm15)
 *
 *	Multiply xmm15 by x, modulo x^128 + x^7 + x^2 + x + 1, in place.
 *	Uses %xmm0 as temporary.
 */
	.text
	_ALIGN_TEXT
	.type	aesni_xts_mulx,@function
aesni_xts_mulx:
	/*
	 * Simultaneously determine
	 * (a) whether the high bit of the low quadword must be
	 *     shifted into the low bit of the high quadword, and
	 * (b) whether the high bit of the high quadword must be
	 *     carried into x^128 = x^7 + x^2 + x + 1.
	 */
	pxor	%xmm0,%xmm0	/* xmm0 := 0 */
	pcmpgtq	%xmm15,%xmm0	/* xmm0[i] := -1 if 0 > xmm15[i] else 0 */
	pshufd	$0b01001110,%xmm0,%xmm0	/* swap halves of xmm0 */
	pand	xtscarry(%rip),%xmm0	/* copy xtscarry according to mask */
	psllq	$1,%xmm15	/* shift */
	pxor	%xmm0,%xmm15	/* incorporate (a) and (b) */
	ret
END(aesni_xts_mulx)

	.section .rodata
	.p2align 4
	.type	xtscarry,@object
xtscarry:
	.byte	0x87,0,0,0, 0,0,0,0, 1,0,0,0, 0,0,0,0
END(xtscarry)
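
/*
 * A C sketch of the doubling step (illustrative only): shift the
 * 128-bit tweak left by one bit and, if the top bit fell off, fold in
 * the reduction polynomial x^7 + x^2 + x + 1 = 0x87:
 *
 *	carry = hi >> 63;
 *	hi = (hi << 1) | (lo >> 63);
 *	lo = (lo << 1) ^ (carry ? 0x87 : 0);
 *
 * The vector version derives both the lo-to-hi carry and the 0x87
 * reduction from the quadword sign bits with PCMPGTQ and the xtscarry
 * mask, so there is no branch on secret data.
 */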

/*
 * aesni_xts_update(const uint8_t in[16] @rdi, uint8_t out[16] @rsi)
 *
 *	Update an AES-XTS tweak.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesni_xts_update)
	movdqu	(%rdi),%xmm15
	call	aesni_xts_mulx
	movdqu	%xmm15,(%rsi)
	ret
END(aesni_xts_update)

/*
 * aesni_cbcmac_update1(const struct aesenc *enckey@rdi, const uint8_t *in@rsi,
 *	size_t nbytes@rdx, uint8_t auth[16] @rcx, uint32_t nrounds@r8d)
 *
 *	Update CBC-MAC.
 *
 *	nbytes must be a positive integral multiple of 16.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesni_cbcmac_update1)
	movdqu	(%rcx),%xmm0	/* xmm0 := auth */
	mov	%rdx,%r10	/* r10 := nbytes */
	mov	%rcx,%rdx	/* rdx := &auth */
1:	pxor	(%rsi),%xmm0	/* xmm0 ^= plaintext block */
	lea	0x10(%rsi),%rsi
	mov	%r8d,%ecx	/* ecx := nrounds */
	call	aesni_enc1	/* xmm0 := auth'; trash rax,rcx,xmm8 */
	sub	$0x10,%r10
	jnz	1b
	movdqu	%xmm0,(%rdx)	/* store auth' */
	ret
END(aesni_cbcmac_update1)
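
/*
 * A C sketch of the update above (illustrative only; aes_enc is a
 * hypothetical one-block helper like aesni_enc1):
 *
 *	for (i = 0; i < nbytes/16; i++)
 *		auth = aes_enc(enckey, auth ^ in[i], nrounds);
 */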

/*
 * aesni_ccm_enc1(const struct aesenc *enckey@rdi, const uint8_t *in@rsi,
 *	uint8_t *out@rdx, size_t nbytes@rcx,
 *	uint8_t authctr[32] @r8, uint32_t nrounds@r9d)
 *
 *	Update CCM encryption.
 *
 *	nbytes must be a positive integral multiple of 16.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesni_ccm_enc1)
	mov	%rcx,%r10	/* r10 := nbytes */
	movdqu	0x10(%r8),%xmm2	/* xmm2 := ctr (be) */
	movdqa	bswap32(%rip),%xmm4	/* xmm4 := bswap32 table */
	movdqa	ctr32_inc(%rip),%xmm5	/* xmm5 := (0,0,0,1) (le) */
	movdqu	(%r8),%xmm0	/* xmm0 := auth */
	pshufb	%xmm4,%xmm2	/* xmm2 := ctr (le) */
1:	movdqu	(%rsi),%xmm3	/* xmm3 := plaintext block */
	paddd	%xmm5,%xmm2	/* increment ctr (32-bit) */
	lea	0x10(%rsi),%rsi
	movdqa	%xmm2,%xmm1	/* xmm1 := ctr (le) */
	mov	%r9d,%ecx	/* ecx := nrounds */
	pshufb	%xmm4,%xmm1	/* xmm1 := ctr (be) */
	pxor	%xmm3,%xmm0	/* xmm0 := auth ^ ptxt */
	call	aesni_enc2	/* xmm0 := auth', xmm1 := pad;
				 * trash rax/rcx/xmm8 */
	pxor	%xmm1,%xmm3	/* xmm3 := ciphertext block */
	sub	$0x10,%r10	/* count down bytes */
	movdqu	%xmm3,(%rdx)	/* store ciphertext block */
	lea	0x10(%rdx),%rdx
	jnz	1b		/* repeat if more blocks */
	pshufb	%xmm4,%xmm2	/* xmm2 := ctr (be) */
	movdqu	%xmm0,(%r8)	/* store updated auth */
	movdqu	%xmm2,0x10(%r8)	/* store updated ctr */
	ret
END(aesni_ccm_enc1)
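
/*
 * A C sketch of one iteration above (illustrative only; aes_enc is a
 * hypothetical one-block helper).  Using aesni_enc2 lets the CBC-MAC
 * block and the CTR pad share each round key load:
 *
 *	for (i = 0; i < nbytes/16; i++) {
 *		ctr++;			// 32-bit big-endian increment
 *		auth = aes_enc(enckey, auth ^ in[i], nrounds);	// CBC-MAC
 *		out[i] = in[i] ^ aes_enc(enckey, ctr, nrounds);	// CTR
 *	}
 */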

/*
 * aesni_ccm_dec1(const struct aesenc *enckey@rdi, const uint8_t *in@rsi,
 *	uint8_t *out@rdx, size_t nbytes@rcx,
 *	uint8_t authctr[32] @r8, uint32_t nrounds@r9d)
 *
 *	Update CCM decryption.
 *
 *	nbytes must be a positive integral multiple of 16.
 *
 *	Standard ABI calling convention.
 */
ENTRY(aesni_ccm_dec1)
	movdqu	0x10(%r8),%xmm2	/* xmm2 := ctr (be) */
	movdqa	bswap32(%rip),%xmm4	/* xmm4 := bswap32 table */
	movdqa	ctr32_inc(%rip),%xmm5	/* xmm5 := (0,0,0,1) (le) */
	movdqu	(%r8),%xmm1	/* xmm1 := auth */
	pshufb	%xmm4,%xmm2	/* xmm2 := ctr (le) */
	mov	%rcx,%r10	/* r10 := nbytes */

	/* Decrypt the first block. */
	paddd	%xmm5,%xmm2	/* increment ctr (32-bit) */
	mov	%r9d,%ecx	/* ecx := nrounds */
	movdqa	%xmm2,%xmm0	/* xmm0 := ctr (le) */
	movdqu	(%rsi),%xmm3	/* xmm3 := ctxt */
	pshufb	%xmm4,%xmm0	/* xmm0 := ctr (be) */
	lea	0x10(%rsi),%rsi
	call	aesni_enc1	/* xmm0 := pad; trash rax/rcx/xmm8 */
	jmp	2f

1:	/*
	 * Authenticate the last block and decrypt the next block
	 * simultaneously.
	 *
	 *	xmm1 = auth ^ ptxt[-1]
	 *	xmm2 = ctr[-1] (le)
	 */
	paddd	%xmm5,%xmm2	/* increment ctr (32-bit) */
	mov	%r9d,%ecx	/* ecx := nrounds */
	movdqa	%xmm2,%xmm0	/* xmm0 := ctr (le) */
	movdqu	(%rsi),%xmm3	/* xmm3 := ctxt */
	pshufb	%xmm4,%xmm0	/* xmm0 := ctr (be) */
	lea	0x10(%rsi),%rsi
	call	aesni_enc2	/* xmm0 := pad, xmm1 := auth';
				 * trash rax/rcx/xmm8 */
2:	pxor	%xmm0,%xmm3	/* xmm3 := ptxt */
	sub	$0x10,%r10
	movdqu	%xmm3,(%rdx)	/* store plaintext */
	lea	0x10(%rdx),%rdx
	pxor	%xmm3,%xmm1	/* xmm1 := auth ^ ptxt */
	jnz	1b

	/* Authenticate the last block. */
	movdqa	%xmm1,%xmm0	/* xmm0 := auth ^ ptxt */
	mov	%r9d,%ecx	/* ecx := nrounds */
	call	aesni_enc1	/* xmm0 := auth' */
	pshufb	%xmm4,%xmm2	/* xmm2 := ctr (be) */
	movdqu	%xmm0,(%r8)	/* store updated auth */
	movdqu	%xmm2,0x10(%r8)	/* store updated ctr */
	ret
END(aesni_ccm_dec1)

	.section .rodata
	.p2align 4
	.type	bswap32,@object
bswap32:
	.byte	3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12
END(bswap32)

	.section .rodata
	.p2align 4
	.type	ctr32_inc,@object
ctr32_inc:
	.byte	0,0,0,0, 0,0,0,0, 0,0,0,0, 1,0,0,0
END(ctr32_inc)

/*
 * aesni_enc1(const struct aesenc *enckey@rdi, uint128_t block@xmm0,
 *	uint32_t nrounds@ecx)
 *
 *	Encrypt a single AES block in %xmm0.
 *
 *	Internal ABI.  Uses %rax and %xmm8 as temporaries.  Destroys %ecx.
 */
	.text
	_ALIGN_TEXT
	.type	aesni_enc1,@function
aesni_enc1:
	pxor	(%rdi),%xmm0	/* xor in first round key */
	shl	$4,%ecx		/* ecx := total byte size of round keys */
	lea	0x10(%rdi,%rcx),%rax	/* rax := end of round key array */
	neg	%rcx		/* rcx := byte offset of round key from end */
	jmp	2f
1:	aesenc	%xmm8,%xmm0
2:	movdqa	(%rax,%rcx),%xmm8	/* load round key */
	add	$0x10,%rcx
	jnz	1b		/* repeat if more rounds */
	aesenclast %xmm8,%xmm0
	ret
END(aesni_enc1)
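
/*
 * A C rendering of the loop structure above (illustrative only;
 * aes_enc_round/aes_enc_last_round are hypothetical stand-ins for
 * AESENC/AESENCLAST).  Indexing the round keys by a negative offset
 * from the end of the array lets the offset double as the loop
 * termination test:
 *
 *	blk ^= rk[0];
 *	for (i = 1; i < nrounds; i++)
 *		blk = aes_enc_round(blk, rk[i]);
 *	blk = aes_enc_last_round(blk, rk[nrounds]);
 */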

/*
 * aesni_enc2(const struct aesenc *enckey@rdi, uint128_t block0@xmm0,
 *	uint128_t block1@xmm1, uint32_t nrounds@ecx)
 *
 *	Encrypt two AES blocks in %xmm0 and %xmm1.
 *
 *	Internal ABI.  Uses %rax and %xmm8 as temporaries.  Destroys %ecx.
 */
	.text
	_ALIGN_TEXT
	.type	aesni_enc2,@function
aesni_enc2:
	movdqa	(%rdi),%xmm8	/* xmm8 := first round key */
	shl	$4,%ecx		/* ecx := total byte size of round keys */
	lea	0x10(%rdi,%rcx),%rax	/* rax := end of round key array */
	neg	%rcx		/* rcx := byte offset of round key from end */
	pxor	%xmm8,%xmm0	/* xor in first round key */
	pxor	%xmm8,%xmm1
	jmp	2f
1:	aesenc	%xmm8,%xmm0
	aesenc	%xmm8,%xmm1
2:	movdqa	(%rax,%rcx),%xmm8	/* load round key */
	add	$0x10,%rcx
	jnz	1b		/* repeat if there's more */
	aesenclast %xmm8,%xmm0
	aesenclast %xmm8,%xmm1
	ret
END(aesni_enc2)

/*
 * aesni_enc8(const struct aesenc *enckey@rdi, uint128_t block0@xmm0, ...,
 *	block7@xmm7, uint32_t nrounds@ecx)
 *
 *	Encrypt eight AES blocks in %xmm0 through %xmm7 in parallel.
 *
 *	Internal ABI.  Uses %rax and %xmm8 as temporaries.  Destroys %ecx.
 */
	.text
	_ALIGN_TEXT
	.type	aesni_enc8,@function
aesni_enc8:
	movdqa	(%rdi),%xmm8	/* xor in first round key */
	pxor	%xmm8,%xmm0
	pxor	%xmm8,%xmm1
	pxor	%xmm8,%xmm2
	pxor	%xmm8,%xmm3
	pxor	%xmm8,%xmm4
	pxor	%xmm8,%xmm5
	pxor	%xmm8,%xmm6
	pxor	%xmm8,%xmm7
	shl	$4,%ecx		/* ecx := total byte size of round keys */
	lea	0x10(%rdi,%rcx),%rax	/* rax := end of round key array */
	neg	%rcx		/* rcx := byte offset of round key from end */
	jmp	2f
1:	aesenc	%xmm8,%xmm0
	aesenc	%xmm8,%xmm1
	aesenc	%xmm8,%xmm2
	aesenc	%xmm8,%xmm3
	aesenc	%xmm8,%xmm4
	aesenc	%xmm8,%xmm5
	aesenc	%xmm8,%xmm6
	aesenc	%xmm8,%xmm7
2:	movdqa	(%rax,%rcx),%xmm8	/* load round key */
	add	$0x10,%rcx
	jnz	1b		/* repeat if more rounds */
	aesenclast %xmm8,%xmm0
	aesenclast %xmm8,%xmm1
	aesenclast %xmm8,%xmm2
	aesenclast %xmm8,%xmm3
	aesenclast %xmm8,%xmm4
	aesenclast %xmm8,%xmm5
	aesenclast %xmm8,%xmm6
	aesenclast %xmm8,%xmm7
	ret
END(aesni_enc8)

/*
 * aesni_dec1(const struct aesdec *deckey@rdi, uint128_t block@xmm0,
 *	uint32_t nrounds@ecx)
 *
 *	Decrypt a single AES block in %xmm0.
 *
 *	Internal ABI.  Uses %rax and %xmm8 as temporaries.  Destroys %ecx.
 */
	.text
	_ALIGN_TEXT
	.type	aesni_dec1,@function
aesni_dec1:
	pxor	(%rdi),%xmm0	/* xor in first round key */
	shl	$4,%ecx		/* ecx := total byte size of round keys */
	lea	0x10(%rdi,%rcx),%rax	/* rax := end of round key array */
	neg	%rcx		/* rcx := byte offset of round key from end */
	jmp	2f
1:	aesdec	%xmm8,%xmm0
2:	movdqa	(%rax,%rcx),%xmm8	/* load round key */
	add	$0x10,%rcx
	jnz	1b		/* repeat if more rounds */
	aesdeclast %xmm8,%xmm0
	ret
END(aesni_dec1)

/*
 * aesni_dec8(const struct aesdec *deckey@rdi, uint128_t block0@xmm0, ...,
 *	block7@xmm7, uint32_t nrounds@ecx)
 *
 *	Decrypt eight AES blocks in %xmm0 through %xmm7 in parallel.
 *
 *	Internal ABI.  Uses %rax and %xmm8 as temporaries.  Destroys %ecx.
 */
	.text
	_ALIGN_TEXT
	.type	aesni_dec8,@function
aesni_dec8:
	movdqa	(%rdi),%xmm8	/* xor in first round key */
	pxor	%xmm8,%xmm0
	pxor	%xmm8,%xmm1
	pxor	%xmm8,%xmm2
	pxor	%xmm8,%xmm3
	pxor	%xmm8,%xmm4
	pxor	%xmm8,%xmm5
	pxor	%xmm8,%xmm6
	pxor	%xmm8,%xmm7
	shl	$4,%ecx		/* ecx := total byte size of round keys */
	lea	0x10(%rdi,%rcx),%rax	/* rax := end of round key array */
	neg	%rcx		/* rcx := byte offset of round key from end */
	jmp	2f
1:	aesdec	%xmm8,%xmm0
	aesdec	%xmm8,%xmm1
	aesdec	%xmm8,%xmm2
	aesdec	%xmm8,%xmm3
	aesdec	%xmm8,%xmm4
	aesdec	%xmm8,%xmm5
	aesdec	%xmm8,%xmm6
	aesdec	%xmm8,%xmm7
2:	movdqa	(%rax,%rcx),%xmm8	/* load round key */
	add	$0x10,%rcx
	jnz	1b		/* repeat if more rounds */
	aesdeclast %xmm8,%xmm0
	aesdeclast %xmm8,%xmm1
	aesdeclast %xmm8,%xmm2
	aesdeclast %xmm8,%xmm3
	aesdeclast %xmm8,%xmm4
	aesdeclast %xmm8,%xmm5
	aesdeclast %xmm8,%xmm6
	aesdeclast %xmm8,%xmm7
	ret
END(aesni_dec8)