aesni-x86.S revision 1.1 1 #include <machine/asm.h>
2 .text
3 .globl aesni_encrypt
4 .type aesni_encrypt,@function
5 .align 16
/*
 * void aesni_encrypt(const u8 *in, u8 *out, const AES_KEY *key)
 * Encrypt a single 16-byte block with AES-NI.  i386 cdecl stack args:
 * 4(%esp)=in, 8(%esp)=out, 12(%esp)=key schedule (round count at +240).
 * The .byte runs hand-encode AES-NI opcodes for old assemblers; each is
 * decoded in a trailing comment.  Clobbers eax,ecx,edx,xmm0-xmm2.
 */
6 aesni_encrypt:
7 .L_aesni_encrypt_begin:
8 movl 4(%esp),%eax		/* eax = in */
9 movl 12(%esp),%edx		/* edx = key schedule */
10 movups (%eax),%xmm2		/* xmm2 = input block */
11 movl 240(%edx),%ecx		/* ecx = loop count stored at key+240 */
12 movl 8(%esp),%eax		/* eax = out */
13 movups (%edx),%xmm0		/* rk[0] */
14 movups 16(%edx),%xmm1	/* rk[1] */
15 leal 32(%edx),%edx		/* edx -> rk[2] */
16 xorps %xmm0,%xmm2		/* whitening: block ^= rk[0] */
17 .L000enc1_loop_1:		/* one AES round per iteration */
18 .byte 102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
19 decl %ecx
20 movups (%edx),%xmm1		/* fetch next round key */
21 leal 16(%edx),%edx
22 jnz .L000enc1_loop_1
23 .byte 102,15,56,221,209	/* aesenclast %xmm1,%xmm2 (final round) */
24 pxor %xmm0,%xmm0		/* scrub key material from registers */
25 pxor %xmm1,%xmm1
26 movups %xmm2,(%eax)		/* store ciphertext */
27 pxor %xmm2,%xmm2		/* scrub output copy */
28 ret
29 .size aesni_encrypt,.-.L_aesni_encrypt_begin
30 .globl aesni_decrypt
31 .type aesni_decrypt,@function
32 .align 16
/*
 * void aesni_decrypt(const u8 *in, u8 *out, const AES_KEY *key)
 * Single-block AES-NI decryption; mirror image of aesni_encrypt using
 * aesdec/aesdeclast.  4(%esp)=in, 8(%esp)=out, 12(%esp)=key (the key
 * schedule is expected in decryption order).  Clobbers eax,ecx,edx,xmm0-xmm2.
 */
33 aesni_decrypt:
34 .L_aesni_decrypt_begin:
35 movl 4(%esp),%eax		/* eax = in */
36 movl 12(%esp),%edx		/* edx = key schedule */
37 movups (%eax),%xmm2		/* xmm2 = input block */
38 movl 240(%edx),%ecx		/* ecx = loop count from key+240 */
39 movl 8(%esp),%eax		/* eax = out */
40 movups (%edx),%xmm0		/* rk[0] */
41 movups 16(%edx),%xmm1	/* rk[1] */
42 leal 32(%edx),%edx
43 xorps %xmm0,%xmm2		/* whitening */
44 .L001dec1_loop_2:
45 .byte 102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
46 decl %ecx
47 movups (%edx),%xmm1
48 leal 16(%edx),%edx
49 jnz .L001dec1_loop_2
50 .byte 102,15,56,223,209	/* aesdeclast %xmm1,%xmm2 */
51 pxor %xmm0,%xmm0		/* scrub key material */
52 pxor %xmm1,%xmm1
53 movups %xmm2,(%eax)		/* store plaintext */
54 pxor %xmm2,%xmm2
55 ret
56 .size aesni_decrypt,.-.L_aesni_decrypt_begin
57 .type _aesni_encrypt2,@function
58 .align 16
/*
 * _aesni_encrypt2 (internal): encrypt two blocks, xmm2 and xmm3, in parallel
 * to hide aesenc latency.  In: edx = key schedule, ecx = count from key+240.
 * The key pointer is advanced to the end of the schedule and ecx becomes a
 * negative byte offset that is walked back to zero, two round keys per
 * loop iteration.  Clobbers ecx,edx,xmm0,xmm1.
 */
59 _aesni_encrypt2:
60 movups (%edx),%xmm0
61 shll $4,%ecx			/* ecx = 16 * round count */
62 movups 16(%edx),%xmm1
63 xorps %xmm0,%xmm2		/* whiten both blocks with rk[0] */
64 pxor %xmm0,%xmm3
65 movups 32(%edx),%xmm0
66 leal 32(%edx,%ecx,1),%edx	/* edx = end of schedule */
67 negl %ecx
68 addl $16,%ecx		/* negative offset; reaches 0 at the last pair */
69 .L002enc2_loop:		/* two AES rounds per iteration */
70 .byte 102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
71 .byte 102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
72 movups (%edx,%ecx,1),%xmm1
73 addl $32,%ecx
74 .byte 102,15,56,220,208	/* aesenc %xmm0,%xmm2 */
75 .byte 102,15,56,220,216	/* aesenc %xmm0,%xmm3 */
76 movups -16(%edx,%ecx,1),%xmm0
77 jnz .L002enc2_loop		/* flags from the addl above */
78 .byte 102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
79 .byte 102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
80 .byte 102,15,56,221,208	/* aesenclast %xmm0,%xmm2 */
81 .byte 102,15,56,221,216	/* aesenclast %xmm0,%xmm3 */
82 ret
83 .size _aesni_encrypt2,.-_aesni_encrypt2
84 .type _aesni_decrypt2,@function
85 .align 16
/*
 * _aesni_decrypt2 (internal): decrypt two blocks, xmm2 and xmm3, in parallel.
 * Same key-walk scheme as _aesni_encrypt2 but with aesdec/aesdeclast.
 * In: edx = key schedule, ecx = count from key+240.  Clobbers ecx,edx,xmm0,xmm1.
 */
86 _aesni_decrypt2:
87 movups (%edx),%xmm0
88 shll $4,%ecx			/* ecx = 16 * round count */
89 movups 16(%edx),%xmm1
90 xorps %xmm0,%xmm2		/* whitening */
91 pxor %xmm0,%xmm3
92 movups 32(%edx),%xmm0
93 leal 32(%edx,%ecx,1),%edx	/* edx = end of schedule */
94 negl %ecx
95 addl $16,%ecx
96 .L003dec2_loop:		/* two rounds per iteration */
97 .byte 102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
98 .byte 102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
99 movups (%edx,%ecx,1),%xmm1
100 addl $32,%ecx
101 .byte 102,15,56,222,208	/* aesdec %xmm0,%xmm2 */
102 .byte 102,15,56,222,216	/* aesdec %xmm0,%xmm3 */
103 movups -16(%edx,%ecx,1),%xmm0
104 jnz .L003dec2_loop
105 .byte 102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
106 .byte 102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
107 .byte 102,15,56,223,208	/* aesdeclast %xmm0,%xmm2 */
108 .byte 102,15,56,223,216	/* aesdeclast %xmm0,%xmm3 */
109 ret
110 .size _aesni_decrypt2,.-_aesni_decrypt2
111 .type _aesni_encrypt3,@function
112 .align 16
/*
 * _aesni_encrypt3 (internal): encrypt three blocks, xmm2-xmm4, in parallel.
 * In: edx = key schedule, ecx = count from key+240.  Clobbers ecx,edx,xmm0,xmm1.
 */
113 _aesni_encrypt3:
114 movups (%edx),%xmm0
115 shll $4,%ecx		/* ecx = 16 * round count */
116 movups 16(%edx),%xmm1
117 xorps %xmm0,%xmm2		/* whiten all three blocks */
118 pxor %xmm0,%xmm3
119 pxor %xmm0,%xmm4
120 movups 32(%edx),%xmm0
121 leal 32(%edx,%ecx,1),%edx	/* edx = end of schedule */
122 negl %ecx
123 addl $16,%ecx
124 .L004enc3_loop:		/* two rounds per iteration */
125 .byte 102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
126 .byte 102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
127 .byte 102,15,56,220,225	/* aesenc %xmm1,%xmm4 */
128 movups (%edx,%ecx,1),%xmm1
129 addl $32,%ecx
130 .byte 102,15,56,220,208	/* aesenc %xmm0,%xmm2 */
131 .byte 102,15,56,220,216	/* aesenc %xmm0,%xmm3 */
132 .byte 102,15,56,220,224	/* aesenc %xmm0,%xmm4 */
133 movups -16(%edx,%ecx,1),%xmm0
134 jnz .L004enc3_loop
135 .byte 102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
136 .byte 102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
137 .byte 102,15,56,220,225	/* aesenc %xmm1,%xmm4 */
138 .byte 102,15,56,221,208	/* aesenclast %xmm0,%xmm2 */
139 .byte 102,15,56,221,216	/* aesenclast %xmm0,%xmm3 */
140 .byte 102,15,56,221,224	/* aesenclast %xmm0,%xmm4 */
141 ret
142 .size _aesni_encrypt3,.-_aesni_encrypt3
143 .type _aesni_decrypt3,@function
144 .align 16
/*
 * _aesni_decrypt3 (internal): decrypt three blocks, xmm2-xmm4, in parallel.
 * In: edx = key schedule, ecx = count from key+240.  Clobbers ecx,edx,xmm0,xmm1.
 */
145 _aesni_decrypt3:
146 movups (%edx),%xmm0
147 shll $4,%ecx		/* ecx = 16 * round count */
148 movups 16(%edx),%xmm1
149 xorps %xmm0,%xmm2		/* whitening */
150 pxor %xmm0,%xmm3
151 pxor %xmm0,%xmm4
152 movups 32(%edx),%xmm0
153 leal 32(%edx,%ecx,1),%edx
154 negl %ecx
155 addl $16,%ecx
156 .L005dec3_loop:		/* two rounds per iteration */
157 .byte 102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
158 .byte 102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
159 .byte 102,15,56,222,225	/* aesdec %xmm1,%xmm4 */
160 movups (%edx,%ecx,1),%xmm1
161 addl $32,%ecx
162 .byte 102,15,56,222,208	/* aesdec %xmm0,%xmm2 */
163 .byte 102,15,56,222,216	/* aesdec %xmm0,%xmm3 */
164 .byte 102,15,56,222,224	/* aesdec %xmm0,%xmm4 */
165 movups -16(%edx,%ecx,1),%xmm0
166 jnz .L005dec3_loop
167 .byte 102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
168 .byte 102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
169 .byte 102,15,56,222,225	/* aesdec %xmm1,%xmm4 */
170 .byte 102,15,56,223,208	/* aesdeclast %xmm0,%xmm2 */
171 .byte 102,15,56,223,216	/* aesdeclast %xmm0,%xmm3 */
172 .byte 102,15,56,223,224	/* aesdeclast %xmm0,%xmm4 */
173 ret
174 .size _aesni_decrypt3,.-_aesni_decrypt3
175 .type _aesni_encrypt4,@function
176 .align 16
/*
 * _aesni_encrypt4 (internal): encrypt four blocks, xmm2-xmm5, in parallel.
 * In: edx = key schedule, ecx = count from key+240.  Clobbers ecx,edx,xmm0,xmm1.
 */
177 _aesni_encrypt4:
178 movups (%edx),%xmm0
179 movups 16(%edx),%xmm1
180 shll $4,%ecx		/* ecx = 16 * round count */
181 xorps %xmm0,%xmm2		/* whiten all four blocks */
182 pxor %xmm0,%xmm3
183 pxor %xmm0,%xmm4
184 pxor %xmm0,%xmm5
185 movups 32(%edx),%xmm0
186 leal 32(%edx,%ecx,1),%edx
187 negl %ecx
188 .byte 15,31,64,0		/* nopl 0(%eax): padding for loop alignment */
189 addl $16,%ecx
190 .L006enc4_loop:		/* two rounds per iteration */
191 .byte 102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
192 .byte 102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
193 .byte 102,15,56,220,225	/* aesenc %xmm1,%xmm4 */
194 .byte 102,15,56,220,233	/* aesenc %xmm1,%xmm5 */
195 movups (%edx,%ecx,1),%xmm1
196 addl $32,%ecx
197 .byte 102,15,56,220,208	/* aesenc %xmm0,%xmm2 */
198 .byte 102,15,56,220,216	/* aesenc %xmm0,%xmm3 */
199 .byte 102,15,56,220,224	/* aesenc %xmm0,%xmm4 */
200 .byte 102,15,56,220,232	/* aesenc %xmm0,%xmm5 */
201 movups -16(%edx,%ecx,1),%xmm0
202 jnz .L006enc4_loop
203 .byte 102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
204 .byte 102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
205 .byte 102,15,56,220,225	/* aesenc %xmm1,%xmm4 */
206 .byte 102,15,56,220,233	/* aesenc %xmm1,%xmm5 */
207 .byte 102,15,56,221,208	/* aesenclast %xmm0,%xmm2 */
208 .byte 102,15,56,221,216	/* aesenclast %xmm0,%xmm3 */
209 .byte 102,15,56,221,224	/* aesenclast %xmm0,%xmm4 */
210 .byte 102,15,56,221,232	/* aesenclast %xmm0,%xmm5 */
211 ret
212 .size _aesni_encrypt4,.-_aesni_encrypt4
213 .type _aesni_decrypt4,@function
214 .align 16
/*
 * _aesni_decrypt4 (internal): decrypt four blocks, xmm2-xmm5, in parallel.
 * In: edx = key schedule, ecx = count from key+240.  Clobbers ecx,edx,xmm0,xmm1.
 */
215 _aesni_decrypt4:
216 movups (%edx),%xmm0
217 movups 16(%edx),%xmm1
218 shll $4,%ecx		/* ecx = 16 * round count */
219 xorps %xmm0,%xmm2		/* whitening */
220 pxor %xmm0,%xmm3
221 pxor %xmm0,%xmm4
222 pxor %xmm0,%xmm5
223 movups 32(%edx),%xmm0
224 leal 32(%edx,%ecx,1),%edx
225 negl %ecx
226 .byte 15,31,64,0		/* nopl 0(%eax): padding for loop alignment */
227 addl $16,%ecx
228 .L007dec4_loop:		/* two rounds per iteration */
229 .byte 102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
230 .byte 102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
231 .byte 102,15,56,222,225	/* aesdec %xmm1,%xmm4 */
232 .byte 102,15,56,222,233	/* aesdec %xmm1,%xmm5 */
233 movups (%edx,%ecx,1),%xmm1
234 addl $32,%ecx
235 .byte 102,15,56,222,208	/* aesdec %xmm0,%xmm2 */
236 .byte 102,15,56,222,216	/* aesdec %xmm0,%xmm3 */
237 .byte 102,15,56,222,224	/* aesdec %xmm0,%xmm4 */
238 .byte 102,15,56,222,232	/* aesdec %xmm0,%xmm5 */
239 movups -16(%edx,%ecx,1),%xmm0
240 jnz .L007dec4_loop
241 .byte 102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
242 .byte 102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
243 .byte 102,15,56,222,225	/* aesdec %xmm1,%xmm4 */
244 .byte 102,15,56,222,233	/* aesdec %xmm1,%xmm5 */
245 .byte 102,15,56,223,208	/* aesdeclast %xmm0,%xmm2 */
246 .byte 102,15,56,223,216	/* aesdeclast %xmm0,%xmm3 */
247 .byte 102,15,56,223,224	/* aesdeclast %xmm0,%xmm4 */
248 .byte 102,15,56,223,232	/* aesdeclast %xmm0,%xmm5 */
249 ret
250 .size _aesni_decrypt4,.-_aesni_decrypt4
251 .type _aesni_encrypt6,@function
252 .align 16
/*
 * _aesni_encrypt6 (internal): encrypt six blocks, xmm2-xmm7, in parallel.
 * In: edx = key schedule, ecx = count from key+240.  The first rounds for
 * xmm2-xmm4 are interleaved with the remaining whitening XORs to hide
 * latency, which is why the entry path jumps into the middle of the loop.
 * .L_aesni_encrypt6_enter is an extra entry point used by callers (CTR/XTS)
 * that have already issued round 1 themselves.  Clobbers ecx,edx,xmm0,xmm1.
 */
253 _aesni_encrypt6:
254 movups (%edx),%xmm0
255 shll $4,%ecx		/* ecx = 16 * round count */
256 movups 16(%edx),%xmm1
257 xorps %xmm0,%xmm2		/* whiten blocks as rk[0] is still hot */
258 pxor %xmm0,%xmm3
259 pxor %xmm0,%xmm4
260 .byte 102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
261 pxor %xmm0,%xmm5
262 pxor %xmm0,%xmm6
263 .byte 102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
264 leal 32(%edx,%ecx,1),%edx	/* edx = end of schedule */
265 negl %ecx
266 .byte 102,15,56,220,225	/* aesenc %xmm1,%xmm4 */
267 pxor %xmm0,%xmm7
268 movups (%edx,%ecx,1),%xmm0
269 addl $16,%ecx
270 jmp .L008_aesni_encrypt6_inner	/* finish round 1 for xmm5-xmm7 */
271 .align 16
272 .L009enc6_loop:		/* two rounds per iteration for six blocks */
273 .byte 102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
274 .byte 102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
275 .byte 102,15,56,220,225	/* aesenc %xmm1,%xmm4 */
276 .L008_aesni_encrypt6_inner:
277 .byte 102,15,56,220,233	/* aesenc %xmm1,%xmm5 */
278 .byte 102,15,56,220,241	/* aesenc %xmm1,%xmm6 */
279 .byte 102,15,56,220,249	/* aesenc %xmm1,%xmm7 */
280 .L_aesni_encrypt6_enter:	/* external entry: round 1 already done */
281 movups (%edx,%ecx,1),%xmm1
282 addl $32,%ecx
283 .byte 102,15,56,220,208	/* aesenc %xmm0,%xmm2 */
284 .byte 102,15,56,220,216	/* aesenc %xmm0,%xmm3 */
285 .byte 102,15,56,220,224	/* aesenc %xmm0,%xmm4 */
286 .byte 102,15,56,220,232	/* aesenc %xmm0,%xmm5 */
287 .byte 102,15,56,220,240	/* aesenc %xmm0,%xmm6 */
288 .byte 102,15,56,220,248	/* aesenc %xmm0,%xmm7 */
289 movups -16(%edx,%ecx,1),%xmm0
290 jnz .L009enc6_loop
291 .byte 102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
292 .byte 102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
293 .byte 102,15,56,220,225	/* aesenc %xmm1,%xmm4 */
294 .byte 102,15,56,220,233	/* aesenc %xmm1,%xmm5 */
295 .byte 102,15,56,220,241	/* aesenc %xmm1,%xmm6 */
296 .byte 102,15,56,220,249	/* aesenc %xmm1,%xmm7 */
297 .byte 102,15,56,221,208	/* aesenclast %xmm0,%xmm2 */
298 .byte 102,15,56,221,216	/* aesenclast %xmm0,%xmm3 */
299 .byte 102,15,56,221,224	/* aesenclast %xmm0,%xmm4 */
300 .byte 102,15,56,221,232	/* aesenclast %xmm0,%xmm5 */
301 .byte 102,15,56,221,240	/* aesenclast %xmm0,%xmm6 */
302 .byte 102,15,56,221,248	/* aesenclast %xmm0,%xmm7 */
303 ret
304 .size _aesni_encrypt6,.-_aesni_encrypt6
305 .type _aesni_decrypt6,@function
306 .align 16
/*
 * _aesni_decrypt6 (internal): decrypt six blocks, xmm2-xmm7, in parallel.
 * Mirror of _aesni_encrypt6 with aesdec/aesdeclast, including the
 * interleaved entry path and the .L_aesni_decrypt6_enter entry point.
 * In: edx = key schedule, ecx = count from key+240.  Clobbers ecx,edx,xmm0,xmm1.
 */
307 _aesni_decrypt6:
308 movups (%edx),%xmm0
309 shll $4,%ecx		/* ecx = 16 * round count */
310 movups 16(%edx),%xmm1
311 xorps %xmm0,%xmm2		/* whitening interleaved with round 1 */
312 pxor %xmm0,%xmm3
313 pxor %xmm0,%xmm4
314 .byte 102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
315 pxor %xmm0,%xmm5
316 pxor %xmm0,%xmm6
317 .byte 102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
318 leal 32(%edx,%ecx,1),%edx
319 negl %ecx
320 .byte 102,15,56,222,225	/* aesdec %xmm1,%xmm4 */
321 pxor %xmm0,%xmm7
322 movups (%edx,%ecx,1),%xmm0
323 addl $16,%ecx
324 jmp .L010_aesni_decrypt6_inner	/* finish round 1 for xmm5-xmm7 */
325 .align 16
326 .L011dec6_loop:		/* two rounds per iteration */
327 .byte 102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
328 .byte 102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
329 .byte 102,15,56,222,225	/* aesdec %xmm1,%xmm4 */
330 .L010_aesni_decrypt6_inner:
331 .byte 102,15,56,222,233	/* aesdec %xmm1,%xmm5 */
332 .byte 102,15,56,222,241	/* aesdec %xmm1,%xmm6 */
333 .byte 102,15,56,222,249	/* aesdec %xmm1,%xmm7 */
334 .L_aesni_decrypt6_enter:	/* external entry: round 1 already done */
335 movups (%edx,%ecx,1),%xmm1
336 addl $32,%ecx
337 .byte 102,15,56,222,208	/* aesdec %xmm0,%xmm2 */
338 .byte 102,15,56,222,216	/* aesdec %xmm0,%xmm3 */
339 .byte 102,15,56,222,224	/* aesdec %xmm0,%xmm4 */
340 .byte 102,15,56,222,232	/* aesdec %xmm0,%xmm5 */
341 .byte 102,15,56,222,240	/* aesdec %xmm0,%xmm6 */
342 .byte 102,15,56,222,248	/* aesdec %xmm0,%xmm7 */
343 movups -16(%edx,%ecx,1),%xmm0
344 jnz .L011dec6_loop
345 .byte 102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
346 .byte 102,15,56,222,217	/* aesdec %xmm1,%xmm3 */
347 .byte 102,15,56,222,225	/* aesdec %xmm1,%xmm4 */
348 .byte 102,15,56,222,233	/* aesdec %xmm1,%xmm5 */
349 .byte 102,15,56,222,241	/* aesdec %xmm1,%xmm6 */
350 .byte 102,15,56,222,249	/* aesdec %xmm1,%xmm7 */
351 .byte 102,15,56,223,208	/* aesdeclast %xmm0,%xmm2 */
352 .byte 102,15,56,223,216	/* aesdeclast %xmm0,%xmm3 */
353 .byte 102,15,56,223,224	/* aesdeclast %xmm0,%xmm4 */
354 .byte 102,15,56,223,232	/* aesdeclast %xmm0,%xmm5 */
355 .byte 102,15,56,223,240	/* aesdeclast %xmm0,%xmm6 */
356 .byte 102,15,56,223,248	/* aesdeclast %xmm0,%xmm7 */
357 ret
358 .size _aesni_decrypt6,.-_aesni_decrypt6
359 .globl aesni_ecb_encrypt
360 .type aesni_ecb_encrypt,@function
361 .align 16
/*
 * void aesni_ecb_encrypt(const u8 *in, u8 *out, size_t len,
 *                        const AES_KEY *key, int enc)
 * ECB mode: processes len bytes (rounded down to whole 16-byte blocks) in
 * groups of six via _aesni_{en,de}crypt6, with a 1/2/3/4/5-block tail.
 * After the four pushes the args sit at: 20=in, 24=out, 28=len, 32=key,
 * 36=enc (nonzero => encrypt).  ebp/ebx cache key/rounds across the helper
 * calls, which clobber edx/ecx.
 */
362 aesni_ecb_encrypt:
363 .L_aesni_ecb_encrypt_begin:
364 pushl %ebp
365 pushl %ebx
366 pushl %esi
367 pushl %edi
368 movl 20(%esp),%esi		/* esi = in */
369 movl 24(%esp),%edi		/* edi = out */
370 movl 28(%esp),%eax		/* eax = len */
371 movl 32(%esp),%edx		/* edx = key */
372 movl 36(%esp),%ebx		/* ebx = enc flag */
373 andl $-16,%eax		/* whole blocks only */
374 jz .L012ecb_ret		/* nothing to do */
375 movl 240(%edx),%ecx		/* ecx = round count */
376 testl %ebx,%ebx
377 jz .L013ecb_decrypt		/* enc==0 -> decrypt path */
378 movl %edx,%ebp		/* ebp = key (survives helper calls) */
379 movl %ecx,%ebx		/* ebx = rounds (ditto) */
380 cmpl $96,%eax
381 jb .L014ecb_enc_tail		/* fewer than 6 blocks */
382 movdqu (%esi),%xmm2		/* preload first 6 blocks */
383 movdqu 16(%esi),%xmm3
384 movdqu 32(%esi),%xmm4
385 movdqu 48(%esi),%xmm5
386 movdqu 64(%esi),%xmm6
387 movdqu 80(%esi),%xmm7
388 leal 96(%esi),%esi
389 subl $96,%eax
390 jmp .L015ecb_enc_loop6_enter
391 .align 16
392 .L016ecb_enc_loop6:		/* store previous 6, load next 6 */
393 movups %xmm2,(%edi)
394 movdqu (%esi),%xmm2
395 movups %xmm3,16(%edi)
396 movdqu 16(%esi),%xmm3
397 movups %xmm4,32(%edi)
398 movdqu 32(%esi),%xmm4
399 movups %xmm5,48(%edi)
400 movdqu 48(%esi),%xmm5
401 movups %xmm6,64(%edi)
402 movdqu 64(%esi),%xmm6
403 movups %xmm7,80(%edi)
404 leal 96(%edi),%edi
405 movdqu 80(%esi),%xmm7
406 leal 96(%esi),%esi
407 .L015ecb_enc_loop6_enter:
408 call _aesni_encrypt6
409 movl %ebp,%edx		/* restore key/rounds clobbered by helper */
410 movl %ebx,%ecx
411 subl $96,%eax
412 jnc .L016ecb_enc_loop6
413 movups %xmm2,(%edi)		/* flush last full group of 6 */
414 movups %xmm3,16(%edi)
415 movups %xmm4,32(%edi)
416 movups %xmm5,48(%edi)
417 movups %xmm6,64(%edi)
418 movups %xmm7,80(%edi)
419 leal 96(%edi),%edi
420 addl $96,%eax		/* eax = remaining bytes (0..80) */
421 jz .L012ecb_ret
422 .L014ecb_enc_tail:		/* 1..5 remaining blocks */
423 movups (%esi),%xmm2
424 cmpl $32,%eax
425 jb .L017ecb_enc_one
426 movups 16(%esi),%xmm3
427 je .L018ecb_enc_two
428 movups 32(%esi),%xmm4
429 cmpl $64,%eax
430 jb .L019ecb_enc_three
431 movups 48(%esi),%xmm5
432 je .L020ecb_enc_four
433 movups 64(%esi),%xmm6		/* five blocks: pad with dummy 6th */
434 xorps %xmm7,%xmm7
435 call _aesni_encrypt6
436 movups %xmm2,(%edi)
437 movups %xmm3,16(%edi)
438 movups %xmm4,32(%edi)
439 movups %xmm5,48(%edi)
440 movups %xmm6,64(%edi)
441 jmp .L012ecb_ret
442 .align 16
443 .L017ecb_enc_one:		/* inlined single-block encrypt */
444 movups (%edx),%xmm0
445 movups 16(%edx),%xmm1
446 leal 32(%edx),%edx
447 xorps %xmm0,%xmm2
448 .L021enc1_loop_3:
449 .byte 102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
450 decl %ecx
451 movups (%edx),%xmm1
452 leal 16(%edx),%edx
453 jnz .L021enc1_loop_3
454 .byte 102,15,56,221,209	/* aesenclast %xmm1,%xmm2 */
455 movups %xmm2,(%edi)
456 jmp .L012ecb_ret
457 .align 16
458 .L018ecb_enc_two:
459 call _aesni_encrypt2
460 movups %xmm2,(%edi)
461 movups %xmm3,16(%edi)
462 jmp .L012ecb_ret
463 .align 16
464 .L019ecb_enc_three:
465 call _aesni_encrypt3
466 movups %xmm2,(%edi)
467 movups %xmm3,16(%edi)
468 movups %xmm4,32(%edi)
469 jmp .L012ecb_ret
470 .align 16
471 .L020ecb_enc_four:
472 call _aesni_encrypt4
473 movups %xmm2,(%edi)
474 movups %xmm3,16(%edi)
475 movups %xmm4,32(%edi)
476 movups %xmm5,48(%edi)
477 jmp .L012ecb_ret
478 .align 16
479 .L013ecb_decrypt:		/* same structure with the decrypt helpers */
480 movl %edx,%ebp
481 movl %ecx,%ebx
482 cmpl $96,%eax
483 jb .L022ecb_dec_tail
484 movdqu (%esi),%xmm2		/* preload first 6 blocks */
485 movdqu 16(%esi),%xmm3
486 movdqu 32(%esi),%xmm4
487 movdqu 48(%esi),%xmm5
488 movdqu 64(%esi),%xmm6
489 movdqu 80(%esi),%xmm7
490 leal 96(%esi),%esi
491 subl $96,%eax
492 jmp .L023ecb_dec_loop6_enter
493 .align 16
494 .L024ecb_dec_loop6:		/* store previous 6, load next 6 */
495 movups %xmm2,(%edi)
496 movdqu (%esi),%xmm2
497 movups %xmm3,16(%edi)
498 movdqu 16(%esi),%xmm3
499 movups %xmm4,32(%edi)
500 movdqu 32(%esi),%xmm4
501 movups %xmm5,48(%edi)
502 movdqu 48(%esi),%xmm5
503 movups %xmm6,64(%edi)
504 movdqu 64(%esi),%xmm6
505 movups %xmm7,80(%edi)
506 leal 96(%edi),%edi
507 movdqu 80(%esi),%xmm7
508 leal 96(%esi),%esi
509 .L023ecb_dec_loop6_enter:
510 call _aesni_decrypt6
511 movl %ebp,%edx
512 movl %ebx,%ecx
513 subl $96,%eax
514 jnc .L024ecb_dec_loop6
515 movups %xmm2,(%edi)
516 movups %xmm3,16(%edi)
517 movups %xmm4,32(%edi)
518 movups %xmm5,48(%edi)
519 movups %xmm6,64(%edi)
520 movups %xmm7,80(%edi)
521 leal 96(%edi),%edi
522 addl $96,%eax
523 jz .L012ecb_ret
524 .L022ecb_dec_tail:		/* 1..5 remaining blocks */
525 movups (%esi),%xmm2
526 cmpl $32,%eax
527 jb .L025ecb_dec_one
528 movups 16(%esi),%xmm3
529 je .L026ecb_dec_two
530 movups 32(%esi),%xmm4
531 cmpl $64,%eax
532 jb .L027ecb_dec_three
533 movups 48(%esi),%xmm5
534 je .L028ecb_dec_four
535 movups 64(%esi),%xmm6		/* five blocks: dummy 6th */
536 xorps %xmm7,%xmm7
537 call _aesni_decrypt6
538 movups %xmm2,(%edi)
539 movups %xmm3,16(%edi)
540 movups %xmm4,32(%edi)
541 movups %xmm5,48(%edi)
542 movups %xmm6,64(%edi)
543 jmp .L012ecb_ret
544 .align 16
545 .L025ecb_dec_one:		/* inlined single-block decrypt */
546 movups (%edx),%xmm0
547 movups 16(%edx),%xmm1
548 leal 32(%edx),%edx
549 xorps %xmm0,%xmm2
550 .L029dec1_loop_4:
551 .byte 102,15,56,222,209	/* aesdec %xmm1,%xmm2 */
552 decl %ecx
553 movups (%edx),%xmm1
554 leal 16(%edx),%edx
555 jnz .L029dec1_loop_4
556 .byte 102,15,56,223,209	/* aesdeclast %xmm1,%xmm2 */
557 movups %xmm2,(%edi)
558 jmp .L012ecb_ret
559 .align 16
560 .L026ecb_dec_two:
561 call _aesni_decrypt2
562 movups %xmm2,(%edi)
563 movups %xmm3,16(%edi)
564 jmp .L012ecb_ret
565 .align 16
566 .L027ecb_dec_three:
567 call _aesni_decrypt3
568 movups %xmm2,(%edi)
569 movups %xmm3,16(%edi)
570 movups %xmm4,32(%edi)
571 jmp .L012ecb_ret
572 .align 16
573 .L028ecb_dec_four:
574 call _aesni_decrypt4
575 movups %xmm2,(%edi)
576 movups %xmm3,16(%edi)
577 movups %xmm4,32(%edi)
578 movups %xmm5,48(%edi)
579 .L012ecb_ret:			/* common exit: scrub xmm state */
580 pxor %xmm0,%xmm0
581 pxor %xmm1,%xmm1
582 pxor %xmm2,%xmm2
583 pxor %xmm3,%xmm3
584 pxor %xmm4,%xmm4
585 pxor %xmm5,%xmm5
586 pxor %xmm6,%xmm6
587 pxor %xmm7,%xmm7
588 popl %edi
589 popl %esi
590 popl %ebx
591 popl %ebp
592 ret
593 .size aesni_ecb_encrypt,.-.L_aesni_ecb_encrypt_begin
594 .globl aesni_ccm64_encrypt_blocks
595 .type aesni_ccm64_encrypt_blocks,@function
596 .align 16
/*
 * void aesni_ccm64_encrypt_blocks(const u8 *in, u8 *out, size_t blocks,
 *                                 const AES_KEY *key, const u8 *ivec, u8 *cmac)
 * CCM (64-bit counter) encrypt: per block, the counter (xmm2) and the
 * running CMAC (xmm3) are run through AES two-at-a-time; plaintext is
 * absorbed into the CMAC and XORed with the counter keystream for output.
 * After pushes the args are at 20..40(%esp).  An aligned scratch frame is
 * built below esp: 0(%esp) = byte-swap pshufb mask (bytes 0f..00),
 * 16(%esp) = {1,0,0,0} paddq counter increment, 48(%esp) = saved esp.
 */
597 aesni_ccm64_encrypt_blocks:
598 .L_aesni_ccm64_encrypt_blocks_begin:
599 pushl %ebp
600 pushl %ebx
601 pushl %esi
602 pushl %edi
603 movl 20(%esp),%esi		/* esi = in */
604 movl 24(%esp),%edi		/* edi = out */
605 movl 28(%esp),%eax		/* eax = block count */
606 movl 32(%esp),%edx		/* edx = key */
607 movl 36(%esp),%ebx		/* ebx = ivec */
608 movl 40(%esp),%ecx		/* ecx = cmac */
609 movl %esp,%ebp
610 subl $60,%esp
611 andl $-16,%esp		/* 16-byte align the scratch frame */
612 movl %ebp,48(%esp)		/* remember original esp */
613 movdqu (%ebx),%xmm7		/* xmm7 = counter block (iv) */
614 movdqu (%ecx),%xmm3		/* xmm3 = current CMAC value */
615 movl 240(%edx),%ecx		/* ecx = round count */
616 movl $202182159,(%esp)	/* 0x0c0d0e0f..0x00010203: */
617 movl $134810123,4(%esp)	/*   byte-reversal mask for pshufb */
618 movl $67438087,8(%esp)
619 movl $66051,12(%esp)
620 movl $1,%ebx
621 xorl %ebp,%ebp
622 movl %ebx,16(%esp)		/* 16(%esp) = {1,0,0,0} counter increment */
623 movl %ebp,20(%esp)
624 movl %ebp,24(%esp)
625 movl %ebp,28(%esp)
626 shll $4,%ecx			/* ecx = 16 * rounds */
627 movl $16,%ebx
628 leal (%edx),%ebp		/* ebp = key base */
629 movdqa (%esp),%xmm5		/* xmm5 = byte-swap mask */
630 movdqa %xmm7,%xmm2		/* xmm2 = counter copy to encrypt */
631 leal 32(%edx,%ecx,1),%edx	/* edx = end of key schedule */
632 subl %ecx,%ebx		/* ebx = 16 - 16*rounds (neg. loop offset) */
633 .byte 102,15,56,0,253		/* pshufb %xmm5,%xmm7: counter to LE order */
634 .L030ccm64_enc_outer:	/* one input block per iteration */
635 movups (%ebp),%xmm0		/* rk[0] */
636 movl %ebx,%ecx
637 movups (%esi),%xmm6		/* xmm6 = plaintext block */
638 xorps %xmm0,%xmm2		/* counter ^= rk[0] */
639 movups 16(%ebp),%xmm1
640 xorps %xmm6,%xmm0
641 xorps %xmm0,%xmm3		/* cmac ^= plaintext ^ rk[0]: absorb + whiten */
642 movups 32(%ebp),%xmm0
643 .L031ccm64_enc2_loop:	/* AES over counter+cmac, 2 rounds/iter */
644 .byte 102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
645 .byte 102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
646 movups (%edx,%ecx,1),%xmm1
647 addl $32,%ecx
648 .byte 102,15,56,220,208	/* aesenc %xmm0,%xmm2 */
649 .byte 102,15,56,220,216	/* aesenc %xmm0,%xmm3 */
650 movups -16(%edx,%ecx,1),%xmm0
651 jnz .L031ccm64_enc2_loop
652 .byte 102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
653 .byte 102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
654 paddq 16(%esp),%xmm7		/* bump counter while last rounds run */
655 decl %eax
656 .byte 102,15,56,221,208	/* aesenclast %xmm0,%xmm2 */
657 .byte 102,15,56,221,216	/* aesenclast %xmm0,%xmm3 */
658 leal 16(%esi),%esi
659 xorps %xmm2,%xmm6		/* ciphertext = plaintext ^ keystream */
660 movdqa %xmm7,%xmm2		/* next counter */
661 movups %xmm6,(%edi)
662 .byte 102,15,56,0,213		/* pshufb %xmm5,%xmm2: back to BE for AES */
663 leal 16(%edi),%edi
664 jnz .L030ccm64_enc_outer
665 movl 48(%esp),%esp		/* restore caller's esp */
666 movl 40(%esp),%edi		/* edi = cmac output pointer */
667 movups %xmm3,(%edi)		/* write back final CMAC */
668 pxor %xmm0,%xmm0		/* scrub all xmm state */
669 pxor %xmm1,%xmm1
670 pxor %xmm2,%xmm2
671 pxor %xmm3,%xmm3
672 pxor %xmm4,%xmm4
673 pxor %xmm5,%xmm5
674 pxor %xmm6,%xmm6
675 pxor %xmm7,%xmm7
676 popl %edi
677 popl %esi
678 popl %ebx
679 popl %ebp
680 ret
681 .size aesni_ccm64_encrypt_blocks,.-.L_aesni_ccm64_encrypt_blocks_begin
682 .globl aesni_ccm64_decrypt_blocks
683 .type aesni_ccm64_decrypt_blocks,@function
684 .align 16
/*
 * void aesni_ccm64_decrypt_blocks(const u8 *in, u8 *out, size_t blocks,
 *                                 const AES_KEY *key, const u8 *ivec, u8 *cmac)
 * CCM (64-bit counter) decrypt.  Unlike the encrypt path, the first counter
 * block is encrypted stand-alone; then each iteration emits plaintext
 * (ct ^ keystream) before absorbing it into the CMAC in parallel with the
 * next counter encryption.  Frame layout matches the encrypt routine:
 * 0(%esp) byte-swap mask, 16(%esp) {1,0,0,0} increment, 48(%esp) saved esp.
 */
685 aesni_ccm64_decrypt_blocks:
686 .L_aesni_ccm64_decrypt_blocks_begin:
687 pushl %ebp
688 pushl %ebx
689 pushl %esi
690 pushl %edi
691 movl 20(%esp),%esi		/* esi = in */
692 movl 24(%esp),%edi		/* edi = out */
693 movl 28(%esp),%eax		/* eax = block count */
694 movl 32(%esp),%edx		/* edx = key */
695 movl 36(%esp),%ebx		/* ebx = ivec */
696 movl 40(%esp),%ecx		/* ecx = cmac */
697 movl %esp,%ebp
698 subl $60,%esp
699 andl $-16,%esp		/* aligned scratch frame */
700 movl %ebp,48(%esp)
701 movdqu (%ebx),%xmm7		/* xmm7 = counter block */
702 movdqu (%ecx),%xmm3		/* xmm3 = current CMAC */
703 movl 240(%edx),%ecx		/* ecx = round count */
704 movl $202182159,(%esp)	/* byte-reversal pshufb mask */
705 movl $134810123,4(%esp)
706 movl $67438087,8(%esp)
707 movl $66051,12(%esp)
708 movl $1,%ebx
709 xorl %ebp,%ebp
710 movl %ebx,16(%esp)		/* {1,0,0,0} counter increment */
711 movl %ebp,20(%esp)
712 movl %ebp,24(%esp)
713 movl %ebp,28(%esp)
714 movdqa (%esp),%xmm5		/* xmm5 = byte-swap mask */
715 movdqa %xmm7,%xmm2
716 movl %edx,%ebp		/* cache key/rounds across iterations */
717 movl %ecx,%ebx
718 .byte 102,15,56,0,253		/* pshufb %xmm5,%xmm7: counter to LE order */
719 movups (%edx),%xmm0		/* encrypt first counter block (xmm2) */
720 movups 16(%edx),%xmm1
721 leal 32(%edx),%edx
722 xorps %xmm0,%xmm2
723 .L032enc1_loop_5:
724 .byte 102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
725 decl %ecx
726 movups (%edx),%xmm1
727 leal 16(%edx),%edx
728 jnz .L032enc1_loop_5
729 .byte 102,15,56,221,209	/* aesenclast %xmm1,%xmm2 */
730 shll $4,%ebx			/* ebx = 16 * rounds */
731 movl $16,%ecx
732 movups (%esi),%xmm6		/* first ciphertext block */
733 paddq 16(%esp),%xmm7		/* advance counter */
734 leal 16(%esi),%esi
735 subl %ebx,%ecx		/* negative loop offset (16 - 16*rounds) */
736 leal 32(%ebp,%ebx,1),%edx	/* edx = end of key schedule */
737 movl %ecx,%ebx
738 jmp .L033ccm64_dec_outer
739 .align 16
740 .L033ccm64_dec_outer:	/* emit plaintext, then absorb into CMAC */
741 xorps %xmm2,%xmm6		/* plaintext = ct ^ keystream */
742 movdqa %xmm7,%xmm2		/* next counter */
743 movups %xmm6,(%edi)
744 leal 16(%edi),%edi
745 .byte 102,15,56,0,213		/* pshufb %xmm5,%xmm2: to BE for AES */
746 subl $1,%eax
747 jz .L034ccm64_dec_break	/* last block: only the CMAC remains */
748 movups (%ebp),%xmm0		/* rk[0] */
749 movl %ebx,%ecx
750 movups 16(%ebp),%xmm1
751 xorps %xmm0,%xmm6
752 xorps %xmm0,%xmm2		/* counter ^= rk[0] */
753 xorps %xmm6,%xmm3		/* cmac ^= plaintext ^ rk[0] */
754 movups 32(%ebp),%xmm0
755 .L035ccm64_dec2_loop:	/* AES over counter+cmac, 2 rounds/iter */
756 .byte 102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
757 .byte 102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
758 movups (%edx,%ecx,1),%xmm1
759 addl $32,%ecx
760 .byte 102,15,56,220,208	/* aesenc %xmm0,%xmm2 */
761 .byte 102,15,56,220,216	/* aesenc %xmm0,%xmm3 */
762 movups -16(%edx,%ecx,1),%xmm0
763 jnz .L035ccm64_dec2_loop
764 movups (%esi),%xmm6		/* prefetch next ciphertext block */
765 paddq 16(%esp),%xmm7		/* advance counter */
766 .byte 102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
767 .byte 102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
768 .byte 102,15,56,221,208	/* aesenclast %xmm0,%xmm2 */
769 .byte 102,15,56,221,216	/* aesenclast %xmm0,%xmm3 */
770 leal 16(%esi),%esi
771 jmp .L033ccm64_dec_outer
772 .align 16
773 .L034ccm64_dec_break:	/* fold the final plaintext into the CMAC */
774 movl 240(%ebp),%ecx
775 movl %ebp,%edx
776 movups (%edx),%xmm0
777 movups 16(%edx),%xmm1
778 xorps %xmm0,%xmm6		/* whiten plaintext */
779 leal 32(%edx),%edx
780 xorps %xmm6,%xmm3		/* absorb into cmac */
781 .L036enc1_loop_6:		/* single-block AES over the cmac (xmm3) */
782 .byte 102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
783 decl %ecx
784 movups (%edx),%xmm1
785 leal 16(%edx),%edx
786 jnz .L036enc1_loop_6
787 .byte 102,15,56,221,217	/* aesenclast %xmm1,%xmm3 */
788 movl 48(%esp),%esp		/* restore caller's esp */
789 movl 40(%esp),%edi		/* edi = cmac output pointer */
790 movups %xmm3,(%edi)		/* write back final CMAC */
791 pxor %xmm0,%xmm0		/* scrub xmm state */
792 pxor %xmm1,%xmm1
793 pxor %xmm2,%xmm2
794 pxor %xmm3,%xmm3
795 pxor %xmm4,%xmm4
796 pxor %xmm5,%xmm5
797 pxor %xmm6,%xmm6
798 pxor %xmm7,%xmm7
799 popl %edi
800 popl %esi
801 popl %ebx
802 popl %ebp
803 ret
804 .size aesni_ccm64_decrypt_blocks,.-.L_aesni_ccm64_decrypt_blocks_begin
805 .globl aesni_ctr32_encrypt_blocks
806 .type aesni_ctr32_encrypt_blocks,@function
807 .align 16
/*
 * void aesni_ctr32_encrypt_blocks(const u8 *in, u8 *out, size_t blocks,
 *                                 const AES_KEY *key, const u8 *ivec)
 * CTR mode with a 32-bit big-endian counter in the last word of ivec.
 * Strategy: keep six host-order counter values packed as dwords in two xmm
 * registers (saved at 48/64(%esp)), byte-swap them with pshufb, scatter each
 * into lane 3 via pshufd, and XOR with (IV ^ rk[0]) cached at 32(%esp) to
 * form six round-1 inputs per iteration of the 6-block loop.
 * Frame: 0(%esp) byte-swap mask, 16(%esp) {6,6,6,0} dword increment,
 * 80(%esp) saved esp.
 */
808 aesni_ctr32_encrypt_blocks:
809 .L_aesni_ctr32_encrypt_blocks_begin:
810 pushl %ebp
811 pushl %ebx
812 pushl %esi
813 pushl %edi
814 movl 20(%esp),%esi		/* esi = in */
815 movl 24(%esp),%edi		/* edi = out */
816 movl 28(%esp),%eax		/* eax = block count */
817 movl 32(%esp),%edx		/* edx = key */
818 movl 36(%esp),%ebx		/* ebx = ivec */
819 movl %esp,%ebp
820 subl $88,%esp
821 andl $-16,%esp		/* aligned scratch frame */
822 movl %ebp,80(%esp)		/* remember original esp */
823 cmpl $1,%eax
824 je .L037ctr32_one_shortcut	/* single block: skip all the setup */
825 movdqu (%ebx),%xmm7		/* xmm7 = IV block */
826 movl $202182159,(%esp)	/* byte-reversal pshufb mask */
827 movl $134810123,4(%esp)
828 movl $67438087,8(%esp)
829 movl $66051,12(%esp)
830 movl $6,%ecx
831 xorl %ebp,%ebp
832 movl %ecx,16(%esp)		/* 16(%esp) = {6,6,6,0}: per-iteration */
833 movl %ecx,20(%esp)		/*   dword increment for 6 counters */
834 movl %ecx,24(%esp)
835 movl %ebp,28(%esp)
836 .byte 102,15,58,22,251,3	/* pextrd $3,%xmm7,%ebx: BE counter word */
837 .byte 102,15,58,34,253,3	/* pinsrd $3,%ebp,%xmm7: zero counter lane */
838 movl 240(%edx),%ecx		/* ecx = round count */
839 bswap %ebx			/* ebx = counter in host order */
840 pxor %xmm0,%xmm0
841 pxor %xmm1,%xmm1
842 movdqa (%esp),%xmm2		/* xmm2 = byte-swap mask */
843 .byte 102,15,58,34,195,0	/* pinsrd $0,%ebx,%xmm0: ctr+0 */
844 leal 3(%ebx),%ebp		/* ebp = ctr+3 */
845 .byte 102,15,58,34,205,0	/* pinsrd $0,%ebp,%xmm1: ctr+3 */
846 incl %ebx
847 .byte 102,15,58,34,195,1	/* pinsrd $1,%ebx,%xmm0: ctr+1 */
848 incl %ebp
849 .byte 102,15,58,34,205,1	/* pinsrd $1,%ebp,%xmm1: ctr+4 */
850 incl %ebx
851 .byte 102,15,58,34,195,2	/* pinsrd $2,%ebx,%xmm0: ctr+2 */
852 incl %ebp
853 .byte 102,15,58,34,205,2	/* pinsrd $2,%ebp,%xmm1: ctr+5 */
854 movdqa %xmm0,48(%esp)	/* save host-order counters 0..2 */
855 .byte 102,15,56,0,194		/* pshufb %xmm2,%xmm0: byte-swap */
856 movdqu (%edx),%xmm6		/* xmm6 = rk[0] */
857 movdqa %xmm1,64(%esp)	/* save host-order counters 3..5 */
858 .byte 102,15,56,0,202		/* pshufb %xmm2,%xmm1: byte-swap */
859 pshufd $192,%xmm0,%xmm2	/* scatter swapped ctr0 into lane 3 */
860 pshufd $128,%xmm0,%xmm3	/* ctr1 */
861 cmpl $6,%eax
862 jb .L038ctr32_tail		/* fewer than 6 blocks overall */
863 pxor %xmm6,%xmm7		/* cache IV ^ rk[0] */
864 shll $4,%ecx
865 movl $16,%ebx
866 movdqa %xmm7,32(%esp)	/* 32(%esp) = IV ^ rk[0] */
867 movl %edx,%ebp		/* ebp = key base */
868 subl %ecx,%ebx		/* negative key-walk offset */
869 leal 32(%edx,%ecx,1),%edx	/* edx = end of key schedule */
870 subl $6,%eax
871 jmp .L039ctr32_loop6
872 .align 16
873 .L039ctr32_loop6:		/* six blocks per iteration */
874 pshufd $64,%xmm0,%xmm4	/* ctr2 into lane 3 */
875 movdqa 32(%esp),%xmm0	/* xmm0 = IV ^ rk[0] */
876 pshufd $192,%xmm1,%xmm5	/* ctr3 */
877 pxor %xmm0,%xmm2		/* form round-1 inputs for all six */
878 pshufd $128,%xmm1,%xmm6	/* ctr4 */
879 pxor %xmm0,%xmm3
880 pshufd $64,%xmm1,%xmm7	/* ctr5 */
881 movups 16(%ebp),%xmm1	/* rk[1] */
882 pxor %xmm0,%xmm4
883 pxor %xmm0,%xmm5
884 .byte 102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
885 pxor %xmm0,%xmm6
886 pxor %xmm0,%xmm7
887 .byte 102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
888 movups 32(%ebp),%xmm0	/* rk[2] */
889 movl %ebx,%ecx
890 .byte 102,15,56,220,225	/* aesenc %xmm1,%xmm4 */
891 .byte 102,15,56,220,233	/* aesenc %xmm1,%xmm5 */
892 .byte 102,15,56,220,241	/* aesenc %xmm1,%xmm6 */
893 .byte 102,15,56,220,249	/* aesenc %xmm1,%xmm7 */
894 call .L_aesni_encrypt6_enter	/* round 1 done above; finish the cipher */
895 movups (%esi),%xmm1		/* XOR keystream with input */
896 movups 16(%esi),%xmm0
897 xorps %xmm1,%xmm2
898 movups 32(%esi),%xmm1
899 xorps %xmm0,%xmm3
900 movups %xmm2,(%edi)
901 movdqa 16(%esp),%xmm0	/* {6,6,6,0} increment */
902 xorps %xmm1,%xmm4
903 movdqa 64(%esp),%xmm1	/* host-order counters 3..5 */
904 movups %xmm3,16(%edi)
905 movups %xmm4,32(%edi)
906 paddd %xmm0,%xmm1		/* counters 3..5 += 6 */
907 paddd 48(%esp),%xmm0	/* counters 0..2 += 6 */
908 movdqa (%esp),%xmm2		/* byte-swap mask */
909 movups 48(%esi),%xmm3
910 movups 64(%esi),%xmm4
911 xorps %xmm3,%xmm5
912 movups 80(%esi),%xmm3
913 leal 96(%esi),%esi
914 movdqa %xmm0,48(%esp)	/* save updated counters */
915 .byte 102,15,56,0,194		/* pshufb %xmm2,%xmm0: byte-swap 0..2 */
916 xorps %xmm4,%xmm6
917 movups %xmm5,48(%edi)
918 xorps %xmm3,%xmm7
919 movdqa %xmm1,64(%esp)
920 .byte 102,15,56,0,202		/* pshufb %xmm2,%xmm1: byte-swap 3..5 */
921 movups %xmm6,64(%edi)
922 pshufd $192,%xmm0,%xmm2	/* pre-scatter ctr0 for next iteration */
923 movups %xmm7,80(%edi)
924 leal 96(%edi),%edi
925 pshufd $128,%xmm0,%xmm3	/* ctr1 */
926 subl $6,%eax
927 jnc .L039ctr32_loop6
928 addl $6,%eax			/* eax = remaining blocks (0..5) */
929 jz .L040ctr32_ret
930 movdqu (%ebp),%xmm7		/* recover IV^rk0^rk0 = IV for the tail */
931 movl %ebp,%edx
932 pxor 32(%esp),%xmm7		/* xmm7 = IV with counter lane zero */
933 movl 240(%ebp),%ecx		/* restore round count */
934 .L038ctr32_tail:		/* 1..5 remaining blocks */
935 por %xmm7,%xmm2		/* merge IV bytes with scattered counter */
936 cmpl $2,%eax
937 jb .L041ctr32_one
938 pshufd $64,%xmm0,%xmm4
939 por %xmm7,%xmm3
940 je .L042ctr32_two
941 pshufd $192,%xmm1,%xmm5
942 por %xmm7,%xmm4
943 cmpl $4,%eax
944 jb .L043ctr32_three
945 pshufd $128,%xmm1,%xmm6
946 por %xmm7,%xmm5
947 je .L044ctr32_four
948 por %xmm7,%xmm6		/* five blocks */
949 call _aesni_encrypt6
950 movups (%esi),%xmm1
951 movups 16(%esi),%xmm0
952 xorps %xmm1,%xmm2
953 movups 32(%esi),%xmm1
954 xorps %xmm0,%xmm3
955 movups 48(%esi),%xmm0
956 xorps %xmm1,%xmm4
957 movups 64(%esi),%xmm1
958 xorps %xmm0,%xmm5
959 movups %xmm2,(%edi)
960 xorps %xmm1,%xmm6
961 movups %xmm3,16(%edi)
962 movups %xmm4,32(%edi)
963 movups %xmm5,48(%edi)
964 movups %xmm6,64(%edi)
965 jmp .L040ctr32_ret
966 .align 16
967 .L037ctr32_one_shortcut:	/* exactly one block: use IV directly */
968 movups (%ebx),%xmm2
969 movl 240(%edx),%ecx
970 .L041ctr32_one:		/* single-block CTR (inlined encrypt) */
971 movups (%edx),%xmm0
972 movups 16(%edx),%xmm1
973 leal 32(%edx),%edx
974 xorps %xmm0,%xmm2
975 .L045enc1_loop_7:
976 .byte 102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
977 decl %ecx
978 movups (%edx),%xmm1
979 leal 16(%edx),%edx
980 jnz .L045enc1_loop_7
981 .byte 102,15,56,221,209	/* aesenclast %xmm1,%xmm2 */
982 movups (%esi),%xmm6
983 xorps %xmm2,%xmm6		/* out = in ^ keystream */
984 movups %xmm6,(%edi)
985 jmp .L040ctr32_ret
986 .align 16
987 .L042ctr32_two:
988 call _aesni_encrypt2
989 movups (%esi),%xmm5
990 movups 16(%esi),%xmm6
991 xorps %xmm5,%xmm2
992 xorps %xmm6,%xmm3
993 movups %xmm2,(%edi)
994 movups %xmm3,16(%edi)
995 jmp .L040ctr32_ret
996 .align 16
997 .L043ctr32_three:
998 call _aesni_encrypt3
999 movups (%esi),%xmm5
1000 movups 16(%esi),%xmm6
1001 xorps %xmm5,%xmm2
1002 movups 32(%esi),%xmm7
1003 xorps %xmm6,%xmm3
1004 movups %xmm2,(%edi)
1005 xorps %xmm7,%xmm4
1006 movups %xmm3,16(%edi)
1007 movups %xmm4,32(%edi)
1008 jmp .L040ctr32_ret
1009 .align 16
1010 .L044ctr32_four:
1011 call _aesni_encrypt4
1012 movups (%esi),%xmm6
1013 movups 16(%esi),%xmm7
1014 movups 32(%esi),%xmm1
1015 xorps %xmm6,%xmm2
1016 movups 48(%esi),%xmm0
1017 xorps %xmm7,%xmm3
1018 movups %xmm2,(%edi)
1019 xorps %xmm1,%xmm4
1020 movups %xmm3,16(%edi)
1021 xorps %xmm0,%xmm5
1022 movups %xmm4,32(%edi)
1023 movups %xmm5,48(%edi)
1024 .L040ctr32_ret:		/* common exit: scrub registers and frame */
1025 pxor %xmm0,%xmm0
1026 pxor %xmm1,%xmm1
1027 pxor %xmm2,%xmm2
1028 pxor %xmm3,%xmm3
1029 pxor %xmm4,%xmm4
1030 movdqa %xmm0,32(%esp)	/* wipe cached IV^rk[0] */
1031 pxor %xmm5,%xmm5
1032 movdqa %xmm0,48(%esp)	/* wipe counter copies */
1033 pxor %xmm6,%xmm6
1034 movdqa %xmm0,64(%esp)
1035 pxor %xmm7,%xmm7
1036 movl 80(%esp),%esp		/* restore caller's esp */
1037 popl %edi
1038 popl %esi
1039 popl %ebx
1040 popl %ebp
1041 ret
1042 .size aesni_ctr32_encrypt_blocks,.-.L_aesni_ctr32_encrypt_blocks_begin
1043 .globl aesni_xts_encrypt
1044 .type aesni_xts_encrypt,@function
1045 .align 16
/*
 * void aesni_xts_encrypt(const u8 *in, u8 *out, size_t len,
 *                        const AES_KEY *key1, const AES_KEY *key2,
 *                        const u8 *iv)
 * XTS-AES encryption.  First the tweak (iv) is encrypted with key2; data is
 * then encrypted with key1, six blocks per iteration, each XORed with its
 * tweak before and after the cipher.  Successive tweaks are derived by the
 * standard doubling in GF(2^128): pcmpgtd/pshufd $19 broadcasts the sign of
 * the high qword, paddq doubles, and pand with 0x87 (at 96(%esp)) applies
 * the reduction polynomial.  A trailing partial block is handled with
 * ciphertext stealing (.L057xts_enc_steal).
 * Args after pushes: 20=in, 24=out, 28=len, 32=key1, 36=key2, 40=iv.
 * Frame: 0..80(%esp) six tweaks, 96(%esp) poly constant, 112(%esp) len
 * residue, 116(%esp) saved esp.
 */
1046 aesni_xts_encrypt:
1047 .L_aesni_xts_encrypt_begin:
1048 pushl %ebp
1049 pushl %ebx
1050 pushl %esi
1051 pushl %edi
1052 movl 36(%esp),%edx		/* edx = key2 (tweak key) */
1053 movl 40(%esp),%esi		/* esi = iv */
1054 movl 240(%edx),%ecx
1055 movups (%esi),%xmm2		/* xmm2 = iv */
1056 movups (%edx),%xmm0		/* encrypt iv with key2 -> initial tweak */
1057 movups 16(%edx),%xmm1
1058 leal 32(%edx),%edx
1059 xorps %xmm0,%xmm2
1060 .L046enc1_loop_8:
1061 .byte 102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
1062 decl %ecx
1063 movups (%edx),%xmm1
1064 leal 16(%edx),%edx
1065 jnz .L046enc1_loop_8
1066 .byte 102,15,56,221,209	/* aesenclast %xmm1,%xmm2 */
1067 movl 20(%esp),%esi		/* esi = in */
1068 movl 24(%esp),%edi		/* edi = out */
1069 movl 28(%esp),%eax		/* eax = len */
1070 movl 32(%esp),%edx		/* edx = key1 (data key) */
1071 movl %esp,%ebp
1072 subl $120,%esp
1073 movl 240(%edx),%ecx
1074 andl $-16,%esp		/* aligned tweak scratch area */
1075 movl $135,96(%esp)		/* 0x87: GF(2^128) reduction constant */
1076 movl $0,100(%esp)
1077 movl $1,104(%esp)
1078 movl $0,108(%esp)
1079 movl %eax,112(%esp)	/* full length (for the stolen tail) */
1080 movl %ebp,116(%esp)	/* saved esp */
1081 movdqa %xmm2,%xmm1		/* xmm1 = current tweak */
1082 pxor %xmm0,%xmm0
1083 movdqa 96(%esp),%xmm3	/* xmm3 = reduction constant */
1084 pcmpgtd %xmm1,%xmm0	/* sign mask for tweak doubling */
1085 andl $-16,%eax
1086 movl %edx,%ebp		/* cache key1/rounds */
1087 movl %ecx,%ebx
1088 subl $96,%eax
1089 jc .L047xts_enc_short	/* fewer than 6 whole blocks */
1090 shll $4,%ecx
1091 movl $16,%ebx
1092 subl %ecx,%ebx		/* negative key-walk offset */
1093 leal 32(%edx,%ecx,1),%edx	/* edx = end of key schedule */
1094 jmp .L048xts_enc_loop6
1095 .align 16
1096 .L048xts_enc_loop6:	/* derive 6 tweaks, then encrypt 6 blocks */
1097 pshufd $19,%xmm0,%xmm2	/* tweak *= x in GF(2^128) ... */
1098 pxor %xmm0,%xmm0
1099 movdqa %xmm1,(%esp)	/* tweak[0] */
1100 paddq %xmm1,%xmm1
1101 pand %xmm3,%xmm2		/* ... apply 0x87 feedback */
1102 pcmpgtd %xmm1,%xmm0
1103 pxor %xmm2,%xmm1
1104 pshufd $19,%xmm0,%xmm2
1105 pxor %xmm0,%xmm0
1106 movdqa %xmm1,16(%esp)	/* tweak[1] */
1107 paddq %xmm1,%xmm1
1108 pand %xmm3,%xmm2
1109 pcmpgtd %xmm1,%xmm0
1110 pxor %xmm2,%xmm1
1111 pshufd $19,%xmm0,%xmm2
1112 pxor %xmm0,%xmm0
1113 movdqa %xmm1,32(%esp)	/* tweak[2] */
1114 paddq %xmm1,%xmm1
1115 pand %xmm3,%xmm2
1116 pcmpgtd %xmm1,%xmm0
1117 pxor %xmm2,%xmm1
1118 pshufd $19,%xmm0,%xmm2
1119 pxor %xmm0,%xmm0
1120 movdqa %xmm1,48(%esp)	/* tweak[3] */
1121 paddq %xmm1,%xmm1
1122 pand %xmm3,%xmm2
1123 pcmpgtd %xmm1,%xmm0
1124 pxor %xmm2,%xmm1
1125 pshufd $19,%xmm0,%xmm7
1126 movdqa %xmm1,64(%esp)	/* tweak[4] */
1127 paddq %xmm1,%xmm1
1128 movups (%ebp),%xmm0	/* rk[0] */
1129 pand %xmm3,%xmm7
1130 movups (%esi),%xmm2	/* load 6 plaintext blocks */
1131 pxor %xmm1,%xmm7		/* xmm7 = tweak[5] */
1132 movl %ebx,%ecx
1133 movdqu 16(%esi),%xmm3
1134 xorps %xmm0,%xmm2		/* block ^= rk[0] */
1135 movdqu 32(%esi),%xmm4
1136 pxor %xmm0,%xmm3
1137 movdqu 48(%esi),%xmm5
1138 pxor %xmm0,%xmm4
1139 movdqu 64(%esi),%xmm6
1140 pxor %xmm0,%xmm5
1141 movdqu 80(%esi),%xmm1
1142 pxor %xmm0,%xmm6
1143 leal 96(%esi),%esi
1144 pxor (%esp),%xmm2		/* block ^= tweak */
1145 movdqa %xmm7,80(%esp)	/* tweak[5] saved for the output XOR */
1146 pxor %xmm1,%xmm7		/* block5 ^ tweak[5] */
1147 movups 16(%ebp),%xmm1	/* rk[1] */
1148 pxor 16(%esp),%xmm3
1149 pxor 32(%esp),%xmm4
1150 .byte 102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
1151 pxor 48(%esp),%xmm5
1152 pxor 64(%esp),%xmm6
1153 .byte 102,15,56,220,217	/* aesenc %xmm1,%xmm3 */
1154 pxor %xmm0,%xmm7		/* finish whitening block5 */
1155 movups 32(%ebp),%xmm0
1156 .byte 102,15,56,220,225	/* aesenc %xmm1,%xmm4 */
1157 .byte 102,15,56,220,233	/* aesenc %xmm1,%xmm5 */
1158 .byte 102,15,56,220,241	/* aesenc %xmm1,%xmm6 */
1159 .byte 102,15,56,220,249	/* aesenc %xmm1,%xmm7 */
1160 call .L_aesni_encrypt6_enter	/* round 1 issued above */
1161 movdqa 80(%esp),%xmm1	/* tweak[5] */
1162 pxor %xmm0,%xmm0
1163 xorps (%esp),%xmm2		/* post-XOR each block with its tweak */
1164 pcmpgtd %xmm1,%xmm0	/* start doubling tweak[5] -> next tweak */
1165 xorps 16(%esp),%xmm3
1166 movups %xmm2,(%edi)
1167 xorps 32(%esp),%xmm4
1168 movups %xmm3,16(%edi)
1169 xorps 48(%esp),%xmm5
1170 movups %xmm4,32(%edi)
1171 xorps 64(%esp),%xmm6
1172 movups %xmm5,48(%edi)
1173 xorps %xmm1,%xmm7
1174 movups %xmm6,64(%edi)
1175 pshufd $19,%xmm0,%xmm2
1176 movups %xmm7,80(%edi)
1177 leal 96(%edi),%edi
1178 movdqa 96(%esp),%xmm3	/* reload 0x87 constant */
1179 pxor %xmm0,%xmm0
1180 paddq %xmm1,%xmm1
1181 pand %xmm3,%xmm2
1182 pcmpgtd %xmm1,%xmm0
1183 pxor %xmm2,%xmm1		/* xmm1 = next tweak */
1184 subl $96,%eax
1185 jnc .L048xts_enc_loop6
1186 movl 240(%ebp),%ecx	/* restore key1/rounds for the tail */
1187 movl %ebp,%edx
1188 movl %ecx,%ebx
1189 .L047xts_enc_short:
1190 addl $96,%eax		/* eax = remaining bytes */
1191 jz .L049xts_enc_done6x
1192 movdqa %xmm1,%xmm5		/* xmm5 = tweak for next block */
1193 cmpl $32,%eax
1194 jb .L050xts_enc_one
1195 pshufd $19,%xmm0,%xmm2	/* double the tweak once more */
1196 pxor %xmm0,%xmm0
1197 paddq %xmm1,%xmm1
1198 pand %xmm3,%xmm2
1199 pcmpgtd %xmm1,%xmm0
1200 pxor %xmm2,%xmm1
1201 je .L051xts_enc_two
1202 pshufd $19,%xmm0,%xmm2
1203 pxor %xmm0,%xmm0
1204 movdqa %xmm1,%xmm6		/* xmm6 = third tweak */
1205 paddq %xmm1,%xmm1
1206 pand %xmm3,%xmm2
1207 pcmpgtd %xmm1,%xmm0
1208 pxor %xmm2,%xmm1
1209 cmpl $64,%eax
1210 jb .L052xts_enc_three
1211 pshufd $19,%xmm0,%xmm2
1212 pxor %xmm0,%xmm0
1213 movdqa %xmm1,%xmm7		/* xmm7 = fourth tweak */
1214 paddq %xmm1,%xmm1
1215 pand %xmm3,%xmm2
1216 pcmpgtd %xmm1,%xmm0
1217 pxor %xmm2,%xmm1
1218 movdqa %xmm5,(%esp)	/* spill tweaks for 4/5-block paths */
1219 movdqa %xmm6,16(%esp)
1220 je .L053xts_enc_four
1221 movdqa %xmm7,32(%esp)	/* five blocks */
1222 pshufd $19,%xmm0,%xmm7
1223 movdqa %xmm1,48(%esp)
1224 paddq %xmm1,%xmm1
1225 pand %xmm3,%xmm7
1226 pxor %xmm1,%xmm7		/* fifth tweak */
1227 movdqu (%esi),%xmm2
1228 movdqu 16(%esi),%xmm3
1229 movdqu 32(%esi),%xmm4
1230 pxor (%esp),%xmm2		/* pre-XOR with tweaks */
1231 movdqu 48(%esi),%xmm5
1232 pxor 16(%esp),%xmm3
1233 movdqu 64(%esi),%xmm6
1234 pxor 32(%esp),%xmm4
1235 leal 80(%esi),%esi
1236 pxor 48(%esp),%xmm5
1237 movdqa %xmm7,64(%esp)
1238 pxor %xmm7,%xmm6
1239 call _aesni_encrypt6
1240 movaps 64(%esp),%xmm1
1241 xorps (%esp),%xmm2		/* post-XOR with tweaks */
1242 xorps 16(%esp),%xmm3
1243 xorps 32(%esp),%xmm4
1244 movups %xmm2,(%edi)
1245 xorps 48(%esp),%xmm5
1246 movups %xmm3,16(%edi)
1247 xorps %xmm1,%xmm6
1248 movups %xmm4,32(%edi)
1249 movups %xmm5,48(%edi)
1250 movups %xmm6,64(%edi)
1251 leal 80(%edi),%edi
1252 jmp .L054xts_enc_done
1253 .align 16
1254 .L050xts_enc_one:		/* one block, tweak in xmm5 */
1255 movups (%esi),%xmm2
1256 leal 16(%esi),%esi
1257 xorps %xmm5,%xmm2		/* pre-XOR */
1258 movups (%edx),%xmm0
1259 movups 16(%edx),%xmm1
1260 leal 32(%edx),%edx
1261 xorps %xmm0,%xmm2
1262 .L055enc1_loop_9:
1263 .byte 102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
1264 decl %ecx
1265 movups (%edx),%xmm1
1266 leal 16(%edx),%edx
1267 jnz .L055enc1_loop_9
1268 .byte 102,15,56,221,209	/* aesenclast %xmm1,%xmm2 */
1269 xorps %xmm5,%xmm2		/* post-XOR */
1270 movups %xmm2,(%edi)
1271 leal 16(%edi),%edi
1272 movdqa %xmm5,%xmm1		/* propagate last tweak for stealing */
1273 jmp .L054xts_enc_done
1274 .align 16
1275 .L051xts_enc_two:		/* two blocks, tweaks xmm5/xmm6 */
1276 movaps %xmm1,%xmm6
1277 movups (%esi),%xmm2
1278 movups 16(%esi),%xmm3
1279 leal 32(%esi),%esi
1280 xorps %xmm5,%xmm2
1281 xorps %xmm6,%xmm3
1282 call _aesni_encrypt2
1283 xorps %xmm5,%xmm2
1284 xorps %xmm6,%xmm3
1285 movups %xmm2,(%edi)
1286 movups %xmm3,16(%edi)
1287 leal 32(%edi),%edi
1288 movdqa %xmm6,%xmm1
1289 jmp .L054xts_enc_done
1290 .align 16
1291 .L052xts_enc_three:	/* three blocks, tweaks xmm5/xmm6/xmm7 */
1292 movaps %xmm1,%xmm7
1293 movups (%esi),%xmm2
1294 movups 16(%esi),%xmm3
1295 movups 32(%esi),%xmm4
1296 leal 48(%esi),%esi
1297 xorps %xmm5,%xmm2
1298 xorps %xmm6,%xmm3
1299 xorps %xmm7,%xmm4
1300 call _aesni_encrypt3
1301 xorps %xmm5,%xmm2
1302 xorps %xmm6,%xmm3
1303 xorps %xmm7,%xmm4
1304 movups %xmm2,(%edi)
1305 movups %xmm3,16(%edi)
1306 movups %xmm4,32(%edi)
1307 leal 48(%edi),%edi
1308 movdqa %xmm7,%xmm1
1309 jmp .L054xts_enc_done
1310 .align 16
1311 .L053xts_enc_four:		/* four blocks; first two tweaks on stack */
1312 movaps %xmm1,%xmm6
1313 movups (%esi),%xmm2
1314 movups 16(%esi),%xmm3
1315 movups 32(%esi),%xmm4
1316 xorps (%esp),%xmm2
1317 movups 48(%esi),%xmm5
1318 leal 64(%esi),%esi
1319 xorps 16(%esp),%xmm3
1320 xorps %xmm7,%xmm4
1321 xorps %xmm6,%xmm5
1322 call _aesni_encrypt4
1323 xorps (%esp),%xmm2
1324 xorps 16(%esp),%xmm3
1325 xorps %xmm7,%xmm4
1326 movups %xmm2,(%edi)
1327 xorps %xmm6,%xmm5
1328 movups %xmm3,16(%edi)
1329 movups %xmm4,32(%edi)
1330 movups %xmm5,48(%edi)
1331 leal 64(%edi),%edi
1332 movdqa %xmm6,%xmm1
1333 jmp .L054xts_enc_done
1334 .align 16
1335 .L049xts_enc_done6x:	/* len was a multiple of 96 */
1336 movl 112(%esp),%eax
1337 andl $15,%eax		/* partial-block residue */
1338 jz .L056xts_enc_ret
1339 movdqa %xmm1,%xmm5		/* next tweak already computed */
1340 movl %eax,112(%esp)
1341 jmp .L057xts_enc_steal
1342 .align 16
1343 .L054xts_enc_done:
1344 movl 112(%esp),%eax
1345 pxor %xmm0,%xmm0
1346 andl $15,%eax		/* partial-block residue */
1347 jz .L056xts_enc_ret
1348 pcmpgtd %xmm1,%xmm0	/* derive tweak for the stolen block */
1349 movl %eax,112(%esp)
1350 pshufd $19,%xmm0,%xmm5
1351 paddq %xmm1,%xmm1
1352 pand 96(%esp),%xmm5
1353 pxor %xmm1,%xmm5
1354 .L057xts_enc_steal:	/* ciphertext stealing, one byte at a time */
1355 movzbl (%esi),%ecx		/* next plaintext byte */
1356 movzbl -16(%edi),%edx	/* byte of last full ciphertext block */
1357 leal 1(%esi),%esi
1358 movb %cl,-16(%edi)		/* plaintext byte into last block */
1359 movb %dl,(%edi)		/* stolen ciphertext byte appended */
1360 leal 1(%edi),%edi
1361 subl $1,%eax
1362 jnz .L057xts_enc_steal
1363 subl 112(%esp),%edi	/* rewind to the last full block */
1364 movl %ebp,%edx		/* key1/rounds for the final encrypt */
1365 movl %ebx,%ecx
1366 movups -16(%edi),%xmm2	/* re-encrypt the merged block */
1367 xorps %xmm5,%xmm2		/* pre-XOR with stolen-block tweak */
1368 movups (%edx),%xmm0
1369 movups 16(%edx),%xmm1
1370 leal 32(%edx),%edx
1371 xorps %xmm0,%xmm2
1372 .L058enc1_loop_10:
1373 .byte 102,15,56,220,209	/* aesenc %xmm1,%xmm2 */
1374 decl %ecx
1375 movups (%edx),%xmm1
1376 leal 16(%edx),%edx
1377 jnz .L058enc1_loop_10
1378 .byte 102,15,56,221,209	/* aesenclast %xmm1,%xmm2 */
1379 xorps %xmm5,%xmm2		/* post-XOR */
1380 movups %xmm2,-16(%edi)
1381 .L056xts_enc_ret:		/* scrub registers and tweak scratch */
1382 pxor %xmm0,%xmm0
1383 pxor %xmm1,%xmm1
1384 pxor %xmm2,%xmm2
1385 movdqa %xmm0,(%esp)
1386 pxor %xmm3,%xmm3
1387 movdqa %xmm0,16(%esp)
1388 pxor %xmm4,%xmm4
1389 movdqa %xmm0,32(%esp)
1390 pxor %xmm5,%xmm5
1391 movdqa %xmm0,48(%esp)
1392 pxor %xmm6,%xmm6
1393 movdqa %xmm0,64(%esp)
1394 pxor %xmm7,%xmm7
1395 movdqa %xmm0,80(%esp)
1396 movl 116(%esp),%esp	/* restore caller's esp */
1397 popl %edi
1398 popl %esi
1399 popl %ebx
1400 popl %ebp
1401 ret
1402 .size aesni_xts_encrypt,.-.L_aesni_xts_encrypt_begin
.globl aesni_xts_decrypt
.type aesni_xts_decrypt,@function
.align 16
# void aesni_xts_decrypt(inp, out, len, key1, key2, iv)   (cdecl)
#   20(%esp) inp   - ciphertext
#   24(%esp) out   - plaintext destination
#   28(%esp) len   - byte length
#   32(%esp) key1  - data (decryption) key schedule, rounds at +240
#   36(%esp) key2  - tweak (encryption) key schedule
#   40(%esp) iv    - 16-byte initial tweak input
# The initial tweak is produced by ENCRYPTING the IV under key2 — even in
# the decrypt direction (note the aesenc loop below); only the data blocks
# use aesdec.
aesni_xts_decrypt:
.L_aesni_xts_decrypt_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 36(%esp),%edx                      # key2
movl 40(%esp),%esi                      # iv
movl 240(%edx),%ecx                     # tweak-key round counter
movups (%esi),%xmm2
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
.L059enc1_loop_11:
.byte 102,15,56,220,209                 # aesenc %xmm1,%xmm2 (tweak = E_key2(iv))
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
jnz .L059enc1_loop_11
.byte 102,15,56,221,209                 # aesenclast %xmm1,%xmm2
movl 20(%esp),%esi                      # inp
movl 24(%esp),%edi                      # out
movl 28(%esp),%eax                      # len
movl 32(%esp),%edx                      # key1
movl %esp,%ebp
subl $120,%esp                          # 120-byte scratch frame,
andl $-16,%esp                          # 16-byte aligned for movdqa
# If len is not a multiple of 16, hold back one extra whole block so the
# ciphertext-stealing tail has something to steal from.
xorl %ebx,%ebx
testl $15,%eax
setnz %bl
shll $4,%ebx
subl %ebx,%eax
# 96(%esp) = {0x87, 0, 1, 0}: GF(2^128) reduction constant for tweak
# doubling (x^128 + x^7 + x^2 + x + 1).
movl $135,96(%esp)
movl $0,100(%esp)
movl $1,104(%esp)
movl $0,108(%esp)
movl %eax,112(%esp)                     # residual byte count
movl %ebp,116(%esp)                     # saved %esp
movl 240(%edx),%ecx
movl %edx,%ebp                          # stash key ptr / rounds: %edx/%ecx
movl %ecx,%ebx                          # get clobbered by the helpers
movdqa %xmm2,%xmm1                      # %xmm1 = current tweak
pxor %xmm0,%xmm0
movdqa 96(%esp),%xmm3
pcmpgtd %xmm1,%xmm0                     # carry mask for first doubling
andl $-16,%eax
subl $96,%eax
jc .L060xts_dec_short                   # fewer than 6 blocks
shll $4,%ecx
movl $16,%ebx
subl %ecx,%ebx
leal 32(%edx,%ecx,1),%edx               # point past the key schedule
jmp .L061xts_dec_loop6
.align 16
.L061xts_dec_loop6:
# Generate six consecutive tweaks (each step: double in GF(2^128) with
# the pcmpgtd/pshufd/paddq/pand/pxor idiom), parking the first five at
# (%esp)..64(%esp) and the sixth in %xmm7/80(%esp).
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,(%esp)
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,16(%esp)
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,32(%esp)
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,48(%esp)
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
pshufd $19,%xmm0,%xmm7
movdqa %xmm1,64(%esp)
paddq %xmm1,%xmm1
movups (%ebp),%xmm0                     # round-0 key
pand %xmm3,%xmm7
movups (%esi),%xmm2                     # load 6 ciphertext blocks,
pxor %xmm1,%xmm7
movl %ebx,%ecx
movdqu 16(%esi),%xmm3
xorps %xmm0,%xmm2                       # fold round-0 key in early
movdqu 32(%esi),%xmm4
pxor %xmm0,%xmm3
movdqu 48(%esi),%xmm5
pxor %xmm0,%xmm4
movdqu 64(%esi),%xmm6
pxor %xmm0,%xmm5
movdqu 80(%esi),%xmm1
pxor %xmm0,%xmm6
leal 96(%esi),%esi
pxor (%esp),%xmm2                       # xor each block with its tweak
movdqa %xmm7,80(%esp)
pxor %xmm1,%xmm7
movups 16(%ebp),%xmm1
pxor 16(%esp),%xmm3
pxor 32(%esp),%xmm4
.byte 102,15,56,222,209                 # aesdec %xmm1,%xmm2
pxor 48(%esp),%xmm5
pxor 64(%esp),%xmm6
.byte 102,15,56,222,217                 # aesdec %xmm1,%xmm3
pxor %xmm0,%xmm7
movups 32(%ebp),%xmm0
.byte 102,15,56,222,225                 # aesdec %xmm1,%xmm4
.byte 102,15,56,222,233                 # aesdec %xmm1,%xmm5
.byte 102,15,56,222,241                 # aesdec %xmm1,%xmm6
.byte 102,15,56,222,249                 # aesdec %xmm1,%xmm7
call .L_aesni_decrypt6_enter            # finish rounds for all six blocks
movdqa 80(%esp),%xmm1                   # sixth tweak
pxor %xmm0,%xmm0
xorps (%esp),%xmm2                      # un-tweak and store results
pcmpgtd %xmm1,%xmm0                     # (interleaved: start next doubling)
xorps 16(%esp),%xmm3
movups %xmm2,(%edi)
xorps 32(%esp),%xmm4
movups %xmm3,16(%edi)
xorps 48(%esp),%xmm5
movups %xmm4,32(%edi)
xorps 64(%esp),%xmm6
movups %xmm5,48(%edi)
xorps %xmm1,%xmm7
movups %xmm6,64(%edi)
pshufd $19,%xmm0,%xmm2
movups %xmm7,80(%edi)
leal 96(%edi),%edi
movdqa 96(%esp),%xmm3                   # reload reduction constant
pxor %xmm0,%xmm0
paddq %xmm1,%xmm1                       # advance tweak for next iteration
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
subl $96,%eax
jnc .L061xts_dec_loop6
movl 240(%ebp),%ecx                     # restore key ptr/rounds for tails
movl %ebp,%edx
movl %ecx,%ebx
.L060xts_dec_short:
# 0..5 whole blocks remain (%eax + 96 = bytes left); dispatch on count.
addl $96,%eax
jz .L062xts_dec_done6x
movdqa %xmm1,%xmm5                      # tweak for the next block
cmpl $32,%eax
jb .L063xts_dec_one
pshufd $19,%xmm0,%xmm2                  # double the tweak (see loop above)
pxor %xmm0,%xmm0
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
je .L064xts_dec_two
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,%xmm6
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
cmpl $64,%eax
jb .L065xts_dec_three
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa %xmm1,%xmm7
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
movdqa %xmm5,(%esp)                     # spill first two tweaks
movdqa %xmm6,16(%esp)
je .L066xts_dec_four
# Five blocks: fifth tweak built in %xmm7, stored at 64(%esp).
movdqa %xmm7,32(%esp)
pshufd $19,%xmm0,%xmm7
movdqa %xmm1,48(%esp)
paddq %xmm1,%xmm1
pand %xmm3,%xmm7
pxor %xmm1,%xmm7
movdqu (%esi),%xmm2
movdqu 16(%esi),%xmm3
movdqu 32(%esi),%xmm4
pxor (%esp),%xmm2
movdqu 48(%esi),%xmm5
pxor 16(%esp),%xmm3
movdqu 64(%esi),%xmm6
pxor 32(%esp),%xmm4
leal 80(%esi),%esi
pxor 48(%esp),%xmm5
movdqa %xmm7,64(%esp)
pxor %xmm7,%xmm6
call _aesni_decrypt6                    # sixth slot unused
movaps 64(%esp),%xmm1
xorps (%esp),%xmm2
xorps 16(%esp),%xmm3
xorps 32(%esp),%xmm4
movups %xmm2,(%edi)
xorps 48(%esp),%xmm5
movups %xmm3,16(%edi)
xorps %xmm1,%xmm6
movups %xmm4,32(%edi)
movups %xmm5,48(%edi)
movups %xmm6,64(%edi)
leal 80(%edi),%edi
jmp .L067xts_dec_done
.align 16
.L063xts_dec_one:
# One block: tweak-xor, decrypt, tweak-xor.
movups (%esi),%xmm2
leal 16(%esi),%esi
xorps %xmm5,%xmm2
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
.L068dec1_loop_12:
.byte 102,15,56,222,209                 # aesdec %xmm1,%xmm2
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
jnz .L068dec1_loop_12
.byte 102,15,56,223,209                 # aesdeclast %xmm1,%xmm2
xorps %xmm5,%xmm2
movups %xmm2,(%edi)
leal 16(%edi),%edi
movdqa %xmm5,%xmm1                      # %xmm1 = last tweak used
jmp .L067xts_dec_done
.align 16
.L064xts_dec_two:
movaps %xmm1,%xmm6
movups (%esi),%xmm2
movups 16(%esi),%xmm3
leal 32(%esi),%esi
xorps %xmm5,%xmm2
xorps %xmm6,%xmm3
call _aesni_decrypt2
xorps %xmm5,%xmm2
xorps %xmm6,%xmm3
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
leal 32(%edi),%edi
movdqa %xmm6,%xmm1
jmp .L067xts_dec_done
.align 16
.L065xts_dec_three:
movaps %xmm1,%xmm7
movups (%esi),%xmm2
movups 16(%esi),%xmm3
movups 32(%esi),%xmm4
leal 48(%esi),%esi
xorps %xmm5,%xmm2
xorps %xmm6,%xmm3
xorps %xmm7,%xmm4
call _aesni_decrypt3
xorps %xmm5,%xmm2
xorps %xmm6,%xmm3
xorps %xmm7,%xmm4
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
leal 48(%edi),%edi
movdqa %xmm7,%xmm1
jmp .L067xts_dec_done
.align 16
.L066xts_dec_four:
# Four blocks: tweaks 1-2 on the stack, 3-4 in %xmm7/%xmm6.
movaps %xmm1,%xmm6
movups (%esi),%xmm2
movups 16(%esi),%xmm3
movups 32(%esi),%xmm4
xorps (%esp),%xmm2
movups 48(%esi),%xmm5
leal 64(%esi),%esi
xorps 16(%esp),%xmm3
xorps %xmm7,%xmm4
xorps %xmm6,%xmm5
call _aesni_decrypt4
xorps (%esp),%xmm2
xorps 16(%esp),%xmm3
xorps %xmm7,%xmm4
movups %xmm2,(%edi)
xorps %xmm6,%xmm5
movups %xmm3,16(%edi)
movups %xmm4,32(%edi)
movups %xmm5,48(%edi)
leal 64(%edi),%edi
movdqa %xmm6,%xmm1
jmp .L067xts_dec_done
.align 16
.L062xts_dec_done6x:
movl 112(%esp),%eax
andl $15,%eax
jz .L069xts_dec_ret
movl %eax,112(%esp)
jmp .L070xts_dec_only_one_more
.align 16
.L067xts_dec_done:
movl 112(%esp),%eax
pxor %xmm0,%xmm0
andl $15,%eax
jz .L069xts_dec_ret                     # multiple of 16: no stealing
# Advance the tweak once (same GF(2^128) doubling idiom as above).
pcmpgtd %xmm1,%xmm0
movl %eax,112(%esp)
pshufd $19,%xmm0,%xmm2
pxor %xmm0,%xmm0
movdqa 96(%esp),%xmm3
paddq %xmm1,%xmm1
pand %xmm3,%xmm2
pcmpgtd %xmm1,%xmm0
pxor %xmm2,%xmm1
.L070xts_dec_only_one_more:
# Ciphertext stealing, decrypt direction: the held-back block must be
# decrypted with the LAST tweak (%xmm5 below, one doubling ahead) while
# the stolen block uses the previous tweak (saved in %xmm6).
pshufd $19,%xmm0,%xmm5
movdqa %xmm1,%xmm6
paddq %xmm1,%xmm1
pand %xmm3,%xmm5
pxor %xmm1,%xmm5
movl %ebp,%edx                          # reload key ptr / round count
movl %ebx,%ecx
movups (%esi),%xmm2                     # decrypt held-back block w/ tweak n+1
xorps %xmm5,%xmm2
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
.L071dec1_loop_13:
.byte 102,15,56,222,209                 # aesdec %xmm1,%xmm2
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
jnz .L071dec1_loop_13
.byte 102,15,56,223,209                 # aesdeclast %xmm1,%xmm2
xorps %xmm5,%xmm2
movups %xmm2,(%edi)
.L072xts_dec_steal:
# Swap trailing input bytes with the head of the just-written block.
movzbl 16(%esi),%ecx
movzbl (%edi),%edx
leal 1(%esi),%esi
movb %cl,(%edi)
movb %dl,16(%edi)
leal 1(%edi),%edi
subl $1,%eax
jnz .L072xts_dec_steal
subl 112(%esp),%edi                     # rewind to the patched block
movl %ebp,%edx
movl %ebx,%ecx
movups (%edi),%xmm2                     # re-decrypt it with tweak n (%xmm6)
xorps %xmm6,%xmm2
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
.L073dec1_loop_14:
.byte 102,15,56,222,209                 # aesdec %xmm1,%xmm2
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
jnz .L073dec1_loop_14
.byte 102,15,56,223,209                 # aesdeclast %xmm1,%xmm2
xorps %xmm6,%xmm2
movups %xmm2,(%edi)
.L069xts_dec_ret:
# Scrub sensitive material from registers and stack scratch, restore
# the caller's stack pointer and return.
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
movdqa %xmm0,(%esp)
pxor %xmm3,%xmm3
movdqa %xmm0,16(%esp)
pxor %xmm4,%xmm4
movdqa %xmm0,32(%esp)
pxor %xmm5,%xmm5
movdqa %xmm0,48(%esp)
pxor %xmm6,%xmm6
movdqa %xmm0,64(%esp)
pxor %xmm7,%xmm7
movdqa %xmm0,80(%esp)
movl 116(%esp),%esp
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size aesni_xts_decrypt,.-.L_aesni_xts_decrypt_begin
.globl aesni_cbc_encrypt
.type aesni_cbc_encrypt,@function
.align 16
# void aesni_cbc_encrypt(inp, out, len, key, ivp, enc)   (cdecl)
#   20(%esp) inp, 24(%esp) out, 28(%esp) len,
#   32(%esp) key (round count at +240), 36(%esp) ivp (16-byte IV, updated
#   on return), 40(%esp) enc (0 = decrypt, nonzero = encrypt).
# Encryption is inherently serial (one block at a time); decryption is
# pipelined six blocks at a time via _aesni_decrypt6.
aesni_cbc_encrypt:
.L_aesni_cbc_encrypt_begin:
pushl %ebp
pushl %ebx
pushl %esi
pushl %edi
movl 20(%esp),%esi                      # inp
movl %esp,%ebx
movl 24(%esp),%edi                      # out
subl $24,%ebx                           # carve an aligned scratch frame
movl 28(%esp),%eax                      # len
andl $-16,%ebx
movl 32(%esp),%edx                      # key
movl 36(%esp),%ebp                      # ivp
testl %eax,%eax
jz .L074cbc_abort                       # len == 0: nothing to do
cmpl $0,40(%esp)                        # enc flag
xchgl %esp,%ebx                         # switch to aligned frame;
movups (%ebp),%xmm7                     # %xmm7 = IV
movl 240(%edx),%ecx
movl %edx,%ebp                          # stash key ptr / rounds in
movl %ebx,16(%esp)                      # %ebp/%ebx, old %esp at 16(%esp)
movl %ecx,%ebx
je .L075cbc_decrypt
# ---- encrypt path: out[i] = E(in[i] ^ out[i-1]) ----
movaps %xmm7,%xmm2                      # running chain value
cmpl $16,%eax
jb .L076cbc_enc_tail                    # partial-block input
subl $16,%eax
jmp .L077cbc_enc_loop
.align 16
.L077cbc_enc_loop:
movups (%esi),%xmm7
leal 16(%esi),%esi
movups (%edx),%xmm0
movups 16(%edx),%xmm1
xorps %xmm0,%xmm7                       # fold round-0 key into input
leal 32(%edx),%edx
xorps %xmm7,%xmm2                       # chain ^= plaintext
.L078enc1_loop_15:
.byte 102,15,56,220,209                 # aesenc %xmm1,%xmm2
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
jnz .L078enc1_loop_15
.byte 102,15,56,221,209                 # aesenclast %xmm1,%xmm2
movl %ebx,%ecx                          # reset rounds/key for next block
movl %ebp,%edx
movups %xmm2,(%edi)
leal 16(%edi),%edi
subl $16,%eax
jnc .L077cbc_enc_loop
addl $16,%eax
jnz .L076cbc_enc_tail
movaps %xmm2,%xmm7                      # final ciphertext = new IV
pxor %xmm2,%xmm2
jmp .L079cbc_ret
.L076cbc_enc_tail:
# Partial final block: copy the tail bytes in place, zero-pad to 16,
# then loop around once more to encrypt the padded block.
movl %eax,%ecx
.long 2767451785                        # 0xA4F3F689: mov %esi,%esi; rep movsb
movl $16,%ecx
subl %eax,%ecx
xorl %eax,%eax
.long 2868115081                        # 0xAAF3F689: mov %esi,%esi; rep stosb
leal -16(%edi),%edi                     # re-point at the padded block
movl %ebx,%ecx
movl %edi,%esi                          # encrypt it in place
movl %ebp,%edx
jmp .L077cbc_enc_loop
.align 16
.L075cbc_decrypt:
# ---- decrypt path: out[i] = D(in[i]) ^ in[i-1] ----
cmpl $80,%eax
jbe .L080cbc_dec_tail                   # <= 5 blocks: no 6-wide loop
movaps %xmm7,(%esp)                     # previous ciphertext (IV) on stack
subl $80,%eax
jmp .L081cbc_dec_loop6_enter
.align 16
.L082cbc_dec_loop6:
movaps %xmm0,(%esp)                     # carry last ciphertext forward
movups %xmm7,(%edi)                     # store 6th plaintext of prev round
leal 16(%edi),%edi
.L081cbc_dec_loop6_enter:
movdqu (%esi),%xmm2
movdqu 16(%esi),%xmm3
movdqu 32(%esi),%xmm4
movdqu 48(%esi),%xmm5
movdqu 64(%esi),%xmm6
movdqu 80(%esi),%xmm7
call _aesni_decrypt6
# xor each decrypted block with the preceding ciphertext block.
movups (%esi),%xmm1
movups 16(%esi),%xmm0
xorps (%esp),%xmm2
xorps %xmm1,%xmm3
movups 32(%esi),%xmm1
xorps %xmm0,%xmm4
movups 48(%esi),%xmm0
xorps %xmm1,%xmm5
movups 64(%esi),%xmm1
xorps %xmm0,%xmm6
movups 80(%esi),%xmm0
xorps %xmm1,%xmm7
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
leal 96(%esi),%esi
movups %xmm4,32(%edi)
movl %ebx,%ecx                          # reset rounds/key
movups %xmm5,48(%edi)
movl %ebp,%edx
movups %xmm6,64(%edi)
leal 80(%edi),%edi                      # 6th block stored at loop top
subl $96,%eax
ja .L082cbc_dec_loop6
movaps %xmm7,%xmm2                      # pending 6th plaintext
movaps %xmm0,%xmm7                      # last ciphertext = chain value
addl $80,%eax
jle .L083cbc_dec_clear_tail_collected
movups %xmm2,(%edi)
leal 16(%edi),%edi
.L080cbc_dec_tail:
# 1..5 blocks (possibly with a ragged tail) remain; keep each input
# block (%xmm6/%xmm5/...) for the chaining xor after decryption.
movups (%esi),%xmm2
movaps %xmm2,%xmm6
cmpl $16,%eax
jbe .L084cbc_dec_one
movups 16(%esi),%xmm3
movaps %xmm3,%xmm5
cmpl $32,%eax
jbe .L085cbc_dec_two
movups 32(%esi),%xmm4
cmpl $48,%eax
jbe .L086cbc_dec_three
movups 48(%esi),%xmm5
cmpl $64,%eax
jbe .L087cbc_dec_four
# Five blocks: use the 6-wide helper with a zeroed sixth slot.
movups 64(%esi),%xmm6
movaps %xmm7,(%esp)
movups (%esi),%xmm2
xorps %xmm7,%xmm7
call _aesni_decrypt6
movups (%esi),%xmm1
movups 16(%esi),%xmm0
xorps (%esp),%xmm2
xorps %xmm1,%xmm3
movups 32(%esi),%xmm1
xorps %xmm0,%xmm4
movups 48(%esi),%xmm0
xorps %xmm1,%xmm5
movups 64(%esi),%xmm7                   # new chain value
xorps %xmm0,%xmm6
movups %xmm2,(%edi)
movups %xmm3,16(%edi)
pxor %xmm3,%xmm3
movups %xmm4,32(%edi)
pxor %xmm4,%xmm4
movups %xmm5,48(%edi)
pxor %xmm5,%xmm5
leal 64(%edi),%edi
movaps %xmm6,%xmm2                      # last plaintext pending in %xmm2
pxor %xmm6,%xmm6
subl $80,%eax
jmp .L088cbc_dec_tail_collected
.align 16
.L084cbc_dec_one:
movups (%edx),%xmm0
movups 16(%edx),%xmm1
leal 32(%edx),%edx
xorps %xmm0,%xmm2
.L089dec1_loop_16:
.byte 102,15,56,222,209                 # aesdec %xmm1,%xmm2
decl %ecx
movups (%edx),%xmm1
leal 16(%edx),%edx
jnz .L089dec1_loop_16
.byte 102,15,56,223,209                 # aesdeclast %xmm1,%xmm2
xorps %xmm7,%xmm2                       # xor with previous ciphertext
movaps %xmm6,%xmm7                      # new chain value
subl $16,%eax
jmp .L088cbc_dec_tail_collected
.align 16
.L085cbc_dec_two:
call _aesni_decrypt2
xorps %xmm7,%xmm2
xorps %xmm6,%xmm3
movups %xmm2,(%edi)
movaps %xmm3,%xmm2                      # last plaintext pending
pxor %xmm3,%xmm3
leal 16(%edi),%edi
movaps %xmm5,%xmm7                      # new chain value
subl $32,%eax
jmp .L088cbc_dec_tail_collected
.align 16
.L086cbc_dec_three:
call _aesni_decrypt3
xorps %xmm7,%xmm2
xorps %xmm6,%xmm3
xorps %xmm5,%xmm4
movups %xmm2,(%edi)
movaps %xmm4,%xmm2                      # last plaintext pending
pxor %xmm4,%xmm4
movups %xmm3,16(%edi)
pxor %xmm3,%xmm3
leal 32(%edi),%edi
movups 32(%esi),%xmm7                   # new chain value
subl $48,%eax
jmp .L088cbc_dec_tail_collected
.align 16
.L087cbc_dec_four:
call _aesni_decrypt4
movups 16(%esi),%xmm1
movups 32(%esi),%xmm0
xorps %xmm7,%xmm2
movups 48(%esi),%xmm7                   # new chain value
xorps %xmm6,%xmm3
movups %xmm2,(%edi)
xorps %xmm1,%xmm4
movups %xmm3,16(%edi)
pxor %xmm3,%xmm3
xorps %xmm0,%xmm5
movups %xmm4,32(%edi)
pxor %xmm4,%xmm4
leal 48(%edi),%edi
movaps %xmm5,%xmm2                      # last plaintext pending
pxor %xmm5,%xmm5
subl $64,%eax
jmp .L088cbc_dec_tail_collected
.align 16
.L083cbc_dec_clear_tail_collected:
pxor %xmm3,%xmm3                        # scrub registers not needed below
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
pxor %xmm6,%xmm6
.L088cbc_dec_tail_collected:
# %xmm2 holds the final plaintext block; write it whole or partially.
andl $15,%eax
jnz .L090cbc_dec_tail_partial
movups %xmm2,(%edi)
pxor %xmm0,%xmm0
jmp .L079cbc_ret
.align 16
.L090cbc_dec_tail_partial:
# Ragged tail: stage the block on the stack and copy only %eax bytes.
movaps %xmm2,(%esp)
pxor %xmm0,%xmm0
movl $16,%ecx
movl %esp,%esi
subl %eax,%ecx                          # note: copies 16-%eax bytes
.long 2767451785                        # 0xA4F3F689: mov %esi,%esi; rep movsb
movdqa %xmm2,(%esp)                     # re-clear staged plaintext slot
.L079cbc_ret:
movl 16(%esp),%esp                      # back to the original frame
movl 36(%esp),%ebp                      # reload ivp
pxor %xmm2,%xmm2                        # scrub
pxor %xmm1,%xmm1
movups %xmm7,(%ebp)                     # write back the updated IV
pxor %xmm7,%xmm7
.L074cbc_abort:
popl %edi
popl %esi
popl %ebx
popl %ebp
ret
.size aesni_cbc_encrypt,.-.L_aesni_cbc_encrypt_begin
.type _aesni_set_encrypt_key,@function
.align 16
# int _aesni_set_encrypt_key(void)   -- internal, register calling convention:
#   %eax = user key bytes, %ecx = key size in bits (128/192/256),
#   %edx = key schedule to fill.
# Returns in %eax: 0 on success, -1 on NULL pointer, -2 on bad key size.
# The stored round counter (at byte offset 240 of the schedule) is the
# number of AESENC rounds performed by the cipher loops in this file
# (9/11/13), each followed by one AESENCLAST.
# Two schedule expanders exist per key size: the classic AESKEYGENASSIST
# path, and an "_alt" path selected on the capability bits tested below,
# which builds the schedule with PSHUFB + AESENCLAST and constants from
# .Lkey_const.
_aesni_set_encrypt_key:
pushl %ebp
pushl %ebx
testl %eax,%eax                         # NULL user key?
jz .L091bad_pointer
testl %edx,%edx                         # NULL schedule?
jz .L091bad_pointer
call .L092pic                           # PIC: get EIP into %ebx
.L092pic:
popl %ebx
leal .Lkey_const-.L092pic(%ebx),%ebx    # %ebx = &.Lkey_const
leal OPENSSL_ia32cap_P-.Lkey_const(%ebx),%ebp
movups (%eax),%xmm0                     # first 16 key bytes
xorps %xmm4,%xmm4                       # scratch zero for shufps steps
movl 4(%ebp),%ebp                       # 2nd capability word
leal 16(%edx),%edx
andl $268437504,%ebp                    # mask bits 0x10000800
cmpl $256,%ecx
je .L09314rounds
cmpl $192,%ecx
je .L09412rounds
cmpl $128,%ecx
jne .L095bad_keybits
.align 16
.L09610rounds:
# ----- AES-128 -----
cmpl $268435456,%ebp                    # 0x10000000 only -> alt schedule
je .L09710rounds_alt
movl $9,%ecx                            # round counter stored at +240
movups %xmm0,-16(%edx)                  # round key 0
.byte 102,15,58,223,200,1               # aeskeygenassist $1,%xmm0,%xmm1
call .L098key_128_cold
.byte 102,15,58,223,200,2               # aeskeygenassist $2,%xmm0,%xmm1
call .L099key_128
.byte 102,15,58,223,200,4               # rcon 4
call .L099key_128
.byte 102,15,58,223,200,8               # rcon 8
call .L099key_128
.byte 102,15,58,223,200,16              # rcon 16
call .L099key_128
.byte 102,15,58,223,200,32              # rcon 32
call .L099key_128
.byte 102,15,58,223,200,64              # rcon 64
call .L099key_128
.byte 102,15,58,223,200,128             # rcon 128
call .L099key_128
.byte 102,15,58,223,200,27              # rcon 0x1b (after 0x80 wraps)
call .L099key_128
.byte 102,15,58,223,200,54              # rcon 0x36
call .L099key_128
movups %xmm0,(%edx)                     # round key 10
movl %ecx,80(%edx)                      # rounds field (= offset 240 of key)
jmp .L100good_key
.align 16
.L099key_128:
# Store the previous round key, then fall through to mix in the next.
movups %xmm0,(%edx)
leal 16(%edx),%edx
.L098key_128_cold:
# Fold the keygenassist result into %xmm0: the two shufps/xorps pairs
# compute the running xor of the four key words; the broadcast word of
# %xmm1 carries SubWord(RotWord(w3)) ^ rcon.
shufps $16,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $255,%xmm1,%xmm1                 # broadcast the relevant word
xorps %xmm1,%xmm0
ret
.align 16
.L09710rounds_alt:
# Alternative AES-128 schedule: SubBytes via PSHUFB mask + AESENCLAST
# with a zero round key, rcon doubled in %xmm4 each iteration.
movdqa (%ebx),%xmm5                     # byte-rotate mask
movl $8,%ecx
movdqa 32(%ebx),%xmm4                   # rcon = {1,1,1,1}
movdqa %xmm0,%xmm2
movdqu %xmm0,-16(%edx)                  # round key 0
.L101loop_key128:
.byte 102,15,56,0,197                   # pshufb %xmm5,%xmm0
.byte 102,15,56,221,196                 # aesenclast %xmm4,%xmm0
pslld $1,%xmm4                          # rcon <<= 1
leal 16(%edx),%edx
# Compute the running xor of the previous round key's words.
movdqa %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm3,%xmm2
pxor %xmm2,%xmm0                        # next round key
movdqu %xmm0,-16(%edx)
movdqa %xmm0,%xmm2
decl %ecx
jnz .L101loop_key128
# Rounds 9 and 10 use rcon 0x1b and 0x36 from 48(%ebx).
movdqa 48(%ebx),%xmm4
.byte 102,15,56,0,197                   # pshufb %xmm5,%xmm0
.byte 102,15,56,221,196                 # aesenclast %xmm4,%xmm0
pslld $1,%xmm4
movdqa %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm3,%xmm2
pxor %xmm2,%xmm0
movdqu %xmm0,(%edx)                     # round key 9
movdqa %xmm0,%xmm2
.byte 102,15,56,0,197                   # pshufb %xmm5,%xmm0
.byte 102,15,56,221,196                 # aesenclast %xmm4,%xmm0
movdqa %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm2,%xmm3
pslldq $4,%xmm2
pxor %xmm3,%xmm2
pxor %xmm2,%xmm0
movdqu %xmm0,16(%edx)                   # round key 10
movl $9,%ecx
movl %ecx,96(%edx)                      # rounds field
jmp .L100good_key
.align 16
.L09412rounds:
# ----- AES-192 ----- (key words 4-5 arrive in the low half of %xmm2)
movq 16(%eax),%xmm2
cmpl $268435456,%ebp
je .L10212rounds_alt
movl $11,%ecx
movups %xmm0,-16(%edx)                  # round key 0
.byte 102,15,58,223,202,1               # aeskeygenassist $1,%xmm2,%xmm1
call .L103key_192a_cold
.byte 102,15,58,223,202,2               # rcon 2
call .L104key_192b
.byte 102,15,58,223,202,4               # rcon 4
call .L105key_192a
.byte 102,15,58,223,202,8               # rcon 8
call .L104key_192b
.byte 102,15,58,223,202,16              # rcon 16
call .L105key_192a
.byte 102,15,58,223,202,32              # rcon 32
call .L104key_192b
.byte 102,15,58,223,202,64              # rcon 64
call .L105key_192a
.byte 102,15,58,223,202,128             # rcon 128
call .L104key_192b
movups %xmm0,(%edx)
movl %ecx,48(%edx)                      # rounds field
jmp .L100good_key
.align 16
.L105key_192a:
movups %xmm0,(%edx)
leal 16(%edx),%edx
.align 16
.L103key_192a_cold:
movaps %xmm2,%xmm5                      # keep words 4-5 for the "b" step
.L106key_192b_warm:
# Expand six key words: running-xor in %xmm0, then propagate into the
# two extra words kept in %xmm2.
shufps $16,%xmm0,%xmm4
movdqa %xmm2,%xmm3
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
pslldq $4,%xmm3
xorps %xmm4,%xmm0
pshufd $85,%xmm1,%xmm1                  # broadcast keygenassist word
pxor %xmm3,%xmm2
pxor %xmm1,%xmm0
pshufd $255,%xmm0,%xmm3
pxor %xmm3,%xmm2
ret
.align 16
.L104key_192b:
# Repack: emit the 1.5 pending round keys before continuing.
movaps %xmm0,%xmm3
shufps $68,%xmm0,%xmm5
movups %xmm5,(%edx)
shufps $78,%xmm2,%xmm3
movups %xmm3,16(%edx)
leal 32(%edx),%edx
jmp .L106key_192b_warm
.align 16
.L10212rounds_alt:
# Alternative AES-192 schedule (pshufb + aesenclast, 24-byte stride).
movdqa 16(%ebx),%xmm5                   # 192-bit byte-rotate mask
movdqa 32(%ebx),%xmm4                   # rcon = {1,1,1,1}
movl $8,%ecx
movdqu %xmm0,-16(%edx)
.L107loop_key192:
movq %xmm2,(%edx)                       # emit words 4-5 of prev round
movdqa %xmm2,%xmm1
.byte 102,15,56,0,213                   # pshufb %xmm5,%xmm2
.byte 102,15,56,221,212                 # aesenclast %xmm4,%xmm2
pslld $1,%xmm4                          # rcon <<= 1
leal 24(%edx),%edx
# Running xor of the previous four words.
movdqa %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm3,%xmm0
# Derive the two extra words of the 192-bit lane.
pshufd $255,%xmm0,%xmm3
pxor %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm1,%xmm3
pxor %xmm2,%xmm0
pxor %xmm3,%xmm2
movdqu %xmm0,-16(%edx)
decl %ecx
jnz .L107loop_key192
movl $11,%ecx
movl %ecx,32(%edx)                      # rounds field
jmp .L100good_key
.align 16
.L09314rounds:
# ----- AES-256 ----- (second 16 key bytes in %xmm2)
movups 16(%eax),%xmm2
leal 16(%edx),%edx
cmpl $268435456,%ebp
je .L10814rounds_alt
movl $13,%ecx
movups %xmm0,-32(%edx)                  # round key 0
movups %xmm2,-16(%edx)                  # round key 1
.byte 102,15,58,223,202,1               # aeskeygenassist $1,%xmm2,%xmm1
call .L109key_256a_cold
.byte 102,15,58,223,200,1               # aeskeygenassist $1,%xmm0,%xmm1
call .L110key_256b
.byte 102,15,58,223,202,2               # rcon 2 ("a" step)
call .L111key_256a
.byte 102,15,58,223,200,2               # ("b" step: no rcon contribution)
call .L110key_256b
.byte 102,15,58,223,202,4
call .L111key_256a
.byte 102,15,58,223,200,4
call .L110key_256b
.byte 102,15,58,223,202,8
call .L111key_256a
.byte 102,15,58,223,200,8
call .L110key_256b
.byte 102,15,58,223,202,16
call .L111key_256a
.byte 102,15,58,223,200,16
call .L110key_256b
.byte 102,15,58,223,202,32
call .L111key_256a
.byte 102,15,58,223,200,32
call .L110key_256b
.byte 102,15,58,223,202,64
call .L111key_256a
movups %xmm0,(%edx)
movl %ecx,16(%edx)                      # rounds field
xorl %eax,%eax
jmp .L100good_key
.align 16
.L111key_256a:
movups %xmm2,(%edx)
leal 16(%edx),%edx
.L109key_256a_cold:
# Even round keys: running xor of %xmm0 plus SubWord(RotWord)^rcon.
shufps $16,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $140,%xmm0,%xmm4
xorps %xmm4,%xmm0
shufps $255,%xmm1,%xmm1
xorps %xmm1,%xmm0
ret
.align 16
.L110key_256b:
# Odd round keys: same shape on %xmm2, using SubWord (no rotate/rcon),
# hence the $170 broadcast of a different keygenassist word.
movups %xmm0,(%edx)
leal 16(%edx),%edx
shufps $16,%xmm2,%xmm4
xorps %xmm4,%xmm2
shufps $140,%xmm2,%xmm4
xorps %xmm4,%xmm2
shufps $170,%xmm1,%xmm1
xorps %xmm1,%xmm2
ret
.align 16
.L10814rounds_alt:
# Alternative AES-256 schedule (pshufb + aesenclast, two keys per pass).
movdqa (%ebx),%xmm5                     # byte-rotate mask
movdqa 32(%ebx),%xmm4                   # rcon = {1,1,1,1}
movl $7,%ecx
movdqu %xmm0,-32(%edx)                  # round key 0
movdqa %xmm2,%xmm1
movdqu %xmm2,-16(%edx)                  # round key 1
.L112loop_key256:
.byte 102,15,56,0,213                   # pshufb %xmm5,%xmm2
.byte 102,15,56,221,212                 # aesenclast %xmm4,%xmm2
# Running xor of %xmm0's words, then fold in the transformed word.
movdqa %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm0,%xmm3
pslldq $4,%xmm0
pxor %xmm3,%xmm0
pslld $1,%xmm4                          # rcon <<= 1
pxor %xmm2,%xmm0
movdqu %xmm0,(%edx)                     # even round key
decl %ecx
jz .L113done_key256
# Odd round key: SubWord only (aesenclast with zero rcon in %xmm3).
pshufd $255,%xmm0,%xmm2
pxor %xmm3,%xmm3
.byte 102,15,56,221,211                 # aesenclast %xmm3,%xmm2
movdqa %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm1,%xmm3
pslldq $4,%xmm1
pxor %xmm3,%xmm1
pxor %xmm1,%xmm2
movdqu %xmm2,16(%edx)
leal 32(%edx),%edx
movdqa %xmm2,%xmm1
jmp .L112loop_key256
.L113done_key256:
movl $13,%ecx
movl %ecx,16(%edx)                      # rounds field
.L100good_key:
# Success: scrub key material from xmm registers, return 0.
pxor %xmm0,%xmm0
pxor %xmm1,%xmm1
pxor %xmm2,%xmm2
pxor %xmm3,%xmm3
pxor %xmm4,%xmm4
pxor %xmm5,%xmm5
xorl %eax,%eax
popl %ebx
popl %ebp
ret
.align 4
.L091bad_pointer:
movl $-1,%eax                           # NULL argument
popl %ebx
popl %ebp
ret
.align 4
.L095bad_keybits:
pxor %xmm0,%xmm0                        # scrub the loaded key half
movl $-2,%eax                           # unsupported key size
popl %ebx
popl %ebp
ret
.size _aesni_set_encrypt_key,.-_aesni_set_encrypt_key
.globl aesni_set_encrypt_key
.type aesni_set_encrypt_key,@function
.align 16
# int aesni_set_encrypt_key(const unsigned char *userKey, int bits,
#                           void *key)                           (cdecl)
# Thin wrapper: marshals the three stack arguments into the register
# convention expected by the internal _aesni_set_encrypt_key routine
# (%eax = userKey, %ecx = bits, %edx = key schedule) and returns its
# %eax status (0 ok, -1 NULL pointer, -2 bad key size) unchanged.
aesni_set_encrypt_key:
.L_aesni_set_encrypt_key_begin:
movl 12(%esp),%edx                      # arg 3: key schedule to fill
movl 8(%esp),%ecx                       # arg 2: key length in bits
movl 4(%esp),%eax                       # arg 1: raw key bytes
call _aesni_set_encrypt_key
ret
.size aesni_set_encrypt_key,.-.L_aesni_set_encrypt_key_begin
.globl aesni_set_decrypt_key
.type aesni_set_decrypt_key,@function
.align 16
# int aesni_set_decrypt_key(const unsigned char *userKey, int bits,
#                           void *key)                           (cdecl)
# Builds the encryption schedule, then converts it in place for the
# Equivalent Inverse Cipher: the round-key order is reversed and every
# inner round key is run through AESIMC (InvMixColumns). Returns the
# status of _aesni_set_encrypt_key (0/-1/-2).
aesni_set_decrypt_key:
.L_aesni_set_decrypt_key_begin:
movl 4(%esp),%eax                       # userKey
movl 8(%esp),%ecx                       # bits
movl 12(%esp),%edx                      # key schedule
call _aesni_set_encrypt_key
movl 12(%esp),%edx
shll $4,%ecx                            # rounds*16 = offset of last key
testl %eax,%eax
jnz .L114dec_key_ret                    # propagate setup failure
leal 16(%edx,%ecx,1),%eax               # %eax -> last round key
# Swap the outermost pair untransformed (first/last keys are not
# InvMixColumns'ed in the equivalent inverse cipher).
movups (%edx),%xmm0
movups (%eax),%xmm1
movups %xmm0,(%eax)
movups %xmm1,(%edx)
leal 16(%edx),%edx
leal -16(%eax),%eax
.L115dec_key_inverse:
# Walk the two pointers toward the middle: swap + aesimc each pair.
movups (%edx),%xmm0
movups (%eax),%xmm1
.byte 102,15,56,219,192                 # aesimc %xmm0,%xmm0
.byte 102,15,56,219,201                 # aesimc %xmm1,%xmm1
leal 16(%edx),%edx
leal -16(%eax),%eax
movups %xmm0,16(%eax)
movups %xmm1,-16(%edx)
cmpl %edx,%eax
ja .L115dec_key_inverse
# Middle round key (pointers met): transform in place, no swap.
movups (%edx),%xmm0
.byte 102,15,56,219,192                 # aesimc %xmm0,%xmm0
movups %xmm0,(%edx)
pxor %xmm0,%xmm0                        # scrub key material
pxor %xmm1,%xmm1
xorl %eax,%eax                          # success
.L114dec_key_ret:
ret
.size aesni_set_decrypt_key,.-.L_aesni_set_decrypt_key_begin
.align 64
# Constants for the "_alt" key-schedule paths (loaded PC-relative via %ebx):
#   +0  : 0x0c0f0e0d x4 - pshufb mask (RotWord+byte order) for 128/256-bit
#   +16 : 0x04070605 x4 - pshufb mask for the 192-bit schedule
#   +32 : {1,1,1,1}     - initial rcon, doubled with pslld each round
#   +48 : {27,27,27,27} - rcon 0x1b (and 0x36 after one doubling)
.Lkey_const:
.long 202313229,202313229,202313229,202313229
.long 67569157,67569157,67569157,67569157
.long 1,1,1,1
.long 27,27,27,27
# ASCII banner: "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>"
.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69
.byte 83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83
.byte 32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
.byte 115,108,46,111,114,103,62,0
# Shared CPU-capability vector filled in by OPENSSL_cpuid_setup; word 1
# is consulted by _aesni_set_encrypt_key above.
.comm OPENSSL_ia32cap_P,16,4
2449