/* ghash-x86.S (revision 1.6) — GHASH for x86, generated by CRYPTOGAMS perlasm */
#include <machine/asm.h>
.text
#-----------------------------------------------------------------------
# void gcm_gmult_4bit_x86(u64 Xi[2], const u128 Htable[16])
# Pure-IALU GHASH multiply: Xi = Xi * H mod P, using the 4-bit
# table-driven algorithm.
# ABI:   i386 cdecl; args on stack. After 4 pushes + 84-byte frame:
#        104(%esp) = Xi, 108(%esp) = Htable.
# Stack: 0-12(%esp)  = copy of Xi (read back a byte at a time);
#        16-76(%esp) = 16-entry reduction table (rem_4bit values
#        pre-positioned for 32-bit xor, e.g. 0x1C200000 = 471859200).
# Clobbers: eax,ecx,edx (caller-saved); ebx,ebp,esi,edi saved/restored.
#-----------------------------------------------------------------------
.globl gcm_gmult_4bit_x86
.type gcm_gmult_4bit_x86,@function
.align 16
gcm_gmult_4bit_x86:
.L_gcm_gmult_4bit_x86_begin:
	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
	subl $84,%esp
	movl 104(%esp),%edi		# edi = Xi
	movl 108(%esp),%esi		# esi = Htable
	movl (%edi),%ebp		# load Xi into ebp:edx:ecx:ebx
	movl 4(%edi),%edx
	movl 8(%edi),%ecx
	movl 12(%edi),%ebx
	# Build the 16-entry reduction (rem_4bit) table on the stack.
	movl $0,16(%esp)
	movl $471859200,20(%esp)
	movl $943718400,24(%esp)
	movl $610271232,28(%esp)
	movl $1887436800,32(%esp)
	movl $1822425088,36(%esp)
	movl $1220542464,40(%esp)
	movl $1423966208,44(%esp)
	movl $3774873600,48(%esp)
	movl $4246732800,52(%esp)
	movl $3644850176,56(%esp)
	movl $3311403008,60(%esp)
	movl $2441084928,64(%esp)
	movl $2376073216,68(%esp)
	movl $2847932416,72(%esp)
	movl $3051356160,76(%esp)
	movl %ebp,(%esp)		# stash Xi copy at 0-12(%esp)
	movl %edx,4(%esp)
	movl %ecx,8(%esp)
	movl %ebx,12(%esp)
	shrl $20,%ebx			# index Htable by top nibble of Xi
	andl $240,%ebx			# (nibble*16 = u128 entry offset)
	movl 4(%esi,%ebx,1),%ebp	# Z = Htable[nibble]
	movl (%esi,%ebx,1),%edx
	movl 12(%esi,%ebx,1),%ecx
	movl 8(%esi,%ebx,1),%ebx
	xorl %eax,%eax
	movl $15,%edi			# edi = byte counter, 15 down to 0
	jmp .L000x86_loop
.align 16
.L000x86_loop:
	# Low-nibble step: shift 128-bit Z right by 4 (shrd chain),
	# fold the dropped nibble back in via the reduction table,
	# then xor in Htable[low nibble of current byte].
	movb %bl,%al
	shrdl $4,%ecx,%ebx
	andb $15,%al
	shrdl $4,%edx,%ecx
	shrdl $4,%ebp,%edx
	shrl $4,%ebp
	xorl 16(%esp,%eax,4),%ebp	# reduction for shifted-out nibble
	movb (%esp,%edi,1),%al		# fetch Xi byte edi
	andb $240,%al			# high nibble * 16
	xorl 8(%esi,%eax,1),%ebx
	xorl 12(%esi,%eax,1),%ecx
	xorl (%esi,%eax,1),%edx
	xorl 4(%esi,%eax,1),%ebp
	decl %edi
	js .L001x86_break		# all 16 bytes done
	# High-nibble step for the next byte (same shift/reduce/xor).
	movb %bl,%al
	shrdl $4,%ecx,%ebx
	andb $15,%al
	shrdl $4,%edx,%ecx
	shrdl $4,%ebp,%edx
	shrl $4,%ebp
	xorl 16(%esp,%eax,4),%ebp
	movb (%esp,%edi,1),%al
	shlb $4,%al			# low nibble * 16
	xorl 8(%esi,%eax,1),%ebx
	xorl 12(%esi,%eax,1),%ecx
	xorl (%esi,%eax,1),%edx
	xorl 4(%esi,%eax,1),%ebp
	jmp .L000x86_loop
.align 16
.L001x86_break:
	bswap %ebx			# convert result to big-endian
	bswap %ecx
	bswap %edx
	bswap %ebp
	movl 104(%esp),%edi		# edi = Xi again (was loop counter)
	movl %ebx,12(%edi)		# store Z back into Xi
	movl %ecx,8(%edi)
	movl %edx,4(%edi)
	movl %ebp,(%edi)
	addl $84,%esp
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret
.size gcm_gmult_4bit_x86,.-.L_gcm_gmult_4bit_x86_begin
#-----------------------------------------------------------------------
# void gcm_ghash_4bit_x86(u64 Xi[2], const u128 Htable[16],
#                         const u8 *inp, size_t len)
# GHASH over a buffer: for each 16-byte block, Xi = (Xi ^ block) * H.
# Same 4-bit IALU core as gcm_gmult_4bit_x86, wrapped in an outer loop.
# ABI:   i386 cdecl. After 4 pushes + 84-byte frame:
#        104 = Xi, 108 = Htable, 112 = inp, 116 = len (len assumed to
#        be a positive multiple of 16; 116(%esp) is rewritten as inp+len).
# Stack: 0-12(%esp) = Xi^block copy; 16-76(%esp) = reduction table.
#-----------------------------------------------------------------------
.globl gcm_ghash_4bit_x86
.type gcm_ghash_4bit_x86,@function
.align 16
gcm_ghash_4bit_x86:
.L_gcm_ghash_4bit_x86_begin:
	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
	subl $84,%esp
	movl 104(%esp),%ebx		# ebx = Xi
	movl 108(%esp),%esi		# esi = Htable
	movl 112(%esp),%edi		# edi = inp
	movl 116(%esp),%ecx		# ecx = len
	addl %edi,%ecx
	movl %ecx,116(%esp)		# 116(%esp) = end-of-input pointer
	movl (%ebx),%ebp		# load Xi into ebp:edx:ecx:ebx
	movl 4(%ebx),%edx
	movl 8(%ebx),%ecx
	movl 12(%ebx),%ebx
	# Build the 16-entry reduction (rem_4bit) table on the stack.
	movl $0,16(%esp)
	movl $471859200,20(%esp)
	movl $943718400,24(%esp)
	movl $610271232,28(%esp)
	movl $1887436800,32(%esp)
	movl $1822425088,36(%esp)
	movl $1220542464,40(%esp)
	movl $1423966208,44(%esp)
	movl $3774873600,48(%esp)
	movl $4246732800,52(%esp)
	movl $3644850176,56(%esp)
	movl $3311403008,60(%esp)
	movl $2441084928,64(%esp)
	movl $2376073216,68(%esp)
	movl $2847932416,72(%esp)
	movl $3051356160,76(%esp)
.align 16
.L002x86_outer_loop:
	xorl 12(%edi),%ebx		# Xi ^= current input block
	xorl 8(%edi),%ecx
	xorl 4(%edi),%edx
	xorl (%edi),%ebp
	movl %ebx,12(%esp)		# stash Xi^block at 0-12(%esp)
	movl %ecx,8(%esp)
	movl %edx,4(%esp)
	movl %ebp,(%esp)
	shrl $20,%ebx			# first lookup: top nibble
	andl $240,%ebx
	movl 4(%esi,%ebx,1),%ebp
	movl (%esi,%ebx,1),%edx
	movl 12(%esi,%ebx,1),%ecx
	movl 8(%esi,%ebx,1),%ebx
	xorl %eax,%eax
	movl $15,%edi			# edi reused as byte counter
	jmp .L003x86_loop
.align 16
.L003x86_loop:
	# Identical two-nibble-per-iteration core as in gcm_gmult_4bit_x86.
	movb %bl,%al
	shrdl $4,%ecx,%ebx
	andb $15,%al
	shrdl $4,%edx,%ecx
	shrdl $4,%ebp,%edx
	shrl $4,%ebp
	xorl 16(%esp,%eax,4),%ebp
	movb (%esp,%edi,1),%al
	andb $240,%al
	xorl 8(%esi,%eax,1),%ebx
	xorl 12(%esi,%eax,1),%ecx
	xorl (%esi,%eax,1),%edx
	xorl 4(%esi,%eax,1),%ebp
	decl %edi
	js .L004x86_break
	movb %bl,%al
	shrdl $4,%ecx,%ebx
	andb $15,%al
	shrdl $4,%edx,%ecx
	shrdl $4,%ebp,%edx
	shrl $4,%ebp
	xorl 16(%esp,%eax,4),%ebp
	movb (%esp,%edi,1),%al
	shlb $4,%al
	xorl 8(%esi,%eax,1),%ebx
	xorl 12(%esi,%eax,1),%ecx
	xorl (%esi,%eax,1),%edx
	xorl 4(%esi,%eax,1),%ebp
	jmp .L003x86_loop
.align 16
.L004x86_break:
	bswap %ebx			# result to big-endian
	bswap %ecx
	bswap %edx
	bswap %ebp
	movl 112(%esp),%edi		# advance input pointer by one block
	leal 16(%edi),%edi
	cmpl 116(%esp),%edi		# reached end-of-input?
	movl %edi,112(%esp)
	jb .L002x86_outer_loop
	movl 104(%esp),%edi		# edi = Xi; store final digest
	movl %ebx,12(%edi)
	movl %ecx,8(%edi)
	movl %edx,4(%edi)
	movl %ebp,(%edi)
	addl $84,%esp
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret
.size gcm_ghash_4bit_x86,.-.L_gcm_ghash_4bit_x86_begin
#-----------------------------------------------------------------------
# _mmx_gmult_4bit_inner — private MMX core for one GHASH multiply.
# Fully unrolled over all 16 bytes of Xi (two 4-bit steps per byte),
# software-pipelined: each "psrlq/pxor" group overlaps table lookups
# for adjacent nibbles, so statement order is load-bearing.
# In:   %edi = Xi (16 bytes), %esi = Htable, %eax = &.Lrem_4bit,
#       %ebx = Xi[15] (zero-extended; caller does movzbl/shrl).
# Out:  big-endian result in %ebp:%edx:%ecx:%ebx
#       (= Xi[0..3], Xi[4..7], Xi[8..11], Xi[12..15]).
# Clobbers: mm0-mm2, %eax low use, %edi. Caller must execute emms.
# NOTE(review): not a public symbol; only reachable from the two
# *_mmx wrappers below, which set up the registers exactly as above.
#-----------------------------------------------------------------------
.type _mmx_gmult_4bit_inner,@function
.align 16
_mmx_gmult_4bit_inner:
	xorl %ecx,%ecx
	movl %ebx,%edx
	movb %dl,%cl
	shlb $4,%cl			# cl = low nibble of byte 15, *16
	andl $240,%edx			# edx = high nibble of byte 15, *16
	movq 8(%esi,%ecx,1),%mm0	# mm0:mm1 = Z = Htable[nibble]
	movq (%esi,%ecx,1),%mm1
	movd %mm0,%ebp
	psrlq $4,%mm0
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%edx,1),%mm0
	movb 14(%edi),%cl		# fetch Xi[14]
	psllq $60,%mm2
	andl $15,%ebp
	pxor (%esi,%edx,1),%mm1
	movl %ecx,%edx
	movd %mm0,%ebx
	pxor %mm2,%mm0
	shlb $4,%cl
	psrlq $4,%mm0
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%ecx,1),%mm0
	psllq $60,%mm2
	andl $240,%edx
	pxor (%eax,%ebp,8),%mm1		# fold in rem_4bit reduction
	andl $15,%ebx
	pxor (%esi,%ecx,1),%mm1
	movd %mm0,%ebp
	pxor %mm2,%mm0
	psrlq $4,%mm0
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%edx,1),%mm0
	movb 13(%edi),%cl		# fetch Xi[13]
	psllq $60,%mm2
	pxor (%eax,%ebx,8),%mm1
	andl $15,%ebp
	pxor (%esi,%edx,1),%mm1
	movl %ecx,%edx
	movd %mm0,%ebx
	pxor %mm2,%mm0
	shlb $4,%cl
	psrlq $4,%mm0
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%ecx,1),%mm0
	psllq $60,%mm2
	andl $240,%edx
	pxor (%eax,%ebp,8),%mm1
	andl $15,%ebx
	pxor (%esi,%ecx,1),%mm1
	movd %mm0,%ebp
	pxor %mm2,%mm0
	psrlq $4,%mm0
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%edx,1),%mm0
	movb 12(%edi),%cl		# fetch Xi[12]
	psllq $60,%mm2
	pxor (%eax,%ebx,8),%mm1
	andl $15,%ebp
	pxor (%esi,%edx,1),%mm1
	movl %ecx,%edx
	movd %mm0,%ebx
	pxor %mm2,%mm0
	shlb $4,%cl
	psrlq $4,%mm0
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%ecx,1),%mm0
	psllq $60,%mm2
	andl $240,%edx
	pxor (%eax,%ebp,8),%mm1
	andl $15,%ebx
	pxor (%esi,%ecx,1),%mm1
	movd %mm0,%ebp
	pxor %mm2,%mm0
	psrlq $4,%mm0
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%edx,1),%mm0
	movb 11(%edi),%cl		# fetch Xi[11]
	psllq $60,%mm2
	pxor (%eax,%ebx,8),%mm1
	andl $15,%ebp
	pxor (%esi,%edx,1),%mm1
	movl %ecx,%edx
	movd %mm0,%ebx
	pxor %mm2,%mm0
	shlb $4,%cl
	psrlq $4,%mm0
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%ecx,1),%mm0
	psllq $60,%mm2
	andl $240,%edx
	pxor (%eax,%ebp,8),%mm1
	andl $15,%ebx
	pxor (%esi,%ecx,1),%mm1
	movd %mm0,%ebp
	pxor %mm2,%mm0
	psrlq $4,%mm0
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%edx,1),%mm0
	movb 10(%edi),%cl		# fetch Xi[10]
	psllq $60,%mm2
	pxor (%eax,%ebx,8),%mm1
	andl $15,%ebp
	pxor (%esi,%edx,1),%mm1
	movl %ecx,%edx
	movd %mm0,%ebx
	pxor %mm2,%mm0
	shlb $4,%cl
	psrlq $4,%mm0
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%ecx,1),%mm0
	psllq $60,%mm2
	andl $240,%edx
	pxor (%eax,%ebp,8),%mm1
	andl $15,%ebx
	pxor (%esi,%ecx,1),%mm1
	movd %mm0,%ebp
	pxor %mm2,%mm0
	psrlq $4,%mm0
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%edx,1),%mm0
	movb 9(%edi),%cl		# fetch Xi[9]
	psllq $60,%mm2
	pxor (%eax,%ebx,8),%mm1
	andl $15,%ebp
	pxor (%esi,%edx,1),%mm1
	movl %ecx,%edx
	movd %mm0,%ebx
	pxor %mm2,%mm0
	shlb $4,%cl
	psrlq $4,%mm0
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%ecx,1),%mm0
	psllq $60,%mm2
	andl $240,%edx
	pxor (%eax,%ebp,8),%mm1
	andl $15,%ebx
	pxor (%esi,%ecx,1),%mm1
	movd %mm0,%ebp
	pxor %mm2,%mm0
	psrlq $4,%mm0
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%edx,1),%mm0
	movb 8(%edi),%cl		# fetch Xi[8]
	psllq $60,%mm2
	pxor (%eax,%ebx,8),%mm1
	andl $15,%ebp
	pxor (%esi,%edx,1),%mm1
	movl %ecx,%edx
	movd %mm0,%ebx
	pxor %mm2,%mm0
	shlb $4,%cl
	psrlq $4,%mm0
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%ecx,1),%mm0
	psllq $60,%mm2
	andl $240,%edx
	pxor (%eax,%ebp,8),%mm1
	andl $15,%ebx
	pxor (%esi,%ecx,1),%mm1
	movd %mm0,%ebp
	pxor %mm2,%mm0
	psrlq $4,%mm0
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%edx,1),%mm0
	movb 7(%edi),%cl		# fetch Xi[7]
	psllq $60,%mm2
	pxor (%eax,%ebx,8),%mm1
	andl $15,%ebp
	pxor (%esi,%edx,1),%mm1
	movl %ecx,%edx
	movd %mm0,%ebx
	pxor %mm2,%mm0
	shlb $4,%cl
	psrlq $4,%mm0
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%ecx,1),%mm0
	psllq $60,%mm2
	andl $240,%edx
	pxor (%eax,%ebp,8),%mm1
	andl $15,%ebx
	pxor (%esi,%ecx,1),%mm1
	movd %mm0,%ebp
	pxor %mm2,%mm0
	psrlq $4,%mm0
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%edx,1),%mm0
	movb 6(%edi),%cl		# fetch Xi[6]
	psllq $60,%mm2
	pxor (%eax,%ebx,8),%mm1
	andl $15,%ebp
	pxor (%esi,%edx,1),%mm1
	movl %ecx,%edx
	movd %mm0,%ebx
	pxor %mm2,%mm0
	shlb $4,%cl
	psrlq $4,%mm0
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%ecx,1),%mm0
	psllq $60,%mm2
	andl $240,%edx
	pxor (%eax,%ebp,8),%mm1
	andl $15,%ebx
	pxor (%esi,%ecx,1),%mm1
	movd %mm0,%ebp
	pxor %mm2,%mm0
	psrlq $4,%mm0
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%edx,1),%mm0
	movb 5(%edi),%cl		# fetch Xi[5]
	psllq $60,%mm2
	pxor (%eax,%ebx,8),%mm1
	andl $15,%ebp
	pxor (%esi,%edx,1),%mm1
	movl %ecx,%edx
	movd %mm0,%ebx
	pxor %mm2,%mm0
	shlb $4,%cl
	psrlq $4,%mm0
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%ecx,1),%mm0
	psllq $60,%mm2
	andl $240,%edx
	pxor (%eax,%ebp,8),%mm1
	andl $15,%ebx
	pxor (%esi,%ecx,1),%mm1
	movd %mm0,%ebp
	pxor %mm2,%mm0
	psrlq $4,%mm0
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%edx,1),%mm0
	movb 4(%edi),%cl		# fetch Xi[4]
	psllq $60,%mm2
	pxor (%eax,%ebx,8),%mm1
	andl $15,%ebp
	pxor (%esi,%edx,1),%mm1
	movl %ecx,%edx
	movd %mm0,%ebx
	pxor %mm2,%mm0
	shlb $4,%cl
	psrlq $4,%mm0
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%ecx,1),%mm0
	psllq $60,%mm2
	andl $240,%edx
	pxor (%eax,%ebp,8),%mm1
	andl $15,%ebx
	pxor (%esi,%ecx,1),%mm1
	movd %mm0,%ebp
	pxor %mm2,%mm0
	psrlq $4,%mm0
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%edx,1),%mm0
	movb 3(%edi),%cl		# fetch Xi[3]
	psllq $60,%mm2
	pxor (%eax,%ebx,8),%mm1
	andl $15,%ebp
	pxor (%esi,%edx,1),%mm1
	movl %ecx,%edx
	movd %mm0,%ebx
	pxor %mm2,%mm0
	shlb $4,%cl
	psrlq $4,%mm0
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%ecx,1),%mm0
	psllq $60,%mm2
	andl $240,%edx
	pxor (%eax,%ebp,8),%mm1
	andl $15,%ebx
	pxor (%esi,%ecx,1),%mm1
	movd %mm0,%ebp
	pxor %mm2,%mm0
	psrlq $4,%mm0
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%edx,1),%mm0
	movb 2(%edi),%cl		# fetch Xi[2]
	psllq $60,%mm2
	pxor (%eax,%ebx,8),%mm1
	andl $15,%ebp
	pxor (%esi,%edx,1),%mm1
	movl %ecx,%edx
	movd %mm0,%ebx
	pxor %mm2,%mm0
	shlb $4,%cl
	psrlq $4,%mm0
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%ecx,1),%mm0
	psllq $60,%mm2
	andl $240,%edx
	pxor (%eax,%ebp,8),%mm1
	andl $15,%ebx
	pxor (%esi,%ecx,1),%mm1
	movd %mm0,%ebp
	pxor %mm2,%mm0
	psrlq $4,%mm0
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%edx,1),%mm0
	movb 1(%edi),%cl		# fetch Xi[1]
	psllq $60,%mm2
	pxor (%eax,%ebx,8),%mm1
	andl $15,%ebp
	pxor (%esi,%edx,1),%mm1
	movl %ecx,%edx
	movd %mm0,%ebx
	pxor %mm2,%mm0
	shlb $4,%cl
	psrlq $4,%mm0
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%ecx,1),%mm0
	psllq $60,%mm2
	andl $240,%edx
	pxor (%eax,%ebp,8),%mm1
	andl $15,%ebx
	pxor (%esi,%ecx,1),%mm1
	movd %mm0,%ebp
	pxor %mm2,%mm0
	psrlq $4,%mm0
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%edx,1),%mm0
	movb (%edi),%cl			# fetch Xi[0] (last byte)
	psllq $60,%mm2
	pxor (%eax,%ebx,8),%mm1
	andl $15,%ebp
	pxor (%esi,%edx,1),%mm1
	movl %ecx,%edx
	movd %mm0,%ebx
	pxor %mm2,%mm0
	shlb $4,%cl
	psrlq $4,%mm0
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%ecx,1),%mm0
	psllq $60,%mm2
	andl $240,%edx
	pxor (%eax,%ebp,8),%mm1
	andl $15,%ebx
	pxor (%esi,%ecx,1),%mm1
	movd %mm0,%ebp
	pxor %mm2,%mm0
	psrlq $4,%mm0
	movq %mm1,%mm2
	psrlq $4,%mm1
	pxor 8(%esi,%edx,1),%mm0
	psllq $60,%mm2
	pxor (%eax,%ebx,8),%mm1
	andl $15,%ebp
	pxor (%esi,%edx,1),%mm1
	movd %mm0,%ebx
	pxor %mm2,%mm0
	# Final drain: apply last reduction via IALU (shll/xorl below),
	# split mm0/mm1 into four 32-bit words, byte-swap to big-endian.
	movl 4(%eax,%ebp,8),%edi	# high half of last rem_4bit entry
	psrlq $32,%mm0
	movd %mm1,%edx
	psrlq $32,%mm1
	movd %mm0,%ecx
	movd %mm1,%ebp
	shll $4,%edi
	bswap %ebx
	bswap %edx
	bswap %ecx
	xorl %edi,%ebp			# fold final reduction into top word
	bswap %ebp
	ret
.size _mmx_gmult_4bit_inner,.-_mmx_gmult_4bit_inner
#-----------------------------------------------------------------------
# void gcm_gmult_4bit_mmx(u64 Xi[2], const u128 Htable[16])
# MMX GHASH multiply: Xi = Xi * H mod P, via _mmx_gmult_4bit_inner.
# ABI: i386 cdecl; after 4 pushes, 20(%esp) = Xi, 24(%esp) = Htable.
# Uses call/pop PIC idiom to materialize &.Lrem_4bit in %eax.
#-----------------------------------------------------------------------
.globl gcm_gmult_4bit_mmx
.type gcm_gmult_4bit_mmx,@function
.align 16
gcm_gmult_4bit_mmx:
.L_gcm_gmult_4bit_mmx_begin:
	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
	movl 20(%esp),%edi		# edi = Xi
	movl 24(%esp),%esi		# esi = Htable
	call .L005pic_point		# PIC: get own address
.L005pic_point:
	popl %eax
	leal .Lrem_4bit-.L005pic_point(%eax),%eax	# eax = &.Lrem_4bit
	movzbl 15(%edi),%ebx		# ebx = last byte of Xi (first nibble pair)
	call _mmx_gmult_4bit_inner
	movl 20(%esp),%edi		# edi = Xi (inner clobbered it)
	emms				# leave FPU usable again after MMX
	movl %ebx,12(%edi)		# store big-endian result from inner
	movl %edx,4(%edi)
	movl %ecx,8(%edi)
	movl %ebp,(%edi)
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret
.size gcm_gmult_4bit_mmx,.-.L_gcm_gmult_4bit_mmx_begin
#-----------------------------------------------------------------------
# void gcm_ghash_4bit_mmx(u64 Xi[2], const u128 Htable[16],
#                         const u8 *inp, size_t len)
# MMX GHASH over a buffer: per 16-byte block, Xi = (Xi ^ block) * H.
# ABI: i386 cdecl; after 4 pushes: 20=Xi, 24=Htable, 28=inp, 32=len.
# 32(%esp) is rewritten as inp+len; after the subl $20 frame those
# caller slots are reached at +20 (Xi@40, inp@48, end@52).
#-----------------------------------------------------------------------
.globl gcm_ghash_4bit_mmx
.type gcm_ghash_4bit_mmx,@function
.align 16
gcm_ghash_4bit_mmx:
.L_gcm_ghash_4bit_mmx_begin:
	pushl %ebp
	pushl %ebx
	pushl %esi
	pushl %edi
	movl 20(%esp),%ebp		# ebp = Xi
	movl 24(%esp),%esi		# esi = Htable
	movl 28(%esp),%edi		# edi = inp
	movl 32(%esp),%ecx		# ecx = len
	call .L006pic_point		# PIC: get own address
.L006pic_point:
	popl %eax
	leal .Lrem_4bit-.L006pic_point(%eax),%eax	# eax = &.Lrem_4bit
	addl %edi,%ecx
	movl %ecx,32(%esp)		# 32(%esp) = end-of-input pointer
	subl $20,%esp			# 16-byte Xi^block buffer + saved edi
	movl 12(%ebp),%ebx		# load Xi into ebx:edx:ecx:ebp
	movl 4(%ebp),%edx
	movl 8(%ebp),%ecx
	movl (%ebp),%ebp
	jmp .L007mmx_outer_loop
.align 16
.L007mmx_outer_loop:
	xorl 12(%edi),%ebx		# Xi ^= current input block
	xorl 4(%edi),%edx
	xorl 8(%edi),%ecx
	xorl (%edi),%ebp
	movl %edi,48(%esp)		# save inp (= 28+20 after subl)
	movl %ebx,12(%esp)		# Xi^block at 0-12(%esp)
	movl %edx,4(%esp)
	movl %ecx,8(%esp)
	movl %ebp,(%esp)
	movl %esp,%edi			# inner reads bytes from this buffer
	shrl $24,%ebx			# ebx = top byte = buffer byte 15
	call _mmx_gmult_4bit_inner
	movl 48(%esp),%edi		# restore inp, advance one block
	leal 16(%edi),%edi
	cmpl 52(%esp),%edi		# end-of-input? (= 32+20)
	jb .L007mmx_outer_loop
	movl 40(%esp),%edi		# edi = Xi (= 20+20)
	emms				# leave FPU usable after MMX
	movl %ebx,12(%edi)		# store big-endian result from inner
	movl %edx,4(%edi)
	movl %ecx,8(%edi)
	movl %ebp,(%edi)
	addl $20,%esp
	popl %edi
	popl %esi
	popl %ebx
	popl %ebp
	ret
.size gcm_ghash_4bit_mmx,.-.L_gcm_ghash_4bit_mmx_begin
# .Lrem_4bit: 16 little-endian 64-bit entries (two .longs each) of the
# 4-bit GHASH reduction table used by _mmx_gmult_4bit_inner; values sit
# in the high dword (e.g. entry 1 = 0x01C20000 = 29491200).
.align 64
.Lrem_4bit:
.long 0,0,0,29491200,0,58982400,0,38141952
.long 0,117964800,0,113901568,0,76283904,0,88997888
.long 0,235929600,0,265420800,0,227803136,0,206962688
.long 0,152567808,0,148504576,0,177995776,0,190709760
# ASCII banner: "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>"
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte 0
695