swab.S revision 1.10 1 /*
2 * Written by J.T. Conklin <jtc (at) netbsd.org>.
3 * Public domain.
4 */
5
6 #include <machine/asm.h>
7
8 #if defined(LIBC_SCCS)
9 RCSID("$NetBSD: swab.S,v 1.10 1999/08/23 08:45:11 kleink Exp $")
10 #endif
11
12 /*
13 * On the i486, this code is negligibly faster than the code generated
14 * by gcc at about half the size. If my i386 databook is correct, it
15 * should be considerably faster than the gcc code on an i386.
16 */
17
/*
 * swab -- copy a buffer while exchanging each pair of adjacent bytes.
 * Intended to match the POSIX prototype
 *	void swab(const void *from, void *to, ssize_t len);
 *
 * ABI:   i386 cdecl, arguments on the stack.  After the two pushes below:
 *	12(%esp) = source pointer
 *	16(%esp) = destination pointer
 *	20(%esp) = length in bytes
 * Out:   nothing.
 * Clobb: %eax, %ecx, flags (direction flag is left clear).
 *	  %esi and %edi are saved and restored.
 *
 * The length is halved to a count of 16-bit words, so an odd trailing
 * byte is not copied.  A length that is zero or negative copies nothing:
 * without that check the unsigned shift would turn a negative length
 * into a word count of roughly 2^31 and run far past both buffers.
 */
ENTRY(swab)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi		# %esi = source
	movl	16(%esp),%edi		# %edi = destination
	movl	20(%esp),%ecx		# %ecx = length in bytes

	testl	%ecx,%ecx		# POSIX: negative length is a no-op
	jle	L4			# (zero length has nothing to do either)

	cld				# set direction forward

	shrl	$1,%ecx			# %ecx = number of 16-bit words
	testl	$7,%ecx			# copy first group of 1 to 7 words
	jz	L2			# while swapping alternate bytes.
	_ALIGN_TEXT,0x90
L1:	lodsw
	rorw	$8,%ax			# exchange the two bytes of the word
	stosw
	decl	%ecx
	testl	$7,%ecx			# loop until count is a multiple of 8
	jnz	L1

L2:	shrl	$3,%ecx			# copy remainder 8 words at a time
	jz	L4			# while swapping alternate bytes.
	_ALIGN_TEXT,0x90
L3:	lodsw				# unrolled: 8 words per iteration
	rorw	$8,%ax
	stosw
	lodsw
	rorw	$8,%ax
	stosw
	lodsw
	rorw	$8,%ax
	stosw
	lodsw
	rorw	$8,%ax
	stosw
	lodsw
	rorw	$8,%ax
	stosw
	lodsw
	rorw	$8,%ax
	stosw
	lodsw
	rorw	$8,%ax
	stosw
	lodsw
	rorw	$8,%ax
	stosw
	decl	%ecx			# %ecx = groups of 8 words remaining
	jnz	L3

L4:	popl	%edi
	popl	%esi
	ret
71