swab.S revision 1.8 1 1.1 cgd /*
2 1.8 jtc * Written by J.T. Conklin <jtc (at) netbsd.org>.
3 1.8 jtc * Public domain.
4 1.1 cgd */
5 1.1 cgd
6 1.7 jtc #include <machine/asm.h>
7 1.7 jtc
8 1.2 jtc #if defined(LIBC_SCCS)
9 1.8 jtc RCSID("$Id: swab.S,v 1.8 1995/04/28 22:54:38 jtc Exp $")
10 1.1 cgd #endif
11 1.1 cgd
12 1.1 cgd /*
13 1.1 cgd * On the i486, this code is negligibly faster than the code generated
14 1.3 jtc * by gcc at about half the size. If my i386 databook is correct, it
15 1.1 cgd * should be considerably faster than the gcc code on an i386.
16 1.1 cgd */
17 1.1 cgd
18 1.1 cgd ENTRY(swab)
19 1.1 cgd pushl %esi
20 1.1 cgd pushl %edi
21 1.1 cgd movl 12(%esp),%esi
22 1.1 cgd movl 16(%esp),%edi
23 1.1 cgd movl 20(%esp),%ecx
24 1.1 cgd
25 1.1 cgd cld # set direction forward
26 1.1 cgd
27 1.4 jtc shrl $1,%ecx
28 1.1 cgd testl $7,%ecx # copy first group of 1 to 7 words
29 1.4 jtc jz L2 # while swaping alternate bytes.
30 1.1 cgd .align 2,0x90
31 1.1 cgd L1: lodsw
32 1.6 jtc rorw $8,%ax
33 1.1 cgd stosw
34 1.1 cgd decl %ecx
35 1.1 cgd testl $7,%ecx
36 1.4 jtc jnz L1
37 1.3 jtc
38 1.1 cgd L2: shrl $3,%ecx # copy remainder 8 words at a time
39 1.1 cgd jz L4 # while swapping alternate bytes.
40 1.1 cgd .align 2,0x90
41 1.1 cgd L3: lodsw
42 1.6 jtc rorw $8,%ax
43 1.1 cgd stosw
44 1.1 cgd lodsw
45 1.6 jtc rorw $8,%ax
46 1.1 cgd stosw
47 1.1 cgd lodsw
48 1.6 jtc rorw $8,%ax
49 1.1 cgd stosw
50 1.1 cgd lodsw
51 1.6 jtc rorw $8,%ax
52 1.1 cgd stosw
53 1.1 cgd lodsw
54 1.6 jtc rorw $8,%ax
55 1.1 cgd stosw
56 1.1 cgd lodsw
57 1.6 jtc rorw $8,%ax
58 1.1 cgd stosw
59 1.1 cgd lodsw
60 1.6 jtc rorw $8,%ax
61 1.1 cgd stosw
62 1.1 cgd lodsw
63 1.6 jtc rorw $8,%ax
64 1.1 cgd stosw
65 1.1 cgd decl %ecx
66 1.1 cgd jnz L3
67 1.1 cgd
68 1.6 jtc L4: popl %edi
69 1.1 cgd popl %esi
70 1.1 cgd ret
71