/*
 * Written by J.T. Conklin <jtc@NetBSD.org>.
 * Public domain.
 */
5
6 #include <machine/asm.h>
7
8 #if defined(LIBC_SCCS)
9 RCSID("$NetBSD: swab.S,v 1.14 2014/05/23 02:34:19 uebayasi Exp $")
10 #endif
11
/*
 * swab -- copy a buffer, exchanging the two bytes of every 16-bit word.
 *
 * C prototype (per swab(3); not visible in this file):
 *	void swab(const void *from, void *to, ssize_t len);
 *
 * ABI:	i386, all arguments on the stack.  After the two register
 *	saves below they sit at 12(%esp), 16(%esp) and 20(%esp).
 * In:	from = source buffer, to = destination buffer, len = byte count.
 * Out:	nothing.
 * Clobbers: %eax, %ecx, flags.  %esi and %edi are saved and restored.
 *
 * Behaviour:
 *   - len <= 0 copies nothing.  len is signed, so it must be checked
 *     before the logical shift below; otherwise a negative value would
 *     turn into a word count of roughly 2^31 and run off both buffers.
 *   - For odd len the final byte is not copied (the shift drops it).
 *   - lodsw/stosw step the pointers according to the direction flag;
 *     this code assumes it is clear on entry, as the i386 System V ABI
 *     requires at function boundaries.
 *
 * Note from the original author: on the i486 this code is negligibly
 * faster than the code generated by gcc, at about half the size, and it
 * should be considerably faster than the gcc code on an i386.
 */

ENTRY(swab)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi		/* %esi = from */
	movl	16(%esp),%edi		/* %edi = to */
	movl	20(%esp),%ecx		/* %ecx = len (signed byte count) */

	testl	%ecx,%ecx
	jle	L4			/* zero or negative: nothing to do */

	shrl	$1,%ecx			/* bytes -> 16-bit words */

	/*
	 * Copy the first group of 1 to 7 words, swapping the bytes of
	 * each, until the remaining word count is a multiple of 8.
	 */
	testl	$7,%ecx
	jz	L2
	_ALIGN_TEXT,0x90
L1:	lodsw
	rorw	$8,%ax			/* exchange %al and %ah */
	stosw
	decl	%ecx
	testl	$7,%ecx
	jnz	L1

	/*
	 * Copy the remainder 8 words per iteration, swapping the bytes
	 * of each.  %ecx becomes the number of 8-word groups.
	 */
L2:	shrl	$3,%ecx
	jz	L4
	_ALIGN_TEXT,0x90
L3:
	.rept	8			/* unrolled: 8 words per pass */
	lodsw
	rorw	$8,%ax
	stosw
	.endr
	decl	%ecx
	jnz	L3

L4:	popl	%edi
	popl	%esi
	ret
END(swab)
70