Home | History | Annotate | Line # | Download | only in string
swab.S revision 1.8
      1  1.1  cgd /*
      2  1.8  jtc  * Written by J.T. Conklin <jtc (at) netbsd.org>.
      3  1.8  jtc  * Public domain.
      4  1.1  cgd  */
      5  1.1  cgd 
      6  1.7  jtc #include <machine/asm.h>
      7  1.7  jtc 
      8  1.2  jtc #if defined(LIBC_SCCS)
      9  1.8  jtc 	RCSID("$Id: swab.S,v 1.8 1995/04/28 22:54:38 jtc Exp $")
     10  1.1  cgd #endif
     11  1.1  cgd 
     12  1.1  cgd /*
     13  1.1  cgd  * On the i486, this code is negligibly faster than the code generated
     14  1.3  jtc  * by gcc at about half the size.  If my i386 databook is correct, it
     15  1.1  cgd  * should be considerably faster than the gcc code on an i386.
     16  1.1  cgd  */
     17  1.1  cgd 
     18  1.1  cgd ENTRY(swab)
     19  1.1  cgd 	pushl	%esi
     20  1.1  cgd 	pushl	%edi
     21  1.1  cgd 	movl	12(%esp),%esi
     22  1.1  cgd 	movl	16(%esp),%edi
     23  1.1  cgd 	movl	20(%esp),%ecx
     24  1.1  cgd 
     25  1.1  cgd 	cld				# set direction forward
     26  1.1  cgd 
     27  1.4  jtc 	shrl	$1,%ecx
     28  1.1  cgd 	testl	$7,%ecx			# copy first group of 1 to 7 words
     29  1.4  jtc 	jz	L2			# while swaping alternate bytes.
     30  1.1  cgd 	.align	2,0x90
     31  1.1  cgd L1:	lodsw
     32  1.6  jtc 	rorw	$8,%ax
     33  1.1  cgd 	stosw
     34  1.1  cgd 	decl	%ecx
     35  1.1  cgd 	testl	$7,%ecx
     36  1.4  jtc 	jnz	L1
     37  1.3  jtc 
     38  1.1  cgd L2:	shrl	$3,%ecx			# copy remainder 8 words at a time
     39  1.1  cgd 	jz	L4			# while swapping alternate bytes.
     40  1.1  cgd 	.align	2,0x90
     41  1.1  cgd L3:	lodsw
     42  1.6  jtc 	rorw	$8,%ax
     43  1.1  cgd 	stosw
     44  1.1  cgd 	lodsw
     45  1.6  jtc 	rorw	$8,%ax
     46  1.1  cgd 	stosw
     47  1.1  cgd 	lodsw
     48  1.6  jtc 	rorw	$8,%ax
     49  1.1  cgd 	stosw
     50  1.1  cgd 	lodsw
     51  1.6  jtc 	rorw	$8,%ax
     52  1.1  cgd 	stosw
     53  1.1  cgd 	lodsw
     54  1.6  jtc 	rorw	$8,%ax
     55  1.1  cgd 	stosw
     56  1.1  cgd 	lodsw
     57  1.6  jtc 	rorw	$8,%ax
     58  1.1  cgd 	stosw
     59  1.1  cgd 	lodsw
     60  1.6  jtc 	rorw	$8,%ax
     61  1.1  cgd 	stosw
     62  1.1  cgd 	lodsw
     63  1.6  jtc 	rorw	$8,%ax
     64  1.1  cgd 	stosw
     65  1.1  cgd 	decl	%ecx
     66  1.1  cgd 	jnz	L3
     67  1.1  cgd 
     68  1.6  jtc L4:	popl	%edi
     69  1.1  cgd 	popl	%esi
     70  1.1  cgd 	ret
     71