/*
 * Written by J.T. Conklin <jtc (at) NetBSD.org>.
 * Public domain.
 */

#include <machine/asm.h>

#if defined(LIBC_SCCS)
	RCSID("$NetBSD: swab.S,v 1.14 2014/05/23 02:34:19 uebayasi Exp $")
#endif

/*
 * On the i486, this code is negligibly faster than the code generated
 * by gcc at about half the size.  If my i386 databook is correct, it
 * should be considerably faster than the gcc code on a i386.
 */

/*
 * swab -- copy 16-bit words from one buffer to another, exchanging the
 * two bytes of every word.
 *
 * Syntax: GNU as, AT&T operand order, i386 (32-bit), run through cpp.
 *
 * Stack arguments (offsets are relative to %esp after the two pushes
 * below):
 *	12(%esp)	source pointer		-> %esi
 *	16(%esp)	destination pointer	-> %edi
 *	20(%esp)	length in bytes, signed	-> %ecx
 *
 * Returns nothing.  %esi and %edi are saved and restored; %eax, %ecx
 * and the flags are clobbered.
 *
 * A length that is zero or negative copies nothing (POSIX: "If nbytes
 * is negative, swab() does nothing").  An odd length copies len - 1
 * bytes; the trailing byte is not touched.
 *
 * lodsw/stosw are used without a preceding cld, so the direction flag
 * must be clear on entry.
 * NOTE(review): the i386 System V ABI guarantees DF = 0 at function
 * entry -- confirm for any non-standard caller.
 */
ENTRY(swab)
	pushl	%esi
	pushl	%edi
	movl	12(%esp),%esi		/* esi = source */
	movl	16(%esp),%edi		/* edi = destination */
	movl	20(%esp),%ecx		/* ecx = length in bytes (signed) */

	/*
	 * Reject len <= 0 before the unsigned shift below; otherwise a
	 * negative length would be treated as a huge word count.
	 */
	testl	%ecx,%ecx
	jle	.Lswab_done

	shrl	$1,%ecx			/* ecx = number of 16-bit words */
	testl	$7,%ecx			/* copy first group of 1 to 7 words */
	jz	.Lswab_blocks		/* while swapping alternate bytes. */
	_ALIGN_TEXT,0x90
.Lswab_head:
	lodsw
	rorw	$8,%ax			/* exchange the two bytes of the word */
	stosw
	decl	%ecx
	testl	$7,%ecx			/* loop until count is a multiple of 8 */
	jnz	.Lswab_head

.Lswab_blocks:
	shrl	$3,%ecx			/* copy remainder 8 words at a time */
	jz	.Lswab_done		/* while swapping alternate bytes. */
	_ALIGN_TEXT,0x90
.Lswab_unrolled:
	/* Eight identical copies of the load/swap/store sequence. */
	.rept	8
	lodsw
	rorw	$8,%ax
	stosw
	.endr
	decl	%ecx			/* ecx = 8-word groups remaining */
	jnz	.Lswab_unrolled

.Lswab_done:
	popl	%edi
	popl	%esi
	ret
END(swab)