Home | History | Annotate | Line # | Download | only in string
bcopy.S revision 1.3
      1 /*-
      2  * Copyright (c) 1990 The Regents of the University of California.
      3  * All rights reserved.
      4  *
      5  * This code is derived from locore.s.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  * 2. Redistributions in binary form must reproduce the above copyright
     13  *    notice, this list of conditions and the following disclaimer in the
     14  *    documentation and/or other materials provided with the distribution.
     15  * 3. Neither the name of the University nor the names of its contributors
     16  *    may be used to endorse or promote products derived from this software
     17  *    without specific prior written permission.
     18  *
     19  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  * SUCH DAMAGE.
     30  */
     31 
     32 #include <machine/asm.h>
     33 
     34 #if defined(LIBC_SCCS)
     35 	RCSID("$NetBSD: bcopy.S,v 1.3 2009/11/21 19:52:54 dsl Exp $")
     36 #endif
     37 
     38 	/*
     39 	 * (ov)bcopy (src,dst,cnt)
     40 	 *  ws (at) tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
     41 	 *
     42 	 * Hacked about by dsl (at) netbsd.org
     43 	 */
     44 
     45 #ifdef MEMCOPY		/* build-time choice of entry point: memcpy, memmove or bcopy */
     46 ENTRY(memcpy)		/* in: %rdi = dst, %rsi = src, %rdx = byte count */
     47 #define NO_OVERLAP	/* memcpy: every overlap check below is compiled out */
     48 #else
     49 #ifdef MEMMOVE
     50 ENTRY(memmove)		/* in: %rdi = dst, %rsi = src, %rdx = byte count */
     51 #else
     52 ENTRY(bcopy)		/* in: %rdi = src, %rsi = dst, %rdx = byte count */
     53 #endif
     54 #endif
     55 	movq	%rdx,%rcx	/* working copy of the count; %rdx itself is never modified */
     56 #if defined(MEMCOPY) || defined(MEMMOVE)
     57 	movq	%rdi,%rax	/* must return destination address */
     58 #else
     59 	xchgq	%rdi,%rsi	/* bcopy() has arg order reversed */
     60 #endif
     61 	/* From here on: %rdi = dst, %rsi = src, %rdx = byte count. */
     62 #if !defined(NO_OVERLAP)
     63 	movq	%rdi,%r8
     64 	subq	%rsi,%r8	/* %r8 = dst - src, compared unsigned against the count below */
     65 #endif
     66 
     67 	shrq	$3,%rcx		/* count for copy by words */
     68 	jz	8f		/* j if less than 8 bytes */
     69 	/* Read the last 8 source bytes before any store; storing them last covers the 0-7 byte tail. */
     70 	lea	-8(%rdi,%rdx),%r9	/* target address of last 8 */
     71 	mov	-8(%rsi,%rdx),%r10	/* get last bytes */
     72 #if !defined(NO_OVERLAP)
     73 	cmpq	%rdx,%r8	/* overlapping? */
     74 	jb	10f		/* yes: src <= dst < src + count, copy backwards */
     75 #endif
     76 
     77 /*
     78  * Non-overlapping, or dst below src: copy forwards.
     79  * Newer Intel cpus (Nehalem) will do 16byte read/write transfers
     80  * if %rcx is more than 76.
     81  * AMD might do something similar some day.
     82  */
     83 	rep
     84 	movsq			/* copy %rcx words of 8 bytes each */
     85 	mov	%r10,(%r9)	/* write last bytes */
     86 	ret
     87 
     88 #if !defined(NO_OVERLAP)
     89 /* Must copy backwards.
     90  * Reverse copy is probably easy to code faster than 'rep movsq'
     91  * since that requires (IIRC) an extra clock per iteration.
     92  * However I don't suppose anything cares that much!
     93  * The copy is aligned with the buffer start (more likely to
     94  * be a multiple of 8 than the end).
     95  */
     96 10:
     97 	lea	-8(%rsi,%rcx,8),%rsi	/* last whole word of src, counted from its start */
     98 	lea	-8(%rdi,%rcx,8),%rdi	/* matching word of dst */
     99 	std				/* set direction flag: movsq now steps downwards */
    100 	rep
    101 	movsq
    102 	cld				/* clear the direction flag before returning */
    103 	mov	%r10,(%r9)	/* write last bytes */
    104 	ret
    105 #endif
    106 
    107 /* Less than 8 bytes to copy, copy by bytes */
    108 /* Intel Nehalem optimise 'rep movsb' for <= 7 bytes (9-15 clocks).
    109  * For long transfers it is 50+ !
    110  */
    111 8:	mov	%rdx,%rcx	/* %rcx = byte count, 0..7 here */
    112 
    113 #if !defined(NO_OVERLAP)
    114 	cmpq	%rdx,%r8	/* overlapping? */
    115 	jb	81f		/* yes (implies count >= 1), copy backwards */
    116 #endif
    117 
    118 	/* nope, copy forwards. */
    119 	rep
    120 	movsb
    121 	ret
    122 
    123 #if !defined(NO_OVERLAP)
    124 /* Must copy backwards */
    125 81:
    126 	lea	-1(%rsi,%rcx),%rsi	/* last byte of src */
    127 	lea	-1(%rdi,%rcx),%rdi	/* last byte of dst */
    128 	std				/* set direction flag: movsb now steps downwards */
    129 	rep
    130 	movsb
    131 	cld				/* clear the direction flag before returning */
    132 	ret
    133 #endif
    134