Home | History | Annotate | Line # | Download | only in string
      1  1.1  christos /*-
      2  1.1  christos  * Copyright (c) 1990 The Regents of the University of California.
      3  1.1  christos  * All rights reserved.
      4  1.1  christos  *
      5  1.1  christos  * This code is derived from locore.s.
      6  1.1  christos  *
      7  1.1  christos  * Redistribution and use in source and binary forms, with or without
      8  1.1  christos  * modification, are permitted provided that the following conditions
      9  1.1  christos  * are met:
     10  1.1  christos  * 1. Redistributions of source code must retain the above copyright
     11  1.1  christos  *    notice, this list of conditions and the following disclaimer.
     12  1.1  christos  * 2. Redistributions in binary form must reproduce the above copyright
     13  1.1  christos  *    notice, this list of conditions and the following disclaimer in the
     14  1.1  christos  *    documentation and/or other materials provided with the distribution.
     15  1.1  christos  * 3. Neither the name of the University nor the names of its contributors
     16  1.1  christos  *    may be used to endorse or promote products derived from this software
     17  1.1  christos  *    without specific prior written permission.
     18  1.1  christos  *
     19  1.1  christos  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     20  1.1  christos  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  1.1  christos  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  1.1  christos  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     23  1.1  christos  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  1.1  christos  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  1.1  christos  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  1.1  christos  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  1.1  christos  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  1.1  christos  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  1.1  christos  * SUCH DAMAGE.
     30  1.1  christos  */
     31  1.1  christos 
     32  1.1  christos #include <machine/asm.h>
     33  1.1  christos 
     34  1.1  christos #if defined(LIBC_SCCS)
     35  1.5  jakllsch 	RCSID("$NetBSD: bcopy.S,v 1.5 2014/03/22 19:16:34 jakllsch Exp $")
     36  1.1  christos #endif
     37  1.1  christos 
     38  1.1  christos 	/*
     39  1.1  christos 	 * (ov)bcopy (src,dst,cnt)
     40  1.1  christos 	 *  ws (at) tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
     41  1.3       dsl 	 *
     42  1.4       dsl 	 * Hacked about by dsl (at) netbsd.org
     43  1.1  christos 	 */
     44  1.1  christos 
     45  1.1  christos #ifdef MEMCOPY
     46  1.1  christos ENTRY(memcpy)
     47  1.3       dsl #define NO_OVERLAP
     48  1.1  christos #else
     49  1.1  christos #ifdef MEMMOVE
     50  1.1  christos ENTRY(memmove)
     51  1.1  christos #else
     52  1.1  christos ENTRY(bcopy)
     53  1.1  christos #endif
     54  1.1  christos #endif
                       /*
                        * One body is shared by the three entry points; MEMCOPY and MEMMOVE
                        * select which name is emitted.  memcpy defines NO_OVERLAP, which
                        * removes every backwards-copy path below.
                        *
                        * Register roles once the argument fix-up below has run:
                        *   %rdi  destination pointer (bcopy swaps its first two arguments)
                        *   %rsi  source pointer
                        *   %rdx  byte count
                        *   %rcx  working count for the rep string instructions
                        *   %r11  copy of the destination pointer, used for the alignment test
                        *   %rax  memcpy/memmove only: destination pointer, the return value
                        */
     55  1.3       dsl 	movq	%rdx,%rcx	/* working copy of the byte count */
     56  1.1  christos #if defined(MEMCOPY) || defined(MEMMOVE)
     57  1.3       dsl 	movq	%rdi,%rax	/* must return destination address */
     58  1.4       dsl 	mov	%rdi,%r11	/* for misaligned check */
     59  1.1  christos #else
     60  1.4       dsl 	mov	%rsi,%r11	/* for misaligned check (%rsi is still the destination here) */
     61  1.3       dsl 	xchgq	%rdi,%rsi	/* bcopy() has arg order reversed */
     62  1.3       dsl #endif
     63  1.3       dsl 
     64  1.3       dsl #if !defined(NO_OVERLAP)
     65  1.3       dsl 	movq	%rdi,%r8
     66  1.3       dsl 	subq	%rsi,%r8	/* %r8 = dst - src, tested against the length below */
     67  1.3       dsl #endif
     68  1.3       dsl 
     69  1.3       dsl 	shrq	$3,%rcx		/* count for copy by words */
     70  1.3       dsl 	jz	8f		/* j if less than 8 bytes */
     71  1.3       dsl 
     72  1.3       dsl 	lea	-8(%rdi,%rdx),%r9	/* target address of last 8 */
     73  1.4       dsl 	mov	-8(%rsi,%rdx),%r10	/* get last word (read before any store) */
     74  1.3       dsl #if !defined(NO_OVERLAP)
     75  1.3       dsl 	cmpq	%rdx,%r8	/* overlapping? unsigned (dst - src) < len */
     76  1.3       dsl 	jb	10f		/* j if dst starts inside the source buffer */
     77  1.1  christos #endif
     78  1.3       dsl 
     79  1.3       dsl /*
     80  1.3       dsl  * Non-overlapping, copy forwards.
     81  1.3       dsl  * Newer Intel cpus (Nehalem) will do 16byte read/write transfers
     82  1.3       dsl  * if %ecx is more than 76.
     83  1.3       dsl  * AMD might do something similar some day.
     84  1.3       dsl  */
     85  1.4       dsl 	and	$7,%r11		/* destination misaligned ? */
     86  1.4       dsl 	jnz	2f
     87  1.3       dsl 	rep
     88  1.3       dsl 	movsq			/* copy len / 8 whole words */
     89  1.4       dsl 	mov	%r10,(%r9)	/* write last word (covers the len % 8 tail) */
     90  1.4       dsl 	ret
     91  1.4       dsl 
     92  1.4       dsl /*
     93  1.4       dsl  * Destination misaligned
     94  1.4       dsl  * AMD say it is better to align the destination (not the source).
     95  1.4       dsl  * (This will also re-align copies if the source and dest are both
     96  1.4       dsl  * misaligned by the same amount)
     97  1.4       dsl  * (I think Nehalem will use its accelerated copy if the source
     98  1.4       dsl  * and destination have the same alignment.)
                        *
                        * On entry %r11 = dst & 7 (1 .. 7).  The first 8 source bytes are
                        * saved in %rdx and both pointers are advanced by 8 - (dst & 7), so
                        * the 'rep movsq' stores start on an 8-byte boundary.  The saved first
                        * and last words are stored once the bulk copy has finished.
     99  1.4       dsl  */
    100  1.4       dsl 2:
    101  1.4       dsl 	lea	-9(%r11,%rdx),%rcx	/* post re-alignment count */
    102  1.4       dsl 	neg	%r11			/* now -1 .. -7 */
    103  1.4       dsl 	mov	(%rsi),%rdx		/* get first word */
    104  1.4       dsl 	mov	%rdi,%r8		/* target for first word */
    105  1.4       dsl 	lea	8(%rsi,%r11),%rsi	/* advance by 8 - (dst & 7) */
    106  1.4       dsl 	lea	8(%rdi,%r11),%rdi	/* destination is now 8-byte aligned */
    107  1.4       dsl 	shr	$3,%rcx			/* bytes -> 8-byte words */
    108  1.4       dsl 	rep
    109  1.4       dsl 	movsq
    110  1.4       dsl 	mov	%rdx,(%r8)		/* write first word */
    111  1.4       dsl 	mov	%r10,(%r9)		/* write last word */
    112  1.3       dsl 	ret
    113  1.3       dsl 
    114  1.3       dsl #if !defined(NO_OVERLAP)
    115  1.3       dsl /* Must copy backwards.
    116  1.3       dsl  * Reverse copy is probably easy to code faster than 'rep movds'
    117  1.4       dsl  * since that requires (IIRC) an extra clock every 3 iterations (AMD).
    118  1.3       dsl  * However I don't suppose anything cares that much!
    119  1.4       dsl  * The big cost is the std/cld pair - reputedly 50+ cycles on Netburst P4.
    120  1.3       dsl  * The copy is aligned with the buffer start (more likely to
    121  1.3       dsl  * be a multiple of 8 than the end).
    122  1.3       dsl  */
    123  1.3       dsl 10:
    124  1.3       dsl 	lea	-8(%rsi,%rcx,8),%rsi	/* last whole source word (%rcx = len / 8) */
    125  1.3       dsl 	lea	-8(%rdi,%rcx,8),%rdi	/* matching destination word */
    126  1.3       dsl 	std				/* string ops now step downwards */
    127  1.1  christos 	rep
    128  1.1  christos 	movsq
    129  1.3       dsl 	cld				/* direction flag clear again before returning */
    130  1.3       dsl 	mov	%r10,(%r9)	/* write last bytes */
    131  1.3       dsl 	ret
    132  1.3       dsl #endif
    133  1.3       dsl 
    134  1.3       dsl /* Less than 8 bytes to copy, copy by bytes */
    135  1.3       dsl /* Intel Nehalem optimise 'rep movsb' for <= 7 bytes (9-15 clocks).
    136  1.4       dsl  * For longer transfers it is 50+ !
    137  1.3       dsl  */
    138  1.3       dsl 8:	mov	%rdx,%rcx	/* byte count (0 .. 7) for rep movsb */
    139  1.3       dsl 
    140  1.3       dsl #if !defined(NO_OVERLAP)
    141  1.3       dsl 	cmpq	%rdx,%r8	/* overlapping? */
    142  1.3       dsl 	jb	81f
    143  1.3       dsl #endif
    144  1.3       dsl 
    145  1.3       dsl 	/* nope, copy forwards. */
    146  1.1  christos 	rep
    147  1.1  christos 	movsb
    148  1.1  christos 	ret
    149  1.3       dsl 
    150  1.3       dsl #if !defined(NO_OVERLAP)
    151  1.3       dsl /* Must copy backwards */
    152  1.3       dsl 81:
    153  1.3       dsl 	lea	-1(%rsi,%rcx),%rsi	/* last source byte */
    154  1.3       dsl 	lea	-1(%rdi,%rcx),%rdi	/* last destination byte */
    155  1.1  christos 	std
    156  1.1  christos 	rep
    157  1.1  christos 	movsb
    158  1.1  christos 	cld
    159  1.1  christos 	ret
    160  1.3       dsl #endif
    161  1.5  jakllsch 
    162  1.5  jakllsch #ifdef MEMCOPY
    163  1.5  jakllsch END(memcpy)
    164  1.5  jakllsch #else
    165  1.5  jakllsch #ifdef MEMMOVE
    166  1.5  jakllsch END(memmove)
    167  1.5  jakllsch #else
    168  1.5  jakllsch END(bcopy)
    169  1.5  jakllsch #endif
    170  1.5  jakllsch #endif
    171