Home | History | Annotate | Line # | Download | only in string
bcopy.S revision 1.3
      1  1.1  christos /*-
      2  1.1  christos  * Copyright (c) 1990 The Regents of the University of California.
      3  1.1  christos  * All rights reserved.
      4  1.1  christos  *
      5  1.1  christos  * This code is derived from locore.s.
      6  1.1  christos  *
      7  1.1  christos  * Redistribution and use in source and binary forms, with or without
      8  1.1  christos  * modification, are permitted provided that the following conditions
      9  1.1  christos  * are met:
     10  1.1  christos  * 1. Redistributions of source code must retain the above copyright
     11  1.1  christos  *    notice, this list of conditions and the following disclaimer.
     12  1.1  christos  * 2. Redistributions in binary form must reproduce the above copyright
     13  1.1  christos  *    notice, this list of conditions and the following disclaimer in the
     14  1.1  christos  *    documentation and/or other materials provided with the distribution.
     15  1.1  christos  * 3. Neither the name of the University nor the names of its contributors
     16  1.1  christos  *    may be used to endorse or promote products derived from this software
     17  1.1  christos  *    without specific prior written permission.
     18  1.1  christos  *
     19  1.1  christos  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
     20  1.1  christos  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     21  1.1  christos  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     22  1.1  christos  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
     23  1.1  christos  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     24  1.1  christos  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     25  1.1  christos  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     26  1.1  christos  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     27  1.1  christos  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     28  1.1  christos  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     29  1.1  christos  * SUCH DAMAGE.
     30  1.1  christos  */
     31  1.1  christos 
     32  1.1  christos #include <machine/asm.h>
     33  1.1  christos 
     34  1.1  christos #if defined(LIBC_SCCS)
     35  1.3       dsl 	RCSID("$NetBSD: bcopy.S,v 1.3 2009/11/21 19:52:54 dsl Exp $")
     36  1.1  christos #endif
     37  1.1  christos 
      38  1.1  christos 	/*
      39  1.1  christos 	 * (ov)bcopy (src,dst,cnt); memcpy/memmove take (dst,src,cnt) and return dst
      40  1.1  christos 	 *  ws (at) tools.de     (Wolfgang Solfrank, TooLs GmbH) +49-228-985800
      41  1.3       dsl 	 * One body builds memcpy (MEMCOPY), memmove (MEMMOVE) or bcopy; only memcpy omits the overlap checks.
      42  1.3       dsl 	 * Hacked about by dsl (at) netbsd.org
      43  1.1  christos 	 */
      44  1.1  christos 
      45  1.1  christos #ifdef MEMCOPY
      46  1.1  christos ENTRY(memcpy)
      47  1.3       dsl #define NO_OVERLAP
      48  1.1  christos #else
      49  1.1  christos #ifdef MEMMOVE
      50  1.1  christos ENTRY(memmove)
      51  1.1  christos #else
      52  1.1  christos ENTRY(bcopy)
      53  1.1  christos #endif
      54  1.1  christos #endif
      55  1.3       dsl 	movq	%rdx,%rcx	/* %rcx = byte count (rep counter); %rdx keeps the original */
      56  1.1  christos #if defined(MEMCOPY) || defined(MEMMOVE)
      57  1.3       dsl 	movq	%rdi,%rax	/* must return destination address */
      58  1.1  christos #else
      59  1.3       dsl 	xchgq	%rdi,%rsi	/* bcopy() has arg order reversed; now %rdi = dst, %rsi = src */
      60  1.3       dsl #endif
      61  1.3       dsl 
      62  1.3       dsl #if !defined(NO_OVERLAP)
      63  1.3       dsl 	movq	%rdi,%r8	/* %r8 = dst ... */
      64  1.3       dsl 	subq	%rsi,%r8	/* ... - src, compared unsigned against the count below */
      65  1.3       dsl #endif
      66  1.3       dsl 
      67  1.3       dsl 	shrq	$3,%rcx		/* %rcx = count / 8, number of 8-byte words to copy */
      68  1.3       dsl 	jz	8f		/* j if less than 8 bytes */
      69  1.3       dsl 
      70  1.3       dsl 	lea	-8(%rdi,%rdx),%r9	/* target address of last 8 */
      71  1.3       dsl 	mov	-8(%rsi,%rdx),%r10	/* get last 8 source bytes before any store can overwrite them */
      72  1.3       dsl #if !defined(NO_OVERLAP)
      73  1.3       dsl 	cmpq	%rdx,%r8	/* overlapping? i.e. (dst - src) < count, unsigned */
      74  1.3       dsl 	jb	10f		/* yes: dst starts inside the source buffer, copy backwards */
      75  1.1  christos #endif
      76  1.3       dsl 
      77  1.3       dsl /*
      78  1.3       dsl  * Non-overlapping (or dst below src), copy forwards.
      79  1.3       dsl  * Newer Intel cpus (Nehalem) will do 16byte read/write transfers
      80  1.3       dsl  * if %ecx is more than 76.
      81  1.3       dsl  * AMD might do something similar some day.
      82  1.3       dsl  */
      83  1.3       dsl 	rep
      84  1.3       dsl 	movsq			/* copy %rcx words, addresses ascending */
      85  1.3       dsl 	mov	%r10,(%r9)	/* write last bytes: covers the count % 8 tail, may rewrite bytes already copied */
      86  1.3       dsl 	ret
      87  1.3       dsl 
      88  1.3       dsl #if !defined(NO_OVERLAP)
      89  1.3       dsl /* Must copy backwards.
      90  1.3       dsl  * Reverse copy is probably easy to code faster than 'rep movsq'
      91  1.3       dsl  * since that requires (IIRC) an extra clock per iteration.
      92  1.3       dsl  * However I don't suppose anything cares that much!
      93  1.3       dsl  * The copy is aligned with the buffer start (more likely to
      94  1.3       dsl  * be a multiple of 8 than the end).
      95  1.3       dsl  */
      96  1.3       dsl 10:
      97  1.3       dsl 	lea	-8(%rsi,%rcx,8),%rsi	/* %rsi = last whole source word, counted from the buffer start */
      98  1.3       dsl 	lea	-8(%rdi,%rcx,8),%rdi	/* %rdi = matching destination word */
      99  1.3       dsl 	std			/* set direction flag: string ops now step downwards */
     100  1.1  christos 	rep
     101  1.1  christos 	movsq			/* copy %rcx words, addresses descending */
     102  1.3       dsl 	cld			/* clear the direction flag again before returning */
     103  1.3       dsl 	mov	%r10,(%r9)	/* write last bytes (value was loaded before the copy started) */
     104  1.3       dsl 	ret
     105  1.3       dsl #endif
     106  1.3       dsl 
     107  1.3       dsl /* Less than 8 bytes to copy, copy by bytes */
     108  1.3       dsl /* Intel Nehalem optimise 'rep movsb' for <= 7 bytes (9-15 clocks).
     109  1.3       dsl  * For long transfers it is 50+ !
     110  1.3       dsl  */
     111  1.3       dsl 8:	mov	%rdx,%rcx	/* %rcx = byte count again (0..7 here; the shift above zeroed it) */
     112  1.3       dsl 
     113  1.3       dsl #if !defined(NO_OVERLAP)
     114  1.3       dsl 	cmpq	%rdx,%r8	/* overlapping? same unsigned (dst - src) < count test */
     115  1.3       dsl 	jb	81f
     116  1.3       dsl #endif
     117  1.3       dsl 
     118  1.3       dsl 	/* nope, copy forwards. */
     119  1.1  christos 	rep
     120  1.1  christos 	movsb			/* copy %rcx bytes, addresses ascending (no-op when count is 0) */
     121  1.1  christos 	ret
     122  1.3       dsl 
     123  1.3       dsl #if !defined(NO_OVERLAP)
     124  1.3       dsl /* Must copy backwards */
     125  1.3       dsl 81:
     126  1.3       dsl 	lea	-1(%rsi,%rcx),%rsi	/* %rsi = last source byte */
     127  1.3       dsl 	lea	-1(%rdi,%rcx),%rdi	/* %rdi = last destination byte */
     128  1.1  christos 	std			/* step downwards */
     129  1.1  christos 	rep
     130  1.1  christos 	movsb			/* copy %rcx bytes, addresses descending */
     131  1.1  christos 	cld			/* clear the direction flag again before returning */
     132  1.1  christos 	ret
     133  1.3       dsl #endif
    134