Home | History | Annotate | Line # | Download | only in string
      1 /* $NetBSD: bcopy.S,v 1.1 2005/12/20 19:28:49 christos Exp $ */
      2 
      3 /*
      4  * Copyright (c) 1995 Carnegie-Mellon University.
      5  * All rights reserved.
      6  *
      7  * Author: Trevor Blackwell.  Support for use as memcpy() and memmove()
      8  *	   added by Chris Demetriou.
      9  *
     10  * Permission to use, copy, modify and distribute this software and
     11  * its documentation is hereby granted, provided that both the copyright
     12  * notice and this permission notice appear in all copies of the
     13  * software, derivative works or modified versions, and any portions
     14  * thereof, and that both notices appear in supporting documentation.
     15  *
     16  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     17  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     18  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     19  *
     20  * Carnegie Mellon requests users of this software to return to
     21  *
     22  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     23  *  School of Computer Science
     24  *  Carnegie Mellon University
     25  *  Pittsburgh PA 15213-3890
     26  *
     27  * any improvements or extensions that they make and grant Carnegie the
     28  * rights to redistribute these changes.
     29  */
     30 
     31 #include <machine/asm.h>
     32 
     33 #if defined(MEMCOPY) || defined(MEMMOVE)	/* memcpy()/memmove() build: both names share the code below */
     34 #ifdef MEMCOPY
     35 #define	FUNCTION	memcpy
     36 #else
     37 #define FUNCTION	memmove
     38 #endif	/* for these two the destination pointer is taken from a0 and the source from a1 */
     39 #define	SRCREG		a1
     40 #define	DSTREG		a0
     41 #else /* !(defined(MEMCOPY) || defined(MEMMOVE)): bcopy() build, source in a0 and destination in a1 */
     42 #define	FUNCTION	bcopy
     43 #define	SRCREG		a0
     44 #define	DSTREG		a1
     45 #endif /* !(defined(MEMCOPY) || defined(MEMMOVE)) */
     46 /* The byte count is taken from a2 in every variant. */
     47 #define	SIZEREG		a2
     48 
     49 /*
     50  * Copy bytes.
     51  *
     52  * void bcopy(char *from, char *to, size_t len);
     53  * char *memcpy(void *to, const void *from, size_t len);
     54  * char *memmove(void *to, const void *from, size_t len);
     55  *
     56  * No matter how invoked, the source and destination registers
     57  * are used for calculation.  There's no point in copying them to "working"
     58  * registers, since the code uses their values "in place," and
     59  * copying them would be slower.
     60  */
     61 
     62 LEAF(FUNCTION,3)
     63 /* Copies SIZEREG bytes from SRCREG to DSTREG; when dst lies inside [src, src+len) the copy runs backwards (bcopy_overlap). */
     64 #if defined(MEMCOPY) || defined(MEMMOVE)
     65 	/* set up return value (the unmodified destination pointer) while we still can: DSTREG is advanced in place below */
     66 	mov	DSTREG,v0
     67 #endif
     68 
     69 	/* Check for negative or zero length: ble branches when SIZEREG <= 0 (signed), so nothing is copied */
     70 	ble	SIZEREG,bcopy_done
     71 
     72 	/* Check for overlap: unsigned (dst - src) < len means dst lies within [src, src+len) */
     73 	subq	DSTREG,SRCREG,t5
     74 	cmpult	t5,SIZEREG,t5
     75 	bne	t5,bcopy_overlap
     76 
     77 	/* a3 = end address of the source (src + len) */
     78 	addq	SRCREG,SIZEREG,a3
     79 
     80 	/* Get the first word: ldq_u fetches the aligned quadword containing SRCREG */
     81 	ldq_u	t2,0(SRCREG)
     82 
     83 	/* Do they have the same alignment? */
     84 	xor	SRCREG,DSTREG,t0
     85 	and	t0,7,t0		/* t0 != 0: src and dst differ in their low three address bits */
     86 	and	DSTREG,7,t1	/* t1 = byte offset of dst within its quadword */
     87 	bne	t0,bcopy_different_alignment
     88 
     89 	/* src & dst have same alignment */
     90 	beq	t1,bcopy_all_aligned	/* offset 0: both pointers are already 8-byte aligned */
     91 
     92 	ldq_u	t3,0(DSTREG)		/* t3 = existing first destination quadword */
     93 	addq	SIZEREG,t1,SIZEREG	/* count from the start of that quadword: len += leading offset */
     94 	mskqh	t2,SRCREG,t2		/* keep the source bytes at and above the start offset */
     95 	mskql	t3,SRCREG,t3		/* keep the destination bytes below the start offset */
     96 	or	t2,t3,t2		/* t2 = merged first quadword, ready to store */
     97 
     98 	/* Dst is 8-byte aligned (or treated as such: ldq_u/stq_u ignore the low three address bits) */
     99 
    100 bcopy_all_aligned:
    101 	/* If 8 bytes or fewer remain, skip loop */
    102 	subq	SIZEREG,1,t0
    103 	and	SIZEREG,7,SIZEREG	/* SIZEREG = bytes used in the final quadword (0 means it is full) */
    104 	bic	t0,7,t0			/* t0 = (len - 1) rounded down to a multiple of 8 = bytes stored by the loop */
    105 	beq	t0,bcopy_samealign_lp_end
    106 
    107 bcopy_samealign_lp:
    108 	stq_u	t2,0(DSTREG)		/* store the current quadword */
    109 	addq	DSTREG,8,DSTREG
    110 	ldq_u	t2,8(SRCREG)		/* fetch the next source quadword */
    111 	subq	t0,8,t0
    112 	addq	SRCREG,8,SRCREG
    113 	bne	t0,bcopy_samealign_lp	/* loop until the loop byte count reaches 0 */
    114 
    115 bcopy_samealign_lp_end:
    116 	/* t2 holds the last quadword: if no partial tail remains, store it whole and exit */
    117 	bne	SIZEREG,bcopy_small_left
    118 	stq_u	t2,0(DSTREG)
    119 	RET
    120 
    121 bcopy_small_left:		/* final quadword is partial: merge with what is already in memory */
    122 	mskql	t2,SIZEREG,t4		/* keep the low SIZEREG bytes of the new data */
    123 	ldq_u	t3,0(DSTREG)		/* existing destination quadword */
    124 	mskqh	t3,SIZEREG,t3		/* preserve the destination bytes past the end of the copy */
    125 	or	t4,t3,t4
    126 	stq_u	t4,0(DSTREG)
    127 	RET
    128 
    129 bcopy_different_alignment:
    130 	/*
    131 	 * this is the fun part
    132 	 */
    133 	addq	SRCREG,SIZEREG,a3	/* a3 = source end (SRCREG and SIZEREG are still unmodified here) */
    134 	cmpule	SIZEREG,8,t0
    135 	bne	t0,bcopy_da_finish	/* 8 bytes or fewer: handled in one shot */
    136 
    137 	beq	t1,bcopy_da_noentry	/* dst already 8-byte aligned: no leading partial word */
    138 
    139 	/* Do the initial partial word */
    140 	subq	zero,DSTREG,t0
    141 	and	t0,7,t0			/* t0 = bytes needed to bring dst up to the next 8-byte boundary */
    142 	ldq_u	t3,7(SRCREG)		/* quadword containing src + 7 */
    143 	extql	t2,SRCREG,t2
    144 	extqh	t3,SRCREG,t3
    145 	or	t2,t3,t5		/* t5 = the 8 source bytes starting at SRCREG */
    146 	insql	t5,DSTREG,t5		/* shift them up to dst's byte offset */
    147 	ldq_u	t6,0(DSTREG)
    148 	mskql	t6,DSTREG,t6		/* keep the destination bytes below dst */
    149 	or	t5,t6,t5
    150 	stq_u	t5,0(DSTREG)
    151 	addq	SRCREG,t0,SRCREG	/* advance both pointers past the bytes just written ... */
    152 	addq	DSTREG,t0,DSTREG
    153 	subq	SIZEREG,t0,SIZEREG	/* ... and drop them from the count */
    154 	ldq_u	t2,0(SRCREG)		/* reload the first source quadword for the new SRCREG */
    155 
    156 bcopy_da_noentry:		/* dst is 8-byte aligned from here on */
    157 	subq	SIZEREG,1,t0
    158 	bic	t0,7,t0			/* t0 = (len - 1) rounded down to a multiple of 8 = bytes stored by the loop */
    159 	and	SIZEREG,7,SIZEREG	/* SIZEREG = bytes used in the final quadword (0 means it is full) */
    160 	beq	t0,bcopy_da_finish2
    161 
    162 bcopy_da_lp:			/* unrolled twice; t2 and t3 alternate as the previous/next source quadword */
    163 	ldq_u	t3,7(SRCREG)		/* next source quadword */
    164 	addq	SRCREG,8,SRCREG
    165 	extql	t2,SRCREG,t4		/* low part comes from the previous quadword */
    166 	extqh	t3,SRCREG,t5		/* high part comes from the next quadword */
    167 	subq	t0,8,t0
    168 	or	t4,t5,t5
    169 	stq	t5,0(DSTREG)
    170 	addq	DSTREG,8,DSTREG
    171 	beq	t0,bcopy_da_finish1	/* done: t3 is the most recently loaded source quadword */
    172 	ldq_u	t2,7(SRCREG)		/* second half: same steps with t2 and t3 swapped */
    173 	addq	SRCREG,8,SRCREG
    174 	extql	t3,SRCREG,t4
    175 	extqh	t2,SRCREG,t5
    176 	subq	t0,8,t0
    177 	or	t4,t5,t5
    178 	stq	t5,0(DSTREG)
    179 	addq	DSTREG,8,DSTREG
    180 	bne	t0,bcopy_da_lp
    181 
    182 bcopy_da_finish2:
    183 	/* Most recent source quadword is in t2; move it to t3, where bcopy_da_finish1 expects it */
    184 	mov	t2,t3
    185 
    186 bcopy_da_finish1:
    187 	/* Do the last partial word */
    188 	ldq_u	t2,-1(a3)		/* quadword containing the last source byte */
    189 	extql	t3,SRCREG,t3
    190 	extqh	t2,SRCREG,t2
    191 	or	t2,t3,t2		/* t2 = final source bytes, aligned for the destination */
    192 	br	zero,bcopy_samealign_lp_end	/* shared tail: store whole or masked, depending on SIZEREG */
    193 
    194 bcopy_da_finish:
    195 	/* 8 bytes or fewer, any alignment; expects t2 = quadword at SRCREG and a3 = source end */
    196 	ldq_u	t3,-1(a3)		/* quadword containing the last source byte */
    197 	extql	t2,SRCREG,t2
    198 	extqh	t3,SRCREG,t3
    199 	or	t2,t3,t2		/* t2 = source bytes starting at SRCREG, in the low byte lanes */
    200 	insqh	t2,DSTREG,t3		/* t3 = data for the destination quadword holding the last byte */
    201 	insql	t2,DSTREG,t2		/* t2 = data for the destination quadword holding the first byte */
    202 	lda	t4,-1(zero)		/* t4 = all ones */
    203 	mskql	t4,SIZEREG,t5		/* t5 = ones in the low (SIZEREG & 7) bytes, 0 when that is 0 */
    204 	cmovne	t5,t5,t4		/* t4 = that byte mask, or stays all ones for a full 8-byte copy */
    205 	insqh	t4,DSTREG,t5		/* t5 = write mask positioned like t3 */
    206 	insql	t4,DSTREG,t4		/* t4 = write mask positioned like t2 */
    207 	addq	DSTREG,SIZEREG,a4	/* a4 = destination end */
    208 	ldq_u	t6,0(DSTREG)		/* existing destination quadword at the first byte */
    209 	ldq_u	t7,-1(a4)		/* existing destination quadword at the last byte */
    210 	bic	t6,t4,t6		/* clear the byte lanes about to be written */
    211 	bic	t7,t5,t7
    212 	and	t2,t4,t2		/* keep only the byte lanes being copied */
    213 	and	t3,t5,t3
    214 	or	t2,t6,t2
    215 	or	t3,t7,t3
    216 	stq_u	t3,-1(a4)		/* store the last-byte quadword first ... */
    217 	stq_u	t2,0(DSTREG)		/* ... so the first-byte quadword is written last if both addresses share a quadword */
    218 	RET
    219 
    220 bcopy_overlap:
    221 	/*
    222 	 * Basically equivalent to previous case, only backwards.
    223 	 * Not quite as highly optimized
    224 	 */
    225 	addq	SRCREG,SIZEREG,a3	/* a3 = source end */
    226 	addq	DSTREG,SIZEREG,a4	/* a4 = destination end */
    227 
    228 	/* 8 bytes or fewer - don't worry about overlap (bcopy_da_finish does all its loads before its stores) */
    229 	cmpule	SIZEREG,8,t0
    230 	bne	t0,bcopy_ov_short
    231 
    232 	/* Possibly do a partial first word */
    233 	and	a4,7,t4			/* t4 = destination bytes lying past the last 8-byte boundary */
    234 	beq	t4,bcopy_ov_nostart2	/* destination end already aligned */
    235 	subq	a3,t4,a3		/* step both ends back so that a4 is 8-byte aligned */
    236 	subq	a4,t4,a4
    237 	ldq_u	t1,0(a3)
    238 	subq	SIZEREG,t4,SIZEREG
    239 	ldq_u	t2,7(a3)
    240 	ldq	t3,0(a4)		/* existing destination quadword */
    241 	extql	t1,a3,t1
    242 	extqh	t2,a3,t2
    243 	or	t1,t2,t1		/* t1 = the 8 source bytes starting at a3 */
    244 	mskqh	t3,t4,t3		/* preserve the destination bytes past the end of the copy */
    245 	mskql	t1,t4,t1		/* keep the low t4 source bytes */
    246 	or	t1,t3,t1
    247 	stq	t1,0(a4)
    248 
    249 bcopy_ov_nostart2:
    250 	bic	SIZEREG,7,t4		/* t4 = bytes copied by the loop (a multiple of 8) */
    251 	and	SIZEREG,7,SIZEREG	/* SIZEREG = bytes left over at the start of the region */
    252 	beq	t4,bcopy_ov_lp_end
    253 
    254 bcopy_ov_lp:
    255 	/* This could be more pipelined, but it doesn't seem worth it */
    256 	ldq_u	t0,-8(a3)		/* the quadwords containing a3 - 8 ... */
    257 	subq	a4,8,a4
    258 	ldq_u	t1,-1(a3)		/* ... and a3 - 1 */
    259 	subq	a3,8,a3
    260 	extql	t0,a3,t0
    261 	extqh	t1,a3,t1
    262 	subq	t4,8,t4
    263 	or	t0,t1,t0		/* t0 = the 8 source bytes starting at the new a3 */
    264 	stq	t0,0(a4)
    265 	bne	t4,bcopy_ov_lp
    266 
    267 bcopy_ov_lp_end:
    268 	beq	SIZEREG,bcopy_done	/* nothing left at the start of the region */
    269 /* Leading leftover: SRCREG and DSTREG still hold the original start addresses on this path */
    270 	ldq_u	t0,0(SRCREG)
    271 	ldq_u	t1,7(SRCREG)
    272 	ldq_u	t2,0(DSTREG)		/* existing first destination quadword */
    273 	extql	t0,SRCREG,t0
    274 	extqh	t1,SRCREG,t1
    275 	or	t0,t1,t0		/* t0 = the 8 source bytes starting at SRCREG */
    276 	insql	t0,DSTREG,t0		/* shift them up to dst's byte offset */
    277 	mskql	t2,DSTREG,t2		/* keep the destination bytes below dst */
    278 	or	t2,t0,t2
    279 	stq_u	t2,0(DSTREG)
    280 
    281 bcopy_done:
    282 	RET
    283 
    284 bcopy_ov_short:
    285 	ldq_u	t2,0(SRCREG)		/* bcopy_da_finish expects the first source quadword in t2 */
    286 	br	zero,bcopy_da_finish
    287 
    288 	END(FUNCTION)
    289