Home | History | Annotate | Line # | Download | only in string
bcopy.S revision 1.2.40.2
      1 /*	$NetBSD: bcopy.S,v 1.2.40.2 2009/08/19 06:56:13 matt Exp $	*/
      2 
      3 /*
      4  * Mach Operating System
      5  * Copyright (c) 1993 Carnegie Mellon University
      6  * All Rights Reserved.
      7  *
      8  * Permission to use, copy, modify and distribute this software and its
      9  * documentation is hereby granted, provided that both the copyright
     10  * notice and this permission notice appear in all copies of the
     11  * software, derivative works or modified versions, and any portions
     12  * thereof, and that both notices appear in supporting documentation.
     13  *
     14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
     16  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  *
     18  * Carnegie Mellon requests users of this software to return to
     19  *
     20  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  *  School of Computer Science
     22  *  Carnegie Mellon University
     23  *  Pittsburgh PA 15213-3890
     24  *
     25  * any improvements or extensions that they make and grant Carnegie Mellon
     26  * the rights to redistribute these changes.
     27  */
     28 
     29 /*
     30  *	File:	mips_bcopy.s
     31  *	Author:	Chris Maeda
     32  *	Date:	June 1993
     33  *
     34  *	Fast copy routine.  Derived from aligned_block_copy.
     35  */
     36 
     37 
     38 #include <mips/asm.h>
     39 #ifndef _LOCORE
     40 #define _LOCORE		/* XXX not really, just assembly-code source */
     41 #endif
     42 #include <machine/endian.h>
     43 
     44 
     45 #if defined(LIBC_SCCS) && !defined(lint)
     46 	/* RCSID("from: @(#)mips_bcopy.s	2.2 CMU 18/06/93") */
     47 	RCSID("$NetBSD: bcopy.S,v 1.2.40.2 2009/08/19 06:56:13 matt Exp $")
     48 #endif /* LIBC_SCCS and not lint */
     49 
     50 /*
     51  *	bcopy(caddr_t src, caddr_t dst, unsigned int len)
     52  *
     53  *	a0 	src address (dst address when built as memcpy/memmove)
     54  *	a1	dst address (src address when built as memcpy/memmove)
     55  *	a2	length
     56  */
     57 
     58 #if defined(MEMCOPY) || defined(MEMMOVE)
     59 #ifdef MEMCOPY
     60 #define	FUNCTION	memcpy
     61 #else	/* MEMMOVE */
     62 #define FUNCTION	memmove
     63 #endif	/* MEMCOPY */
     64 #define	SRCREG		a1	/* source pointer is the 2nd argument */
     65 #define	DSTREG		a0	/* destination pointer is the 1st argument */
     66 #else	/* neither MEMCOPY nor MEMMOVE: build bcopy */
     67 #define	FUNCTION	bcopy
     68 #define	SRCREG		a0	/* source pointer is the 1st argument */
     69 #define	DSTREG		a1	/* destination pointer is the 2nd argument */
     70 #endif
     71 
     72 #define	SIZEREG		a2	/* byte count; same register in every variant */
     73 
     74 LEAF(FUNCTION)
     75 	.set	noat
     76 	.set	noreorder
     77 
     78 #if defined(MEMCOPY) || defined(MEMMOVE)
     79 	/* set up return value, while we still can */
     80 	move	v0,DSTREG
     81 #endif
     82 	/*
     83 	 *	Make sure we can copy forwards.
     84 	 */
     85 	sltu	t0,SRCREG,DSTREG	# t0 == SRCREG < DSTREG
     86 	bne	t0,zero,6f		# copy backwards
     87 
     88 	/*
     89 	 * 	There are four alignment cases (with frequency)
     90 	 *	(Based on measurements taken with a DECstation 5000/200
     91 	 *	 inside a Mach kernel.)
     92 	 *
     93 	 * 	aligned   -> aligned		(mostly)
     94 	 * 	unaligned -> aligned		(sometimes)
     95 	 * 	aligned,unaligned -> unaligned	(almost never)
     96 	 *
     97 	 *	Note that we could add another case that checks if
     98 	 *	the destination and source are unaligned but the
     99 	 *	copy is alignable.  eg if src and dest are both
    100 	 *	on a halfword boundary.
    101 	 */
    102 #if 1
    103 	andi		t1,DSTREG,(SZREG-1)	# get last bits of dest
    104 	bne		t1,zero,3f		# dest unaligned
    105 	andi		t0,SRCREG,(SZREG-1)	# get last bits of src
    106 	bne		t0,zero,5f
    107 #else
    108 	andi		t1,DSTREG,(SZREG-1)	# get last bits of dest
    109 	andi		t0,SRCREG,(SZREG-1)	# get last bits of src
    110 	beq		t1,t0,97f		# aligned on non-word
    111 	nop;
    112 	bne		t1,zero,3f		# dest unaligned
    113 	nop
    114 	b		5f			# source unaligned
    115 	nop
    116 
    117 97:
    118 	sltiu		t1,SIZEREG,SZREG
    119 	bne		t1,zero,3f
    120 	nop
    121 	subu		t2,zero,t2		# t2 = -t0
    122 	andi		t2,t2,(SZREG-1)		# t2 &= (SZREG-1)
    123 						# t0 + t2 == SZREG
    124 	PTR_SUBU	SIZEREG,SIZEREG,t2	# retreat to word boundary
    125 	PTR_ADDU	DSTREG,DSTREG,t2	# advance to word boundary
    126 	PTR_ADDU	SRCREG,SRCREG,t2	# advance to word boundary
    127 	REG_L		a3,-SZREG(DSTREG)
    128 	REG_L		v1,-SZREG(SRCREG)
    129 	sll		t0,t0,3			# bits to clear in dest
    130 	sll		t1,t1,3			# bits to clear in source
    131 /*
    132  * DST = 01 23 45 67 BE: 01234567 LE: 67452301
    133  *			 01000000     00000001
    134  * SRC = 89 ab cd ef BE: 89abcdef LE: efcdab89
    135  *			 00abcdef     efcdab00
    136  *			 01abcdef
    137  */
    138 #if _BYTE_ORDER == _BIG_ENDIAN
    139 	REG_SRLV	a3,a3,t1		# clear lsb dest bits
    140 	REG_SLLV	a3,a3,t1
    141 	REG_SLLV	v1,v1,t0		# clear msb source bits
    142 	REG_SRLV	v1,v1,t0
    143 #endif
    144 #if _BYTE_ORDER == _LITTLE_ENDIAN
    145 	REG_SLLV	a3,a3,t1		# clear msb dest bits
    146 	REG_SRLV	a3,a3,t1
    147 	REG_SRLV	v1,v1,t0		# clear lsb source bits
    148 	REG_SLLV	v1,v1,t0
    149 #endif
    150 	or		a3,a3,v1		# merge
    151 	REG_S		a3,-SZREG(DSTREG)	# and save
    152 99:
    153 #endif
    154 
    155 	/*
    156 	 *	Forward aligned->aligned copy, 8*4 bytes at a time.
    157 	 */
    158 98:
    159 	li		AT,-(SZREG*8)
    160 	and		t0,SIZEREG,AT		# count truncated to multiples
    161 	PTR_ADDU	a3,SRCREG,t0		# run fast loop up to this addr
    162 	sltu		AT,SRCREG,a3		# any work to do?
    163 	beq		AT,zero,2f
    164 	PTR_SUBU	SIZEREG,t0
    165 
    166 	/*
    167 	 *	loop body
    168 	 */
    169 1:	# cp
    170 	REG_L		t3,(0*SZREG)(SRCREG)
    171 	REG_L		v1,(1*SZREG)(SRCREG)
    172 	REG_L		t0,(2*SZREG)(SRCREG)
    173 	REG_L		t1,(3*SZREG)(SRCREG)
    174 	PTR_ADDU	SRCREG,SZREG*8
    175 	REG_S		t3,(0*SZREG)(DSTREG)
    176 	REG_S		v1,(1*SZREG)(DSTREG)
    177 	REG_S		t0,(2*SZREG)(DSTREG)
    178 	REG_S		t1,(3*SZREG)(DSTREG)
    179 	REG_L		t1,(-1*SZREG)(SRCREG)
    180 	REG_L		t0,(-2*SZREG)(SRCREG)
    181 	REG_L		v1,(-3*SZREG)(SRCREG)
    182 	REG_L		t3,(-4*SZREG)(SRCREG)
    183 	PTR_ADDU	DSTREG,SZREG*8
    184 	REG_S		t1,(-1*SZREG)(DSTREG)
    185 	REG_S		t0,(-2*SZREG)(DSTREG)
    186 	REG_S		v1,(-3*SZREG)(DSTREG)
    187 	bne		SRCREG,a3,1b
    188 	REG_S		t3,(-4*SZREG)(DSTREG)
    189 
    190 	/*
    191 	 *	Copy a word at a time, no loop unrolling.
    192 	 */
    193 2:	# wordcopy
    194 	andi		t2,SIZEREG,(SZREG-1)	# get byte count / SZREG
    195 	PTR_SUBU	t2,SIZEREG,t2		# t2 = words to copy * SZREG
    196 	beq		t2,zero,3f
    197 	PTR_ADDU	t0,SRCREG,t2		# stop at t0
    198 	PTR_SUBU	SIZEREG,SIZEREG,t2
    199 1:
    200 	REG_L		t3,0(SRCREG)
    201 	PTR_ADDU	SRCREG,SZREG
    202 	REG_S		t3,0(DSTREG)
    203 	bne		SRCREG,t0,1b
    204 	PTR_ADDU	DSTREG,SZREG
    205 
    206 3:	# bytecopy
    207 	beq		SIZEREG,zero,4f		# nothing left to do?
    208 	nop
    209 1:
    210 	lb		t3,0(SRCREG)
    211 	PTR_ADDU	SRCREG,1
    212 	sb		t3,0(DSTREG)
    213 	PTR_SUBU	SIZEREG,1
    214 	bgtz		SIZEREG,1b
    215 	PTR_ADDU	DSTREG,1
    216 
    217 4:	# copydone
    218 	j	ra
    219 	nop
    220 
    221 	/*
    222 	 *	Copy from unaligned source to aligned dest.
    223 	 */
    224 5:	# destaligned
    225 	andi		t0,SIZEREG,(SZREG-1)	# t0 = bytecount mod SZREG
    226 	PTR_SUBU	a3,SIZEREG,t0		# number of words to transfer
    227 	beq		a3,zero,3b
    228 	nop
    229 	move		SIZEREG,t0		# this many to do after we are done
    230 	PTR_ADDU	a3,SRCREG,a3		# stop point
    231 
    232 1:
    233 	LWHI		t3,0(SRCREG)
    234 	LWLO		t3,3(SRCREG)
    235 	PTR_ADDI	SRCREG,SZREG
    236 	sw		t3,0(DSTREG)
    237 	bne		SRCREG,a3,1b
    238 	PTR_ADDI	DSTREG,SZREG
    239 
    240 	j		3b
    241 	nop
    242 
    243 6:	# backcopy -- based on above
    244 	PTR_ADDU	SRCREG,SIZEREG
    245 	PTR_ADDU	DSTREG,SIZEREG
    246 	andi		t1,DSTREG,3		# get last 3 bits of dest
    247 	bne		t1,zero,3f
    248 	andi		t0,SRCREG,3		# get last 3 bits of src
    249 	bne		t0,zero,5f
    250 
    251 	/*
    252 	 *	Forward aligned->aligned copy, 8*4 bytes at a time.
    253 	 */
    254 	li		AT,(-8*SZREG)
    255 	and		t0,SIZEREG,AT		# count truncated to multiple of 32
    256 	beq		t0,zero,2f		# any work to do?
    257 	PTR_SUBU	SIZEREG,t0
    258 	PTR_SUBU	a3,SRCREG,t0
    259 
    260 	/*
    261 	 *	loop body
    262 	 */
    263 1:	# cp
    264 	REG_L		t3,(-4*SZREG)(SRCREG)
    265 	REG_L		v1,(-3*SZREG)(SRCREG)
    266 	REG_L		t0,(-2*SZREG)(SRCREG)
    267 	REG_L		t1,(-1*SZREG)(SRCREG)
    268 	PTR_SUBU	SRCREG,8*SZREG
    269 	REG_S		t3,(-4*SZREG)(DSTREG)
    270 	REG_S		v1,(-3*SZREG)(DSTREG)
    271 	REG_S		t0,(-2*SZREG)(DSTREG)
    272 	REG_S		t1,(-1*SZREG)(DSTREG)
    273 	REG_L		t1,(3*SZREG)(SRCREG)
    274 	REG_L		t0,(2*SZREG)(SRCREG)
    275 	REG_L		v1,(1*SZREG)(SRCREG)
    276 	REG_L		t3,(0*SZREG)(SRCREG)
    277 	PTR_SUBU	DSTREG,8*SZREG
    278 	REG_S		t1,(3*SZREG)(DSTREG)
    279 	REG_S		t0,(2*SZREG)(DSTREG)
    280 	REG_S		v1,(1*SZREG)(DSTREG)
    281 	bne		SRCREG,a3,1b
    282 	REG_S		t3,(0*SZREG)(DSTREG)
    283 
    284 	/*
    285 	 *	Copy a word at a time, no loop unrolling.
    286 	 */
    287 2:	# wordcopy
    288 	andi		t2,SIZEREG,SZREG-1	# get byte count / 4
    289 	PTR_SUBU	t2,SIZEREG,t2		# t2 = number of words to copy
    290 	beq		t2,zero,3f
    291 	PTR_SUBU	t0,SRCREG,t2		# stop at t0
    292 	PTR_SUBU	SIZEREG,SIZEREG,t2
    293 1:
    294 	REG_L		t3,-SZREG(SRCREG)
    295 	PTR_SUBU	SRCREG,SZREG
    296 	REG_S		t3,-SZREG(DSTREG)
    297 	bne		SRCREG,t0,1b
    298 	PTR_SUBU	DSTREG,SZREG
    299 
    300 3:	# bytecopy
    301 	beq		SIZEREG,zero,4f		# nothing left to do?
    302 	nop
    303 1:
    304 	lb		t3,-1(SRCREG)
    305 	PTR_SUBU	SRCREG,1
    306 	sb		t3,-1(DSTREG)
    307 	PTR_SUBU	SIZEREG,1
    308 	bgtz		SIZEREG,1b
    309 	PTR_SUBU	DSTREG,1
    310 
    311 4:	# copydone
    312 	j	ra
    313 	nop
    314 
    315 	/*
    316 	 *	Copy from unaligned source to aligned dest.
    317 	 */
    318 5:	# destaligned
    319 	andi		t0,SIZEREG,3		# t0 = bytecount mod 4
    320 	PTR_SUBU	a3,SIZEREG,t0		# number of words to transfer
    321 	beq		a3,zero,3b
    322 	nop
    323 	move		SIZEREG,t0		# this many to do after we are done
    324 	PTR_SUBU	a3,SRCREG,a3		# stop point
    325 
    326 1:
    327 	LWHI		t3,-4(SRCREG)
    328 	LWLO		t3,-1(SRCREG)
    329 	PTR_SUBU	SRCREG,4
    330 	sw		t3,-4(DSTREG)
    331 	bne		SRCREG,a3,1b
    332 	PTR_SUBU	DSTREG,4
    333 
    334 	j		3b
    335 	nop
    336 
    337 	.set	reorder
    338 	.set	at
    339 	END(FUNCTION)
    340