Home | History | Annotate | Line # | Download | only in string
bcopy.S revision 1.2
      1 /*	$NetBSD: bcopy.S,v 1.2 2005/12/27 11:23:53 tsutsui Exp $	*/
      2 
      3 /*
      4  * Mach Operating System
      5  * Copyright (c) 1993 Carnegie Mellon University
      6  * All Rights Reserved.
      7  *
      8  * Permission to use, copy, modify and distribute this software and its
      9  * documentation is hereby granted, provided that both the copyright
     10  * notice and this permission notice appear in all copies of the
     11  * software, derivative works or modified versions, and any portions
     12  * thereof, and that both notices appear in supporting documentation.
     13  *
     14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
     16  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  *
     18  * Carnegie Mellon requests users of this software to return to
     19  *
     20  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  *  School of Computer Science
     22  *  Carnegie Mellon University
     23  *  Pittsburgh PA 15213-3890
     24  *
     25  * any improvements or extensions that they make and grant Carnegie Mellon
     26  * the rights to redistribute these changes.
     27  */
     28 
     29 /*
     30  *	File:	mips_bcopy.s
     31  *	Author:	Chris Maeda
     32  *	Date:	June 1993
     33  *
     34  *	Fast copy routine.  Derived from aligned_block_copy.
     35  */
     36 
     37 
     38 #include <mips/asm.h>
     39 #ifndef _LOCORE
     40 #define _LOCORE		/* XXX not really, just assembly-code source */
     41 #endif
     42 #include <machine/endian.h>
     43 
     44 
     45 #if defined(LIBC_SCCS) && !defined(lint)
     46 	ASMSTR("from: @(#)mips_bcopy.s	2.2 CMU 18/06/93")
     47 	ASMSTR("$NetBSD: bcopy.S,v 1.2 2005/12/27 11:23:53 tsutsui Exp $")
     48 #endif /* LIBC_SCCS and not lint */
     49 
     50 #ifdef __ABICALLS__
     51 	.abicalls
     52 #endif
     53 
     54 /*
     55  *	bcopy(caddr_t src, caddr_t dst, unsigned int len)
     56  *
     57  *	a0 	src address
     58  *	a1	dst address
     59  *	a2	length
     60  */
     61 
/*
 * Build-time selection of the entry point and its argument registers.
 *
 *	MEMCOPY defined		-> memcpy   (takes precedence over MEMMOVE)
 *	MEMMOVE defined		-> memmove
 *	neither defined		-> bcopy
 *
 * memcpy/memmove receive the destination in a0 and the source in a1;
 * bcopy receives the source in a0 and the destination in a1.  The byte
 * count is in a2 for all three.
 */
#if defined(MEMCOPY)
#define	FUNCTION	memcpy
#elif defined(MEMMOVE)
#define	FUNCTION	memmove
#else
#define	FUNCTION	bcopy
#endif

#if defined(MEMCOPY) || defined(MEMMOVE)
#define	DSTREG		a0
#define	SRCREG		a1
#else
#define	SRCREG		a0
#define	DSTREG		a1
#endif

#define	SIZEREG		a2
     77 
/*
 * FUNCTION -- overlap-safe block copy (assembled as bcopy, memcpy or
 * memmove depending on the FUNCTION/SRCREG/DSTREG definitions).
 *
 * In:	SRCREG	source address
 *	DSTREG	destination address
 *	SIZEREG	number of bytes to copy, treated as unsigned
 * Out:	v0	original destination address (MEMCOPY/MEMMOVE builds only)
 *
 * Clobbers AT, v1, a3, t0-t3 and all three argument registers.
 *
 * If the source address is below the destination address the copy runs
 * backwards from the end of both buffers; otherwise it runs forwards.
 * Either way an overlapping source is read before it is overwritten.
 *
 * The body is assembled under ".set noreorder": the instruction that
 * follows each branch or jump is its delay slot and executes whether or
 * not the branch is taken.  Do not reorder instructions without
 * re-checking every delay slot.
 *
 * Numeric labels are reused by the forward and backward halves; "Nf"
 * and "Nb" always bind to the nearest label N in that direction.
 */
LEAF(FUNCTION)
	.set	noat			# AT is used explicitly as scratch
	.set	noreorder

#if defined(MEMCOPY) || defined(MEMMOVE)
	/* Capture the return value before DSTREG is advanced. */
	move	v0,DSTREG
#endif
	/*
	 *	Choose the direction: copy backwards when SRCREG < DSTREG.
	 */
	sltu	t0,SRCREG,DSTREG	# t0 = (SRCREG < DSTREG), unsigned
	bne	t0,zero,6f		# copy backwards

	/*
	 * 	There are four alignment cases (with frequency)
	 *	(Based on measurements taken with a DECstation 5000/200
	 *	 inside a Mach kernel.)
	 *
	 * 	aligned   -> aligned		(mostly)
	 * 	unaligned -> aligned		(sometimes)
	 * 	aligned,unaligned -> unaligned	(almost never)
	 *
	 *	An unaligned destination is always copied a byte at a time.
	 *
	 *	Note that we could add another case that checks if
	 *	the destination and source are unaligned but the
	 *	copy is alignable.  eg if src and dest are both
	 *	on a halfword boundary.
	 */
	andi	t1,DSTREG,3		# low 2 bits of dest (delay slot of bne)
	bne	t1,zero,3f		# dest unaligned: byte copy
	andi	t0,SRCREG,3		# low 2 bits of src (delay slot)
	bne	t0,zero,5f		# src unaligned, dest aligned

	/*
	 *	Forward aligned->aligned copy, 8*4 bytes at a time.
	 */
	li	AT,-32			# mask for multiples of 32 (delay slot)
	and	t0,SIZEREG,AT		# count truncated to multiple of 32
	addu	a3,SRCREG,t0		# run fast loop up to this address
	sltu	AT,SRCREG,a3		# any work to do?
	beq	AT,zero,2f
	subu	SIZEREG,t0		# bytes left after fast loop (delay slot)

	/*
	 *	Loop body: two groups of four words.  Each group is fully
	 *	loaded before any of it is stored.
	 */
1:	# cp
	lw	t3,0(SRCREG)
	lw	v1,4(SRCREG)
	lw	t0,8(SRCREG)
	lw	t1,12(SRCREG)
	addu	SRCREG,32
	sw	t3,0(DSTREG)
	sw	v1,4(DSTREG)
	sw	t0,8(DSTREG)
	sw	t1,12(DSTREG)
	lw	t1,-4(SRCREG)		# second group, relative to advanced src
	lw	t0,-8(SRCREG)
	lw	v1,-12(SRCREG)
	lw	t3,-16(SRCREG)
	addu	DSTREG,32
	sw	t1,-4(DSTREG)
	sw	t0,-8(DSTREG)
	sw	v1,-12(DSTREG)
	bne	SRCREG,a3,1b
	sw	t3,-16(DSTREG)		# last store of the group (delay slot)

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,3		# t2 = byte count mod 4
	subu	t2,SIZEREG,t2		# t2 = number of words to copy * 4
	beq	t2,zero,3f
	addu	t0,SRCREG,t2		# stop at t0 (delay slot)
	subu	SIZEREG,SIZEREG,t2	# trailing bytes for the byte loop
1:
	lw	t3,0(SRCREG)
	addu	SRCREG,4
	sw	t3,0(DSTREG)
	bne	SRCREG,t0,1b
	addu	DSTREG,4		# (delay slot)

3:	# bytecopy
	beq	SIZEREG,zero,4f	# nothing left to do?
	nop
1:
	lb	t3,0(SRCREG)
	addu	SRCREG,1
	sb	t3,0(DSTREG)
	subu	SIZEREG,1
	bne	SIZEREG,zero,1b		# count is unsigned: loop until zero
	addu	DSTREG,1		# (delay slot)

4:	# copydone
	j	ra
	nop

	/*
	 *	Forward copy from unaligned source to aligned dest.
	 *	LWHI/LWLO are defined outside this file; presumably they
	 *	are the endian-appropriate lwl/lwr pair -- confirm against
	 *	the included headers.
	 */
5:	# destaligned
	andi	t0,SIZEREG,3		# t0 = bytecount mod 4
	subu	a3,SIZEREG,t0		# number of bytes in whole words
	beq	a3,zero,3b		# less than one word: byte copy
	nop
	move	SIZEREG,t0		# this many to do after we are done
	addu	a3,SRCREG,a3		# stop point

1:
	LWHI	t3,0(SRCREG)
	LWLO	t3,3(SRCREG)
	addu	SRCREG,4		# addu: pointer math must not trap
	sw	t3,0(DSTREG)
	bne	SRCREG,a3,1b
	addu	DSTREG,4		# (delay slot) addu, as above

	j	3b			# finish trailing bytes
	nop

6:	# backcopy -- based on above
	addu	SRCREG,SIZEREG		# point one past the end of src
	addu	DSTREG,SIZEREG		# point one past the end of dest
	andi	t1,DSTREG,3		# low 2 bits of dest end
	bne	t1,zero,3f		# dest end unaligned: byte copy
	andi	t0,SRCREG,3		# low 2 bits of src end (delay slot)
	bne	t0,zero,5f		# src end unaligned, dest end aligned

	/*
	 *	Backward aligned->aligned copy, 8*4 bytes at a time.
	 */
	li	AT,-32			# mask for multiples of 32 (delay slot)
	and	t0,SIZEREG,AT		# count truncated to multiple of 32
	beq	t0,zero,2f		# any work to do?
	subu	SIZEREG,t0		# bytes left after fast loop (delay slot)
	subu	a3,SRCREG,t0		# run fast loop down to this address

	/*
	 *	Loop body: two groups of four words, highest addresses
	 *	first.  Each group is fully loaded before it is stored.
	 */
1:	# cp
	lw	t3,-16(SRCREG)
	lw	v1,-12(SRCREG)
	lw	t0,-8(SRCREG)
	lw	t1,-4(SRCREG)
	subu	SRCREG,32
	sw	t3,-16(DSTREG)
	sw	v1,-12(DSTREG)
	sw	t0,-8(DSTREG)
	sw	t1,-4(DSTREG)
	lw	t1,12(SRCREG)		# second group, relative to lowered src
	lw	t0,8(SRCREG)
	lw	v1,4(SRCREG)
	lw	t3,0(SRCREG)
	subu	DSTREG,32
	sw	t1,12(DSTREG)
	sw	t0,8(DSTREG)
	sw	v1,4(DSTREG)
	bne	SRCREG,a3,1b
	sw	t3,0(DSTREG)		# last store of the group (delay slot)

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,3		# t2 = byte count mod 4
	subu	t2,SIZEREG,t2		# t2 = number of words to copy * 4
	beq	t2,zero,3f
	subu	t0,SRCREG,t2		# stop at t0 (delay slot)
	subu	SIZEREG,SIZEREG,t2	# leading bytes for the byte loop
1:
	lw	t3,-4(SRCREG)
	subu	SRCREG,4
	sw	t3,-4(DSTREG)
	bne	SRCREG,t0,1b
	subu	DSTREG,4		# (delay slot)

3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,-1(SRCREG)
	subu	SRCREG,1
	sb	t3,-1(DSTREG)
	subu	SIZEREG,1
	bne	SIZEREG,zero,1b		# count is unsigned: loop until zero
	subu	DSTREG,1		# (delay slot)

4:	# copydone
	j	ra
	nop

	/*
	 *	Backward copy from unaligned source to aligned dest.
	 */
5:	# destaligned
	andi	t0,SIZEREG,3		# t0 = bytecount mod 4
	subu	a3,SIZEREG,t0		# number of bytes in whole words
	beq	a3,zero,3b		# less than one word: byte copy
	nop
	move	SIZEREG,t0		# this many to do after we are done
	subu	a3,SRCREG,a3		# stop point

1:
	LWHI	t3,-4(SRCREG)
	LWLO	t3,-1(SRCREG)
	subu	SRCREG,4
	sw	t3,-4(DSTREG)
	bne	SRCREG,a3,1b
	subu	DSTREG,4		# (delay slot)

	j	3b			# finish leading bytes
	nop

	.set	reorder
	.set	at
	END(FUNCTION)
    295