Home | History | Annotate | Line # | Download | only in string
bcopy.S revision 1.1
      1 /*	$NetBSD: bcopy.S,v 1.1 2005/12/21 00:25:56 christos Exp $	*/
      2 
      3 /*
      4  * Mach Operating System
      5  * Copyright (c) 1993 Carnegie Mellon University
      6  * All Rights Reserved.
      7  *
      8  * Permission to use, copy, modify and distribute this software and its
      9  * documentation is hereby granted, provided that both the copyright
     10  * notice and this permission notice appear in all copies of the
     11  * software, derivative works or modified versions, and any portions
     12  * thereof, and that both notices appear in supporting documentation.
     13  *
     14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
     16  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  *
     18  * Carnegie Mellon requests users of this software to return to
     19  *
     20  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  *  School of Computer Science
     22  *  Carnegie Mellon University
     23  *  Pittsburgh PA 15213-3890
     24  *
     25  * any improvements or extensions that they make and grant Carnegie Mellon
     26  * the rights to redistribute these changes.
     27  */
     28 
     29 /*
     30  *	File:	mips_bcopy.s
     31  *	Author:	Chris Maeda
     32  *	Date:	June 1993
     33  *
     34  *	Fast copy routine.  Derived from aligned_block_copy.
     35  */
     36 
     37 
     38 #include <mips/asm.h>
     39 #define _LOCORE		/* XXX not really, just assembly-code source */
     40 #include <machine/endian.h>
     41 
     42 
     43 #if defined(LIBC_SCCS) && !defined(lint)
     44 	ASMSTR("from: @(#)mips_bcopy.s	2.2 CMU 18/06/93")
     45 	ASMSTR("$NetBSD: bcopy.S,v 1.1 2005/12/21 00:25:56 christos Exp $")
     46 #endif /* LIBC_SCCS and not lint */
     47 
     48 #ifdef __ABICALLS__
     49 	.abicalls
     50 #endif
     51 
/*
 *	Build-time selection of the entry point and its argument registers.
 *
 *	This one source file provides three routines, depending on which
 *	macro is defined when it is preprocessed:
 *
 *	    (neither)	bcopy(src, dst, len)	a0 = src, a1 = dst
 *	    MEMCOPY	memcpy(dst, src, len)	a0 = dst, a1 = src
 *	    MEMMOVE	memmove(dst, src, len)	a0 = dst, a1 = src
 *
 *	The length is in a2 in all three builds.  When both MEMCOPY and
 *	MEMMOVE are defined, MEMCOPY takes precedence.
 */

/* Name of the routine being assembled. */
#if defined(MEMCOPY)
#define	FUNCTION	memcpy
#elif defined(MEMMOVE)
#define	FUNCTION	memmove
#else
#define	FUNCTION	bcopy
#endif

/* Argument registers: the mem* routines take the destination first. */
#if defined(MEMCOPY) || defined(MEMMOVE)
#define	DSTREG		a0
#define	SRCREG		a1
#else
#define	SRCREG		a0
#define	DSTREG		a1
#endif

/* Byte count, same register in every build. */
#define	SIZEREG		a2
     75 
/*
 *	FUNCTION -- block copy that tolerates overlapping buffers.
 *
 *	In:	SRCREG	source address
 *		DSTREG	destination address
 *		SIZEREG	number of bytes to copy (treated as unsigned)
 *	Out:	v0	original destination address (memcpy/memmove builds
 *			only; the bcopy build does not set v0)
 *	Clobbers: AT, v1, a3, t0-t3; SRCREG, DSTREG and SIZEREG are modified.
 *
 *	Direction: when src >= dst the copy runs forwards from the start of
 *	the buffers; when src < dst it runs backwards from their ends.  The
 *	same test is applied in every build, so the memcpy build handles
 *	overlap exactly like memmove.
 *
 *	Strategy (identical in both directions):
 *	  - dst not word aligned		-> byte loop for everything
 *	  - dst aligned, src not aligned	-> LWHI/LWLO word loop, then bytes
 *	  - both aligned			-> 32-byte unrolled loop, then
 *					   single words, then bytes
 *	For the backward copy the alignment that is tested is that of the
 *	END addresses (src + len, dst + len).
 *
 *	The body is assembled with .set noreorder: the instruction written
 *	after each branch or jump is its delay slot and executes whether or
 *	not the branch is taken.  Several branches below rely on that.
 *
 *	The numeric labels 1-5 are reused by the backward half, so every
 *	Nf/Nb reference binds to the nearest definition in that direction.
 */
LEAF(FUNCTION)
	.set	noat
	.set	noreorder

#if defined(MEMCOPY) || defined(MEMMOVE)
	/* set up return value, while we still can */
	move	v0,DSTREG
#endif
	/*
	 *	Make sure we can copy forwards.
	 */
	sltu	t0,SRCREG,DSTREG	# t0 = (src < dst), unsigned
	bne	t0,zero,6f		# src below dst: copy backwards

	/*
	 * 	There are four alignment cases (with frequency)
	 *	(Based on measurements taken with a DECstation 5000/200
	 *	 inside a Mach kernel.)
	 *
	 * 	aligned   -> aligned		(mostly)
	 * 	unaligned -> aligned		(sometimes)
	 * 	aligned,unaligned -> unaligned	(almost never; two cases)
	 *
	 *	Both unaligned-destination cases are handled by the byte
	 *	loop at label 3.
	 *
	 *	Note that we could add another case that checks if
	 *	the destination and source are unaligned but the
	 *	copy is alignable.  eg if src and dest are both
	 *	on a halfword boundary.
	 *
	 *	The andi below is the delay slot of the bne above; it also
	 *	runs on the way to label 6, which recomputes t1 itself.
	 */
	andi	t1,DSTREG,3		# low 2 bits of dest
	bne	t1,zero,3f		# dest unaligned: byte copy it all
	andi	t0,SRCREG,3		# (delay slot) low 2 bits of src
	bne	t0,zero,5f		# only src unaligned

	/*
	 *	Forward aligned->aligned copy, 8*4 bytes at a time.
	 */
	li	AT,-32			# (delay slot) mask for multiples of 32
	and	t0,SIZEREG,AT		# count truncated to multiple of 32
	addu	a3,SRCREG,t0		# run fast loop up to this address
	sltu	AT,SRCREG,a3		# any work to do?
	beq	AT,zero,2f
	subu	SIZEREG,t0		# (delay slot) bytes left after fast loop

	/*
	 *	loop body: two groups of four words per iteration.  The
	 *	pointers are bumped between the loads and the stores, so
	 *	the second group uses negative offsets.
	 */
1:	# cp
	lw	t3,0(SRCREG)
	lw	v1,4(SRCREG)
	lw	t0,8(SRCREG)
	lw	t1,12(SRCREG)
	addu	SRCREG,32
	sw	t3,0(DSTREG)
	sw	v1,4(DSTREG)
	sw	t0,8(DSTREG)
	sw	t1,12(DSTREG)
	lw	t1,-4(SRCREG)
	lw	t0,-8(SRCREG)
	lw	v1,-12(SRCREG)
	lw	t3,-16(SRCREG)
	addu	DSTREG,32
	sw	t1,-4(DSTREG)
	sw	t0,-8(DSTREG)
	sw	v1,-12(DSTREG)
	bne	SRCREG,a3,1b
	sw	t3,-16(DSTREG)		# (delay slot) last store of the group

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,3		# t2 = byte count mod 4
	subu	t2,SIZEREG,t2		# t2 = number of words to copy * 4
	beq	t2,zero,3f
	addu	t0,SRCREG,t2		# (delay slot) stop at t0
	subu	SIZEREG,SIZEREG,t2	# trailing bytes for the byte loop
1:
	lw	t3,0(SRCREG)
	addu	SRCREG,4
	sw	t3,0(DSTREG)
	bne	SRCREG,t0,1b
	addu	DSTREG,4		# (delay slot)

3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,0(SRCREG)
	addu	SRCREG,1
	sb	t3,0(DSTREG)
	subu	SIZEREG,1
	/*
	 * The count is unsigned and nonzero on entry to this loop, so
	 * test for zero rather than for a positive signed value.
	 */
	bne	SIZEREG,zero,1b
	addu	DSTREG,1		# (delay slot)

4:	# copydone
	j	ra
	nop

	/*
	 *	Forward copy from unaligned source to aligned dest.
	 */
5:	# destaligned
	andi	t0,SIZEREG,3		# t0 = bytecount mod 4
	subu	a3,SIZEREG,t0		# whole-word byte count
	beq	a3,zero,3b		# under one word: bytes only
	nop
	move	SIZEREG,t0		# this many to do after we are done
	addu	a3,SRCREG,a3		# stop point

1:
	LWHI	t3,0(SRCREG)		# assemble one unaligned word
	LWLO	t3,3(SRCREG)
	addu	SRCREG,4		# addu: pointer math must not trap
	sw	t3,0(DSTREG)
	bne	SRCREG,a3,1b
	addu	DSTREG,4		# (delay slot)

	b	3b			# PC-relative, safe in PIC code
	nop

6:	# backcopy -- based on above
	addu	SRCREG,SIZEREG		# point both registers just past
	addu	DSTREG,SIZEREG		# the end of their buffers
	andi	t1,DSTREG,3		# low 2 bits of dest end
	bne	t1,zero,3f		# dest end unaligned: byte copy it all
	andi	t0,SRCREG,3		# (delay slot) low 2 bits of src end
	bne	t0,zero,5f		# only src end unaligned

	/*
	 *	Backward aligned->aligned copy, 8*4 bytes at a time.
	 */
	li	AT,-32			# (delay slot) mask for multiples of 32
	and	t0,SIZEREG,AT		# count truncated to multiple of 32
	beq	t0,zero,2f		# any work to do?
	subu	SIZEREG,t0		# (delay slot) bytes left after fast loop
	subu	a3,SRCREG,t0		# run fast loop down to this address

	/*
	 *	loop body: mirror image of the forward loop, highest
	 *	addresses first.
	 */
1:	# cp
	lw	t3,-16(SRCREG)
	lw	v1,-12(SRCREG)
	lw	t0,-8(SRCREG)
	lw	t1,-4(SRCREG)
	subu	SRCREG,32
	sw	t3,-16(DSTREG)
	sw	v1,-12(DSTREG)
	sw	t0,-8(DSTREG)
	sw	t1,-4(DSTREG)
	lw	t1,12(SRCREG)
	lw	t0,8(SRCREG)
	lw	v1,4(SRCREG)
	lw	t3,0(SRCREG)
	subu	DSTREG,32
	sw	t1,12(DSTREG)
	sw	t0,8(DSTREG)
	sw	v1,4(DSTREG)
	bne	SRCREG,a3,1b
	sw	t3,0(DSTREG)		# (delay slot) last store of the group

	/*
	 *	Copy a word at a time, no loop unrolling.
	 */
2:	# wordcopy
	andi	t2,SIZEREG,3		# t2 = byte count mod 4
	subu	t2,SIZEREG,t2		# t2 = number of words to copy * 4
	beq	t2,zero,3f
	subu	t0,SRCREG,t2		# (delay slot) stop at t0
	subu	SIZEREG,SIZEREG,t2	# trailing bytes for the byte loop
1:
	lw	t3,-4(SRCREG)
	subu	SRCREG,4
	sw	t3,-4(DSTREG)
	bne	SRCREG,t0,1b
	subu	DSTREG,4		# (delay slot)

3:	# bytecopy
	beq	SIZEREG,zero,4f		# nothing left to do?
	nop
1:
	lb	t3,-1(SRCREG)
	subu	SRCREG,1
	sb	t3,-1(DSTREG)
	subu	SIZEREG,1
	/* unsigned count: loop until it reaches exactly zero */
	bne	SIZEREG,zero,1b
	subu	DSTREG,1		# (delay slot)

4:	# copydone
	j	ra
	nop

	/*
	 *	Backward copy from unaligned source to aligned dest.
	 */
5:	# destaligned
	andi	t0,SIZEREG,3		# t0 = bytecount mod 4
	subu	a3,SIZEREG,t0		# whole-word byte count
	beq	a3,zero,3b		# under one word: bytes only
	nop
	move	SIZEREG,t0		# this many to do after we are done
	subu	a3,SRCREG,a3		# stop point

1:
	LWHI	t3,-4(SRCREG)		# assemble one unaligned word
	LWLO	t3,-1(SRCREG)
	subu	SRCREG,4
	sw	t3,-4(DSTREG)
	bne	SRCREG,a3,1b
	subu	DSTREG,4		# (delay slot)

	b	3b			# PC-relative, safe in PIC code
	nop

	.set	reorder
	.set	at
	END(FUNCTION)
    293