Home | History | Annotate | Line # | Download | only in string
      1 /*	$NetBSD: bcopy.S,v 1.4 2011/08/27 13:23:52 bouyer Exp $	*/
      2 
      3 /*
      4  * Mach Operating System
      5  * Copyright (c) 1993 Carnegie Mellon University
      6  * All Rights Reserved.
      7  *
      8  * Permission to use, copy, modify and distribute this software and its
      9  * documentation is hereby granted, provided that both the copyright
     10  * notice and this permission notice appear in all copies of the
     11  * software, derivative works or modified versions, and any portions
     12  * thereof, and that both notices appear in supporting documentation.
     13  *
     14  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     15  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR
     16  * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     17  *
     18  * Carnegie Mellon requests users of this software to return to
     19  *
     20  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     21  *  School of Computer Science
     22  *  Carnegie Mellon University
     23  *  Pittsburgh PA 15213-3890
     24  *
     25  * any improvements or extensions that they make and grant Carnegie Mellon
     26  * the rights to redistribute these changes.
     27  */
     28 
     29 /*
     30  *	File:	mips_bcopy.s
     31  *	Author:	Chris Maeda
     32  *	Date:	June 1993
     33  *
     34  *	Fast copy routine.  Derived from aligned_block_copy.
     35  */
     36 
     37 
     38 #include <mips/asm.h>
     39 #ifndef _LOCORE
     40 #define _LOCORE		/* XXX not really, just assembly-code source */
     41 #endif
     42 #include <machine/endian.h>
     43 
     44 
     45 #if defined(LIBC_SCCS) && !defined(lint)
     46 #if 0
     47 	RCSID("from: @(#)mips_bcopy.s	2.2 CMU 18/06/93")
     48 #else
     49 	RCSID("$NetBSD: bcopy.S,v 1.4 2011/08/27 13:23:52 bouyer Exp $")
     50 #endif
     51 #endif /* LIBC_SCCS and not lint */
     52 
     53 /*
     54  *	bcopy(caddr_t src, caddr_t dst, unsigned int len)
     55  *
      56  *	a0 	src address	(dst address when built as memcpy/memmove)
      57  *	a1	dst address	(src address when built as memcpy/memmove)
     58  *	a2	length
     59  */
     60 
      61 #if defined(MEMCOPY) || defined(MEMMOVE)	/* memcpy/memmove build: dst in a0, src in a1 */
      62 #ifdef MEMCOPY
      63 #define	FUNCTION	memcpy	/* same code as memmove; only the symbol name differs */
      64 #else
      65 #define FUNCTION	memmove
      66 #endif
      67 #define	SRCREG		a1	/* register holding the source address */
      68 #define	DSTREG		a0	/* register holding the destination address */
      69 #else	/* neither MEMCOPY nor MEMMOVE defined: build bcopy, src in a0, dst in a1 */
      70 #define	FUNCTION	bcopy
      71 #define	SRCREG		a0
      72 #define	DSTREG		a1
      73 #endif
      74 
      75 #define	SIZEREG		a2	/* byte count; same register in every variant */
     76 
      77 LEAF(FUNCTION)	/* copy SIZEREG bytes from SRCREG to DSTREG */
      78 	.set	noat		# AT is used as an explicit scratch register below
      79 	.set	noreorder	# branch delay slots below are filled by hand
      80 
      81 #if defined(MEMCOPY) || defined(MEMMOVE)
      82 	/* return value is the original dst; DSTREG itself is advanced by the copy */
      83 	move	v0,DSTREG
      84 #endif
      85 	/*
      86 	 *	Copy forwards unless src < dst; in that case copy backwards (6:).
      87 	 */
      88 	sltu	t0,SRCREG,DSTREG	# t0 = (SRCREG < DSTREG), unsigned compare
      89 	bne	t0,zero,6f		# yes: copy backwards (next andi is the delay slot)
      90 
      91 	/*
      92 	 * 	There are four alignment cases (with frequency)
      93 	 *	(Based on measurements taken with a DECstation 5000/200
      94 	 *	 inside a Mach kernel.)
      95 	 *
      96 	 * 	aligned   -> aligned		(mostly)
      97 	 * 	unaligned -> aligned		(sometimes)
      98 	 * 	aligned,unaligned -> unaligned	(almost never)
      99 	 *
     100 	 *	Note that we could add another case that checks if
     101 	 *	the destination and source are unaligned but the
     102 	 *	copy is alignable.  eg if src and dest are both
     103 	 *	on a halfword boundary.
     104 	 */
     105 	andi		t1,DSTREG,(SZREG-1)	# t1 = dest mod SZREG
     106 	bne		t1,zero,3f		# dest unaligned: byte copy the whole request
     107 	andi		t0,SRCREG,(SZREG-1)	# delay slot: t0 = src mod SZREG
     108 	bne		t0,zero,5f		# src unaligned, dest aligned
     109 
     110 	/*
     111 	 *	Forward aligned->aligned copy, 8 words at a time.
     112 	 */
     113 98:	# no 98b/98f reference appears in this function
     114 	li		AT,-(SZREG*8)		# delay slot of the bne above; mask for 8-word multiples
     115 	and		t0,SIZEREG,AT		# t0 = count rounded down to a multiple of 8*SZREG
     116 	PTR_ADDU	a3,SRCREG,t0		# a3 = src address where the unrolled loop stops
     117 	sltu		AT,SRCREG,a3		# any work to do?
     118 	beq		AT,zero,2f		# no: go to the word copy
     119 	PTR_SUBU	SIZEREG,t0		# delay slot: SIZEREG = bytes left after the unrolled loop
     120 
     121 	/*
     122 	 *	loop body: 8 words per pass, moved as two groups of four
     123 	 */
     124 1:	# cp
     125 	REG_L		t3,(0*SZREG)(SRCREG)
     126 	REG_L		v1,(1*SZREG)(SRCREG)
     127 	REG_L		t0,(2*SZREG)(SRCREG)
     128 	REG_L		t1,(3*SZREG)(SRCREG)
     129 	PTR_ADDU	SRCREG,SZREG*8		# bump src early; second group uses negative offsets
     130 	REG_S		t3,(0*SZREG)(DSTREG)
     131 	REG_S		v1,(1*SZREG)(DSTREG)
     132 	REG_S		t0,(2*SZREG)(DSTREG)
     133 	REG_S		t1,(3*SZREG)(DSTREG)
     134 	REG_L		t1,(-1*SZREG)(SRCREG)
     135 	REG_L		t0,(-2*SZREG)(SRCREG)
     136 	REG_L		v1,(-3*SZREG)(SRCREG)
     137 	REG_L		t3,(-4*SZREG)(SRCREG)
     138 	PTR_ADDU	DSTREG,SZREG*8		# same for dst
     139 	REG_S		t1,(-1*SZREG)(DSTREG)
     140 	REG_S		t0,(-2*SZREG)(DSTREG)
     141 	REG_S		v1,(-3*SZREG)(DSTREG)
     142 	bne		SRCREG,a3,1b		# loop until src reaches a3
     143 	REG_S		t3,(-4*SZREG)(DSTREG)	# delay slot: last store of the pass
     144 
     145 	/*
     146 	 *	Copy a word at a time, no loop unrolling.
     147 	 */
     148 2:	# wordcopy
     149 	andi		t2,SIZEREG,(SZREG-1)	# t2 = byte count mod SZREG
     150 	PTR_SUBU	t2,SIZEREG,t2		# t2 = words to copy * SZREG
     151 	beq		t2,zero,3f		# no whole word left: byte copy
     152 	PTR_ADDU	t0,SRCREG,t2		# delay slot: stop at t0
     153 	PTR_SUBU	SIZEREG,SIZEREG,t2	# SIZEREG = trailing bytes for the byte loop
     154 1:
     155 	REG_L		t3,0(SRCREG)
     156 	PTR_ADDU	SRCREG,SZREG
     157 	REG_S		t3,0(DSTREG)
     158 	bne		SRCREG,t0,1b		# loop until src reaches t0
     159 	PTR_ADDU	DSTREG,SZREG		# delay slot: advance dst
     160 
     161 3:	# bytecopy
     162 	beq		SIZEREG,zero,4f		# nothing left to do?
     163 	nop					# delay slot
     164 1:
     165 	lb		t3,0(SRCREG)
     166 	PTR_ADDU	SRCREG,1
     167 	sb		t3,0(DSTREG)
     168 	PTR_SUBU	SIZEREG,1
     169 	bgtz		SIZEREG,1b		# signed test: loop while the count is > 0
     170 	PTR_ADDU	DSTREG,1		# delay slot: advance dst
     171 
     172 4:	# copydone
     173 	.set at		#-mfix-loongson2f-btb
     174 	j	ra
     175 	nop					# delay slot
     176 	.set noat
     177 
     178 	/*
     179 	 *	Copy from unaligned source to aligned dest.
     180 	 */
     181 5:	# destaligned
     182 	andi		t0,SIZEREG,(SZREG-1)	# t0 = bytecount mod SZREG
     183 	PTR_SUBU	a3,SIZEREG,t0		# a3 = bytes to transfer as whole words
     184 	beq		a3,zero,3b		# less than one word: byte copy
     185 	nop					# delay slot
     186 	move		SIZEREG,t0		# this many to do after we are done
     187 	PTR_ADDU	a3,SRCREG,a3		# stop point
     188 
     189 1:
     190 	REG_LHI		t3,0(SRCREG)		# with the next line: load one word from an unaligned
     191 	REG_LLO		t3,SZREG-1(SRCREG)	# address (presumably an lwl/lwr-style pair; macros come from outside this file)
     192 	PTR_ADDI	SRCREG,SZREG
     193 	REG_S		t3,0(DSTREG)
     194 	bne		SRCREG,a3,1b		# loop until src reaches the stop point
     195 	PTR_ADDI	DSTREG,SZREG		# delay slot: advance dst
     196 
     197 	b		3b			# finish the tail in the forward byte loop
     198 	nop					# delay slot
     199 
     200 6:	# backcopy -- based on above
     201 	PTR_ADDU	SRCREG,SIZEREG		# src now points one past the end of the source
     202 	PTR_ADDU	DSTREG,SIZEREG		# dst now points one past the end of the dest
     203 	andi		t1,DSTREG,SZREG-1	# t1 = (end of dest) mod SZREG
     204 	bne		t1,zero,3f		# end of dest unaligned: byte copy the whole request
     205 	andi		t0,SRCREG,SZREG-1	# delay slot: t0 = (end of src) mod SZREG
     206 	bne		t0,zero,5f		# end of src unaligned, end of dest aligned
     207 
     208 	/*
     209 	 *	Backward aligned->aligned copy, 8 words at a time.
     210 	 */
     211 	li		AT,(-8*SZREG)		# delay slot of the bne above; mask for 8-word multiples
     212 	and		t0,SIZEREG,AT		# t0 = count rounded down to a multiple of 8*SZREG
     213 	beq		t0,zero,2f		# any work to do?
     214 	PTR_SUBU	SIZEREG,t0		# delay slot: SIZEREG = bytes left after the unrolled loop
     215 	PTR_SUBU	a3,SRCREG,t0		# a3 = src address where the unrolled loop stops
     216 
     217 	/*
     218 	 *	loop body: 8 words per pass, highest four words first
     219 	 */
     220 1:	# cp
     221 	REG_L		t3,(-4*SZREG)(SRCREG)
     222 	REG_L		v1,(-3*SZREG)(SRCREG)
     223 	REG_L		t0,(-2*SZREG)(SRCREG)
     224 	REG_L		t1,(-1*SZREG)(SRCREG)
     225 	PTR_SUBU	SRCREG,8*SZREG		# step src down early; second group uses offsets 0..3
     226 	REG_S		t3,(-4*SZREG)(DSTREG)
     227 	REG_S		v1,(-3*SZREG)(DSTREG)
     228 	REG_S		t0,(-2*SZREG)(DSTREG)
     229 	REG_S		t1,(-1*SZREG)(DSTREG)
     230 	REG_L		t1,(3*SZREG)(SRCREG)
     231 	REG_L		t0,(2*SZREG)(SRCREG)
     232 	REG_L		v1,(1*SZREG)(SRCREG)
     233 	REG_L		t3,(0*SZREG)(SRCREG)
     234 	PTR_SUBU	DSTREG,8*SZREG		# same for dst
     235 	REG_S		t1,(3*SZREG)(DSTREG)
     236 	REG_S		t0,(2*SZREG)(DSTREG)
     237 	REG_S		v1,(1*SZREG)(DSTREG)
     238 	bne		SRCREG,a3,1b		# loop until src reaches a3
     239 	REG_S		t3,(0*SZREG)(DSTREG)	# delay slot: last store of the pass
     240 
     241 	/*
     242 	 *	Copy a word at a time, no loop unrolling.
     243 	 */
     244 2:	# wordcopy
     245 	andi		t2,SIZEREG,SZREG-1	# t2 = byte count mod SZREG
     246 	PTR_SUBU	t2,SIZEREG,t2		# t2 = words to copy * SZREG
     247 	beq		t2,zero,3f		# no whole word left: byte copy
     248 	PTR_SUBU	t0,SRCREG,t2		# delay slot: stop at t0
     249 	PTR_SUBU	SIZEREG,SIZEREG,t2	# SIZEREG = trailing bytes for the byte loop
     250 1:
     251 	REG_L		t3,-SZREG(SRCREG)
     252 	PTR_SUBU	SRCREG,SZREG
     253 	REG_S		t3,-SZREG(DSTREG)
     254 	bne		SRCREG,t0,1b		# loop until src reaches t0
     255 	PTR_SUBU	DSTREG,SZREG		# delay slot: step dst down
     256 
     257 3:	# bytecopy
     258 	beq		SIZEREG,zero,4f		# nothing left to do?
     259 	nop					# delay slot
     260 1:
     261 	lb		t3,-1(SRCREG)
     262 	PTR_SUBU	SRCREG,1
     263 	sb		t3,-1(DSTREG)
     264 	PTR_SUBU	SIZEREG,1
     265 	bgtz		SIZEREG,1b		# signed test: loop while the count is > 0
     266 	PTR_SUBU	DSTREG,1		# delay slot: step dst down
     267 
     268 4:	# copydone
     269 	.set at		#-mfix-loongson2f-btb
     270 	j	ra
     271 	nop					# delay slot
     272 	.set noat
     273 
     274 	/*
     275 	 *	Copy from unaligned source to aligned dest.
     276 	 */
     277 5:	# destaligned
     278 	andi		t0,SIZEREG,SZREG-1	# t0 = bytecount mod SZREG
     279 	PTR_SUBU	a3,SIZEREG,t0		# a3 = bytes to transfer as whole words
     280 	beq		a3,zero,3b		# less than one word: byte copy
     281 	nop					# delay slot
     282 	move		SIZEREG,t0		# this many to do after we are done
     283 	PTR_SUBU	a3,SRCREG,a3		# stop point
     284 
     285 1:
     286 	REG_LHI		t3,-SZREG(SRCREG)	# with the next line: load one word from an unaligned
     287 	REG_LLO		t3,-1(SRCREG)		# address (presumably an lwl/lwr-style pair; macros come from outside this file)
     288 	PTR_SUBU	SRCREG,SZREG
     289 	REG_S		t3,-SZREG(DSTREG)
     290 	bne		SRCREG,a3,1b		# loop until src reaches the stop point
     291 	PTR_SUBU	DSTREG,SZREG		# delay slot: step dst down
     292 
     293 	b		3b			# finish the tail in the backward byte loop
     294 	nop					# delay slot
     295 
     296 	.set	reorder
     297 	.set	at
     298 	END(FUNCTION)
    299