Home | History | Annotate | Line # | Download | only in string
      1  1.1  christos /* $NetBSD: bcopy.S,v 1.1 2005/12/20 19:28:49 christos Exp $ */
      2  1.1  christos 
      3  1.1  christos /*
      4  1.1  christos  * Copyright (c) 1995 Carnegie-Mellon University.
      5  1.1  christos  * All rights reserved.
      6  1.1  christos  *
      7  1.1  christos  * Author: Trevor Blackwell.  Support for use as memcpy() and memmove()
      8  1.1  christos  *	   added by Chris Demetriou.
      9  1.1  christos  *
     10  1.1  christos  * Permission to use, copy, modify and distribute this software and
     11  1.1  christos  * its documentation is hereby granted, provided that both the copyright
     12  1.1  christos  * notice and this permission notice appear in all copies of the
     13  1.1  christos  * software, derivative works or modified versions, and any portions
     14  1.1  christos  * thereof, and that both notices appear in supporting documentation.
     15  1.1  christos  *
     16  1.1  christos  * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
     17  1.1  christos  * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
     18  1.1  christos  * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
     19  1.1  christos  *
     20  1.1  christos  * Carnegie Mellon requests users of this software to return to
     21  1.1  christos  *
     22  1.1  christos  *  Software Distribution Coordinator  or  Software.Distribution (at) CS.CMU.EDU
     23  1.1  christos  *  School of Computer Science
     24  1.1  christos  *  Carnegie Mellon University
     25  1.1  christos  *  Pittsburgh PA 15213-3890
     26  1.1  christos  *
     27  1.1  christos  * any improvements or extensions that they make and grant Carnegie the
     28  1.1  christos  * rights to redistribute these changes.
     29  1.1  christos  */
     30  1.1  christos 
     31  1.1  christos #include <machine/asm.h>
     32  1.1  christos 
     33  1.1  christos #if defined(MEMCOPY) || defined(MEMMOVE) /* built as memcpy()/memmove(): arguments are (to, from, len) */
     34  1.1  christos #ifdef MEMCOPY
     35  1.1  christos #define	FUNCTION	memcpy
     36  1.1  christos #else
     37  1.1  christos #define FUNCTION	memmove
     38  1.1  christos #endif
     39  1.1  christos #define	SRCREG		a1	/* source pointer is the second argument */
     40  1.1  christos #define	DSTREG		a0	/* destination pointer is the first argument */
     41  1.1  christos #else /* !(defined(MEMCOPY) || defined(MEMMOVE)) */
     42  1.1  christos #define	FUNCTION	bcopy	/* default build: bcopy(from, to, len) */
     43  1.1  christos #define	SRCREG		a0	/* source pointer is the first argument */
     44  1.1  christos #define	DSTREG		a1	/* destination pointer is the second argument */
     45  1.1  christos #endif /* !(defined(MEMCOPY) || defined(MEMMOVE)) */
     46  1.1  christos 
     47  1.1  christos #define	SIZEREG		a2	/* byte count is the third argument in every variant */
     48  1.1  christos 
     49  1.1  christos /*
     50  1.1  christos  * Copy bytes.
     51  1.1  christos  *
     52  1.1  christos  * void bcopy(char *from, char *to, size_t len);
     53  1.1  christos  * void *memcpy(void *to, const void *from, size_t len);
     54  1.1  christos  * void *memmove(void *to, const void *from, size_t len);
     55  1.1  christos  *
     56  1.1  christos  * No matter how invoked, the source and destination registers are used
     57  1.1  christos  * directly for calculation.  There's no point in copying them to "working"
     58  1.1  christos  * registers, since the code uses their values "in place," and
     59  1.1  christos  * copying them would be slower.
     60  1.1  christos  */
     61  1.1  christos 
     62  1.1  christos LEAF(FUNCTION,3)
     63  1.1  christos 	/* Copy SIZEREG bytes from SRCREG to DSTREG; copies backward when DSTREG starts inside the source range. */
     64  1.1  christos #if defined(MEMCOPY) || defined(MEMMOVE)
     65  1.1  christos 	/* memcpy/memmove return the original destination pointer: save it in v0 before DSTREG is modified */
     66  1.1  christos 	mov	DSTREG,v0
     67  1.1  christos #endif
     68  1.1  christos 
     69  1.1  christos 	/* Nothing to do for a zero or negative length (signed test) */
     70  1.1  christos 	ble	SIZEREG,bcopy_done
     71  1.1  christos 
     72  1.1  christos 	/* Check for overlap: unsigned (dst - src) < len means dst starts inside [src, src+len), so copy backward */
     73  1.1  christos 	subq	DSTREG,SRCREG,t5
     74  1.1  christos 	cmpult	t5,SIZEREG,t5
     75  1.1  christos 	bne	t5,bcopy_overlap
     76  1.1  christos 
     77  1.1  christos 	/* a3 = end address (one past the last source byte) */
     78  1.1  christos 	addq	SRCREG,SIZEREG,a3
     79  1.1  christos 
     80  1.1  christos 	/* Get the first word: t2 = aligned quadword containing the first source byte */
     81  1.1  christos 	ldq_u	t2,0(SRCREG)
     82  1.1  christos 
     83  1.1  christos 	/* Do they have the same alignment?  t0 = differing low 3 address bits, t1 = dst offset within its quadword */
     84  1.1  christos 	xor	SRCREG,DSTREG,t0
     85  1.1  christos 	and	t0,7,t0
     86  1.1  christos 	and	DSTREG,7,t1
     87  1.1  christos 	bne	t0,bcopy_different_alignment
     88  1.1  christos 
     89  1.1  christos 	/* src & dst have same alignment */
     90  1.1  christos 	beq	t1,bcopy_all_aligned	/* offset 0: no leading partial quadword to merge */
     91  1.1  christos 
     92  1.1  christos 	ldq_u	t3,0(DSTREG)	/* t3 = existing first destination quadword */
     93  1.1  christos 	addq	SIZEREG,t1,SIZEREG	/* count bytes from the start of that quadword */
     94  1.1  christos 	mskqh	t2,SRCREG,t2	/* keep source bytes at and above the start offset */
     95  1.1  christos 	mskql	t3,SRCREG,t3	/* keep destination bytes below the start offset */
     96  1.1  christos 	or	t2,t3,t2	/* t2 = first quadword to store */
     97  1.1  christos 
     98  1.1  christos 	/* Copy proceeds in whole quadwords from here; ldq_u/stq_u ignore the low 3 address bits */
     99  1.1  christos 
    100  1.1  christos bcopy_all_aligned:
    101  1.1  christos 	/* t0 = bytes for the loop (all but the final quadword); SIZEREG = bytes used in the final quadword (0 means all 8) */
    102  1.1  christos 	subq	SIZEREG,1,t0
    103  1.1  christos 	and	SIZEREG,7,SIZEREG
    104  1.1  christos 	bic	t0,7,t0
    105  1.1  christos 	beq	t0,bcopy_samealign_lp_end	/* only one quadword involved: skip the loop */
    106  1.1  christos 
    107  1.1  christos bcopy_samealign_lp:
    108  1.1  christos 	stq_u	t2,0(DSTREG)	/* store the pending quadword */
    109  1.1  christos 	addq	DSTREG,8,DSTREG
    110  1.1  christos 	ldq_u	t2,8(SRCREG)	/* fetch the next source quadword */
    111  1.1  christos 	subq	t0,8,t0
    112  1.1  christos 	addq	SRCREG,8,SRCREG
    113  1.1  christos 	bne	t0,bcopy_samealign_lp
    114  1.1  christos 
    115  1.1  christos bcopy_samealign_lp_end:
    116  1.1  christos 	/* t2 holds the final quadword: store it whole if SIZEREG == 0, otherwise merge with the destination */
    117  1.1  christos 	bne	SIZEREG,bcopy_small_left
    118  1.1  christos 	stq_u	t2,0(DSTREG)
    119  1.1  christos 	RET
    120  1.1  christos 
    121  1.1  christos bcopy_small_left:
    122  1.1  christos 	mskql	t2,SIZEREG,t4	/* t4 = low SIZEREG bytes of the source data */
    123  1.1  christos 	ldq_u	t3,0(DSTREG)
    124  1.1  christos 	mskqh	t3,SIZEREG,t3	/* keep destination bytes beyond the end of the copy */
    125  1.1  christos 	or	t4,t3,t4
    126  1.1  christos 	stq_u	t4,0(DSTREG)
    127  1.1  christos 	RET
    128  1.1  christos 
    129  1.1  christos bcopy_different_alignment:
    130  1.1  christos 	/*
    131  1.1  christos 	 * this is the fun part: each destination quadword is assembled from two source quadwords
    132  1.1  christos 	 */
    133  1.1  christos 	addq	SRCREG,SIZEREG,a3	/* a3 = one past the last source byte */
    134  1.1  christos 	cmpule	SIZEREG,8,t0
    135  1.1  christos 	bne	t0,bcopy_da_finish	/* 8 bytes or fewer: handled in one shot */
    136  1.1  christos 
    137  1.1  christos 	beq	t1,bcopy_da_noentry	/* destination already quadword aligned */
    138  1.1  christos 
    139  1.1  christos 	/* Do the initial partial word so that DSTREG becomes quadword aligned */
    140  1.1  christos 	subq	zero,DSTREG,t0
    141  1.1  christos 	and	t0,7,t0	/* t0 = bytes needed to align the destination */
    142  1.1  christos 	ldq_u	t3,7(SRCREG)
    143  1.1  christos 	extql	t2,SRCREG,t2
    144  1.1  christos 	extqh	t3,SRCREG,t3
    145  1.1  christos 	or	t2,t3,t5	/* t5 = 8 source bytes starting at SRCREG */
    146  1.1  christos 	insql	t5,DSTREG,t5	/* shift them up to the destination offset */
    147  1.1  christos 	ldq_u	t6,0(DSTREG)
    148  1.1  christos 	mskql	t6,DSTREG,t6	/* keep destination bytes below the start */
    149  1.1  christos 	or	t5,t6,t5
    150  1.1  christos 	stq_u	t5,0(DSTREG)
    151  1.1  christos 	addq	SRCREG,t0,SRCREG
    152  1.1  christos 	addq	DSTREG,t0,DSTREG
    153  1.1  christos 	subq	SIZEREG,t0,SIZEREG
    154  1.1  christos 	ldq_u	t2,0(SRCREG)	/* reload: t2 = quadword containing the new SRCREG */
    155  1.1  christos 
    156  1.1  christos bcopy_da_noentry:
    157  1.1  christos 	subq	SIZEREG,1,t0
    158  1.1  christos 	bic	t0,7,t0	/* t0 = bytes for the loop (all but the final quadword) */
    159  1.1  christos 	and	SIZEREG,7,SIZEREG	/* SIZEREG = bytes used in the final quadword (0 means all 8) */
    160  1.1  christos 	beq	t0,bcopy_da_finish2
    161  1.1  christos 	/* Loop body is unrolled twice; t2 and t3 alternate as the carried-over source quadword */
    162  1.1  christos bcopy_da_lp:
    163  1.1  christos 	ldq_u	t3,7(SRCREG)	/* first half: t2 is the carried-over quadword, t3 the new one */
    164  1.1  christos 	addq	SRCREG,8,SRCREG
    165  1.1  christos 	extql	t2,SRCREG,t4
    166  1.1  christos 	extqh	t3,SRCREG,t5
    167  1.1  christos 	subq	t0,8,t0
    168  1.1  christos 	or	t4,t5,t5
    169  1.1  christos 	stq	t5,0(DSTREG)
    170  1.1  christos 	addq	DSTREG,8,DSTREG
    171  1.1  christos 	beq	t0,bcopy_da_finish1	/* exit with the carried-over quadword in t3 */
    172  1.1  christos 	ldq_u	t2,7(SRCREG)	/* second half: roles of t2 and t3 are swapped */
    173  1.1  christos 	addq	SRCREG,8,SRCREG
    174  1.1  christos 	extql	t3,SRCREG,t4
    175  1.1  christos 	extqh	t2,SRCREG,t5
    176  1.1  christos 	subq	t0,8,t0
    177  1.1  christos 	or	t4,t5,t5
    178  1.1  christos 	stq	t5,0(DSTREG)
    179  1.1  christos 	addq	DSTREG,8,DSTREG
    180  1.1  christos 	bne	t0,bcopy_da_lp
    181  1.1  christos 
    182  1.1  christos bcopy_da_finish2:
    183  1.1  christos 	/* Carried-over quadword is in t2 here; bcopy_da_finish1 expects it in t3 */
    184  1.1  christos 	mov	t2,t3
    185  1.1  christos 
    186  1.1  christos bcopy_da_finish1:
    187  1.1  christos 	/* Do the last partial word: build the final quadword, then reuse the same-alignment tail store */
    188  1.1  christos 	ldq_u	t2,-1(a3)	/* quadword containing the last source byte */
    189  1.1  christos 	extql	t3,SRCREG,t3
    190  1.1  christos 	extqh	t2,SRCREG,t2
    191  1.1  christos 	or	t2,t3,t2	/* t2 = final destination quadword */
    192  1.1  christos 	br	zero,bcopy_samealign_lp_end
    193  1.1  christos 
    194  1.1  christos bcopy_da_finish:
    195  1.1  christos 	/* 8 bytes or fewer (also entered from bcopy_ov_short): all loads are done before either store */
    196  1.1  christos 	ldq_u	t3,-1(a3)	/* quadword containing the last source byte */
    197  1.1  christos 	extql	t2,SRCREG,t2
    198  1.1  christos 	extqh	t3,SRCREG,t3
    199  1.1  christos 	or	t2,t3,t2	/* t2 = source bytes starting at byte 0 (excess high bytes are masked off below) */
    200  1.1  christos 	insqh	t2,DSTREG,t3	/* t3 = part that spills into the next destination quadword */
    201  1.1  christos 	insql	t2,DSTREG,t2	/* t2 = part for the first destination quadword */
    202  1.1  christos 	lda	t4,-1(zero)	/* t4 = all ones */
    203  1.1  christos 	mskql	t4,SIZEREG,t5	/* t5 = ones in the low (len mod 8) bytes */
    204  1.1  christos 	cmovne	t5,t5,t4	/* t4 = byte mask covering len bytes (stays all ones when len == 8) */
    205  1.1  christos 	insqh	t4,DSTREG,t5	/* t5 = mask for the second destination quadword */
    206  1.1  christos 	insql	t4,DSTREG,t4	/* t4 = mask for the first destination quadword */
    207  1.1  christos 	addq	DSTREG,SIZEREG,a4	/* a4 = one past the last destination byte */
    208  1.1  christos 	ldq_u	t6,0(DSTREG)
    209  1.1  christos 	ldq_u	t7,-1(a4)
    210  1.1  christos 	bic	t6,t4,t6	/* clear the destination bytes that will be replaced */
    211  1.1  christos 	bic	t7,t5,t7
    212  1.1  christos 	and	t2,t4,t2	/* drop source bytes outside the mask */
    213  1.1  christos 	and	t3,t5,t3
    214  1.1  christos 	or	t2,t6,t2
    215  1.1  christos 	or	t3,t7,t3
    216  1.1  christos 	stq_u	t3,-1(a4)
    217  1.1  christos 	stq_u	t2,0(DSTREG)	/* stored last so it wins when both stores address the same quadword */
    218  1.1  christos 	RET
    219  1.1  christos 
    220  1.1  christos bcopy_overlap:
    221  1.1  christos 	/*
    222  1.1  christos 	 * Basically equivalent to previous case, only backwards.
    223  1.1  christos 	 * Not quite as highly optimized
    224  1.1  christos 	 */
    225  1.1  christos 	addq	SRCREG,SIZEREG,a3	/* a3 = one past the last source byte */
    226  1.1  christos 	addq	DSTREG,SIZEREG,a4	/* a4 = one past the last destination byte */
    227  1.1  christos 
    228  1.1  christos 	/* 8 bytes or fewer - don't worry about overlap (bcopy_da_finish loads everything before storing) */
    229  1.1  christos 	cmpule	SIZEREG,8,t0
    230  1.1  christos 	bne	t0,bcopy_ov_short
    231  1.1  christos 
    232  1.1  christos 	/* Possibly do a partial word at the end so that a4 becomes quadword aligned */
    233  1.1  christos 	and	a4,7,t4	/* t4 = bytes past the last aligned destination boundary */
    234  1.1  christos 	beq	t4,bcopy_ov_nostart2
    235  1.1  christos 	subq	a3,t4,a3
    236  1.1  christos 	subq	a4,t4,a4	/* a4 is now quadword aligned */
    237  1.1  christos 	ldq_u	t1,0(a3)
    238  1.1  christos 	subq	SIZEREG,t4,SIZEREG
    239  1.1  christos 	ldq_u	t2,7(a3)
    240  1.1  christos 	ldq	t3,0(a4)	/* t3 = existing destination quadword */
    241  1.1  christos 	extql	t1,a3,t1
    242  1.1  christos 	extqh	t2,a3,t2
    243  1.1  christos 	or	t1,t2,t1	/* t1 = 8 source bytes starting at a3 */
    244  1.1  christos 	mskqh	t3,t4,t3	/* keep destination bytes beyond the end of the copy */
    245  1.1  christos 	mskql	t1,t4,t1	/* keep the low t4 source bytes */
    246  1.1  christos 	or	t1,t3,t1
    247  1.1  christos 	stq	t1,0(a4)
    248  1.1  christos 
    249  1.1  christos bcopy_ov_nostart2:
    250  1.1  christos 	bic	SIZEREG,7,t4	/* t4 = bytes to copy as whole quadwords */
    251  1.1  christos 	and	SIZEREG,7,SIZEREG	/* SIZEREG = bytes left over at the very start */
    252  1.1  christos 	beq	t4,bcopy_ov_lp_end
    253  1.1  christos 
    254  1.1  christos bcopy_ov_lp:
    255  1.1  christos 	/* This could be more pipelined, but it doesn't seem worth it */
    256  1.1  christos 	ldq_u	t0,-8(a3)
    257  1.1  christos 	subq	a4,8,a4
    258  1.1  christos 	ldq_u	t1,-1(a3)
    259  1.1  christos 	subq	a3,8,a3
    260  1.1  christos 	extql	t0,a3,t0
    261  1.1  christos 	extqh	t1,a3,t1
    262  1.1  christos 	subq	t4,8,t4
    263  1.1  christos 	or	t0,t1,t0	/* t0 = 8 source bytes starting at the new a3 */
    264  1.1  christos 	stq	t0,0(a4)
    265  1.1  christos 	bne	t4,bcopy_ov_lp
    266  1.1  christos 
    267  1.1  christos bcopy_ov_lp_end:
    268  1.1  christos 	beq	SIZEREG,bcopy_done	/* no leading bytes left */
    269  1.1  christos 	/* Leading bytes: fill the first destination quadword from DSTREG's offset upward */
    270  1.1  christos 	ldq_u	t0,0(SRCREG)
    271  1.1  christos 	ldq_u	t1,7(SRCREG)
    272  1.1  christos 	ldq_u	t2,0(DSTREG)
    273  1.1  christos 	extql	t0,SRCREG,t0
    274  1.1  christos 	extqh	t1,SRCREG,t1
    275  1.1  christos 	or	t0,t1,t0	/* t0 = 8 source bytes starting at SRCREG */
    276  1.1  christos 	insql	t0,DSTREG,t0	/* shift them up to the destination offset */
    277  1.1  christos 	mskql	t2,DSTREG,t2	/* keep destination bytes below the start */
    278  1.1  christos 	or	t2,t0,t2
    279  1.1  christos 	stq_u	t2,0(DSTREG)
    280  1.1  christos 
    281  1.1  christos bcopy_done:
    282  1.1  christos 	RET
    283  1.1  christos 
    284  1.1  christos bcopy_ov_short:
    285  1.1  christos 	ldq_u	t2,0(SRCREG)	/* bcopy_da_finish expects the first source quadword in t2 */
    286  1.1  christos 	br	zero,bcopy_da_finish
    287  1.1  christos 
    288  1.1  christos 	END(FUNCTION)
    289