Home | History | Annotate | Line # | Download | only in string
memset.S revision 1.3
      1  1.3    andvar /*	$NetBSD: memset.S,v 1.3 2021/08/13 20:47:54 andvar Exp $	*/
      2  1.1  christos 
      3  1.1  christos /*
      4  1.1  christos  * Copyright (c) 1996-2002 Eduardo Horvath
      5  1.1  christos  * All rights reserved.
      6  1.1  christos  *
      7  1.1  christos  * Redistribution and use in source and binary forms, with or without
      8  1.1  christos  * modification, are permitted provided that the following conditions
      9  1.1  christos  * are met:
     10  1.1  christos  * 1. Redistributions of source code must retain the above copyright
     11  1.1  christos  *    notice, this list of conditions and the following disclaimer.
     12  1.1  christos  *
     13  1.1  christos  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR  ``AS IS'' AND
     14  1.1  christos  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     15  1.1  christos  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     16  1.1  christos  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR  BE LIABLE
     17  1.1  christos  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     18  1.1  christos  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     19  1.1  christos  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     20  1.1  christos  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     21  1.1  christos  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     22  1.1  christos  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     23  1.1  christos  * SUCH DAMAGE.
     24  1.1  christos  *
     25  1.1  christos  */
     26  1.1  christos #include "strmacros.h"
     27  1.2  christos #if defined(LIBC_SCCS) && !defined(lint)
     28  1.3    andvar RCSID("$NetBSD: memset.S,v 1.3 2021/08/13 20:47:54 andvar Exp $")
     29  1.2  christos #endif  /* LIBC_SCCS and not lint */
     30  1.2  christos 
     31  1.1  christos 
     32  1.1  christos /*
     33  1.1  christos  * XXXXXXXXXXXXXXXXXXXX
     34  1.1  christos  * We need to make sure that this doesn't use floating point
     35  1.1  christos  * before our trap handlers are installed or we could panic
     36  1.1  christos  * XXXXXXXXXXXXXXXXXXXX
     37  1.1  christos  */
     38  1.1  christos /*
     39  1.1  christos  * memset(addr, c, len)
     40  1.1  christos  *
     41  1.1  christos  * We want to use VIS instructions if we're clearing out more than
     42  1.1  christos  * 256 bytes, but to do that we need to properly save and restore the
     43  1.1  christos  * FP registers.  Unfortunately the code to do that in the kernel needs
     44  1.1  christos  * to keep track of the current owner of the FPU, hence the different
     45  1.1  christos  * code.
     46  1.1  christos  *
     47  1.1  christos  * XXXXX To produce more efficient code, we do not allow lengths
     48  1.1  christos  * of 0x8000000000000000 or greater, which are negative numbers.
     49  1.1  christos  * This should not really be an issue since the VA hole should
     50  1.1  christos  * cause any such ranges to fail anyway.
     51  1.1  christos  */
     52  1.1  christos #if !defined(_KERNEL) || defined(_RUMPKERNEL)
     53  1.1  christos ENTRY(bzero)
     54  1.1  christos 	! %o0 = addr, %o1 = len; no return here, continues into memset below
     55  1.1  christos 	mov	%o1, %o2		! len goes where memset expects it
     56  1.2  christos 	clr	%o1			! set pattern to zero
     57  1.1  christos #endif
     58  1.1  christos ENTRY(memset)
     59  1.1  christos 	! In: %o0 = addr, %o1 = pattern, %o2 = len.  Out: %o0 = original addr
     60  1.1  christos 	mov	%o0, %o4		! Save original pointer
     61  1.1  christos 
     62  1.1  christos Lmemset_internal:
     63  1.1  christos 	btst	7, %o0			! 8-byte aligned?
     64  1.1  christos 	bz,pn	%xcc, 0f		! Yes: go replicate the pattern
     65  1.1  christos 	 nop
     66  1.1  christos 	inc	%o0			! Advance first; the byte lands at [%o0 - 1]
     67  1.1  christos 	deccc	%o2			! Store up to 7 bytes
     68  1.1  christos 	bge,a,pt	CCCR, Lmemset_internal	! Annulled: stb only runs if len had not run out
     69  1.1  christos 	 stb	%o1, [%o0 - 1]
     70  1.1  christos 
     71  1.1  christos 	retl				! Len ran out while aligning (duplicate of Lmemset_done)
     72  1.1  christos 	 mov	%o4, %o0
     73  1.1  christos 0:
     74  1.1  christos 	/*
     75  1.1  christos 	 * Duplicate the pattern so it fills 64-bits.
     76  1.1  christos 	 */
     77  1.1  christos 	andcc	%o1, 0x0ff, %o1		! No need to extend zero
     78  1.1  christos 	bz,pt	%icc, 1f		! (the sllx below is its delay slot)
     79  1.1  christos 	 sllx	%o1, 8, %o3		! sigh.  all dependent insns.
     80  1.1  christos 	or	%o1, %o3, %o1
     81  1.1  christos 	sllx	%o1, 16, %o3
     82  1.1  christos 	or	%o1, %o3, %o1
     83  1.1  christos 	sllx	%o1, 32, %o3
     84  1.1  christos 	 or	%o1, %o3, %o1		! Pattern byte now fills all 8 bytes of %o1
     85  1.1  christos 1:
     86  1.1  christos #ifdef USE_BLOCK_STORE_LOAD
     87  1.1  christos 	!! Now we are 64-bit aligned
     88  1.1  christos 	cmp	%o2, 256		! Use block clear if len >= 256
     89  1.1  christos 	bge,pt	CCCR, Lmemset_block	! use block store insns
     90  1.1  christos #endif	/* USE_BLOCK_STORE_LOAD */
     91  1.1  christos 	 deccc	8, %o2			! len -= 8; in the bge delay slot when block stores are built in
     92  1.1  christos Lmemset_longs:
     93  1.1  christos 	bl,pn	CCCR, Lmemset_cleanup	! Less than 8 bytes left
     94  1.1  christos 	 nop
     95  1.1  christos 3:
     96  1.1  christos 	inc	8, %o0
     97  1.1  christos 	deccc	8, %o2
     98  1.1  christos 	bge,pt	CCCR, 3b
     99  1.1  christos 	 stx	%o1, [%o0 - 8]		! Do 1 longword at a time
    100  1.1  christos 
    101  1.1  christos 	/*
    102  1.1  christos 	 * Len is in [-8..-1] where -8 => done, -7 => 1 byte to zero,
    103  1.1  christos 	 * -6 => two bytes, etc.  Mop up this remainder, if any.
    104  1.1  christos 	 */
    105  1.1  christos Lmemset_cleanup:
    106  1.1  christos 	btst	4, %o2
    107  1.1  christos 	bz,pt	CCCR, 5f		! if (len & 4) {
    108  1.1  christos 	 nop
    109  1.1  christos 	stw	%o1, [%o0]		!	*(int *)addr = pattern;
    110  1.1  christos 	inc	4, %o0			!	addr += 4;
    111  1.1  christos 5:
    112  1.1  christos 	btst	2, %o2
    113  1.1  christos 	bz,pt	CCCR, 7f		! if (len & 2) {
    114  1.1  christos 	 nop
    115  1.1  christos 	sth	%o1, [%o0]		!	*(short *)addr = pattern;
    116  1.1  christos 	inc	2, %o0			!	addr += 2;
    117  1.1  christos 7:
    118  1.1  christos 	btst	1, %o2
    119  1.1  christos 	bnz,a	%icc, Lmemset_done	! if (len & 1)
    120  1.1  christos 	 stb	%o1, [%o0]		!	*addr = pattern;  (annulled when bit 0 is clear)
    121  1.1  christos Lmemset_done:
    122  1.1  christos 	retl
    123  1.3    andvar 	 mov	%o4, %o0		! Restore pointer for memset (ugh)
    124  1.1  christos 
    125  1.1  christos #ifdef USE_BLOCK_STORE_LOAD
    126  1.1  christos Lmemset_block:
    127  1.1  christos 	sethi	%hi(block_disable), %o3
    128  1.1  christos 	ldx	[ %o3 + %lo(block_disable) ], %o3
    129  1.1  christos 	brnz,pn	%o3, Lmemset_longs	! block_disable != 0: use the plain stx loop
    130  1.1  christos 	!! Make sure our trap table is installed.  NOTE(review): the set below starts in the delay slot of the brnz above -- confirm intended
    131  1.1  christos 	set	_C_LABEL(trapbase), %o5
    132  1.1  christos 	rdpr	%tba, %o3
    133  1.1  christos 	sub	%o3, %o5, %o3		! Zero iff %tba == trapbase
    134  1.1  christos 	brnz,pn	%o3, Lmemset_longs	! No, then don't use block load/store
    135  1.1  christos 	 nop
    136  1.1  christos /*
    137  1.1  christos  * Kernel:
    138  1.1  christos  *
    139  1.1  christos  * Here we use VIS instructions to do a block clear of a page.
    140  1.1  christos  * But before we can do that we need to save and enable the FPU.
    141  1.1  christos  * The last owner of the FPU registers is fplwp, and
    142  1.1  christos  * fplwp->l_md.md_fpstate is the current fpstate.  If that's not
    143  1.1  christos  * null, call savefpstate() with it to store our current fp state.
    144  1.1  christos  *
    145  1.1  christos  * Next, allocate an aligned fpstate on the stack.  We will properly
    146  1.1  christos  * nest calls on a particular stack so this should not be a problem.
    147  1.1  christos  *
    148  1.1  christos  * Now we grab either curlwp (or if we're on the interrupt stack
    149  1.1  christos  * lwp0).  We stash its existing fpstate in a local register and
    150  1.1  christos  * put our new fpstate in curlwp->l_md.md_fpstate.  We point
    151  1.1  christos  * fplwp at curlwp (or lwp0) and enable the FPU.
    152  1.1  christos  *
    153  1.1  christos  * If we are ever preempted, our FPU state will be saved in our
    154  1.1  christos  * fpstate.  Then, when we're resumed and we take an FPDISABLED
    155  1.1  christos  * trap, the trap handler will be able to fish our FPU state out
    156  1.1  christos  * of curlwp (or lwp0).
    157  1.1  christos  *
    158  1.1  christos  * On exiting this routine we undo the damage: restore the original
    159  1.1  christos  * pointer to curlwp->l_md.md_fpstate, clear our fplwp, and disable
    160  1.1  christos  * the FPU.
    161  1.1  christos  *
    162  1.1  christos  */
    163  1.1  christos 
    164  1.1  christos 	ENABLE_FPU(0)
    165  1.1  christos 
    166  1.1  christos 	!! We are now 8-byte aligned.  We need to become 64-byte aligned.
    167  1.1  christos 	btst	63, %i0			! Args are in %i regs from here on; presumably ENABLE_FPU did a save -- confirm in strmacros.h
    168  1.1  christos 	bz,pt	CCCR, 2f
    169  1.1  christos 	 nop
    170  1.1  christos 1:
    171  1.1  christos 	stx	%i1, [%i0]
    172  1.1  christos 	inc	8, %i0
    173  1.1  christos 	btst	63, %i0
    174  1.1  christos 	bnz,pt	%xcc, 1b
    175  1.1  christos 	 dec	8, %i2			! Delay slot, not annulled: len -= 8 on every pass
    176  1.1  christos 
    177  1.1  christos 2:
    178  1.1  christos 	brz	%i1, 3f					! Skip the memory op
    179  1.1  christos 	 fzero	%f0					! if pattern is 0
    180  1.1  christos 
    181  1.1  christos #ifdef _LP64
    182  1.1  christos 	stx	%i1, [%i0]				! Flush this puppy to RAM
    183  1.1  christos 	membar	#StoreLoad
    184  1.1  christos 	ldd	[%i0], %f0				! Read it back into the FP register
    185  1.1  christos #else
    186  1.1  christos 	stw	%i1, [%i0]				! Flush this puppy to RAM
    187  1.1  christos 	membar	#StoreLoad
    188  1.1  christos 	ld	[%i0], %f0
    189  1.1  christos 	fmovsa	%icc, %f0, %f1				! Copy the 32-bit half into %f1
    190  1.1  christos #endif
    191  1.1  christos 
    192  1.1  christos 3:
    193  1.1  christos 	fmovd	%f0, %f2				! Duplicate the pattern
    194  1.1  christos 	fmovd	%f0, %f4
    195  1.1  christos 	fmovd	%f0, %f6
    196  1.1  christos 	fmovd	%f0, %f8
    197  1.1  christos 	fmovd	%f0, %f10
    198  1.1  christos 	fmovd	%f0, %f12
    199  1.1  christos 	fmovd	%f0, %f14
    200  1.1  christos 
    201  1.1  christos 	!! Remember: we were 8 bytes too far
    202  1.1  christos 	dec	56, %i2					! Go one iteration too far
    203  1.1  christos 5:
    204  1.1  christos 	stda	%f0, [%i0] ASI_STORE			! Store 64 bytes
    205  1.1  christos 	deccc	BLOCK_SIZE, %i2
    206  1.1  christos 	bg,pt	%icc, 5b				! NOTE(review): tests %icc while other length tests use CCCR -- confirm
    207  1.1  christos 	 inc	BLOCK_SIZE, %i0				! Delay slot, not annulled: advance past the block just stored
    208  1.1  christos 
    209  1.1  christos 	membar	#Sync
    210  1.1  christos /*
    211  1.1  christos  * We've saved our possible fpstate, now disable the fpu
    212  1.1  christos  * and continue with life.
    213  1.1  christos  */
    214  1.1  christos 	RESTORE_FPU
    215  1.1  christos 	addcc	%i2, 56, %i2				! Restore the count; sets cc for the test at Lmemset_longs
    216  1.1  christos 	ba,pt	%xcc, Lmemset_longs			! Finish up the remainder
    217  1.1  christos 	 restore					! Delay slot: pop the register window, %i regs are the caller's %o again
    218  1.1  christos #endif	/* USE_BLOCK_STORE_LOAD */
    219