Home | History | Annotate | Line # | Download | only in string
memset.S revision 1.3
      1 /*	$NetBSD: memset.S,v 1.3 2021/08/13 20:47:54 andvar Exp $	*/
      2 
      3 /*
      4  * Copyright (c) 1996-2002 Eduardo Horvath
      5  * All rights reserved.
      6  *
      7  * Redistribution and use in source and binary forms, with or without
      8  * modification, are permitted provided that the following conditions
      9  * are met:
     10  * 1. Redistributions of source code must retain the above copyright
     11  *    notice, this list of conditions and the following disclaimer.
     12  *
     13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR  ``AS IS'' AND
     14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR  BE LIABLE
     17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     23  * SUCH DAMAGE.
     24  *
     25  */
     26 #include "strmacros.h"
     27 #if defined(LIBC_SCCS) && !defined(lint)
     28 RCSID("$NetBSD: memset.S,v 1.3 2021/08/13 20:47:54 andvar Exp $")
     29 #endif  /* LIBC_SCCS and not lint */
     30 
     31 
     32 /*
     33  * XXXXXXXXXXXXXXXXXXXX
     34  * We need to make sure that this doesn't use floating point
     35  * before our trap handlers are installed or we could panic
     36  * XXXXXXXXXXXXXXXXXXXX
     37  */
     38 /*
     39  * memset(addr, c, len)
     40  *
     41  * We want to use VIS instructions if we're clearing out more than
     42  * 256 bytes, but to do that we need to properly save and restore the
     43  * FP registers.  Unfortunately the code to do that in the kernel needs
     44  * to keep track of the current owner of the FPU, hence the different
     45  * code.
     46  *
     47  * XXXXX To produce more efficient code, we do not allow lengths
      48  * greater than 0x8000000000000000, which are negative numbers.
     49  * This should not really be an issue since the VA hole should
     50  * cause any such ranges to fail anyway.
     51  */
      52 #if !defined(_KERNEL) || defined(_RUMPKERNEL)
      53 ENTRY(bzero)
      54 	! %o0 = addr, %o1 = len
         !! bzero(addr, len) is memset(addr, 0, len): shift len into %o2,
         !! zero the pattern, and fall through into memset's entry.
      55 	mov	%o1, %o2
      56 	clr	%o1			! set pattern (zero)
      57 #endif
      58 ENTRY(memset)
      59 	! %o0 = addr, %o1 = pattern, %o2 = len
      60 	mov	%o0, %o4		! Save original pointer
      61 
         !! Byte loop: bump addr one byte at a time until it is 8-byte
         !! aligned, storing the low pattern byte each iteration.  Per this
         !! file's convention, instructions written with a leading space sit
         !! in branch delay slots; ",a" annuls the slot when the conditional
         !! branch is not taken.
      62 Lmemset_internal:
      63 	btst	7, %o0			! 8-byte aligned?
      64 	bz,pn	%xcc, 0f
      65 	 nop
      66 	inc	%o0
      67 	deccc	%o2			! Store up to 7 bytes
      68 	bge,a,pt	CCCR, Lmemset_internal
      69 	 stb	%o1, [%o0 - 1]
      70 
         !! len went negative before we reached alignment -- all done.
      71 	retl				! Duplicate Lmemset_done
      72 	 mov	%o4, %o0
      73 0:
      74 	/*
      75 	 * Duplicate the pattern so it fills 64-bits.
      76 	 */
      77 	andcc	%o1, 0x0ff, %o1		! No need to extend zero
      78 	bz,pt	%icc, 1f
      79 	 sllx	%o1, 8, %o3		! sigh.  all dependent insns.
      80 	or	%o1, %o3, %o1
      81 	sllx	%o1, 16, %o3
      82 	or	%o1, %o3, %o1
      83 	sllx	%o1, 32, %o3
      84 	 or	%o1, %o3, %o1
      85 1:
      86 #ifdef USE_BLOCK_STORE_LOAD
      87 	!! Now we are 64-bit aligned
      88 	cmp	%o2, 256		! Use block clear if len > 256
      89 	bge,pt	CCCR, Lmemset_block	! use block store insns
      90 #endif	/* USE_BLOCK_STORE_LOAD */
      91 	 deccc	8, %o2
      92 Lmemset_longs:
      93 	bl,pn	CCCR, Lmemset_cleanup	! Less than 8 bytes left
      94 	 nop
      95 3:
      96 	inc	8, %o0
      97 	deccc	8, %o2
      98 	bge,pt	CCCR, 3b
      99 	 stx	%o1, [%o0 - 8]		! Do 1 longword at a time
     100 
     101 	/*
     102 	 * Len is in [-8..-1] where -8 => done, -7 => 1 byte to zero,
     103 	 * -6 => two bytes, etc.  Mop up this remainder, if any.
     104 	 */
     105 Lmemset_cleanup:
     106 	btst	4, %o2
     107 	bz,pt	CCCR, 5f		! if (len & 4) {
     108 	 nop
     109 	stw	%o1, [%o0]		!	*(int *)addr = 0;
     110 	inc	4, %o0			!	addr += 4;
     111 5:
     112 	btst	2, %o2
     113 	bz,pt	CCCR, 7f		! if (len & 2) {
     114 	 nop
     115 	sth	%o1, [%o0]		!	*(short *)addr = 0;
     116 	inc	2, %o0			!	addr += 2;
     117 7:
     118 	btst	1, %o2
     119 	bnz,a	%icc, Lmemset_done	! if (len & 1)
     120 	 stb	%o1, [%o0]		!	*addr = 0;
     121 Lmemset_done:
     122 	retl
     123 	 mov	%o4, %o0		! Restore pointer for memset (ugh)
     124 
     125 #ifdef USE_BLOCK_STORE_LOAD
         !! VIS block-store path, used for len >= 256.  Bail back to the
         !! longword loop if block ops are administratively disabled or the
         !! kernel trap table isn't ours yet (FP traps wouldn't be handled).
     126 Lmemset_block:
     127 	sethi	%hi(block_disable), %o3
     128 	ldx	[ %o3 + %lo(block_disable) ], %o3
     129 	brnz,pn	%o3, Lmemset_longs
     130 	!! Make sure our trap table is installed
     131 	set	_C_LABEL(trapbase), %o5
     132 	rdpr	%tba, %o3
     133 	sub	%o3, %o5, %o3
     134 	brnz,pn	%o3, Lmemset_longs	! No, then don't use block load/store
     135 	 nop
     136 /*
     137  * Kernel:
     138  *
     139  * Here we use VIS instructions to do a block clear of a page.
     140  * But before we can do that we need to save and enable the FPU.
     141  * The last owner of the FPU registers is fplwp, and
     142  * fplwp->l_md.md_fpstate is the current fpstate.  If that's not
     143  * null, call savefpstate() with it to store our current fp state.
     144  *
     145  * Next, allocate an aligned fpstate on the stack.  We will properly
     146  * nest calls on a particular stack so this should not be a problem.
     147  *
     148  * Now we grab either curlwp (or if we're on the interrupt stack
     149  * lwp0).  We stash its existing fpstate in a local register and
     150  * put our new fpstate in curlwp->p_md.md_fpstate.  We point
     151  * fplwp at curlwp (or lwp0) and enable the FPU.
     152  *
     153  * If we are ever preempted, our FPU state will be saved in our
     154  * fpstate.  Then, when we're resumed and we take an FPDISABLED
     155  * trap, the trap handler will be able to fish our FPU state out
     156  * of curlwp (or lwp0).
     157  *
     158  * On exiting this routine we undo the damage: restore the original
     159  * pointer to curlwp->p_md.md_fpstate, clear our fplwp, and disable
     160  * the FPU.
     161  *
     162  */
     163 
     164 	ENABLE_FPU(0)
     165 
         !! Note: ENABLE_FPU opened a register window, so the arguments are
         !! now in %i0/%i1/%i2 rather than %o0/%o1/%o2.
     166 	!! We are now 8-byte aligned.  We need to become 64-byte aligned.
     167 	btst	63, %i0
     168 	bz,pt	CCCR, 2f
     169 	 nop
     170 1:
     171 	stx	%i1, [%i0]
     172 	inc	8, %i0
     173 	btst	63, %i0
     174 	bnz,pt	%xcc, 1b
     175 	 dec	8, %i2
     176 
     177 2:
         !! Get the 64-bit pattern into %f0/%f1: store it to memory and load
         !! it back through the FPU (there is no direct int->fp register move
         !! here).  A zero pattern skips the round trip via fzero.
     178 	brz	%i1, 3f					! Skip the memory op
     179 	 fzero	%f0					! if pattern is 0
     180 
     181 #ifdef _LP64
     182 	stx	%i1, [%i0]				! Flush this puppy to RAM
     183 	membar	#StoreLoad
     184 	ldd	[%i0], %f0
     185 #else
     186 	stw	%i1, [%i0]				! Flush this puppy to RAM
     187 	membar	#StoreLoad
     188 	ld	[%i0], %f0
     189 	fmovsa	%icc, %f0, %f1
     190 #endif
     191 
     192 3:
         !! Replicate the pattern across %f0-%f15 so one block store
         !! (stda ... ASI_STORE) writes 64 bytes of pattern.
     193 	fmovd	%f0, %f2				! Duplicate the pattern
     194 	fmovd	%f0, %f4
     195 	fmovd	%f0, %f6
     196 	fmovd	%f0, %f8
     197 	fmovd	%f0, %f10
     198 	fmovd	%f0, %f12
     199 	fmovd	%f0, %f14
     200 
     201 	!! Remember: we were 8 bytes too far
     202 	dec	56, %i2					! Go one iteration too far
     203 5:
     204 	stda	%f0, [%i0] ASI_STORE			! Store 64 bytes
     205 	deccc	BLOCK_SIZE, %i2
     206 	bg,pt	%icc, 5b
     207 	 inc	BLOCK_SIZE, %i0
     208 
     209 	membar	#Sync
     210 /*
     211  * We've saved our possible fpstate, now disable the fpu
     212  * and continue with life.
     213  */
     214 	RESTORE_FPU
     215 	addcc	%i2, 56, %i2				! Restore the count
     216 	ba,pt	%xcc, Lmemset_longs			! Finish up the remainder
     217 	 restore
     218 #endif	/* USE_BLOCK_STORE_LOAD */
     219
    219