/*	$NetBSD: bzero.S,v 1.12 2013/07/18 12:20:41 matt Exp $ */

/*-
 * Copyright (C) 2001	Martin J. Laubach <mjl (at) NetBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*----------------------------------------------------------------------*/

#include <machine/asm.h>


#if defined(LIBC_SCCS) && !defined(lint)
__RCSID("$NetBSD: bzero.S,v 1.12 2013/07/18 12:20:41 matt Exp $")
#endif /* LIBC_SCCS && !lint */

#ifdef _KERNEL
#include <assym.h>
#endif

#define USE_STSWX 0	/* don't. slower than trivial copy loop */

/*----------------------------------------------------------------------*/
/*
     void bzero(void *b %r3, size_t len %r4);
     void * memset(void *b %r3, int c %r4, size_t len %r5);
*/
/*----------------------------------------------------------------------*/
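/*
 * For reference, a plain C sketch of the contract implemented below
 * (an illustration only, not the source this code derives from):
 *
 *	void *memset(void *b, int c, size_t len)
 *	{
 *		unsigned char *p = b;
 *
 *		while (len-- > 0)
 *			*p++ = (unsigned char)c;
 *		return b;
 *	}
 *
 * bzero(b, len) behaves like memset(b, 0, len), but returns nothing.
 */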

#define r_dst	%r3
#define r_len	%r4
#define r_val	%r0

		.text
		.align 4
ENTRY(bzero)
		li	r_val, 0		/* Value to stuff in */
		b	cb_memset
END(bzero)

ENTRY(memset)
		cmplwi	%cr1, %r5, 0
		mr.	%r0, %r4
		mr	%r8, %r3
		beqlr-	%cr1			/* Nothing to do */

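		/*
		 * The two rlwimi below replicate the low fill byte into
		 * all four bytes of %r0, e.g.
		 * 0x000000cc -> 0x0000cccc -> 0xcccccccc.
		 */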
		rlwimi	%r0, %r4, 8, 16, 23	/* word extend fill value */
		rlwimi	%r0, %r0, 16, 0, 15
		mr	%r4, %r5
		bne-	simple_fill		/* != 0, use trivial fill */
cb_memset:

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
		/* First find out cache line size */
		mflr	%r9
#ifdef PIC
		bcl	20,31,1f
1:		mflr	%r5
		mtlr	%r9
		addis	%r5,%r5,cache_info+4-1b@ha
		lwzu	%r9,cache_info+4-1b@l(%r5)
#else
		lis	%r5,cache_info+4@ha
		lwzu	%r9,cache_info+4@l(%r5)
#endif
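		/*
		 * Either way, %r5 now points at cache_info[1] and %r9
		 * holds its value: the D-cache line size, or -1 if it has
		 * not been probed yet.  (bcl 20,31,1f with 1f as the next
		 * instruction is the usual PC-read idiom; this form of
		 * branch-and-link is meant not to disturb the link stack
		 * predictor.)
		 */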
		lwz	%r10,cache_sh-(cache_info+4)(%r5)
		cmpwi	%r9, -1
		bne+	cb_cacheline_known

		addi	%r5, %r5, -4	/* point r5 @ beginning of cache_info */

/*----------------------------------------------------------------------*/
#define CTL_MACHDEP	7
#define CPU_CACHELINE	1
#define	CPU_CACHEINFO	5

#define STKFRAME_SZ	64
#define MIB		8
#define OLDPLEN		16
#define R3_SAVE		20
#define R4_SAVE		24
#define R0_SAVE		28
#define R8_SAVE		32
#define R31_SAVE	36
#ifdef PIC
#define R30_SAVE	40
#endif

		mflr	%r9			/* save LR; the sysctl calls below clobber it */
		stw	%r9, 4(%r1)
		stwu	%r1, -STKFRAME_SZ(%r1)

		stw	%r31, R31_SAVE(%r1)
		mr	%r31, %r5		/* cache info */

#ifdef PIC
		stw	%r30, R30_SAVE(%r1)
		PIC_TOCSETUP(cb_memset,%r30)
#endif

		stw	%r8, R8_SAVE(%r1)
		stw	%r3, R3_SAVE(%r1)
		stw	%r4, R4_SAVE(%r1)
		stw	%r0, R0_SAVE(%r1)

		li	%r0, CTL_MACHDEP		/* Construct MIB */
		stw	%r0, MIB(%r1)
		li	%r0, CPU_CACHEINFO
		stw	%r0, MIB+4(%r1)

		li	%r0, 4*4			/* Oldlenp := 4*4 */
		stw	%r0, OLDPLEN(%r1)

		addi	%r3, %r1, MIB
		li	%r4, 2			/* namelen */
		/* %r5 already contains &cache_info */
		addi	%r6, %r1, OLDPLEN
		li	%r7, 0
		li	%r8, 0
		bl	PIC_PLT(_C_LABEL(sysctl))
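		/*
		 * Roughly equivalent C (a sketch, not the actual caller):
		 *
		 *	int mib[2] = { CTL_MACHDEP, CPU_CACHEINFO };
		 *	size_t oldlen = 4 * 4;
		 *	sysctl(mib, 2, cache_info, &oldlen, NULL, 0);
		 *
		 * If the CPU_CACHEINFO node does not exist, fall back to
		 * the older CPU_CACHELINE query below.
		 */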

		cmpwi	%r3, 0			/* Check result */
		beq	1f

		/* Failure, try older sysctl */

		li	%r0, CTL_MACHDEP	/* Construct MIB */
		stw	%r0, MIB(%r1)
		li	%r0, CPU_CACHELINE
		stw	%r0, MIB+4(%r1)

		li	%r0, 4			/* Oldlenp := 4 */
		stw	%r0, OLDPLEN(%r1)

		addi	%r3, %r1, MIB
		li	%r4, 2			/* namelen */
		addi	%r5, %r31, 4
		addi	%r6, %r1, OLDPLEN
		li	%r7, 0
		li	%r8, 0
		bl	PIC_PLT(_C_LABEL(sysctl))
1:
		lwz	%r3, R3_SAVE(%r1)
		lwz	%r4, R4_SAVE(%r1)
		lwz	%r8, R8_SAVE(%r1)
		lwz	%r0, R0_SAVE(%r1)
		lwz	%r9, 4(%r31)
		lwz	%r31, R31_SAVE(%r1)
#ifdef PIC
		lwz	%r30, R30_SAVE(%r1)
#endif
		addi	%r1, %r1, STKFRAME_SZ
		lwz	%r7, 4(%r1)
		mtlr	%r7

		cntlzw	%r6, %r9			/* compute shift value */
		li	%r5, 31
		subf	%r10, %r6, %r5
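		/*
		 * For a power-of-two line size, 31 - cntlzw(size) is
		 * log2(size): e.g. 32-byte lines give cntlzw(32) = 26,
		 * so the shift value is 5.
		 */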

#ifdef PIC
		mflr	%r9
		bcl	20,31,1f
1:		mflr	%r5
		mtlr	%r9

		addis	%r5, %r5, cache_info+4-1b@ha
		lwzu	%r9, cache_info+4-1b@l(%r5)
#else
		lis	%r5, cache_info+4@ha
		lwzu	%r9, cache_info+4@l(%r5)
#endif
		stw	%r10, cache_sh-(cache_info+4)(%r5)

/*----------------------------------------------------------------------*/
/* Okay, we know the cache line size (%r9) and shift value (%r10) */
cb_cacheline_known:
#else /* _KERNEL */
#ifdef	MULTIPROCESSOR
		mfsprg	%r10, 0			/* Get cpu_info pointer */
#else
		lis	%r10, cpu_info_store@ha
		addi	%r10, %r10, cpu_info_store@l
#endif
		lwz	%r9, CPU_CI+4(%r10)	/* Load D$ line size */
		cntlzw	%r10, %r9			/* Calculate shift.. */
		li	%r6, 31
		subf	%r10, %r10, %r6
#endif /* _KERNEL */
		/* Back in memory filling business */

		cmplwi	%cr1, r_len, 0		/* Nothing to do? */
		add	%r5, %r9, %r9
		cmplw	r_len, %r5		/* < 2*CL bytes to move? */
		beqlr-	%cr1			/* then do nothing */

		blt+	simple_fill		/* a trivial fill routine */
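
		/*
		 * At least 2*CL bytes from here on: fill bytewise to a
		 * word boundary, wordwise to a cache line boundary, zero
		 * whole lines with dcbz, then mop up the tail.
		 */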

		/* Word align the block: fill bytewise until dst is word aligned */

		andi.	%r5, r_dst, 0x03
		li	%r6, 4
		beq+	cb_aligned_w		/* already aligned to word? */

		subf	%r5, %r5, %r6		/* bytes to fill to align4 */
#if USE_STSWX
		mtxer	%r5
		stswx	%r0, 0, r_dst
		add	r_dst, %r5, r_dst
#else
		mtctr	%r5

		subi	r_dst, r_dst, 1
1:		stbu	r_val, 1(r_dst)		/* Fill bytewise */
		bdnz	1b

		addi	r_dst, r_dst, 1
#endif
		subf	r_len, %r5, r_len

cb_aligned_w:	/* Cache block align, fill wordwise until dst aligned */

		/* We know there is work left, since we had >= 2*CL bytes */
		/* initially, so there is no need to check for r_len == 0 */

		subi	%r6, %r9, 1		/* CL mask */
		and.	%r5, r_dst, %r6
		srwi	%r5, %r5, 2
		srwi	%r6, %r9, 2
		beq	cb_aligned_cb		/* already on CL boundary? */

		subf	%r5, %r5, %r6		/* words to fill to alignment */
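		/*
		 * Worked example: with 32-byte lines and dst % 32 == 12,
		 * %r5 = 8 - 3 = 5 words (20 bytes) bring dst to the next
		 * line boundary.
		 */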
		mtctr	%r5
		slwi	%r5, %r5, 2
		subf	r_len, %r5, r_len

		subi	r_dst, r_dst, 4
1:		stwu	r_val, 4(r_dst)		/* Fill wordwise */
		bdnz	1b
		addi	r_dst, r_dst, 4

cb_aligned_cb:	/* no need to check r_len, see above */

		srw.	%r5, r_len, %r10		/* Number of cache blocks */
		mtctr	%r5
		beq	cblocks_done

		slw	%r5, %r5, %r10
		subf	r_len, %r5, r_len

1:		dcbz	0, r_dst		/* Clear blockwise */
		add	r_dst, r_dst, %r9
		bdnz	1b
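
		/*
		 * dcbz zeroes one whole cache block per iteration; this is
		 * why the nonzero memset case was sent to simple_fill at
		 * the top: dcbz can only store zeroes.
		 */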

cblocks_done:	/* still CL aligned, but less than CL bytes left */
		cmplwi	%cr1, r_len, 0
		cmplwi	r_len, 8
		beq-	%cr1, sf_return

		blt-	sf_bytewise		/* <8 remaining? */
		b	sf_aligned_w

/*----------------------------------------------------------------------*/
wbzero:		li	r_val, 0

		cmplwi	r_len, 0
		beqlr-				/* Nothing to do */

simple_fill:
#if USE_STSWX
		cmplwi	%cr1, r_len, 12		/* < 12 bytes to move? */
#else
		cmplwi	%cr1, r_len, 8		/* < 8 bytes to move? */
#endif
		andi.	%r5, r_dst, 0x03		/* bytes to fill to align4 */
		blt	%cr1, sf_bytewise	/* trivial byte mover */

		li	%r6, 4
		subf	%r5, %r5, %r6
		beq+	sf_aligned_w		/* dest is word aligned */

#if USE_STSWX
		mtxer	%r5
		stswx	%r0, 0, r_dst
		add	r_dst, %r5, r_dst
#else
		mtctr	%r5			/* nope, then fill bytewise */
		subi	r_dst, r_dst, 1		/* until it is */
1:		stbu	r_val, 1(r_dst)
		bdnz	1b

		addi	r_dst, r_dst, 1
#endif
		subf	r_len, %r5, r_len

sf_aligned_w:	/* no need to check r_len since it was >= 8 bytes initially */
#if USE_STSWX
		mr	%r6, %r0
		mr	%r7, %r0

		srwi	%r5, r_len, 3
		mtctr	%r5

		slwi	%r5, %r5, 3		/* adjust len */
		subf.	r_len, %r5, r_len

1:		stswi	%r6, r_dst, 8
		addi	r_dst, r_dst, 8
		bdnz	1b
#else
		srwi	%r5, r_len, 2		/* words to fill */
		mtctr	%r5

		slwi	%r5, %r5, 2
		subf.	r_len, %r5, r_len	/* adjust len for fill */

		subi	r_dst, r_dst, 4
1:		stwu	r_val, 4(r_dst)
		bdnz	1b
		addi	r_dst, r_dst, 4
#endif

sf_word_done:	bne-	sf_bytewise

sf_return:	mr	%r3, %r8			/* restore orig ptr */
		blr				/* for memset functionality */

sf_bytewise:
#if USE_STSWX
		mr	%r5, %r0
		mr	%r6, %r0
		mr	%r7, %r0

		mtxer	r_len
		stswx	%r5, 0, r_dst
#else
		mtctr	r_len

		subi	r_dst, r_dst, 1
1:		stbu	r_val, 1(r_dst)
		bdnz	1b
#endif
		mr	%r3, %r8			/* restore orig ptr */
		blr				/* for memset functionality */
END(memset)

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
		.data
		.p2align 2
cache_info:	.long	-1, -1, -1, -1
cache_sh:	.long	0
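
/*
 * cache_info mirrors the four words returned by the machdep.cacheinfo
 * sysctl; only the second word (cache_info+4), the D-cache line size,
 * is consumed above, and -1 marks it as not yet probed.  cache_sh
 * caches the corresponding log2(line size) shift value.
 */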

#endif
/*----------------------------------------------------------------------*/