/*	$NetBSD: bzero.S,v 1.3.2.3 2002/03/22 20:41:54 nathanw Exp $ */

/*-
 * Copyright (C) 2001	Martin J. Laubach <mjl@netbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*----------------------------------------------------------------------*/

#include <machine/asm.h>
#ifdef _KERNEL
#include <assym.h>
#endif

#define USE_STSWX 0	/* don't. slower than trivial copy loop */

/*----------------------------------------------------------------------*/
/*
     void bzero(void *b r3, size_t len r4);
     void * memset(void *b r3, int c r4, size_t len r5);
*/
/*----------------------------------------------------------------------*/
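
/*
 * Overview (explanatory note added for clarity, not by the original
 * author): a non-zero fill value is widened to a full word and handled
 * by the simple fill loops below.  A zero fill over a large enough
 * buffer is first aligned bytewise to a word boundary, then wordwise
 * to a cache block boundary, after which whole cache blocks are
 * cleared with dcbz and the tail is finished by the simple loops.
 * Roughly, in illustrative C (dcbz() is a stand-in for the
 * instruction; this path is only taken when len > 2*cl, so the
 * alignment loops below cannot run out of bytes):
 *
 *	while ((uintptr_t)p & 3)	{ *p++ = 0; len--; }
 *	while ((uintptr_t)p & (cl - 1))	{ *(uint32_t *)p = 0; p += 4; len -= 4; }
 *	while (len >= cl)		{ dcbz(p); p += cl; len -= cl; }
 *	// word and byte fills finish the remainder
 */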

#define r_dst	r3
#define r_len	r4
#define r_val	r0

		.text
		.align 4
ENTRY(bzero)
		li	r_val, 0		/* Value to stuff in */
		b	cb_memset

ENTRY(memset)
		cmplwi	cr1, r5, 0
		mr.	r0, r4
		mr	r8, r3
		beqlr-	cr1			/* Nothing to do */

		rlwimi	r0, r4, 8, 16, 23	/* word extend fill value */
		rlwimi	r0, r0, 16, 0, 15
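
/*
 * Note added for clarity: the two rlwimi instructions above replicate
 * the low byte of the fill value into all four bytes of r0, i.e. in C:
 *
 *	uint32_t v = c & 0xff;
 *	v |= v << 8;		// both bytes of the low halfword
 *	v |= v << 16;		// all four bytes of the word
 */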
		mr	r4, r5
		bne-	simple_fill		/* fill value != 0, use trivial fill */
cb_memset:

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
		/* First find out cache line size */
#ifdef PIC
		mflr	r9
		bl	_GLOBAL_OFFSET_TABLE_@local-4
		mflr	r10
		mtlr	r9
		lwz	r5,cache_info@got(r10)
#else
		lis	r5,cache_info@h
		ori	r5,r5,cache_info@l
#endif
		lwz	r6, 4(r5)
		cmpwi	r6, -1
		bne+	cb_cacheline_known

/*----------------------------------------------------------------------*/
#define CTL_MACHDEP	7
#define CPU_CACHELINE	1
#define	CPU_CACHEINFO	5

#define STKFRAME_SZ	48
#define MIB		8
#define OLDPLEN		16
#define R3_SAVE		20
#define R4_SAVE		24
#define R0_SAVE		28
#define R8_SAVE		32

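/*
 * Note added for clarity: the code below is simply a sysctl(3) call
 * built by hand on a temporary stack frame.  In illustrative C (ci
 * stands for the cache_info array defined at the bottom of this file):
 *
 *	int mib[2] = { CTL_MACHDEP, CPU_CACHEINFO };
 *	size_t olen = 4 * 4;
 *	if (sysctl(mib, 2, ci, &olen, NULL, 0) != 0) {
 *		mib[1] = CPU_CACHELINE;		// older fallback sysctl
 *		olen = 4;
 *		sysctl(mib, 2, &ci[1], &olen, NULL, 0);
 *	}
 */
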
		mflr	r6
		stw	r6, 4(r1)
		stwu	r1, -STKFRAME_SZ(r1)

		stw	r8, R8_SAVE(r1)
		stw	r3, R3_SAVE(r1)
		stw	r4, R4_SAVE(r1)
		stw	r0, R0_SAVE(r1)

		li	r0, CTL_MACHDEP		/* Construct MIB */
		stw	r0, MIB(r1)
		li	r0, CPU_CACHEINFO
		stw	r0, MIB+4(r1)

		li	r0, 4*4			/* Oldlenp := 4*4 */
		stw	r0, OLDPLEN(r1)

		addi	r3, r1, MIB
		li	r4, 2			/* namelen */
		/* r5 already contains &cache_info */
		addi	r6, r1, OLDPLEN
		li	r7, 0
		li	r8, 0
		bl	PIC_PLT(_C_LABEL(sysctl))

		cmpwi	r3, 0			/* Check result */
		beq	1f

		/* Failure, try older sysctl */

		li	r0, CTL_MACHDEP		/* Construct MIB */
		stw	r0, MIB(r1)
		li	r0, CPU_CACHELINE
		stw	r0, MIB+4(r1)

		li	r0, 4			/* Oldlenp := 4 */
		stw	r0, OLDPLEN(r1)

		addi	r3, r1, MIB
		li	r4, 2			/* namelen */
#ifdef PIC
		mflr	r9
		bl	_GLOBAL_OFFSET_TABLE_@local-4
		mflr	r10
		mtlr	r9
		lwz	r5,cache_info@got(r10)
		addi	r5, r5, 4
#else
		lis	r5,cache_info+4@h
		ori	r5,r5,cache_info+4@l
#endif
		addi	r6, r1, OLDPLEN
		li	r7, 0
		li	r8, 0
		bl	PIC_PLT(_C_LABEL(sysctl))
1:
		lwz	r8, R8_SAVE(r1)
		lwz	r3, R3_SAVE(r1)
		lwz	r4, R4_SAVE(r1)
		lwz	r0, R0_SAVE(r1)

#ifdef PIC
		bl	_GLOBAL_OFFSET_TABLE_@local-4
		mflr	r10
		lwz	r9, cache_info@got(r10)
		lwz	r9, 4(r9)
#else
		lis	r5, cache_info+4@ha
		lwz	r9, cache_info+4@l(r5)
#endif
		la	r1, STKFRAME_SZ(r1)
		lwz	r5, 4(r1)
		mtlr	r5

		cntlzw	r6, r9			/* compute shift value */
		li	r5, 31
		subf	r5, r6, r5
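
/*
 * Note added for clarity: for a power-of-two line size, 31 - cntlzw(x)
 * equals log2(x), so the shift converts byte counts to block counts.
 * In C, using the GCC builtin as an illustration:
 *
 *	shift = 31 - __builtin_clz(linesize);	// e.g. 32 -> 5
 *	nblocks = len >> shift;
 */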

#ifdef PIC
		lwz	r6, cache_sh@got(r10)
		stw	r5, 0(r6)
#else
		lis	r6, cache_sh@ha
		stw	r5, cache_sh@l(r6)
#endif
/*----------------------------------------------------------------------*/
/* Okay, we know the cache line size (r9) and shift value (r10) */
cb_cacheline_known:
#ifdef PIC
		lwz	r5, cache_info@got(r10)
		lwz	r9, 4(r5)
		lwz	r5, cache_sh@got(r10)
		lwz	r10, 0(r5)
#else
		lis	r9, cache_info+4@ha
		lwz	r9, cache_info+4@l(r9)
		lis	r10, cache_sh@ha
		lwz	r10, cache_sh@l(r10)
#endif

#else /* _KERNEL */
#ifdef	MULTIPROCESSOR
		mfspr	r10, 0			/* Get cpu_info pointer */
#else
		lis	r10, cpu_info_store@ha
		addi	r10, r10, cpu_info_store@l
#endif
		lwz	r9, CPU_CI+4(r10)	/* Load D$ line size */
		cntlzw	r10, r9			/* Calculate shift.. */
		li	r6, 31
		subf	r10, r10, r6
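
/*
 * Note added for clarity: CPU_CI is a struct cpu_info field offset
 * generated into assym.h; the word at CPU_CI+4 is presumed to hold the
 * D-cache line size, mirroring cache_info[1] in the userland path.
 * The cntlzw/subf pair computes shift = log2(linesize) as above.
 */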
#endif /* _KERNEL */
		/* Back in memory filling business */

		cmplwi	cr1, r_len, 0		/* Nothing to do? */
		add	r5, r9, r9
		cmplw	r_len, r5		/* <= 2*CL bytes to move? */
		beqlr-	cr1			/* then do nothing */

		blt+	simple_fill		/* a trivial fill routine */

		/* Word align the block, fill bytewise until dst is word aligned */

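/*
 * Note added for clarity; the idea in illustrative C:
 *
 *	if ((n = (uintptr_t)dst & 3) != 0)
 *		for (n = 4 - n; n > 0; n--)
 *			*dst++ = (unsigned char)val;
 */
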
		andi.	r5, r_dst, 0x03
		li	r6, 4
		beq+	cb_aligned_w		/* already aligned to word? */

		subf	r5, r5, r6		/* bytes to fill to align4 */
#if USE_STSWX
		mtxer	r5
		stswx	r0, 0, r_dst
		add	r_dst, r5, r_dst
#else
		mtctr	r5

		subi	r_dst, r_dst, 1
1:		stbu	r_val, 1(r_dst)		/* Fill bytewise */
		bdnz	1b

		addi	r_dst, r_dst, 1
#endif
		subf	r_len, r5, r_len

cb_aligned_w:	/* Cache block align, fill wordwise until dst aligned */

		/* We know there is something to do since we had > 2*CL */
		/* initially, so no need to check for r_len == 0 */

		rlwinm.	r5, r_dst, 30, 29, 31
		srwi	r6, r9, 2
		beq	cb_aligned_cb		/* already on CL boundary? */

		subf	r5, r5, r6		/* words to fill to alignment */
		mtctr	r5
		slwi	r5, r5, 2
		subf	r_len, r5, r_len

		subi	r_dst, r_dst, 4
1:		stwu	r_val, 4(r_dst)		/* Fill wordwise */
		bdnz	1b
		addi	r_dst, r_dst, 4

cb_aligned_cb:	/* no need to check r_len, see above */

		srw.	r5, r_len, r10		/* Number of cache blocks */
		mtctr	r5
		beq	cblocks_done

		slw	r5, r5, r10
		subf	r_len, r5, r_len

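/*
 * Note added for clarity: dcbz zeroes one entire data cache block at a
 * time, without first reading it in.  The loop below is, in effect
 * (illustrative C, dcbz() standing in for the instruction):
 *
 *	for (n = len >> shift; n > 0; n--) {
 *		dcbz(dst);		// zero a whole cache block
 *		dst += linesize;
 *	}
 */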
1:		dcbz	0, r_dst		/* Clear blockwise */
		add	r_dst, r_dst, r9
		bdnz	1b

cblocks_done:	/* still CL aligned, but less than CL bytes left */
		cmplwi	cr1, r_len, 0
		cmplwi	r_len, 8
		beq-	cr1, sf_return

		blt-	sf_bytewise		/* <8 remaining? */
		b	sf_aligned_w

/*----------------------------------------------------------------------*/
wbzero:		li	r_val, 0

		cmplwi	r_len, 0
		beqlr-				/* Nothing to do */

simple_fill:
#if USE_STSWX
		cmplwi	cr1, r_len, 12		/* < 12 bytes to move? */
#else
		cmplwi	cr1, r_len, 8		/* < 8 bytes to move? */
#endif
		andi.	r5, r_dst, 0x03		/* bytes to fill to align4 */
		blt	cr1, sf_bytewise	/* trivial byte mover */

		li	r6, 4
		subf	r5, r5, r6
		beq+	sf_aligned_w		/* dest is word aligned */

#if USE_STSWX
		mtxer	r5
		stswx	r0, 0, r_dst
		add	r_dst, r5, r_dst
#else
		mtctr	r5			/* nope, then fill bytewise */
		subi	r_dst, r_dst, 1		/* until it is */
1:		stbu	r_val, 1(r_dst)
		bdnz	1b

		addi	r_dst, r_dst, 1
#endif
		subf	r_len, r5, r_len

sf_aligned_w:	/* no need to check r_len since it was >= 8 bytes initially */
#if USE_STSWX
		mr	r6, r0
		mr	r7, r0

		srwi	r5, r_len, 3
		mtctr	r5

		slwi	r5, r5, 3		/* adjust len */
		subf.	r_len, r5, r_len

1:		stswi	r6, r_dst, 8
		addi	r_dst, r_dst, 8
		bdnz	1b
#else
		srwi	r5, r_len, 2		/* words to fill */
		mtctr	r5

		slwi	r5, r5, 2
		subf.	r_len, r5, r_len	/* adjust len for fill */

		subi	r_dst, r_dst, 4
1:		stwu	r_val, 4(r_dst)
		bdnz	1b
		addi	r_dst, r_dst, 4
#endif

sf_word_done:	bne-	sf_bytewise

sf_return:	mr	r3, r8			/* restore orig ptr */
		blr				/* for memset functionality */

sf_bytewise:
#if USE_STSWX
		mr	r5, r0
		mr	r6, r0
		mr	r7, r0

		mtxer	r_len
		stswx	r5, 0, r_dst
#else
		mtctr	r_len

		subi	r_dst, r_dst, 1
1:		stbu	r_val, 1(r_dst)
		bdnz	1b
#endif
		mr	r3, r8			/* restore orig ptr */
		blr				/* for memset functionality */

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
		.data
cache_info:	.long	-1, -1, -1, -1
cache_sh:	.long	0
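
/*
 * Note added for clarity: cache_info is presumed to receive the four
 * words returned by the machdep.cacheinfo sysctl; only word 1, the
 * D-cache line size, is used here (-1 means "not yet queried").
 * cache_sh caches the derived log2(line size) shift value.
 */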

#endif
/*----------------------------------------------------------------------*/