/*	$NetBSD: bzero.S,v 1.3 2001/11/30 02:25:50 mjl Exp $ */

/*-
 * Copyright (C) 2001	Martin J. Laubach <mjl@netbsd.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*----------------------------------------------------------------------*/

#include <machine/asm.h>
#ifdef _KERNEL
#include <assym.h>
#endif

#define USE_STSWX 0	/* don't: slower than the trivial copy loop */

/*----------------------------------------------------------------------*/
/*
     void bzero(void *b r3, size_t len r4);
     void * memset(void *b r3, int c r4, size_t len r5);
*/
/*----------------------------------------------------------------------*/
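/*
 * Strategy (a summary of the code below): extend the fill byte to a
 * full word, then for large zeroing fills let the data cache do the
 * work: fill bytewise until dst is word aligned, wordwise until dst
 * is cache-block aligned, clear whole blocks with dcbz, and mop up
 * the remainder with word and byte stores.  Short or non-zero fills
 * take the plain word/byte loops at simple_fill instead.
 */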

#define r_dst	r3
#define r_len	r4
#define r_val	r0

		.text
		.align 4
ENTRY(bzero)
		li	r_val, 0		/* Value to stuff in */
		b	cb_memset

ENTRY(memset)
		cmplwi	cr1, r5, 0		/* len == 0? (tested below) */
		mr.	r0, r4			/* r0 = c; cr0 records c == 0 */
		mr	r8, r3			/* save dst for the return value */
		beqlr-	cr1			/* Nothing to do */

		rlwimi	r0, r4, 8, 16, 23	/* word extend fill value */
		rlwimi	r0, r0, 16, 0, 15
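		/*
		 * The two rlwimi inserts replicate the low byte of c
		 * into all four bytes of r0.  Illustrative example
		 * (values assumed, not from the source): c = 0x5A
		 * gives 0x00005A5A after the first insert and
		 * 0x5A5A5A5A after the second, so each word store
		 * below writes four fill bytes at once.
		 */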
		mr	r4, r5
		bne-	simple_fill		/* c != 0: dcbz below only writes zeros, use trivial fill */
cb_memset:

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
		/* First find out the cache line size */
#ifdef PIC
		mflr	r9
		bl	_GLOBAL_OFFSET_TABLE_@local-4
		mflr	r10
		mtlr	r9
		lwz	r5,cache_size@got(r10)
#else
		lis	r5,cache_size@h
		ori	r5,r5,cache_size@l
#endif
		lwz	r6, 0(r5)
		cmpwi	r6, -1			/* still uninitialized? */
		bne+	cb_cacheline_known

/*----------------------------------------------------------------------*/
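/*
 * cache_size (see .data at the bottom) starts out as -1.  On the
 * first call we ask the kernel for the cache line size via
 * sysctl({ CTL_MACHDEP, CPU_CACHELINE }) and remember both the size
 * and its log2 (cache_sh) for the shift operations further down.
 */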
#define CTL_MACHDEP	7
#define CPU_CACHELINE	1

#define STKFRAME_SZ	48
#define MIB		8
#define OLDPLEN		16
#define R3_SAVE		20
#define R4_SAVE		24
#define R0_SAVE		28
#define R8_SAVE		32

		mflr	r6
		stw	r6, 4(r1)		/* save LR */
		stwu	r1, -STKFRAME_SZ(r1)

		stw	r8, R8_SAVE(r1)
		stw	r3, R3_SAVE(r1)
		stw	r4, R4_SAVE(r1)
		stw	r0, R0_SAVE(r1)

		li	r0, CTL_MACHDEP		/* Construct MIB */
		stw	r0, MIB(r1)
		li	r0, CPU_CACHELINE
		stw	r0, MIB+4(r1)

		li	r0, 4			/* Oldlenp := 4 */
		stw	r0, OLDPLEN(r1)

		addi	r3, r1, MIB
		li	r4, 2			/* namelen */
		/* r5 already contains &cache_size */
		addi	r6, r1, OLDPLEN
		li	r7, 0
		li	r8, 0
		bl	PIC_PLT(_C_LABEL(sysctl))

		lwz	r8, R8_SAVE(r1)
		lwz	r3, R3_SAVE(r1)
		lwz	r4, R4_SAVE(r1)
		lwz	r0, R0_SAVE(r1)

#ifdef PIC
		bl	_GLOBAL_OFFSET_TABLE_@local-4
		mflr	r10
		lwz	r9, cache_size@got(r10)
		lwz	r9, 0(r9)
#else
		lis	r5, cache_size@ha
		lwz	r9, cache_size@l(r5)
#endif
		la	r1, STKFRAME_SZ(r1)	/* pop the stack frame */
		lwz	r5, 4(r1)
		mtlr	r5			/* restore LR */

		cntlzw	r6, r9			/* compute shift value */
		li	r5, 31
		subf	r5, r6, r5		/* r5 = 31 - cntlzw(size) */
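		/*
		 * For a power-of-two cache line size this yields
		 * log2(size): e.g. size 32 has 26 leading zeroes in a
		 * 32-bit word, so the shift value is 31 - 26 = 5.
		 */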

#ifdef PIC
		lwz	r6, cache_sh@got(r10)
		stw	r5, 0(r6)
#else
		lis	r6, cache_sh@ha
		stw	r5, cache_sh@l(r6)
#endif
/*----------------------------------------------------------------------*/
/* The cache line size and shift value are known; (re)load them into r9 and r10 */
cb_cacheline_known:
#ifdef PIC
		lwz	r5, cache_size@got(r10)
		lwz	r9, 0(r5)
		lwz	r5, cache_sh@got(r10)
		lwz	r10, 0(r5)
#else
		lis	r9, cache_size@ha
		lwz	r9, cache_size@l(r9)
		lis	r10, cache_sh@ha
		lwz	r10, cache_sh@l(r10)
#endif

#else /* _KERNEL */
		li	r9, CACHELINESIZE	/* compile-time constant in the kernel */
#if CACHELINESIZE == 32
#define CACHELINESHIFT 5
#else
#error Define CACHELINESHIFT for your CACHELINESIZE
#endif
		li	r10, CACHELINESHIFT
#endif /* _KERNEL */
		/* Back in the memory filling business */

		cmplwi	cr1, r_len, 0		/* Nothing to do? */
		add	r5, r9, r9		/* r5 = 2 * cache line size */
		cmplw	r_len, r5		/* fewer than 2*CL bytes to move? */
		beqlr-	cr1			/* then do nothing */

		blt+	simple_fill		/* a trivial fill routine */

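		/*
		 * Below the 2*CL threshold there is no guarantee of
		 * even one fully aligned cache block, so the dcbz
		 * path below would not pay off.
		 */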
		/* Word-align the block: fill bytewise until dst is word aligned */

		andi.	r5, r_dst, 0x03
		li	r6, 4
		beq+	cb_aligned_w		/* already aligned to word? */

		subf	r5, r5, r6		/* bytes to fill to align4 */
#if USE_STSWX
		mtxer	r5
		stswx	r0, 0, r_dst
		add	r_dst, r5, r_dst
#else
		mtctr	r5

		subi	r_dst, r_dst, 1
1:		stbu	r_val, 1(r_dst)		/* Fill bytewise */
		bdnz	1b

		addi	r_dst, r_dst, 1
#endif
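		/*
		 * The subi/stbu/addi pattern pre-decrements dst so
		 * that each store-with-update advances the pointer
		 * and stores in a single instruction; the same idiom
		 * recurs in the fill loops below.
		 */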
		subf	r_len, r5, r_len

cb_aligned_w:	/* Cache block align, fill wordwise until dst aligned */

		/* We certainly have something to do since we had >= 2*CL initially, */
		/* so there is no need to check for r_len == 0 here */

		rlwinm.	r5, r_dst, 30, 29, 31	/* word offset of dst within a 32-byte block */
		srwi	r6, r9, 2		/* words per cache line */
		beq	cb_aligned_cb		/* already on CL boundary? */

		subf	r5, r5, r6		/* words to fill to alignment */
		mtctr	r5
		slwi	r5, r5, 2		/* convert back to bytes */
		subf	r_len, r5, r_len

		subi	r_dst, r_dst, 4
1:		stwu	r_val, 4(r_dst)		/* Fill wordwise */
		bdnz	1b
		addi	r_dst, r_dst, 4

cb_aligned_cb:	/* no need to check r_len, see above */

		srw.	r5, r_len, r10		/* Number of cache blocks */
		mtctr	r5
		beq	cblocks_done

		slw	r5, r5, r10		/* bytes covered by the dcbz loop */
		subf	r_len, r5, r_len

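		/*
		 * dcbz (Data Cache Block Zero) zeroes an entire cache
		 * block (r9 bytes) per instruction, without first
		 * fetching the old contents from memory.
		 */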
1:		dcbz	0, r_dst		/* Clear blockwise */
		add	r_dst, r_dst, r9
		bdnz	1b

cblocks_done:	/* still CL aligned, but fewer than CL bytes left */
		cmplwi	cr1, r_len, 0
		cmplwi	r_len, 8
		beq-	cr1, sf_return		/* done? */

		blt-	sf_bytewise		/* < 8 remaining? */
		b	sf_aligned_w

/*----------------------------------------------------------------------*/
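/*
 * Fallback path: wbzero/simple_fill handle short or non-zero fills
 * without the cache tricks: word stores for the aligned middle,
 * byte stores for the unaligned edges and the tail.
 */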
wbzero:		li	r_val, 0

		cmplwi	r_len, 0
		beqlr-				/* Nothing to do */

simple_fill:
#if USE_STSWX
		cmplwi	cr1, r_len, 12		/* < 12 bytes to move? */
#else
		cmplwi	cr1, r_len, 8		/* < 8 bytes to move? */
#endif
		andi.	r5, r_dst, 0x03		/* bytes to fill to align4 */
		blt	cr1, sf_bytewise	/* trivial byte mover */

		li	r6, 4
		subf	r5, r5, r6
		beq+	sf_aligned_w		/* dest is word aligned */

#if USE_STSWX
		mtxer	r5
		stswx	r0, 0, r_dst
		add	r_dst, r5, r_dst
#else
		mtctr	r5			/* nope, then fill bytewise */
		subi	r_dst, r_dst, 1		/* until it is */
1:		stbu	r_val, 1(r_dst)
		bdnz	1b

		addi	r_dst, r_dst, 1
#endif
		subf	r_len, r5, r_len
sf_aligned_w:	/* no need to check r_len since it was >= 8 bytes initially */
#if USE_STSWX
		mr	r6, r0
		mr	r7, r0

		srwi	r5, r_len, 3
		mtctr	r5

		slwi	r5, r5, 3		/* adjust len */
		subf.	r_len, r5, r_len

1:		stswi	r6, r_dst, 8
		addi	r_dst, r_dst, 8
		bdnz	1b
#else
		srwi	r5, r_len, 2		/* words to fill */
		mtctr	r5

		slwi	r5, r5, 2
		subf.	r_len, r5, r_len	/* adjust len for fill */

		subi	r_dst, r_dst, 4
1:		stwu	r_val, 4(r_dst)
		bdnz	1b
		addi	r_dst, r_dst, 4
#endif

sf_word_done:	bne-	sf_bytewise		/* leftover bytes? finish bytewise */

sf_return:	mr	r3, r8			/* restore orig ptr */
		blr				/* for memset functionality */

sf_bytewise:
#if USE_STSWX
		mr	r5, r0
		mr	r6, r0
		mr	r7, r0

		mtxer	r_len
		stswx	r5, 0, r_dst
#else
		mtctr	r_len

		subi	r_dst, r_dst, 1
1:		stbu	r_val, 1(r_dst)
		bdnz	1b
#endif
		mr	r3, r8			/* restore orig ptr */
		blr				/* for memset functionality */

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
		.data
cache_size:	.long	-1	/* -1: not yet determined */
cache_sh:	.long	0	/* log2 of cache_size */

#endif
/*----------------------------------------------------------------------*/