/*	$NetBSD: bzero.S,v 1.8 2011/01/15 07:31:12 matt Exp $ */

/*-
 * Copyright (C) 2001	Martin J. Laubach <mjl@NetBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*----------------------------------------------------------------------*/

#include <machine/asm.h>


#if defined(LIBC_SCCS) && !defined(lint)
__RCSID("$NetBSD: bzero.S,v 1.8 2011/01/15 07:31:12 matt Exp $")
#endif /* LIBC_SCCS && !lint */

#ifdef _KERNEL
#include <assym.h>
#endif

#define USE_STSWX 0	/* don't. slower than trivial copy loop */

/*----------------------------------------------------------------------*/
/*
     void bzero(void *b %r3, size_t len %r4);
     void * memset(void *b %r3, int c %r4, size_t len %r5);
*/
/*----------------------------------------------------------------------*/
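/*
 * Overview: bzero() falls into memset() with a zero fill value.  Fills
 * that are at least two cache lines long and zero-valued get the
 * destination aligned to a data cache line and whole lines cleared
 * with dcbz; short or non-zero fills take the plain word/byte loops
 * at simple_fill below.
 */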

#define r_dst	%r3
#define r_len	%r4
#define r_val	%r0

		.text
		.align 4
ENTRY(bzero)
		li	r_val, 0		/* Value to stuff in */
		b	cb_memset
END(bzero)

ENTRY(memset)
		cmplwi	cr1, %r5, 0
		mr.	%r0, %r4
		mr	%r8, %r3
		beqlr-	cr1			/* Nothing to do */

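		/*
		 * Replicate the fill byte into all four bytes of %r0:
		 * the first rlwimi copies it into the next byte up
		 * (0x000000ab -> 0x0000abab), the second copies the low
		 * halfword into the high halfword (-> 0xabababab).
		 */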
		rlwimi	%r0, %r4, 8, 16, 23	/* word extend fill value */
		rlwimi	%r0, %r0, 16, 0, 15
		mr	%r4, %r5
		bne-	simple_fill		/* != 0, use trivial fill */
cb_memset:

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
		/* First find out cache line size */
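		/*
		 * Userland: the D-cache line size is obtained once via
		 * sysctl({ CTL_MACHDEP, CPU_CACHEINFO }), with a fallback to
		 * the older CPU_CACHELINE node, and cached in cache_info;
		 * the derived log2(line size) is kept in cache_sh.  A value
		 * of -1 in cache_info means it has not been queried yet.
		 */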
#ifdef PIC
		mflr	%r9
		bl	_GLOBAL_OFFSET_TABLE_@local-4
		mflr	%r10
		mtlr	%r9
		lwz	%r5,cache_info@got(%r10)
#else
		lis	%r5,cache_info@h
		ori	%r5,%r5,cache_info@l
#endif
		lwz	%r6, 4(%r5)
		cmpwi	%r6, -1
		bne+	cb_cacheline_known

/*----------------------------------------------------------------------*/
#define CTL_MACHDEP	7
#define CPU_CACHELINE	1
#define	CPU_CACHEINFO	5

#define STKFRAME_SZ	48
#define MIB		8
#define OLDPLEN		16
#define R3_SAVE		20
#define R4_SAVE		24
#define R0_SAVE		28
#define R8_SAVE		32

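		/*
		 * Build a small stack frame holding the two-word sysctl MIB
		 * and oldlenp, and preserve %r0/%r3/%r4/%r8 across the call.
		 */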
		mflr	%r6
		stw	%r6, 4(%r1)
		stwu	%r1, -STKFRAME_SZ(%r1)

		stw	%r8, R8_SAVE(%r1)
		stw	%r3, R3_SAVE(%r1)
		stw	%r4, R4_SAVE(%r1)
		stw	%r0, R0_SAVE(%r1)

		li	%r0, CTL_MACHDEP		/* Construct MIB */
		stw	%r0, MIB(%r1)
		li	%r0, CPU_CACHEINFO
		stw	%r0, MIB+4(%r1)

		li	%r0, 4*4			/* Oldlenp := 4*4 */
		stw	%r0, OLDPLEN(%r1)

		addi	%r3, %r1, MIB
		li	%r4, 2			/* namelen */
		/* %r5 already contains &cache_info */
		addi	%r6, %r1, OLDPLEN
		li	%r7, 0
		li	%r8, 0
		bl	PIC_PLT(_C_LABEL(sysctl))

		cmpwi	%r3, 0			/* Check result */
		beq	1f

		/* Failure, try older sysctl */

		li	%r0, CTL_MACHDEP		/* Construct MIB */
		stw	%r0, MIB(%r1)
		li	%r0, CPU_CACHELINE
		stw	%r0, MIB+4(%r1)

		li	%r0, 4			/* Oldlenp := 4 */
		stw	%r0, OLDPLEN(%r1)

		addi	%r3, %r1, MIB
		li	%r4, 2			/* namelen */
#ifdef PIC
		mflr	%r9
		bl	_GLOBAL_OFFSET_TABLE_@local-4
		mflr	%r10
		mtlr	%r9
		lwz	%r5,cache_info@got(%r10)
		addi	%r5, %r5, 4
#else
		lis	%r5,cache_info+4@h
		ori	%r5,%r5,cache_info+4@l
#endif
		addi	%r6, %r1, OLDPLEN
		li	%r7, 0
		li	%r8, 0
		bl	PIC_PLT(_C_LABEL(sysctl))
1:
		lwz	%r8, R8_SAVE(%r1)
		lwz	%r3, R3_SAVE(%r1)
		lwz	%r4, R4_SAVE(%r1)
		lwz	%r0, R0_SAVE(%r1)

#ifdef PIC
		bl	_GLOBAL_OFFSET_TABLE_@local-4
		mflr	%r10
		lwz	%r9, cache_info@got(%r10)
		lwz	%r9, 4(%r9)
#else
		lis	%r5, cache_info+4@ha
		lwz	%r9, cache_info+4@l(%r5)
#endif
		la	%r1, STKFRAME_SZ(%r1)
		lwz	%r5, 4(%r1)
		mtlr	%r5

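		/*
		 * cache_sh = 31 - cntlzw(line size), i.e. log2 of the
		 * (power-of-two) line size; it is used below to turn a byte
		 * count into a count of cache blocks.
		 */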
		cntlzw	%r6, %r9			/* compute shift value */
		li	%r5, 31
		subf	%r5, %r6, %r5

#ifdef PIC
		lwz	%r6, cache_sh@got(%r10)
		stw	%r5, 0(%r6)
#else
		lis	%r6, cache_sh@ha
		stw	%r5, cache_sh@l(%r6)
#endif
/*----------------------------------------------------------------------*/
/* Okay, we know the cache line size (%r9) and shift value (%r10) */
cb_cacheline_known:
#ifdef PIC
		lwz	%r5, cache_info@got(%r10)
		lwz	%r9, 4(%r5)
		lwz	%r5, cache_sh@got(%r10)
		lwz	%r10, 0(%r5)
#else
		lis	%r9, cache_info+4@ha
		lwz	%r9, cache_info+4@l(%r9)
		lis	%r10, cache_sh@ha
		lwz	%r10, cache_sh@l(%r10)
#endif

#else /* _KERNEL */
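		/*
		 * Kernel: read the D-cache line size straight from the
		 * per-CPU cpu_info structure (CPU_CI comes from assym.h)
		 * and compute the same log2 shift value.
		 */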
#ifdef	MULTIPROCESSOR
		mfsprg	%r10, 0			/* Get cpu_info pointer */
#else
		lis	%r10, cpu_info_store@ha
		addi	%r10, %r10, cpu_info_store@l
#endif
		lwz	%r9, CPU_CI+4(%r10)	/* Load D$ line size */
		cntlzw	%r10, %r9			/* Calculate shift.. */
		li	%r6, 31
		subf	%r10, %r10, %r6
#endif /* _KERNEL */
		/* Back in memory filling business */

		cmplwi	cr1, r_len, 0		/* Nothing to do? */
		add	%r5, %r9, %r9
		cmplw	r_len, %r5		/* <= 2*CL bytes to move? */
		beqlr-	cr1			/* then do nothing */

		blt+	simple_fill		/* a trivial fill routine */

		/* Word align the block, fill bytewise until dst even */

		andi.	%r5, r_dst, 0x03
		li	%r6, 4
		beq+	cb_aligned_w		/* already aligned to word? */

		subf	%r5, %r5, %r6		/* bytes to fill to align4 */
#if USE_STSWX
		mtxer	%r5
		stswx	%r0, 0, r_dst
		add	r_dst, %r5, r_dst
#else
		mtctr	%r5

		subi	r_dst, r_dst, 1
1:		stbu	r_val, 1(r_dst)		/* Fill bytewise */
		bdnz	1b

		addi	r_dst, r_dst, 1
#endif
		subf	r_len, %r5, r_len

cb_aligned_w:	/* Cache block align, fill wordwise until dst aligned */

		/* I know I have something to do since we had >= 2*CL initially */
		/* so no need to check for r_len = 0 */

		subi	%r6, %r9, 1		/* CL mask */
		and.	%r5, r_dst, %r6
		srwi	%r5, %r5, 2
		srwi	%r6, %r9, 2
		beq	cb_aligned_cb		/* already on CL boundary? */

		subf	%r5, %r5, %r6		/* words to fill to alignment */
		mtctr	%r5
		slwi	%r5, %r5, 2
		subf	r_len, %r5, r_len

		subi	r_dst, r_dst, 4
1:		stwu	r_val, 4(r_dst)		/* Fill wordwise */
		bdnz	1b
		addi	r_dst, r_dst, 4

cb_aligned_cb:	/* no need to check r_len, see above */

		srw.	%r5, r_len, %r10		/* Number of cache blocks */
		mtctr	%r5
		beq	cblocks_done

		slw	%r5, %r5, %r10
		subf	r_len, %r5, r_len

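		/*
		 * dcbz zeroes an entire cache block without fetching it from
		 * memory first.  This path is only reached with a zero fill
		 * value: a non-zero memset branched to simple_fill earlier.
		 */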
1:		dcbz	0, r_dst		/* Clear blockwise */
		add	r_dst, r_dst, %r9
		bdnz	1b

cblocks_done:	/* still CL aligned, but less than CL bytes left */
		cmplwi	cr1, r_len, 0
		cmplwi	r_len, 8
		beq-	cr1, sf_return

		blt-	sf_bytewise		/* <8 remaining? */
		b	sf_aligned_w

/*----------------------------------------------------------------------*/
wbzero:		li	r_val, 0

		cmplwi	r_len, 0
		beqlr-				/* Nothing to do */

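		/*
		 * simple_fill: no cache tricks.  Align the destination to a
		 * word boundary, fill word by word, and mop up any trailing
		 * bytes bytewise.
		 */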
simple_fill:
#if USE_STSWX
		cmplwi	cr1, r_len, 12		/* < 12 bytes to move? */
#else
		cmplwi	cr1, r_len, 8		/* < 8 bytes to move? */
#endif
		andi.	%r5, r_dst, 0x03		/* bytes to fill to align4 */
		blt	cr1, sf_bytewise	/* trivial byte mover */

		li	%r6, 4
		subf	%r5, %r5, %r6
		beq+	sf_aligned_w		/* dest is word aligned */

#if USE_STSWX
		mtxer	%r5
		stswx	%r0, 0, r_dst
		add	r_dst, %r5, r_dst
#else
		mtctr	%r5			/* nope, then fill bytewise */
		subi	r_dst, r_dst, 1		/* until it is */
1:		stbu	r_val, 1(r_dst)
		bdnz	1b

		addi	r_dst, r_dst, 1
#endif
		subf	r_len, %r5, r_len

sf_aligned_w:	/* no need to check r_len since it was >= 8 bytes initially */
#if USE_STSWX
		mr	%r6, %r0
		mr	%r7, %r0

		srwi	%r5, r_len, 3
		mtctr	%r5

		slwi	%r5, %r5, 3		/* adjust len */
		subf.	r_len, %r5, r_len

1:		stswi	%r6, r_dst, 8
		addi	r_dst, r_dst, 8
		bdnz	1b
#else
		srwi	%r5, r_len, 2		/* words to fill */
		mtctr	%r5

		slwi	%r5, %r5, 2
		subf.	r_len, %r5, r_len	/* adjust len for fill */

		subi	r_dst, r_dst, 4
1:		stwu	r_val, 4(r_dst)
		bdnz	1b
		addi	r_dst, r_dst, 4
#endif

sf_word_done:	bne-	sf_bytewise

sf_return:	mr	%r3, %r8			/* restore orig ptr */
		blr				/* for memset functionality */

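		/*
		 * sf_bytewise: final bytewise fill, used both for very short
		 * requests and for the tail bytes left over above.
		 */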
sf_bytewise:
#if USE_STSWX
		mr	%r5, %r0
		mr	%r6, %r0
		mr	%r7, %r0

		mtxer	r_len
		stswx	%r5, 0, r_dst
#else
		mtctr	r_len

		subi	r_dst, r_dst, 1
1:		stbu	r_val, 1(r_dst)
		bdnz	1b
#endif
		mr	%r3, %r8			/* restore orig ptr */
		blr				/* for memset functionality */
END(memset)

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
		.data
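/* Cached cache-info words from sysctl (-1 = not yet known); the word at
   offset 4 is the D-cache line size.  cache_sh holds the log2 shift. */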
cache_info:	.long	-1, -1, -1, -1
cache_sh:	.long	0

#endif
/*----------------------------------------------------------------------*/