/*	$NetBSD: bzero.S,v 1.12 2013/07/18 12:20:41 matt Exp $ */

/*-
 * Copyright (C) 2001	Martin J. Laubach <mjl@NetBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
/*----------------------------------------------------------------------*/

#include <machine/asm.h>


#if defined(LIBC_SCCS) && !defined(lint)
__RCSID("$NetBSD: bzero.S,v 1.12 2013/07/18 12:20:41 matt Exp $")
#endif /* LIBC_SCCS && !lint */

#ifdef _KERNEL
#include <assym.h>
#endif

#define USE_STSWX 0	/* don't: stswx is slower than the trivial copy loop */
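/*
 * stswx stores the number of bytes held in the low-order bits of XER
 * (set with mtxer) starting at the effective address.  The string
 * instructions are microcoded on many PowerPC implementations, which
 * is why the plain store loops below are preferred by default.
 */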

/*----------------------------------------------------------------------*/
/*
     void bzero(void *b %r3, size_t len %r4);
     void * memset(void *b %r3, int c %r4, size_t len %r5);
*/
/*----------------------------------------------------------------------*/
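/*
 * Rough shape of the cache-block path below (an illustrative sketch,
 * not the exact code; cl is the D-cache line size found at run time,
 * and the fill value has already been replicated into every byte of
 * a word):
 *
 *	if (len < 2 * cl)
 *		take simple_fill (plain byte/word loops);
 *	store bytes until dst is word aligned;
 *	store words until dst is cache-block aligned;
 *	while a whole block remains: dcbz(dst); dst += cl; len -= cl;
 *	store the remaining (less than cl) bytes word- and bytewise;
 *
 * dcbz can only zero a cache block, so this path is taken only when
 * the fill value is zero (bzero, or memset with c == 0).
 */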

#define r_dst	%r3
#define r_len	%r4
#define r_val	%r0

		.text
		.align 4
ENTRY(bzero)
		li	r_val, 0		/* Value to stuff in */
		b	cb_memset
END(bzero)

ENTRY(memset)
		cmplwi	%cr1, %r5, 0		/* len == 0? */
		mr.	%r0, %r4		/* %r0 = fill value, cr0 set from it */
		mr	%r8, %r3		/* save dst for the return value */
		beqlr-	%cr1			/* Nothing to do */

		rlwimi	%r0, %r4, 8, 16, 23	/* word extend fill value */
		rlwimi	%r0, %r0, 16, 0, 15
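		/*
		 * e.g. c = 0x2a: after the first rlwimi %r0 = 0x00002a2a,
		 * after the second rlwimi %r0 = 0x2a2a2a2a, i.e. the fill
		 * byte replicated into all four bytes of the word.
		 */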
		mr	%r4, %r5		/* len into r_len (%r4) */
		bne-	simple_fill		/* c != 0: dcbz only stores zeroes, use trivial fill */
cb_memset:

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
		/* First find out cache line size */
		mflr	%r9
#ifdef PIC
		bcl	20,31,1f
1:		mflr	%r5
		mtlr	%r9
		addis	%r5,%r5,cache_info+4-1b@ha
		lwzu	%r9,cache_info+4-1b@l(%r5)
#else
		lis	%r5,cache_info+4@ha
		lwzu	%r9,cache_info+4@l(%r5)
#endif
		lwz	%r10,cache_sh-(cache_info+4)(%r5)
		cmpwi	%r9, -1
		bne+	cb_cacheline_known
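		/*
		 * cache_info[] and cache_sh live in .data (see the end of
		 * this file); they start out as -1 and 0 and are filled in
		 * on the first call, so the sysctl(3) query below is done
		 * only once per process.
		 */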

		addi	%r5, %r5, -4	/* point r5 @ beginning of cache_info */

/*----------------------------------------------------------------------*/
#define CTL_MACHDEP	7
#define CPU_CACHELINE	1
#define	CPU_CACHEINFO	5

#define STKFRAME_SZ	64
#define MIB		8
#define OLDPLEN		16
#define R3_SAVE		20
#define R4_SAVE		24
#define R0_SAVE		28
#define R8_SAVE		32
#define R31_SAVE	36
#ifdef PIC
#define R30_SAVE	40
#endif

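/*
 * The calls below amount to roughly this C, shown only as an
 * orientation sketch (the real code keeps everything in registers
 * and a minimal stack frame):
 *
 *	int mib[2] = { CTL_MACHDEP, CPU_CACHEINFO };
 *	size_t olen = sizeof(cache_info);
 *	if (sysctl(mib, 2, cache_info, &olen, NULL, 0) != 0) {
 *		mib[1] = CPU_CACHELINE;
 *		olen = sizeof(cache_info[1]);
 *		sysctl(mib, 2, &cache_info[1], &olen, NULL, 0);
 *	}
 *
 * On success the word at cache_info[1] holds the D-cache line size.
 */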
		stw	%r9, 4(%r1)
		stwu	%r1, -STKFRAME_SZ(%r1)

		stw	%r31, R31_SAVE(%r1)
		mr	%r31, %r5		/* cache info */

#ifdef PIC
		stw	%r30, R30_SAVE(%r1)
		PIC_TOCSETUP(cb_memset,%r30)
#endif

		stw	%r8, R8_SAVE(%r1)
		stw	%r3, R3_SAVE(%r1)
		stw	%r4, R4_SAVE(%r1)
		stw	%r0, R0_SAVE(%r1)

		li	%r0, CTL_MACHDEP		/* Construct MIB */
		stw	%r0, MIB(%r1)
		li	%r0, CPU_CACHEINFO
		stw	%r0, MIB+4(%r1)

		li	%r0, 4*4			/* Oldlenp := 4*4 */
		stw	%r0, OLDPLEN(%r1)

		addi	%r3, %r1, MIB
		li	%r4, 2			/* namelen */
		/* %r5 already contains &cache_info */
		addi	%r6, %r1, OLDPLEN
		li	%r7, 0
		li	%r8, 0
		bl	PIC_PLT(_C_LABEL(sysctl))

		cmpwi	%r3, 0			/* Check result */
		beq	1f

		/* Failure, try older sysctl */

		li	%r0, CTL_MACHDEP	/* Construct MIB */
		stw	%r0, MIB(%r1)
		li	%r0, CPU_CACHELINE
		stw	%r0, MIB+4(%r1)

		li	%r0, 4			/* Oldlenp := 4 */
		stw	%r0, OLDPLEN(%r1)

		addi	%r3, %r1, MIB
		li	%r4, 2			/* namelen */
		addi	%r5, %r31, 4
		addi	%r6, %r1, OLDPLEN
		li	%r7, 0
		li	%r8, 0
		bl	PIC_PLT(_C_LABEL(sysctl))
1:
		lwz	%r3, R3_SAVE(%r1)
		lwz	%r4, R4_SAVE(%r1)
		lwz	%r8, R8_SAVE(%r1)
		lwz	%r0, R0_SAVE(%r1)
		lwz	%r9, 4(%r31)
		lwz	%r31, R31_SAVE(%r1)
#ifdef PIC
		lwz	%r30, R30_SAVE(%r1)
#endif
		addi	%r1, %r1, STKFRAME_SZ
		lwz	%r7, 4(%r1)
		mtlr	%r7

		cntlzw	%r6, %r9			/* compute shift value */
		li	%r5, 31
		subf	%r10, %r6, %r5
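		/*
		 * e.g. a 32-byte cache line: cntlzw(32) = 26 and 31 - 26 = 5,
		 * so %r10 ends up as log2 of the line size, ready for the
		 * srw/slw block-count arithmetic further down.
		 */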

#ifdef PIC
		mflr	%r9
		bcl	20,31,1f
1:		mflr	%r5
		mtlr	%r9

		addis	%r5, %r5, cache_info+4-1b@ha
		lwzu	%r9, cache_info+4-1b@l(%r5)
#else
		lis	%r5, cache_info+4@ha
		lwzu	%r9, cache_info+4@l(%r5)
#endif
		stw	%r10, cache_sh-(cache_info+4)(%r5)

/*----------------------------------------------------------------------*/
/* Okay, we know the cache line size (%r9) and shift value (%r10) */
cb_cacheline_known:
#else /* _KERNEL */
#ifdef	MULTIPROCESSOR
		mfsprg	%r10, 0			/* Get cpu_info pointer */
#else
		lis	%r10, cpu_info_store@ha
		addi	%r10, %r10, cpu_info_store@l
#endif
		lwz	%r9, CPU_CI+4(%r10)	/* Load D$ line size */
		cntlzw	%r10, %r9			/* Calculate shift.. */
		li	%r6, 31
		subf	%r10, %r10, %r6
#endif /* _KERNEL */
		/* Back in memory filling business */

		cmplwi	%cr1, r_len, 0		/* Nothing to do? */
		add	%r5, %r9, %r9
		cmplw	r_len, %r5		/* <= 2*CL bytes to move? */
		beqlr-	%cr1			/* then do nothing */

		blt+	simple_fill		/* a trivial fill routine */

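		/*
		 * With 2*CL bytes or more there is at least one fully
		 * contained cache block to dcbz no matter how dst is
		 * aligned, so the alignment work below pays off.
		 */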
		/* Word align the block, fill bytewise until dst is word aligned */

		andi.	%r5, r_dst, 0x03
		li	%r6, 4
		beq+	cb_aligned_w		/* already aligned to word? */

		subf	%r5, %r5, %r6		/* bytes to fill to align4 */
#if USE_STSWX
		mtxer	%r5
		stswx	%r0, 0, r_dst
		add	r_dst, %r5, r_dst
#else
		mtctr	%r5

		subi	r_dst, r_dst, 1
1:		stbu	r_val, 1(r_dst)		/* Fill bytewise */
		bdnz	1b

		addi	r_dst, r_dst, 1
#endif
		subf	r_len, %r5, r_len

cb_aligned_w:	/* Cache block align, fill wordwise until dst aligned */

		/* We know there is something to do since len was >= 2*CL initially, */
		/* so there is no need to check for r_len == 0 here */

		subi	%r6, %r9, 1		/* CL mask */
		and.	%r5, r_dst, %r6
		srwi	%r5, %r5, 2
		srwi	%r6, %r9, 2
		beq	cb_aligned_cb		/* already on CL boundary? */

		subf	%r5, %r5, %r6		/* words to fill to alignment */
		mtctr	%r5
		slwi	%r5, %r5, 2
		subf	r_len, %r5, r_len

		subi	r_dst, r_dst, 4
1:		stwu	r_val, 4(r_dst)		/* Fill wordwise */
		bdnz	1b
		addi	r_dst, r_dst, 4

cb_aligned_cb:	/* no need to check r_len, see above */

		srw.	%r5, r_len, %r10		/* Number of cache blocks */
		mtctr	%r5
		beq	cblocks_done

		slw	%r5, %r5, %r10
		subf	r_len, %r5, r_len

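		/*
		 * dcbz zeroes one whole cache block (%r9 bytes) per
		 * iteration, typically without reading it from memory
		 * first; it needs r_dst to be cache-block aligned, which
		 * the loops above have just arranged.
		 */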
1:		dcbz	0, r_dst		/* Clear blockwise */
		add	r_dst, r_dst, %r9
		bdnz	1b

cblocks_done:	/* still CL aligned, but less than CL bytes left */
		cmplwi	%cr1, r_len, 0
		cmplwi	r_len, 8
		beq-	%cr1, sf_return

		blt-	sf_bytewise		/* <8 remaining? */
		b	sf_aligned_w

/*----------------------------------------------------------------------*/
wbzero:		li	r_val, 0

		cmplwi	r_len, 0
		beqlr-				/* Nothing to do */

simple_fill:
#if USE_STSWX
		cmplwi	%cr1, r_len, 12		/* < 12 bytes to move? */
#else
		cmplwi	%cr1, r_len, 8		/* < 8 bytes to move? */
#endif
		andi.	%r5, r_dst, 0x03		/* bytes to fill to align4 */
		blt	%cr1, sf_bytewise	/* trivial byte mover */

		li	%r6, 4
		subf	%r5, %r5, %r6
		beq+	sf_aligned_w		/* dest is word aligned */

#if USE_STSWX
		mtxer	%r5
		stswx	%r0, 0, r_dst
		add	r_dst, %r5, r_dst
#else
		mtctr	%r5			/* nope, then fill bytewise */
		subi	r_dst, r_dst, 1		/* until it is */
1:		stbu	r_val, 1(r_dst)
		bdnz	1b

		addi	r_dst, r_dst, 1
#endif
		subf	r_len, %r5, r_len

sf_aligned_w:	/* no need to check r_len since it was >= 8 bytes initially */
#if USE_STSWX
		mr	%r6, %r0
		mr	%r7, %r0

		srwi	%r5, r_len, 3
		mtctr	%r5

		slwi	%r5, %r5, 3		/* adjust len */
		subf.	r_len, %r5, r_len

1:		stswi	%r6, r_dst, 8
		addi	r_dst, r_dst, 8
		bdnz	1b
#else
		srwi	%r5, r_len, 2		/* words to fill */
		mtctr	%r5

		slwi	%r5, %r5, 2
		subf.	r_len, %r5, r_len	/* adjust len for fill */

		subi	r_dst, r_dst, 4
1:		stwu	r_val, 4(r_dst)
		bdnz	1b
		addi	r_dst, r_dst, 4
#endif

sf_word_done:	bne-	sf_bytewise

sf_return:	mr	%r3, %r8			/* restore orig ptr */
		blr				/* for memset functionality */

sf_bytewise:
#if USE_STSWX
		mr	%r5, %r0
		mr	%r6, %r0
		mr	%r7, %r0

		mtxer	r_len
		stswx	%r5, 0, r_dst
#else
		mtctr	r_len

		subi	r_dst, r_dst, 1
1:		stbu	r_val, 1(r_dst)
		bdnz	1b
#endif
		mr	%r3, %r8			/* restore orig ptr */
		blr				/* for memset functionality */
END(memset)

/*----------------------------------------------------------------------*/
#ifndef _KERNEL
		.data
		.p2align 2
cache_info:	.long	-1, -1, -1, -1
cache_sh:	.long	0

#endif
/*----------------------------------------------------------------------*/