Home | History | Annotate | Line # | Download | only in string
memset_arm.S revision 1.2.38.2
      1  1.2.38.1  martin /*	$NetBSD: memset_arm.S,v 1.2.38.2 2020/04/21 19:37:42 martin Exp $	*/
      2       1.1    matt 
      3       1.1    matt /*-
      4       1.1    matt  * Copyright (c) 2012 The NetBSD Foundation, Inc.
      5       1.1    matt  * All rights reserved.
      6       1.1    matt  *
      7       1.1    matt  * This code is derived from software contributed to The NetBSD Foundation
      8       1.1    matt  * by Matt Thomas of 3am Software Foundry.
      9       1.1    matt  *
     10       1.1    matt  * Redistribution and use in source and binary forms, with or without
     11       1.1    matt  * modification, are permitted provided that the following conditions
     12       1.1    matt  * are met:
     13       1.1    matt  * 1. Redistributions of source code must retain the above copyright
     14       1.1    matt  *    notice, this list of conditions and the following disclaimer.
     15       1.1    matt  * 2. Redistributions in binary form must reproduce the above copyright
     16       1.1    matt  *    notice, this list of conditions and the following disclaimer in the
     17       1.1    matt  *    documentation and/or other materials provided with the distribution.
     18       1.1    matt  *
     19       1.1    matt  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
     20       1.1    matt  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
     21       1.1    matt  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     22       1.1    matt  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
     23       1.1    matt  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     24       1.1    matt  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     25       1.1    matt  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     26       1.1    matt  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     27       1.1    matt  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     28       1.1    matt  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     29       1.1    matt  * POSSIBILITY OF SUCH DAMAGE.
     30       1.1    matt  */
     31       1.1    matt #include <machine/asm.h>
     32       1.1    matt 
     33       1.1    matt #if defined(NEON)	/* NEON: vst1.32 from d0-d3, 64-bit alignment hint, ip written back */
     34       1.1    matt #define	STORE8		vst1.32		{d0}, [ip:64]!
     35       1.1    matt #define	STORE16		vst1.32		{d0-d1}, [ip:64]!
     36       1.1    matt #define	STORE32		vst1.32		{d0-d3}, [ip:64]!
     37       1.1    matt #elif defined(VFP)	/* VFP: vstmia from d0-d3 with ip writeback */
     38       1.1    matt #define	STORE8		vstmia		ip!, {d0}
     39       1.1    matt #define	STORE16		vstmia		ip!, {d0-d1}
     40       1.1    matt #define	STORE32		vstmia		ip!, {d0-d3}
     41       1.1    matt #elif defined(_ARM_ARCH_DWORD_OK)	/* strd of the r2/r3 pair, 8 bytes per store */
     42       1.1    matt #define	STORE8		strd		r2, [ip], #8
     43       1.1    matt #define	STORE16		STORE8; STORE8
     44       1.1    matt #define	STORE32		STORE16; STORE16
     45       1.1    matt #else	/* fallback: stmia of r2 and r3, 8 bytes per store */
     46       1.1    matt #define	STORE8		stmia		ip!, {r2,r3}
     47       1.1    matt #define	STORE16		STORE8; STORE8
     48       1.1    matt #define	STORE32		STORE16; STORE16
     49       1.1    matt #endif	/* STORE8/16/32: write 8/16/32 bytes at ip and advance ip past them */
     50       1.1    matt /*
     51       1.1    matt  * memset: Sets a block of memory to the specified value
     52       1.1    matt  * Using NEON instructions
     53       1.1    matt  *
     54       1.1    matt  * On entry:
     55       1.1    matt  *   r0 - dest address
     56       1.1    matt  *   r1 - byte to write
     57       1.1    matt  *   r2 - number of bytes to write
     58       1.1    matt  *
     59       1.1    matt  * On exit:
     60       1.1    matt  *   r0 - dest address
     61       1.1    matt  */
     62       1.1    matt /* LINTSTUB: Func: void *memset(void *, int, size_t) */
     63       1.1    matt ENTRY(memset)
     64       1.1    matt 	ands		r3, r1, #0xff	/* We deal with bytes */
     65       1.1    matt 	orrne		r3, r3, r3, lsl #8	/* replicate to all bytes */
     66       1.1    matt 	orrne		r3, r3, r3, lsl #16	/* replicate to all bytes */
     67       1.1    matt 	movs		r1, r2		/* we need r2 & r3 */
     68       1.1    matt 	RETc(eq)			/* return if length is 0 */
     69       1.1    matt 	mov		ip, r0		/* r0 needs to stay the same */
     70       1.1    matt 
     71       1.1    matt 	cmp		r1, #12		/* is this a small memset? *?
     72       1.1    matt 	blt		.Lbyte_by_byte	/*   then do it byte by byte */
     73       1.1    matt 
     74       1.1    matt 	/* Ok first we will dword align the address */
     75       1.1    matt 	ands		r2, ip, #7	/* grab the bottom three bits */
     76       1.1    matt 	beq		.Lmemset_dwordaligned	/* The addr is dword aligned */
     77       1.1    matt 
     78       1.1    matt 	rsb		r2, r2, #8	/* how far until dword aligned? */
     79       1.1    matt 	sub		r1, r1, r2	/* subtract it from remaining length */
     80       1.1    matt 	mov		r2, r3		/* duplicate fill value */
     81       1.1    matt 
     82       1.1    matt 	tst		ip, #1		/* halfword aligned? */
     83       1.1    matt 	strneb		r3, [ip], #1	/*   no, write a byte */
     84       1.1    matt 	tst		ip, #2		/* word aligned? */
     85       1.1    matt 	strneh		r3, [ip], #2	/*   no, write a halfword */
     86       1.1    matt 	tst		ip, #4		/* dword aligned? */
     87       1.1    matt 	strne		r3, [ip], #4	/*   no, write a word */
     88       1.1    matt 
     89       1.1    matt 	/* We are now doubleword aligned */
     90       1.1    matt .Lmemset_dwordaligned:
     91       1.1    matt #if defined(NEON)
     92       1.1    matt 	vdup.8		q0, r3		/* move fill to SIMD */
     93       1.1    matt 	vmov		q1, q0		/* put fill in q1 (d2-d3) */
     94       1.1    matt #elif defined(VFP)
     95       1.1    matt 	mov		r2, r3		/* duplicate fill value */
     96       1.1    matt 	vmov		d0, r2, r3	/* move to VFP */
     97       1.1    matt 	vmov		d1, r2, r3
     98       1.1    matt 	vmov		d2, r2, r3
     99       1.1    matt 	vmov		d3, r2, r3
    100       1.1    matt #endif
    101       1.1    matt 
    102       1.1    matt #if 1
    103       1.1    matt 	cmp		r1, #128
    104       1.1    matt 	blt		.Lmemset_mainloop
    105       1.1    matt 	ands		r2, ip, #63	/* check for 64-byte alignment */
    106       1.1    matt 	beq		.Lmemset_mainloop
    107       1.1    matt 	/*
    108       1.1    matt 	 * Let's align to a 64-byte boundary so that stores don't cross
    109       1.1    matt 	 * cacheline boundaries.  We also know we have at least 128-bytes to
    110       1.1    matt 	 * copy so we don't have to worry about the length at the moment.
    111       1.1    matt 	 */
    112       1.1    matt 	rsb		r2, r2, #64	/* how many bytes until 64 bytes */
    113       1.2    matt 	sub		r1, r1, r2	/* subtract from remaining length */
    114       1.1    matt #if !defined(NEON) && !defined(VFP)
    115       1.1    matt 	mov		r2, r3		/* put fill back in r2 */
    116       1.1    matt #endif
    117       1.1    matt 
    118       1.1    matt 	tst		ip, #8		/* quadword aligned? */
    119       1.1    matt 	beq		1f		/*   yes */
    120       1.1    matt 	STORE8				/*   no, store a dword */
    121       1.1    matt 1:	tst		ip, #16		/* octaword aligned? *?
    122       1.1    matt 	beq		2f		/*   yes */
    123       1.1    matt 	STORE16				/*   no, store a quadword */
    124       1.1    matt 2:	tst		ip, #32		/* 32 word aligned? */
    125       1.1    matt 	beq		.Lmemset_mainloop		/*   yes */
    126       1.2    matt 	STORE32				/*   no, make 64-byte aligned */
    127       1.1    matt #endif
    128       1.1    matt 
    129       1.1    matt .Lmemset_mainloop:
    130       1.1    matt #if !defined(NEON) && !defined(VFP)
    131       1.1    matt 	mov		r2, r3		/* put fill back in r2 */
    132       1.1    matt #endif
    133       1.1    matt 	subs		r1, r1, #64	/* subtract an initial 64 */
    134       1.1    matt 	blt		.Lmemset_lessthan_64bytes
    135       1.1    matt 
    136       1.1    matt 3:	STORE32				/* store first octaword */
    137       1.1    matt 	STORE32				/* store second octaword */
    138       1.1    matt 	RETc(eq)			/* return if done */
    139       1.1    matt 	subs		r1, r1, #64	/* subtract another 64 */
    140       1.1    matt 	bge		3b		/* and do other if still >= 0 */
    141       1.1    matt .Lmemset_lessthan_64bytes:
    142       1.1    matt 	tst		r1, #32		/* do we have 16 bytes left? */
    143       1.1    matt 	beq		.Lmemset_lessthan_32bytes
    144       1.1    matt 	STORE32				/*    yes, store an octaword */
    145       1.1    matt 	bics		r1, r1, #32	/* subtract 16 */
    146       1.1    matt 	RETc(eq)			/* return if length is 0 */
    147       1.1    matt .Lmemset_lessthan_32bytes:
    148       1.1    matt 	tst		r1, #16		/* do we have 16 bytes left? */
    149       1.1    matt 	beq		.Lmemset_lessthan_16bytes
    150       1.1    matt 	STORE16				/*   yes, store a quadword */
    151       1.1    matt 	bics		r1, r1, #16	/* subtract 16 */
    152       1.1    matt 	RETc(eq)			/* return if length is 0 */
    153       1.1    matt .Lmemset_lessthan_16bytes:
    154       1.1    matt 	tst		r1, #8		/* do we have 8 bytes left? */
    155       1.1    matt 	beq		.Lmemset_lessthan_8bytes/*   no */
    156       1.1    matt 	STORE8				/*   yes, store a dword */
    157       1.1    matt 	bics		r1, r1, #8	/* subtract 8 */
    158       1.1    matt 	RETc(eq)			/* return if length is 0 */
    159       1.1    matt .Lmemset_lessthan_8bytes:
    160       1.1    matt 	tst		r1, #4		/* do we have a word left? */
    161       1.1    matt 	strne		r2, [ip], #4	/*   yes, so write one */
    162       1.1    matt 	tst		r1, #2		/* do we have a halfword left? */
    163       1.1    matt 	strneh		r2, [ip], #2	/*   yes, so write one */
    164       1.1    matt 	tst		r1, #1		/* do we have a byte left? */
    165       1.1    matt 	strneb		r2, [ip], #1	/*   yes, so write one */
    166       1.1    matt 	RET				/* return */
    167       1.1    matt 
    168       1.1    matt .Lbyte_by_byte:
    169       1.1    matt 	subs		r1, r1, #1	/* can we write a byte? */
    170       1.1    matt 	RETc(lt)			/*   no, we're done */
    171       1.1    matt 	strb		r3, [ip], #1	/*   yes, so do it */
    172       1.1    matt 	b		.Lbyte_by_byte	/* try next byte */
    173       1.1    matt END(memset)
    174