/*-
 * Copyright (c) 2012 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Matt Thomas of 3am Software Foundry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

RCSID("$NetBSD: strlen_arm.S,v 1.11 2023/01/15 08:43:03 skrll Exp $")

#if defined(__thumb__) && !defined(_ARM_ARCH_T2)
#error Only Thumb2 or ARM supported
#endif

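/*
 * BYTE0..BYTE3 mask the first..fourth byte of a loaded word in string
 * (memory) order; which byte lane that is depends on endianness.
 */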
#ifdef __ARMEL__
#define	BYTE0	0x000000ff
#define	BYTE1	0x0000ff00
#define	BYTE2	0x00ff0000
#define	BYTE3	0xff000000
#else
#define	BYTE0	0xff000000
#define	BYTE1	0x00ff0000
#define	BYTE2	0x0000ff00
#define	BYTE3	0x000000ff
#endif

#ifdef STRNLEN
#define	FUNCNAME	strnlen
#else
#define	FUNCNAME	strlen
#endif

	.text
ENTRY(FUNCNAME)
#if defined(__ARM_EABI__) && defined(__UNWIND_TABLES__)
# if !defined(__ARM_DWARF_EH__)
	.fnstart
# endif
	.cfi_startproc
#endif
#ifdef STRNLEN
	push	{r4,r5}			/* save some registers */
#if defined(__ARM_EABI__) && defined(__UNWIND_TABLES__)
# if !defined(__ARM_DWARF_EH__)
	.save	{r4,r5}
# endif
	.cfi_def_cfa_offset 8
	.cfi_offset 5, -4
	.cfi_offset 4, -8
#endif
	adds	r5, r0, r1		/* get ptr to end of string */
	mov	r4, r1			/* save maxlen */
#endif
	adds	r2, r0, #4		/* for the final post-inc */
1:	tst	r0, #3			/* test for word alignment */
	beq	.Lpre_main_loop		/*   finally word aligned */
#ifdef STRNLEN
	cmp	r0, r5			/* have we gone too far? */
	beq	.Lmaxed_out		/*   yes, return maxlen */
#endif
	ldrb	r3, [r0], #1		/* load a byte */
	cmp	r3, #0			/* is it 0? */
	bne	1b			/*   no, try next byte */
	subs	r2, r2, #3		/* r2 = start + 1 (4 minus the NUL we stepped past) */
	subs	r0, r0, r2		/* r0 - (start + 1) == length */
#ifdef STRNLEN
	pop	{r4, r5}		/* restore registers */
#endif
	RET				/* return */
.Lpre_main_loop:
#if defined(_ARM_ARCH_7)
	movw	r1, #0xfefe		/* magic constant; 254 in each byte */
	movt	r1, #0xfefe		/* magic constant; 254 in each byte */
#elif defined(_ARM_ARCH_6)
	mov	r1, #0xfe		/* put 254 in low byte */
	orr	r1, r1, r1, lsl #8	/* move to next byte */
	orr	r1, r1, r1, lsl #16	/* move to next halfword */
#endif /* _ARM_ARCH_6 */
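	/*
	 * (On pre-v6 CPUs no magic constant is needed; the main loop
	 * falls back to testing each byte lane with TST instead.)
	 */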
.Lmain_loop:
#ifdef STRNLEN
	cmp	r0, r5			/* gone too far? */
	bhs	.Lmaxed_out		/*   yes, return maxlen */
#endif
	ldr	r3, [r0], #4		/* load next word */
#if defined(_ARM_ARCH_6)
	/*
	 * Add 254 to each byte using the UQADD8 (unsigned saturating add 8)
	 * instruction.  For every non-NUL byte, the result for that byte will
	 * become 255.  For NUL, it will be 254.  When we complement the
	 * result, if the result is non-0 then we must have encountered a NUL.
	 */
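	/*
	 * For example, if the next four string bytes are 'a', 'b', NUL, 'D'
	 * (a little-endian load gives r3 = 0x44006261), then with
	 * r1 = 0xfefefefe:
	 *	uqadd8	-> r3 = 0xfffeffff  (only the NUL lane fails to saturate)
	 *	mvns	-> r3 = 0x00010000  (non-zero, so a NUL was found)
	 */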
	uqadd8	r3, r3, r1		/* magic happens here */
	mvns	r3, r3			/* is the complemented result non-0? */
	beq	.Lmain_loop		/*    no, then we encountered no NULs */
#else
	/*
	 * No fancy shortcuts so just test each byte lane for a NUL.
	 * (other tests for NULs in a word take more instructions/cycles).
	 */
	tst	r3, #BYTE0		/* is byte 0 a NUL? */
	tstne	r3, #BYTE1		/*   no, is byte 1 a NUL? */
	tstne	r3, #BYTE2		/*   no, is byte 2 a NUL? */
	tstne	r3, #BYTE3		/*   no, is byte 3 a NUL? */
	bne	.Lmain_loop		/*   no, then get next word */
#endif
#if defined(_ARM_ARCH_6)
	/*
	 * We encountered a NUL.  Find out where by doing a CLZ and then
	 * shifting right by 3.  That will be the number of non-NUL bytes.
	 */
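	/*
	 * Continuing the example above: REV turns 0x00010000 into
	 * 0x00000100, CLZ counts 23 leading zeros, and 23 >> 3 = 2, the
	 * number of non-NUL bytes ('a' and 'b') before the NUL in that word.
	 */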
#ifdef __ARMEL__
	rev	r3, r3			/* we want this in BE for the CLZ */
#endif
	clz	r3, r3			/* count how many leading zeros */
#ifdef __thumb__
	lsrs	r3, r3, #3
	adds	r0, r0, r3		/* divide that by 8 and add to count */
#else
	add	r0, r0, r3, lsr #3	/* divide that by 8 and add to count */
#endif
#else
	/*
	 * We encountered a NUL.  Count the non-NUL bytes that precede it,
	 * one byte lane at a time, in string order.
	 */
	tst	r3, #BYTE0		/* 1st byte was NUL? */
	beq	1f			/*   yes, done adding */
	add	r0, r0, #1		/* we have one more non-NUL byte */
	tst	r3, #BYTE1		/* 2nd byte was NUL? */
	beq	1f			/*   yes, done adding */
	add	r0, r0, #1		/* we have one more non-NUL byte */
	tst	r3, #BYTE2		/* 3rd byte was NUL? */
	addne	r0, r0, #1		/* no, we have one more non-NUL byte */
1:
#endif /* _ARM_ARCH_6 */
	/*
	 * r0 now points 4 past the NUL due to the post-inc.  Subtract the
	 * start of the string (which also had 4 added to it to compensate
	 * for the post-inc).
	 */
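	/*
	 * In the example above, if the string started at word-aligned
	 * address A and the word holding the NUL was loaded from A+4, the
	 * post-inc left r0 at A+8 and the 2 non-NUL bytes brought it to
	 * A+10; with r2 = A+4 the subtraction yields 6, the offset of the
	 * NUL, i.e. the string length.
	 */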
	subs	r0, r0, r2		/* subtract start to get length */
#ifdef STRNLEN
	cmp	r0, r4			/* is it larger than maxlen? */
#ifdef __thumb__
	it	hi
#endif
	movhi	r0, r4			/*   yes, return maxlen */
	pop	{r4, r5}		/* restore registers */
#endif
	RET				/* return */

#ifdef STRNLEN
.Lmaxed_out:
	mov	r0, r4			/* return maxlen */
	pop	{r4, r5}		/* restore registers */
	RET				/* return */
#endif
#if defined(__ARM_EABI__) && defined(__UNWIND_TABLES__)
	.cfi_endproc
# if !defined(__ARM_DWARF_EH__)
	.fnend
# endif
#endif
END(FUNCNAME)