/*-
 * Copyright (c) 2013 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Matt Thomas of 3am Software Foundry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

RCSID("$NetBSD: strrchr_arm.S,v 1.6 2013/08/25 06:15:06 matt Exp $")

#ifdef __ARMEL__
#define	BYTE0	0x000000ff
#define	BYTE1	0x0000ff00
#define	BYTE2	0x00ff0000
#define	BYTE3	0xff000000
#define	lshi	lsl
#define	lshis	lsls
#else
#define	BYTE0	0xff000000
#define	BYTE1	0x00ff0000
#define	BYTE2	0x0000ff00
#define	BYTE3	0x000000ff
#define	lshi	lsr
#define	lshis	lsrs
#endif
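/*
 * "lshi"/"lshis" shift a lane mask towards the byte lanes that hold
 * later characters of the string: on little-endian, later bytes sit
 * in higher-order lanes (shift left); on big-endian, in lower-order
 * lanes (shift right).
 */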

ENTRY(strrchr)
	ands	r2, r1, #0xff		/* is the byte value NUL? */
	bne	1f			/*   no, do it the hard way */
	push	{r0, lr}		/* save pointer and return addr */
	bl	PLT_SYM(strlen)		/* get length */
	pop	{r1, r2}		/* restore pointer / return addr */
	adds	r0, r0, r1		/* add pointer to length */
	RETr(r2)			/* return */
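	/*
	 * The C standard counts the terminating NUL as part of the string,
	 * so strrchr(s, '\0') must return a pointer to that NUL, i.e.
	 * s + strlen(s), which is what the shortcut above computes.
	 */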

1:	mov	r1, r0			/* we use r0 as the return value */
	movs	r0, #0			/* return NULL by default */
2:	tst	r1, #3			/* test for word alignment */
	beq	.Lpre_main_loop		/*   finally word aligned */
	ldrb	r3, [r1], #1		/* load a byte */
	cmp	r3, r2			/* did it match? */
#ifdef __thumb__
	it	eq
#endif
	subeq	r0, r1, #1		/*   yes, remember that it did */
	cmp	r3, #0			/* was it NUL? */
	bne	2b			/*   no, try next byte */
	RET				/* return */
.Lpre_main_loop:
	push	{r4, r5}		/* save some registers */
#if defined(_ARM_ARCH_7)
	movw	ip, #0xfefe		/* magic constant: 0xfefe in low half */
	movt	ip, #0xfefe		/*   and in high half: 254 in each byte */
#elif defined(_ARM_ARCH_6)
	mov	ip, #0xfe		/* put 254 in low byte */
	orr	ip, ip, ip, lsl #8	/* move to next byte */
	orr	ip, ip, ip, lsl #16	/* move to next halfword */
#endif /* _ARM_ARCH_6 */
	orr	r2, r2, r2, lsl #8	/* move to next byte */
	orr	r2, r2, r2, lsl #16	/* move to next halfword */
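	/*
	 * r2 now holds the search char replicated into all four byte
	 * lanes, e.g. searching for 'a' (0x61) gives r2 = 0x61616161.
	 */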
.Lmain_loop:
	ldr	r3, [r1], #4		/* load next word */
#if defined(_ARM_ARCH_6)
	/*
	 * Add 254 to each byte using the UQADD8 (unsigned saturating add 8)
	 * instruction.  For every non-NUL byte, the result for that byte will
	 * become 255.  For NUL, it will be 254.  When we complement the
	 * result, if the result is non-0 then we must have encountered a NUL.
	 */
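	/*
	 * Worked example (little-endian): for the word 0x00004241
	 * ("AB" followed by two NULs), UQADD8 with 0xfefefefe gives
	 * 0xfefeffff, whose complement 0x01010000 is non-zero exactly
	 * in the two NUL lanes.
	 */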
	uqadd8	r4, r3, ip		/* NUL detection happens here */
	usub8	r3, r3, r2		/* subtract search char: matching bytes become 0 */
	uqadd8	r5, r3, ip		/* char detection happens here */
	ands	r3, r4, r5		/* merge results */
	mvns	r3, r3			/* is the complement non-0? */
	beq	.Lmain_loop		/*   no, then keep going */

	mvns	r5, r5			/* did we find any matching bytes? */
	beq	.Ldone			/*   no, then we hit the end, return */
	mvns	r4, r4			/* did we encounter a NUL? */
	beq	.Lfind_match		/*   no, find matching byte */
	/*
	 * Copy the NUL bit to the following byte lanes.  Then clear any match
	 * bits in those byte lanes to prevent false positives in those bytes.
	 */
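	/*
	 * E.g. (little-endian) a NUL in byte lane 1 gives r4 = 0x00000100;
	 * the shift/or sequence below expands that to r3 = 0x01010000,
	 * masking off any matches in lanes 2 and 3, which lie past the NUL.
	 */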
	bics	r5, r5, r4		/* clear any NUL match bits */
	beq	.Ldone			/*   no remaining matches, we're done */
	lshis	r3, r4, #8		/* shift up a byte */
#ifdef __thumb__
	itt	ne
#endif
	orrsne	r3, r3, r3, lshi #8	/* if non-0, copy up to next byte */
	orrsne	r3, r3, r3, lshi #8	/* if non-0, copy up to last byte */
	bics	r5, r5, r3		/* clear match bits */
	beq	.Ldone			/*   no remaining matches, we're done */
.Lfind_match:
#ifdef __ARMEL__
	rev	r5, r5			/* we want this in BE for the CLZ */
#endif
	/*
	 * If we have multiple matches, we want to select the "last" match
	 * in the word, which will be the lowest set bit.
	 */
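	/*
	 * r5 ^ ((r5 - 1) & r5) isolates the lowest set bit, e.g. for
	 * r5 = 0x01000100: (r5 - 1) & r5 = 0x01000000 and the eor
	 * leaves 0x00000100; clz then gives the bit (and hence byte)
	 * offset of that last match.
	 */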
	subs	r3, r5, #1		/* subtract 1 */
	ands	r3, r3, r5		/* and with mask */
	eors	r5, r5, r3		/* only have the lowest bit set left */
	clz	r5, r5			/* count how many leading zeros */
	add	r0, r1, r5, lsr #3	/* divide by 8 and add to the pointer */
	subs	r0, r0, #4		/* compensate for the post-inc */
	cmp	r4, #0			/* did we read any NULs? */
	beq	.Lmain_loop		/*   no, get next word */
#else
	/*
	 * No fancy shortcuts, so just test each byte lane for a NUL
	 * (other tests for NULs in a word take more instructions/cycles).
	 */
	eor	r4, r3, r2		/* xor with search char: matching bytes become 0 */
	tst	r3, #BYTE0		/* is byte 0 a NUL? */
	beq	.Ldone			/*   yes, then we're done */
	tst	r4, #BYTE0		/* is byte 0 a match? */
	subeq	r0, r1, #4		/*   yes, remember its location */
	tst	r3, #BYTE1		/* is byte 1 a NUL? */
	beq	.Ldone			/*   yes, then we're done */
	tst	r4, #BYTE1		/* is byte 1 a match? */
	subeq	r0, r1, #3		/*   yes, remember its location */
	tst	r3, #BYTE2		/* is byte 2 a NUL? */
	beq	.Ldone			/*   yes, then we're done */
	tst	r4, #BYTE2		/* is byte 2 a match? */
	subeq	r0, r1, #2		/*   yes, remember its location */
	tst	r3, #BYTE3		/* is byte 3 a NUL? */
	beq	.Ldone			/*   yes, then we're done */
	tst	r4, #BYTE3		/* is byte 3 a match? */
	subeq	r0, r1, #1		/*   yes, remember its location */
	b	.Lmain_loop
#endif /* _ARM_ARCH_6 */
.Ldone:
	pop	{r4, r5}
	RET
END(strrchr)