/*-
 * Copyright (c) 2013 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Matt Thomas of 3am Software Foundry.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <machine/asm.h>

RCSID("$NetBSD: strrchr_arm.S,v 1.6 2013/08/25 06:15:06 matt Exp $")

/*
 * Byte-lane masks and per-endian shift aliases for the word-at-a-time
 * loop below: BYTE0 is always the lane holding the lowest-addressed byte
 * of a loaded word, and lshi/lshis shift a bit pattern from one byte
 * lane towards the higher-addressed lanes.
 */
#ifdef __ARMEL__
#define	BYTE0	0x000000ff
#define	BYTE1	0x0000ff00
#define	BYTE2	0x00ff0000
#define	BYTE3	0xff000000
#define	lshi	lsl
#define	lshis	lsls
#else
#define	BYTE0	0xff000000
#define	BYTE1	0x00ff0000
#define	BYTE2	0x0000ff00
#define	BYTE3	0x000000ff
#define	lshi	lsr
#define	lshis	lsrs
#endif

/*
 * char *strrchr(const char *s, int c)
 *
 * In:	r0 = s, r1 = c (only the low byte is used).
 * Out:	r0 = pointer to the last occurrence of (char)c in s, or NULL.
 *	If c is NUL, the answer is s + strlen(s), computed by calling
 *	strlen directly.
 * Uses:	r1-r3 and ip as scratch; r4/r5 are saved/restored around the
 *	word loop; lr is saved/restored only on the strlen path.
 *
 * Strategy: byte-scan until s is word aligned, then scan a word at a
 * time.  On ARMv6+ the UQADD8/USUB8 SIMD instructions detect NUL bytes
 * and matching bytes in all four lanes at once (adding 254 with
 * unsigned saturation maps every non-zero byte to 255 and zero to 254,
 * so complementing leaves non-zero bits exactly in the zero lanes);
 * pre-v6 CPUs test each byte lane with the BYTE0..BYTE3 masks.
 */
ENTRY(strrchr)
	ands	r2, r1, #0xff		/* is the byte value NUL? */
	bne	1f			/* no, do it the hard way */
	push	{r0, lr}		/* save pointer and return addr */
	bl	PLT_SYM(strlen)		/* get length */
	pop	{r1, r2}		/* restore pointer / return addr */
	adds	r0, r0, r1		/* add pointer to length */
	RETr(r2)			/* return */

1:	mov	r1, r0			/* we use r0 as the return value */
	movs	r0, #0			/* return NULL by default */
2:	tst	r1, #3			/* test for word alignment */
	beq	.Lpre_main_loop		/* finally word aligned */
	ldrb	r3, [r1], #1		/* load a byte */
	cmp	r3, r2			/* did it match? */
#ifdef __thumb__
	it	eq
#endif
	subeq	r0, r1, #1		/* yes, remember that it did */
	cmp	r3, #0			/* was it NUL? */
	bne	2b			/* no, try next byte */
	RET				/* return */
.Lpre_main_loop:
	push	{r4, r5}		/* save some registers */
#if defined(_ARM_ARCH_7)
	movw	ip, #0xfefe		/* magic constant; 254 in each byte */
	movt	ip, #0xfefe		/* magic constant; 254 in each byte */
#elif defined(_ARM_ARCH_6)
	mov	ip, #0xfe		/* put 254 in low byte */
	orr	ip, ip, ip, lsl #8	/* move to next byte */
	orr	ip, ip, ip, lsl #16	/* move to next halfword */
#endif /* _ARM_ARCH_6 */
	orr	r2, r2, r2, lsl #8	/* move to next byte */
	orr	r2, r2, r2, lsl #16	/* move to next halfword */
.Lmain_loop:
	ldr	r3, [r1], #4		/* load next word */
#if defined(_ARM_ARCH_6)
	/*
	 * Add 254 to each byte using the UQADD8 (unsigned saturating add 8)
	 * instruction.  For every non-NUL byte, the result for that byte will
	 * become 255.  For NUL, it will be 254.  When we complement the
	 * result, if the result is non-0 then we must have encountered a NUL.
	 */
	uqadd8	r4, r3, ip		/* NUL detection happens here */
	usub8	r3, r3, r2		/* bias for char looked for? */
	uqadd8	r5, r3, ip		/* char detection happens here */
	ands	r3, r4, r5		/* merge results */
	mvns	r3, r3			/* is the complement non-0? */
	beq	.Lmain_loop		/* no, then keep going */

	mvns	r5, r5			/* did we find any matching bytes? */
	beq	.Ldone			/* no, then we hit the end, return */
	mvns	r4, r4			/* did we encounter a NUL? */
	beq	.Lfind_match		/* no, find matching byte */
	/*
	 * Copy the NUL bit to the following byte lanes.  Then clear any match
	 * bits in those byte lanes to prevent false positives in those bytes.
	 */
	bics	r5, r5, r4		/* clear any NUL match bits */
	beq	.Ldone			/* no remaining matches, we're done */
	lshis	r3, r4, #8		/* shift up a byte */
#ifdef __thumb__
	itt	ne
#endif
	orrsne	r3, r3, r3, lshi #8	/* if non 0, copy up to next byte */
	orrsne	r3, r3, r3, lshi #8	/* if non 0, copy up to last byte */
	bics	r5, r5, r3		/* clear match bits */
	beq	.Ldone			/* no remaining matches, we're done */
.Lfind_match:
#ifdef __ARMEL__
	rev	r5, r5			/* we want this in BE for the CLZ */
#endif
	/*
	 * If we have multiple matches, we want to select the "last" match
	 * in the word which will be the lowest bit set.
	 */
	subs	r3, r5, #1		/* subtract 1 */
	ands	r3, r3, r5		/* and with mask */
	eors	r5, r5, r3		/* only have the lowest bit set left */
	clz	r5, r5			/* count how many leading zeros */
	add	r0, r1, r5, lsr #3	/* divide that by 8 and add to count */
	subs	r0, r0, #4		/* compensate for the post-inc */
	cmp	r4, #0			/* did we read any NULs? */
	beq	.Lmain_loop		/* no, get next word */
#else
	/*
	 * No fancy shortcuts so just test each byte lane for a NUL.
	 * (other tests for NULs in a word take more instructions/cycles).
	 */
	eor	r4, r3, r2		/* xor .. */
	tst	r3, #BYTE0		/* is byte 0 a NUL? */
	beq	.Ldone			/* yes, then we're done */
	tst	r4, #BYTE0		/* is byte 0 a match? */
	subeq	r0, r1, #4		/* yes, remember its location */
	tst	r3, #BYTE1		/* is byte 1 a NUL? */
	beq	.Ldone			/* yes, then we're done */
	tst	r4, #BYTE1		/* is byte 1 a match? */
	subeq	r0, r1, #3		/* yes, remember its location */
	tst	r3, #BYTE2		/* is byte 2 a NUL? */
	beq	.Ldone			/* yes, then we're done */
	tst	r4, #BYTE2		/* is byte 2 a match? */
	subeq	r0, r1, #2		/* yes, remember its location */
	tst	r3, #BYTE3		/* is byte 3 a NUL? */
	beq	.Ldone			/* yes, then we're done */
	tst	r4, #BYTE3		/* is byte 3 a match? */
	subeq	r0, r1, #1		/* yes, remember its location */
	b	.Lmain_loop
#endif /* _ARM_ARCH_6 */
.Ldone:
	pop	{r4, r5}
	RET
END(strrchr)