1 1.6 matt /* $NetBSD: strlen.S,v 1.6 2011/01/15 07:31:12 matt Exp $ */ 2 1.1 christos 3 1.4 ross /*- 4 1.4 ross * Copyright (C) 2001 Martin J. Laubach <mjl (at) NetBSD.org> 5 1.4 ross * All rights reserved. 6 1.4 ross * 7 1.4 ross * Redistribution and use in source and binary forms, with or without 8 1.4 ross * modification, are permitted provided that the following conditions 9 1.4 ross * are met: 10 1.4 ross * 1. Redistributions of source code must retain the above copyright 11 1.4 ross * notice, this list of conditions and the following disclaimer. 12 1.4 ross * 2. Redistributions in binary form must reproduce the above copyright 13 1.4 ross * notice, this list of conditions and the following disclaimer in the 14 1.4 ross * documentation and/or other materials provided with the distribution. 15 1.4 ross * 3. The name of the author may not be used to endorse or promote products 16 1.4 ross * derived from this software without specific prior written permission. 17 1.4 ross * 18 1.4 ross * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 19 1.4 ross * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 20 1.4 ross * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 21 1.4 ross * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 22 1.4 ross * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 23 1.4 ross * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 1.4 ross * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 1.4 ross * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 1.4 ross * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 1.4 ross * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 1.4 ross */ 29 1.4 ross /*----------------------------------------------------------------------*/ 30 1.1 christos 31 1.6 matt #include <machine/asm.h> 32 1.4 ross 33 1.6 matt __RCSID("$NetBSD: strlen.S,v 1.6 2011/01/15 07:31:12 matt Exp $"); 34 1.4 ross 35 1.4 ross /*----------------------------------------------------------------------*/ 36 1.4 ross /* The algorithm here uses the following techniques: 37 1.4 ross 38 1.4 ross 1) Given a word 'x', we can test to see if it contains any 0 bytes 39 1.4 ross by subtracting 0x01010101, and seeing if any of the high bits of each 40 1.4 ross byte changed from 0 to 1. This works because the least significant 41 1.4 ross 0 byte must have had no incoming carry (otherwise it's not the least 42 1.4 ross significant), so it is 0x00 - 0x01 == 0xff. For all other 43 1.4 ross byte values, either they have the high bit set initially, or when 44 1.4 ross 1 is subtracted you get a value in the range 0x00-0x7f, none of which 45 1.4 ross have their high bit set. The expression here is 46 1.4 ross (x + 0xfefefeff) & ~(x | 0x7f7f7f7f), which gives 0x00000000 when 47 1.4 ross there were no 0x00 bytes in the word. 48 1.4 ross 49 1.4 ross 2) Given a word 'x', we can test to see _which_ byte was zero by 50 1.4 ross calculating ~(((x & 0x7f7f7f7f) + 0x7f7f7f7f) | x | 0x7f7f7f7f). 51 1.4 ross This produces 0x80 in each byte that was zero, and 0x00 in all 52 1.4 ross the other bytes. The '| 0x7f7f7f7f' clears the low 7 bits in each 53 1.4 ross byte, and the '| x' part ensures that bytes with the high bit set 54 1.4 ross produce 0x00. The addition will carry into the high bit of each byte 55 1.4 ross iff that byte had one of its low 7 bits set. We can then just see 56 1.4 ross which was the most significant bit set and divide by 8 to find how 57 1.4 ross many to add to the index. 58 1.4 ross This is from the book 'The PowerPC Compiler Writer's Guide', 59 1.4 ross by Steve Hoxey, Faraydon Karim, Bill Hay and Hank Warren. 60 1.4 ross */ 61 1.4 ross /*----------------------------------------------------------------------*/ 62 1.4 ross 63 1.4 ross .text 64 1.4 ross .align 4 65 1.4 ross 66 1.4 ross ENTRY(strlen) 67 1.4 ross 68 1.4 ross /* Setup constants */ 69 1.4 ross lis %r10, 0x7f7f 70 1.4 ross lis %r9, 0xfefe 71 1.4 ross ori %r10, %r10, 0x7f7f 72 1.4 ross ori %r9, %r9, 0xfeff 73 1.4 ross 74 1.4 ross /* Mask out leading bytes on non aligned strings */ 75 1.4 ross rlwinm. %r8, %r3, 3, 27, 28 /* leading bits to mask */ 76 1.5 ross #ifdef _LP64 77 1.5 ross clrrdi %r5, %r3, 2 /* clear low 2 addr bits */ 78 1.5 ross #else 79 1.4 ross clrrwi %r5, %r3, 2 /* clear low 2 addr bits */ 80 1.5 ross #endif 81 1.4 ross li %r0, -1 82 1.4 ross beq+ 3f /* skip alignment if already */ 83 1.4 ross /* aligned */ 84 1.4 ross 85 1.4 ross srw %r0, %r0, %r8 /* make 0000...1111 mask */ 86 1.4 ross 87 1.4 ross lwz %r7, 0(%r5) 88 1.4 ross nor %r0, %r0, %r0 /* invert mask */ 89 1.4 ross or %r7, %r7, %r0 /* make leading bytes != 0 */ 90 1.4 ross b 2f 91 1.4 ross 92 1.4 ross 3: subi %r5, %r5, 4 93 1.4 ross 94 1.4 ross 1: lwzu %r7, 4(%r5) /* fetch data word */ 95 1.4 ross 96 1.4 ross 2: nor %r0, %r7, %r10 /* do step 1 */ 97 1.4 ross add %r6, %r7, %r9 98 1.4 ross and. %r0, %r0, %r6 99 1.4 ross 100 1.4 ross beq+ 1b /* no NUL bytes here */ 101 1.4 ross 102 1.4 ross and %r8, %r7, %r10 /* ok, a NUL is somewhere */ 103 1.4 ross or %r7, %r7, %r10 /* do step 2 to find out */ 104 1.4 ross add %r0, %r8, %r10 /* where */ 105 1.4 ross nor %r8, %r7, %r0 106 1.4 ross 107 1.4 ross cntlzw %r0, %r8 /* offset from this word */ 108 1.4 ross srwi %r4, %r0, 3 109 1.4 ross 110 1.4 ross add %r4, %r5, %r4 /* r4 contains end pointer */ 111 1.4 ross /* NOTE: Keep it so this function returns the end pointer 112 1.4 ross in r4, so we can it use from other str* calls (strcat 113 1.4 ross comes to mind */ 114 1.4 ross 115 1.4 ross subf %r3, %r3, %r4 116 1.4 ross blr 117 1.6 matt END(strlen) 118 1.4 ross /*----------------------------------------------------------------------*/ 119